Initial WebM release
diff --git a/vp8/decoder/arm/armv5/dequantize_v5.asm b/vp8/decoder/arm/armv5/dequantize_v5.asm
new file mode 100644
index 0000000..eb3f030
--- /dev/null
+++ b/vp8/decoder/arm/armv5/dequantize_v5.asm
@@ -0,0 +1,51 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_dequantize_b_armv5|
+
+ AREA |.text|, CODE, READONLY ; name this block of code
+
+q RN r0
+dqc RN r1
+cnt RN r2
+
+;void vp8_dequantize_b_armv5(short *Q, short *DQC) - in place: Q[i] *= DQC[i], i = 0..15
+|vp8_dequantize_b_armv5| PROC
+ stmdb sp!, {r4, lr} ; r4 and lr are used as scratch below
+ ldr r3, [q] ; r3 = Q[1]:Q[0] (two shorts per word load)
+ ldr r4, [dqc], #8 ; r4 = DQC[1]:DQC[0], dqc advances by 4 shorts
+
+ mov cnt, #4 ; 4 iterations x 4 coefficients
+dequant_loop
+ smulbb lr, r3, r4 ; product of the low halfwords
+ smultt r12, r3, r4 ; product of the high halfwords
+
+ ldr r3, [q, #4] ; fetch the next Q pair before q is advanced
+ ldr r4, [dqc, #-4] ; matching DQC pair (dqc was already advanced by 8)
+
+ strh lr, [q], #2 ; low 16 bits of each product go back into Q
+ strh r12, [q], #2
+
+ smulbb lr, r3, r4
+ smultt r12, r3, r4
+
+ subs cnt, cnt, #1 ; flags set here are consumed by ldrne and bne below
+ ldrne r3, [q, #4] ; preload the next iteration only if there is one
+ ldrne r4, [dqc], #8
+
+ strh lr, [q], #2
+ strh r12, [q], #2
+
+ bne dequant_loop
+
+ ldmia sp!, {r4, pc} ; restore r4 and return
+ ENDP ;|vp8_dequantize_b_armv5|
+
+ END
diff --git a/vp8/decoder/arm/armv6/dboolhuff_v6.asm b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
new file mode 100644
index 0000000..143e33e
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
@@ -0,0 +1,162 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_decode_value_v6|
+ EXPORT |vp8dx_start_decode_v6|
+ EXPORT |vp8dx_stop_decode_v6|
+ EXPORT |vp8dx_decode_bool_v6|
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ INCLUDE vpx_asm_offsets.asm
+
+br RN r0
+prob RN r1
+bits RN r1
+ AREA |.text|, CODE, READONLY ; name this block of code
+
+; int z = 0;
+; int bit;
+; for ( bit=bits-1; bit>=0; bit-- )
+; {
+; z |= (vp8dx_decode_bool(br, 0x80)<<bit);
+; }
+; return z;
+
+;int vp8_decode_value_v6 ( BOOL_DECODER *br, int bits )
+|vp8_decode_value_v6| PROC
+ stmdb sp!, {r4 - r6, lr}
+ mov r4, br ; r4 = decoder pointer, kept across the calls
+ mov r5, bits ; r5 = bit index, counts down from bits-1
+ mov r6, #0 ; r6 = z, the accumulated result
+
+ subs r5, r5, #1
+ bmi decode_value_exit ; bits-1 negative: return 0 without decoding
+
+decode_value_loop
+ mov prob, #0x80 ; every bit is decoded with probability 0x80
+ mov br, r4 ; r0 held the previous return value, reload it
+ bl vp8dx_decode_bool_v6_internal ; needed for conversion to s file
+ orr r6, r6, r0, lsl r5 ; z |= bit << r5 (most significant bit first)
+ subs r5, r5, #1
+ bpl decode_value_loop
+
+decode_value_exit
+ mov r0, r6 ; return z
+ ldmia sp!, {r4 - r6, pc}
+ ENDP ; |vp8_decode_value_v6|
+
+
+;void vp8dx_start_decode_v6 ( BOOL_DECODER *br, unsigned char *source )
+|vp8dx_start_decode_v6| PROC
+ stmdb sp!, {r4 - r5, lr}
+ mov r2, #0
+ mov r3, #255
+
+ str r2, [br, #bool_decoder_lowvalue] ; lowvalue = 0
+ str r3, [br, #bool_decoder_range] ; range = 255
+ str r1, [br, #bool_decoder_buffer] ; buffer = source
+
+ mov r3, #8
+ mov r2, #4
+ str r3, [br, #bool_decoder_count] ; count = 8
+ str r2, [br, #bool_decoder_pos] ; pos = 4: the four bytes read below are consumed
+
+ ldrb r2, [r1, #3] ; source[0..3] are read unconditionally (no length check here)
+ ldrb r3, [r1, #2]
+ ldrb r4, [r1, #1]
+ ldrb r5, [r1]
+
+ orr r1, r2, r3, lsl #8 ; assemble the bytes big-endian:
+ orr r1, r1, r4, lsl #16 ; source[0] lands in bits 31..24,
+ orr r1, r1, r5, lsl #24 ; source[3] in bits 7..0
+
+ str r1, [br, #bool_decoder_value] ; value = the assembled word
+
+ ldmia sp!, {r4 - r5, pc}
+ ENDP ; |vp8dx_start_decode_v6|
+
+
+;void vp8dx_stop_decode_v6 ( BOOL_DECODER *bc ); no-op: returns at once, bc is not touched
+|vp8dx_stop_decode_v6| PROC
+ bx lr ; interworking return; mov pc, lr does not switch state before ARMv7
+ ENDP ; |vp8dx_stop_decode_v6|
+
+
+; bigsplit RN r1
+; buffer_v RN r1
+; count_v RN r4
+; range_v RN r2
+; value_v RN r3
+; pos_v RN r5
+; split RN r6
+; bit RN lr
+;int vp8dx_decode_bool_v6 ( BOOL_DECODER *br, int probability )
+|vp8dx_decode_bool_v6| PROC
+vp8dx_decode_bool_v6_internal
+ stmdb sp!, {r4 - r6, lr}
+
+ ldr r2, [br, #bool_decoder_range] ; r2 = range
+ ldr r3, [br, #bool_decoder_value] ; r3 = value
+
+ mov r6, r2, lsl #8
+ sub r6, r6, #256 ; split = 1 + (((range-1) * probability) >> 8)
+ mov r12, #1
+ smlawb r6, r6, prob, r12 ; r6 = ((range-1)<<8 * prob) >> 16, plus 1
+
+ mov lr, #0 ; bit = 0 unless value reaches the split
+ subs r5, r3, r6, lsl #24 ; r5 = value - (split << 24), carry set if no borrow
+
+ ;cmp r3, r1
+ movhs lr, #1 ; value >= split<<24: bit = 1,
+ movhs r3, r5 ; value -= split<<24,
+ subhs r2, r2, r6 ; range -= split
+ movlo r2, r6 ; otherwise range = split
+
+ cmp r2, #0x80
+ blt range_less_0x80 ; renormalize only when range dropped below 0x80
+ ;strd r2, r3, [br, #bool_decoder_range]
+ str r2, [br, #bool_decoder_range]
+ str r3, [br, #bool_decoder_value]
+ mov r0, lr ; return the decoded bit
+ ldmia sp!, {r4 - r6, pc}
+
+range_less_0x80
+ ldr r5, [br, #bool_decoder_pos]
+ ldr r1, [br, #bool_decoder_buffer]
+ ldr r4, [br, #bool_decoder_count]
+ add r1, r1, r5 ; r1 = address of the next unread byte
+
+ clz r12, r2
+ sub r12, r12, #24 ; shift = leading zeros of range counted within its low byte
+ subs r4, r4, r12 ; count -= shift; le means a new byte is needed
+ ldrleb r6, [r1], #1 ; fetch that byte
+ mov r2, r2, lsl r12 ; range <<= shift
+ mov r3, r3, lsl r12 ; value <<= shift
+ addle r4, r4, #8 ; count += 8
+ rsble r12, r4, #8 ; r12 = 8 - count: position of the new byte in value
+ addle r5, r5, #1 ; pos++
+ orrle r3, r3, r6, lsl r12 ; value |= byte << (8 - count)
+
+ ;strd r2, r3, [br, #bool_decoder_range]
+ ;strd r4, r5, [br, #bool_decoder_count]
+ str r2, [br, #bool_decoder_range]
+ str r3, [br, #bool_decoder_value]
+ str r4, [br, #bool_decoder_count]
+ str r5, [br, #bool_decoder_pos]
+
+ mov r0, lr ; return the decoded bit
+
+ ldmia sp!, {r4 - r6, pc}
+ ENDP ; |vp8dx_decode_bool_v6|
+
+ END
diff --git a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
new file mode 100644
index 0000000..3daa9b3
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
@@ -0,0 +1,202 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_dequant_dc_idct_v6|
+ ; ARM
+ ; REQUIRE8
+ ; PRESERVE8
+
+ AREA |.text|, CODE, READONLY ; name this block of code
+;void vp8_dequant_dc_idct_v6(short *input, short *dq, short *output, int pitch,int Dc)
+|vp8_dequant_dc_idct_v6| PROC
+ stmdb sp!, {r4-r11, lr}
+
+ ldr r6, [sp, #36] ;load Dc (stack argument, above the 9 registers just pushed)
+
+ ldr r4, [r0] ;input
+ ldr r5, [r1], #4 ;dq
+
+ sub sp, sp, #4 ; one stack slot for the input pointer
+ str r0, [sp] ; kept for the clear at the end
+
+ smultt r7, r4, r5 ; input[1] * dq[1]; input[0] is replaced by Dc below
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2 ; input[0] = Dc, not multiplied by dq[0]
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ mov r12, #3 ; 3 iterations x 4 coefficients finish the 16
+
+dequant_dc_idct_loop
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ subs r12, r12, #1
+
+ ldrne r4, [r0, #4]
+ ldrne r5, [r1], #4
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ bne dequant_dc_idct_loop
+
+ sub r0, r0, #32 ; rewind to the start of the 16-short block
+ mov r1, r2 ; r1 = output
+ mov r2, r3 ; r2 = pitch, used as a byte offset below
+
+; short_idct4x4llm_v6_dual
+
+ mov r3, #0x00004E00 ; cos
+ orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
+ mov r4, #0x00008A00 ; sin
+ orr r4, r4, #0x0000008C ; sinpi8sqrt2
+ mov r5, #0x2 ; i=2 i
+loop1_dual_11
+ ldr r6, [r0, #(4*2)] ; i5 | i4 5|4
+ ldr r12, [r0, #(12*2)] ; i13 | i12 13|12
+ ldr r14, [r0, #(8*2)] ; i9 | i8 9|8
+
+ smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
+ smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c
+ smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
+ smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s
+ pkhbt r7, r7, r9, lsl #16 ; 5c | 4c
+ smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c
+ pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
+ uadd16 r6, r6, r7 ; 5c+5 | 4c+4
+ smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s
+ smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c
+ smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s
+ subs r5, r5, #0x1 ; i-- --
+ pkhbt r9, r9, r11, lsl #16 ; 13c | 12c
+ ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0
+ pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
+ uadd16 r7, r12, r9 ; 13c+13 | 12c+12
+ usub16 r7, r8, r7 ; c c
+ uadd16 r6, r6, r10 ; d d
+ uadd16 r10, r11, r14 ; a a
+ usub16 r8, r11, r14 ; b b
+ uadd16 r9, r10, r6 ; a+d a+d
+ usub16 r10, r10, r6 ; a-d a-d
+ uadd16 r6, r8, r7 ; b+c b+c
+ usub16 r7, r8, r7 ; b-c b-c
+ str r6, [r1, r2] ; o5 | o4
+ add r6, r2, r2 ; pitch * 2 p2
+ str r7, [r1, r6] ; o9 | o8
+ add r6, r6, r2 ; pitch * 3 p3
+ str r10, [r1, r6] ; o13 | o12
+ str r9, [r1], #0x4 ; o1 | o0 ++
+ bne loop1_dual_11 ;
+ mov r5, #0x2 ; i=2 i
+ sub r0, r1, #8 ; reset input/output i/o
+loop2_dual_22
+ ldr r6, [r0, r2] ; i5 | i4 5|4
+ ldr r1, [r0] ; i1 | i0 1|0
+ ldr r12, [r0, #0x4] ; i3 | i2 3|2
+ add r14, r2, #0x4 ; pitch + 4 bytes (2 shorts) p+2
+ ldr r14, [r0, r14] ; i7 | i6 7|6
+ smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
+ smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c
+ smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
+ smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s
+ pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4
+ pkhbt r7, r9, r7, lsl #16 ; 1c | 5c
+ pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 (c) tc1
+ pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5
+ uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2
+ pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6
+ uadd16 r10, r11, r9 ; a a
+ usub16 r9, r11, r9 ; b b
+ pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7
+ subs r5, r5, #0x1 ; i-- --
+ smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c
+ smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s
+ smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c
+ smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s
+
+ pkhbt r7, r12, r7, lsl #16 ; 3c | 7c
+ pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1
+ uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2
+ usub16 r12, r8, r6 ; c (o1 | o5) c
+ uadd16 r6, r11, r1 ; d (o3 | o7) d
+ uadd16 r7, r10, r6 ; a+d a+d
+ mov r8, #0x4 ; set up 4's 4
+ orr r8, r8, #0x40000 ; 4|4
+ usub16 r6, r10, r6 ; a-d a-d
+ uadd16 r6, r6, r8 ; a-d+4 3|7
+ uadd16 r7, r7, r8 ; a+d+4 0|4
+ uadd16 r10, r9, r12 ; b+c b+c
+ usub16 r1, r9, r12 ; b-c b-c
+ uadd16 r10, r10, r8 ; b+c+4 1|5
+ uadd16 r1, r1, r8 ; b-c+4 2|6
+ mov r8, r10, asr #19 ; o1 >> 3
+ strh r8, [r0, #2] ; o1
+ mov r8, r1, asr #19 ; o2 >> 3
+ strh r8, [r0, #4] ; o2
+ mov r8, r6, asr #19 ; o3 >> 3
+ strh r8, [r0, #6] ; o3
+ mov r8, r7, asr #19 ; o0 >> 3
+ strh r8, [r0], r2 ; o0 +p
+ sxth r10, r10 ;
+ mov r8, r10, asr #3 ; o5 >> 3
+ strh r8, [r0, #2] ; o5
+ sxth r1, r1 ;
+ mov r8, r1, asr #3 ; o6 >> 3
+ strh r8, [r0, #4] ; o6
+ sxth r6, r6 ;
+ mov r8, r6, asr #3 ; o7 >> 3
+ strh r8, [r0, #6] ; o7
+ sxth r7, r7 ;
+ mov r8, r7, asr #3 ; o4 >> 3
+ strh r8, [r0], r2 ; o4 +p
+;;;;; subs r5, r5, #0x1 ; i-- --
+ bne loop2_dual_22 ;
+
+
+;vpx_memset: zero the 32 bytes of the input block
+ ldr r0, [sp] ; input pointer saved at entry
+ add sp, sp, #4
+
+ mov r12, #0
+ str r12, [r0]
+ str r12, [r0, #4]
+ str r12, [r0, #8]
+ str r12, [r0, #12]
+ str r12, [r0, #16]
+ str r12, [r0, #20]
+ str r12, [r0, #24]
+ str r12, [r0, #28]
+
+ ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
+
+ ENDP ;|vp8_dequant_dc_idct_v6|
+
+ END
diff --git a/vp8/decoder/arm/armv6/dequantidct_v6.asm b/vp8/decoder/arm/armv6/dequantidct_v6.asm
new file mode 100644
index 0000000..61bb48d
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequantidct_v6.asm
@@ -0,0 +1,183 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_dequant_idct_v6|
+ ; ARM
+ ; REQUIRE8
+ ; PRESERVE8
+
+ AREA |.text|, CODE, READONLY ; name this block of code
+;void vp8_dequant_idct_v6(short *input, short *dq, short *output, int pitch)
+|vp8_dequant_idct_v6| PROC
+ stmdb sp!, {r4-r11, lr}
+
+ ldr r4, [r0] ;input
+ ldr r5, [r1], #4 ;dq
+
+ sub sp, sp, #4 ; one stack slot for the input pointer
+ str r0, [sp] ; kept for the clear at the end
+
+ mov r12, #4 ; 4 iterations x 4 coefficients
+
+dequant_idct_loop
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ ldr r4, [r0, #4] ;input
+ ldr r5, [r1], #4 ;dq
+
+ strh r6, [r0], #2 ; products are written back over the input
+ strh r7, [r0], #2
+
+ smulbb r6, r4, r5
+ smultt r7, r4, r5
+
+ subs r12, r12, #1
+
+ ldrne r4, [r0, #4]
+ ldrne r5, [r1], #4
+
+ strh r6, [r0], #2
+ strh r7, [r0], #2
+
+ bne dequant_idct_loop
+
+ sub r0, r0, #32 ; rewind to the start of the 16-short block
+ mov r1, r2 ; r1 = output
+ mov r2, r3 ; r2 = pitch, used as a byte offset below
+
+; short_idct4x4llm_v6_dual
+
+ mov r3, #0x00004E00 ; cos
+ orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
+ mov r4, #0x00008A00 ; sin
+ orr r4, r4, #0x0000008C ; sinpi8sqrt2
+ mov r5, #0x2 ; i=2 i
+loop1_dual_1
+ ldr r6, [r0, #(4*2)] ; i5 | i4 5|4
+ ldr r12, [r0, #(12*2)] ; i13 | i12 13|12
+ ldr r14, [r0, #(8*2)] ; i9 | i8 9|8
+
+ smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
+ smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c
+ smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
+ smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s
+ pkhbt r7, r7, r9, lsl #16 ; 5c | 4c
+ smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c
+ pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
+ uadd16 r6, r6, r7 ; 5c+5 | 4c+4
+ smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s
+ smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c
+ smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s
+ subs r5, r5, #0x1 ; i-- --
+ pkhbt r9, r9, r11, lsl #16 ; 13c | 12c
+ ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0
+ pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
+ uadd16 r7, r12, r9 ; 13c+13 | 12c+12
+ usub16 r7, r8, r7 ; c c
+ uadd16 r6, r6, r10 ; d d
+ uadd16 r10, r11, r14 ; a a
+ usub16 r8, r11, r14 ; b b
+ uadd16 r9, r10, r6 ; a+d a+d
+ usub16 r10, r10, r6 ; a-d a-d
+ uadd16 r6, r8, r7 ; b+c b+c
+ usub16 r7, r8, r7 ; b-c b-c
+ str r6, [r1, r2] ; o5 | o4
+ add r6, r2, r2 ; pitch * 2 p2
+ str r7, [r1, r6] ; o9 | o8
+ add r6, r6, r2 ; pitch * 3 p3
+ str r10, [r1, r6] ; o13 | o12
+ str r9, [r1], #0x4 ; o1 | o0 ++
+ bne loop1_dual_1 ;
+ mov r5, #0x2 ; i=2 i
+ sub r0, r1, #8 ; reset input/output i/o
+loop2_dual_2
+ ldr r6, [r0, r2] ; i5 | i4 5|4
+ ldr r1, [r0] ; i1 | i0 1|0
+ ldr r12, [r0, #0x4] ; i3 | i2 3|2
+ add r14, r2, #0x4 ; pitch + 4 bytes (2 shorts) p+2
+ ldr r14, [r0, r14] ; i7 | i6 7|6
+ smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
+ smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c
+ smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
+ smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s
+ pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4
+ pkhbt r7, r9, r7, lsl #16 ; 1c | 5c
+ pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 (c) tc1
+ pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5
+ uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2
+ pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6
+ uadd16 r10, r11, r9 ; a a
+ usub16 r9, r11, r9 ; b b
+ pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7
+ subs r5, r5, #0x1 ; i-- --
+ smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c
+ smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s
+ smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c
+ smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s
+
+ pkhbt r7, r12, r7, lsl #16 ; 3c | 7c
+ pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1
+ uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2
+ usub16 r12, r8, r6 ; c (o1 | o5) c
+ uadd16 r6, r11, r1 ; d (o3 | o7) d
+ uadd16 r7, r10, r6 ; a+d a+d
+ mov r8, #0x4 ; set up 4's 4
+ orr r8, r8, #0x40000 ; 4|4
+ usub16 r6, r10, r6 ; a-d a-d
+ uadd16 r6, r6, r8 ; a-d+4 3|7
+ uadd16 r7, r7, r8 ; a+d+4 0|4
+ uadd16 r10, r9, r12 ; b+c b+c
+ usub16 r1, r9, r12 ; b-c b-c
+ uadd16 r10, r10, r8 ; b+c+4 1|5
+ uadd16 r1, r1, r8 ; b-c+4 2|6
+ mov r8, r10, asr #19 ; o1 >> 3
+ strh r8, [r0, #2] ; o1
+ mov r8, r1, asr #19 ; o2 >> 3
+ strh r8, [r0, #4] ; o2
+ mov r8, r6, asr #19 ; o3 >> 3
+ strh r8, [r0, #6] ; o3
+ mov r8, r7, asr #19 ; o0 >> 3
+ strh r8, [r0], r2 ; o0 +p
+ sxth r10, r10 ;
+ mov r8, r10, asr #3 ; o5 >> 3
+ strh r8, [r0, #2] ; o5
+ sxth r1, r1 ;
+ mov r8, r1, asr #3 ; o6 >> 3
+ strh r8, [r0, #4] ; o6
+ sxth r6, r6 ;
+ mov r8, r6, asr #3 ; o7 >> 3
+ strh r8, [r0, #6] ; o7
+ sxth r7, r7 ;
+ mov r8, r7, asr #3 ; o4 >> 3
+ strh r8, [r0], r2 ; o4 +p
+;;;;; subs r5, r5, #0x1 ; i-- --
+ bne loop2_dual_2 ;
+ ;
+
+;vpx_memset: zero the 32 bytes of the input block
+ ldr r0, [sp] ; input pointer saved at entry
+ add sp, sp, #4
+
+ mov r12, #0
+ str r12, [r0]
+ str r12, [r0, #4]
+ str r12, [r0, #8]
+ str r12, [r0, #12]
+ str r12, [r0, #16]
+ str r12, [r0, #20]
+ str r12, [r0, #24]
+ str r12, [r0, #28]
+
+ ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
+
+ ENDP ;|vp8_dequant_idct_v6|
+
+ END
diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/decoder/arm/armv6/dequantize_v6.asm
new file mode 100644
index 0000000..95e3859
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequantize_v6.asm
@@ -0,0 +1,68 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_dequantize_b_loop_v6|
+
+ AREA |.text|, CODE, READONLY ; name this block of code
+;-------------------------------
+;void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
+; r0 short *Q,
+; r1 short *DQC
+; r2 short *DQ
+|vp8_dequantize_b_loop_v6| PROC
+ stmdb sp!, {r4-r9, lr}
+
+ ldr r3, [r0] ;load Q
+ ldr r4, [r1] ;load DQC
+ ldr r5, [r0, #4] ; each word holds two shorts
+ ldr r6, [r1, #4]
+
+ mov r12, #2 ;loop counter (8 coefficients per iteration)
+
+dequant_loop
+ smulbb r7, r3, r4 ;multiply
+ smultt r8, r3, r4
+ smulbb r9, r5, r6
+ smultt lr, r5, r6
+
+ ldr r3, [r0, #8] ; second group of four for this iteration
+ ldr r4, [r1, #8]
+ ldr r5, [r0, #12]
+ ldr r6, [r1, #12]
+
+ strh r7, [r2], #2 ;store result
+ smulbb r7, r3, r4 ;multiply
+ strh r8, [r2], #2
+ smultt r8, r3, r4
+ strh r9, [r2], #2
+ smulbb r9, r5, r6
+ strh lr, [r2], #2
+ smultt lr, r5, r6
+
+ subs r12, r12, #1 ; flags are consumed by the ldrne and bne below
+
+ add r0, r0, #16 ; add (no s suffix) leaves the flags alone
+ add r1, r1, #16
+
+ ldrne r3, [r0] ; preload the next iteration only if there is one
+ strh r7, [r2], #2 ;store result
+ ldrne r4, [r1]
+ strh r8, [r2], #2
+ ldrne r5, [r0, #4]
+ strh r9, [r2], #2
+ ldrne r6, [r1, #4]
+ strh lr, [r2], #2
+
+ bne dequant_loop
+
+ ldmia sp!, {r4-r9, pc}
+ ENDP ;|vp8_dequantize_b_loop_v6|
+
+ END
diff --git a/vp8/decoder/arm/dboolhuff_arm.h b/vp8/decoder/arm/dboolhuff_arm.h
new file mode 100644
index 0000000..495004f
--- /dev/null
+++ b/vp8/decoder/arm/dboolhuff_arm.h
@@ -0,0 +1,49 @@
+#ifndef DBOOLHUFF_ARM_H
+#define DBOOLHUFF_ARM_H
+
+/* JLK
+ * There are currently no arm-optimized versions of
+ * these functions. As they are implemented, they
+ * can be uncommented below and added to
+ * arm/dsystemdependent.c
+ *
+ * The existing asm code is likely so different as
+ * to be useless. However, it's been left (for now)
+ * for reference.
+ */
+/*
+#if HAVE_ARMV6
+#undef vp8_dbool_start
+#define vp8_dbool_start vp8dx_start_decode_v6
+
+#undef vp8_dbool_stop
+#define vp8_dbool_stop vp8dx_stop_decode_v6
+
+#undef vp8_dbool_fill
+#define vp8_dbool_fill vp8_bool_decoder_fill_v6
+
+#undef vp8_dbool_debool
+#define vp8_dbool_debool vp8_decode_bool_v6
+
+#undef vp8_dbool_devalue
+#define vp8_dbool_devalue vp8_decode_value_v6
+#endif // HAVE_ARMV6
+
+#if HAVE_ARMV7
+#undef vp8_dbool_start
+#define vp8_dbool_start vp8dx_start_decode_neon
+
+#undef vp8_dbool_stop
+#define vp8_dbool_stop vp8dx_stop_decode_neon
+
+#undef vp8_dbool_fill
+#define vp8_dbool_fill vp8_bool_decoder_fill_neon
+
+#undef vp8_dbool_debool
+#define vp8_dbool_debool vp8_decode_bool_neon
+
+#undef vp8_dbool_devalue
+#define vp8_dbool_devalue vp8_decode_value_neon
+#endif // HAVE_ARMV7
+*/
+#endif // DBOOLHUFF_ARM_H
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
new file mode 100644
index 0000000..54006a9
--- /dev/null
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "dequantize.h"
+#include "predictdc.h"
+#include "idct.h"
+#include "vpx_mem/vpx_mem.h"
+
+#if HAVE_ARMV7
+extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ);
+#endif
+
+#if HAVE_ARMV6
+extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
+#endif
+
+#if HAVE_ARMV7
+
+/* Pass block d's qcoeff, dequant table and dqcoeff buffers to the NEON loop. */
+void vp8_dequantize_b_neon(BLOCKD *d)
+{
+    short *DQ = d->dqcoeff;
+    short *Q = d->qcoeff;
+    short *DQC = &d->dequant[0][0];
+
+    vp8_dequantize_b_loop_neon(Q, DQC, DQ);
+}
+#endif
+
+#if HAVE_ARMV6
+/* Pass block d's qcoeff, dequant table and dqcoeff buffers to the ARMv6 loop. */
+void vp8_dequantize_b_v6(BLOCKD *d)
+{
+    short *DQ = d->dqcoeff;
+    short *Q = d->qcoeff;
+    short *DQC = &d->dequant[0][0];
+
+    vp8_dequantize_b_loop_v6(Q, DQC, DQ);
+}
+#endif
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
new file mode 100644
index 0000000..c8a61a4
--- /dev/null
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#ifndef DEQUANTIZE_ARM_H
+#define DEQUANTIZE_ARM_H
+
+#if HAVE_ARMV6 /* point the three dequant hook macros at the ARMv6 routines */
+extern prototype_dequant_block(vp8_dequantize_b_v6);
+extern prototype_dequant_idct(vp8_dequant_idct_v6);
+extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_v6);
+
+#undef vp8_dequant_block
+#define vp8_dequant_block vp8_dequantize_b_v6
+
+#undef vp8_dequant_idct
+#define vp8_dequant_idct vp8_dequant_idct_v6
+
+#undef vp8_dequant_idct_dc
+#define vp8_dequant_idct_dc vp8_dequant_dc_idct_v6
+#endif
+
+#if HAVE_ARMV7 /* placed after the ARMv6 group, so these definitions win when both are set */
+extern prototype_dequant_block(vp8_dequantize_b_neon);
+extern prototype_dequant_idct(vp8_dequant_idct_neon);
+extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_neon);
+
+#undef vp8_dequant_block
+#define vp8_dequant_block vp8_dequantize_b_neon
+
+#undef vp8_dequant_idct
+#define vp8_dequant_idct vp8_dequant_idct_neon
+
+#undef vp8_dequant_idct_dc
+#define vp8_dequant_idct_dc vp8_dequant_dc_idct_neon
+#endif
+
+#endif
diff --git a/vp8/decoder/arm/detokenizearm_sjl.c b/vp8/decoder/arm/detokenizearm_sjl.c
new file mode 100644
index 0000000..c714452
--- /dev/null
+++ b/vp8/decoder/arm/detokenizearm_sjl.c
@@ -0,0 +1,730 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "type_aliases.h"
+#include "blockd.h"
+#include "onyxd_int.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
+
+#define BR_COUNT 8
+#define BOOL_DATA UINT8
+
+#define OCB_X (PREV_COEF_CONTEXTS * ENTROPY_NODES) /* parenthesized: safe in any expression; vp8_coef_bands_x[c] is added to Prob for position c; entries must fit UINT8 - TODO confirm */
+//ALIGN16 UINT16 onyx_coef_bands_x[16] = { 0, 1*OCB_X, 2*OCB_X, 3*OCB_X, 6*OCB_X, 4*OCB_X, 5*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 7*OCB_X};
+DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
+
+#define EOB_CONTEXT_NODE 0 /* the *_CONTEXT_NODE values index Prob[] in the token decode further down */
+#define ZERO_CONTEXT_NODE 1
+#define ONE_CONTEXT_NODE 2
+#define LOW_VAL_CONTEXT_NODE 3
+#define TWO_CONTEXT_NODE 4
+#define THREE_CONTEXT_NODE 5
+#define HIGH_LOW_CONTEXT_NODE 6
+#define CAT_ONE_CONTEXT_NODE 7
+#define CAT_THREEFOUR_CONTEXT_NODE 8
+#define CAT_THREE_CONTEXT_NODE 9
+#define CAT_FIVE_CONTEXT_NODE 10
+
+
+
+
+DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
+{ /* one entry per token; the decode code reads the fields as min_val, Length and Probs[] */
+ { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ZERO_TOKEN
+ { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ONE_TOKEN
+ { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //TWO_TOKEN
+ { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //THREE_TOKEN
+ { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //FOUR_TOKEN
+ { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY1
+ { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY2
+ { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY3
+ { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY4
+ { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY5
+ { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, //DCT_VAL_CATEGORY6
+ { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, // EOB TOKEN
+};
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+DECLARE_ALIGNED(16, const UINT8, vp8_block2context_leftabove[25*3]) =
+{ /* three 25-entry rows stored back to back: context, left, above (one entry per block 0..24) */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, //end of vp8_block2context
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0, //end of vp8_block2left
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0 //end of vp8_block2above
+};
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
+{
+    /* Zero the above/left entropy-context entry of each block of this
+     * macroblock. Blocks 0..23 are always cleared; block 24 (looked up
+     * under Y2CONTEXT) is cleared only when the mode is neither B_PRED
+     * nor SPLITMV. */
+    ENTROPY_CONTEXT **const above = x->above_context;
+    ENTROPY_CONTEXT(* const left)[4] = x->left_context;
+    int blk;
+
+    /* Blocks 0..23. */
+    for (blk = 0; blk < 24; blk++)
+    {
+        const int ctx = vp8_block2context[blk];
+
+        left[ctx][vp8_block2left[blk]] = 0;
+        above[ctx][vp8_block2above[blk]] = 0;
+    }
+
+    /* These two modes leave block 24 untouched. */
+    if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV)
+        return;
+
+    /* Block 24. */
+    left[Y2CONTEXT][vp8_block2left[24]] = 0;
+    above[Y2CONTEXT][vp8_block2above[24]] = 0;
+}
+
+#define ONYXBLOCK2CONTEXT_OFFSET 0
+#define ONYXBLOCK2LEFT_OFFSET 25
+#define ONYXBLOCK2ABOVE_OFFSET 50
+
+DECLARE_ALIGNED(16, static const unsigned char, norm[128]) = /* storage class first; norm[r] = left shift taking r (1..127) into 0x80..0xff */
+{
+    0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+void init_detokenizer(VP8D_COMP *dx)
+{
+    /* Cache in dx->detoken the lookup-table pointers, the macroblock's
+     * coefficient buffer and one probability pointer per block type. */
+    const VP8_COMMON *const cm = &dx->common;
+    int type;
+
+    /* Lookup tables; the casts match the detoken field types. */
+    dx->detoken.teb_base_ptr = (TOKENEXTRABITS *)vp8d_token_extra_bits2;
+    dx->detoken.scan = (int *)vp8_default_zig_zag1d;
+    dx->detoken.ptr_onyx_coef_bands_x = vp8_coef_bands_x;
+    dx->detoken.ptr_onyxblock2context_leftabove = (UINT8 *)vp8_block2context_leftabove;
+    dx->detoken.vp8_coef_tree_ptr = (vp8_tree_index *)vp8_coef_tree;
+    dx->detoken.norm_ptr = (unsigned char *)norm;
+
+    dx->detoken.qcoeff_start_ptr = &dx->mb.qcoeff[0];
+
+    /* coef_probs[type][0][0] for each of the four block types. */
+    for (type = 0; type < 4; type++)
+        dx->detoken.coef_probs[type] = (unsigned char *)(cm->fc.coef_probs[type][0][0]);
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+
+/* shift = norm[range]; */
+/* shift = norm_ptr[range]; */
+
+#define NORMALIZE /* renormalize range/value; expects shift, range, value, count, bufptr, detoken in scope */ \
+    if(range < 0x80) /* the norm table has 128 entries: never index it with range >= 0x80 */ \
+    { \
+        shift = detoken->norm_ptr[range]; \
+        range <<= shift; \
+        value <<= shift; \
+        count -= shift; \
+        if(count <= 0) /* bit budget used up: merge one more byte from the stream */ \
+        { \
+            count += BR_COUNT ; \
+            value |= (*bufptr) << (BR_COUNT-count); \
+            bufptr++; \
+        } \
+    }
+#if 1
+#define DECODE_AND_APPLYSIGN(value_to_sign) /* decode one bit at split (range+1)>>1; v = +/-(value_to_sign) */ \
+    split = (range + 1) >> 1; \
+    if ( (value >> 24) < split ) \
+    { \
+        range = split; \
+        v = (value_to_sign); /* argument parenthesized so expressions are safe */ \
+    } \
+    else \
+    { \
+        range = range-split; \
+        value = value-(split<<24); \
+        v = -(value_to_sign); \
+    } \
+    range +=range; /* shift range and value left by one bit */ \
+    value +=value; \
+    if (!--count) \
+    { \
+        count = BR_COUNT; \
+        value |= *bufptr; \
+        bufptr++; \
+    }
+
+#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) /* decode one bit; on 0 renormalize and goto branch */ \
+    { \
+        split = 1 + ((( (probability)*(range-1) ) )>> 8); /* argument parenthesized */ \
+        if ( (value >> 24) < split ) \
+        { \
+            range = split; \
+            NORMALIZE \
+            goto branch; \
+        } \
+        value -= (split<<24); /* bit is 1: fall through after renormalizing */ \
+        range = range - split; \
+        NORMALIZE \
+    }
+
+#define DECODE_AND_LOOP_IF_ZERO(probability,branch) /* like BRANCH_IF_ZERO, but a 0 also steps to the next position */ \
+    { \
+        split = 1 + ((( (probability)*(range-1) ) ) >> 8); /* argument parenthesized */ \
+        if ( (value >> 24) < split ) \
+        { \
+            range = split; \
+            NORMALIZE \
+            Prob = coef_probs; /* restart at the base and select the band of position c+1 */ \
+            ++c; \
+            Prob += vp8_coef_bands_x[c]; \
+            goto branch; \
+        } \
+        value -= (split<<24); \
+        range = range - split; \
+        NORMALIZE \
+    }
+
+#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) /* decode the sign, store the coefficient, continue or finish */ \
+ DECODE_AND_APPLYSIGN(val) \
+ Prob = coef_probs + (ENTROPY_NODES*2); \
+ if(c < 15){ /* more positions remain: store at scan[c] and decode the next token */\
+ qcoeff_ptr [ scan[c] ] = (INT16) v; \
+ ++c; \
+ goto DO_WHILE; }\
+ qcoeff_ptr [ scan[15] ] = (INT16) v; /* last position: store and leave the block */ \
+ goto BLOCK_FINISHED;
+
+
+#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count) /* decode extra bit bits_count of token t; if set, add 1<<bits_count to val */ \
+    split = 1 + (((range-1) * vp8d_token_extra_bits2[(t)].Probs[(bits_count)]) >> 8); \
+    if(value >= (split<<24))\
+    {\
+        range = range-split;\
+        value = value-(split<<24);\
+        val += ((UINT16)1<<(bits_count)); /* arguments parenthesized */ \
+    }\
+    else\
+    {\
+        range = split;\
+    }\
+    NORMALIZE
+#endif
+
+#if 0
+int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
+{
+ ENTROPY_CONTEXT **const A = x->above_context;
+ ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+ const VP8_COMMON *const oc = & dx->common;
+
+ BOOL_DECODER *bc = x->current_bc;
+
+ ENTROPY_CONTEXT *a;
+ ENTROPY_CONTEXT *l;
+ int i;
+
+ int eobtotal = 0;
+
+ register int count;
+
+ BOOL_DATA *bufptr;
+ register unsigned int range;
+ register unsigned int value;
+ const int *scan;
+ register unsigned int shift;
+ UINT32 split;
+ INT16 *qcoeff_ptr;
+
+ UINT8 *coef_probs;
+ int type;
+ int stop;
+ INT16 val, bits_count;
+ INT16 c;
+ INT16 t;
+ INT16 v;
+ vp8_prob *Prob;
+
+ //int *scan;
+ type = 3;
+ i = 0;
+ stop = 16;
+
+ if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+ {
+ i = 24;
+ stop = 24;
+ type = 1;
+ qcoeff_ptr = &x->qcoeff[24*16];
+ scan = vp8_default_zig_zag1d;
+ eobtotal -= 16;
+ }
+ else
+ {
+ scan = vp8_default_zig_zag1d;
+ qcoeff_ptr = &x->qcoeff[0];
+ }
+
+ count = bc->count;
+ range = bc->range;
+ value = bc->value;
+ bufptr = &bc->buffer[bc->pos];
+
+
+ coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
+
+BLOCK_LOOP:
+ a = A[ vp8_block2context[i] ] + vp8_block2above[i];
+ l = L[ vp8_block2context[i] ] + vp8_block2left[i];
+ c = (INT16)(!type);
+
+ VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
+ Prob = coef_probs;
+ Prob += t * ENTROPY_NODES;
+
+DO_WHILE:
+ Prob += vp8_coef_bands_x[c];
+ DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
+
+CHECK_0_:
+ DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_);
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val;
+ bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length;
+
+ do
+ {
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count);
+ bits_count -- ;
+ }
+ while (bits_count >= 0);
+
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+
+CAT_FIVE_CONTEXT_NODE_0_:
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val;
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+
+CAT_THREEFOUR_CONTEXT_NODE_0_:
+ DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_);
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val;
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+
+CAT_THREE_CONTEXT_NODE_0_:
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val;
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+
+HIGH_LOW_CONTEXT_NODE_0_:
+ DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_);
+
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val;
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+
+CAT_ONE_CONTEXT_NODE_0_:
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val;
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+
+LOW_VAL_CONTEXT_NODE_0_:
+ DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4);
+
+THREE_CONTEXT_NODE_0_:
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3);
+
+TWO_CONTEXT_NODE_0_:
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2);
+
+ONE_CONTEXT_NODE_0_:
+ DECODE_AND_APPLYSIGN(1);
+ Prob = coef_probs + ENTROPY_NODES;
+
+ if (c < 15)
+ {
+ qcoeff_ptr [ scan[c] ] = (INT16) v;
+ ++c;
+ goto DO_WHILE;
+ }
+
+ qcoeff_ptr [ scan[15] ] = (INT16) v;
+BLOCK_FINISHED:
+ t = ((x->Block[i].eob = c) != !type); // any nonzero data?
+ eobtotal += x->Block[i].eob;
+ *a = *l = t;
+ qcoeff_ptr += 16;
+
+ i++;
+
+ if (i < stop)
+ goto BLOCK_LOOP;
+
+ if (i == 25)
+ {
+ scan = vp8_default_zig_zag1d;//x->scan_order1d;
+ type = 0;
+ i = 0;
+ stop = 16;
+ coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
+ qcoeff_ptr = &x->qcoeff[0];
+ goto BLOCK_LOOP;
+ }
+
+ if (i == 16)
+ {
+ type = 2;
+ coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
+ stop = 24;
+ goto BLOCK_LOOP;
+ }
+
+ bc->count = count;
+ bc->value = value;
+ bc->range = range;
+ bc->pos = bufptr - bc->buffer;
+ return eobtotal;
+
+}
+//#endif
+#else
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+#if 0
+//uses relative offsets
+
+const vp8_tree_index vp8_coef_tree_x[ 22] = /* corresponding _CONTEXT_NODEs */
+{ /* 11 nodes, two entries each; entries written as -TOKEN look like leaves (negated token values) -- presumably entry 0 is the 0-branch and entry 1 the 1-branch, TODO confirm */
+ -DCT_EOB_TOKEN, 1, /* 0 = EOB */
+ -ZERO_TOKEN, 1, /* 1 = ZERO */
+ -ONE_TOKEN, 1, /* 2 = ONE */
+ 2, 5, /* 3 = LOW_VAL */
+ -TWO_TOKEN, 1, /* 4 = TWO */
+ -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
+ 2, 3, /* 6 = HIGH_LOW */
+ -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
+ 2, 3, /* 8 = CAT_THREEFOUR */
+ -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
+ -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
+};
+#endif
+
+#define _SCALEDOWN 8 //16 //8
+
+int vp8_decode_mb_tokens_v5(DETOK *detoken, int type);
+/* C version of the macroblock token decoder: writes coefficients through detoken->qcoeff_start_ptr and per-block end positions to detoken->eob[]. type == 1 decodes block 24 first, then blocks 0..15 (type 0) and 16..23 (type 2); any other type decodes blocks 0..15 with that type, then 16..23 with type 2. Always returns 0. */
+int vp8_decode_mb_tokens_v5_c(DETOK *detoken, int type)
+{
+ BOOL_DECODER *bc = detoken->current_bc;
+
+ ENTROPY_CONTEXT *a; /* above-context entry of the current block */
+ ENTROPY_CONTEXT *l; /* left-context entry of the current block */
+ int i; /* current block index */
+
+ register int count;
+
+ BOOL_DATA *bufptr;
+ register unsigned int range;
+ register unsigned int value;
+ register unsigned int shift; /* not referenced in the visible code; presumably used by the NORMALIZE macro -- TODO confirm */
+ UINT32 split;
+ INT16 *qcoeff_ptr;
+
+ UINT8 *coef_probs;
+// int type;
+ int stop; /* one past the last block index of the current pass */
+ INT16 c; /* coefficient position within the block */
+ INT16 t; /* tree index while walking the token tree; <= 0 once a token is reached */
+ INT16 v; /* decoded coefficient value */
+ vp8_prob *Prob;
+
+
+
+// type = 3;
+ i = 0;
+ stop = 16;
+ qcoeff_ptr = detoken->qcoeff_start_ptr;
+
+// if( detoken->mode != B_PRED && detoken->mode != SPLITMV)
+ if (type == 1)
+ {
+ i += 24; /* start with block 24 */
+ stop += 8; //24;
+// type = 1;
+ qcoeff_ptr += 24 * 16; /* 16 coefficients per block */
+// eobtotal-=16;
+ }
+
+ /* Work on local copies of the bool decoder state; written back before returning. */
+ count = bc->count;
+ range = bc->range;
+ value = bc->value;
+ bufptr = &bc->buffer[bc->pos];
+
+
+ coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
+
+BLOCK_LOOP:
+ a = detoken->A[ detoken->ptr_onyxblock2context_leftabove[i] ];
+ l = detoken->L[ detoken->ptr_onyxblock2context_leftabove[i] ];
+ c = !type; /* type 0 blocks start at coefficient 1, all others at 0 */
+ a += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2ABOVE_OFFSET];
+ l += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2LEFT_OFFSET];
+
+ //#define ONYX_COMBINEENTROPYCONTEXTS( Dest, A, B) \
+ //Dest = ((A)!=0) + ((B)!=0);
+
+ VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
+
+ Prob = coef_probs;
+ Prob += t * ENTROPY_NODES; /* select the probability set for this context */
+ t = 0; /* first token of a block starts at the root of the tree */
+
+ do
+ {
+
+ {
+// onyx_tree_index * onyx_coef_tree_ptr = onyx_coef_tree_x;
+
+ Prob += detoken->ptr_onyx_coef_bands_x[c];
+
+ GET_TOKEN_START:
+
+ do
+ {
+ split = 1 + (((range - 1) * (Prob[t>>1])) >> 8);
+
+ if (value >> 24 >= split) /* decoded bit is 1 */
+ {
+ range = range - split;
+ value = value - (split << 24);
+ t += 1; /* odd index = 1-branch of the node */
+
+ //used to eliminate else branch
+ split = range;
+ }
+
+ range = split;
+
+ t = detoken->vp8_coef_tree_ptr[ t ];
+
+ NORMALIZE /* macro defined outside this view; presumably renormalizes range/value and refills from bufptr */
+
+ }
+ while (t > 0) ;
+ }
+ GET_TOKEN_STOP:
+
+ if (t == -DCT_EOB_TOKEN)
+ {
+ break;
+ }
+
+ v = -t; /* token number */
+
+ if (v > FOUR_TOKEN)
+ {
+ INT16 bits_count;
+ TOKENEXTRABITS *teb_ptr;
+
+// teb_ptr = &onyxd_token_extra_bits2[t];
+// teb_ptr = &onyxd_token_extra_bits2[v];
+ teb_ptr = &detoken->teb_base_ptr[v];
+
+
+ v = teb_ptr->min_val;
+ bits_count = teb_ptr->Length;
+
+ do /* decode the extra bits from index Length down to 0, adding 1 << bits_count for each set bit */
+ {
+ split = 1 + (((range - 1) * teb_ptr->Probs[bits_count]) >> _SCALEDOWN);
+
+ if ((value >> 24) >= split)
+ {
+ range = range - split;
+ value = value - (split << 24);
+ v += ((UINT16)1 << bits_count);
+
+ //used to eliminate else branch
+ split = range;
+ }
+
+ range = split;
+
+ NORMALIZE
+
+ bits_count -- ;
+ }
+ while (bits_count >= 0);
+ }
+
+ Prob = coef_probs;
+
+ if (t) /* nonzero token: decode the sign and pick the next probability set */
+ {
+ split = 1 + (((range - 1) * vp8_prob_half) >> 8);
+
+ if ((value >> 24) >= split)
+ {
+ range = range - split;
+ value = value - (split << 24);
+ v = (v ^ -1) + 1; /* negate w/out conditionals */
+
+ //used to eliminate else branch
+ split = range;
+ }
+
+ range = split;
+
+ NORMALIZE
+ Prob += ENTROPY_NODES;
+
+ if (t < -ONE_TOKEN)
+ Prob += ENTROPY_NODES;
+
+ t = -2; /* becomes 0 below: the next token starts at the tree root */
+ }
+
+ //if t is zero, we will skip the eob table check
+ t += 2;
+ qcoeff_ptr [detoken->scan [c] ] = (INT16) v;
+
+ }
+ while (++c < 16);
+
+ if (t != -DCT_EOB_TOKEN) /* loop ran to completion rather than breaking on EOB */
+ {
+ --c;
+ }
+
+ t = ((detoken->eob[i] = c) != !type); // any nonzero data?
+// eobtotal += detoken->eob[i];
+ *a = *l = t;
+ qcoeff_ptr += 16;
+
+ i++;
+
+ if (i < stop)
+ goto BLOCK_LOOP;
+
+ if (i == 25) /* block 24 done: restart at block 0 with type 0 */
+ {
+ type = 0;
+ i = 0;
+ stop = 16;
+// coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
+ coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
+ qcoeff_ptr = detoken->qcoeff_start_ptr;
+ goto BLOCK_LOOP;
+ }
+
+ if (i == 16) /* blocks 0..15 done: continue with blocks 16..23 as type 2 */
+ {
+ type = 2;
+// coef_probs =(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
+ coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
+ stop = 24;
+ goto BLOCK_LOOP;
+ }
+
+ /* Write the local bool decoder state back. */
+ bc->count = count;
+ bc->value = value;
+ bc->range = range;
+ bc->pos = bufptr - bc->buffer;
+ return 0;
+}
+//#if 0
+int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
+{
+ /* Decodes one macroblock's tokens through vp8_decode_mb_tokens_v5() and
+ * returns the sum of the 25 per-block eob values (minus 16 when the
+ * macroblock is neither B_PRED nor SPLITMV).
+ */
+ DETOK *const detok = &dx->detoken;
+ int eobtotal;
+ int type;
+ int blk;
+
+ /*
+ * Only the per-macroblock DETOK fields are refreshed here. The members
+ * the token decoder also reads (coefficient tree, block-to-context table,
+ * coefficient band table, scan, teb_base_ptr, qcoeff_start_ptr and
+ * coef_probs[]) are not written by this function; presumably they are
+ * initialised once elsewhere -- TODO confirm against the decoder setup.
+ */
+ detok->current_bc = x->current_bc;
+ detok->A = x->above_context;
+ detok->L = x->left_context;
+
+ /* B_PRED / SPLITMV use type 3; all other modes use type 1 with the total biased by -16. */
+ if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV)
+ {
+ type = 3;
+ eobtotal = 0;
+ }
+ else
+ {
+ type = 1;
+ eobtotal = -16;
+ }
+
+ /* The decoder's own return value is not used. */
+ vp8_decode_mb_tokens_v5(detok, type);
+
+ /* Copy each block's eob back to the macroblock and accumulate it. */
+ for (blk = 0; blk < 25; ++blk)
+ {
+ x->Block[blk].eob = detok->eob[blk];
+ eobtotal += detok->eob[blk];
+ }
+
+ return eobtotal;
+}
+#endif
diff --git a/vp8/decoder/arm/detokenizearm_v6.asm b/vp8/decoder/arm/detokenizearm_v6.asm
new file mode 100644
index 0000000..4d87ee5
--- /dev/null
+++ b/vp8/decoder/arm/detokenizearm_v6.asm
@@ -0,0 +1,364 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_decode_mb_tokens_v5|
+
+ AREA |.text|, CODE, READONLY ; name this block of code
+
+ INCLUDE vpx_asm_offsets.asm
+
+l_qcoeff EQU 0 ; [sp] offsets of the locals kept by vp8_decode_mb_tokens_v5: current qcoeff pointer
+l_i EQU 4 ; block index i
+l_type EQU 8 ; current type
+l_stop EQU 12 ; block index at which the current pass stops
+l_c EQU 16 ; coefficient position c
+l_l_ptr EQU 20 ; pointer to the left-context entry of the current block
+l_a_ptr EQU 24 ; pointer to the above-context entry of the current block
+l_bc EQU 28 ; BOOL_DECODER pointer
+l_coef_ptr EQU 32 ; coef_probs base for the current type
+l_stacksize EQU 64 ; bytes of stack reserved for the locals above
+
+
+;; constant offsets -- these should be created at build time
+c_onyxblock2left_offset EQU 25 ; byte offset from table[i] to the block's left index
+c_onyxblock2above_offset EQU 50 ; byte offset from table[i] to the block's above index
+c_entropy_nodes EQU 11 ; ENTROPY_NODES: probabilities per context set
+c_dct_eob_token EQU 11 ; DCT_EOB_TOKEN
+; int vp8_decode_mb_tokens_v5(DETOK *detoken, int type): r0 = detoken, r1 = type, returns 0. Long-lived registers: r9 = detoken, r11 = i at BLOCK_LOOP, r7 = type then c, r0 = Prob, r1 = t, r4 = value, r5 = count, r6 = range, r8 = bufptr.
+|vp8_decode_mb_tokens_v5| PROC
+ stmdb sp!, {r4 - r11, lr}
+ sub sp, sp, #l_stacksize
+ mov r7, r1 ;r7 = type
+ mov r9, r0 ;DETOK *detoken
+
+ ldr r1, [r9, #detok_current_bc]
+ ldr r0, [r9, #detok_qcoeff_start_ptr]
+ mov r11, #0 ;i = 0
+ mov r3, #0x10 ;stop = 16
+
+ cmp r7, #1 ;if (type == 1)
+ addeq r11, r11, #24 ;i += 24
+ addeq r3, r3, #8 ;stop += 8
+ addeq r0, r0, #3, 24 ;qcoeff_ptr += 24 * 16 shorts (#3 rotated right by 24 = 768 bytes)
+
+ str r0, [sp, #l_qcoeff]
+ str r11, [sp, #l_i]
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+ str r1, [sp, #l_bc]
+
+ add lr, r9, r7, lsl #2 ;index coef_probs[type] (4-byte pointers)
+
+ ldr r2, [r1, #bool_decoder_buffer]
+ ldr r3, [r1, #bool_decoder_pos]
+
+ ldr r10, [lr, #detok_coef_probs] ;coef_probs = detoken->coef_probs[type]
+ ldr r5, [r1, #bool_decoder_count]
+ ldr r6, [r1, #bool_decoder_range]
+ ldr r4, [r1, #bool_decoder_value]
+ add r8, r2, r3 ;bufptr = &bc->buffer[bc->pos]
+
+ str r10, [sp, #l_coef_ptr]
+
+
+ ;align 4
+BLOCK_LOOP
+ ldr r3, [r9, #detok_ptr_onyxblock2context_leftabove]
+ ldr r2, [r9, #DETOK_A]
+ ldr r1, [r9, #DETOK_L]
+ ldrb r12, [r3, +r11] ; detoken->ptr_onyxblock2context_leftabove[i]
+
+ cmp r7, #0 ; check type
+ moveq r7, #1 ; c = !type
+ movne r7, #0
+
+ ldr r0, [r2, +r12, lsl #2] ; a
+ add r1, r1, r12, lsl #4 ; l (rows of L are 16 bytes apart)
+ add r3, r3, r11 ; &table[i]
+
+ ldrb r2, [r3, #c_onyxblock2above_offset]
+ ldrb r3, [r3, #c_onyxblock2left_offset]
+ mov lr, #c_entropy_nodes
+;; ;++
+
+ ldr r2, [r0, +r2, lsl #2]! ; a += above index (writeback), r2 = *a
+ add r3, r1, r3, lsl #2 ; l += left index
+ str r3, [sp, #l_l_ptr]
+ ldr r3, [r3] ; r3 = *l
+
+ cmp r2, #0 ; t = (*a != 0) + (*l != 0)
+ movne r2, #1
+ cmp r3, #0
+ addne r2, r2, #1
+
+ str r0, [sp, #l_a_ptr]
+ smlabb r0, r2, lr, r10 ; Prob = coef_probs + t * ENTROPY_NODES
+ mov r1, #0 ; t = 0
+ str r7, [sp, #l_c]
+
+ ;align 4
+COEFF_LOOP
+ ldr r3, [r9, #detok_ptr_onyx_coef_bands_x]
+ ldr lr, [r9, #detok_onyx_coef_tree_ptr]
+
+;;the following two lines are used if onyx_coef_bands_x is UINT16
+;; add r3, r3, r7, lsl #1
+;; ldrh r3, [r3]
+
+;;the following line is used if onyx_coef_bands_x is UINT8
+ ldrb r3, [r7, +r3]
+
+
+;; ;++
+;; pld [r8]
+ ;++
+ add r0, r0, r3 ;Prob += coef_bands_x[c]
+
+ ;align 4
+get_token_loop
+ ldrb r2, [r0, +r1, asr #1] ;Prob[t >> 1]
+ mov r3, r6, lsl #8
+ sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
+ mov r10, #1
+
+ smlawb r2, r3, r2, r10
+ ldrb r12, [r8] ;load cx data byte in stall slot
+ ;++
+
+ subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
+ addhs r1, r1, #1 ;t += 1
+ movhs r4, r3 ;update value
+ subhs r2, r6, r2 ;range = range - split
+ movlo r6, r2
+
+;;; ldrsbhs r1, [r1, +lr]
+ ldrsb r1, [r1, +lr] ;t = tree[t] (signed byte entries)
+
+
+;; use branch for short pipelines ???
+;; cmp r2, #0x80
+;; bcs |$LN22@decode_mb_to|
+
+ clz r3, r2 ;normalize: shift = clz(range) - 24
+ sub r3, r3, #24
+ subs r5, r5, r3 ;count -= shift
+ mov r6, r2, lsl r3
+ mov r4, r4, lsl r3
+
+;; use branch for short pipelines ???
+;; bgt |$LN22@decode_mb_to|
+
+ addle r5, r5, #8 ;count <= 0: consume the byte loaded above
+ rsble r3, r5, #8
+ addle r8, r8, #1
+ orrle r4, r4, r12, lsl r3
+
+;;|$LN22@decode_mb_to|
+
+ cmp r1, #0
+ bgt get_token_loop
+
+ cmn r1, #c_dct_eob_token ;if(t == -DCT_EOB_TOKEN)
+ beq END_OF_BLOCK
+
+ rsb lr, r1, #0 ;v = -t;
+
+ cmp lr, #4 ;if(v > FOUR_TOKEN)
+ ble SKIP_EXTRABITS
+
+ ldr r3, [r9, #detok_teb_base_ptr]
+ mov r11, #1 ;r11 is reused as the constant 1 (i is reloaded at END_OF_BLOCK)
+ add r7, r3, lr, lsl #4 ;teb_ptr = &teb_base_ptr[v] (16-byte entries); c is reloaded from the stack later
+
+ ldrsh lr, [r7, #tokenextrabits_min_val];v = teb_ptr->min_val
+ ldrsh r0, [r7, #tokenextrabits_length];bits_count = teb_ptr->Length
+
+extrabits_loop
+ add r3, r0, r7
+
+ ldrb r2, [r3, #4] ;probability for this extra bit, read at byte offset 4 + bits_count of the entry
+ mov r3, r6, lsl #8
+ sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8)
+ mov r10, #1
+
+ smlawb r2, r3, r2, r10
+ ldrb r12, [r8]
+ ;++
+
+ subs r10, r4, r2, lsl #24 ;x = value-(split<<24)
+ movhs r4, r10 ;update value
+ subhs r2, r6, r2 ;range = range - split
+ addhs lr, lr, r11, lsl r0 ;v += ((UINT16)1<<bits_count)
+ movlo r6, r2 ;range = split
+
+
+;; use branch for short pipelines ???
+;; cmp r2, #0x80
+;; bcs |$LN10@decode_mb_to|
+
+ clz r3, r2
+ sub r3, r3, #24
+ subs r5, r5, r3
+ mov r6, r2, lsl r3 ;range
+ mov r4, r4, lsl r3 ;value
+
+ addle r5, r5, #8
+ addle r8, r8, #1
+ rsble r3, r5, #8
+ orrle r4, r4, r12, lsl r3
+
+;;|$LN10@decode_mb_to|
+ subs r0, r0, #1 ;bits_count--
+ bpl extrabits_loop
+
+
+SKIP_EXTRABITS
+ ldr r11, [sp, #l_qcoeff]
+ ldr r0, [sp, #l_coef_ptr] ;Prob = coef_probs
+
+ cmp r1, #0 ;check for nonzero token
+ beq SKIP_EOB_CHECK ;if t is zero, we will skip the eob table check
+
+ sub r3, r6, #1 ;range - 1
+ ;++
+ mov r3, r3, lsl #7 ; *= onyx_prob_half (128)
+ ;++
+ mov r3, r3, lsr #8
+ add r2, r3, #1 ;split
+
+ subs r3, r4, r2, lsl #24 ;x = value-(split<<24)
+ movhs r4, r3 ;update value
+ subhs r2, r6, r2 ;range = range - split
+ mvnhs r3, lr
+ addhs lr, r3, #1 ;v = (v ^ -1) + 1
+ movlo r6, r2 ;range = split
+
+;; use branch for short pipelines ???
+;; cmp r2, #0x80
+;; bcs |$LN6@decode_mb_to|
+
+ clz r3, r2
+ sub r3, r3, #24
+ subs r5, r5, r3
+ mov r6, r2, lsl r3
+ mov r4, r4, lsl r3
+ ldrleb r2, [r8], #1
+ addle r5, r5, #8
+ rsble r3, r5, #8
+ orrle r4, r4, r2, lsl r3
+
+;;|$LN6@decode_mb_to|
+ add r0, r0, #c_entropy_nodes ;Prob += ENTROPY_NODES
+
+ cmn r1, #1 ;if (t < -ONE_TOKEN)
+
+ addlt r0, r0, #c_entropy_nodes ;Prob += ENTROPY_NODES
+
+ mvn r1, #1 ;t = -2
+
+SKIP_EOB_CHECK
+ ldr r7, [sp, #l_c]
+ ldr r3, [r9, #detok_scan]
+ add r1, r1, #2 ;t += 2
+ cmp r7, #(0x10 - 1) ;assume one less for now.... increment below
+
+ ldr r3, [r3, +r7, lsl #2] ;scan[c] (4-byte entries); flags from the cmp above stay live until the blt
+ add r7, r7, #1
+ add r3, r11, r3, lsl #1
+
+ str r7, [sp, #l_c]
+ strh lr, [r3] ;qcoeff_ptr[scan[c]] = v
+
+ blt COEFF_LOOP
+
+ sub r7, r7, #1 ;if(t != -DCT_EOB_TOKEN) --c
+
+END_OF_BLOCK
+ ldr r3, [sp, #l_type]
+ ldr r10, [sp, #l_coef_ptr]
+ ldr r0, [sp, #l_qcoeff]
+ ldr r11, [sp, #l_i]
+ ldr r12, [sp, #l_stop]
+
+ cmp r3, #0 ;r1 = !type
+ moveq r1, #1
+ movne r1, #0
+ add r3, r11, r9
+
+ cmp r7, r1 ;t = (c != !type)
+ strb r7, [r3, #detok_eob] ;detoken->eob[i] = c
+
+ ldr r7, [sp, #l_l_ptr]
+ ldr r2, [sp, #l_a_ptr]
+ movne r3, #1
+ moveq r3, #0
+
+ add r0, r0, #0x20 ;qcoeff_ptr += 16 shorts
+ add r11, r11, #1 ;i++
+ str r3, [r7] ;*l = t
+ str r3, [r2] ;*a = t
+ str r0, [sp, #l_qcoeff]
+ str r11, [sp, #l_i]
+
+ cmp r11, r12 ;i >= stop ?
+ ldr r7, [sp, #l_type]
+ mov lr, #c_entropy_nodes
+
+ blt BLOCK_LOOP
+
+ cmp r11, #0x19 ;i == 25: block 24 done, restart at block 0 with type 0
+ bne ln2_decode_mb_to
+
+ ldr r12, [r9, #detok_qcoeff_start_ptr]
+ ldr r10, [r9, #detok_coef_probs] ;coef_probs[0]
+ mov r7, #0
+ mov r3, #0x10
+ str r12, [sp, #l_qcoeff]
+ str r7, [sp, #l_i]
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+ mov r11, #0 ;BLOCK_LOOP indexes with r11, so the register must be reset along with l_i
+ str r10, [sp, #l_coef_ptr]
+
+ b BLOCK_LOOP
+
+ln2_decode_mb_to
+ cmp r11, #0x10 ;i == 16: continue with blocks 16..23 as type 2
+ bne ln1_decode_mb_to
+
+ ldr r10, [r9, #detok_coef_probs + 8] ;coef_probs[2] (4-byte pointers, as indexed at entry)
+
+ mov r7, #2
+ mov r3, #0x18
+
+ str r7, [sp, #l_type]
+ str r3, [sp, #l_stop]
+
+ str r10, [sp, #l_coef_ptr]
+ b BLOCK_LOOP
+
+ln1_decode_mb_to
+ ldr r2, [sp, #l_bc]
+ mov r0, #0 ;return 0
+ nop
+
+ ldr r3, [r2, #bool_decoder_buffer]
+ str r5, [r2, #bool_decoder_count]
+ str r4, [r2, #bool_decoder_value]
+ sub r3, r8, r3 ;bc->pos = bufptr - bc->buffer
+ str r3, [r2, #bool_decoder_pos]
+ str r6, [r2, #bool_decoder_range]
+
+ add sp, sp, #l_stacksize
+ ldmia sp!, {r4 - r11, pc}
+
+ ENDP ; |vp8_decode_mb_tokens_v5|
+
+ END
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
new file mode 100644
index 0000000..455c83a
--- /dev/null
+++ b/vp8/decoder/arm/dsystemdependent.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "blockd.h"
+#include "pragmas.h"
+#include "postproc.h"
+#include "dboolhuff.h"
+#include "dequantize.h"
+#include "onyxd_int.h"
+/* With CONFIG_RUNTIME_CPU_DETECT, fills the decoder's function-pointer tables for the ARM build: dequant/IDCT entries come from the NEON (HAVE_ARMV7) or v6 (HAVE_ARMV6) routines, while the bool decoder entries use the C implementations in both cases. Does nothing otherwise. */
+void vp8_dmachine_specific_config(VP8D_COMP *pbi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+ pbi->mb.rtcd = &pbi->common.rtcd;
+#if HAVE_ARMV7
+ pbi->dequant.block = vp8_dequantize_b_neon;
+ pbi->dequant.idct = vp8_dequant_idct_neon;
+ pbi->dequant.idct_dc = vp8_dequant_dc_idct_neon;
+ pbi->dboolhuff.start = vp8dx_start_decode_c; /* C bool decoder, not the _neon assembly versions */
+ pbi->dboolhuff.stop = vp8dx_stop_decode_c;
+ pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
+ pbi->dboolhuff.debool = vp8dx_decode_bool_c;
+ pbi->dboolhuff.devalue = vp8dx_decode_value_c;
+
+#elif HAVE_ARMV6
+ pbi->dequant.block = vp8_dequantize_b_v6;
+ pbi->dequant.idct = vp8_dequant_idct_v6;
+ pbi->dequant.idct_dc = vp8_dequant_dc_idct_v6;
+ pbi->dboolhuff.start = vp8dx_start_decode_c; /* C bool decoder, not the _v6 assembly versions */
+ pbi->dboolhuff.stop = vp8dx_stop_decode_c;
+ pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
+ pbi->dboolhuff.debool = vp8dx_decode_bool_c;
+ pbi->dboolhuff.devalue = vp8dx_decode_value_c;
+#endif
+#endif
+}
diff --git a/vp8/decoder/arm/neon/dboolhuff_neon.asm b/vp8/decoder/arm/neon/dboolhuff_neon.asm
new file mode 100644
index 0000000..7ec62a3
--- /dev/null
+++ b/vp8/decoder/arm/neon/dboolhuff_neon.asm
@@ -0,0 +1,159 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_decode_value_neon|
+ EXPORT |vp8dx_start_decode_neon|
+ EXPORT |vp8dx_stop_decode_neon|
+ EXPORT |vp8dx_decode_bool_neon|
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ INCLUDE vpx_asm_offsets.asm
+
+ AREA |.text|, CODE, READONLY ; name this block of code
+
+; int z = 0;
+; int bit;
+; for ( bit=bits-1; bit>=0; bit-- )
+; {
+; z |= (vp8dx_decode_bool(br, 0x80)<<bit);
+; }
+; return z;
+
+;int vp8_decode_value_neon ( BOOL_DECODER *br, int bits )
+|vp8_decode_value_neon| PROC
+ stmdb sp!, {r4 - r6, lr}
+ mov r4, r0 ; r4 = br
+ mov r5, r1 ; r5 = bits
+ mov r6, #0 ; z = 0
+
+ subs r5, r5, #1 ; bit = bits - 1
+ bmi decode_value_exit ; bits <= 0: return 0 without decoding
+
+decode_value_loop
+ mov r1, #0x80 ; probability 0x80
+ mov r0, r4
+ bl vp8dx_decode_bool_neon_internal ; needed for conversion to s file
+ orr r6, r6, r0, lsl r5 ; z |= decoded bit << bit
+ subs r5, r5, #1
+ bpl decode_value_loop
+
+decode_value_exit
+ mov r0, r6 ; return z
+ ldmia sp!, {r4 - r6, pc}
+ ENDP ; |vp8_decode_value_neon|
+
+
+;void vp8dx_start_decode_neon ( BOOL_DECODER *br, unsigned char *source )
+|vp8dx_start_decode_neon| PROC
+ stmdb sp!, {r4 - r5, lr}
+ mov r2, #0
+ mov r3, #255
+
+ str r2, [r0, #bool_decoder_lowvalue] ; lowvalue = 0
+ str r3, [r0, #bool_decoder_range] ; range = 255
+ str r1, [r0, #bool_decoder_buffer] ; buffer = source
+
+ mov r3, #8
+ mov r2, #4
+ str r3, [r0, #bool_decoder_count] ; count = 8
+ str r2, [r0, #bool_decoder_pos] ; pos = 4 (the four bytes read below are already consumed)
+
+ ldrb r2, [r1, #3] ; read source[0..3] unconditionally
+ ldrb r3, [r1, #2]
+ ldrb r4, [r1, #1]
+ ldrb r5, [r1]
+
+ orr r1, r2, r3, lsl #8 ; assemble value with source[0] in the top byte
+ orr r1, r1, r4, lsl #16
+ orr r1, r1, r5, lsl #24
+
+ str r1, [r0, #bool_decoder_value]
+
+ ldmia sp!, {r4 - r5, pc}
+ ENDP ; |vp8dx_start_decode_neon|
+
+
+;void vp8dx_stop_decode_neon ( BOOL_DECODER *bc );
+|vp8dx_stop_decode_neon| PROC ; nothing to release: returns immediately
+ bx lr ; same return form as the other NEON routines
+ ENDP ; |vp8dx_stop_decode_neon|
+
+
+; Register use in the code below:
+; r0 bit (return value); br on entry, moved to lr
+; r1 probability, later buffer pointer and -count
+; r2 range
+; r3 value
+; r4 split, later count
+; r5 value - (split << 24), later pos
+; r12 constant 1, later shift and the loaded byte; lr = br
+;int vp8dx_decode_bool_neon ( BOOL_DECODER *br, int probability )
+|vp8dx_decode_bool_neon| PROC
+vp8dx_decode_bool_neon_internal
+;LDRD and STRD doubleword data transfers must be eight-byte aligned. Use ALIGN 8
+;before memory allocation
+ stmdb sp!, {r4 - r5, lr}
+
+ ldr r2, [r0, #bool_decoder_range] ;load range (r2), value(r3)
+ ldr r3, [r0, #bool_decoder_value]
+ ;ldrd r2, r3, [r0, #bool_decoder_range] ;ldrd costs 2 cycles
+ ;
+
+ mov r4, r2, lsl #8
+ sub r4, r4, #256 ;r4 = (range - 1) << 8
+ mov r12, #1
+
+ smlawb r4, r4, r1, r12 ;split = 1 + (((range-1) * probability) >> 8)
+
+ mov lr, r0 ;lr = br (r0 becomes the result)
+ mov r0, #0 ;bit = 0
+ ;
+ subs r5, r3, r4, lsl #24 ;r5 = value - (split << 24); carry set when value >= bigsplit
+
+ subhs r2, r2, r4 ;range = br->range-split
+ movlo r2, r4 ;range = split
+ movhs r0, #1 ;bit = 1
+ movhs r3, r5 ;value = value-bigsplit
+
+ cmp r2, #0x80
+ blt range_less_0x80 ;range < 0x80: needs normalizing
+ strd r2, r3, [lr, #bool_decoder_range] ;store result (range and value as one doubleword, so value must directly follow range)
+
+ ldmia sp!, {r4 - r5, pc}
+
+range_less_0x80
+
+ ldrd r4, r5, [lr, #bool_decoder_count] ;load count (r4) and the following word, used as pos (r5)
+ ldr r1, [lr, #bool_decoder_buffer]
+
+ clz r12, r2
+ add r1, r1, r5 ;r1 = &br->buffer[br->pos]
+
+ sub r12, r12, #24 ;shift = clz(range) - 24
+ subs r4, r4, r12 ;count -= shift
+ mov r2, r2, lsl r12 ;range <<= shift
+ mov r3, r3, lsl r12 ;value <<= shift
+ addle r4, r4, #8 ;count += 8
+ ldrleb r12, [r1], #1 ;br->buffer[br->pos]
+
+ rsble r1, r4, #8 ;-count
+ addle r5, r5, #1 ;br->pos++
+ orrle r3, r3, r12, lsl r1 ;value |= (br->buffer[br->pos]) << (-count)
+
+ strd r2, r3, [lr, #bool_decoder_range] ;store result
+ strd r4, r5, [lr, #bool_decoder_count]
+
+ ldmia sp!, {r4 - r5, pc}
+ ENDP ; |vp8dx_decode_bool_neon|
+
+ END
diff --git a/vp8/decoder/arm/neon/dequantdcidct_neon.asm b/vp8/decoder/arm/neon/dequantdcidct_neon.asm
new file mode 100644
index 0000000..3392f2c
--- /dev/null
+++ b/vp8/decoder/arm/neon/dequantdcidct_neon.asm
@@ -0,0 +1,133 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_dequant_dc_idct_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+;void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc);
+; r0 short *input, (16 coefficients; overwritten with zeros)
+; r1 short *dq, (16 dequantization factors)
+; r2 short *output, (4 rows of 4 shorts, rows pitch bytes apart)
+; r3 int pitch,
+; (stack) int Dc (replaces the dequantized DC); scratch is q0-q2, q8-q11, q14-q15 only, so callee-saved d8-d15 are never written
+|vp8_dequant_dc_idct_neon| PROC
+ vld1.16 {q8, q9}, [r0]
+ vld1.16 {q10, q11}, [r1]
+
+ ldr r1, [sp] ;load Dc from stack
+
+ ldr r12, _dcidct_coeff_
+
+ vmul.i16 q1, q8, q10 ;input for short_idct4x4llm_neon
+ vmul.i16 q2, q9, q11
+
+ vmov.16 d2[0], r1 ;coefficient 0 = Dc
+
+;|short_idct4x4llm_neon| PROC
+ vld1.16 {d0}, [r12]
+ vswp d3, d4 ;q2(vp[4] vp[12])
+
+ vqdmulh.s16 q8, q2, d0[2]
+ vqdmulh.s16 q9, q2, d0[0]
+
+ vqadd.s16 d22, d2, d3 ;a1
+ vqsub.s16 d23, d2, d3 ;b1
+
+ vshr.s16 q8, q8, #1
+ vshr.s16 q9, q9, #1
+
+ vqadd.s16 q8, q8, q2 ;modify since sinpi8sqrt2 > 65536/2 (negative number)
+ vqadd.s16 q9, q9, q2
+
+ ;d16 - c1:temp1
+ ;d17 - d1:temp2
+ ;d18 - d1:temp1
+ ;d19 - c1:temp2
+
+ vqsub.s16 d20, d16, d19 ;c1
+ vqadd.s16 d21, d17, d18 ;d1
+
+ vqadd.s16 d2, d22, d21
+ vqadd.s16 d3, d23, d20
+ vqsub.s16 d4, d23, d20
+ vqsub.s16 d5, d22, d21
+
+ vtrn.32 d2, d4
+ vtrn.32 d3, d5
+ vtrn.16 d2, d3
+ vtrn.16 d4, d5
+
+; memset(input, 0, 32) -- 32bytes
+ vmov.i16 q14, #0
+
+ vswp d3, d4
+ vqdmulh.s16 q8, q2, d0[2]
+ vqdmulh.s16 q9, q2, d0[0]
+
+ vqadd.s16 d22, d2, d3 ;a1
+ vqsub.s16 d23, d2, d3 ;b1
+
+ vmov q15, q14
+
+ vshr.s16 q8, q8, #1
+ vshr.s16 q9, q9, #1
+
+ vqadd.s16 q8, q8, q2 ;modify since sinpi8sqrt2 > 65536/2 (negative number)
+ vqadd.s16 q9, q9, q2
+
+ vqsub.s16 d20, d16, d19 ;c1
+ vqadd.s16 d21, d17, d18 ;d1
+
+ vqadd.s16 d2, d22, d21
+ vqadd.s16 d3, d23, d20
+ vqsub.s16 d4, d23, d20
+ vqsub.s16 d5, d22, d21
+
+ vst1.16 {q14, q15}, [r0] ;zero the 32-byte input block
+
+ vrshr.s16 d2, d2, #3
+ vrshr.s16 d3, d3, #3
+ vrshr.s16 d4, d4, #3
+ vrshr.s16 d5, d5, #3
+
+ add r1, r2, r3 ;addresses of output rows 1..3
+ add r12, r1, r3
+ add r0, r12, r3
+
+ vtrn.32 d2, d4
+ vtrn.32 d3, d5
+ vtrn.16 d2, d3
+ vtrn.16 d4, d5
+
+ vst1.16 {d2}, [r2]
+ vst1.16 {d3}, [r1]
+ vst1.16 {d4}, [r12]
+ vst1.16 {d5}, [r0]
+
+ bx lr
+
+ ENDP
+
+;-----------------
+ AREA dcidct4x4_dat, DATA, READWRITE ;read/write by default
+;Data section named dcidct4x4_dat. DCD reserves one 32-bit word per value.
+;_dcidct_coeff_ holds the address of dcidct_coeff, which is loaded into r12 above.
+;dcidct_coeff holds four 16-bit constants packed two per word (values listed below).
+_dcidct_coeff_
+ DCD dcidct_coeff
+dcidct_coeff
+ DCD 0x4e7b4e7b, 0x8a8c8a8c
+
+;20091, 20091, 35468, 35468
+
+ END
diff --git a/vp8/decoder/arm/neon/dequantidct_neon.asm b/vp8/decoder/arm/neon/dequantidct_neon.asm
new file mode 100644
index 0000000..bba4d5d
--- /dev/null
+++ b/vp8/decoder/arm/neon/dequantidct_neon.asm
@@ -0,0 +1,128 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_dequant_idct_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+;void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch);
+; r0 short *input, (16 coefficients; overwritten with zeros)
+; r1 short *dq, (16 dequantization factors)
+; r2 short *output, (4 rows of 4 shorts, rows pitch bytes apart)
+; r3 int pitch, -- scratch is q0-q2, q8-q11, q14-q15 only, so callee-saved d8-d15 are never written
+|vp8_dequant_idct_neon| PROC
+ vld1.16 {q8, q9}, [r0]
+ vld1.16 {q10, q11}, [r1]
+
+ ldr r12, _didct_coeff_
+
+ vmul.i16 q1, q8, q10 ;input for short_idct4x4llm_neon
+ vmul.i16 q2, q9, q11
+
+;|short_idct4x4llm_neon| PROC
+ vld1.16 {d0}, [r12]
+ vswp d3, d4 ;q2(vp[4] vp[12])
+
+ vqdmulh.s16 q8, q2, d0[2]
+ vqdmulh.s16 q9, q2, d0[0]
+
+ vqadd.s16 d22, d2, d3 ;a1
+ vqsub.s16 d23, d2, d3 ;b1
+
+ vshr.s16 q8, q8, #1
+ vshr.s16 q9, q9, #1
+
+ vqadd.s16 q8, q8, q2 ;modify since sinpi8sqrt2 > 65536/2 (negative number)
+ vqadd.s16 q9, q9, q2
+
+ ;d16 - c1:temp1
+ ;d17 - d1:temp2
+ ;d18 - d1:temp1
+ ;d19 - c1:temp2
+
+ vqsub.s16 d20, d16, d19 ;c1
+ vqadd.s16 d21, d17, d18 ;d1
+
+ vqadd.s16 d2, d22, d21
+ vqadd.s16 d3, d23, d20
+ vqsub.s16 d4, d23, d20
+ vqsub.s16 d5, d22, d21
+
+ vtrn.32 d2, d4
+ vtrn.32 d3, d5
+ vtrn.16 d2, d3
+ vtrn.16 d4, d5
+
+; memset(input, 0, 32) -- 32bytes
+ vmov.i16 q14, #0
+
+ vswp d3, d4
+ vqdmulh.s16 q8, q2, d0[2]
+ vqdmulh.s16 q9, q2, d0[0]
+
+ vqadd.s16 d22, d2, d3 ;a1
+ vqsub.s16 d23, d2, d3 ;b1
+
+ vmov q15, q14
+
+ vshr.s16 q8, q8, #1
+ vshr.s16 q9, q9, #1
+
+ vqadd.s16 q8, q8, q2 ;modify since sinpi8sqrt2 > 65536/2 (negative number)
+ vqadd.s16 q9, q9, q2
+
+ vqsub.s16 d20, d16, d19 ;c1
+ vqadd.s16 d21, d17, d18 ;d1
+
+ vqadd.s16 d2, d22, d21
+ vqadd.s16 d3, d23, d20
+ vqsub.s16 d4, d23, d20
+ vqsub.s16 d5, d22, d21
+
+ vst1.16 {q14, q15}, [r0] ;zero the 32-byte input block
+
+ vrshr.s16 d2, d2, #3
+ vrshr.s16 d3, d3, #3
+ vrshr.s16 d4, d4, #3
+ vrshr.s16 d5, d5, #3
+
+ add r1, r2, r3 ;addresses of output rows 1..3
+ add r12, r1, r3
+ add r0, r12, r3
+
+ vtrn.32 d2, d4
+ vtrn.32 d3, d5
+ vtrn.16 d2, d3
+ vtrn.16 d4, d5
+
+ vst1.16 {d2}, [r2]
+ vst1.16 {d3}, [r1]
+ vst1.16 {d4}, [r12]
+ vst1.16 {d5}, [r0]
+
+ bx lr
+
+ ENDP
+
+;-----------------
+ AREA didct4x4_dat, DATA, READWRITE ;read/write by default
+;Data section named didct4x4_dat. DCD reserves one 32-bit word per value.
+;_didct_coeff_ holds the address of didct_coeff, which is loaded into r12 above.
+;didct_coeff holds four 16-bit constants packed two per word (values listed below).
+_didct_coeff_
+ DCD didct_coeff
+didct_coeff
+ DCD 0x4e7b4e7b, 0x8a8c8a8c
+
+;20091, 20091, 35468, 35468
+
+ END
diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/decoder/arm/neon/dequantizeb_neon.asm
new file mode 100644
index 0000000..1bde946
--- /dev/null
+++ b/vp8/decoder/arm/neon/dequantizeb_neon.asm
@@ -0,0 +1,33 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_dequantize_b_loop_neon|
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+; r0 short *Q, (16 quantized coefficients)
+; r1 short *DQC (16 dequantization factors)
+; r2 short *DQ (receives the 16 element-wise products Q[i] * DQC[i])
+|vp8_dequantize_b_loop_neon| PROC
+ vld1.16 {q0, q1}, [r0]
+ vld1.16 {q2, q3}, [r1]
+ ; products go to q8/q9: q4-q7 (d8-d15) are callee-saved and are not saved here
+ vmul.i16 q8, q0, q2
+ vmul.i16 q9, q1, q3
+
+ vst1.16 {q8, q9}, [r2]
+
+ bx lr
+
+ ENDP
+
+ END
diff --git a/vp8/decoder/dboolhuff.c b/vp8/decoder/dboolhuff.c
new file mode 100644
index 0000000..442054e
--- /dev/null
+++ b/vp8/decoder/dboolhuff.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "dboolhuff.h"
+#include "vpx_ports/mem.h"
+#include "vpx_mem/vpx_mem.h"
+/* vp8dx_bitreader_norm[r], for r in 1..255, is the left shift that brings r up to at least 0x80 (entry 0 is 0). */
+DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
+{
+ 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Moves up to to_write bytes (clamped to what the user buffer still holds) from br->user_buffer to br->write_ptr and advances both. */
+static void copy_in(BOOL_DECODER *br, unsigned int to_write)
+{
+ if (to_write > br->user_buffer_sz)
+ to_write = br->user_buffer_sz;
+ if (to_write) /* user_buffer may be NULL when empty; memcpy() must not get a null pointer even for 0 bytes */
+ memcpy(br->write_ptr, br->user_buffer, to_write);
+ br->user_buffer += to_write;
+ br->user_buffer_sz -= to_write;
+ br->write_ptr = br_ptr_advance(br->write_ptr, to_write);
+}
+/* Initialises br to decode source_sz bytes at source: allocates the ring buffer, fills it and loads the first byte. Returns 0 on success, 1 on a NULL source with non-zero size or on allocation failure. */
+int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
+ unsigned int source_sz)
+{
+ br->lowvalue = 0;
+ br->range = 255;
+ br->count = 0;
+ br->user_buffer = source;
+ br->user_buffer_sz = source_sz;
+ br->decode_buffer = 0; /* defined on every exit path, so vp8dx_stop_decode_c() never frees an uninitialised pointer */
+ if (source_sz && !source)
+ return 1;
+
+ /* Allocate the ring buffer backing store with alignment equal to the
+ * buffer size*2 so that a single pointer can be used for wrapping rather
+ * than a pointer+offset.
+ */
+ br->decode_buffer = vpx_memalign(VP8_BOOL_DECODER_SZ * 2,
+ VP8_BOOL_DECODER_SZ);
+
+ if (!br->decode_buffer)
+ return 1;
+
+ /* Populate the buffer */
+ br->read_ptr = br->decode_buffer;
+ br->write_ptr = br->decode_buffer;
+ copy_in(br, VP8_BOOL_DECODER_SZ);
+
+ /* Read the first byte */
+ br->value = (*br->read_ptr++) << 8;
+ return 0;
+}
+
+/* Tops up the ring buffer from the user buffer once at least 128 bytes of room are available. */
+void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
+{
+ /* Room is counted as one run starting at write_ptr (tail_room) plus,
+ * when that run reaches the end of the buffer, one at its start (head_room). */
+ int tail_room = br->read_ptr - br->write_ptr;
+ int head_room = 0;
+
+ if (tail_room < 0)
+ {
+ /* write_ptr is ahead of read_ptr: room runs from write_ptr to the end
+ * of the buffer, then from the start of the buffer up to read_ptr. */
+ tail_room = VP8_BOOL_DECODER_SZ - (br->write_ptr - br->decode_buffer);
+ head_room = br->read_ptr - br->decode_buffer;
+ }
+
+ /* Refill only when at least 128 bytes can be taken in. */
+ if (tail_room + head_room >= 128)
+ {
+ if (tail_room)
+ copy_in(br, tail_room);
+
+ if (head_room)
+ {
+ /* Restart writing at the beginning of the ring. */
+ br->write_ptr = br->decode_buffer;
+ copy_in(br, head_room);
+ }
+ }
+
+}
+
+/* Releases the ring buffer allocated by vp8dx_start_decode_c() and clears the pointer. */
+void vp8dx_stop_decode_c(BOOL_DECODER *bc)
+{
+ vpx_free(bc->decode_buffer);
+ bc->decode_buffer = 0;
+}
+
+#if 0
+/*
+ * Until optimized versions of these functions are available, we
+ * keep the implementation in the header to allow inlining.
+ *
+ * The RTCD-style invocations are still in place so this can
+ * be switched by just uncommenting these functions here and
+ * the DBOOLHUFF_INVOKE calls in the header.
+ */
+int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability) /* decodes one bool with the given probability and returns it (0 or 1) */
+{
+ unsigned int bit=0;
+ unsigned int split;
+ unsigned int bigsplit;
+ register unsigned int range = br->range;
+ register unsigned int value = br->value;
+
+ split = 1 + (((range-1) * probability) >> 8);
+ bigsplit = (split<<8); /* split scaled by 8 bits, matching the << 8 used when vp8dx_start_decode_c loads value */
+
+ range = split; /* assume bit 0; corrected below when value >= bigsplit */
+ if(value >= bigsplit)
+ {
+ range = br->range-split;
+ value = value-bigsplit;
+ bit = 1;
+ }
+
+ /*if(range>=0x80)
+ {
+ br->value = value;
+ br->range = range;
+ return bit;
+ }*/
+
+ { /* renormalize: shift range back up to at least 0x80 and pull in a new byte when count runs out */
+ int count = br->count;
+ register unsigned int shift = vp8dx_bitreader_norm[range];
+ range <<= shift;
+ value <<= shift;
+ count -= shift;
+ if(count <= 0)
+ {
+ value |= (*br->read_ptr) << (-count);
+ br->read_ptr = br_ptr_advance(br->read_ptr, 1);
+ count += 8 ;
+ }
+ br->count = count;
+ }
+ br->value = value;
+ br->range = range;
+ return bit;
+}
+
+int vp8dx_decode_value_c(BOOL_DECODER *br, int bits)
+{
+ int result = 0; /* bits decoded so far, most significant first */
+ int pos = bits; /* counts down; each bool lands at bit position pos */
+ while (--pos >= 0)
+ {
+ result |= (vp8dx_decode_bool(br, 0x80) << pos);
+ }
+ return result;
+}
+#endif
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
new file mode 100644
index 0000000..f5c9822
--- /dev/null
+++ b/vp8/decoder/dboolhuff.h
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#ifndef DBOOLHUFF_H
+#define DBOOLHUFF_H
+#include "vpx_ports/config.h"
+#include "vpx_ports/mem.h"
+#include "vpx_ports/vpx_integer.h"
+
+/* Size of the bool decoder backing storage
+ *
+ * This size was chosen to be greater than the worst case encoding of a
+ * single macroblock. This was calculated as follows (python):
+ *
+ * def max_cost(prob):
+ * return max(prob_costs[prob], prob_costs[255-prob]) / 256;
+ *
+ * tree_nodes_cost = 7 * max_cost(255)
+ * extra_bits_cost = sum([max_cost(bit) for bit in extra_bits])
+ * sign_bit_cost = max_cost(128)
+ * total_cost = tree_nodes_cost + extra_bits_cost + sign_bit_cost
+ *
+ * where the prob_costs table was taken from the C vp8_prob_cost table in
+ * boolhuff.c and the extra_bits table was taken from the 11 extrabits for
+ * a category 6 token as defined in vp8d_token_extra_bits2/detokenize.c
+ *
+ * This equation produced a maximum of 79 bits per coefficient. Scaling up
+ * to the macroblock level:
+ *
+ * 79 bits/coeff * 16 coeff/block * 25 blocks/macroblock = 31600 b/mb
+ *
+ * 4096 bytes = 32768 bits > 31600
+ */
+/* Capacity, in bytes, of the bool decoder's decode buffer. */
+#define VP8_BOOL_DECODER_SZ 4096
+/* SZ - 1: all bits below the SZ bit set. */
+#define VP8_BOOL_DECODER_MASK (VP8_BOOL_DECODER_SZ-1)
+/* All ones except the single bit whose value is SZ. br_ptr_advance() ANDs a
+ * pointer with this after incrementing it.
+ * NOTE(review): clearing that bit only wraps the pointer back to the start
+ * of the buffer if the buffer is aligned so that the bit is clear across
+ * it (i.e. aligned to 2 * SZ) -- confirm at the allocation site.
+ */
+#define VP8_BOOL_DECODER_PTR_MASK (~(uintptr_t)(VP8_BOOL_DECODER_SZ))
+
+struct vp8_dboolhuff_rtcd_vtable;
+
+/* State of one boolean-entropy decoder instance. */
+typedef struct
+{
+ unsigned int lowvalue; /* not referenced by the inline decoder in this header */
+ unsigned int range; /* current interval width; split and renormalised per decoded bool */
+ unsigned int value; /* bit window compared against (split << 8) */
+ int count; /* reduced by each renormalisation shift; a byte is merged in when it reaches <= 0 */
+ const unsigned char *user_buffer; /* NOTE(review): presumably the caller's compressed data -- confirm */
+ unsigned int user_buffer_sz; /* NOTE(review): presumably the bytes left in user_buffer -- confirm */
+ unsigned char *decode_buffer; /* backing storage; freed by the stop routine */
+ const unsigned char *read_ptr; /* next byte the decoder will consume */
+ unsigned char *write_ptr; /* position the fill routine copies new input to */
+#if CONFIG_RUNTIME_CPU_DETECT
+ struct vp8_dboolhuff_rtcd_vtable *rtcd; /* function table stored by vp8dx_start_decode() */
+#endif
+} BOOL_DECODER;
+
+/* Signature templates for the bool-decoder entry points. Each expands to a
+ * declarator only: the user supplies the terminating semicolon
+ * ("extern prototype_dbool_start(sym);", "typedef prototype_...((*t));").
+ * None of them may end in ';' itself, otherwise every use produces an
+ * empty file-scope declaration.
+ */
+#define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \
+    const unsigned char *source, unsigned int source_sz)
+#define prototype_dbool_stop(sym) void sym(BOOL_DECODER *bc)
+#define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
+#define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
+#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits)
+
+/* An architecture header may pre-define any of the vp8_dbool_* names; each
+ * one that is still undefined afterwards falls back to the C implementation.
+ */
+#if ARCH_ARM
+#include "arm/dboolhuff_arm.h"
+#endif
+
+#ifndef vp8_dbool_start
+#define vp8_dbool_start vp8dx_start_decode_c
+#endif
+
+#ifndef vp8_dbool_stop
+#define vp8_dbool_stop vp8dx_stop_decode_c
+#endif
+
+#ifndef vp8_dbool_fill
+#define vp8_dbool_fill vp8dx_bool_decoder_fill_c
+#endif
+
+#ifndef vp8_dbool_debool
+#define vp8_dbool_debool vp8dx_decode_bool_c
+#endif
+
+#ifndef vp8_dbool_devalue
+#define vp8_dbool_devalue vp8dx_decode_value_c
+#endif
+
+/* Declarations of whichever implementations the vp8_dbool_* names resolve to. */
+extern prototype_dbool_start(vp8_dbool_start);
+extern prototype_dbool_stop(vp8_dbool_stop);
+extern prototype_dbool_fill(vp8_dbool_fill);
+extern prototype_dbool_debool(vp8_dbool_debool);
+extern prototype_dbool_devalue(vp8_dbool_devalue);
+
+/* Function-pointer types sharing the signatures of the prototypes above. */
+typedef prototype_dbool_start((*vp8_dbool_start_fn_t));
+typedef prototype_dbool_stop((*vp8_dbool_stop_fn_t));
+typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t));
+typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t));
+typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t));
+
+/* Table with one function pointer per bool-decoder entry point. */
+typedef struct vp8_dboolhuff_rtcd_vtable {
+ vp8_dbool_start_fn_t start;
+ vp8_dbool_stop_fn_t stop;
+ vp8_dbool_fill_fn_t fill;
+ vp8_dbool_debool_fn_t debool;
+ vp8_dbool_devalue_fn_t devalue;
+} vp8_dboolhuff_rtcd_vtable_t;
+
+// There are no processor-specific versions of these
+// functions right now. Disable RTCD to avoid using
+// function pointers which gives a speed boost
+//
+// As defined below, DBOOLHUFF_INVOKE discards its ctx argument and pastes
+// fn onto "vp8_dbool_", so every call binds statically to the name chosen
+// by the vp8_dbool_* defines; IF_RTCD(x) discards x and yields NULL.
+//#ifdef ENABLE_RUNTIME_CPU_DETECT
+//#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn
+//#define IF_RTCD(x) (x)
+//#else
+#define DBOOLHUFF_INVOKE(ctx,fn) vp8_dbool_##fn
+#define IF_RTCD(x) NULL
+//#endif
+
+/* Return _ptr moved forward by n bytes, with the VP8_BOOL_DECODER_SZ bit of
+ * the resulting address cleared (the AND with VP8_BOOL_DECODER_PTR_MASK).
+ */
+static unsigned char *br_ptr_advance(const unsigned char *_ptr,
+                                     unsigned int n)
+{
+    const uintptr_t advanced = (uintptr_t)_ptr + n;
+
+    return (void *)(advanced & VP8_BOOL_DECODER_PTR_MASK);
+}
+
+DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
+
+/* wrapper functions to hide RTCD. static means inline means hopefully no
+ * penalty
+ */
+/* Start decoding `source` (source_sz bytes) with decoder `br`. When
+ * CONFIG_RUNTIME_CPU_DETECT is set, rtcd is also stored in br->rtcd.
+ * Returns whatever the bound start routine returns.
+ */
+static int vp8dx_start_decode(BOOL_DECODER *br,
+ struct vp8_dboolhuff_rtcd_vtable *rtcd,
+ const unsigned char *source, unsigned int source_sz) {
+#if CONFIG_RUNTIME_CPU_DETECT
+ br->rtcd = rtcd;
+#endif
+ return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz);
+}
+/* Forward to the bound stop routine for br. */
+static void vp8dx_stop_decode(BOOL_DECODER *br) {
+ DBOOLHUFF_INVOKE(br->rtcd, stop)(br);
+}
+/* Forward to the bound fill routine for br. */
+static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
+ DBOOLHUFF_INVOKE(br->rtcd, fill)(br);
+}
+/* Decode one bool from br.
+ *
+ * `probability` (out of 256) sets how much of the current range is given to
+ * the zero branch. The body lives in this header, instead of going through
+ * DBOOLHUFF_INVOKE(br->rtcd, debool), so that callers can inline it.
+ *
+ * Returns the decoded bit, 0 or 1.
+ */
+static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
+    const unsigned int prev_range = br->range;
+    const unsigned int split = 1 + (((prev_range - 1) * probability) >> 8);
+    const unsigned int bigsplit = split << 8;
+    unsigned int range;
+    unsigned int value = br->value;
+    unsigned int shift;
+    int count;
+    int bit;
+
+    /* The bit is 1 when the window lies at or above the scaled split. */
+    if (value >= bigsplit)
+    {
+        bit = 1;
+        range = prev_range - split;
+        value -= bigsplit;
+    }
+    else
+    {
+        bit = 0;
+        range = split;
+    }
+
+    /* Renormalise by the shift the lookup table gives for this range. */
+    shift = vp8dx_bitreader_norm[range];
+    range <<= shift;
+    value <<= shift;
+    count = br->count - shift;
+
+    /* Buffered bits exhausted: merge in the next input byte. */
+    if (count <= 0)
+    {
+        value |= (*br->read_ptr) << (-count);
+        br->read_ptr = br_ptr_advance(br->read_ptr, 1);
+        count += 8;
+    }
+
+    br->count = count;
+    br->value = value;
+    br->range = range;
+    return bit;
+}
+
+/* Read a `bits`-wide literal: each bit is decoded at probability 0x80,
+ * most significant bit first. Kept in the header, instead of calling
+ * DBOOLHUFF_INVOKE(br->rtcd, devalue), so that it can be inlined.
+ */
+static int vp8_decode_value(BOOL_DECODER *br, int bits)
+{
+    int literal = 0;
+    int pos = bits;
+
+    while (--pos >= 0)
+        literal |= (vp8dx_decode_bool(br, 0x80) << pos);
+
+    return literal;
+}
+#endif
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
new file mode 100644
index 0000000..6035f3e
--- /dev/null
+++ b/vp8/decoder/decodemv.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "treereader.h"
+#include "entropymv.h"
+#include "entropymode.h"
+#include "onyxd_int.h"
+#include "findnearmv.h"
+#include "demode.h"
+#if CONFIG_DEBUG
+#include <assert.h>
+#endif
+
+/* Decode one signed motion-vector component using the probabilities in *mvc.
+ *
+ * A leading flag selects between the short form (a tree-coded value) and
+ * the long form (explicit magnitude bits). A sign flag follows only when
+ * the magnitude is non-zero.
+ */
+static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
+{
+    const vp8_prob *const p = (const vp8_prob *) mvc;
+    int mag = 0;
+
+    if (!vp8_read(r, p [mvpis_short]))
+    {
+        /* short form */
+        mag = vp8_treed_read(r, vp8_small_mvtree, p + MVPshort);
+    }
+    else
+    {
+        int b;
+
+        /* Bits 0..2, least significant first. */
+        for (b = 0; b < 3; b++)
+            mag += vp8_read(r, p [MVPbits + b]) << b;
+
+        /* Then the top bit down to bit 4; bit 3 is handled last because
+         * it is sometimes implicit.
+         */
+        b = mvlong_width - 1;
+
+        do
+        {
+            mag += vp8_read(r, p [MVPbits + b]) << b;
+        }
+        while (--b > 3);
+
+        /* Bit 3 is only read when some higher bit is set; otherwise it is
+         * taken to be 1.
+         */
+        if (!(mag & 0xFFF0) || vp8_read(r, p [MVPbits + 3]))
+            mag += 8;
+    }
+
+    if (mag && vp8_read(r, p [MVPsign]))
+        mag = -mag;
+
+    return mag;
+}
+
+/* Read a motion vector: the row component uses mvc[0], the column component
+ * mvc[1]. Each decoded component is doubled before being stored.
+ *
+ * The doubling is written as a multiplication because read_mvcomponent()
+ * can return a negative value, and left-shifting a negative int is
+ * undefined behaviour in C.
+ */
+static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc)
+{
+    mv->row = (short)(read_mvcomponent(r, mvc) * 2);
+    mv->col = (short)(read_mvcomponent(r, ++mvc) * 2);
+}
+
+
+/* Update the two motion-vector contexts in mvc[] from the bitstream.
+ *
+ * Each probability slot has its own update flag, coded with the matching
+ * entry of vp8_mv_update_probs. When the flag is set a 7-bit value x
+ * follows and the slot becomes x << 1, or 1 when x is zero.
+ */
+static void read_mvcontexts(vp8_reader *bc, MV_CONTEXT *mvc)
+{
+    int comp;
+
+    for (comp = 0; comp < 2; comp++)
+    {
+        const vp8_prob *flag_prob = vp8_mv_update_probs[comp].prob;
+        vp8_prob *slot = (vp8_prob *)(mvc + comp);
+        vp8_prob *const slot_end = slot + MVPcount;
+
+        do
+        {
+            if (vp8_read(bc, *flag_prob++))
+            {
+                const vp8_prob x = (vp8_prob)vp8_read_literal(bc, 7);
+
+                *slot = x ? x << 1 : 1;
+            }
+        }
+        while (++slot < slot_end);
+    }
+}
+
+
+/* Read a macroblock-level inter mode from vp8_mv_ref_tree with probabilities p. */
+static MB_PREDICTION_MODE read_mv_ref(vp8_reader *bc, const vp8_prob *p)
+{
+    return (MB_PREDICTION_MODE)(vp8_treed_read(bc, vp8_mv_ref_tree, p));
+}
+
+/* Read a sub-block motion-vector mode from vp8_sub_mv_ref_tree with probabilities p. */
+static MB_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p)
+{
+    return (MB_PREDICTION_MODE)(vp8_treed_read(bc, vp8_sub_mv_ref_tree, p));
+}
+/* Statistics counters indexed [mv context][sub-mv mode slot]. They are only
+ * incremented by vp8_decode_mode_mvs() when VPX_MODE_COUNT is defined.
+ */
+unsigned int vp8_mv_cont_count[5][4] =
+{
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 }
+};
+
+/* Clamp *mv so that it reaches at most LEFT_TOP_MARGIN beyond the left/top
+ * frame edge and RIGHT_BOTTOM_MARGIN beyond the right/bottom edge, using
+ * the edge distances currently stored in xd.
+ */
+static void clamp_mv_to_frame_edges(MV *mv, const MACROBLOCKD *xd)
+{
+    if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
+        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
+    else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
+        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
+
+    if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
+        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
+    else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
+        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+}
+
+/* Parse the prediction mode and motion vectors of every macroblock of an
+ * inter frame from pbi->bc into the MODE_INFO array at pbi->common.mi.
+ *
+ * Frame-level probabilities are read first (skip flag, intra/last/golden,
+ * optional intra mode probability updates, MV contexts). The macroblocks
+ * are then visited in raster order; the MODE_INFO pointer skips one extra
+ * entry at the end of each row.
+ */
+void vp8_decode_mode_mvs(VP8D_COMP *pbi)
+{
+    const MV Zero = { 0, 0};
+
+    VP8_COMMON *const pc = & pbi->common;
+    vp8_reader *const bc = & pbi->bc;
+    MACROBLOCKD *const xd = &pbi->mb;
+
+    MODE_INFO *mi = pc->mi;
+    const int mis = pc->mode_info_stride;
+
+    MV_CONTEXT *const mvc = pc->fc.mvc;
+
+    int mb_row = -1;
+
+    vp8_prob prob_intra;
+    vp8_prob prob_last;
+    vp8_prob prob_gf;
+    vp8_prob prob_skip_false = 0;
+
+    if (pc->mb_no_coeff_skip)
+        prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8);
+
+    prob_intra = (vp8_prob)vp8_read_literal(bc, 8);
+    prob_last = (vp8_prob)vp8_read_literal(bc, 8);
+    prob_gf = (vp8_prob)vp8_read_literal(bc, 8);
+
+    /* Optional replacement of the luma intra mode probabilities */
+    if (vp8_read_bit(bc))
+    {
+        int i = 0;
+
+        do
+        {
+            pc->fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+        }
+        while (++i < 4);
+    }
+
+    /* Optional replacement of the chroma intra mode probabilities */
+    if (vp8_read_bit(bc))
+    {
+        int i = 0;
+
+        do
+        {
+            pc->fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+        }
+        while (++i < 3);
+    }
+
+    read_mvcontexts(bc, mvc);
+
+    while (++mb_row < pc->mb_rows)
+    {
+        int mb_col = -1;
+
+        while (++mb_col < pc->mb_cols)
+        {
+            MB_MODE_INFO *const mbmi = & mi->mbmi;
+            MV *const mv = & mbmi->mv.as_mv;
+
+            vp8dx_bool_decoder_fill(bc);
+
+            // Distance of Mb to the various image edges.
+            // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
+            // The top/left distances shift the non-negative pixel offset first and
+            // negate afterwards, so a negative value is never left-shifted.
+            xd->mb_to_left_edge = -((mb_col * 16) << 3);
+            xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+            xd->mb_to_top_edge = -((mb_row * 16) << 3);
+            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+
+            // If required read in new segmentation data for this MB
+            if (pbi->mb.update_mb_segmentation_map)
+                vp8_read_mb_features(bc, mbmi, &pbi->mb);
+
+            // Read the macroblock coeff skip flag if this feature is in use, else default to 0
+            if (pc->mb_no_coeff_skip)
+                mbmi->mb_skip_coeff = vp8_read(bc, prob_skip_false);
+            else
+                mbmi->mb_skip_coeff = 0;
+
+            mbmi->uv_mode = DC_PRED;
+
+            if ((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, prob_intra))) /* inter MB */
+            {
+                int rct[4];
+                vp8_prob mv_ref_p [VP8_MVREFS-1];
+                MV nearest, nearby, best_mv;
+
+                if (vp8_read(bc, prob_last))
+                {
+                    mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, prob_gf)));
+                }
+
+                vp8_find_near_mvs(xd, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias);
+
+                vp8_mv_ref_probs(mv_ref_p, rct);
+
+                switch (mbmi->mode = read_mv_ref(bc, mv_ref_p))
+                {
+                case SPLITMV:
+                {
+                    const int s = mbmi->partitioning = vp8_treed_read(
+                                      bc, vp8_mbsplit_tree, vp8_mbsplit_probs
+                                  );
+                    const int num_p = vp8_mbsplit_count [s];
+                    const int *const L = vp8_mbsplits [s];
+                    int j = 0;
+
+                    do /* for each subset j */
+                    {
+                        B_MODE_INFO *const bmi = mbmi->partition_bmi + j;
+                        MV *const blk_mv = & bmi->mv.as_mv;
+
+                        int k = -1; /* first block in subset j */
+                        int mv_contz;
+
+                        while (j != L[++k])
+                            if (k >= 16)
+#if CONFIG_DEBUG
+                                assert(0);
+
+#else
+                                ;
+#endif
+
+                        mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
+
+                        switch (bmi->mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
+                        {
+                        case NEW4X4:
+                            read_mv(bc, blk_mv, (const MV_CONTEXT *) mvc);
+                            blk_mv->row += best_mv.row;
+                            blk_mv->col += best_mv.col;
+#ifdef VPX_MODE_COUNT
+                            vp8_mv_cont_count[mv_contz][3]++;
+#endif
+                            break;
+                        case LEFT4X4:
+                            *blk_mv = vp8_left_bmi(mi, k)->mv.as_mv;
+#ifdef VPX_MODE_COUNT
+                            vp8_mv_cont_count[mv_contz][0]++;
+#endif
+                            break;
+                        case ABOVE4X4:
+                            *blk_mv = vp8_above_bmi(mi, k, mis)->mv.as_mv;
+#ifdef VPX_MODE_COUNT
+                            vp8_mv_cont_count[mv_contz][1]++;
+#endif
+                            break;
+                        case ZERO4X4:
+                            *blk_mv = Zero;
+#ifdef VPX_MODE_COUNT
+                            vp8_mv_cont_count[mv_contz][2]++;
+#endif
+                            break;
+                        default:
+                            break;
+                        }
+
+                        /* Fill (uniform) modes, mvs of jth subset.
+                           Must do it here because ensuing subsets can
+                           refer back to us via "left" or "above". */
+                        do
+                        {
+                            if (j == L[k])
+                                mi->bmi[k] = *bmi;
+                        }
+                        while (++k < 16);
+                    }
+                    while (++j < num_p);
+                }
+
+                /* The macroblock-level MV is that of the last sub-block */
+                *mv = mi->bmi[15].mv.as_mv;
+
+                break; /* done with SPLITMV */
+
+                case NEARMV:
+                    *mv = nearby;
+
+                    // Clip "next_nearest" so that it does not extend to far out of image
+                    clamp_mv_to_frame_edges(mv, xd);
+
+                    goto propagate_mv;
+
+                case NEARESTMV:
+                    *mv = nearest;
+
+                    // Clip "next_nearest" so that it does not extend to far out of image
+                    clamp_mv_to_frame_edges(mv, xd);
+
+                    goto propagate_mv;
+
+                case ZEROMV:
+                    *mv = Zero;
+                    goto propagate_mv;
+
+                case NEWMV:
+                    read_mv(bc, mv, (const MV_CONTEXT *) mvc);
+                    mv->row += best_mv.row;
+                    mv->col += best_mv.col;
+                    /* Encoder should not produce invalid motion vectors, but since
+                     * arbitrary length MVs can be parsed from the bitstream, we
+                     * need to clamp them here in case we're reading bad data to
+                     * avoid a crash.
+                     */
+#if CONFIG_DEBUG
+                    assert(mv->col >= (xd->mb_to_left_edge - LEFT_TOP_MARGIN));
+                    assert(mv->col <= (xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN));
+                    assert(mv->row >= (xd->mb_to_top_edge - LEFT_TOP_MARGIN));
+                    assert(mv->row <= (xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN));
+#endif
+
+                    clamp_mv_to_frame_edges(mv, xd);
+
+                propagate_mv: /* same MV throughout */
+                    {
+                        /* Deliberately unrolled copy of *mv into all 16 sub-blocks */
+                        mi->bmi[0].mv.as_mv = *mv;
+                        mi->bmi[1].mv.as_mv = *mv;
+                        mi->bmi[2].mv.as_mv = *mv;
+                        mi->bmi[3].mv.as_mv = *mv;
+                        mi->bmi[4].mv.as_mv = *mv;
+                        mi->bmi[5].mv.as_mv = *mv;
+                        mi->bmi[6].mv.as_mv = *mv;
+                        mi->bmi[7].mv.as_mv = *mv;
+                        mi->bmi[8].mv.as_mv = *mv;
+                        mi->bmi[9].mv.as_mv = *mv;
+                        mi->bmi[10].mv.as_mv = *mv;
+                        mi->bmi[11].mv.as_mv = *mv;
+                        mi->bmi[12].mv.as_mv = *mv;
+                        mi->bmi[13].mv.as_mv = *mv;
+                        mi->bmi[14].mv.as_mv = *mv;
+                        mi->bmi[15].mv.as_mv = *mv;
+                    }
+
+                    break;
+
+                default:;
+#if CONFIG_DEBUG
+                    assert(0);
+#endif
+                }
+
+            }
+            else
+            {
+                /* MB is intra coded */
+
+                int j = 0;
+
+                do
+                {
+                    mi->bmi[j].mv.as_mv = Zero;
+                }
+                while (++j < 16);
+
+                *mv = Zero;
+
+                if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pc->fc.ymode_prob)) == B_PRED)
+                {
+                    int j = 0;
+
+                    do
+                    {
+                        mi->bmi[j].mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pc->fc.bmode_prob);
+                    }
+                    while (++j < 16);
+                }
+
+                mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pc->fc.uv_mode_prob);
+            }
+
+            mi++; // next macroblock
+        }
+
+        mi++; // skip left predictor each row
+    }
+}
diff --git a/vp8/decoder/decodemv.h b/vp8/decoder/decodemv.h
new file mode 100644
index 0000000..4030071
--- /dev/null
+++ b/vp8/decoder/decodemv.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#ifndef DECODEMV_H
+#define DECODEMV_H
+
+#include "onyxd_int.h"
+
+/* Read the per-macroblock prediction modes and motion vectors of the
+ * current frame from the decoder's first bool decoder into its mode-info
+ * array.
+ */
+void vp8_decode_mode_mvs(VP8D_COMP *);
+
+#endif
diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h
new file mode 100644
index 0000000..ebc5c27
--- /dev/null
+++ b/vp8/decoder/decoderthreading.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+
+
+
+#ifndef _DECODER_THREADING_H
+#define _DECODER_THREADING_H
+
+
+/* Entry points of the decoder's threading support.
+ * NOTE(review): this header names VP8D_COMP and MACROBLOCKD without
+ * including their definitions, so the includer has to provide them first
+ * (decodframe.c includes onyxd_int.h before this file).
+ */
+extern void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
+ MACROBLOCKD *xd);
+extern void vp8_stop_lfthread(VP8D_COMP *pbi);
+extern void vp8_start_lfthread(VP8D_COMP *pbi);
+extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
+extern void vp8_decoder_create_threads(VP8D_COMP *pbi);
+#endif
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
new file mode 100644
index 0000000..4edf4f6
--- /dev/null
+++ b/vp8/decoder/decodframe.c
@@ -0,0 +1,907 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "onyxd_int.h"
+#include "header.h"
+#include "reconintra.h"
+#include "reconintra4x4.h"
+#include "recon.h"
+#include "reconinter.h"
+#include "dequantize.h"
+#include "detokenize.h"
+#include "invtrans.h"
+#include "alloccommon.h"
+#include "entropymode.h"
+#include "quant_common.h"
+#include "segmentation_common.h"
+#include "setupintrarecon.h"
+#include "demode.h"
+#include "decodemv.h"
+#include "extend.h"
+#include "vpx_mem/vpx_mem.h"
+#include "idct.h"
+#include "dequantize.h"
+#include "predictdc.h"
+#include "threading.h"
+#include "decoderthreading.h"
+#include "dboolhuff.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+/* Fill the Y1, Y2 and UV dequantisation tables for every quantiser index.
+ *
+ * Entry [0][0] of each 4x4 table receives the DC factor; the other fifteen
+ * entries, visited through vp8_default_zig_zag1d, receive the AC factor.
+ */
+void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
+{
+    VP8_COMMON *const pc = & pbi->common;
+    int q;
+
+    for (q = 0; q < QINDEX_RANGE; q++)
+    {
+        int pos;
+
+        /* DC factors */
+        pc->Y1dequant[q][0][0] = (short)vp8_dc_quant(q, pc->y1dc_delta_q);
+        pc->Y2dequant[q][0][0] = (short)vp8_dc2quant(q, pc->y2dc_delta_q);
+        pc->UVdequant[q][0][0] = (short)vp8_dc_uv_quant(q, pc->uvdc_delta_q);
+
+        /* AC factors */
+        for (pos = 1; pos < 16; pos++)
+        {
+            const int rc = vp8_default_zig_zag1d[pos];
+            const int row = rc >> 2;
+            const int col = rc & 3;
+
+            pc->Y1dequant[q][row][col] = (short)vp8_ac_yquant(q);
+            pc->Y2dequant[q][row][col] = (short)vp8_ac2quant(q, pc->y2ac_delta_q);
+            pc->UVdequant[q][row][col] = (short)vp8_ac_uv_quant(q, pc->uvac_delta_q);
+        }
+    }
+}
+
+/* Point every block of the macroblock at the dequantisation tables for the
+ * quantiser index that applies to it.
+ *
+ * With segmentation enabled the index comes from the macroblock's segment,
+ * either as an absolute value or as a delta on the frame's base index.
+ * Both forms originate from the bitstream, so the result is clamped to
+ * [0, MAXQ] in either case before it is used to index the tables.
+ */
+static void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
+{
+    int i;
+    int QIndex;
+    MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+    VP8_COMMON *const pc = & pbi->common;
+
+    // Decide whether to use the default or alternate baseline Q value.
+    if (xd->segmentation_enabled)
+    {
+        // Abs Value
+        if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
+
+        // Delta Value
+        else
+            QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
+
+        // Clamp to valid range
+        QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;
+    }
+    else
+        QIndex = pc->base_qindex;
+
+    // Set up the block level dequant pointers: 0-15 luma, 16-23 chroma, 24 Y2
+    for (i = 0; i < 16; i++)
+    {
+        xd->block[i].dequant = pc->Y1dequant[QIndex];
+    }
+
+    for (i = 16; i < 24; i++)
+    {
+        xd->block[i].dequant = pc->UVdequant[QIndex];
+    }
+
+    xd->block[24].dequant = pc->Y2dequant[QIndex];
+
+}
+
+/* RTCD_VTABLE(x): address of member x of pbi->common.rtcd when runtime CPU
+ * detection is configured, NULL otherwise. The first form expands to an
+ * expression naming `pbi`, so a variable of that name must be in scope
+ * wherever the macro is used.
+ */
+#if CONFIG_RUNTIME_CPU_DETECT
+#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
+#else
+#define RTCD_VTABLE(x) NULL
+#endif
+
+// skip_recon_mb(): build the prediction for a macroblock without going
+// through the separate reconstruction step. Per the original note, the
+// "_s" predictor builders write the result directly to the dst buffer
+// instead of to the predictor buffer, which avoids an unnecessary copy.
+static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
+{
+    const int is_intra = (xd->frame_type == KEY_FRAME
+                          || xd->mbmi.ref_frame == INTRA_FRAME);
+
+    if (!is_intra)
+    {
+        vp8_build_inter_predictors_mb_s(xd);
+        return;
+    }
+
+    vp8_build_intra_predictors_mbuv_s(xd);
+    vp8_build_intra_predictors_mby_s_ptr(xd);
+}
+
+/* Build the prediction for the macroblock and run the matching
+ * reconstruction routine.
+ *
+ * Intra macroblocks (and every macroblock of a key frame) always get the
+ * chroma intra predictors; B_PRED luma goes through the 4x4 intra
+ * reconstruction, all other modes through the 16x16 one. Inter
+ * macroblocks use the inter predictors followed by the 16x16 one.
+ */
+static void reconstruct_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
+{
+    const int is_intra = (xd->frame_type == KEY_FRAME
+                          || xd->mbmi.ref_frame == INTRA_FRAME);
+
+    if (!is_intra)
+    {
+        vp8_build_inter_predictors_mb(xd);
+        vp8_recon16x16mb(RTCD_VTABLE(recon), xd);
+        return;
+    }
+
+    vp8_build_intra_predictors_mbuv(xd);
+
+    if (xd->mbmi.mode == B_PRED)
+    {
+        vp8_recon_intra4x4mb(RTCD_VTABLE(recon), xd);
+    }
+    else
+    {
+        vp8_build_intra_predictors_mby_ptr(xd);
+        vp8_recon16x16mb(RTCD_VTABLE(recon), xd);
+    }
+}
+
+
+/* Dequantise and inverse-transform the coefficient blocks of a macroblock,
+ * writing the residual into each block's diff buffer.
+ *
+ * Macroblocks that are neither B_PRED nor SPLITMV have a second-order
+ * block (index 24): it is transformed first and its outputs supply the DC
+ * term of the sixteen luma blocks. Otherwise blocks 0..23 are handled
+ * independently. Blocks with eob <= 1 take the DC-only path.
+ */
+static void de_quantand_idct(VP8D_COMP *pbi, MACROBLOCKD *xd)
+{
+ int i;
+ BLOCKD *b = &xd->block[24];
+
+
+ if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV)
+ {
+ DEQUANT_INVOKE(&pbi->dequant, block)(b);
+
+ // do 2nd order transform on the dc block
+ if (b->eob > 1)
+ {
+ IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
+ /* zero the block's qcoeff storage, addressed as eight ints */
+ ((int *)b->qcoeff)[0] = 0;
+ ((int *)b->qcoeff)[1] = 0;
+ ((int *)b->qcoeff)[2] = 0;
+ ((int *)b->qcoeff)[3] = 0;
+ ((int *)b->qcoeff)[4] = 0;
+ ((int *)b->qcoeff)[5] = 0;
+ ((int *)b->qcoeff)[6] = 0;
+ ((int *)b->qcoeff)[7] = 0;
+ }
+ else
+ {
+ IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
+ ((int *)b->qcoeff)[0] = 0;
+ }
+
+
+ /* luma blocks: the DC value comes from the second-order output */
+ for (i = 0; i < 16; i++)
+ {
+
+ b = &xd->block[i];
+
+ if (b->eob > 1)
+ {
+ DEQUANT_INVOKE(&pbi->dequant, idct_dc)(b->qcoeff, &b->dequant[0][0], b->diff, 32, xd->block[24].diff[i]);
+ }
+ else
+ {
+ IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(xd->block[24].diff[i], b->diff, 32);
+ }
+ }
+
+ /* chroma blocks */
+ for (i = 16; i < 24; i++)
+ {
+ b = &xd->block[i];
+
+ if (b->eob > 1)
+ {
+ DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, 16);
+ }
+ else
+ {
+ IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, 16);
+ ((int *)b->qcoeff)[0] = 0;
+ }
+ }
+ }
+ else
+ {
+ /* no second-order block: (32 - (i & 16)) is 32 for blocks 0..15
+ * and 16 for blocks 16..23 */
+ for (i = 0; i < 24; i++)
+ {
+
+ b = &xd->block[i];
+
+ if (b->eob > 1)
+ {
+ DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, (32 - (i & 16)));
+ }
+ else
+ {
+ IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, (32 - (i & 16)));
+ ((int *)b->qcoeff)[0] = 0;
+ }
+ }
+ }
+}
+
+/* Decode one macroblock: read its tokens (unless it is flagged as having
+ * no coefficients), then either take the prediction-only path or run
+ * dequantisation, inverse transform and reconstruction.
+ *
+ * mbmi.dc_diff in the mode-info entry ends up 0 on the prediction-only
+ * path and 1 otherwise.
+ */
+void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
+{
+    int eobtotal;
+
+    if (!xd->mbmi.mb_skip_coeff)
+    {
+        eobtotal = vp8_decode_mb_tokens(pbi, xd);
+    }
+    else
+    {
+        vp8_reset_mb_tokens_context(xd);
+        eobtotal = 0;
+    }
+
+    xd->mode_info_context->mbmi.dc_diff = 1;
+
+    /* Nothing coded, and not a per-block mode: prediction only. */
+    if (eobtotal == 0
+        && xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV)
+    {
+        xd->mode_info_context->mbmi.dc_diff = 0;
+        skip_recon_mb(pbi, xd);
+        return;
+    }
+
+    if (xd->segmentation_enabled)
+        mb_init_dequantizer(pbi, xd);
+
+    de_quantand_idct(pbi, xd);
+    reconstruct_mb(pbi, xd);
+}
+
+/* Read an optional quantiser delta: a presence flag, then a 4-bit
+ * magnitude and a sign flag. Returns 0 when the flag is clear.
+ * *q_update is set to 1 when the result differs from prev and is left
+ * untouched otherwise.
+ */
+static int get_delta_q(vp8_reader *bc, int prev, int *q_update)
+{
+    int delta = 0;
+
+    if (vp8_read_bit(bc))
+    {
+        const int magnitude = vp8_read_literal(bc, 4);
+
+        delta = vp8_read_bit(bc) ? -magnitude : magnitude;
+    }
+
+    /* Trigger a quantizer update if the delta-q value has changed */
+    if (delta != prev)
+        *q_update = 1;
+
+    return delta;
+}
+
+#ifdef PACKET_TESTING
+#include <stdio.h>
+FILE *vpxlog = 0;
+#endif
+
+
+
+/* Decode and reconstruct macroblock row mb_row of the current frame.
+ *
+ * For every macroblock the mode info is copied into xd, the destination
+ * and reference buffer pointers are set up, the token bool decoder is
+ * refilled and the macroblock is decoded. After the row the frame border
+ * is extended, the mode-info pointer steps over the extra column entry
+ * and pbi->last_mb_row_decoded is updated.
+ */
+void vp8_decode_mb_row(VP8D_COMP *pbi,
+                       VP8_COMMON *pc,
+                       int mb_row,
+                       MACROBLOCKD *xd)
+{
+
+    int i;
+    int recon_yoffset, recon_uvoffset;
+    int mb_col;
+    int recon_y_stride = pc->last_frame.y_stride;
+    int recon_uv_stride = pc->last_frame.uv_stride;
+
+    vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
+    recon_yoffset = mb_row * recon_y_stride * 16;
+    recon_uvoffset = mb_row * recon_uv_stride * 8;
+    // reset above block coeffs
+
+    xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
+    xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
+    xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
+    xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
+    xd->up_available = (mb_row != 0);
+
+    // Shift the non-negative pixel offset first and negate afterwards, so a
+    // negative value is never left-shifted (same form as mb_to_left_edge).
+    xd->mb_to_top_edge = -((mb_row * 16) << 3);
+    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+
+    for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
+    {
+        // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
+        // NOTE(review): only the first 32 bytes are copied, not
+        // sizeof(MB_MODE_INFO); this looks deliberate -- confirm against
+        // the structure layout.
+        vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
+
+        if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
+        {
+            for (i = 0; i < 16; i++)
+            {
+                BLOCKD *d = &xd->block[i];
+                vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+            }
+        }
+
+        // Distance of Mb to the various image edges.
+        // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+        xd->mb_to_left_edge = -((mb_col * 16) << 3);
+        xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+
+        xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
+        xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
+        xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
+
+        xd->left_available = (mb_col != 0);
+
+        // Select the appropriate reference frame for this MB
+        if (xd->mbmi.ref_frame == LAST_FRAME)
+        {
+            xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
+            xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
+            xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
+        }
+        else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
+        {
+            // Golden frame reconstruction buffer
+            xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
+            xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
+            xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
+        }
+        else
+        {
+            // Alternate reference frame reconstruction buffer
+            xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
+            xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
+            xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
+        }
+
+        vp8_build_uvmvs(xd, pc->full_pixel);
+
+        /*
+        if(pbi->common.current_video_frame==0 &&mb_col==1 && mb_row==0)
+        pbi->debugoutput =1;
+        else
+        pbi->debugoutput =0;
+        */
+        vp8dx_bool_decoder_fill(xd->current_bc);
+        vp8_decode_macroblock(pbi, xd);
+
+
+        recon_yoffset += 16;
+        recon_uvoffset += 8;
+
+        ++xd->mode_info_context; /* next mb */
+
+        xd->gf_active_ptr++; // GF usage flag for next MB
+
+        xd->above_context[Y1CONTEXT] += 4;
+        xd->above_context[UCONTEXT ] += 2;
+        xd->above_context[VCONTEXT ] += 2;
+        xd->above_context[Y2CONTEXT] ++;
+
+        pbi->current_mb_col_main = mb_col;
+    }
+
+    // adjust to the next row of mbs
+    vp8_extend_mb_row(
+        &pc->new_frame,
+        xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
+    );
+
+    ++xd->mode_info_context; /* skip prediction column */
+
+    pbi->last_mb_row_decoded = mb_row;
+}
+
+
+/* Assemble the 24-bit little-endian partition size stored at cx_size[0..2]. */
+static unsigned int read_partition_size(const unsigned char *cx_size)
+{
+    unsigned int size = cx_size[2];
+
+    size = (size << 8) | cx_size[1];
+    size = (size << 8) | cx_size[0];
+    return size;
+}
+
+
+/* Set up one bool decoder per token partition.
+ *
+ * cx_data points just past the first partition. With more than one token
+ * partition it starts with a table of 3-byte sizes, one for every partition
+ * except the last, whose size is whatever remains of the packet.
+ *
+ * All sizes come from the bitstream, so each bounds test compares a length
+ * with the number of bytes that remain. Adding the length to a pointer
+ * instead could wrap, or form a pointer outside the buffer.
+ * NOTE(review): like the checks that were already here, this relies on
+ * vpx_internal_error() not returning to this function -- confirm.
+ */
+static void setup_token_decoder(VP8D_COMP *pbi,
+                                const unsigned char *cx_data)
+{
+    int num_part;
+    int i;
+    VP8_COMMON *pc = &pbi->common;
+    const unsigned char *user_data_end = pbi->Source + pbi->source_sz;
+    vp8_reader *bool_decoder;
+    const unsigned char *partition;
+
+    /* Parse number of token partitions to use */
+    pc->multi_token_partition = (TOKEN_PARTITION)vp8_read_literal(&pbi->bc, 2);
+    num_part = 1 << pc->multi_token_partition;
+
+    /* Set up pointers to the first partition */
+    partition = cx_data;
+    bool_decoder = &pbi->bc2;
+
+    if (num_part > 1)
+    {
+        const size_t size_table_bytes = 3 * (size_t)(num_part - 1);
+
+        /* The size table itself must lie inside the packet before any of
+         * its entries is read.
+         */
+        if (cx_data > user_data_end ||
+            (size_t)(user_data_end - cx_data) < size_table_bytes)
+            vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+                               "Truncated partition size data");
+
+        CHECK_MEM_ERROR(pbi->mbc, vpx_malloc(num_part * sizeof(vp8_reader)));
+        bool_decoder = pbi->mbc;
+        partition += 3 * (num_part - 1);
+    }
+
+    for (i = 0; i < num_part; i++)
+    {
+        const unsigned char *partition_size_ptr = cx_data + i * 3;
+        unsigned int partition_size;
+
+        /* Calculate the length of this partition. The last partition
+         * size is implicit.
+         */
+        if (i < num_part - 1)
+        {
+            partition_size = read_partition_size(partition_size_ptr);
+        }
+        else
+        {
+            partition_size = user_data_end - partition;
+        }
+
+        if (partition > user_data_end ||
+            partition_size > (size_t)(user_data_end - partition))
+            vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+                               "Truncated packet or corrupt partition "
+                               "%d length", i + 1);
+
+        if (vp8dx_start_decode(bool_decoder, IF_RTCD(&pbi->dboolhuff),
+                               partition, partition_size))
+            vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+                               "Failed to allocate bool decoder %d", i + 1);
+
+        /* Advance to the next partition */
+        partition += partition_size;
+        bool_decoder++;
+    }
+
+    /* Clamp number of decoder threads */
+    if (pbi->decoding_thread_count > num_part - 1)
+        pbi->decoding_thread_count = num_part - 1;
+}
+
+
+/* Shut down the token bool decoders that setup_token_decoder() started.
+ *
+ * With multiple partitions every decoder in pbi->mbc is stopped and the
+ * array is freed; pbi->mbc is then cleared so that no stale pointer to
+ * the freed array is left behind. With a single partition only pbi->bc2
+ * is stopped.
+ */
+static void stop_token_decoder(VP8D_COMP *pbi)
+{
+    int i;
+    VP8_COMMON *pc = &pbi->common;
+
+    if (pc->multi_token_partition != ONE_PARTITION)
+    {
+        int num_part = (1 << pc->multi_token_partition);
+
+        for (i = 0; i < num_part; i++)
+        {
+            vp8dx_stop_decode(&pbi->mbc[i]);
+        }
+
+        vpx_free(pbi->mbc);
+        pbi->mbc = NULL;
+    }
+    else
+        vp8dx_stop_decode(& pbi->bc2);
+}
+
+/* Per-frame initialisation that runs before the frame is decoded.
+ *
+ * Key frames reset the entropy contexts, segment feature data, loop-filter
+ * deltas and reference-buffer flags to their defaults. Other frames choose
+ * the motion-compensation filter and bind the matching sub-pixel
+ * prediction functions. The macroblock descriptor is then pointed at the
+ * start of the frame.
+ */
+static void init_frame(VP8D_COMP *pbi)
+{
+    VP8_COMMON *const pc = & pbi->common;
+    MACROBLOCKD *const xd = & pbi->mb;
+
+    if (pc->frame_type != KEY_FRAME)
+    {
+        pc->mcomp_filter_type = pc->use_bilinear_mc_filter ? BILINEAR : SIXTAP;
+
+        // To enable choice of different interpolation filters
+        if (pc->mcomp_filter_type == SIXTAP)
+        {
+            xd->subpixel_predict = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap4x4);
+            xd->subpixel_predict8x4 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap8x4);
+            xd->subpixel_predict8x8 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap8x8);
+            xd->subpixel_predict16x16 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap16x16);
+        }
+        else
+        {
+            xd->subpixel_predict = SUBPIX_INVOKE(RTCD_VTABLE(subpix), bilinear4x4);
+            xd->subpixel_predict8x4 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), bilinear8x4);
+            xd->subpixel_predict8x8 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), bilinear8x8);
+            xd->subpixel_predict16x16 = SUBPIX_INVOKE(RTCD_VTABLE(subpix), bilinear16x16);
+        }
+    }
+    else
+    {
+        // Various keyframe initializations
+        vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
+
+        vp8_init_mbmode_probs(pc);
+
+        vp8_default_coef_probs(pc);
+        vp8_kf_default_bmode_probs(pc->kf_bmode_prob);
+
+        // reset the segment feature data to 0 with delta coding (Default state).
+        vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
+        xd->mb_segement_abs_delta = SEGMENT_DELTADATA;
+
+        // reset the mode and ref deltas for the loop filter
+        vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
+        vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
+
+        // All buffers are implicitly updated on key frames.
+        pc->refresh_golden_frame = 1;
+        pc->refresh_alt_ref_frame = 1;
+        pc->copy_buffer_to_gf = 0;
+        pc->copy_buffer_to_arf = 0;
+
+        // Note that Golden and Altref modes cannot be used on a key frame so
+        // ref_frame_sign_bias[] is undefined and meaningless
+        pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0;
+        pc->ref_frame_sign_bias[ALTREF_FRAME] = 0;
+    }
+
+    xd->left_context = pc->left_context;
+    xd->mode_info_context = pc->mi;
+    xd->frame_type = pc->frame_type;
+    xd->mbmi.mode = DC_PRED;
+    xd->mode_info_stride = pc->mode_info_stride;
+}
+
+// Decode one compressed frame from pbi->Source (pbi->source_sz bytes).
+//
+// Reads the 3-byte frame tag, the key frame start code and dimensions when
+// present, then the first-partition header fields (segmentation, loop
+// filter, quantizers, reference refresh flags, coefficient probability
+// updates) before decoding the modes and every macroblock row.
+//
+// Returns 0 on success and -1 when the frame cannot be decoded because no
+// valid frame size is known or the packet is too short to hold its header.
+// Other bitstream and allocation problems are reported through
+// vpx_internal_error() on pc->error.
+int vp8_decode_frame(VP8D_COMP *pbi)
+{
+    vp8_reader *const bc = & pbi->bc;
+    VP8_COMMON *const pc = & pbi->common;
+    MACROBLOCKD *const xd = & pbi->mb;
+    const unsigned char *data = (const unsigned char *)pbi->Source;
+    const unsigned char *const data_end = data + pbi->source_sz;
+    int first_partition_length_in_bytes;
+
+    int mb_row;
+    int i, j, k, l;
+    const int *const mb_feature_data_bits = vp8_mb_feature_data_bits;
+
+    // The frame tag occupies 3 bytes; never read it from a shorter packet.
+    if (data_end - data < 3)
+    {
+        vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+                           "Truncated packet");
+        return -1; // only reached if the error handler returns
+    }
+
+    pc->frame_type = (FRAME_TYPE)(data[0] & 1);
+    pc->version = (data[0] >> 1) & 7;
+    pc->show_frame = (data[0] >> 4) & 1;
+    first_partition_length_in_bytes =
+        (data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;
+    data += 3;
+
+    // Compare lengths rather than forming data + length, which could point
+    // past the end of the buffer.
+    if (first_partition_length_in_bytes > data_end - data)
+        vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+                           "Truncated packet or corrupt partition 0 length");
+    vp8_setup_version(pc);
+
+    if (pc->frame_type == KEY_FRAME)
+    {
+        const int Width = pc->Width;
+        const int Height = pc->Height;
+
+        // Start code (3 bytes) plus width and height (2 bytes each) must be
+        // present before any of them is read.
+        if (data_end - data < 7)
+        {
+            vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+                               "Truncated key frame header");
+            return -1; // only reached if the error handler returns
+        }
+
+        // vet via sync code
+        if (data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a)
+            vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM,
+                               "Invalid frame sync code");
+
+        // 14 bits of size, top 2 bits of the high byte carry the scaling mode
+        pc->Width = (data[3] | (data[4] << 8)) & 0x3fff;
+        pc->horiz_scale = data[4] >> 6;
+        pc->Height = (data[5] | (data[6] << 8)) & 0x3fff;
+        pc->vert_scale = data[6] >> 6;
+        data += 7;
+
+        if (Width != pc->Width || Height != pc->Height)
+        {
+            // Restore the previous dimension before reporting a bad one
+            if (pc->Width <= 0)
+            {
+                pc->Width = Width;
+                vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+                                   "Invalid frame width");
+            }
+
+            if (pc->Height <= 0)
+            {
+                pc->Height = Height;
+                vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+                                   "Invalid frame height");
+            }
+
+            if (vp8_alloc_frame_buffers(&pbi->common, pc->Width, pc->Height))
+                vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+                                   "Failed to allocate frame buffers");
+        }
+    }
+
+    // No key frame has established a frame size yet
+    if (pc->Width == 0 || pc->Height == 0)
+    {
+        return -1;
+    }
+
+    init_frame(pbi);
+
+    if (vp8dx_start_decode(bc, IF_RTCD(&pbi->dboolhuff),
+                           data, data_end - data))
+        vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
+                           "Failed to allocate bool decoder 0");
+    if (pc->frame_type == KEY_FRAME) {
+        pc->clr_type    = (YUV_TYPE)vp8_read_bit(bc);
+        pc->clamp_type  = (CLAMP_TYPE)vp8_read_bit(bc);
+    }
+
+    // Is segmentation enabled
+    xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc);
+
+    if (xd->segmentation_enabled)
+    {
+        // Signal whether or not the segmentation map is being explicitly updated this frame.
+        xd->update_mb_segmentation_map = (unsigned char)vp8_read_bit(bc);
+        xd->update_mb_segmentation_data = (unsigned char)vp8_read_bit(bc);
+
+        if (xd->update_mb_segmentation_data)
+        {
+            xd->mb_segement_abs_delta = (unsigned char)vp8_read_bit(bc);
+
+            vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
+
+            // For each segmentation feature (Quant and loop filter level)
+            for (i = 0; i < MB_LVL_MAX; i++)
+            {
+                for (j = 0; j < MAX_MB_SEGMENTS; j++)
+                {
+                    // Frame level data
+                    if (vp8_read_bit(bc))
+                    {
+                        xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]);
+
+                        if (vp8_read_bit(bc))
+                            xd->segment_feature_data[i][j] = -xd->segment_feature_data[i][j];
+                    }
+                    else
+                        xd->segment_feature_data[i][j] = 0;
+                }
+            }
+        }
+
+        if (xd->update_mb_segmentation_map)
+        {
+            // Which macro block level features are enabled
+            vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
+
+            // Read the probs used to decode the segment id for each macro block.
+            for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
+            {
+                // If not explicitly set value is defaulted to 255 by memset above
+                if (vp8_read_bit(bc))
+                    xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8);
+            }
+        }
+    }
+
+    // Read the loop filter level and type
+    pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(bc);
+    pc->filter_level = vp8_read_literal(bc, 6);
+    pc->sharpness_level = vp8_read_literal(bc, 3);
+
+    // Read in loop filter deltas applied at the MB level based on mode or ref frame.
+    xd->mode_ref_lf_delta_update = 0;
+    xd->mode_ref_lf_delta_enabled = (unsigned char)vp8_read_bit(bc);
+
+    if (xd->mode_ref_lf_delta_enabled)
+    {
+        // Do the deltas need to be updated
+        xd->mode_ref_lf_delta_update = (unsigned char)vp8_read_bit(bc);
+
+        if (xd->mode_ref_lf_delta_update)
+        {
+            // Per-reference-frame deltas: update flag, 6-bit magnitude, sign
+            for (i = 0; i < MAX_REF_LF_DELTAS; i++)
+            {
+                if (vp8_read_bit(bc))
+                {
+                    //sign = vp8_read_bit( bc );
+                    xd->ref_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6);
+
+                    if (vp8_read_bit(bc))        // Apply sign
+                        xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1;
+                }
+            }
+
+            // Per-mode deltas: update flag, 6-bit magnitude, sign
+            for (i = 0; i < MAX_MODE_LF_DELTAS; i++)
+            {
+                if (vp8_read_bit(bc))
+                {
+                    //sign = vp8_read_bit( bc );
+                    xd->mode_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6);
+
+                    if (vp8_read_bit(bc))        // Apply sign
+                        xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1;
+                }
+            }
+        }
+    }
+
+    // The token partition(s) start right after the first partition
+    setup_token_decoder(pbi, data + first_partition_length_in_bytes);
+    xd->current_bc = &pbi->bc2;
+
+    // Read the default quantizers.
+    {
+        int Q, q_update;
+
+        Q = vp8_read_literal(bc, 7);  // AC 1st order Q = default
+        pc->base_qindex = Q;
+        q_update = 0;
+        pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update);
+        pc->y2dc_delta_q = get_delta_q(bc, pc->y2dc_delta_q, &q_update);
+        pc->y2ac_delta_q = get_delta_q(bc, pc->y2ac_delta_q, &q_update);
+        pc->uvdc_delta_q = get_delta_q(bc, pc->uvdc_delta_q, &q_update);
+        pc->uvac_delta_q = get_delta_q(bc, pc->uvac_delta_q, &q_update);
+
+        if (q_update)
+            vp8cx_init_de_quantizer(pbi);
+
+        // MB level dequantizer setup
+        mb_init_dequantizer(pbi, &pbi->mb);
+    }
+
+    // Determine if the golden frame or ARF buffer should be updated and how.
+    // For all non key frames the GF and ARF refresh flags and sign bias
+    // flags must be set explicitly.
+    if (pc->frame_type != KEY_FRAME)
+    {
+        // Should the GF or ARF be updated from the current frame
+        pc->refresh_golden_frame = vp8_read_bit(bc);
+        pc->refresh_alt_ref_frame = vp8_read_bit(bc);
+
+        // Buffer to buffer copy flags.
+        pc->copy_buffer_to_gf = 0;
+
+        if (!pc->refresh_golden_frame)
+            pc->copy_buffer_to_gf = vp8_read_literal(bc, 2);
+
+        pc->copy_buffer_to_arf = 0;
+
+        if (!pc->refresh_alt_ref_frame)
+            pc->copy_buffer_to_arf = vp8_read_literal(bc, 2);
+
+        pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp8_read_bit(bc);
+        pc->ref_frame_sign_bias[ALTREF_FRAME] = vp8_read_bit(bc);
+    }
+
+    // When the entropy updates apply to this frame only, keep a copy of the
+    // current contexts so they can be restored once the frame is decoded.
+    pc->refresh_entropy_probs = vp8_read_bit(bc);
+    if (pc->refresh_entropy_probs == 0)
+    {
+        vpx_memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc));
+    }
+
+    pc->refresh_last_frame = pc->frame_type == KEY_FRAME  ||  vp8_read_bit(bc);
+
+    // Disabled debug dump of the per-frame header decisions
+    if (0)
+    {
+        FILE *z = fopen("decodestats.stt", "a");
+        fprintf(z, "%6d F:%d,G:%d,A:%d,L:%d,Q:%d\n",
+                pc->current_video_frame,
+                pc->frame_type,
+                pc->refresh_golden_frame,
+                pc->refresh_alt_ref_frame,
+                pc->refresh_last_frame,
+                pc->base_qindex);
+        fclose(z);
+    }
+
+
+    vp8dx_bool_decoder_fill(bc);
+    {
+        // read coef probability tree
+
+        for (i = 0; i < BLOCK_TYPES; i++)
+            for (j = 0; j < COEF_BANDS; j++)
+                for (k = 0; k < PREV_COEF_CONTEXTS; k++)
+                    for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++)
+                    {
+
+                        vp8_prob *const p = pc->fc.coef_probs [i][j][k] + l;
+
+                        if (vp8_read(bc, vp8_coef_update_probs [i][j][k][l]))
+                        {
+                            *p = (vp8_prob)vp8_read_literal(bc, 8);
+
+                        }
+                    }
+    }
+
+    vpx_memcpy(&xd->pre, &pc->last_frame, sizeof(YV12_BUFFER_CONFIG));
+    vpx_memcpy(&xd->dst, &pc->new_frame, sizeof(YV12_BUFFER_CONFIG));
+
+    // set up the new frame for intra coded blocks
+    vp8_setup_intra_recon(&pc->new_frame);
+
+    vp8_setup_block_dptrs(xd);
+
+    vp8_build_block_doffsets(xd);
+
+    // clear out the coeff buffer
+    vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
+
+    // Read the mb_no_coeff_skip flag
+    pc->mb_no_coeff_skip = (int)vp8_read_bit(bc);
+
+    if (pc->frame_type == KEY_FRAME)
+        vp8_kfread_modes(pbi);
+    else
+        vp8_decode_mode_mvs(pbi);
+
+    // reset since these guys are used as iterators
+    vpx_memset(pc->above_context[Y1CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 4);
+    vpx_memset(pc->above_context[UCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
+    vpx_memset(pc->above_context[VCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
+    vpx_memset(pc->above_context[Y2CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols);
+
+    xd->gf_active_ptr = (signed char *)pc->gf_active_flags;     // Point to base of GF active flags data structure
+
+
+    vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO));
+
+
+    if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
+        vp8_start_lfthread(pbi);
+
+    if (pbi->b_multithreaded_rd && pbi->common.multi_token_partition != ONE_PARTITION)
+    {
+        vp8_mtdecode_mb_rows(pbi, xd);
+    }
+    else
+    {
+        int ibc = 0;
+        int num_part = 1 << pbi->common.multi_token_partition;
+
+        // Decode the individual macro block rows
+        for (mb_row = 0; mb_row < pc->mb_rows; mb_row++)
+        {
+
+            // With several token partitions, rows take them in rotation
+            if (num_part > 1)
+            {
+                xd->current_bc = & pbi->mbc[ibc];
+                ibc++;
+
+                if (ibc == num_part)
+                    ibc = 0;
+            }
+
+            vp8_decode_mb_row(pbi, pc, mb_row, xd);
+        }
+
+        pbi->last_mb_row_decoded = mb_row;
+    }
+
+
+    stop_token_decoder(pbi);
+
+    vp8dx_stop_decode(bc);
+
+    // vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes  \n",bc->pos+pbi->bc2.pos);
+
+    // If this was a kf or Gf note the Q used
+    if ((pc->frame_type == KEY_FRAME) || (pc->refresh_golden_frame) || pbi->common.refresh_alt_ref_frame)
+        pc->last_kf_gf_q = pc->base_qindex;
+
+    // Entropy updates were for this frame only: restore the saved contexts
+    if (pc->refresh_entropy_probs == 0)
+    {
+        vpx_memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc));
+    }
+
+#ifdef PACKET_TESTING
+    {
+        FILE *f = fopen("decompressor.VP8", "ab");
+        unsigned int size = pbi->bc2.pos + pbi->bc.pos + 8;
+        fwrite((void *) &size, 4, 1, f);
+        fwrite((void *) pbi->Source, size, 1, f);
+        fclose(f);
+    }
+#endif
+
+    return 0;
+}
diff --git a/vp8/decoder/demode.c b/vp8/decoder/demode.c
new file mode 100644
index 0000000..fd05e6d
--- /dev/null
+++ b/vp8/decoder/demode.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "onyxd_int.h"
+#include "entropymode.h"
+#include "findnearmv.h"
+
+
+int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p)
+{
+    /* Walk the sub-block intra mode tree using the probabilities in p. */
+    return vp8_treed_read(bc, vp8_bmode_tree, p);
+}
+
+
+int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p)
+{
+    /* Walk the luma mode tree using the probabilities in p. */
+    return vp8_treed_read(bc, vp8_ymode_tree, p);
+}
+
+int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p)
+{
+    /* Walk the key frame luma mode tree using the probabilities in p. */
+    return vp8_treed_read(bc, vp8_kf_ymode_tree, p);
+}
+
+
+
+int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
+{
+    /* Walk the chroma mode tree using the probabilities in p. */
+    return vp8_treed_read(bc, vp8_uv_mode_tree, p);
+}
+
+void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
+{
+    /* A segment id is only coded when segmentation is on and the map is
+     * being updated; otherwise mi->segment_id is left untouched. */
+    if (!x->segmentation_enabled || !x->update_mb_segmentation_map)
+        return;
+
+    /* Two-level tree: the first bool picks the pair {0,1} or {2,3}, the
+     * second bool picks the id inside that pair. */
+    if (vp8_read(r, x->mb_segment_tree_probs[0]))
+    {
+        mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
+    }
+    else
+    {
+        mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
+    }
+}
+
+void vp8_kfread_modes(VP8D_COMP *pbi)
+{
+    /* Read the intra prediction modes of every macroblock of a key frame
+     * into the mode info array, in raster order. */
+    VP8_COMMON *const cm = &pbi->common;
+    vp8_reader *const reader = &pbi->bc;
+    const int stride = cm->mode_info_stride;
+    MODE_INFO *mi = cm->mi;
+    vp8_prob skip_false_prob = 0;
+    int row, col;
+
+    if (cm->mb_no_coeff_skip)
+        skip_false_prob = (vp8_prob)(vp8_read_literal(reader, 8));
+
+    for (row = 0; row < cm->mb_rows; row++)
+    {
+        for (col = 0; col < cm->mb_cols; col++)
+        {
+            MB_PREDICTION_MODE y_mode;
+            int b;
+
+            vp8dx_bool_decoder_fill(reader);
+
+            /* Segment id defaults to 0 on a key frame and is only read
+             * when the segmentation map is explicitly updated. */
+            mi->mbmi.segment_id = 0;
+
+            if (pbi->mb.update_mb_segmentation_map)
+                vp8_read_mb_features(reader, &mi->mbmi, &pbi->mb);
+
+            /* The coefficient skip flag is coded only when the feature is
+             * in use; otherwise it is 0. */
+            if (!cm->mb_no_coeff_skip)
+                mi->mbmi.mb_skip_coeff = 0;
+            else
+                mi->mbmi.mb_skip_coeff = vp8_read(reader, skip_false_prob);
+
+            y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(reader, cm->kf_ymode_prob);
+
+            mi->mbmi.ref_frame = INTRA_FRAME;
+            mi->mbmi.mode = y_mode;
+
+            if (mi->mbmi.mode == B_PRED)
+            {
+                /* Each of the 16 sub-block modes is coded with
+                 * probabilities selected by the modes above and to the left. */
+                for (b = 0; b < 16; b++)
+                {
+                    const B_PREDICTION_MODE above = vp8_above_bmi(mi, b, stride)->mode;
+                    const B_PREDICTION_MODE left = vp8_left_bmi(mi, b)->mode;
+
+                    mi->bmi[b].mode = (B_PREDICTION_MODE) vp8_read_bmode(reader, cm->kf_bmode_prob [above] [left]);
+                }
+            }
+            else
+            {
+                /* Whole-macroblock mode: give all 16 sub-blocks the matching
+                 * sub-block mode (B_DC_PRED for DC_PRED and anything else). */
+                int fill_mode = B_DC_PRED;
+
+                if (y_mode == V_PRED)
+                    fill_mode = B_VE_PRED;
+                else if (y_mode == H_PRED)
+                    fill_mode = B_HE_PRED;
+                else if (y_mode == TM_PRED)
+                    fill_mode = B_TM_PRED;
+
+                for (b = 0; b < 16; b++)
+                    mi->bmi[b].mode = (B_PREDICTION_MODE)fill_mode;
+            }
+
+            mi->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(reader, cm->kf_uv_mode_prob);
+            mi++;
+        }
+
+        mi++; /* skip the border entry at the end of each row */
+    }
+}
diff --git a/vp8/decoder/demode.h b/vp8/decoder/demode.h
new file mode 100644
index 0000000..51bbc5e
--- /dev/null
+++ b/vp8/decoder/demode.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#ifndef DEMODE_H
+#define DEMODE_H
+
+#include "onyxd_int.h"
+
+/* Read (intra) modes for all blocks in a keyframe */
+
+void vp8_kfread_modes(VP8D_COMP *pbi);
+
+/* Intra mode for a Y subblock */
+
+int vp8_read_bmode(vp8_reader *, const vp8_prob *);
+
+/* MB intra Y mode trees differ for key and inter frames. */
+
+int vp8_read_ymode(vp8_reader *, const vp8_prob *);
+int vp8_kfread_ymode(vp8_reader *, const vp8_prob *);
+
+/* MB intra UV mode trees are the same for key and inter frames. */
+
+int vp8_read_uv_mode(vp8_reader *, const vp8_prob *);
+
+/* Read any macroblock-level features that may be present. */
+
+void vp8_read_mb_features(vp8_reader *, MB_MODE_INFO *, MACROBLOCKD *);
+
+#endif /* DEMODE_H */
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
new file mode 100644
index 0000000..14798d9
--- /dev/null
+++ b/vp8/decoder/dequantize.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "dequantize.h"
+#include "predictdc.h"
+#include "idct.h"
+#include "vpx_mem/vpx_mem.h"
+
+extern void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) ;
+extern void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch);
+
+
+void vp8_dequantize_b_c(BLOCKD *d)
+{
+    /* Scale the 16 quantized coefficients of the block by the first 16
+     * dequantization factors and store them in d->dqcoeff. */
+    const short *quantized = d->qcoeff;
+    const short *factors = &d->dequant[0][0];
+    short *out = d->dqcoeff;
+    int n = 0;
+
+    while (n < 16)
+    {
+        out[n] = quantized[n] * factors[n];
+        n++;
+    }
+}
+
+void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch)
+{
+    /* Dequantize the 16 coefficients in place, inverse transform them into
+     * output, then clear the coefficient buffer. */
+    int n = 0;
+
+    while (n < 16)
+    {
+        input[n] = dq[n] * input[n];
+        n++;
+    }
+
+    vp8_short_idct4x4llm_c(input, output, pitch);
+
+    /* 32 bytes are zeroed, as in the other dequant/idct routine */
+    vpx_memset(input, 0, 32);
+}
+
+void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc)
+{
+    /* Same as the plain dequant + inverse transform, except that the first
+     * coefficient is taken from Dc as-is instead of being scaled by dq[0]. */
+    int n = 1;
+
+    input[0] = (short)Dc;
+
+    while (n < 16)
+    {
+        input[n] = dq[n] * input[n];
+        n++;
+    }
+
+    vp8_short_idct4x4llm_c(input, output, pitch);
+
+    /* 32 bytes are zeroed, as in the other dequant/idct routine */
+    vpx_memset(input, 0, 32);
+}
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
new file mode 100644
index 0000000..d16b02e
--- /dev/null
+++ b/vp8/decoder/dequantize.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#ifndef DEQUANTIZE_H
+#define DEQUANTIZE_H
+#include "blockd.h"
+/* Declares `sym` with the block dequantizer signature (one BLOCKD in, no return value). */
+#define prototype_dequant_block(sym) \
+ void sym(BLOCKD *x)
+/* Declares `sym` with the dequantize + inverse transform signature. */
+#define prototype_dequant_idct(sym) \
+ void sym(short *input, short *dq, short *output, int pitch)
+/* Same as prototype_dequant_idct, with an extra `dc` argument. */
+#define prototype_dequant_idct_dc(sym) \
+ void sym(short *input, short *dq, short *output, int pitch, int dc)
+/* Architecture headers are included first; NOTE(review): presumably they may define the vp8_dequant_* names tested below - confirm. */
+#if ARCH_X86 || ARCH_X86_64
+#include "x86/dequantize_x86.h"
+#endif
+
+#if ARCH_ARM
+#include "arm/dequantize_arm.h"
+#endif
+/* Each vp8_dequant_* name falls back to the C implementation when nothing above defined it. */
+#ifndef vp8_dequant_block
+#define vp8_dequant_block vp8_dequantize_b_c
+#endif
+extern prototype_dequant_block(vp8_dequant_block);
+
+#ifndef vp8_dequant_idct
+#define vp8_dequant_idct vp8_dequant_idct_c
+#endif
+extern prototype_dequant_idct(vp8_dequant_idct);
+
+#ifndef vp8_dequant_idct_dc
+#define vp8_dequant_idct_dc vp8_dequant_dc_idct_c
+#endif
+extern prototype_dequant_idct_dc(vp8_dequant_idct_dc);
+
+/* Function pointer types for the three signatures, and the table that groups one pointer of each. */
+typedef prototype_dequant_block((*vp8_dequant_block_fn_t));
+typedef prototype_dequant_idct((*vp8_dequant_idct_fn_t));
+typedef prototype_dequant_idct_dc((*vp8_dequant_idct_dc_fn_t));
+typedef struct
+{
+ vp8_dequant_block_fn_t block; /* block dequantizer */
+ vp8_dequant_idct_fn_t idct; /* dequantize + inverse transform */
+ vp8_dequant_idct_dc_fn_t idct_dc; /* same, with a separately supplied dc value */
+} vp8_dequant_rtcd_vtable_t;
+/* DEQUANT_INVOKE(ctx, fn): the table entry `fn` of ctx with run-time CPU detection, else the compile-time vp8_dequant_<fn> name. */
+#if CONFIG_RUNTIME_CPU_DETECT
+#define DEQUANT_INVOKE(ctx,fn) (ctx)->fn
+#else
+#define DEQUANT_INVOKE(ctx,fn) vp8_dequant_##fn
+#endif
+
+#endif
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
new file mode 100644
index 0000000..a42f18d
--- /dev/null
+++ b/vp8/decoder/detokenize.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "type_aliases.h"
+#include "blockd.h"
+#include "onyxd_int.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
+
+/* Number of bits taken from the input each time the bool decoder refills. */
+#define BR_COUNT 8
+#define BOOL_DATA UINT8
+
+/* Number of probabilities in one coefficient band. Parenthesized so the
+ * macro expands safely inside any expression. */
+#define OCB_X (PREV_COEF_CONTEXTS * ENTROPY_NODES)
+/* For each of the 16 coefficient positions, the offset (in probabilities)
+ * of that position's band from the start of a block type's probabilities. */
+DECLARE_ALIGNED(16, UINT16, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
+/* Indices of the token tree nodes inside one set of ENTROPY_NODES probabilities. */
+#define EOB_CONTEXT_NODE            0
+#define ZERO_CONTEXT_NODE           1
+#define ONE_CONTEXT_NODE            2
+#define LOW_VAL_CONTEXT_NODE        3
+#define TWO_CONTEXT_NODE            4
+#define THREE_CONTEXT_NODE          5
+#define HIGH_LOW_CONTEXT_NODE       6
+#define CAT_ONE_CONTEXT_NODE        7
+#define CAT_THREEFOUR_CONTEXT_NODE  8
+#define CAT_THREE_CONTEXT_NODE      9
+#define CAT_FIVE_CONTEXT_NODE       10
+
+/*
+//the definition is put in "onyxd_int.h"
+typedef struct
+{
+ INT16 min_val;
+ INT16 Length;
+ UINT8 Probs[12];
+} TOKENEXTRABITS;
+*/
+
+DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) = /* one row per token */
+{ /* row layout: min_val, Length, Probs[12]; for the DCT_VAL_CATEGORY rows the decoder starts from min_val and reads extra bits Length..0, bit k with probability Probs[k] */
+ { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ZERO_TOKEN
+ { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ONE_TOKEN
+ { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //TWO_TOKEN
+ { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //THREE_TOKEN
+ { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //FOUR_TOKEN
+ { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY1
+ { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY2
+ { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY3
+ { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY4
+ { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY5
+ { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, //DCT_VAL_CATEGORY6
+ { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, // EOB TOKEN
+};
+
+
+void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
+{
+    /* Clear the above and left entropy context entries of every block of
+     * the macroblock. */
+    ENTROPY_CONTEXT **const above = x->above_context;
+    ENTROPY_CONTEXT(* const left)[4] = x->left_context;
+    int blk;
+
+    /* Blocks 0..23 are always cleared. */
+    for (blk = 0; blk < 24; blk++)
+    {
+        above[ vp8_block2context[blk] ][ vp8_block2above[blk] ] = 0;
+        left[ vp8_block2context[blk] ][ vp8_block2left[blk] ] = 0;
+    }
+
+    /* Block 24 (Y2) is cleared too, except for B_PRED and SPLITMV. */
+    if (!(x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV))
+    {
+        above[Y2CONTEXT][ vp8_block2above[24] ] = 0;
+        left[Y2CONTEXT][ vp8_block2left[24] ] = 0;
+    }
+}
+DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); /* shift table indexed by range. The macros below operate directly on locals and labels of vp8_decode_mb_tokens (range, value, count, bufptr, shift, split, v, val, c, Prob, coef_probs, qcoeff_ptr, scan; DO_WHILE, BLOCK_FINISHED). NORMALIZE: shift range and value left by the table entry for range, and pull in one more input byte when count drops to 0 or below. */
+#define NORMALIZE \
+ /*if(range < 0x80)*/ \
+ { \
+ shift = vp8dx_bitreader_norm[range]; \
+ range <<= shift; \
+ value <<= shift; \
+ count -= shift; \
+ if(count <= 0) \
+ { \
+ count += BR_COUNT ; \
+ value |= (*bufptr) << (BR_COUNT-count); \
+ bufptr = br_ptr_advance(bufptr, 1); \
+ } \
+ }
+/* DECODE_AND_APPLYSIGN: decode one bool with split = (range + 1) >> 1 and set v to value_to_sign or to its negation; range and value are then doubled and a byte is pulled in when count reaches 0. */
+#define DECODE_AND_APPLYSIGN(value_to_sign) \
+ split = (range + 1) >> 1; \
+ if ( (value >> 8) < split ) \
+ { \
+ range = split; \
+ v= value_to_sign; \
+ } \
+ else \
+ { \
+ range = range-split; \
+ value = value-(split<<8); \
+ v = -value_to_sign; \
+ } \
+ range +=range; \
+ value +=value; \
+ if (!--count) \
+ { \
+ count = BR_COUNT; \
+ value |= *bufptr; \
+ bufptr = br_ptr_advance(bufptr, 1); \
+ }
+/* DECODE_AND_BRANCH_IF_ZERO: decode one bool with the given probability; on 0 normalize and jump to `branch`, on 1 normalize and fall through. */
+#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
+ { \
+ split = 1 + ((( probability*(range-1) ) )>> 8); \
+ if ( (value >> 8) < split ) \
+ { \
+ range = split; \
+ NORMALIZE \
+ goto branch; \
+ } \
+ value -= (split<<8); \
+ range = range - split; \
+ NORMALIZE \
+ }
+/* DECODE_AND_LOOP_IF_ZERO: like DECODE_AND_BRANCH_IF_ZERO, but on 0 it also resets Prob to coef_probs, advances c and adds the offset for the new position before jumping to `branch`; when c is already 15 it jumps to BLOCK_FINISHED instead. */
+#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
+ { \
+ split = 1 + ((( probability*(range-1) ) ) >> 8); \
+ if ( (value >> 8) < split ) \
+ { \
+ range = split; \
+ NORMALIZE \
+ Prob = coef_probs; \
+ if(c<15) {\
+ ++c; \
+ Prob += vp8_coef_bands_x[c]; \
+ goto branch; \
+ } goto BLOCK_FINISHED; /*for malformed input */\
+ } \
+ value -= (split<<8); \
+ range = range - split; \
+ NORMALIZE \
+ }
+/* DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT: decode the sign of val, store the signed value at the current scan position, point Prob at coef_probs + 2 * ENTROPY_NODES, then continue at DO_WHILE with the next position, or go to BLOCK_FINISHED after position 15. */
+#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \
+ DECODE_AND_APPLYSIGN(val) \
+ Prob = coef_probs + (ENTROPY_NODES*2); \
+ if(c < 15){\
+ qcoeff_ptr [ scan[c] ] = (INT16) v; \
+ ++c; \
+ goto DO_WHILE; }\
+ qcoeff_ptr [ scan[15] ] = (INT16) v; \
+ goto BLOCK_FINISHED;
+
+/* DECODE_EXTRABIT_AND_ADJUST_VAL: decode one extra bit of token t with probability Probs[bits_count]; when the bit is 1, add 1 << bits_count to val. */
+#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
+ split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
+ if(value >= (split<<8))\
+ {\
+ range = range-split;\
+ value = value-(split<<8);\
+ val += ((UINT16)1<<bits_count);\
+ }\
+ else\
+ {\
+ range = split;\
+ }\
+ NORMALIZE
+
+int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
+{
+ ENTROPY_CONTEXT **const A = x->above_context;
+ ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+ const VP8_COMMON *const oc = & dx->common;
+/* Decode the coefficient tokens of every block of macroblock x from
+ * x->current_bc into x->qcoeff. Each block's end position is stored in
+ * x->block[i].eob and its above/left entropy context entries are set to
+ * whether it holds data. Returns the accumulated eob total.
+ */
+ BOOL_DECODER *bc = x->current_bc;
+
+ ENTROPY_CONTEXT *a;
+ ENTROPY_CONTEXT *l;
+ int i;
+
+ int eobtotal = 0;
+/* Local copies of the bool decoder state (loaded from bc below, written back before returning) */
+ register int count;
+
+ const BOOL_DATA *bufptr;
+ register unsigned int range;
+ register unsigned int value;
+ const int *scan;
+ register unsigned int shift;
+ UINT32 split;
+ INT16 *qcoeff_ptr;
+
+ const vp8_prob *coef_probs;
+ int type;
+ int stop;
+ INT16 val, bits_count;
+ INT16 c;
+ INT16 t;
+ INT16 v;
+ const vp8_prob *Prob;
+
+ //int *scan;
+ type = 3;
+ i = 0;
+ stop = 16;
+/* Modes other than B_PRED and SPLITMV start with block 24 (type 1); blocks 0..15 are then decoded as type 0 instead of type 3 */
+ if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+ {
+ i = 24;
+ stop = 24;
+ type = 1;
+ qcoeff_ptr = &x->qcoeff[24*16];
+ scan = vp8_default_zig_zag1d;
+ eobtotal -= 16; /* NOTE(review): presumably offsets the 16 type 0 blocks, whose eob count starts at 1 (c = !type) - confirm */
+ }
+ else
+ {
+ scan = vp8_default_zig_zag1d;
+ qcoeff_ptr = &x->qcoeff[0];
+ }
+
+ count = bc->count;
+ range = bc->range;
+ value = bc->value;
+ bufptr = bc->read_ptr;
+
+
+ coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
+/* Start of one block: locate its context entries; c starts at 1 for type 0 and at 0 otherwise */
+BLOCK_LOOP:
+ a = A[ vp8_block2context[i] ] + vp8_block2above[i];
+ l = L[ vp8_block2context[i] ] + vp8_block2left[i];
+ c = (INT16)(!type);
+
+ VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
+ Prob = coef_probs;
+ Prob += t * ENTROPY_NODES;
+/* Start of one token: add the offset for position c; the first decision can end the block */
+DO_WHILE:
+ Prob += vp8_coef_bands_x[c];
+ DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
+/* Re-entered after each zero coefficient; no end-of-block decision is made here */
+CHECK_0_:
+ DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_);
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val;
+ bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length;
+/* Category 6: extra bits are read from bit Length down to bit 0 */
+ do
+ {
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count);
+ bits_count -- ;
+ }
+ while (bits_count >= 0);
+
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+/* Category 5: min_val plus 5 extra bits */
+CAT_FIVE_CONTEXT_NODE_0_:
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val;
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+/* Category 3 or 4: one more decision, then (for category 4) min_val plus 4 extra bits */
+CAT_THREEFOUR_CONTEXT_NODE_0_:
+ DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_);
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val;
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+/* Category 3: min_val plus 3 extra bits */
+CAT_THREE_CONTEXT_NODE_0_:
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val;
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+/* Category 1 or 2: one more decision, then (for category 2) min_val plus 2 extra bits */
+HIGH_LOW_CONTEXT_NODE_0_:
+ DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_);
+
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val;
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1);
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+/* Category 1: min_val plus 1 extra bit */
+CAT_ONE_CONTEXT_NODE_0_:
+ val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val;
+ DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
+/* Magnitudes 2, 3 and 4 carry no extra bits */
+LOW_VAL_CONTEXT_NODE_0_:
+ DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
+ DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4);
+
+THREE_CONTEXT_NODE_0_:
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3);
+
+TWO_CONTEXT_NODE_0_:
+ DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2);
+/* Magnitude 1: same steps as the macro above, except that Prob continues from coef_probs + ENTROPY_NODES */
+ONE_CONTEXT_NODE_0_:
+ DECODE_AND_APPLYSIGN(1);
+ Prob = coef_probs + ENTROPY_NODES;
+
+ if (c < 15)
+ {
+ qcoeff_ptr [ scan[c] ] = (INT16) v;
+ ++c;
+ goto DO_WHILE;
+ }
+
+ qcoeff_ptr [ scan[15] ] = (INT16) v;
+BLOCK_FINISHED:
+ t = ((x->block[i].eob = c) != !type); // any nonzero data?
+ eobtotal += x->block[i].eob;
+ *a = *l = t;
+ qcoeff_ptr += 16;
+
+ i++;
+
+ if (i < stop)
+ goto BLOCK_LOOP;
+/* Block 24 was just decoded: continue with blocks 0..15 as type 0 */
+ if (i == 25)
+ {
+ scan = vp8_default_zig_zag1d;//x->scan_order1d;
+ type = 0;
+ i = 0;
+ stop = 16;
+ coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
+ qcoeff_ptr = &x->qcoeff[0];
+ goto BLOCK_LOOP;
+ }
+/* Blocks 0..15 are done: continue with blocks 16..23 as type 2 */
+ if (i == 16)
+ {
+ type = 2;
+ coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
+ stop = 24;
+ goto BLOCK_LOOP;
+ }
+/* Write the local bool decoder state back */
+ bc->count = count;
+ bc->value = value;
+ bc->range = range;
+ bc->read_ptr = bufptr;
+ return eobtotal;
+
+}
diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h
new file mode 100644
index 0000000..6a9a476
--- /dev/null
+++ b/vp8/decoder/detokenize.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#ifndef detokenize_h
+#define detokenize_h 1
+
+#include "onyxd_int.h"
+/* Token decoding entry points implemented in detokenize.c */
+void vp8_reset_mb_tokens_context(MACROBLOCKD *x); /* zeroes the macroblock's above/left entropy context entries */
+int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *); /* decodes the macroblock's coefficient tokens; returns the eob total */
+
+#endif /* detokenize_h */
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
new file mode 100644
index 0000000..302b64b
--- /dev/null
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "dequantize.h"
+#include "onyxd_int.h"
+
+extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
+
+void vp8_dmachine_specific_config(VP8D_COMP *pbi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    // With run-time CPU detection every dispatched entry starts out
+    // pointing at its portable C implementation.
+    pbi->mb.rtcd = &pbi->common.rtcd;
+
+    // Bool decoder entry points
+    pbi->dboolhuff.start = vp8dx_start_decode_c;
+    pbi->dboolhuff.stop  = vp8dx_stop_decode_c;
+    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
+#if 0 //For use with RTCD, when implemented
+    pbi->dboolhuff.debool = vp8dx_decode_bool_c;
+    pbi->dboolhuff.devalue = vp8dx_decode_value_c;
+#endif
+
+    // Dequantizer entry points
+    pbi->dequant.block   = vp8_dequantize_b_c;
+    pbi->dequant.idct    = vp8_dequant_idct_c;
+    pbi->dequant.idct_dc = vp8_dequant_dc_idct_c;
+#endif
+
+#if ARCH_X86 || ARCH_X86_64
+    // x86 builds then run the x86-specific decoder initialization.
+    vp8_arch_x86_decode_init(pbi);
+#endif
+}
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
new file mode 100644
index 0000000..6875585
--- /dev/null
+++ b/vp8/decoder/onyxd_if.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "onyxc_int.h"
+#if CONFIG_POSTPROC
+#include "postproc.h"
+#endif
+#include "onyxd.h"
+#include "onyxd_int.h"
+#include "vpx_mem/vpx_mem.h"
+#include "alloccommon.h"
+#include "vpx_scale/yv12extend.h"
+#include "loopfilter.h"
+#include "swapyv12buffer.h"
+#include "g_common.h"
+#include "threading.h"
+#include "decoderthreading.h"
+#include <stdio.h>
+#include "segmentation_common.h"
+#include "quant_common.h"
+#include "vpx_scale/vpxscale.h"
+#include "systemdependent.h"
+#include "vpx_ports/vpx_timer.h"
+
+
+extern void vp8_init_loop_filter(VP8_COMMON *cm);
+
+extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
+
+// DEBUG code
+#if CONFIG_DEBUG
+/* Debug helper: append the Y, U and V planes of frame 's' to the file
+ * 'name', row by row so that stride padding is not written.
+ * Returns without writing if the file cannot be opened. */
+void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
+{
+    FILE *yuv_file = fopen((char *)name, "ab");
+    unsigned char *src;
+    int h;
+
+    if (!yuv_file)
+        return;
+
+    // Y plane; rows are skipped entirely when the height is not positive
+    for (src = s->y_buffer, h = s->y_height; h > 0; h--)
+    {
+        fwrite(src, s->y_width, 1, yuv_file);
+        src += s->y_stride;
+    }
+
+    // U plane
+    for (src = s->u_buffer, h = s->uv_height; h > 0; h--)
+    {
+        fwrite(src, s->uv_width, 1, yuv_file);
+        src += s->uv_stride;
+    }
+
+    // V plane
+    for (src = s->v_buffer, h = s->uv_height; h > 0; h--)
+    {
+        fwrite(src, s->uv_width, 1, yuv_file);
+        src += s->uv_stride;
+    }
+
+    fclose(yuv_file);
+}
+#endif
+
+/* One-time global decoder setup; calls after the first are no-ops. */
+void vp8dx_initialize()
+{
+    static int already_initialized = 0;
+    if (already_initialized)
+        return;
+
+    vp8_initialize_common();
+    vp8_scale_machine_specific_config();
+    already_initialized = 1;
+}
+
+
+VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
+{
+ VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));
+
+ if (!pbi)
+ return NULL;
+
+ vpx_memset(pbi, 0, sizeof(VP8D_COMP));
+
+ if (setjmp(pbi->common.error.jmp))
+ {
+ pbi->common.error.setjmp = 0;
+ vp8dx_remove_decompressor(pbi);
+ return 0;
+ }
+
+ pbi->common.error.setjmp = 1;
+ vp8dx_initialize();
+
+ vp8_create_common(&pbi->common);
+ vp8_dmachine_specific_config(pbi);
+
+ pbi->common.current_video_frame = 0;
+ pbi->ready_for_new_data = 1;
+
+ pbi->CPUFreq = 0; //vp8_get_processor_freq();
+ pbi->max_threads = oxcf->max_threads;
+ vp8_decoder_create_threads(pbi);
+
+ //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
+ // unnecessary calling of vp8cx_init_de_quantizer() for every frame.
+ vp8cx_init_de_quantizer(pbi);
+
+ {
+ VP8_COMMON *cm = &pbi->common;
+
+ vp8_init_loop_filter(cm);
+ cm->last_frame_type = KEY_FRAME;
+ cm->last_filter_type = cm->filter_type;
+ cm->last_sharpness_level = cm->sharpness_level;
+ }
+
+ pbi->common.error.setjmp = 0;
+ return (VP8D_PTR) pbi;
+}
+
+
+void vp8dx_remove_decompressor(VP8D_PTR ptr)
+{
+ VP8D_COMP *pbi = (VP8D_COMP *) ptr;
+
+ if (!pbi)
+ return;
+
+ vp8_decoder_remove_threads(pbi);
+ vp8_remove_common(&pbi->common);
+ vpx_free(pbi);
+}
+
+
+void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x)
+{
+ VP8D_COMP *pbi = (VP8D_COMP *) comp;
+
+ (void) pbi;
+ (void) x;
+
+ switch (oxst)
+ {
+ case VP8D_OK:
+ break;
+ }
+}
+
+int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst)
+{
+ VP8D_COMP *pbi = (VP8D_COMP *) comp;
+
+ (void) pbi;
+
+ switch (oxst)
+ {
+ case VP8D_OK:
+ break;
+ }
+
+ return -1;
+}
+
+/* Copy the reference frame selected by ref_frame_flag into 'sd'.
+ * Returns 0 on success and -1 for any other flag value. */
+int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+{
+    VP8_COMMON *cm = &((VP8D_COMP *) ptr)->common;
+    YV12_BUFFER_CONFIG *ref_buf = NULL;
+
+    if (ref_frame_flag == VP8_LAST_FLAG)
+        ref_buf = &cm->last_frame;
+    else if (ref_frame_flag == VP8_GOLD_FLAG)
+        ref_buf = &cm->golden_frame;
+    else if (ref_frame_flag == VP8_ALT_FLAG)
+        ref_buf = &cm->alt_ref_frame;
+
+    if (ref_buf == NULL)
+        return -1;
+    vp8_yv12_copy_frame_ptr(ref_buf, sd);
+    return 0;
+}
+/* Overwrite the reference frame selected by ref_frame_flag with 'sd'.
+ * Returns 0 on success and -1 for any other flag value. */
+int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+{
+    VP8_COMMON *cm = &((VP8D_COMP *) ptr)->common;
+    YV12_BUFFER_CONFIG *ref_buf = NULL;
+
+    if (ref_frame_flag == VP8_LAST_FLAG)
+        ref_buf = &cm->last_frame;
+    else if (ref_frame_flag == VP8_GOLD_FLAG)
+        ref_buf = &cm->golden_frame;
+    else if (ref_frame_flag == VP8_ALT_FLAG)
+        ref_buf = &cm->alt_ref_frame;
+
+    if (ref_buf == NULL)
+        return -1;
+    vp8_yv12_copy_frame_ptr(sd, ref_buf);
+    return 0;
+}
+
+//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.
+#if HAVE_ARMV7
+extern void vp8_push_neon(INT64 *store);
+extern void vp8_pop_neon(INT64 *store);
+static INT64 dx_store_reg[8];
+#endif
+/* Decode the compressed frame at 'source' ('size' bytes), then loop filter,
+ * extend borders and update the reference buffers as the frame signals.
+ * Returns -1 for a null 'ptr' or a longjmp'ed decoder error, otherwise the
+ * value returned by vp8_decode_frame() (returned early when negative). */
+int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp)
+{
+    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
+    VP8_COMMON *cm = &pbi->common;
+    int retcode = 0;
+    struct vpx_usec_timer timer;
+
+//  if(pbi->ready_for_new_data == 0)
+//      return -1;
+
+    if (ptr == 0)
+        return -1;
+
+    pbi->common.error.error_code = VPX_CODEC_OK;
+
+    if (setjmp(pbi->common.error.jmp))
+    {
+        // Only reachable by longjmp, i.e. after the NEON registers were
+        // pushed below, so they must be restored on this path as well.
+#if HAVE_ARMV7
+        vp8_pop_neon(dx_store_reg);
+#endif
+        pbi->common.error.setjmp = 0;
+        return -1;
+    }
+
+    pbi->common.error.setjmp = 1;
+
+#if HAVE_ARMV7
+    vp8_push_neon(dx_store_reg);
+#endif
+
+    vpx_usec_timer_start(&timer);
+
+    //cm->current_video_frame++;
+    pbi->Source = source;
+    pbi->source_sz = size;
+
+    retcode = vp8_decode_frame(pbi);
+
+    if (retcode < 0)
+    {
+#if HAVE_ARMV7
+        vp8_pop_neon(dx_store_reg);
+#endif
+        pbi->common.error.error_code = VPX_CODEC_ERROR;
+        pbi->common.error.setjmp = 0;
+        return retcode;
+    }
+
+    // Update the GF usage maps.
+    vp8_update_gf_useage_maps(cm, &pbi->mb);
+
+    if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
+        vp8_stop_lfthread(pbi);
+
+    if (cm->refresh_last_frame)
+    {
+        vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
+        cm->frame_to_show = &cm->last_frame;
+    }
+    else
+    {
+        cm->frame_to_show = &cm->new_frame;
+    }
+
+    if (!pbi->b_multithreaded_lf)
+    {
+        struct vpx_usec_timer lpftimer;
+        vpx_usec_timer_start(&lpftimer);
+        // Apply the loop filter if appropriate.
+        if (cm->filter_level > 0)
+        {
+            vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
+            cm->last_frame_type = cm->frame_type;
+            cm->last_filter_type = cm->filter_type;
+            cm->last_sharpness_level = cm->sharpness_level;
+        }
+
+        vpx_usec_timer_mark(&lpftimer);
+        pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+    }
+
+    vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
+
+#if 0
+    // DEBUG code
+    //vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
+    if (cm->current_video_frame <= 5)
+        write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
+#endif
+
+    // If any buffer copy / swapping is signalled it should be done here.
+    if (cm->copy_buffer_to_arf)
+    {
+        if (cm->copy_buffer_to_arf == 1)
+        {
+            if (cm->refresh_last_frame)
+                vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
+            else
+                vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
+        }
+        else if (cm->copy_buffer_to_arf == 2)
+            vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
+    }
+
+    if (cm->copy_buffer_to_gf)
+    {
+        if (cm->copy_buffer_to_gf == 1)
+        {
+            if (cm->refresh_last_frame)
+                vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
+            else
+                vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
+        }
+        else if (cm->copy_buffer_to_gf == 2)
+            vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
+    }
+
+    // Should the golden or alternate reference frame be refreshed?
+    if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
+    {
+        if (cm->refresh_golden_frame)
+            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
+
+        if (cm->refresh_alt_ref_frame)
+            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
+
+        //vpx_log("Decoder: recovery frame received \n");
+
+        // Update data structures that monitor GF usage
+        vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+        cm->gf_active_count = cm->mb_rows * cm->mb_cols;
+    }
+
+    vp8_clear_system_state();
+
+    vpx_usec_timer_mark(&timer);
+    pbi->decode_microseconds = vpx_usec_timer_elapsed(&timer);
+    pbi->time_decoding += pbi->decode_microseconds;
+
+//  vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);
+
+    if (cm->show_frame)
+        cm->current_video_frame++;
+
+    pbi->ready_for_new_data = 0;
+    pbi->last_time_stamp = time_stamp;
+
+#if 0
+    {
+        int i;
+        INT64 earliest_time = pbi->dr[0].time_stamp;
+        INT64 latest_time = pbi->dr[0].time_stamp;
+        INT64 time_diff = 0;
+        int bytes = 0;
+
+        pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;;
+        pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp;
+
+        for (i = 0; i < 16; i++)
+        {
+            bytes += pbi->dr[i].size;
+
+            if (pbi->dr[i].time_stamp < earliest_time)
+                earliest_time = pbi->dr[i].time_stamp;
+
+            if (pbi->dr[i].time_stamp > latest_time)
+                latest_time = pbi->dr[i].time_stamp;
+        }
+
+        time_diff = latest_time - earliest_time;
+
+        if (time_diff > 0)
+        {
+            pbi->common.bitrate = 80000.00 * bytes / time_diff ;
+            pbi->common.framerate = 160000000.00 / time_diff ;
+        }
+    }
+#endif
+
+#if HAVE_ARMV7
+    vp8_pop_neon(dx_store_reg);
+#endif
+    pbi->common.error.setjmp = 0;
+    return retcode;
+}
+int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags)
+{
+ int ret = -1;
+ VP8D_COMP *pbi = (VP8D_COMP *) ptr;
+
+ if (pbi->ready_for_new_data == 1)
+ return ret;
+
+ // ie no raw frame to show!!!
+ if (pbi->common.show_frame == 0)
+ return ret;
+
+ pbi->ready_for_new_data = 1;
+ *time_stamp = pbi->last_time_stamp;
+ *time_end_stamp = 0;
+
+ sd->clrtype = pbi->common.clr_type;
+#if CONFIG_POSTPROC
+ ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags);
+#else
+
+ if (pbi->common.frame_to_show)
+ {
+ *sd = *pbi->common.frame_to_show;
+ sd->y_width = pbi->common.Width;
+ sd->y_height = pbi->common.Height;
+ sd->uv_height = pbi->common.Height / 2;
+ ret = 0;
+ }
+ else
+ {
+ ret = -1;
+ }
+
+#endif //!CONFIG_POSTPROC
+ vp8_clear_system_state();
+ return ret;
+}
diff --git a/vp8/decoder/onyxd_if_sjl.c b/vp8/decoder/onyxd_if_sjl.c
new file mode 100644
index 0000000..363ad5d
--- /dev/null
+++ b/vp8/decoder/onyxd_if_sjl.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "onyxc_int.h"
+#include "postproc.h"
+#include "onyxd.h"
+#include "onyxd_int.h"
+#include "vpx_mem/vpx_mem.h"
+#include "alloccommon.h"
+#include "vpx_scale/yv12extend.h"
+#include "loopfilter.h"
+#include "swapyv12buffer.h"
+#include "g_common.h"
+#include "threading.h"
+#include "decoderthreading.h"
+#include <stdio.h>
+#include "segmentation_common.h"
+#include "quant_common.h"
+#include "vpx_scale/vpxscale.h"
+#include "systemdependent.h"
+#include "vpx_ports/vpx_timer.h"
+
+
+#ifndef VPX_NO_GLOBALS
+static int init_ct = 0;
+#else
+# include "vpx_global_handling.h"
+# define init_ct ((int)vpxglobalm(onyxd,init_ct))
+#endif
+
+extern void vp8_init_loop_filter(VP8_COMMON *cm);
+
+extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
+extern void init_detokenizer(VP8D_COMP *dx);
+
+// DEBUG code
+/* Debug helper: append the Y, U and V planes of frame 's' to the file
+ * 'name', row by row so that stride padding is not written.
+ * Returns without writing if the file cannot be opened. */
+void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
+{
+    FILE *yuv_file = fopen((char *)name, "ab");
+    unsigned char *src;
+    int h;
+
+    if (!yuv_file)
+        return;
+
+    // Y plane; rows are skipped entirely when the height is not positive
+    for (src = s->y_buffer, h = s->y_height; h > 0; h--)
+    {
+        fwrite(src, s->y_width, 1, yuv_file);
+        src += s->y_stride;
+    }
+
+    // U plane
+    for (src = s->u_buffer, h = s->uv_height; h > 0; h--)
+    {
+        fwrite(src, s->uv_width, 1, yuv_file);
+        src += s->uv_stride;
+    }
+
+    // V plane
+    for (src = s->v_buffer, h = s->uv_height; h > 0; h--)
+    {
+        fwrite(src, s->uv_width, 1, yuv_file);
+        src += s->uv_stride;
+    }
+
+    fclose(yuv_file);
+}
+
+void vp8dx_initialize()
+{
+ if (!init_ct++)
+ {
+ vp8_initialize_common();
+ vp8_scale_machine_specific_config();
+ }
+}
+
+void vp8dx_shutdown()
+{
+ if (!--init_ct)
+ {
+ vp8_shutdown_common();
+ }
+}
+
+
+VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
+{
+ VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));
+
+ if (!pbi)
+ return NULL;
+
+ vpx_memset(pbi, 0, sizeof(VP8D_COMP));
+
+ vp8dx_initialize();
+
+ vp8_create_common(&pbi->common);
+ vp8_dmachine_specific_config(pbi);
+
+ pbi->common.current_video_frame = 0;
+ pbi->ready_for_new_data = 1;
+
+ pbi->CPUFreq = 0; //vp8_get_processor_freq();
+ pbi->max_threads = oxcf->max_threads;
+ vp8_decoder_create_threads(pbi);
+
+ //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
+ // unnecessary calling of vp8cx_init_de_quantizer() for every frame.
+ vp8cx_init_de_quantizer(pbi);
+
+ {
+ VP8_COMMON *cm = &pbi->common;
+
+ vp8_init_loop_filter(cm);
+ cm->last_frame_type = KEY_FRAME;
+ cm->last_filter_type = cm->filter_type;
+ cm->last_sharpness_level = cm->sharpness_level;
+ }
+
+ init_detokenizer(pbi);
+
+ return (VP8D_PTR) pbi;
+}
+void vp8dx_remove_decompressor(VP8D_PTR ptr)
+{
+ VP8D_COMP *pbi = (VP8D_COMP *) ptr;
+
+ if (!pbi)
+ return;
+
+ vp8_decoder_remove_threads(pbi);
+ vp8_remove_common(&pbi->common);
+ vpx_free(pbi);
+ vp8dx_shutdown();
+
+}
+
+void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x)
+{
+ VP8D_COMP *pbi = (VP8D_COMP *) comp;
+
+ (void) pbi;
+ (void) x;
+
+ switch (oxst)
+ {
+ case VP8D_OK:
+ break;
+ }
+}
+
+int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst)
+{
+ VP8D_COMP *pbi = (VP8D_COMP *) comp;
+
+ (void) pbi;
+
+ switch (oxst)
+ {
+ case VP8D_OK:
+ break;
+ }
+
+ return -1;
+}
+
+/* Copy the reference frame selected by ref_frame_flag into 'sd'.
+ * Returns 0 on success and -1 for any other flag value. */
+int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+{
+    VP8_COMMON *cm = &((VP8D_COMP *) ptr)->common;
+    YV12_BUFFER_CONFIG *ref_buf = NULL;
+
+    if (ref_frame_flag == VP8_LAST_FLAG)
+        ref_buf = &cm->last_frame;
+    else if (ref_frame_flag == VP8_GOLD_FLAG)
+        ref_buf = &cm->golden_frame;
+    else if (ref_frame_flag == VP8_ALT_FLAG)
+        ref_buf = &cm->alt_ref_frame;
+
+    if (ref_buf == NULL)
+        return -1;
+    vp8_yv12_copy_frame_ptr(ref_buf, sd);
+    return 0;
+}
+/* Overwrite the reference frame selected by ref_frame_flag with 'sd'.
+ * Returns 0 on success and -1 for any other flag value. */
+int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
+{
+    VP8_COMMON *cm = &((VP8D_COMP *) ptr)->common;
+    YV12_BUFFER_CONFIG *ref_buf = NULL;
+
+    if (ref_frame_flag == VP8_LAST_FLAG)
+        ref_buf = &cm->last_frame;
+    else if (ref_frame_flag == VP8_GOLD_FLAG)
+        ref_buf = &cm->golden_frame;
+    else if (ref_frame_flag == VP8_ALT_FLAG)
+        ref_buf = &cm->alt_ref_frame;
+
+    if (ref_buf == NULL)
+        return -1;
+    vp8_yv12_copy_frame_ptr(sd, ref_buf);
+    return 0;
+}
+int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, char *source, INT64 time_stamp)
+{
+ VP8D_COMP *pbi = (VP8D_COMP *) ptr;
+ VP8_COMMON *cm = &pbi->common;
+ int retcode = 0;
+
+ struct vpx_usec_timer timer;
+ (void) size;
+
+// if(pbi->ready_for_new_data == 0)
+// return -1;
+
+ vpx_usec_timer_start(&timer);
+
+ if (ptr == 0)
+ {
+ return -1;
+ }
+
+ //cm->current_video_frame++;
+ pbi->Source = source;
+
+ retcode = vp8_decode_frame(pbi);
+
+ if (retcode < 0)
+ return retcode;
+
+ // Update the GF useage maps.
+ vp8_update_gf_useage_maps(cm, &pbi->mb);
+
+ if (pbi->b_multithreaded)
+ vp8_stop_lfthread(pbi);
+
+ if (cm->refresh_last_frame)
+ {
+ vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
+
+ cm->frame_to_show = &cm->last_frame;
+ }
+ else
+ {
+ cm->frame_to_show = &cm->new_frame;
+ }
+
+ if (!pbi->b_multithreaded)
+ {
+ struct vpx_usec_timer lpftimer;
+ vpx_usec_timer_start(&lpftimer);
+ // Apply the loop filter if appropriate.
+
+ if (cm->filter_level > 0)
+ {
+ vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
+ cm->last_frame_type = cm->frame_type;
+ cm->last_filter_type = cm->filter_type;
+ cm->last_sharpness_level = cm->sharpness_level;
+
+ }
+
+ vpx_usec_timer_mark(&lpftimer);
+ pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+ }
+
+ vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
+
+#if 0
+ // DEBUG code
+ //vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
+ if (cm->current_video_frame <= 5)
+ write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
+#endif
+
+ // If any buffer copy / swaping is signalled it should be done here.
+ if (cm->copy_buffer_to_arf)
+ {
+ if (cm->copy_buffer_to_arf == 1)
+ {
+ if (cm->refresh_last_frame)
+ vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
+ else
+ vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
+ }
+ else if (cm->copy_buffer_to_arf == 2)
+ vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
+ }
+
+ if (cm->copy_buffer_to_gf)
+ {
+ if (cm->copy_buffer_to_gf == 1)
+ {
+ if (cm->refresh_last_frame)
+ vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
+ else
+ vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
+ }
+ else if (cm->copy_buffer_to_gf == 2)
+ vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
+ }
+
+ // Should the golden or alternate reference frame be refreshed?
+ if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
+ {
+ if (cm->refresh_golden_frame)
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
+
+ if (cm->refresh_alt_ref_frame)
+ vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
+
+ //vpx_log("Decoder: recovery frame received \n");
+
+ // Update data structures that monitors GF useage
+ vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+ cm->gf_active_count = cm->mb_rows * cm->mb_cols;
+ }
+
+ vp8_clear_system_state();
+
+ vpx_usec_timer_mark(&timer);
+ pbi->decode_microseconds = vpx_usec_timer_elapsed(&timer);
+
+ pbi->time_decoding += pbi->decode_microseconds;
+
+// vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);
+
+ cm->current_video_frame++;
+ pbi->ready_for_new_data = 0;
+ pbi->last_time_stamp = time_stamp;
+
+ {
+ int i;
+ INT64 earliest_time = pbi->dr[0].time_stamp;
+ INT64 latest_time = pbi->dr[0].time_stamp;
+ INT64 time_diff = 0;
+ int bytes = 0;
+
+ pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;;
+ pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp;
+
+ for (i = 0; i < 16; i++)
+ {
+
+ bytes += pbi->dr[i].size;
+
+ if (pbi->dr[i].time_stamp < earliest_time)
+ earliest_time = pbi->dr[i].time_stamp;
+
+ if (pbi->dr[i].time_stamp > latest_time)
+ latest_time = pbi->dr[i].time_stamp;
+ }
+
+ time_diff = latest_time - earliest_time;
+
+ if (time_diff > 0)
+ {
+ pbi->common.bitrate = 80000.00 * bytes / time_diff ;
+ pbi->common.framerate = 160000000.00 / time_diff ;
+ }
+
+ }
+ return retcode;
+}
+int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags)
+{
+ int ret = -1;
+ VP8D_COMP *pbi = (VP8D_COMP *) ptr;
+
+ if (pbi->ready_for_new_data == 1)
+ return ret;
+
+ // ie no raw frame to show!!!
+ if (pbi->common.show_frame == 0)
+ return ret;
+
+ pbi->ready_for_new_data = 1;
+ *time_stamp = pbi->last_time_stamp;
+ *time_end_stamp = 0;
+
+ sd->clrtype = pbi->common.clr_type;
+ ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags);
+ vp8_clear_system_state();
+ return ret;
+}
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
new file mode 100644
index 0000000..fa4fa48
--- /dev/null
+++ b/vp8/decoder/onyxd_int.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#ifndef __INC_VP8D_INT_H
+#define __INC_VP8D_INT_H
+#include "vpx_ports/config.h"
+#include "onyxd.h"
+#include "treereader.h"
+#include "onyxc_int.h"
+#include "threading.h"
+#include "dequantize.h"
+
+typedef struct
+{
+ int ithread;
+ void *ptr1;
+ void *ptr2;
+} DECODETHREAD_DATA;
+
+typedef struct
+{
+ MACROBLOCKD mbd;
+ int mb_row;
+ int current_mb_col;
+ short *coef_ptr;
+} MB_ROW_DEC;
+
+typedef struct
+{
+ INT64 time_stamp;
+ int size;
+} DATARATE;
+
+typedef struct
+{
+ INT16 min_val;
+ INT16 Length;
+ UINT8 Probs[12];
+} TOKENEXTRABITS;
+
+typedef struct
+{
+ int *scan;
+ UINT8 *ptr_onyxblock2context_leftabove;
+ vp8_tree_index *vp8_coef_tree_ptr; //onyx_coef_tree_ptr; ???
+ TOKENEXTRABITS *teb_base_ptr;
+ unsigned char *norm_ptr;
+// UINT16 *ptr_onyx_coef_bands_x;
+ UINT8 *ptr_onyx_coef_bands_x;
+
+ ENTROPY_CONTEXT **A;
+ ENTROPY_CONTEXT(*L)[4];
+
+ INT16 *qcoeff_start_ptr;
+ BOOL_DECODER *current_bc;
+
+ UINT8 *coef_probs[4];
+
+ UINT8 eob[25];
+
+} DETOK;
+
+typedef struct VP8Decompressor
+{
+ DECLARE_ALIGNED(16, MACROBLOCKD, mb);
+
+ DECLARE_ALIGNED(16, VP8_COMMON, common);
+
+ vp8_reader bc, bc2;
+
+ VP8D_CONFIG oxcf;
+
+
+ const unsigned char *Source;
+ unsigned int source_sz;
+
+
+ unsigned int CPUFreq;
+ unsigned int decode_microseconds;
+ unsigned int time_decoding;
+ unsigned int time_loop_filtering;
+
+ volatile int b_multithreaded_rd;
+ volatile int b_multithreaded_lf;
+ int max_threads;
+ int last_mb_row_decoded;
+ int current_mb_col_main;
+ int decoding_thread_count;
+ int allocated_decoding_thread_count;
+
+ // variable for threading
+ DECLARE_ALIGNED(16, MACROBLOCKD, lpfmb);
+#if CONFIG_MULTITHREAD
+ pthread_t h_thread_lpf; // thread for postprocessing
+ sem_t h_event_lpf; // Event for post_proc completed
+ sem_t h_event_start_lpf;
+#endif
+ MB_ROW_DEC *mb_row_di;
+ DECODETHREAD_DATA *de_thread_data;
+#if CONFIG_MULTITHREAD
+ pthread_t *h_decoding_thread;
+ sem_t *h_event_mbrdecoding;
+ sem_t h_event_main;
+ // end of threading data
+#endif
+ vp8_reader *mbc;
+ INT64 last_time_stamp;
+ int ready_for_new_data;
+
+ DATARATE dr[16];
+
+ DETOK detoken;
+
+#if CONFIG_RUNTIME_CPU_DETECT
+ vp8_dequant_rtcd_vtable_t dequant;
+ struct vp8_dboolhuff_rtcd_vtable dboolhuff;
+#endif
+
+} VP8D_COMP;
+
+int vp8_decode_frame(VP8D_COMP *cpi);
+void vp8_dmachine_specific_config(VP8D_COMP *pbi);
+
+
+#if CONFIG_DEBUG
+#define CHECK_MEM_ERROR(lval,expr) do {\
+ lval = (expr); \
+ if(!lval) \
+ vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,\
+ "Failed to allocate "#lval" at %s:%d", \
+ __FILE__,__LINE__);\
+ } while(0)
+#else
+#define CHECK_MEM_ERROR(lval,expr) do {\
+ lval = (expr); \
+ if(!lval) \
+ vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,\
+ "Failed to allocate "#lval);\
+ } while(0)
+#endif
+
+#endif
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
new file mode 100644
index 0000000..e35d175
--- /dev/null
+++ b/vp8/decoder/threading.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#ifndef WIN32
+# include <unistd.h>
+#endif
+#include "onyxd_int.h"
+#include "vpx_mem/vpx_mem.h"
+#include "threading.h"
+
+#include "loopfilter.h"
+#include "extend.h"
+#include "vpx_ports/vpx_timer.h"
+
+extern void vp8_decode_mb_row(VP8D_COMP *pbi,
+ VP8_COMMON *pc,
+ int mb_row,
+ MACROBLOCKD *xd);
+
+extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
+extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);
+
+void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
+{
+
+
+
+#if CONFIG_MULTITHREAD
+ VP8_COMMON *const pc = & pbi->common;
+ int i, j;
+
+ for (i = 0; i < count; i++)
+ {
+ MACROBLOCKD *mbd = &mbrd[i].mbd;
+#if CONFIG_RUNTIME_CPU_DETECT
+ mbd->rtcd = xd->rtcd;
+#endif
+
+
+ mbd->subpixel_predict = xd->subpixel_predict;
+ mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
+ mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
+ mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
+ mbd->gf_active_ptr = xd->gf_active_ptr;
+
+ mbd->mode_info = pc->mi - 1;
+ mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
+ mbd->mode_info_stride = pc->mode_info_stride;
+
+ mbd->frame_type = pc->frame_type;
+ mbd->frames_since_golden = pc->frames_since_golden;
+ mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame;
+
+ mbd->pre = pc->last_frame;
+ mbd->dst = pc->new_frame;
+
+
+
+
+ vp8_setup_block_dptrs(mbd);
+ vp8_build_block_doffsets(mbd);
+ mbd->segmentation_enabled = xd->segmentation_enabled;
+ mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
+ vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
+
+ mbd->mbmi.mode = DC_PRED;
+ mbd->mbmi.uv_mode = DC_PRED;
+
+ mbd->current_bc = &pbi->bc2;
+
+ for (j = 0; j < 25; j++)
+ {
+ mbd->block[j].dequant = xd->block[j].dequant;
+ }
+ }
+
+#else
+ (void) pbi;
+ (void) xd;
+ (void) mbrd;
+ (void) count;
+#endif
+}
+
+
+/* Row-decoding worker thread. Each time h_event_mbrdecoding[ithread] is
+ * posted it decodes macroblock row mbrd->mb_row, spinning per column until
+ * the column counter of the previous worker (or of the main thread for
+ * ithread 0) has passed it, and exits once b_multithreaded_rd reads zero. */
+THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
+{
+#if CONFIG_MULTITHREAD
+    int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
+    VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
+    MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
+    ENTROPY_CONTEXT mb_row_left_context[4][4];
+
+    while (1)
+    {
+        if (pbi->b_multithreaded_rd == 0)
+            break;
+
+        //if(WaitForSingleObject(pbi->h_event_mbrdecoding[ithread], INFINITE) == WAIT_OBJECT_0)
+        if (sem_wait(&pbi->h_event_mbrdecoding[ithread]) == 0)
+        {
+            if (pbi->b_multithreaded_rd == 0)
+                break;
+            else
+            {
+                VP8_COMMON *pc = &pbi->common;
+                int mb_row = mbrd->mb_row;
+                MACROBLOCKD *xd = &mbrd->mbd;
+
+                //printf("ithread:%d mb_row %d\n", ithread, mb_row);
+                int i;
+                int recon_yoffset, recon_uvoffset;
+                int mb_col;
+                int recon_y_stride = pc->last_frame.y_stride;
+                int recon_uv_stride = pc->last_frame.uv_stride;
+
+                volatile int *last_row_current_mb_col;
+
+                if (ithread > 0)
+                    last_row_current_mb_col = &pbi->mb_row_di[ithread-1].current_mb_col;
+                else
+                    last_row_current_mb_col = &pbi->current_mb_col_main;
+
+                recon_yoffset = mb_row * recon_y_stride * 16;
+                recon_uvoffset = mb_row * recon_uv_stride * 8;
+                // reset above block coeffs
+
+                xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
+                xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
+                xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
+                xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
+                xd->left_context = mb_row_left_context;
+                vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
+                xd->up_available = (mb_row != 0);
+
+                xd->mb_to_top_edge = -((mb_row * 16) << 3); // shift first, then negate
+                xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+
+                for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
+                {
+                    while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != pc->mb_cols - 1)
+                    {
+                        x86_pause_hint();
+                        thread_sleep(0);
+                    }
+
+                    // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
+                    vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
+
+                    if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
+                    {
+                        for (i = 0; i < 16; i++)
+                        {
+                            BLOCKD *d = &xd->block[i];
+                            vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+                        }
+                    }
+
+                    // Distance of Mb to the various image edges.
+                    // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                    xd->mb_to_left_edge = -((mb_col * 16) << 3);
+                    xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+
+                    xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
+                    xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
+                    xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
+
+                    xd->left_available = (mb_col != 0);
+
+                    // Select the appropriate reference frame for this MB
+                    if (xd->mbmi.ref_frame == LAST_FRAME)
+                    {
+                        xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
+                        xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
+                        xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
+                    }
+                    else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
+                    {
+                        // Golden frame reconstruction buffer
+                        xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
+                        xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
+                        xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
+                    }
+                    else
+                    {
+                        // Alternate reference frame reconstruction buffer
+                        xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
+                        xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
+                        xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
+                    }
+
+                    vp8_build_uvmvs(xd, pc->full_pixel);
+
+                    vp8dx_bool_decoder_fill(xd->current_bc);
+                    vp8_decode_macroblock(pbi, xd);
+
+                    recon_yoffset += 16;
+                    recon_uvoffset += 8;
+
+                    ++xd->mode_info_context;  /* next mb */
+
+                    xd->gf_active_ptr++;      // GF usage flag for next MB
+
+                    xd->above_context[Y1CONTEXT] += 4;
+                    xd->above_context[UCONTEXT ] += 2;
+                    xd->above_context[VCONTEXT ] += 2;
+                    xd->above_context[Y2CONTEXT] ++;
+                    pbi->mb_row_di[ithread].current_mb_col = mb_col;
+                }
+
+                // adjust to the next row of mbs
+                vp8_extend_mb_row(
+                    &pc->new_frame,
+                    xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
+                );
+
+                ++xd->mode_info_context;      /* skip prediction column */
+
+                // since we have multithread
+                xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
+
+                //memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
+                if ((mb_row & 1) == 1)
+                {
+                    pbi->last_mb_row_decoded = mb_row;
+                    //printf("S%d", pbi->last_mb_row_decoded);
+                }
+
+                if (ithread == (pbi->decoding_thread_count - 1) || mb_row == pc->mb_rows - 1)
+                {
+                    //SetEvent(pbi->h_event_main);
+                    sem_post(&pbi->h_event_main);
+                }
+            }
+        }
+    }
+
+#else
+    (void) p_data;
+#endif
+
+    return 0 ;
+}
+
+// Entry point of the dedicated loop-filter thread.
+//
+// p_data is the VP8D_COMP decoder instance. The thread sleeps on
+// h_event_start_lpf, and each time it is woken it loop-filters one whole
+// frame (cm->new_frame), following behind the row decoder, then posts
+// h_event_lpf. It exits when b_multithreaded_lf is cleared.
+// Always returns 0. Without CONFIG_MULTITHREAD the body is empty.
+THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
+{
+#if CONFIG_MULTITHREAD
+ VP8D_COMP *pbi = (VP8D_COMP *)p_data;
+
+ while (1)
+ {
+ if (pbi->b_multithreaded_lf == 0)
+ break;
+
+ //printf("before waiting for start_lpf\n");
+
+ // Block until a frame is ready to be filtered (vp8_start_lfthread) or
+ // until shutdown is requested (vp8_decoder_remove_threads).
+ //if(WaitForSingleObject(pbi->h_event_start_lpf, INFINITE) == WAIT_OBJECT_0)
+ if (sem_wait(&pbi->h_event_start_lpf) == 0)
+ {
+ if (pbi->b_multithreaded_lf == 0) // we're shutting down
+ break;
+ else
+ {
+
+ VP8_COMMON *cm = &pbi->common;
+ // lpfmb is this thread's own MACROBLOCKD; vp8_start_lfthread copies
+ // pbi->mb into it before waking the thread.
+ MACROBLOCKD *mbd = &pbi->lpfmb;
+ int default_filt_lvl = pbi->common.filter_level;
+
+ YV12_BUFFER_CONFIG *post = &cm->new_frame;
+ loop_filter_info *lfi = cm->lf_info;
+
+ int mb_row;
+ int mb_col;
+
+
+ int baseline_filter_level[MAX_MB_SEGMENTS];
+ int filter_level;
+ int alt_flt_enabled = mbd->segmentation_enabled;
+
+ int i;
+ unsigned char *y_ptr, *u_ptr, *v_ptr;
+
+ // Read through a volatile pointer because the value is written by
+ // another thread while this one polls it.
+ volatile int *last_mb_row_decoded = &pbi->last_mb_row_decoded;
+
+ //MODE_INFO * this_mb_mode_info = cm->mi;
+ mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list
+
+ // Note the baseline filter values for each segment
+ if (alt_flt_enabled)
+ {
+ for (i = 0; i < MAX_MB_SEGMENTS; i++)
+ {
+ // Absolute mode: the segment data is the filter level itself.
+ if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+ baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+ else
+ {
+ // Delta mode: the segment data is an offset from the frame level.
+ baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+ baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
+ }
+ }
+ }
+ else
+ {
+ for (i = 0; i < MAX_MB_SEGMENTS; i++)
+ baseline_filter_level[i] = default_filt_lvl;
+ }
+
+ // Initialize the loop filter for this frame.
+ vp8_init_loop_filter(cm);
+
+ // Set up the buffer pointers
+ y_ptr = post->y_buffer;
+ u_ptr = post->u_buffer;
+ v_ptr = post->v_buffer;
+
+ // vp8_filter each macro block
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+ {
+
+ // Spin (yielding the CPU) until the decoder reports a row index
+ // greater than the row about to be filtered.
+ while (mb_row >= *last_mb_row_decoded)
+ {
+ x86_pause_hint();
+ thread_sleep(0);
+ }
+
+ //printf("R%d", mb_row);
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+ {
+ int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
+
+ filter_level = baseline_filter_level[Segment];
+
+ // Apply any context driven MB level adjustment
+ vp8_adjust_mb_lf_value(mbd, &filter_level);
+
+ if (filter_level)
+ {
+ // Left macroblock edge; skipped for the first column.
+ if (mb_col > 0)
+ cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+
+ // Inner vertical block edges, only when dc_diff > 0.
+ if (mbd->mode_info_context->mbmi.dc_diff > 0)
+ cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+
+ // don't apply across umv border
+ if (mb_row > 0)
+ cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+
+ // Inner horizontal block edges, only when dc_diff > 0.
+ if (mbd->mode_info_context->mbmi.dc_diff > 0)
+ cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+ }
+
+ // Advance one macroblock: 16 luma / 8 chroma pixels.
+ y_ptr += 16;
+ u_ptr += 8;
+ v_ptr += 8;
+
+ mbd->mode_info_context++; // step to next MB
+
+ }
+
+ // Rewind to the start of the row and step down one macroblock row.
+ y_ptr += post->y_stride * 16 - post->y_width;
+ u_ptr += post->uv_stride * 8 - post->uv_width;
+ v_ptr += post->uv_stride * 8 - post->uv_width;
+
+ mbd->mode_info_context++; // Skip border mb
+ }
+
+ //printf("R%d\n", mb_row);
+ // When done, signal main thread that loop filtering is finished
+ // (vp8_stop_lfthread waits on this semaphore).
+ //SetEvent(pbi->h_event_lpf);
+ sem_post(&pbi->h_event_lpf);
+ }
+
+ }
+ }
+
+#else
+ (void) p_data;
+#endif
+ return 0;
+}
+
+// Create the decoder's helper threads.
+//
+// The usable core count is pbi->max_threads capped at 16. With more than one
+// core this starts one loop-filter thread plus (core_count - 1) macroblock-row
+// decoding threads, together with the semaphores and per-thread data they
+// need. With one core (or without CONFIG_MULTITHREAD) nothing is started and
+// the multithreading flags stay cleared.
+void vp8_decoder_create_threads(VP8D_COMP *pbi)
+{
+#if CONFIG_MULTITHREAD
+    int worker;
+    int worker_count;
+    int core_count = pbi->max_threads; //vp8_get_proc_core_count();
+
+    if (core_count > 16)
+        core_count = 16;
+
+    pbi->b_multithreaded_rd = 0;
+    pbi->b_multithreaded_lf = 0;
+    pbi->allocated_decoding_thread_count = 0;
+
+    // Single core: stay fully single-threaded.
+    if (core_count <= 1)
+        return;
+
+    // Loop-filter thread and its start/done semaphores.
+    sem_init(&pbi->h_event_lpf, 0, 0);
+    sem_init(&pbi->h_event_start_lpf, 0, 0);
+    pbi->b_multithreaded_lf = 1;
+    pthread_create(&pbi->h_thread_lpf, 0, vp8_thread_loop_filter, (pbi));
+
+    // Row-decoding workers: one per core beyond the main thread.
+    worker_count = core_count - 1;
+    pbi->b_multithreaded_rd = 1;
+    pbi->decoding_thread_count = worker_count;
+
+    CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * worker_count));
+    CHECK_MEM_ERROR(pbi->h_event_mbrdecoding, vpx_malloc(sizeof(sem_t) * worker_count));
+    CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * worker_count));
+    vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * worker_count);
+    CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * worker_count));
+
+    for (worker = 0; worker < worker_count; worker++)
+    {
+        DECODETHREAD_DATA *const args = &pbi->de_thread_data[worker];
+
+        sem_init(&pbi->h_event_mbrdecoding[worker], 0, 0);
+
+        // Each worker receives its index, the decoder and its own row context.
+        args->ithread = worker;
+        args->ptr1 = (void *)pbi;
+        args->ptr2 = (void *) &pbi->mb_row_di[worker];
+
+        pthread_create(&pbi->h_decoding_thread[worker], 0, vp8_thread_decoding_proc, args);
+    }
+
+    sem_init(&pbi->h_event_main, 0, 0);
+    pbi->allocated_decoding_thread_count = worker_count;
+
+#else
+    (void) pbi;
+#endif
+}
+
+// Stop and tear down the threads started by vp8_decoder_create_threads.
+//
+// The loop-filter thread and the row-decoding threads are each asked to exit
+// by clearing their "multithreaded" flag and posting the semaphore they sleep
+// on; they are then joined before their semaphores are destroyed and the
+// per-thread arrays are freed. Does nothing without CONFIG_MULTITHREAD.
+void vp8_decoder_remove_threads(VP8D_COMP *pbi)
+{
+#if CONFIG_MULTITHREAD
+
+    if (pbi->b_multithreaded_lf)
+    {
+        pbi->b_multithreaded_lf = 0;
+        sem_post(&pbi->h_event_start_lpf);
+        pthread_join(pbi->h_thread_lpf, 0);
+        sem_destroy(&pbi->h_event_start_lpf);
+        // h_event_lpf is initialised alongside h_event_start_lpf in
+        // vp8_decoder_create_threads, so it must be released here too.
+        sem_destroy(&pbi->h_event_lpf);
+    }
+
+    //shutdown MB Decoding thread;
+    if (pbi->b_multithreaded_rd)
+    {
+        int i;
+
+        pbi->b_multithreaded_rd = 0;
+
+        // allow all threads to exit
+        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+        {
+            sem_post(&pbi->h_event_mbrdecoding[i]);
+            pthread_join(pbi->h_decoding_thread[i], NULL);
+        }
+
+        // Every worker has been joined; the semaphores can go now.
+        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+        {
+            sem_destroy(&pbi->h_event_mbrdecoding[i]);
+        }
+
+        sem_destroy(&pbi->h_event_main);
+
+        if (pbi->h_decoding_thread)
+        {
+            vpx_free(pbi->h_decoding_thread);
+            pbi->h_decoding_thread = NULL;
+        }
+
+        if (pbi->h_event_mbrdecoding)
+        {
+            vpx_free(pbi->h_event_mbrdecoding);
+            pbi->h_event_mbrdecoding = NULL;
+        }
+
+        if (pbi->mb_row_di)
+        {
+            vpx_free(pbi->mb_row_di);
+            pbi->mb_row_di = NULL ;
+        }
+
+        if (pbi->de_thread_data)
+        {
+            vpx_free(pbi->de_thread_data);
+            pbi->de_thread_data = NULL;
+        }
+    }
+
+#else
+    (void) pbi;
+#endif
+}
+
+
+// Kick off loop filtering of the current frame on the loop-filter thread.
+//
+// Resets the decoded-row progress counter, gives the filter thread its own
+// snapshot of the macroblock descriptor (pbi->mb -> pbi->lpfmb) and then
+// wakes it through h_event_start_lpf. No-op without CONFIG_MULTITHREAD.
+void vp8_start_lfthread(VP8D_COMP *pbi)
+{
+#if CONFIG_MULTITHREAD
+    MACROBLOCKD *const filter_mb = &pbi->lpfmb;
+
+    pbi->last_mb_row_decoded = 0;
+    memcpy(filter_mb, &pbi->mb, sizeof(pbi->mb));
+
+    // Posted last so the thread only wakes once its inputs are in place.
+    sem_post(&pbi->h_event_start_lpf);
+#else
+    (void) pbi;
+#endif
+}
+
+// Wait for the loop-filter thread to finish the current frame.
+//
+// Blocks on h_event_lpf and adds the time spent waiting to
+// pbi->time_loop_filtering. No-op without CONFIG_MULTITHREAD.
+void vp8_stop_lfthread(VP8D_COMP *pbi)
+{
+#if CONFIG_MULTITHREAD
+    struct vpx_usec_timer wait_timer;
+
+    vpx_usec_timer_start(&wait_timer);
+    sem_wait(&pbi->h_event_lpf);
+    vpx_usec_timer_mark(&wait_timer);
+
+    pbi->time_loop_filtering += vpx_usec_timer_elapsed(&wait_timer);
+#else
+    (void) pbi;
+#endif
+}
+
+
+// Decode all macroblock rows of a frame using the main thread plus the
+// worker threads.
+//
+// Rows are handled in groups of (decoding_thread_count + 1): the main thread
+// decodes row mb_row itself and worker i is handed row mb_row + i + 1. Each
+// row, in order, is given the next token partition (pbi->mbc[]) round-robin.
+// No-op without CONFIG_MULTITHREAD.
+void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
+ MACROBLOCKD *xd)
+{
+#if CONFIG_MULTITHREAD
+ int mb_row;
+ VP8_COMMON *pc = &pbi->common;
+
+ int ibc = 0; // index of the next token partition to hand out
+ int num_part = 1 << pbi->common.multi_token_partition;
+
+ vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
+
+ for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
+ {
+ int i;
+ pbi->current_mb_col_main = -1;
+
+ // The main thread's row takes the current partition.
+ xd->current_bc = &pbi->mbc[ibc];
+ ibc++ ;
+
+ if (ibc == num_part)
+ ibc = 0;
+
+ // Hand the following rows to the workers and wake them.
+ for (i = 0; i < pbi->decoding_thread_count; i++)
+ {
+ // Fewer rows left than workers: leave the remaining workers idle.
+ if ((mb_row + i + 1) >= pc->mb_rows)
+ break;
+
+ pbi->mb_row_di[i].mb_row = mb_row + i + 1;
+ pbi->mb_row_di[i].mbd.current_bc = &pbi->mbc[ibc];
+ ibc++;
+
+ if (ibc == num_part)
+ ibc = 0;
+
+ pbi->mb_row_di[i].current_mb_col = -1;
+ sem_post(&pbi->h_event_mbrdecoding[i]);
+ }
+
+ vp8_decode_mb_row(pbi, pc, mb_row, xd);
+
+ // Skip the mode-info rows that the workers are decoding.
+ xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
+
+ // NOTE(review): h_event_main is presumably posted by the worker threads
+ // when the group is finished; the poster is not in this part of the
+ // file -- confirm in vp8_thread_decoding_proc.
+ if (mb_row < pc->mb_rows - 1)
+ {
+ sem_wait(&pbi->h_event_main);
+ }
+ }
+
+ // Publish the final row index; the loop-filter thread polls this value.
+ pbi->last_mb_row_decoded = mb_row;
+#else
+ (void) pbi;
+ (void) xd;
+#endif
+}
diff --git a/vp8/decoder/treereader.h b/vp8/decoder/treereader.h
new file mode 100644
index 0000000..eb10e24
--- /dev/null
+++ b/vp8/decoder/treereader.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#ifndef tree_reader_h
+#define tree_reader_h 1
+
+#include "treecoder.h"
+
+#include "dboolhuff.h"
+
+typedef BOOL_DECODER vp8_reader;
+
+#define vp8_read vp8dx_decode_bool
+#define vp8_read_literal vp8_decode_value
+#define vp8_read_bit( R) vp8_read( R, vp8_prob_half)
+
+
+/* Intent of tree data structure is to make decoding trivial. */
+
+/* Decode one symbol by walking tree t from its root (index 0).
+   At each node one bool is read with probability p[node >> 1] and used to
+   pick one of the node's two entries. Positive entries are further node
+   indices; the first entry that is zero or negative ends the walk and its
+   negation is returned. */
+static int vp8_treed_read(
+    vp8_reader *const r,        /* !!! must return a 0 or 1 !!! */
+    vp8_tree t,
+    const vp8_prob *const p
+)
+{
+    register vp8_tree_index node = 0;
+
+    do
+    {
+        node = t[node + vp8_read(r, p[node >> 1])];
+    }
+    while (node > 0);
+
+    return -node;
+}
+
+
+/* Variant reads a binary number given distributions on each bit.
+ Note that tree is arbitrary; probability of decoding a zero
+ may or may not depend on previously decoded bits. */
+
+/* Walk tree t from its root like vp8_treed_read, but return the bits that
+   were read, packed most-significant first, instead of the leaf value. */
+static int vp8_treed_read_num(
+    vp8_reader *const r,        /* !!! must return a 0 or 1 !!! */
+    vp8_tree t,
+    const vp8_prob *const p
+)
+{
+    vp8_tree_index node = 0;
+    int value = 0;
+
+    for (;;)
+    {
+        const int bit = vp8_read(r, p[node >> 1]);
+
+        value = (value << 1) + bit;     /* append the new bit */
+        node = t[node + bit];
+
+        if (node <= 0)                  /* reached a leaf */
+            break;
+    }
+
+    return value;
+}
+#endif /* tree_reader_h */
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
new file mode 100644
index 0000000..02be487
--- /dev/null
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -0,0 +1,410 @@
+;
+; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+
+;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q)
+;
+; Element-wise multiply of 16 words: dq[i] = low 16 bits of (sq[i] * q[i]),
+; done as four 4-word MMX multiplies.
+;   arg(0) sq - source words (read)
+;   arg(1) dq - destination words (written)
+;   arg(2) q  - multiplier words (read)
+; NOTE(review): sym, arg, SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS come from
+; vpx_ports/x86_abi_support.asm and are presumably what make this source
+; build for more than one ABI -- confirm there.
+global sym(vp8_dequantize_b_impl_mmx)
+sym(vp8_dequantize_b_impl_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 3
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;sq
+ mov rdi, arg(1) ;dq
+ mov rax, arg(2) ;q
+
+ movq mm1, [rsi]
+ pmullw mm1, [rax+0] ; mm1 = sq[0..3] * q[0..3]
+ movq [rdi], mm1
+
+ movq mm1, [rsi+8]
+ pmullw mm1, [rax+8] ; mm1 = sq[4..7] * q[4..7]
+ movq [rdi+8], mm1
+
+ movq mm1, [rsi+16]
+ pmullw mm1, [rax+16] ; mm1 = sq[8..11] * q[8..11]
+ movq [rdi+16], mm1
+
+ movq mm1, [rsi+24]
+ pmullw mm1, [rax+24] ; mm1 = sq[12..15] * q[12..15]
+ movq [rdi+24], mm1
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp8_dequant_idct_mmx(short *input, short *dq, short *output, int pitch)
+;
+; Dequantizes a 4x4 block of coefficients (input[i] * dq[i]), clears the
+; input block to zero, runs two 1-D inverse-transform passes with a
+; transpose after each, and stores four rows of four words to output.
+;   arg(0) input  - 16 coefficient words; overwritten with zeros
+;   arg(1) dq     - 16 dequantization factor words
+;   arg(2) output - destination; consecutive rows are `pitch` bytes apart
+;   arg(3) pitch  - row stride of output, used directly as a byte offset
+; The second pass adds 4 and shifts right by 3, i.e. rounds to nearest.
+global sym(vp8_dequant_idct_mmx)
+sym(vp8_dequant_idct_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 4
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rax, arg(0) ;input
+ mov rdx, arg(1) ;dq
+
+
+ ; dequantize: mm0..mm3 = rows 0..3 of input * dq
+ movq mm0, [rax ]
+ pmullw mm0, [rdx]
+
+ movq mm1, [rax +8]
+ pmullw mm1, [rdx +8]
+
+ movq mm2, [rax+16]
+ pmullw mm2, [rdx+16]
+
+ movq mm3, [rax+24]
+ pmullw mm3, [rdx+24]
+
+ mov rdx, arg(2) ;output
+ pxor mm7, mm7
+
+
+ ; clear the 16 input coefficients
+ movq [rax], mm7
+ movq [rax+8], mm7
+
+ movq [rax+16],mm7
+ movq [rax+24],mm7
+
+
+ movsxd rax, dword ptr arg(3) ;pitch
+
+ ; ---- first 1-D pass ----
+ psubw mm0, mm2 ; b1= 0-2
+ paddw mm2, mm2 ;
+
+ movq mm5, mm1
+ paddw mm2, mm0 ; a1 =0+2
+
+ ; pmulhw keeps the high word of a signed multiply; adding the operand back
+ ; afterwards completes the scaling named in the comments below
+ pmulhw mm5, [x_s1sqr2 GLOBAL];
+ paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movq mm7, mm3 ;
+ pmulhw mm7, [x_c1sqr2less1 GLOBAL];
+
+ paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw mm7, mm5 ; c1
+
+ movq mm5, mm1
+ movq mm4, mm3
+
+ pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ paddw mm5, mm1
+
+ pmulhw mm3, [x_s1sqr2 GLOBAL]
+ paddw mm3, mm4
+
+ paddw mm3, mm5 ; d1
+ movq mm6, mm2 ; a1
+
+ movq mm4, mm0 ; b1
+ paddw mm2, mm3 ;0
+
+ paddw mm4, mm7 ;1
+ psubw mm0, mm7 ;2
+
+ psubw mm6, mm3 ;3
+
+ ; ---- transpose the 4x4 block of words ----
+ movq mm1, mm2 ; 03 02 01 00
+ movq mm3, mm4 ; 23 22 21 20
+
+ punpcklwd mm1, mm0 ; 11 01 10 00
+ punpckhwd mm2, mm0 ; 13 03 12 02
+
+ punpcklwd mm3, mm6 ; 31 21 30 20
+ punpckhwd mm4, mm6 ; 33 23 32 22
+
+ movq mm0, mm1 ; 11 01 10 00
+ movq mm5, mm2 ; 13 03 12 02
+
+ punpckldq mm0, mm3 ; 30 20 10 00
+ punpckhdq mm1, mm3 ; 31 21 11 01
+
+ punpckldq mm2, mm4 ; 32 22 12 02
+ punpckhdq mm5, mm4 ; 33 23 13 03
+
+ movq mm3, mm5 ; 33 23 13 03
+
+ ; ---- second 1-D pass ----
+ psubw mm0, mm2 ; b1= 0-2
+ paddw mm2, mm2 ;
+
+ movq mm5, mm1
+ paddw mm2, mm0 ; a1 =0+2
+
+ pmulhw mm5, [x_s1sqr2 GLOBAL];
+ paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+ movq mm7, mm3 ;
+ pmulhw mm7, [x_c1sqr2less1 GLOBAL];
+
+ paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
+ psubw mm7, mm5 ; c1
+
+ movq mm5, mm1
+ movq mm4, mm3
+
+ pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+ paddw mm5, mm1
+
+ pmulhw mm3, [x_s1sqr2 GLOBAL]
+ paddw mm3, mm4
+
+ paddw mm3, mm5 ; d1
+ ; add the rounding constant (4) to a1 and b1 so that all four outputs
+ ; are rounded by the >>3 below
+ paddw mm0, [fours GLOBAL]
+
+ paddw mm2, [fours GLOBAL]
+ movq mm6, mm2 ; a1
+
+ movq mm4, mm0 ; b1
+ paddw mm2, mm3 ;0
+
+ paddw mm4, mm7 ;1
+ psubw mm0, mm7 ;2
+
+ psubw mm6, mm3 ;3
+ psraw mm2, 3
+
+ psraw mm0, 3
+ psraw mm4, 3
+
+ psraw mm6, 3
+
+ ; ---- transpose back ----
+ movq mm1, mm2 ; 03 02 01 00
+ movq mm3, mm4 ; 23 22 21 20
+
+ punpcklwd mm1, mm0 ; 11 01 10 00
+ punpckhwd mm2, mm0 ; 13 03 12 02
+
+ punpcklwd mm3, mm6 ; 31 21 30 20
+ punpckhwd mm4, mm6 ; 33 23 32 22
+
+ movq mm0, mm1 ; 11 01 10 00
+ movq mm5, mm2 ; 13 03 12 02
+
+ punpckldq mm0, mm3 ; 30 20 10 00
+ punpckhdq mm1, mm3 ; 31 21 11 01
+
+ punpckldq mm2, mm4 ; 32 22 12 02
+ punpckhdq mm5, mm4 ; 33 23 13 03
+
+ ; store the four rows at output + 0, 1, 2 and 3 * pitch
+ movq [rdx], mm0
+
+ movq [rdx+rax], mm1
+ movq [rdx+rax*2], mm2
+
+ add rdx, rax
+ movq [rdx+rax*2], mm5
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vp8_dequant_dc_idct_mmx(short *input, short *dq, short *output, int pitch, int Dc)
+;
+; Same as vp8_dequant_idct_mmx, except that after dequantization the DC
+; coefficient (word 0 of the block) is replaced by the low 16 bits of Dc.
+;   arg(0) input  - 16 coefficient words; overwritten with zeros
+;   arg(1) dq     - 16 dequantization factor words
+;   arg(2) output - destination; consecutive rows are `pitch` bytes apart
+;   arg(3) pitch  - row stride of output, used directly as a byte offset
+;   arg(4) Dc     - DC value supplied by the caller
+; Only baseline MMX instructions are used. The earlier `pinsrw mm0, rcx, 0`
+; needs SSE (or AMD extended MMX) and faults on MMX-only processors.
+global sym(vp8_dequant_dc_idct_mmx)
+sym(vp8_dequant_dc_idct_mmx):
+    push rbp
+    mov rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    GET_GOT rbx
+    push rsi
+    push rdi
+    ; end prolog
+
+    mov rax, arg(0) ;input
+    mov rdx, arg(1) ;dq
+
+    movsxd rcx, dword ptr arg(4) ;Dc
+
+    ; dequantize: mm0..mm3 = rows 0..3 of input * dq
+    movq mm0, [rax]
+    pmullw mm0, [rdx]
+
+    movq mm1, [rax+8]
+    pmullw mm1, [rdx+8]
+
+    movq mm2, [rax+16]
+    pmullw mm2, [rdx+16]
+
+    movq mm3, [rax+24]
+    pmullw mm3, [rdx+24]
+
+    mov rdx, arg(2) ;output
+    pxor mm7, mm7
+
+    ; clear the 16 input coefficients
+    movq [rax], mm7
+    movq [rax+8], mm7
+
+    movq [rax+16], mm7
+    movq [rax+24], mm7
+
+    ; replace word 0 of mm0 with the low word of Dc (MMX-only equivalent of
+    ; pinsrw mm0, rcx, 0). mm7 is free here: its zero has been stored above
+    ; and it is reloaded before its next use.
+    psrlq mm0, 16
+    psllq mm0, 16 ; word 0 cleared, words 1..3 kept
+    and rcx, 0xFFFF ; keep only the low 16 bits of Dc
+    movd mm7, ecx
+    por mm0, mm7
+
+    movsxd rax, dword ptr arg(3) ;pitch
+
+    ; ---- first 1-D pass ----
+    psubw mm0, mm2 ; b1= 0-2
+    paddw mm2, mm2 ;
+
+    movq mm5, mm1
+    paddw mm2, mm0 ; a1 =0+2
+
+    ; pmulhw keeps the high word of a signed multiply; adding the operand
+    ; back afterwards completes the scaling named in the comments below
+    pmulhw mm5, [x_s1sqr2 GLOBAL]
+    paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+    movq mm7, mm3
+    pmulhw mm7, [x_c1sqr2less1 GLOBAL]
+
+    paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
+    psubw mm7, mm5 ; c1
+
+    movq mm5, mm1
+    movq mm4, mm3
+
+    pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+    paddw mm5, mm1
+
+    pmulhw mm3, [x_s1sqr2 GLOBAL]
+    paddw mm3, mm4
+
+    paddw mm3, mm5 ; d1
+    movq mm6, mm2 ; a1
+
+    movq mm4, mm0 ; b1
+    paddw mm2, mm3 ;0
+
+    paddw mm4, mm7 ;1
+    psubw mm0, mm7 ;2
+
+    psubw mm6, mm3 ;3
+
+    ; ---- transpose the 4x4 block of words ----
+    movq mm1, mm2 ; 03 02 01 00
+    movq mm3, mm4 ; 23 22 21 20
+
+    punpcklwd mm1, mm0 ; 11 01 10 00
+    punpckhwd mm2, mm0 ; 13 03 12 02
+
+    punpcklwd mm3, mm6 ; 31 21 30 20
+    punpckhwd mm4, mm6 ; 33 23 32 22
+
+    movq mm0, mm1 ; 11 01 10 00
+    movq mm5, mm2 ; 13 03 12 02
+
+    punpckldq mm0, mm3 ; 30 20 10 00
+    punpckhdq mm1, mm3 ; 31 21 11 01
+
+    punpckldq mm2, mm4 ; 32 22 12 02
+    punpckhdq mm5, mm4 ; 33 23 13 03
+
+    movq mm3, mm5 ; 33 23 13 03
+
+    ; ---- second 1-D pass ----
+    psubw mm0, mm2 ; b1= 0-2
+    paddw mm2, mm2 ;
+
+    movq mm5, mm1
+    paddw mm2, mm0 ; a1 =0+2
+
+    pmulhw mm5, [x_s1sqr2 GLOBAL]
+    paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
+
+    movq mm7, mm3
+    pmulhw mm7, [x_c1sqr2less1 GLOBAL]
+
+    paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
+    psubw mm7, mm5 ; c1
+
+    movq mm5, mm1
+    movq mm4, mm3
+
+    pmulhw mm5, [x_c1sqr2less1 GLOBAL]
+    paddw mm5, mm1
+
+    pmulhw mm3, [x_s1sqr2 GLOBAL]
+    paddw mm3, mm4
+
+    paddw mm3, mm5 ; d1
+    ; add the rounding constant (4) to a1 and b1 so that all four outputs
+    ; are rounded by the >>3 below
+    paddw mm0, [fours GLOBAL]
+
+    paddw mm2, [fours GLOBAL]
+    movq mm6, mm2 ; a1
+
+    movq mm4, mm0 ; b1
+    paddw mm2, mm3 ;0
+
+    paddw mm4, mm7 ;1
+    psubw mm0, mm7 ;2
+
+    psubw mm6, mm3 ;3
+    psraw mm2, 3
+
+    psraw mm0, 3
+    psraw mm4, 3
+
+    psraw mm6, 3
+
+    ; ---- transpose back ----
+    movq mm1, mm2 ; 03 02 01 00
+    movq mm3, mm4 ; 23 22 21 20
+
+    punpcklwd mm1, mm0 ; 11 01 10 00
+    punpckhwd mm2, mm0 ; 13 03 12 02
+
+    punpcklwd mm3, mm6 ; 31 21 30 20
+    punpckhwd mm4, mm6 ; 33 23 32 22
+
+    movq mm0, mm1 ; 11 01 10 00
+    movq mm5, mm2 ; 13 03 12 02
+
+    punpckldq mm0, mm3 ; 30 20 10 00
+    punpckhdq mm1, mm3 ; 31 21 11 01
+
+    punpckldq mm2, mm4 ; 32 22 12 02
+    punpckhdq mm5, mm4 ; 33 23 13 03
+
+    ; store the four rows at output + 0, 1, 2 and 3 * pitch
+    movq [rdx], mm0
+
+    movq [rdx+rax], mm1
+    movq [rdx+rax*2], mm2
+
+    add rdx, rax
+    movq [rdx+rax*2], mm5
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop rbp
+    ret
+
+
+; Constants for the inverse transforms above, each replicated into all four
+; words of an MMX register. The two multipliers are used with pmulhw (high
+; word of a signed 16x16 multiply) followed by paddw of the original operand.
+SECTION_RODATA
+align 16
+; 0x8A8C = 35468 ~= sin(pi/8) * sqrt(2) * 65536. As a signed word this is
+; 35468 - 65536, which is why the operand is added back after pmulhw.
+x_s1sqr2:
+ times 4 dw 0x8A8C
+align 16
+; 0x4E7B = 20091 ~= (cos(pi/8) * sqrt(2) - 1) * 65536; adding the operand
+; back after pmulhw supplies the "+ 1".
+x_c1sqr2less1:
+ times 4 dw 0x4E7B
+align 16
+; Rounding term added before the final arithmetic shift right by 3.
+fours:
+ times 4 dw 0x0004
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
new file mode 100644
index 0000000..5def406
--- /dev/null
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#ifndef DEQUANTIZE_X86_H
+#define DEQUANTIZE_X86_H
+
+
+/* Note:
+ *
+ * This platform is commonly built for runtime CPU detection. If you modify
+ * any of the function mappings present in this file, be sure to also update
+ * them in the function pointer initialization code.
+ */
+#if HAVE_MMX
+extern prototype_dequant_block(vp8_dequantize_b_mmx);
+extern prototype_dequant_idct(vp8_dequant_idct_mmx);
+extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_mmx);
+
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp8_dequant_block
+#define vp8_dequant_block vp8_dequantize_b_mmx
+
+#undef vp8_dequant_idct
+#define vp8_dequant_idct vp8_dequant_idct_mmx
+
+#undef vp8_dequant_idct_dc
+#define vp8_dequant_idct_dc vp8_dequant_dc_idct_mmx
+
+#endif
+#endif
+
+#endif
diff --git a/vp8/decoder/x86/onyxdxv.c b/vp8/decoder/x86/onyxdxv.c
new file mode 100644
index 0000000..75a676a
--- /dev/null
+++ b/vp8/decoder/x86/onyxdxv.c
@@ -0,0 +1,1079 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+/****************************************************************************
+*
+* Module Title : onyxdxv.c
+*
+* Description : VP80 interface to DXV.
+*
+*****************************************************************************
+*/
+/****************************************************************************
+* Header Files
+****************************************************************************/
+#include <math.h> // For Abs()
+#include "pragmas.h"
+
+#include "vpxdxv.h"
+#include "vpxdxv_plugin.h"
+
+#include "onyxd_int.h"
+#include "onyx.h"
+#include "codec_common_interface.h"
+#include "vpx_scale/vpxscale.h"
+#include "vpx_mem/vpx_mem.h"
+#include "postproc.h"
+#include "vpxblit.h"
+#include "g_common.h"
+#include "vpx_scale/yv12extend.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include "scale_mode.h"
+#include "onyx_pb_interface.h"
+
+/****************************************************************************
+* Macros
+****************************************************************************/
+
+#define VP8_FOURCC DXL_MKFOURCC( 'V', 'P', '8', '0')
+
+extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch);
+
+
+/****************************************************************************
+* Typedefs
+****************************************************************************/
+
+// Describes a planar YUV image handed to the blitters. The width / height /
+// stride and buffer fields are filled from a YV12_BUFFER_CONFIG by
+// convert_yv12_buffer_types(); onyx_blit() then clips the sizes to the
+// destination and negates both strides before calling the blitter.
+typedef struct // YUV buffer configuration structure
+{
+ // Luma plane geometry.
+ int y_width;
+ int y_height;
+ int y_stride;
+
+ // Chroma plane geometry (shared by U and V).
+ int uv_width;
+ int uv_height;
+ int uv_stride;
+
+ // Start of each plane.
+ char *y_buffer;
+ char *u_buffer;
+ char *v_buffer;
+
+ // Set by onyx_blit() only for YV12 / I420 destinations: start and sizes of
+ // the chroma area inside the destination surface.
+ char *uv_start;
+ int uv_dst_area;
+ int uv_used_area;
+
+ // NOTE(review): not referenced in the visible part of this file;
+ // presumably per-plane destination pointers -- confirm against the blitters.
+ unsigned char *y_ptr_scrn;
+ unsigned char *u_ptr_scrn;
+ unsigned char *v_ptr_scrn;
+
+
+} DXV_YUV_BUFFER_CONFIG;
+
+
+/* Blitter callback. In this file it is invoked as
+ * blitter(destination pointer, destination pitch, source YUV buffer). */
+typedef void ((*vp8blit_func)(unsigned char *, int, YUV_BUFFER_CONFIG *));
+
+/* define an x_image structure based on the core x_image struct */
+/* This is the per-image state returned by vpxdxv_get_algorithm_base_ptr()
+ * and used by the onyx_* / vp8_* callbacks in this file. */
+typedef struct t_ximage_codec
+{
+ DXV_YUV_BUFFER_CONFIG frame_buffer; /* image description handed to the blitter */
+ VP8D_COMP *my_pbi; /* decoder instance; onyx_blit() tests it for NULL */
+ VP8_COMMON *common;
+ int owned; /* nonzero: vp8_ximagedestroy() also removes my_pbi */
+ int decompressed_once;
+
+ int sizeof_pixel;
+ vp8blit_func blitter; /* routine that writes the frame to the destination */
+
+ unsigned int ppl_tag; /* value reported by onyx_get_post_proc() */
+ unsigned int bd_tag; /* output format tag (VPXDXV_*) */
+ unsigned int *supported_output_format_list;
+
+ int cpu_free; /* percent of a 1/30 s budget usable for postprocessing; 0 = no limit */
+ int postproc; /* postprocessing request, decoded in onyx_blit() */
+ int add_noise; /* 1: add noise to the luma plane after postprocessing */
+ int deinterlace;
+
+ /* Last measured cost in microseconds of vp8_deblock() and of
+  * vp8_deblock_and_de_macro_block(); seeded from the decode time. */
+ int post_proc2time;
+ int post_proc4time;
+
+ /* Scale ratios that onyx_blit() compares against the current ones to
+  * decide whether scaled_buffer must be reallocated. */
+ int hs;
+ int hr;
+ int vs;
+ int vr;
+ YV12_BUFFER_CONFIG this_buffer; /* frame selected for display */
+ YV12_BUFFER_CONFIG scaled_buffer; /* destination of vp8_yv12_scale_or_center() */
+ YV12_BUFFER_CONFIG *passed_in_buffer; /* if set, shown as-is once, then cleared */
+
+ int avgq; /* running filter-level average, updated in onyx_decompress() */
+ int ppcount; /* countdown; at 0 the timings above are reset and it reloads to 64 */
+
+
+} VP8_XIMAGE, *VP8_XIMAGE_HANDLE;
+
+
+/****************************************************************************
+* Module Statics
+****************************************************************************/
+// Output format tags for this codec. The list is terminated by a 0 entry.
+// NOTE(review): the name suggests the entries are in order of preference --
+// confirm against the code that walks the list.
+static unsigned int g_vp8_preferred_output_format_list[] =
+{
+ VPXDXV_YUY2,
+ VPXDXV_UYVY,
+ VPXDXV_RGB8888,
+ VPXDXV_RGB888,
+ VPXDXV_RGB555,
+ VPXDXV_RGB565,
+ VPXDXV_YV12,
+ VPXDXV_I420,
+
+// Disabled alternative list, kept for reference:
+// VPXDXV_YV12,
+// VPXDXV_YUY2,
+// VPXDXV_RGB565,
+// VPXDXV_UYVY,
+ 0
+};
+
+/****************************************************************************
+* Forward declarations
+****************************************************************************/
+void onyx_set_parameter(XIMAGE_HANDLE src, int Command, unsigned int Parameter);
+
+static int onyx_get_output_format(XIMAGE_HANDLE src, unsigned int *bd_tag);
+static int onyx_set_output_format(XIMAGE_HANDLE src, unsigned int bd_tag);
+
+static int vpx_get_size_of_pixel(unsigned int bd);
+
+/****************************************************************************
+* Imports
+****************************************************************************/
+
+#define __Clamp255(x) (unsigned char) ( (x) < 0 ? 0 : ( (x) <= 255 ? (x) : 255 ) )
+
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+/* Copy the plane geometry and plane pointers of a YV12_BUFFER_CONFIG into a
+ * DXV_YUV_BUFFER_CONFIG. Only these nine fields are written; every other
+ * member of *dest keeps its previous value. */
+void
+convert_yv12_buffer_types(YV12_BUFFER_CONFIG *source, DXV_YUV_BUFFER_CONFIG *dest)
+{
+    /* luma plane */
+    dest->y_width   = source->y_width;
+    dest->y_height  = source->y_height;
+    dest->y_stride  = source->y_stride;
+    dest->y_buffer  = (char *)source->y_buffer;
+
+    /* chroma planes */
+    dest->uv_width  = source->uv_width;
+    dest->uv_height = source->uv_height;
+    dest->uv_stride = source->uv_stride;
+    dest->u_buffer  = (char *)source->u_buffer;
+    dest->v_buffer  = (char *)source->v_buffer;
+}
+
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+
+/*
+ * Postprocess, optionally scale, and blit the current frame to a v_screen.
+ *
+ *  src          - x_image handle; its algorithm base is the VP8_XIMAGE state
+ *  v_screen     - destination surface; when 0 nothing is drawn
+ *  frame_buffer - receives the description of the image that is blitted
+ *  x, y         - requested offset inside the v_screen view
+ *
+ * Always returns DXV_OK. When the surface or its pixel pointer is missing,
+ * only a log message is emitted.
+ *
+ * NOTE(review): the blitter is always given &tab->frame_buffer while the
+ * clipping and stride changes are applied to frame_buffer; the only caller
+ * visible here (onyx_decompress) passes &tab->frame_buffer, so they match.
+ */
+int onyx_blit
+(
+    XIMAGE_HANDLE src,
+    VSCREEN_HANDLE v_screen,
+    DXV_YUV_BUFFER_CONFIG *frame_buffer,
+    int x,
+    int y
+)
+{
+    VP8_XIMAGE_HANDLE tab = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src);
+    VP8D_COMP *pbi;
+    VP8_COMMON *common = tab->common;
+    pbi = tab->my_pbi;
+
+    if (v_screen)   /* if there is a v_screen, blit to it */
+    {
+        unsigned char *ptr_scrn;
+        int this_pitch, vs_height, vs_width;
+        unsigned int start_tick, stop_tick;
+
+        vpxdxv_get_vscreen_attributes(v_screen, (void **)&ptr_scrn, &vs_width, &vs_height, &this_pitch);
+
+        if (ptr_scrn)
+        {
+            int w, h;
+
+            int p_size;
+            int view_x, view_y, view_w;
+            int hs, hr, vs, vr;
+            int neww, newh;
+            int cw, ch;
+            /* Time budget for postprocessing: cpu_free percent of one
+             * 30 fps frame period, minus the time spent decoding. */
+            int microseconds_available = (int)(1000000 / 30);
+
+            microseconds_available = microseconds_available * tab->cpu_free / 100;
+
+            if (pbi)
+            {
+                microseconds_available -= pbi->decode_microseconds;
+
+                /* cpu_free == 0 means "no limit" */
+                if (tab->cpu_free == 0)
+                    microseconds_available = INT_MAX;
+
+                /* Seed the cost estimates from the decode time. */
+                if (tab->post_proc2time == 0)
+                    tab->post_proc2time = pbi->decode_microseconds * 1 / 2;
+
+                if (tab->post_proc4time == 0)
+                    tab->post_proc4time = pbi->decode_microseconds;
+            }
+
+
+            /* Every 64 blits, forget the measured costs so that the more
+             * expensive postprocessing level gets tried again. */
+            if (tab->ppcount == 0)
+            {
+                tab->post_proc2time = 0;
+                tab->post_proc4time = 0;
+                tab->ppcount = 64;
+            }
+            else
+            {
+                tab->ppcount --;
+            }
+
+            vpxdxv_get_vscreen_view(v_screen, &view_x, &view_y, &view_w, NULL);
+
+            Scale2Ratio(common->horiz_scale, &hr, &hs);
+            Scale2Ratio(common->vert_scale, &vr, &vs);
+
+            if (tab->postproc && tab->passed_in_buffer == 0)
+            {
+                int show_text = 0;
+
+                /* plain char: this buffer is handed to sprintf() and
+                 * vp8_blit_text(), which both take char pointers */
+                char message[512];
+
+                int pp = tab->postproc;
+                int q = (tab->avgq + 4) / 8;
+                int noise = 0;
+
+                vp8_clear_system_state();
+
+                /* postproc encoding: 1000 + noise * 100 + level */
+                if (pp >= 1000)
+                {
+                    pp -= 1000;
+                    noise = pp / 100;
+                    pp = pp - noise * 100;
+                }
+
+                /* +100 / +200 / +300 select one of the debug text overlays */
+                if (pp >= 300)
+                {
+                    pp -= 300;
+                    show_text = 3;
+                }
+                else if (pp >= 200)
+                {
+                    pp -= 200;
+                    show_text = 2;
+                }
+                else if (pp >= 100)
+                {
+                    pp -= 100;
+                    show_text = 1;
+                }
+
+                if (pbi && (pbi->mb.segmentation_enabled & SEGMENT_PF) && tab->deinterlace)
+                {
+                    de_interlace(common->frame_to_show->y_buffer, common->post_proc_buffer.y_buffer,
+                                 common->post_proc_buffer.y_width, common->post_proc_buffer.y_height,
+                                 common->post_proc_buffer.y_stride);
+
+                    de_interlace(common->frame_to_show->u_buffer, common->post_proc_buffer.u_buffer,
+                                 common->post_proc_buffer.uv_width, common->post_proc_buffer.uv_height,
+                                 common->post_proc_buffer.uv_stride);
+                    de_interlace(common->frame_to_show->v_buffer, common->post_proc_buffer.v_buffer,
+                                 common->post_proc_buffer.uv_width, common->post_proc_buffer.uv_height,
+                                 common->post_proc_buffer.uv_stride);
+                }
+                else
+                {
+                    /* Levels 10..20 bias q by (level - 15) * 10, floored at 0. */
+                    if (pp >= 10 && pp <= 20)
+                    {
+                        q = q + (pp - 15) * 10;
+
+                        if (q < 0)
+                            q = 0;
+                    }
+
+                    start_tick = vp8_get_high_res_timer_tick();
+
+                    if (pp > 3 && tab->post_proc4time < microseconds_available)
+                    {
+                        vp8_deblock_and_de_macro_block(common->frame_to_show, &common->post_proc_buffer, q, 1, 0);
+
+                        stop_tick = vp8_get_high_res_timer_tick();
+
+                        if (pbi)
+                            tab->post_proc4time = vp8_get_time_in_micro_sec(start_tick, stop_tick);
+                    }
+
+                    else if (pp > 0 && tab->post_proc2time < microseconds_available)
+                    {
+                        vp8_deblock(common->frame_to_show, &common->post_proc_buffer, q , 1, 0);
+                        stop_tick = vp8_get_high_res_timer_tick();
+
+                        if (pbi)
+                            tab->post_proc2time = vp8_get_time_in_micro_sec(start_tick, stop_tick);
+                    }
+                    else
+                    {
+                        /* No time (or no level) for filtering: plain copy. */
+                        vp8_yv12_copy_frame(common->frame_to_show, &common->post_proc_buffer);
+                    }
+
+                }
+
+                vp8_clear_system_state();
+
+                if (tab->add_noise == 1)
+                {
+
+                    vp8_plane_add_noise(common->post_proc_buffer.y_buffer,
+                                        common->post_proc_buffer.y_width, common->post_proc_buffer.y_height,
+                                        common->post_proc_buffer.y_stride, 63 - q, noise);
+                }
+
+
+                if (show_text == 1)
+                {
+#ifdef PACKET_TESTING
+                    {
+                        VP8_HEADER *oh2 = (VP8_HEADER *) pbi->Source;
+                        sprintf(message, "%8d %d%d%d%d%d size:%d\n",
+                                oh2->frame_number ,
+                                oh2->update_gold ,
+                                oh2->update_last ,
+                                oh2->uses_gold ,
+                                oh2->uses_last ,
+                                oh2->type,
+                                vpxdxv_get_ximage_csize(src));
+                    }
+#else
+                    /* Each %ld conversion needs a long argument; the casts
+                     * make that true whatever the field types are. */
+                    sprintf(message, "F:%1ldG:%1ldQ:%3ldF:%3ld,%3ldP:%d_s:%6ld,N:%d,",
+                            (long)(common->frame_type == KEY_FRAME),
+                            (long)common->refresh_golden_frame,
+                            (long)common->base_qindex,
+                            (long)common->filter_level,
+                            (long)q,
+                            tab->postproc,
+                            (long)vpxdxv_get_ximage_csize(src), noise);
+#endif
+
+                    vp8_blit_text(message, common->post_proc_buffer.y_buffer, common->post_proc_buffer.y_stride);
+
+                }
+                else if (show_text == 2)
+                {
+                    int i, j;
+                    unsigned char *y_ptr;
+                    YV12_BUFFER_CONFIG *post = &common->post_proc_buffer;
+                    int mb_rows = post->y_height >> 4;
+                    int mb_cols = post->y_width >> 4;
+                    int mb_index = 0;
+                    MODE_INFO *mi = common->mi;
+
+                    y_ptr = post->y_buffer + 4 * post->y_stride + 4;
+
+                    /* Label each macroblock with a letter for its mode
+                     * (level 4) or its reference frame (any other level). */
+                    for (i = 0; i < mb_rows; i++)
+                    {
+                        for (j = 0; j < mb_cols; j++)
+                        {
+                            char zz[4];
+
+                            if (pp == 4)
+                                sprintf(zz, "%c", mi[mb_index].mbmi.mode + 'a');
+                            else
+                                sprintf(zz, "%c", mi[mb_index].mbmi.ref_frame + 'a');
+
+                            vp8_blit_text(zz, y_ptr, post->y_stride);
+                            mb_index ++;
+                            y_ptr += 16;
+                        }
+
+                        mb_index ++; //border
+                        y_ptr += post->y_stride * 16 - post->y_width;
+
+                    }
+                }
+                else if (show_text == 3)
+                {
+                    int i, j;
+                    unsigned char *y_ptr;
+                    YV12_BUFFER_CONFIG *post = &common->post_proc_buffer;
+                    int mb_rows = post->y_height >> 4;
+                    int mb_cols = post->y_width >> 4;
+
+                    y_ptr = post->y_buffer + 4 * post->y_stride + 4;
+
+                    /* Label each macroblock with a coordinate digit: the row
+                     * number in the first column, the column number elsewhere. */
+                    for (i = 0; i < mb_rows; i++)
+                    {
+                        for (j = 0; j < mb_cols; j++)
+                        {
+                            char zz[4];
+
+                            if (j == 0)
+                                sprintf(zz, "%c", '0' + i % 10);
+                            else
+                                sprintf(zz, "%c", '0' + j % 10);
+
+                            vp8_blit_text(zz, y_ptr, post->y_stride);
+                            y_ptr += 16;
+                        }
+
+                        y_ptr += post->y_stride * 16 - post->y_width;
+
+                    }
+                }
+
+                vpx_memcpy(&tab->this_buffer, &common->post_proc_buffer, sizeof(YV12_BUFFER_CONFIG));
+            }
+            else
+            {
+                vpx_memcpy(&tab->this_buffer, common->frame_to_show, sizeof(YV12_BUFFER_CONFIG));
+            }
+
+
+            /* get a frame pointer to the scaled and postprocessed reconstructed buffer */
+            if (tab->passed_in_buffer == 0)
+            {
+                if (common->horiz_scale != NORMAL || common->vert_scale != NORMAL)
+                {
+                    neww = hs * tab->this_buffer.y_width / hr;
+                    newh = vs * tab->this_buffer.y_height / vr;
+
+                    neww += neww & 1;   /* round the width up to an even value */
+
+                    /* NOTE(review): tab->hs / hr / vs / vr are never written
+                     * in this function. Unless they are updated elsewhere the
+                     * buffer is reallocated on every scaled blit. Caching the
+                     * ratios here would be unsafe if the frame size can
+                     * change while the ratios stay the same. */
+                    if (tab->hs != hs || tab->hr != hr || tab->vs != vs || tab->vr != vr)
+                    {
+                        vp8_yv12_alloc_frame_buffer(&tab->scaled_buffer, neww, newh , 8);
+                    }
+
+                    vp8_yv12_scale_or_center(&tab->this_buffer,
+                                             &tab->scaled_buffer,
+                                             neww, newh, SCALE_TO_FIT, hs, hr, vs, vr);
+
+                    convert_yv12_buffer_types(&tab->scaled_buffer, frame_buffer);
+
+                    cw = hs * common->Width / hr;
+                    ch = vs * common->Height / vr;
+
+                }
+                else
+                {
+                    convert_yv12_buffer_types(&tab->this_buffer, frame_buffer);
+
+                    cw = common->Width;
+                    ch = common->Height;
+                }
+            }
+            else
+            {
+                /* A caller-supplied buffer is shown once, unmodified. */
+                convert_yv12_buffer_types(tab->passed_in_buffer, frame_buffer);
+                cw = common->Width;
+                ch = common->Height;
+                tab->passed_in_buffer = 0;
+            }
+
+            frame_buffer->y_width = cw;
+            frame_buffer->y_height = ch;
+            frame_buffer->uv_width = cw / 2;
+            frame_buffer->uv_height = ch / 2;
+
+            p_size = vpx_get_size_of_pixel(tab->bd_tag);
+
+            /* remember to offset if requested */
+            y += view_y;
+            x += view_x ;
+
+            /* for planar destinations */
+            w = view_w;
+            h = vs_height;
+
+            /* Clip the image to the destination ... */
+            if (w < frame_buffer->y_width)
+            {
+                frame_buffer->y_width = w;
+                frame_buffer->uv_width = (w + 1) / 2;
+            }
+
+            if (h < frame_buffer->y_height)
+            {
+                frame_buffer->y_height = h;
+                frame_buffer->uv_height = (h + 1) / 2;
+            }
+
+            /* ... or centre it when it is smaller than the destination. */
+            if (frame_buffer->y_width < view_w)
+                x += (view_w - frame_buffer->y_width) / 2;
+
+            if (x & 1)
+                x -= 1;
+
+            if (frame_buffer->y_height < vs_height)
+                y += (vs_height - frame_buffer->y_height) / 2;
+
+
+            ptr_scrn += (x * p_size) + (y * this_pitch);
+
+            frame_buffer->y_stride *= -1;
+            frame_buffer->uv_stride *= -1;
+
+            if (tab->bd_tag == VPXDXV_YV12 || tab->bd_tag == VPXDXV_I420)
+            {
+                if (this_pitch < 0)
+                {
+                    frame_buffer->uv_start = (char *)(ptr_scrn + abs(this_pitch) + abs(this_pitch) * h / 4 + this_pitch / 2);
+                    frame_buffer->uv_dst_area = abs((this_pitch * h) / 4);
+                    frame_buffer->uv_used_area = 0;
+                }
+                else
+                {
+                    frame_buffer->uv_start = (char *)(ptr_scrn + (this_pitch * h));
+                    frame_buffer->uv_dst_area = (((this_pitch + 1) / 2) * ((h + 1) / 2));
+                    frame_buffer->uv_used_area = (((this_pitch + 1) / 2) * frame_buffer->uv_height);
+                }
+            }
+
+            /* pbi may be NULL (it is tested earlier in this function), so it
+             * has to be tested here as well before it is dereferenced. */
+            if (pbi && (pbi->mb.segmentation_enabled & SEGMENT_PF) && (tab->bd_tag != VPXDXV_YV12 && tab->bd_tag != VPXDXV_I420))
+            {
+                /* Blit the two fields separately: double the strides, halve
+                 * the heights, and draw every other line in each pass. */
+                int ypitch = frame_buffer->y_stride;
+                int uvpitch = frame_buffer->uv_stride;
+
+                frame_buffer->y_stride <<= 1;
+                frame_buffer->y_height >>= 1;
+                frame_buffer->uv_stride <<= 1;
+                frame_buffer->uv_height >>= 1;
+
+                ptr_scrn += this_pitch;
+                frame_buffer->y_buffer -= ypitch;
+                frame_buffer->u_buffer -= uvpitch;
+                frame_buffer->v_buffer -= uvpitch;
+                tab->blitter(ptr_scrn, 2 * this_pitch, (YUV_BUFFER_CONFIG *)(&tab->frame_buffer));
+
+                ptr_scrn -= this_pitch;
+                frame_buffer->y_buffer += ypitch;
+                frame_buffer->u_buffer += uvpitch;
+                frame_buffer->v_buffer += uvpitch;
+                tab->blitter(ptr_scrn, 2 * this_pitch, (YUV_BUFFER_CONFIG *)(&tab->frame_buffer));
+
+            }
+            else
+            {
+                /* blit the screen */
+                tab->blitter(ptr_scrn, this_pitch, (YUV_BUFFER_CONFIG *)(&tab->frame_buffer));
+                vpx_log("Decoder: Frame shown \n");
+            }
+
+        }
+        else
+            vpx_log("Decoder: Frame not shown scrn pointer 0\n");
+    }
+    else
+        vpx_log("Decoder: Frame not shown vscreen 0\n");
+
+    return DXV_OK;
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : onyx_decompress
+ *
+ *  INPUTS        : src      - x_image handle holding the compressed data
+ *                  v_screen - destination surface passed on to onyx_blit
+ *
+ *  OUTPUTS       : None
+ *
+ *  RETURNS       : DXV_VERSION_CONFLICT if the decoder returns -1,
+ *                  DXV_BAD_DATA for any other decoder failure,
+ *                  otherwise the result of onyx_blit.
+ *
+ *  FUNCTION      : Decodes the compressed frame attached to src, if there
+ *                  is one, updates the running filter-level average (avgq)
+ *                  and then blits the result.
+ *
+ *  SPECIAL NOTES : With no compressed data the previous frame is simply
+ *                  scaled, postprocessed and blitted again.
+ *
+ ****************************************************************************/
+static
+int onyx_decompress(XIMAGE_HANDLE src, VSCREEN_HANDLE v_screen)
+{
+    VP8_XIMAGE_HANDLE ctx = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src);
+    unsigned char *data = vpxdxv_get_ximage_cdata_addr(src);
+    unsigned int data_size = vpxdxv_get_ximage_csize(src);
+    int w, h, x, y;
+
+    vpxdxv_get_ximage_xywh(src, &x, &y, &w, &h);
+
+    // Only decode when there really is a compressed frame.
+    if (data && data_size != 0)
+    {
+        int flags;
+        int status;
+        int filter_q;
+
+        // decode the frame
+        status = vp8d_decompress_frame((VP8D_PTR) ctx->my_pbi,
+                                       data_size,
+                                       (char *) data,
+                                       &ctx->this_buffer,
+                                       &flags);
+
+        filter_q = ctx->my_pbi->common.filter_level * 10 / 6;
+
+        // Key frames restart the average; other frames decay it by 7/8.
+        if (ctx->my_pbi->common.frame_type != KEY_FRAME)
+            ctx->avgq = ctx->avgq * 7 / 8 + filter_q;
+        else
+            ctx->avgq = 8 * filter_q;
+
+        if (status != 0)
+            return (status == -1) ? DXV_VERSION_CONFLICT : DXV_BAD_DATA;
+    }
+
+    return onyx_blit(src, v_screen, &ctx->frame_buffer, x, y);
+}
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+static
+int vp8_ximagedestroy(XIMAGE_HANDLE src)
+{
+    /* Releases everything hanging off the algorithm base of src, then the
+     * base itself. Always reports DXV_OK, even when there is no base. */
+    VP8_XIMAGE_HANDLE base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src);
+
+    if (!base)
+        return DXV_OK;
+
+    vp8_yv12_de_alloc_frame_buffer(&base->scaled_buffer);
+
+    /* The decompressor is still owned here if stop-decode never ran. */
+    if (base->owned)
+        vp8dx_remove_decompressor((VP8D_PTR)(base->my_pbi));
+
+    duck_free(base);
+
+    return DXV_OK;
+}
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+static int
+onyx_get_post_proc(XIMAGE_HANDLE src, unsigned int *ppl)
+{
+    /* Copies the stored post-processing level tag into *ppl.
+     * Returns DXV_NULL_BASE when src has no algorithm base. */
+    VP8_XIMAGE_HANDLE base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src);
+
+    if (!base)
+        return DXV_NULL_BASE;
+
+    *ppl = base->ppl_tag;
+    return DXV_OK;
+}
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+static int
+onyx_set_post_proc(XIMAGE_HANDLE src, unsigned int ppl)
+{
+    /* Records ppl as the post-processing level tag for src.
+     * Returns DXV_NULL_BASE when src has no algorithm base. */
+    VP8_XIMAGE_HANDLE base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src);
+
+    if (!base)
+        return DXV_NULL_BASE;
+
+    base->ppl_tag = ppl;
+    return DXV_OK;
+}
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+static
+int vp8_ximagestop_decode(XIMAGE_HANDLE src)
+{
+    /* Releases the scaled buffer and, when this instance owns it, the
+     * decompressor. Always returns DXV_OK. */
+    VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src);
+
+    if (this_algorithm_base)
+    {
+
+        vp8_yv12_de_alloc_frame_buffer(&this_algorithm_base->scaled_buffer);
+
+        if (this_algorithm_base->owned)
+        {
+            /* Drop the alias into the decompressor before it is released.
+             * Start-decode only creates a new decompressor when common is
+             * 0, so a stale pointer here would make a later restart reuse
+             * freed memory. A common pointer installed from outside
+             * (PBC_SET_COMMON) is left untouched. */
+            if (this_algorithm_base->my_pbi &&
+                this_algorithm_base->common == &this_algorithm_base->my_pbi->common)
+                this_algorithm_base->common = 0;
+
+            vp8dx_remove_decompressor((VP8D_PTR)(this_algorithm_base->my_pbi));
+            this_algorithm_base->my_pbi = 0;
+        }
+
+        this_algorithm_base->owned = 0;
+    }
+
+    return DXV_OK;
+}
+
+
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+static
+int vp8_ximagestart_decode
+(
+    XIMAGE_HANDLE src
+)
+{
+    /* Prepares src for decoding: creates a decompressor unless a common
+     * structure is already attached, then resets the per-run state.
+     *
+     * Returns DXV_OK on success, DXV_NULL_BASE when src has no algorithm
+     * base, DXV_INVALID_REQUEST when the decompressor cannot be created. */
+    VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src);
+    XIMAGE_INFO_PTR xinfo = vpxdxv_get_ximage_info(src);
+    /* Zeroed so the decompressor never sees indeterminate values when no
+     * image info is available. */
+    VP8D_CONFIG ocf = {0};
+
+    if (!this_algorithm_base)
+        return DXV_NULL_BASE;
+
+    if (xinfo)
+    {
+        ocf.Width = xinfo->width;
+        ocf.Height = xinfo->height;
+    }
+
+    if (this_algorithm_base->common == 0)
+    {
+        this_algorithm_base->my_pbi = (VP8D_COMP *) vp8dx_create_decompressor(&ocf);
+
+        /* Do not record ownership of, or point into, a decompressor that
+         * was never created. */
+        if (!this_algorithm_base->my_pbi)
+            return DXV_INVALID_REQUEST;
+
+        this_algorithm_base->owned = 1;
+        this_algorithm_base->common = &this_algorithm_base->my_pbi->common;
+        this_algorithm_base->avgq = 0;
+
+    }
+
+    this_algorithm_base->passed_in_buffer = 0;
+    this_algorithm_base->post_proc2time = 0;
+    this_algorithm_base->post_proc4time = 0;
+    this_algorithm_base->ppcount = 64;
+
+    return DXV_OK;
+}
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+static
+DXV_HANDLE vp8_ximagecreate(XIMAGE_HANDLE src)
+{
+    /* Allocates a zeroed algorithm base for src and registers this
+     * decoder's callbacks on it. Returns NULL when allocation fails. */
+    VP8_XIMAGE_HANDLE base =
+        (VP8_XIMAGE_HANDLE)duck_calloc(1, sizeof(VP8_XIMAGE), DMEM_GENERAL);
+
+    if (!base)
+        return NULL;
+
+    vp8_scale_machine_specific_config();
+
+    /* lifecycle callbacks */
+    vpxdxv_register_ximage_start_decode(src, vp8_ximagestart_decode);
+    vpxdxv_register_ximage_stop_decode(src, vp8_ximagestop_decode);
+    vpxdxv_register_ximage_destroy(src, vp8_ximagedestroy);
+
+    /* decode and parameter callbacks */
+    vpxdxv_register_ximage_dx(src, onyx_decompress);
+    vpxdxv_register_ximage_set_parameter(src, onyx_set_parameter);
+
+    /* getter/setter pairs */
+    vpxdxv_register_ximage_output_format_func(src, onyx_get_output_format, onyx_set_output_format);
+    vpxdxv_register_ximage_post_proc_level_func(src, onyx_get_post_proc, onyx_set_post_proc);
+
+    return (DXV_HANDLE)base;
+}
+
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+static int store_output_list(unsigned int supported, int count,
+                             unsigned int *outlist)
+{
+    /* Writes count entries of g_vp8_preferred_output_format_list to
+     * outlist, taking the entries whose bit is set in supported, lowest
+     * bit first. Always returns DXV_OK. */
+    int out_idx;
+    int fmt_idx = 0;
+
+    for (out_idx = 0; out_idx < count; ++out_idx)
+    {
+        /* advance to the next set bit (stops early once no bits remain) */
+        for (; supported && !(supported & 0x01); supported >>= 1)
+            ++fmt_idx;
+
+        outlist[out_idx] = g_vp8_preferred_output_format_list[fmt_idx++];
+        supported >>= 1;
+    }
+
+    return DXV_OK;
+}
+
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+static int onyx_get_output_list(XIMAGE_INFO_PTR xinfo, unsigned int *outlist,
+                                unsigned int *size)
+{
+    /* Reports the preferred output formats that have a blitter.
+     *
+     * With outlist == NULL, *size receives the byte size the caller must
+     * provide: one slot per supported format plus one extra slot.
+     * With outlist != NULL and *size matching that value, the supported
+     * formats are stored followed by a 0 in the extra slot; on any
+     * mismatch only *outlist = 0 is written.
+     *
+     * Returns DXV_OK on success, DXV_INVALID_REQUEST otherwise
+     * (including size == NULL). */
+    unsigned int i;
+    int ret = DXV_INVALID_REQUEST;
+    unsigned int supported = 0,
+                 count = 0;
+    (void)xinfo;
+
+    if (size)
+    {
+        /* one bit per list entry, hence the cap at 32 entries */
+        for (i = 0; i < sizeof(g_vp8_preferred_output_format_list) / sizeof(unsigned int) && i < 32; ++i)
+        {
+            /* NOTE(review): the "no blitter" sentinel is a 32-bit pattern;
+             * confirm what vpx_get_blitter returns on 64-bit targets. */
+            if (vpx_get_blitter(g_vp8_preferred_output_format_list[i]) != (void *)0xffffffff)
+            {
+                /* unsigned shift: 1 << 31 on a signed int is undefined */
+                supported |= (1u << i);
+                ++count;
+            }
+        }
+
+        if (outlist)
+        {
+            if (count && ((count + 1) == (*size / sizeof(int))))
+            {
+                ret = store_output_list(supported, count, outlist);
+                /* fill the reserved extra slot so the list is terminated
+                 * the same way the empty-list path below terminates it */
+                outlist[count] = 0;
+            }
+            else
+                *outlist = 0;
+        }
+        else
+        {
+            *size = (count + 1) * sizeof(int);
+            ret = DXV_OK;
+        }
+    }
+
+    return ret;
+}
+
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+int onyx_init(void)
+{
+    /* Selects the blitters for this cpu, then registers the VP8 image
+     * type. Returns the result of vpxdxv_register_ximage. */
+
+    /* register VPX blitters based on cpu */
+    vpx_set_blit();
+
+    /* the former trailing "return DXV_OK;" could never execute */
+    return vpxdxv_register_ximage(vp8_ximagecreate, onyx_get_output_list, VP8_FOURCC);
+}
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+int onyx_exit(void)
+{
+    /* Unregisters the VP8 image type; always reports DXV_OK. */
+    vpxdxv_un_register_ximage(VP8_FOURCC);
+    return DXV_OK;
+}
+/****************************************************************************
+ *
+ *  ROUTINE       : onyx_set_parameter
+ *
+ *  INPUTS        : XIMAGE_HANDLE src      : Image whose settings are changed.
+ *                  int Command            : One of the PBC_SET_* commands.
+ *                  unsigned int Parameter : Value for the command; several
+ *                                           commands reinterpret it as a
+ *                                           pointer.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Stores Parameter in the field selected by Command, or
+ *                  copies a reference frame for PBC_SET_REFERENCEFRAME.
+ *
+ *  SPECIAL NOTES : Does nothing when src has no algorithm base. Unknown
+ *                  commands are ignored.
+ *                  NOTE(review): pointers passed through the unsigned int
+ *                  Parameter are truncated wherever pointers are wider than
+ *                  unsigned int; fixing that needs an interface change.
+ *
+ ****************************************************************************/
+void onyx_set_parameter(XIMAGE_HANDLE src, int Command, unsigned int Parameter)
+{
+    VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src);
+
+    /* every case below dereferences the base */
+    if (!this_algorithm_base)
+        return;
+
+    switch (Command)
+    {
+    case PBC_SET_CPUFREE:
+        this_algorithm_base->cpu_free = Parameter;
+        break;
+    case PBC_SET_POSTPROC:
+        this_algorithm_base->postproc = Parameter;
+        break;
+
+    case PBC_SET_BLITBUFF:
+        this_algorithm_base->passed_in_buffer = (YV12_BUFFER_CONFIG *) Parameter;
+        break;
+
+    case PBC_SET_REFERENCEFRAME:
+    {
+        VP8D_COMP *pbi = this_algorithm_base->my_pbi;
+
+        /* no decompressor yet means there is no last frame to overwrite */
+        if (pbi)
+            vp8_yv12_copy_frame((YV12_BUFFER_CONFIG *) Parameter, &pbi->common.last_frame);
+    }
+    break;
+
+    case PBC_SET_COMMON:
+
+        /* a zero Parameter keeps the current common pointer */
+        if (Parameter)
+        {
+            this_algorithm_base->common = (VP8_COMMON *)Parameter;
+        }
+
+        break;
+    case PBC_SET_ADDNOISE:
+        this_algorithm_base->add_noise = Parameter;
+        break;
+    case PBC_SET_DEINTERLACEMODE:
+        this_algorithm_base->deinterlace = Parameter;
+        break;
+
+    }
+}
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+static int
+onyx_get_output_format(XIMAGE_HANDLE src, unsigned int *format_tag)
+{
+    /* Copies the current output format tag into *format_tag.
+     * Returns DXV_NULL_BASE when src has no algorithm base. */
+    VP8_XIMAGE_HANDLE base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src);
+
+    if (!base)
+        return DXV_NULL_BASE;
+
+    *format_tag = base->bd_tag;
+    return DXV_OK;
+}
+
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+static int
+onyx_set_output_format(XIMAGE_HANDLE src, unsigned int bd_tag)
+{
+    /* Selects bd_tag as the output format and installs its blitter.
+     *
+     * Returns DXV_OK on success, DXV_NULL_BASE when src has no algorithm
+     * base, DXV_INVALID_BLIT when bd_tag is not in the preferred format
+     * list or has no blitter. On failure the previous format and blitter
+     * are kept. */
+    VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src);
+    int i;
+    unsigned int bd_tag_found;
+    void *blit;
+
+    if (this_algorithm_base)
+    {
+        i = 0;
+        bd_tag_found = 0;
+
+        /* the preferred format list ends with a 0 entry */
+        while (g_vp8_preferred_output_format_list[i] != 0)
+        {
+            if (g_vp8_preferred_output_format_list[i] == bd_tag)
+            {
+                bd_tag_found = 1;
+                break;
+            }
+
+            i++;
+        }
+
+        if (bd_tag_found)
+        {
+            blit = (void *)vpx_get_blitter(bd_tag);
+
+            /* 0xffffffff is the "no blitter" marker that
+             * onyx_get_output_list tests for; installing it would make the
+             * next blit call through an invalid pointer */
+            if (blit == (void *)0xffffffff)
+                return DXV_INVALID_BLIT;
+
+            this_algorithm_base->blitter = (vp8blit_func)blit;
+            this_algorithm_base->bd_tag = bd_tag;
+            return DXV_OK;
+        }
+
+        return DXV_INVALID_BLIT;
+    }
+
+    return DXV_NULL_BASE;
+}
+
+/*
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+int
+vpx_get_size_of_pixel(unsigned int bd)
+{
+    /* Maps an output format tag to the value 1, 2, 3 or 4 associated with
+     * it below; returns -1 for a tag that is not listed. */
+    switch (bd)
+    {
+    case VPXDXV_YV12:
+    case VPXDXV_I420:
+        return 1;
+
+#ifdef _ENABLE_SPLIT_PIXEL_
+    case VPXDXV_SPLIT565:
+#endif
+    case VPXDXV_RGB555:
+    case VPXDXV_RGB565:
+    case VPXDXV_YUY2:
+    case VPXDXV_UYVY:
+    case VPXDXV_YVYU:
+        return 2;
+
+    case VPXDXV_RGB888:
+        return 3;
+
+    case VPXDXV_RGB8888:
+        return 4;
+
+    default:
+        break;
+    }
+
+    return -1;
+}
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
new file mode 100644
index 0000000..6d7cc36
--- /dev/null
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/x86.h"
+#include "onyxd_int.h"
+
+
+#if HAVE_MMX
+void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
+
+/* Adapter: hands the block's qcoeff, dqcoeff and dequant pointers to the
+ * MMX implementation, in that order. */
+void vp8_dequantize_b_mmx(BLOCKD *d)
+{
+    vp8_dequantize_b_impl_mmx((short *) d->qcoeff,
+                              (short *) d->dqcoeff,
+                              (short *) d->dequant);
+}
+#endif
+
+/* Installs the x86-specific dequantize functions on pbi when runtime CPU
+ * detection is compiled in and the CPU reports MMX. */
+void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
+{
+    int flags = x86_simd_caps();
+
+    /* flags and pbi are only read inside the conditional sections below;
+     * keep builds without them free of unused warnings */
+    (void)flags;
+    (void)pbi;
+
+    /* Note:
+     *
+     * This platform can be built without runtime CPU detection as well. If
+     * you modify any of the function mappings present in this file, be sure
+     * to also update them in static mappings (<arch>/filename_<arch>.h)
+     */
+#if CONFIG_RUNTIME_CPU_DETECT
+    /* Override default functions with fastest ones for this CPU. */
+#if HAVE_MMX
+
+    if (flags & HAS_MMX)
+    {
+        pbi->dequant.block   = vp8_dequantize_b_mmx;
+        pbi->dequant.idct    = vp8_dequant_idct_mmx;
+        pbi->dequant.idct_dc = vp8_dequant_dc_idct_mmx;
+    }
+
+#endif
+#endif
+}
diff --git a/vp8/decoder/xprintf.c b/vp8/decoder/xprintf.c
new file mode 100644
index 0000000..cb2221c
--- /dev/null
+++ b/vp8/decoder/xprintf.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+/****************************************************************************
+*
+* Module Title : xprintf.cpp
+*
+* Description : Display a printf style message on the current video frame.
+*
+****************************************************************************/
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+
+#include <stdio.h>
+#include <stdarg.h>
+#ifdef _WIN32_WCE
+#include <windows.h>
+#endif
+#include "xprintf.h"
+
+/****************************************************************************
+ *
+ *  ROUTINE       : onyx_xprintf
+ *
+ *  INPUTS        : unsigned char *ppbuffer : Buffer the text is drawn into.
+ *                  long n_pixel            : Offset into buffer to write text.
+ *                  long n_size             : Not used by this implementation.
+ *                  long n_stride           : Offset added to reach the next row.
+ *                  const char *format      : Format string for print.
+ *                  ...                     : Variable length argument list.
+ *
+ *  OUTPUTS       : None.
+ *
+ *  RETURNS       : int: Length of the formatted text, or 0 when nothing was
+ *                  drawn (a drawing step failed, or the build is not
+ *                  _WIN32_WCE).
+ *
+ *  FUNCTION      : Display a printf style message on the current video frame.
+ *
+ *  SPECIAL NOTES : Drawing is only compiled in for _WIN32_WCE. Text is
+ *                  truncated to 255 characters. Writes are not bounded by
+ *                  n_size.
+ *
+ ****************************************************************************/
+int onyx_xprintf(unsigned char *ppbuffer, long n_pixel, long n_size, long n_stride, const char *format, ...)
+{
+    int rc = 0;
+
+#ifdef _WIN32_WCE
+    BOOL b_rc;
+    va_list arglist;
+
+    /* NULL so the cleanup at Exit can tell what was actually created */
+    HFONT hfont = NULL;
+    HFONT hfonto = NULL;
+
+    char sz_formatted[256] = "";
+    unsigned char *p_dest = &ppbuffer[n_pixel];
+
+    // Set up temporary bitmap
+    HDC hdc_memory = NULL;
+    HBITMAP hbm_temp = NULL;
+    HBITMAP hbm_orig = NULL;
+
+    RECT rect;
+
+    // Copy bitmap to video frame
+    long x;
+    long y;
+
+    (void)n_size;
+
+    // Format text; _vsnprintf does not terminate on truncation, so leave
+    // room for the terminator and write it explicitly
+    va_start(arglist, format);
+    _vsnprintf(sz_formatted, sizeof(sz_formatted) - 1, format, arglist);
+    va_end(arglist);
+    sz_formatted[sizeof(sz_formatted) - 1] = '\0';
+
+    // 8x8 cell per character
+    rect.left = 0;
+    rect.top = 0;
+    rect.right = 8 * strlen(sz_formatted);
+    rect.bottom = 8;
+
+    hdc_memory = create_compatible_dc(NULL);
+
+    if (hdc_memory == NULL)
+        goto Exit;
+
+    hbm_temp = create_bitmap(rect.right, rect.bottom, 1, 1, NULL);
+
+    if (hbm_temp == NULL)
+        goto Exit;
+
+    hbm_orig = (HBITMAP)(select_object(hdc_memory, hbm_temp));
+
+    if (!hbm_orig)
+        goto Exit;
+
+    // Write text into bitmap
+    // font?
+    hfont = create_font(8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VARIABLE_PITCH | FF_SWISS, "");
+
+    if (hfont == NULL)
+        goto Exit;
+
+    // Select the new font and keep the previous one for restoring at Exit
+    hfonto = (HFONT)(select_object(hdc_memory, hfont));
+
+    if (!hfonto)
+        goto Exit;
+
+    set_text_color(hdc_memory, 1);
+    set_bk_color(hdc_memory, 0);
+    set_bk_mode(hdc_memory, TRANSPARENT);
+
+    // Clear the bitmap before drawing
+    b_rc = bit_blt(hdc_memory, rect.left, rect.top, rect.right, rect.bottom, hdc_memory, rect.left, rect.top, BLACKNESS);
+
+    if (!b_rc)
+        goto Exit;
+
+    b_rc = ext_text_out(hdc_memory, 0, 0, ETO_CLIPPED, &rect, sz_formatted, strlen(sz_formatted), NULL);
+
+    if (!b_rc)
+        goto Exit;
+
+    // Set every pixel of the text to 255 in the destination; bitmap rows
+    // are read bottom-up
+    for (y = rect.top; y < rect.bottom; ++y)
+    {
+        for (x = rect.left; x < rect.right; ++x)
+        {
+            if (get_pixel(hdc_memory, x, rect.bottom - 1 - y))
+                p_dest[x] = 255;
+        }
+
+        p_dest += n_stride;
+    }
+
+    rc = strlen(sz_formatted);
+
+Exit:
+
+    if (hbm_temp != NULL)
+    {
+        if (hbm_orig != NULL)
+        {
+            select_object(hdc_memory, hbm_orig);
+        }
+
+        delete_object(hbm_temp);
+    }
+
+    if (hfont != NULL)
+    {
+        if (hfonto != NULL)
+            select_object(hdc_memory, hfonto);
+
+        delete_object(hfont);
+    }
+
+    if (hdc_memory != NULL)
+        delete_dc(hdc_memory);
+
+    hdc_memory = 0;
+
+#else
+    // Nothing is drawn on other platforms
+    (void)ppbuffer;
+    (void)n_pixel;
+    (void)n_size;
+    (void)n_stride;
+    (void)format;
+#endif
+
+    return rc;
+}
diff --git a/vp8/decoder/xprintf.h b/vp8/decoder/xprintf.h
new file mode 100644
index 0000000..2f175e9
--- /dev/null
+++ b/vp8/decoder/xprintf.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+/****************************************************************************
+*
+* Module Title : xprintf.h
+*
+* Description : Debug print interface header file.
+*
+****************************************************************************/
+#ifndef __INC_XPRINTF_H
+#define __INC_XPRINTF_H
+
+/****************************************************************************
+* Header Files
+****************************************************************************/
+
+/****************************************************************************
+* Functions
+****************************************************************************/
+
+// Display a printf style message on the current video frame.
+//   ppbuffer : buffer the text is drawn into
+//   n_pixel  : offset into ppbuffer where drawing starts
+//   n_size   : not used by the implementation in xprintf.c
+//   n_stride : offset added to move from one row to the next
+//   format   : printf style format string, followed by its arguments
+// Returns the length of the formatted text, or 0 when nothing was drawn.
+extern int onyx_xprintf(unsigned char *ppbuffer, long n_pixel, long n_size, long n_stride, const char *format, ...);
+
+#endif