Merge "Fix relative include paths"
diff --git a/configure b/configure
index ed19906..0f55de3 100755
--- a/configure
+++ b/configure
@@ -40,7 +40,6 @@
${toggle_runtime_cpu_detect} runtime cpu detection
${toggle_shared} shared library support
${toggle_small} favor smaller size over speed
- ${toggle_arm_asm_detok} assembly version of the detokenizer (ARM platforms only)
${toggle_postproc_visualizer} macro block / block level visualizers
Codecs:
@@ -255,7 +254,6 @@
realtime_only
shared
small
- arm_asm_detok
postproc_visualizer
os_support
"
@@ -296,7 +294,6 @@
realtime_only
shared
small
- arm_asm_detok
postproc_visualizer
"
diff --git a/vp8/decoder/arm/detokenize.asm b/vp8/decoder/arm/detokenize.asm
deleted file mode 100644
index 0c164f1..0000000
--- a/vp8/decoder/arm/detokenize.asm
+++ /dev/null
@@ -1,320 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_decode_mb_tokens_v6|
-
- AREA |.text|, CODE, READONLY ; name this block of code
-
- INCLUDE asm_dec_offsets.asm
-
-l_qcoeff EQU 0
-l_i EQU 4
-l_type EQU 8
-l_stop EQU 12
-l_c EQU 16
-l_l_ptr EQU 20
-l_a_ptr EQU 24
-l_bc EQU 28
-l_coef_ptr EQU 32
-l_stacksize EQU 64
-
-
-;; constant offsets -- these should be created at build time
-c_block2above_offset EQU 25
-c_entropy_nodes EQU 11
-c_dct_eob_token EQU 11
-
-|vp8_decode_mb_tokens_v6| PROC
- stmdb sp!, {r4 - r11, lr}
- sub sp, sp, #l_stacksize
- mov r7, r1 ; type
- mov r9, r0 ; detoken
-
- ldr r1, [r9, #detok_current_bc]
- ldr r0, [r9, #detok_qcoeff_start_ptr]
- mov r11, #0 ; i
- mov r3, #16 ; stop
-
- cmp r7, #1 ; type ?= 1
- addeq r11, r11, #24 ; i = 24
- addeq r3, r3, #8 ; stop = 24
- addeq r0, r0, #3, 24 ; qcoefptr += 24*16
-
- str r0, [sp, #l_qcoeff]
- str r11, [sp, #l_i]
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
- str r1, [sp, #l_bc]
-
- add lr, r9, r7, lsl #2 ; detoken + type*4
-
- ldr r8, [r1, #bool_decoder_user_buffer]
-
- ldr r10, [lr, #detok_coef_probs]
- ldr r5, [r1, #bool_decoder_count]
- ldr r6, [r1, #bool_decoder_range]
- ldr r4, [r1, #bool_decoder_value]
-
- str r10, [sp, #l_coef_ptr]
-
-BLOCK_LOOP
- ldr r3, [r9, #detok_ptr_block2leftabove]
- ldr r1, [r9, #detok_L]
- ldr r2, [r9, #detok_A]
- ldrb r12, [r3, r11]! ; block2left[i]
- ldrb r3, [r3, #c_block2above_offset]; block2above[i]
-
- cmp r7, #0 ; c = !type
- moveq r7, #1
- movne r7, #0
-
- ldrb r0, [r1, r12]! ; *(L += block2left[i])
- ldrb r3, [r2, r3]! ; *(A += block2above[i])
- mov lr, #c_entropy_nodes ; ENTROPY_NODES = 11
-
-; VP8_COMBINEENTROPYCONTETEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) !=0)
- cmp r0, #0 ; *l ?= 0
- movne r0, #1
- cmp r3, #0 ; *a ?= 0
- addne r0, r0, #1 ; t
-
- str r1, [sp, #l_l_ptr] ; save &l
- str r2, [sp, #l_a_ptr] ; save &a
- smlabb r0, r0, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES)
- mov r1, #0 ; t = 0
- str r7, [sp, #l_c]
-
- ;align 4
-COEFF_LOOP
- ldr r3, [r9, #detok_ptr_coef_bands_x]
- ldr lr, [r9, #detok_coef_tree_ptr]
- ;STALL
- ldrb r3, [r3, r7] ; coef_bands_x[c]
- ;STALL
- ;STALL
- add r0, r0, r3 ; Prob += coef_bands_x[c]
-
-get_token_loop
- ldrb r2, [r0, +r1, asr #1] ; Prob[t >> 1]
- mov r3, r6, lsl #8 ; range << 8
- sub r3, r3, #256 ; (range << 8) - (1 << 8)
- mov r10, #1 ; 1
-
- smlawb r2, r3, r2, r10 ; split = 1 + (((range-1) * probability) >> 8)
-
- ldrb r12, [r8] ; load cx data byte in stall slot : r8 = bufptr
- ;++
-
- subs r3, r4, r2, lsl #24 ; value-(split<<24): used later to calculate shift for NORMALIZE
- addhs r1, r1, #1 ; t += 1
- movhs r4, r3 ; value -= bigsplit (split << 24)
- subhs r2, r6, r2 ; range -= split
- ; movlo r6, r2 ; range = split
-
- ldrsb r1, [lr, r1] ; t = onyx_coef_tree_ptr[t]
-
-; NORMALIZE
- clz r3, r2 ; vp8dx_bitreader_norm[range] + 24
- sub r3, r3, #24 ; vp8dx_bitreader_norm[range]
- subs r5, r5, r3 ; count -= shift
- mov r6, r2, lsl r3 ; range <<= shift
- mov r4, r4, lsl r3 ; value <<= shift
-
-; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
- addle r5, r5, #8 ; count += 8
- rsble r3, r5, #24 ; 24 - count
- addle r8, r8, #1 ; bufptr++
- orrle r4, r4, r12, lsl r3 ; value |= *bufptr << shift + 16
-
- cmp r1, #0 ; t ?= 0
- bgt get_token_loop ; while (t > 0)
-
- cmn r1, #c_dct_eob_token ; if(t == -DCT_EOB_TOKEN)
- beq END_OF_BLOCK ; break
-
- rsb lr, r1, #0 ; v = -t;
-
- cmp lr, #4 ; if(v > FOUR_TOKEN)
- ble SKIP_EXTRABITS
-
- ldr r3, [r9, #detok_teb_base_ptr]
- mov r11, #1 ; 1 in split = 1 + ... nope, v+= 1 << bits_count
- add r7, r3, lr, lsl #4 ; detok_teb_base_ptr + (v << 4)
-
- ldrsh lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
- ldrsh r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length
-
-extrabits_loop
- add r3, r0, r7 ; &teb_ptr->Probs[bits_count]
-
- ldrb r2, [r3, #4] ; probability. why +4?
- mov r3, r6, lsl #8 ; range << 8
- sub r3, r3, #256 ; range << 8 + 1 << 8
-
- smlawb r2, r3, r2, r11 ; split = 1 + (((range-1) * probability) >> 8)
-
- ldrb r12, [r8] ; *bufptr
- ;++
-
- subs r10, r4, r2, lsl #24 ; value - (split<<24)
- movhs r4, r10 ; value = value - (split << 24)
- subhs r2, r6, r2 ; range = range - split
- addhs lr, lr, r11, lsl r0 ; v += ((UINT16)1<<bits_count)
-
-; NORMALIZE
- clz r3, r2 ; shift - leading zeros in split
- sub r3, r3, #24 ; don't count first 3 bytes
- subs r5, r5, r3 ; count -= shift
- mov r6, r2, lsl r3 ; range = range << shift
- mov r4, r4, lsl r3 ; value <<= shift
-
- addle r5, r5, #8 ; count += BR_COUNT
- addle r8, r8, #1 ; bufptr++
- rsble r3, r5, #24 ; BR_COUNT - count
- orrle r4, r4, r12, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
-
- subs r0, r0, #1 ; bits_count --
- bpl extrabits_loop
-
-
-SKIP_EXTRABITS
- ldr r11, [sp, #l_qcoeff]
- ldr r0, [sp, #l_coef_ptr] ; Prob = coef_probs
-
- cmp r1, #0 ; check for nonzero token - if (t)
- beq SKIP_EOB_CHECK ; if t is zero, we will skip the eob table chec
-
- add r3, r6, #1 ; range + 1
- mov r2, r3, lsr #1 ; split = (range + 1) >> 1
-
- subs r3, r4, r2, lsl #24 ; value - (split<<24)
- movhs r4, r3 ; value -= (split << 24)
- subhs r2, r6, r2 ; range -= split
- mvnhs r3, lr ; -v
- addhs lr, r3, #1 ; v = (v ^ -1) + 1
-
-; NORMALIZE
- clz r3, r2 ; leading 0s in split
- sub r3, r3, #24 ; shift
- subs r5, r5, r3 ; count -= shift
- mov r6, r2, lsl r3 ; range <<= shift
- mov r4, r4, lsl r3 ; value <<= shift
- ldrleb r2, [r8], #1 ; *(bufptr++)
- addle r5, r5, #8 ; count += 8
- rsble r3, r5, #24 ; BR_COUNT - count
- orrle r4, r4, r2, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
-
- add r0, r0, #11 ; Prob += ENTROPY_NODES (11)
-
- cmn r1, #1 ; t < -ONE_TOKEN
-
- addlt r0, r0, #11 ; Prob += ENTROPY_NODES (11)
-
- mvn r1, #1 ; t = -1 ???? C is -2
-
-SKIP_EOB_CHECK
- ldr r7, [sp, #l_c] ; c
- ldr r3, [r9, #detok_scan]
- add r1, r1, #2 ; t+= 2
- cmp r7, #15 ; c should will be one higher
-
- ldr r3, [r3, +r7, lsl #2] ; scan[c] this needs pre-inc c value
- add r7, r7, #1 ; c++
- add r3, r11, r3, lsl #1 ; qcoeff + scan[c]
-
- str r7, [sp, #l_c] ; store c
- strh lr, [r3] ; qcoef_ptr[scan[c]] = v
-
- blt COEFF_LOOP
-
- sub r7, r7, #1 ; if(t != -DCT_EOB_TOKEN) --c
-
-END_OF_BLOCK
- ldr r3, [sp, #l_type] ; type
- ldr r10, [sp, #l_coef_ptr] ; coef_ptr
- ldr r0, [sp, #l_qcoeff] ; qcoeff
- ldr r11, [sp, #l_i] ; i
- ldr r12, [sp, #l_stop] ; stop
-
- cmp r3, #0 ; type ?= 0
- moveq r1, #1
- movne r1, #0
- add r3, r11, r9 ; detok + i
-
- cmp r7, r1 ; c ?= !type
- strb r7, [r3, #detok_eob] ; eob[i] = c
-
- ldr r7, [sp, #l_l_ptr] ; l
- ldr r2, [sp, #l_a_ptr] ; a
- movne r3, #1 ; t
- moveq r3, #0
-
- add r0, r0, #32 ; qcoeff += 32 (16 * 2?)
- add r11, r11, #1 ; i++
- strb r3, [r7] ; *l = t
- strb r3, [r2] ; *a = t
- str r0, [sp, #l_qcoeff] ; qcoeff
- str r11, [sp, #l_i] ; i
-
- cmp r11, r12 ; i < stop
- ldr r7, [sp, #l_type] ; type
-
- blt BLOCK_LOOP
-
- cmp r11, #25 ; i ?= 25
- bne ln2_decode_mb_to
-
- ldr r12, [r9, #detok_qcoeff_start_ptr]
- ldr r10, [r9, #detok_coef_probs]
- mov r7, #0 ; type/i = 0
- mov r3, #16 ; stop = 16
- str r12, [sp, #l_qcoeff] ; qcoeff_ptr = qcoeff_start_ptr
- str r7, [sp, #l_i]
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
-
- str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type=0]
-
- b BLOCK_LOOP
-
-ln2_decode_mb_to
- cmp r11, #16 ; i ?= 16
- bne ln1_decode_mb_to
-
- mov r10, #detok_coef_probs
- add r10, r10, #2*4 ; coef_probs[type]
- ldr r10, [r9, r10] ; detok + detok_coef_probs[type]
-
- mov r7, #2 ; type = 2
- mov r3, #24 ; stop = 24
-
- str r7, [sp, #l_type]
- str r3, [sp, #l_stop]
-
- str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type]
- b BLOCK_LOOP
-
-ln1_decode_mb_to
- ldr r2, [sp, #l_bc]
- mov r0, #0
- nop
-
- str r8, [r2, #bool_decoder_user_buffer]
- str r5, [r2, #bool_decoder_count]
- str r4, [r2, #bool_decoder_value]
- str r6, [r2, #bool_decoder_range]
-
- add sp, sp, #l_stacksize
- ldmia sp!, {r4 - r11, pc}
-
- ENDP ; |vp8_decode_mb_tokens_v6|
-
- END
diff --git a/vp8/decoder/arm/detokenize_arm.h b/vp8/decoder/arm/detokenize_arm.h
deleted file mode 100644
index 9bb19b6..0000000
--- a/vp8/decoder/arm/detokenize_arm.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef DETOKENIZE_ARM_H
-#define DETOKENIZE_ARM_H
-
-#if HAVE_ARMV6
-#if CONFIG_ARM_ASM_DETOK
-void vp8_init_detokenizer(VP8D_COMP *dx);
-void vp8_decode_mb_tokens_v6(DETOK *detoken, int type);
-#endif
-#endif
-
-#endif
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 58ac6d2..83fa14d 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -74,37 +74,6 @@
}
}
-#if CONFIG_ARM_ASM_DETOK
-/* mashup of vp8_block2left and vp8_block2above so we only need one pointer
- * for the assembly version.
- */
-DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) =
-{
- /* vp8_block2left */
- 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
- /* vp8_block2above */
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
-};
-
-void vp8_init_detokenizer(VP8D_COMP *dx)
-{
- const VP8_COMMON *const oc = & dx->common;
- MACROBLOCKD *x = & dx->mb;
-
- dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree;
- dx->detoken.ptr_block2leftabove = vp8_block2leftabove;
- dx->detoken.ptr_coef_bands_x = vp8_coef_bands_x;
- dx->detoken.scan = vp8_default_zig_zag1d;
- dx->detoken.teb_base_ptr = vp8d_token_extra_bits2;
- dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
-
- dx->detoken.coef_probs[0] = (oc->fc.coef_probs [0] [ 0 ] [0]);
- dx->detoken.coef_probs[1] = (oc->fc.coef_probs [1] [ 0 ] [0]);
- dx->detoken.coef_probs[2] = (oc->fc.coef_probs [2] [ 0 ] [0]);
- dx->detoken.coef_probs[3] = (oc->fc.coef_probs [3] [ 0 ] [0]);
-}
-#endif
-
DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
#define FILL \
if(count < 0) \
@@ -202,35 +171,6 @@
}\
NORMALIZE
-#if CONFIG_ARM_ASM_DETOK
-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
-{
- int eobtotal = 0;
- int i, type;
-
- dx->detoken.current_bc = x->current_bc;
- dx->detoken.A = x->above_context;
- dx->detoken.L = x->left_context;
-
- type = 3;
-
- if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
- {
- type = 1;
- eobtotal -= 16;
- }
-
- vp8_decode_mb_tokens_v6(&dx->detoken, type);
-
- for (i = 0; i < 25; i++)
- {
- x->eobs[i] = dx->detoken.eob[i];
- eobtotal += dx->detoken.eob[i];
- }
-
- return eobtotal;
-}
-#else
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
{
ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
@@ -423,4 +363,3 @@
return eobtotal;
}
-#endif /*!CONFIG_ASM_DETOK*/
diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h
index 294a4a5..8640bda 100644
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@@ -14,10 +14,6 @@
#include "onyxd_int.h"
-#if ARCH_ARM
-#include "arm/detokenize_arm.h"
-#endif
-
void vp8_reset_mb_tokens_context(MACROBLOCKD *x);
int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 4ac419a..bfb00a5 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -133,9 +133,6 @@
cm->last_sharpness_level = cm->sharpness_level;
}
-#if CONFIG_ARM_ASM_DETOK
- vp8_init_detokenizer(pbi);
-#endif
pbi->common.error.setjmp = 0;
return (VP8D_PTR) pbi;
}
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 8c77da4..7fc9010 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -473,22 +473,16 @@
if (pbi->b_multithreaded_rd)
{
- if (pbi->mt_current_mb_col)
- {
vpx_free(pbi->mt_current_mb_col);
pbi->mt_current_mb_col = NULL ;
- }
/* Free above_row buffers. */
if (pbi->mt_yabove_row)
{
for (i=0; i< mb_rows; i++)
{
- if (pbi->mt_yabove_row[i])
- {
vpx_free(pbi->mt_yabove_row[i]);
pbi->mt_yabove_row[i] = NULL ;
- }
}
vpx_free(pbi->mt_yabove_row);
pbi->mt_yabove_row = NULL ;
@@ -498,11 +492,8 @@
{
for (i=0; i< mb_rows; i++)
{
- if (pbi->mt_uabove_row[i])
- {
vpx_free(pbi->mt_uabove_row[i]);
pbi->mt_uabove_row[i] = NULL ;
- }
}
vpx_free(pbi->mt_uabove_row);
pbi->mt_uabove_row = NULL ;
@@ -512,11 +503,8 @@
{
for (i=0; i< mb_rows; i++)
{
- if (pbi->mt_vabove_row[i])
- {
vpx_free(pbi->mt_vabove_row[i]);
pbi->mt_vabove_row[i] = NULL ;
- }
}
vpx_free(pbi->mt_vabove_row);
pbi->mt_vabove_row = NULL ;
@@ -527,11 +515,8 @@
{
for (i=0; i< mb_rows; i++)
{
- if (pbi->mt_yleft_col[i])
- {
vpx_free(pbi->mt_yleft_col[i]);
pbi->mt_yleft_col[i] = NULL ;
- }
}
vpx_free(pbi->mt_yleft_col);
pbi->mt_yleft_col = NULL ;
@@ -541,11 +526,8 @@
{
for (i=0; i< mb_rows; i++)
{
- if (pbi->mt_uleft_col[i])
- {
vpx_free(pbi->mt_uleft_col[i]);
pbi->mt_uleft_col[i] = NULL ;
- }
}
vpx_free(pbi->mt_uleft_col);
pbi->mt_uleft_col = NULL ;
@@ -555,11 +537,8 @@
{
for (i=0; i< mb_rows; i++)
{
- if (pbi->mt_vleft_col[i])
- {
vpx_free(pbi->mt_vleft_col[i]);
pbi->mt_vleft_col[i] = NULL ;
- }
}
vpx_free(pbi->mt_vleft_col);
pbi->mt_vleft_col = NULL ;
@@ -644,29 +623,17 @@
sem_destroy(&pbi->h_event_end_decoding);
- if (pbi->h_decoding_thread)
- {
vpx_free(pbi->h_decoding_thread);
pbi->h_decoding_thread = NULL;
- }
- if (pbi->h_event_start_decoding)
- {
vpx_free(pbi->h_event_start_decoding);
pbi->h_event_start_decoding = NULL;
- }
- if (pbi->mb_row_di)
- {
vpx_free(pbi->mb_row_di);
pbi->mb_row_di = NULL ;
- }
- if (pbi->de_thread_data)
- {
vpx_free(pbi->de_thread_data);
pbi->de_thread_data = NULL;
- }
}
}
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
index 87f159c..5852afd 100644
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -29,8 +29,8 @@
#if HAVE_ARMV6
if (has_media)
{
- /*cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
- cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
+ cpi->rtcd.variance.sad16x16 = vp8_sad16x16_armv6;
+ /*cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c;
cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/
diff --git a/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
new file mode 100644
index 0000000..c759f7c
--- /dev/null
+++ b/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm
@@ -0,0 +1,84 @@
+;
+; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_sad16x16_armv6|
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; r0 const unsigned char *src_ptr
+; r1 int src_stride
+; r2 const unsigned char *ref_ptr
+; r3 int ref_stride
+; stack max_sad (not used)
+|vp8_sad16x16_armv6| PROC
+ stmfd sp!, {r4-r12, lr} ; save callee-saved regs + return address (10 words)
+ mov r4, #0 ; sad = 0 (primary accumulator, returned in r0)
+ mov r5, #8 ; loop count: 8 iterations x 2 rows each = 16 rows
+ ; NOTE(review): ldr on r0/r2 presumably relies on unaligned word loads being permitted - confirm
+loop
+ ; 1st row: bytes 0-7 are group "A", bytes 8-15 are group "B"
+ ldr r6, [r0, #0x0] ; load 4 src pixels (1A)
+ ldr r8, [r2, #0x0] ; load 4 ref pixels (1A)
+ ldr r7, [r0, #0x4] ; load 4 src pixels (1A)
+ ldr r9, [r2, #0x4] ; load 4 ref pixels (1A)
+ ldr r10, [r0, #0x8] ; load 4 src pixels (1B)
+ ldr r11, [r0, #0xC] ; load 4 src pixels (1B)
+
+ usada8 r4, r8, r6, r4 ; r4 += sad of src/ref bytes 0-3
+ usad8 r8, r7, r9 ; r8 = sad of bytes 4-7; r8 (ref word, already consumed) becomes 2nd accumulator
+
+ ldr r12, [r2, #0x8] ; load 4 ref pixels (1B)
+ ldr lr, [r2, #0xC] ; load 4 ref pixels (1B)
+
+ add r0, r0, r1 ; set src pointer to next row
+ add r2, r2, r3 ; set ref pointer to next row (r2 is ref_ptr)
+
+ usada8 r4, r10, r12, r4 ; r4 += sad of bytes 8-11
+ usada8 r8, r11, lr, r8 ; r8 += sad of bytes 12-15
+
+ ldr r6, [r0, #0x0] ; load 4 src pixels (2A)
+ ldr r7, [r0, #0x4] ; load 4 src pixels (2A)
+ add r4, r4, r8 ; fold 2nd accumulator into sad; r8 is free again
+
+ ; 2nd row (its first two src words were already loaded above)
+ ldr r8, [r2, #0x0] ; load 4 ref pixels (2A)
+ ldr r9, [r2, #0x4] ; load 4 ref pixels (2A)
+ ldr r10, [r0, #0x8] ; load 4 src pixels (2B)
+ ldr r11, [r0, #0xC] ; load 4 src pixels (2B)
+
+ usada8 r4, r6, r8, r4 ; r4 += sad of src/ref bytes 0-3
+ usad8 r8, r7, r9 ; r8 = sad of bytes 4-7 (restart 2nd accumulator)
+
+ ldr r12, [r2, #0x8] ; load 4 ref pixels (2B)
+ ldr lr, [r2, #0xC] ; load 4 ref pixels (2B)
+
+ add r0, r0, r1 ; set src pointer to next row
+ add r2, r2, r3 ; set ref pointer to next row (r2 is ref_ptr)
+
+ usada8 r4, r10, r12, r4 ; r4 += sad of bytes 8-11
+ usada8 r8, r11, lr, r8 ; r8 += sad of bytes 12-15
+
+ subs r5, r5, #1 ; decrement loop counter (sets flags for bne)
+ add r4, r4, r8 ; fold 2nd accumulator into sad (add without 's' keeps the flags)
+
+ bne loop ; repeat until all 8 row pairs are done
+
+ mov r0, r4 ; return sad
+ ldmfd sp!, {r4-r12, pc} ; restore saved regs and return by popping into pc
+
+ ENDP
+
+ END
+
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index 06d7287..c807e29 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -14,11 +14,15 @@
#if HAVE_ARMV6
+extern prototype_sad(vp8_sad16x16_armv6);
extern prototype_variance(vp8_variance16x16_armv6);
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp8_variance_sad16x16
+#define vp8_variance_sad16x16 vp8_sad16x16_armv6
+
#undef vp8_variance_subpixvar16x16
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_armv6
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 2e12533..cd66016 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -58,21 +58,6 @@
RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
}
-void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode)
-{
- vp8_predict_intra4x4(b, best_mode, b->predictor);
-
- ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
-
- x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
-
- x->quantize_b(be, b);
-
- IDCT_INVOKE(&rtcd->common->idct, idct16)(b->dqcoeff, b->diff, 32);
-
- RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-}
-
void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb)
{
int i;
@@ -168,17 +153,3 @@
vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}
-void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
-{
- vp8_build_intra_predictors_mbuv(&x->e_mbd);
-
- ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
-
- vp8_transform_mbuv(x);
-
- vp8_quantize_mbuv(x);
-
- vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
-
- vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
-}
diff --git a/vp8/encoder/encodeintra.h b/vp8/encoder/encodeintra.h
index c0247b0..b8b80f1 100644
--- a/vp8/encoder/encodeintra.h
+++ b/vp8/encoder/encodeintra.h
@@ -19,6 +19,5 @@
void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);
void vp8_update_mode_context(int *abmode, int *lbmode, int i, int best_mode);
void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode);
-void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *, MACROBLOCK *x);
#endif
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index d9923fb..33aaa2c 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -779,15 +779,17 @@
int *num00,
const vp8_variance_fn_ptr_t *vfp,
int *mvsadcost[2],
- int *mvcost[2]
+ int *mvcost[2],
+ MV *center_mv
)
{
MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
- MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
+ MV neighbors[8] = { { -1, -1}, {0, -1}, {1, -1}, { -1, 0}, {1, 0}, { -1, 1}, {0, 1}, {1, 1} } ;
int i, j;
unsigned char *src = (*(b->base_src) + b->src);
int src_stride = b->src_stride;
- int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
+ int rr = center_mv->row, rc = center_mv->col;
+ int br = ref_mv->row >> 3, bc = ref_mv->col >> 3, tr, tc;
unsigned int besterr, thiserr = 0x7fffffff;
int k = -1, tk;
@@ -892,7 +894,7 @@
best_mv->row = br;
best_mv->col = bc;
- return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
+ return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + vp8_mv_err_cost(best_mv, center_mv, mvcost, error_per_bit) ;
}
#undef MVC
#undef PRE
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 7600f87..83f95c6 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -43,8 +43,8 @@
int *num00,
const vp8_variance_fn_ptr_t *vf,
int *mvsadcost[2],
- int *mvcost[2]
-
+ int *mvcost[2],
+ MV *center_mv
);
typedef int (fractional_mv_step_fp)
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index af84a03..6ab1b39 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -262,38 +262,32 @@
void vp8_dealloc_compressor_data(VP8_COMP *cpi)
{
- vpx_free(cpi->tplist);
+ vpx_free(cpi->tplist);
cpi->tplist = NULL;
// Delete last frame MV storage buffers
- if (cpi->lfmv != 0)
vpx_free(cpi->lfmv);
cpi->lfmv = 0;
- if (cpi->lf_ref_frame_sign_bias != 0)
vpx_free(cpi->lf_ref_frame_sign_bias);
cpi->lf_ref_frame_sign_bias = 0;
- if (cpi->lf_ref_frame != 0)
vpx_free(cpi->lf_ref_frame);
cpi->lf_ref_frame = 0;
// Delete sementation map
- if (cpi->segmentation_map != 0)
vpx_free(cpi->segmentation_map);
cpi->segmentation_map = 0;
- if (cpi->active_map != 0)
vpx_free(cpi->active_map);
cpi->active_map = 0;
// Delete first pass motion map
- if (cpi->fp_motion_map != 0)
vpx_free(cpi->fp_motion_map);
cpi->fp_motion_map = 0;
@@ -318,23 +312,19 @@
cpi->tok = 0;
// Structure used to monitor GF usage
- if (cpi->gf_active_flags != 0)
vpx_free(cpi->gf_active_flags);
cpi->gf_active_flags = 0;
- if(cpi->mb.pip)
vpx_free(cpi->mb.pip);
cpi->mb.pip = 0;
#if !(CONFIG_REALTIME_ONLY)
- if(cpi->total_stats)
vpx_free(cpi->total_stats);
cpi->total_stats = 0;
- if(cpi->this_frame_stats)
vpx_free(cpi->this_frame_stats);
cpi->this_frame_stats = 0;
@@ -441,7 +431,6 @@
set_segment_data(ptr, &feature_data[0][0], SEGMENT_DELTADATA);
// Delete sementation map
- if (seg_map != 0)
vpx_free(seg_map);
seg_map = 0;
@@ -535,7 +524,6 @@
set_segment_data((VP8_PTR)cpi, &feature_data[0][0], SEGMENT_DELTADATA);
// Delete sementation map
- if (seg_map != 0)
vpx_free(seg_map);
seg_map = 0;
@@ -601,6 +589,7 @@
sf->first_step = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
+ sf->improved_mv_pred = 1;
cpi->do_full[0] = 0;
cpi->do_full[1] = 0;
@@ -643,34 +632,6 @@
sf->first_step = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
-
- if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
- {
- sf->thresh_mult[THR_NEWMV ] = INT_MAX;
- sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
- sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
- sf->thresh_mult[THR_NEARMV ] = INT_MAX;
- sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
- }
-
- if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
- {
- sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
- sf->thresh_mult[THR_ZEROG ] = INT_MAX;
- sf->thresh_mult[THR_NEARG ] = INT_MAX;
- sf->thresh_mult[THR_NEWG ] = INT_MAX;
- sf->thresh_mult[THR_SPLITG ] = INT_MAX;
- }
-
- if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
- {
- sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
- sf->thresh_mult[THR_ZEROA ] = INT_MAX;
- sf->thresh_mult[THR_NEARA ] = INT_MAX;
- sf->thresh_mult[THR_NEWA ] = INT_MAX;
- sf->thresh_mult[THR_SPLITA ] = INT_MAX;
- }
-
break;
case 1:
case 3:
@@ -728,41 +689,22 @@
sf->full_freq[0] = 15;
sf->full_freq[1] = 31;
- sf->first_step = 0;
- sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
-
- if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
- {
- sf->thresh_mult[THR_NEWMV ] = INT_MAX;
- sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
- sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
- sf->thresh_mult[THR_NEARMV ] = INT_MAX;
- sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
- }
-
- if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
- {
- sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
- sf->thresh_mult[THR_ZEROG ] = INT_MAX;
- sf->thresh_mult[THR_NEARG ] = INT_MAX;
- sf->thresh_mult[THR_NEWG ] = INT_MAX;
- sf->thresh_mult[THR_SPLITG ] = INT_MAX;
- }
-
- if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
- {
- sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
- sf->thresh_mult[THR_ZEROA ] = INT_MAX;
- sf->thresh_mult[THR_NEARA ] = INT_MAX;
- sf->thresh_mult[THR_NEWA ] = INT_MAX;
- sf->thresh_mult[THR_SPLITA ] = INT_MAX;
- }
-
if (Speed > 0)
{
- // Disable coefficient optimization above speed 0
+ /* Disable coefficient optimization above speed 0 */
sf->optimize_coefficients = 0;
+ sf->use_fastquant_for_pick = 1;
+ sf->no_skip_block4x4_search = 0;
+ sf->first_step = 1;
+
+ cpi->mode_check_freq[THR_SPLITG] = 2;
+ cpi->mode_check_freq[THR_SPLITA] = 2;
+ cpi->mode_check_freq[THR_SPLITMV] = 0;
+ }
+
+ if (Speed > 1)
+ {
cpi->mode_check_freq[THR_SPLITG] = 4;
cpi->mode_check_freq[THR_SPLITA] = 4;
cpi->mode_check_freq[THR_SPLITMV] = 2;
@@ -795,18 +737,10 @@
sf->thresh_mult[THR_NEWA ] = 2000;
sf->thresh_mult[THR_SPLITA ] = 20000;
}
-
- sf->use_fastquant_for_pick = 1;
-
- sf->first_step = 1;
- sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
- sf->no_skip_block4x4_search = 0;
}
- if (Speed > 1)
+ if (Speed > 2)
{
- sf->use_fastquant_for_pick = 0;
-
cpi->mode_check_freq[THR_SPLITG] = 15;
cpi->mode_check_freq[THR_SPLITA] = 15;
cpi->mode_check_freq[THR_SPLITMV] = 7;
@@ -840,8 +774,6 @@
sf->thresh_mult[THR_SPLITA ] = 50000;
}
- sf->first_step = 1;
-
sf->improved_quant = 0;
sf->improved_dct = 0;
@@ -851,38 +783,14 @@
sf->full_freq[0] = 31;
sf->full_freq[1] = 63;
-
- }
-
- if (Speed > 2)
- {
- sf->auto_filter = 0; // Faster selection of loop filter
- cpi->mode_check_freq[THR_V_PRED] = 2;
- cpi->mode_check_freq[THR_H_PRED] = 2;
- cpi->mode_check_freq[THR_B_PRED] = 2;
-
- if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
- {
- cpi->mode_check_freq[THR_NEARG] = 2;
- cpi->mode_check_freq[THR_NEWG] = 4;
- }
-
- if (cpi->ref_frame_flags & VP8_ALT_FLAG)
- {
- cpi->mode_check_freq[THR_NEARA] = 2;
- cpi->mode_check_freq[THR_NEWA] = 4;
- }
-
- sf->thresh_mult[THR_SPLITA ] = INT_MAX;
- sf->thresh_mult[THR_SPLITG ] = INT_MAX;
- sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
-
- sf->full_freq[0] = 63;
- sf->full_freq[1] = 127;
}
if (Speed > 3)
{
+ sf->thresh_mult[THR_SPLITA ] = INT_MAX;
+ sf->thresh_mult[THR_SPLITG ] = INT_MAX;
+ sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
+
cpi->mode_check_freq[THR_V_PRED] = 0;
cpi->mode_check_freq[THR_H_PRED] = 0;
cpi->mode_check_freq[THR_B_PRED] = 0;
@@ -894,13 +802,16 @@
sf->auto_filter = 1;
sf->recode_loop = 0; // recode loop off
sf->RD = 0; // Turn rd off
- sf->full_freq[0] = INT_MAX;
- sf->full_freq[1] = INT_MAX;
+
+ sf->full_freq[0] = 63;
+ sf->full_freq[1] = 127;
}
if (Speed > 4)
{
sf->auto_filter = 0; // Faster selection of loop filter
+ sf->full_freq[0] = INT_MAX;
+ sf->full_freq[1] = INT_MAX;
cpi->mode_check_freq[THR_V_PRED] = 2;
cpi->mode_check_freq[THR_H_PRED] = 2;
@@ -966,33 +877,6 @@
sf->full_freq[1] = 31;
sf->search_method = NSTEP;
- if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
- {
- sf->thresh_mult[THR_NEWMV ] = INT_MAX;
- sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
- sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
- sf->thresh_mult[THR_NEARMV ] = INT_MAX;
- sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
- }
-
- if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
- {
- sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
- sf->thresh_mult[THR_ZEROG ] = INT_MAX;
- sf->thresh_mult[THR_NEARG ] = INT_MAX;
- sf->thresh_mult[THR_NEWG ] = INT_MAX;
- sf->thresh_mult[THR_SPLITG ] = INT_MAX;
- }
-
- if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
- {
- sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
- sf->thresh_mult[THR_ZEROA ] = INT_MAX;
- sf->thresh_mult[THR_NEARA ] = INT_MAX;
- sf->thresh_mult[THR_NEWA ] = INT_MAX;
- sf->thresh_mult[THR_SPLITA ] = INT_MAX;
- }
-
if (Speed > 0)
{
cpi->mode_check_freq[THR_SPLITG] = 4;
@@ -1121,6 +1005,7 @@
#else
sf->search_method = DIAMOND;
#endif
+ sf->iterative_sub_pixel = 0;
cpi->mode_check_freq[THR_V_PRED] = 4;
cpi->mode_check_freq[THR_H_PRED] = 4;
@@ -1172,7 +1057,6 @@
int total_skip;
int min = 2000;
- sf->iterative_sub_pixel = 0;
if (cpi->oxcf.encode_breakout > 2000)
min = cpi->oxcf.encode_breakout;
@@ -1228,6 +1112,7 @@
sf->thresh_mult[THR_V_PRED] = INT_MAX;
sf->thresh_mult[THR_H_PRED] = INT_MAX;
+ sf->improved_mv_pred = 0;
}
if (Speed > 8)
@@ -1273,7 +1158,36 @@
vpx_memset(cpi->error_bins, 0, sizeof(cpi->error_bins));
- };
+ }; /* switch */
+
+ /* disable frame modes if flags not set */
+ if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
+ {
+ sf->thresh_mult[THR_NEWMV ] = INT_MAX;
+ sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
+ sf->thresh_mult[THR_ZEROMV ] = INT_MAX;
+ sf->thresh_mult[THR_NEARMV ] = INT_MAX;
+ sf->thresh_mult[THR_SPLITMV ] = INT_MAX;
+ }
+
+ if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
+ {
+ sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
+ sf->thresh_mult[THR_ZEROG ] = INT_MAX;
+ sf->thresh_mult[THR_NEARG ] = INT_MAX;
+ sf->thresh_mult[THR_NEWG ] = INT_MAX;
+ sf->thresh_mult[THR_SPLITG ] = INT_MAX;
+ }
+
+ if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
+ {
+ sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
+ sf->thresh_mult[THR_ZEROA ] = INT_MAX;
+ sf->thresh_mult[THR_NEARA ] = INT_MAX;
+ sf->thresh_mult[THR_NEWA ] = INT_MAX;
+ sf->thresh_mult[THR_SPLITA ] = INT_MAX;
+ }
+
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.
@@ -1353,6 +1267,8 @@
static void alloc_raw_frame_buffers(VP8_COMP *cpi)
{
int i, buffers;
+ /* allocate source_buffer to be multiples of 16 */
+ int width = (cpi->oxcf.Width + 15) & ~15;
buffers = cpi->oxcf.lag_in_frames;
@@ -1364,7 +1280,7 @@
for (i = 0; i < buffers; i++)
if (vp8_yv12_alloc_frame_buffer(&cpi->src_buffer[i].source_buffer,
- cpi->oxcf.Width, cpi->oxcf.Height,
+ width, cpi->oxcf.Height,
16))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate lag buffer");
@@ -1372,7 +1288,7 @@
#if VP8_TEMPORAL_ALT_REF
if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer.source_buffer,
- cpi->oxcf.Width, cpi->oxcf.Height, 16))
+ width, cpi->oxcf.Height, 16))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate altref buffer");
@@ -1383,7 +1299,6 @@
static int vp8_alloc_partition_data(VP8_COMP *cpi)
{
- if(cpi->mb.pip)
vpx_free(cpi->mb.pip);
cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) *
@@ -1430,7 +1345,6 @@
"Failed to allocate scaled source buffer");
- if (cpi->tok != 0)
vpx_free(cpi->tok);
{
@@ -1446,7 +1360,6 @@
// Structures used to minitor GF usage
- if (cpi->gf_active_flags != 0)
vpx_free(cpi->gf_active_flags);
CHECK_MEM_ERROR(cpi->gf_active_flags, vpx_calloc(1, cm->mb_rows * cm->mb_cols));
@@ -1454,12 +1367,10 @@
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
#if !(CONFIG_REALTIME_ONLY)
- if(cpi->total_stats)
vpx_free(cpi->total_stats);
cpi->total_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
- if(cpi->this_frame_stats)
vpx_free(cpi->this_frame_stats);
cpi->this_frame_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
@@ -1480,6 +1391,8 @@
cpi->mt_sync_range = 16;
#endif
+ vpx_free(cpi->tplist);
+
CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));
}
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 8ec1ecc..a9ccc89 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -185,6 +185,7 @@
int use_fastquant_for_pick;
int no_skip_block4x4_search;
+ int improved_mv_pred;
} SPEED_FEATURES;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 487126d..6ab85ad 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -608,7 +608,7 @@
continue;
}
- if(x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
+ if(cpi->sf.improved_mv_pred && x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
{
if(!saddone)
{
@@ -685,40 +685,53 @@
int n = 0;
int sadpb = x->sadperbit16;
- int col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3;
- int col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3;
- int row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3;
- int row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3;
+ int col_min;
+ int col_max;
+ int row_min;
+ int row_max;
int tmp_col_min = x->mv_col_min;
int tmp_col_max = x->mv_col_max;
int tmp_row_min = x->mv_row_min;
int tmp_row_max = x->mv_row_max;
- // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
- if (x->mv_col_min < col_min )
- x->mv_col_min = col_min;
- if (x->mv_col_max > col_max )
- x->mv_col_max = col_max;
- if (x->mv_row_min < row_min )
- x->mv_row_min = row_min;
- if (x->mv_row_max > row_max )
- x->mv_row_max = row_max;
+ int speed_adjust = (cpi->Speed > 5) ? ((cpi->Speed >= 8)? 3 : 2) : 1;
// Further step/diamond searches as necessary
+ step_param = cpi->sf.first_step + speed_adjust;
+
+ if(cpi->sf.improved_mv_pred)
{
- int speed_adjust = (cpi->Speed > 5) ? ((cpi->Speed >= 8)? 3 : 2) : 1;
- step_param = cpi->sf.first_step + speed_adjust;
sr += speed_adjust;
//adjust search range according to sr from mv prediction
if(sr > step_param)
step_param = sr;
- further_steps = (cpi->Speed >= 8)? 0: (cpi->sf.max_step_search_steps - 1 - step_param);
+
+ col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3;
+ col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3;
+ row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3;
+ row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3;
+
+ // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
+ if (x->mv_col_min < col_min )
+ x->mv_col_min = col_min;
+ if (x->mv_col_max > col_max )
+ x->mv_col_max = col_max;
+ if (x->mv_row_min < row_min )
+ x->mv_row_min = row_min;
+ if (x->mv_row_max > row_max )
+ x->mv_row_max = row_max;
+ }else
+ {
+ mvp.row = best_ref_mv.row;
+ mvp.col = best_ref_mv.col;
}
+ further_steps = (cpi->Speed >= 8)? 0: (cpi->sf.max_step_search_steps - 1 - step_param);
+
if (cpi->sf.search_method == HEX)
{
- bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
+ bestsme = vp8_hex_search(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv);
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
}
@@ -760,10 +773,13 @@
}
}
- x->mv_col_min = tmp_col_min;
- x->mv_col_max = tmp_col_max;
- x->mv_row_min = tmp_row_min;
- x->mv_row_max = tmp_row_max;
+ if(cpi->sf.improved_mv_pred)
+ {
+ x->mv_col_min = tmp_col_min;
+ x->mv_col_max = tmp_col_max;
+ x->mv_row_min = tmp_row_min;
+ x->mv_row_max = tmp_row_max;
+ }
if (bestsme < INT_MAX)
cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost);
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index a27b662..ba5d337 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -584,14 +584,41 @@
*Rate = vp8_rdcost_mby(mb);
}
+static void save_predictor(unsigned char *predictor, unsigned char *dst) /* pack a 4x4 byte block read from `predictor` into 16 consecutive bytes at `dst` */
+{
+ int r, c; /* r counts rows, c indexes the column within the current row */
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ *dst = predictor[c]; /* copy column c of the current source row */
+ dst++; /* output is contiguous: no gap is left between rows */
+ }
+
+ predictor += 16; /* the next source row begins 16 bytes after this one */
+ }
+}
+static void restore_predictor(unsigned char *predictor, unsigned char *dst) /* unpack 16 consecutive bytes at `dst` into a 4x4 block inside `predictor`; despite its name, `dst` is only read here */
+{
+ int r, c; /* r counts rows, c indexes the column within the current row */
+ for (r = 0; r < 4; r++)
+ {
+ for (c = 0; c < 4; c++)
+ {
+ predictor[c] = *dst; /* write column c of the current row from the packed copy */
+ dst++; /* packed bytes are consumed sequentially */
+ }
+
+ predictor += 16; /* the next row to fill begins 16 bytes after this one */
+ }
+}
static int rd_pick_intra4x4block(
VP8_COMP *cpi,
MACROBLOCK *x,
BLOCK *be,
BLOCKD *b,
B_PREDICTION_MODE *best_mode,
- B_PREDICTION_MODE above,
- B_PREDICTION_MODE left,
+ unsigned int *bmode_costs,
ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l,
@@ -600,31 +627,27 @@
int *bestdistortion)
{
B_PREDICTION_MODE mode;
- int best_rd = INT_MAX; // 1<<30
+ int best_rd = INT_MAX;
int rate = 0;
int distortion;
- unsigned int *mode_costs;
ENTROPY_CONTEXT ta = *a, tempa = *a;
ENTROPY_CONTEXT tl = *l, templ = *l;
-
- if (x->e_mbd.frame_type == KEY_FRAME)
- {
- mode_costs = x->bmode_costs[above][left];
- }
- else
- {
- mode_costs = x->inter_bmode_costs;
- }
+ DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16);
+ DECLARE_ALIGNED_ARRAY(16, short, dqcoeff, 16);
for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
{
int this_rd;
int ratey;
- rate = mode_costs[mode];
- vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, mode);
+ rate = bmode_costs[mode];
+
+ vp8_predict_intra4x4(b, mode, b->predictor);
+ ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
+ x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
+ x->quantize_b(be, b);
tempa = ta;
templ = tl;
@@ -644,17 +667,23 @@
*best_mode = mode;
*a = tempa;
*l = templ;
+ save_predictor(b->predictor, predictor);
+ vpx_memcpy(dqcoeff, b->dqcoeff, 32);
}
}
b->bmi.mode = (B_PREDICTION_MODE)(*best_mode);
- vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, b->bmi.mode);
+
+ restore_predictor(b->predictor, predictor);
+ vpx_memcpy(b->dqcoeff, dqcoeff, 32);
+
+ IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(b->dqcoeff, b->diff, 32);
+ RECON_INVOKE(IF_RTCD(&cpi->rtcd.common->recon), recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
return best_rd;
}
-
int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
int *rate_y, int *Distortion, int best_rd)
{
@@ -667,6 +696,7 @@
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
+ unsigned int *bmode_costs;
vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
@@ -676,17 +706,25 @@
vp8_intra_prediction_down_copy(xd);
+ bmode_costs = mb->inter_bmode_costs;
+
for (i = 0; i < 16; i++)
{
MODE_INFO *const mic = xd->mode_info_context;
const int mis = xd->mode_info_stride;
- const B_PREDICTION_MODE A = vp8_above_bmi(mic, i, mis)->mode;
- const B_PREDICTION_MODE L = vp8_left_bmi(mic, i)->mode;
B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
+ if (mb->e_mbd.frame_type == KEY_FRAME)
+ {
+ const B_PREDICTION_MODE A = vp8_above_bmi(mic, i, mis)->mode;
+ const B_PREDICTION_MODE L = vp8_left_bmi(mic, i)->mode;
+
+ bmode_costs = mb->bmode_costs[A][L];
+ }
+
total_rd += rd_pick_intra4x4block(
- cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L,
+ cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
ta + vp8_block2above[i],
tl + vp8_block2left[i], &r, &ry, &d);
@@ -708,7 +746,6 @@
return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
-
int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
MACROBLOCK *x,
int *Rate,
@@ -776,15 +813,6 @@
}
-unsigned int vp8_get_mbuvrecon_error(const vp8_variance_rtcd_vtable_t *rtcd, const MACROBLOCK *x) // sum of squares
-{
- unsigned int sse0, sse1;
- int sum0, sum1;
- VARIANCE_INVOKE(rtcd, get8x8var)(x->src.u_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer, x->e_mbd.dst.uv_stride, &sse0, &sum0);
- VARIANCE_INVOKE(rtcd, get8x8var)(x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride, &sse1, &sum1);
- return (sse0 + sse1);
-}
-
static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel)
{
vp8_build_uvmvs(&x->e_mbd, fullpixel);
@@ -812,7 +840,12 @@
int this_rd;
x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
- vp8_encode_intra16x16mbuvrd(IF_RTCD(&cpi->rtcd), x);
+ vp8_build_intra_predictors_mbuv(&x->e_mbd);
+ ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff,
+ x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor,
+ x->src.uv_stride);
+ vp8_transform_mbuv(x);
+ vp8_quantize_mbuv(x);
rate_to = rd_cost_mbuv(x);
rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode];
@@ -1149,7 +1182,7 @@
if (cpi->sf.search_method == HEX)
bestsme = vp8_hex_search(x, c, e, bsi->ref_mv,
- &mode_mv[NEW4X4], step_param, sadpb, &num00, v_fn_ptr, x->mvsadcost, x->mvcost);
+ &mode_mv[NEW4X4], step_param, sadpb, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv);
else
{
@@ -2091,7 +2124,7 @@
if (cpi->sf.search_method == HEX)
{
- bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
+ bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv);
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
}
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 9d1d21a..9ff8887 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -204,7 +204,7 @@
step_param,
sadpb/*x->errorperbit*/,
&num00, &cpi->fn_ptr[BLOCK_16X16],
- mvsadcost, mvcost);
+ mvsadcost, mvcost, &best_ref_mv1);
}
else
{
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index 5754175..f0336ab 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -586,52 +586,45 @@
STACK_FRAME_CREATE_X3
- lea end_ptr, [src_ptr+src_stride*8]
-
- lea end_ptr, [end_ptr+src_stride*8]
- pxor mm7, mm7
+ mov end_ptr, 4
+ pxor xmm7, xmm7
.vp8_sad16x16_sse3_loop:
-
- movq ret_var, mm7
- cmp ret_var, max_err
- jg .vp8_sad16x16_early_exit
-
- movq mm0, QWORD PTR [src_ptr]
- movq mm2, QWORD PTR [src_ptr+8]
-
- movq mm1, QWORD PTR [ref_ptr]
- movq mm3, QWORD PTR [ref_ptr+8]
-
- movq mm4, QWORD PTR [src_ptr+src_stride]
- movq mm5, QWORD PTR [ref_ptr+ref_stride]
-
- psadbw mm0, mm1
- psadbw mm2, mm3
-
- movq mm1, QWORD PTR [src_ptr+src_stride+8]
- movq mm3, QWORD PTR [ref_ptr+ref_stride+8]
-
- psadbw mm4, mm5
- psadbw mm1, mm3
+ movdqa xmm0, XMMWORD PTR [src_ptr]
+ movdqu xmm1, XMMWORD PTR [ref_ptr]
+ movdqa xmm2, XMMWORD PTR [src_ptr+src_stride]
+ movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride]
lea src_ptr, [src_ptr+src_stride*2]
lea ref_ptr, [ref_ptr+ref_stride*2]
- paddw mm0, mm2
- paddw mm4, mm1
+ movdqa xmm4, XMMWORD PTR [src_ptr]
+ movdqu xmm5, XMMWORD PTR [ref_ptr]
+ movdqa xmm6, XMMWORD PTR [src_ptr+src_stride]
- paddw mm7, mm0
- paddw mm7, mm4
+ psadbw xmm0, xmm1
- cmp src_ptr, end_ptr
+ movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride]
+
+ psadbw xmm2, xmm3
+ psadbw xmm4, xmm5
+ psadbw xmm6, xmm1
+
+ lea src_ptr, [src_ptr+src_stride*2]
+ lea ref_ptr, [ref_ptr+ref_stride*2]
+
+ paddw xmm7, xmm0
+ paddw xmm7, xmm2
+ paddw xmm7, xmm4
+ paddw xmm7, xmm6
+
+ sub end_ptr, 1
jne .vp8_sad16x16_sse3_loop
- movq ret_var, mm7
-
-.vp8_sad16x16_early_exit:
-
- mov rax, ret_var
+ movq xmm0, xmm7
+ psrldq xmm7, 8
+ paddw xmm0, xmm7
+ movq rax, xmm0
STACK_FRAME_DESTROY_X3
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index abc5dc8..7980a0f 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -34,6 +34,7 @@
#File list for armv6
# encoder
+VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/walsh_v6$(ASM)
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index 080c9af..03084c5 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -15,7 +15,6 @@
VP8_CX_SRCS-$(ARCH_ARM) += decoder/asm_dec_offsets.c
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dequantize_arm.c
-VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK) += decoder/arm/detokenize$(ASM)
#File list for armv6
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_dc_idct_v6$(ASM)
diff --git a/vpx_scale/blackfin/yv12config.c b/vpx_scale/blackfin/yv12config.c
index 42538af..c404202 100644
--- a/vpx_scale/blackfin/yv12config.c
+++ b/vpx_scale/blackfin/yv12config.c
@@ -39,10 +39,7 @@
{
if (ybf)
{
- if (ybf->buffer_alloc)
- {
duck_free(ybf->buffer_alloc);
- }
ybf->buffer_alloc = 0;
}
diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c
index 420f719..4468e9d 100644
--- a/vpx_scale/generic/bicubic_scaler.c
+++ b/vpx_scale/generic/bicubic_scaler.c
@@ -271,17 +271,17 @@
{
if (!g_first_time)
{
- if (g_b_scaler.l_w) vpx_free(g_b_scaler.l_w);
+ vpx_free(g_b_scaler.l_w);
- if (g_b_scaler.l_h) vpx_free(g_b_scaler.l_h);
+ vpx_free(g_b_scaler.l_h);
- if (g_b_scaler.l_h_uv) vpx_free(g_b_scaler.l_h_uv);
+ vpx_free(g_b_scaler.l_h_uv);
- if (g_b_scaler.c_w) vpx_free(g_b_scaler.c_w);
+ vpx_free(g_b_scaler.c_w);
- if (g_b_scaler.c_h) vpx_free(g_b_scaler.c_h);
+ vpx_free(g_b_scaler.c_h);
- if (g_b_scaler.c_h_uv) vpx_free(g_b_scaler.c_h_uv);
+ vpx_free(g_b_scaler.c_h_uv);
vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
}
@@ -342,21 +342,21 @@
d_h_uv = (in_height / 2) / gcd_h_uv;
// allocate memory for the coefficents
- if (g_b_scaler.l_w) vpx_free(g_b_scaler.l_w);
+ vpx_free(g_b_scaler.l_w);
- if (g_b_scaler.l_h) vpx_free(g_b_scaler.l_h);
+ vpx_free(g_b_scaler.l_h);
- if (g_b_scaler.l_h_uv) vpx_free(g_b_scaler.l_h_uv);
+ vpx_free(g_b_scaler.l_h_uv);
g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
- if (g_b_scaler.c_w) vpx_free(g_b_scaler.c_w);
+ vpx_free(g_b_scaler.c_w);
- if (g_b_scaler.c_h) vpx_free(g_b_scaler.c_h);
+ vpx_free(g_b_scaler.c_h);
- if (g_b_scaler.c_h_uv) vpx_free(g_b_scaler.c_h_uv);
+ vpx_free(g_b_scaler.c_h_uv);
g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index e7c5b18..9539dca 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -24,10 +24,7 @@
{
if (ybf)
{
- if (ybf->buffer_alloc)
- {
duck_free(ybf->buffer_alloc);
- }
ybf->buffer_alloc = 0;
}