Merge "correct cost for implicit bit in mvs"
diff --git a/build/make/armlink_adapter.sh b/build/make/armlink_adapter.sh
index 571e46e..b53669c 100755
--- a/build/make/armlink_adapter.sh
+++ b/build/make/armlink_adapter.sh
@@ -17,15 +17,17 @@
on_of=1
elif [ "$i" == "-v" ]; then
verbose=1
+ elif [ "$i" == "-g" ]; then
+ args="${args} --debug"
elif [ "$on_of" == "1" ]; then
outfile=$i
- on_of=0
+ on_of=0
elif [ -f "$i" ]; then
infiles="$infiles $i"
elif [ "${i:0:2}" == "-l" ]; then
libs="$libs ${i#-l}"
elif [ "${i:0:2}" == "-L" ]; then
- libpaths="${libpaths} ${i#-L}"
+ libpaths="${libpaths} ${i#-L}"
else
args="${args} ${i}"
fi
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 88698fd..7234b79 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -711,7 +711,7 @@
TOOLCHAIN_PATH=${SDK_PATH}/usr/bin
CC=${TOOLCHAIN_PATH}/gcc
AR=${TOOLCHAIN_PATH}/ar
- LD=${TOOLCHAIN_PATH}/arm-apple-darwin9-gcc-4.2.1
+ LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-gcc-4.2.1
AS=${TOOLCHAIN_PATH}/as
STRIP=${TOOLCHAIN_PATH}/strip
NM=${TOOLCHAIN_PATH}/nm
@@ -725,14 +725,14 @@
add_cflags -arch ${tgt_isa}
add_ldflags -arch_only ${tgt_isa}
- add_cflags "-isysroot /Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS3.1.sdk"
+ add_cflags "-isysroot /Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.sdk"
# This should be overridable
- alt_libc=${SDK_PATH}/SDKs/iPhoneOS3.1.sdk
+ alt_libc=${SDK_PATH}/SDKs/iPhoneOS4.2.sdk
# Add the paths for the alternate libc
# for d in usr/include usr/include/gcc/darwin/4.0/; do
- for d in usr/include usr/include/gcc/darwin/4.0/ usr/lib/gcc/arm-apple-darwin9/4.0.1/include/; do
+ for d in usr/include usr/include/gcc/darwin/4.0/ usr/lib/gcc/arm-apple-darwin10/4.2.1/include/; do
try_dir="${alt_libc}/${d}"
[ -d "${try_dir}" ] && add_cflags -I"${try_dir}"
done
diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c
index e01870f..3c54b24 100644
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -590,7 +590,7 @@
//log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
//log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr);
- fp = fopen("vpx_asm_offsets.asm", "w");
+ fp = fopen("assembly_offsets.asm", "w");
if (fp == NULL)
{
diff --git a/libs.mk b/libs.mk
index 9ded394..338822c 100644
--- a/libs.mk
+++ b/libs.mk
@@ -230,10 +230,39 @@
#
# Add assembler dependencies for configuration and offsets
#
-#$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm $(BUILD_PFX)vpx_asm_offsets.asm
$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
$(filter %.asm.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
+#
+# Calculate platform- and compiler-specific offsets for hand coded assembly
+#
+ifeq ($(ARCH_ARM), yes)
+ asm_com_offsets.asm: obj_int_extract
+ asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o
+ ./obj_int_extract rvds $< $(ADS2GAS) > $@
+ OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o
+ CLEAN-OBJS += asm_com_offsets.asm
+ $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm
+
+ ifeq ($(CONFIG_VP8_ENCODER), yes)
+ asm_enc_offsets.asm: obj_int_extract
+ asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
+ ./obj_int_extract rvds $< $(ADS2GAS) > $@
+ OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o
+ CLEAN-OBJS += asm_enc_offsets.asm
+ $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm
+ endif
+
+ ifeq ($(CONFIG_VP8_DECODER), yes)
+ asm_dec_offsets.asm: obj_int_extract
+ asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
+ ./obj_int_extract rvds $< $(ADS2GAS) > $@
+ OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o
+ CLEAN-OBJS += asm_dec_offsets.asm
+ $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm
+ endif
+endif
+
$(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)
CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
diff --git a/vp8/common/arm/armv6/bilinearfilter_v6.asm b/vp8/common/arm/armv6/bilinearfilter_v6.asm
index 09d7338..a86ed5d 100644
--- a/vp8/common/arm/armv6/bilinearfilter_v6.asm
+++ b/vp8/common/arm/armv6/bilinearfilter_v6.asm
@@ -15,19 +15,19 @@
AREA |.text|, CODE, READONLY ; name this block of code
;-------------------------------------
-; r0 unsigned char *src_ptr,
-; r1 unsigned short *output_ptr,
-; r2 unsigned int src_pixels_per_line,
-; r3 unsigned int output_height,
-; stack unsigned int output_width,
-; stack const short *vp8_filter
+; r0 unsigned char *src_ptr,
+; r1 unsigned short *dst_ptr,
+; r2 unsigned int src_pitch,
+; r3 unsigned int height,
+; stack unsigned int width,
+; stack const short *vp8_filter
;-------------------------------------
; The output is transposed stroed in output array to make it easy for second pass filtering.
|vp8_filter_block2d_bil_first_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
- ldr r4, [sp, #36] ; output width
+ ldr r4, [sp, #36] ; width
mov r12, r3 ; outer-loop counter
sub r2, r2, r4 ; src increment for height loop
@@ -38,10 +38,10 @@
ldr r5, [r11] ; load up filter coefficients
- mov r3, r3, lsl #1 ; output_height*2
+ mov r3, r3, lsl #1 ; height*2
add r3, r3, #2 ; plus 2 to make output buffer 4-bit aligned since height is actually (height+1)
- mov r11, r1 ; save output_ptr for each row
+ mov r11, r1 ; save dst_ptr for each row
cmp r5, #128 ; if filter coef = 128, then skip the filter
beq bil_null_1st_filter
@@ -140,17 +140,17 @@
;---------------------------------
; r0 unsigned short *src_ptr,
-; r1 unsigned char *output_ptr,
-; r2 int output_pitch,
-; r3 unsigned int output_height,
-; stack unsigned int output_width,
-; stack const short *vp8_filter
+; r1 unsigned char *dst_ptr,
+; r2 int dst_pitch,
+; r3 unsigned int height,
+; stack unsigned int width,
+; stack const short *vp8_filter
;---------------------------------
|vp8_filter_block2d_bil_second_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
- ldr r4, [sp, #36] ; output width
+ ldr r4, [sp, #36] ; width
ldr r5, [r11] ; load up filter coefficients
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c
index 65afb41..961d142 100644
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -10,128 +10,48 @@
#include <math.h>
+#include "filter.h"
#include "subpixel.h"
-#define BLOCK_HEIGHT_WIDTH 4
-#define VP8_FILTER_WEIGHT 128
-#define VP8_FILTER_SHIFT 7
-
-static const short bilinear_filters[8][2] =
-{
- { 128, 0 },
- { 112, 16 },
- { 96, 32 },
- { 80, 48 },
- { 64, 64 },
- { 48, 80 },
- { 32, 96 },
- { 16, 112 }
-};
-
-
extern void vp8_filter_block2d_bil_first_pass_armv6
(
- unsigned char *src_ptr,
- unsigned short *output_ptr,
- unsigned int src_pixels_per_line,
- unsigned int output_height,
- unsigned int output_width,
- const short *vp8_filter
+ unsigned char *src_ptr,
+ unsigned short *dst_ptr,
+ unsigned int src_pitch,
+ unsigned int height,
+ unsigned int width,
+ const short *vp8_filter
);
extern void vp8_filter_block2d_bil_second_pass_armv6
(
unsigned short *src_ptr,
- unsigned char *output_ptr,
- int output_pitch,
- unsigned int output_height,
- unsigned int output_width,
- const short *vp8_filter
+ unsigned char *dst_ptr,
+ int dst_pitch,
+ unsigned int height,
+ unsigned int width,
+ const short *vp8_filter
);
-#if 0
-void vp8_filter_block2d_bil_first_pass_6
-(
- unsigned char *src_ptr,
- unsigned short *output_ptr,
- unsigned int src_pixels_per_line,
- unsigned int output_height,
- unsigned int output_width,
- const short *vp8_filter
-)
-{
- unsigned int i, j;
-
- for ( i=0; i<output_height; i++ )
- {
- for ( j=0; j<output_width; j++ )
- {
- /* Apply bilinear filter */
- output_ptr[j] = ( ( (int)src_ptr[0] * vp8_filter[0]) +
- ((int)src_ptr[1] * vp8_filter[1]) +
- (VP8_FILTER_WEIGHT/2) ) >> VP8_FILTER_SHIFT;
- src_ptr++;
- }
-
- /* Next row... */
- src_ptr += src_pixels_per_line - output_width;
- output_ptr += output_width;
- }
-}
-
-void vp8_filter_block2d_bil_second_pass_6
-(
- unsigned short *src_ptr,
- unsigned char *output_ptr,
- int output_pitch,
- unsigned int output_height,
- unsigned int output_width,
- const short *vp8_filter
-)
-{
- unsigned int i,j;
- int Temp;
-
- for ( i=0; i<output_height; i++ )
- {
- for ( j=0; j<output_width; j++ )
- {
- /* Apply filter */
- Temp = ((int)src_ptr[0] * vp8_filter[0]) +
- ((int)src_ptr[output_width] * vp8_filter[1]) +
- (VP8_FILTER_WEIGHT/2);
- output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
- src_ptr++;
- }
-
- /* Next row... */
- /*src_ptr += src_pixels_per_line - output_width;*/
- output_ptr += output_pitch;
- }
-}
-#endif
-
void vp8_filter_block2d_bil_armv6
(
unsigned char *src_ptr,
- unsigned char *output_ptr,
- unsigned int src_pixels_per_line,
+ unsigned char *dst_ptr,
+ unsigned int src_pitch,
unsigned int dst_pitch,
- const short *HFilter,
- const short *VFilter,
+ const short *HFilter,
+ const short *VFilter,
int Width,
int Height
)
{
-
- unsigned short FData[36*16]; /* Temp data bufffer used in filtering */
+ unsigned short FData[36*16]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
- /* pixel_step = 1; */
- vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter);
+ vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
/* then 1-D vertically... */
- vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter);
+ vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
}
@@ -148,8 +68,8 @@
const short *HFilter;
const short *VFilter;
- HFilter = bilinear_filters[xoffset];
- VFilter = bilinear_filters[yoffset];
+ HFilter = vp8_bilinear_filters[xoffset];
+ VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
}
@@ -167,8 +87,8 @@
const short *HFilter;
const short *VFilter;
- HFilter = bilinear_filters[xoffset];
- VFilter = bilinear_filters[yoffset];
+ HFilter = vp8_bilinear_filters[xoffset];
+ VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
}
@@ -186,8 +106,8 @@
const short *HFilter;
const short *VFilter;
- HFilter = bilinear_filters[xoffset];
- VFilter = bilinear_filters[yoffset];
+ HFilter = vp8_bilinear_filters[xoffset];
+ VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
}
@@ -205,8 +125,8 @@
const short *HFilter;
const short *VFilter;
- HFilter = bilinear_filters[xoffset];
- VFilter = bilinear_filters[yoffset];
+ HFilter = vp8_bilinear_filters[xoffset];
+ VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
}
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index b4f2fe6..2612fc1 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -11,26 +11,10 @@
#include "vpx_ports/config.h"
#include <math.h>
+#include "filter.h"
#include "subpixel.h"
#include "vpx_ports/mem.h"
-#define BLOCK_HEIGHT_WIDTH 4
-#define VP8_FILTER_WEIGHT 128
-#define VP8_FILTER_SHIFT 7
-
-DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) =
-{
- { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
- { 0, -6, 123, 12, -1, 0 },
- { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
- { 0, -9, 93, 50, -6, 0 },
- { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
- { 0, -6, 50, 93, -9, 0 },
- { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
- { 0, -1, 12, 123, -6, 0 },
-};
-
-
extern void vp8_filter_block2d_first_pass_armv6
(
unsigned char *src_ptr,
@@ -93,11 +77,11 @@
{
const short *HFilter;
const short *VFilter;
- DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data bufffer used in filtering */
+ DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; /* 6 tap */
- VFilter = sub_pel_filters[yoffset]; /* 6 tap */
+ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* Vfilter is null. First pass only */
if (xoffset && !yoffset)
@@ -129,47 +113,6 @@
}
}
-#if 0
-void vp8_sixtap_predict8x4_armv6
-(
- unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch
-)
-{
- const short *HFilter;
- const short *VFilter;
- DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
-
- HFilter = sub_pel_filters[xoffset]; /* 6 tap */
- VFilter = sub_pel_filters[yoffset]; /* 6 tap */
-
-
- /*if (xoffset && !yoffset)
- {
- vp8_filter_block2d_first_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
- }*/
- /* Hfilter is null. Second pass only */
- /*else if (!xoffset && yoffset)
- {
- vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
- }
- else
- {
- if (yoffset & 0x1)
- vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
- else*/
-
- vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter );
-
- vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter );
- /*}*/
-}
-#endif
-
void vp8_sixtap_predict8x8_armv6
(
unsigned char *src_ptr,
@@ -182,10 +125,10 @@
{
const short *HFilter;
const short *VFilter;
- DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
+ DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; /* 6 tap */
- VFilter = sub_pel_filters[yoffset]; /* 6 tap */
+ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
if (xoffset && !yoffset)
{
@@ -224,10 +167,10 @@
{
const short *HFilter;
const short *VFilter;
- DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data bufffer used in filtering */
+ DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; /* 6 tap */
- VFilter = sub_pel_filters[yoffset]; /* 6 tap */
+ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
if (xoffset && !yoffset)
{
diff --git a/vp8/common/arm/vpx_asm_offsets.c b/vp8/common/arm/vpx_asm_offsets.c
deleted file mode 100644
index 5baf8cc..0000000
--- a/vp8/common/arm/vpx_asm_offsets.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include <stddef.h>
-
-#if CONFIG_VP8_ENCODER
-#include "vpx_scale/yv12config.h"
-#endif
-
-#if CONFIG_VP8_DECODER
-#include "onyxd_int.h"
-#endif
-
-#define DEFINE(sym, val) int sym = val;
-
-/*
-#define BLANK() asm volatile("\n->" : : )
-*/
-
-/*
- * int main(void)
- * {
- */
-
-#if CONFIG_VP8_DECODER || CONFIG_VP8_ENCODER
-DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
-DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
-DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
-DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
-DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
-DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
-DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
-DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
-DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
-DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
-#endif
-
-#if CONFIG_VP8_DECODER
-DEFINE(mb_diff, offsetof(MACROBLOCKD, diff));
-DEFINE(mb_predictor, offsetof(MACROBLOCKD, predictor));
-DEFINE(mb_dst_y_stride, offsetof(MACROBLOCKD, dst.y_stride));
-DEFINE(mb_dst_y_buffer, offsetof(MACROBLOCKD, dst.y_buffer));
-DEFINE(mb_dst_u_buffer, offsetof(MACROBLOCKD, dst.u_buffer));
-DEFINE(mb_dst_v_buffer, offsetof(MACROBLOCKD, dst.v_buffer));
-DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_available));
-DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available));
-
-DEFINE(detok_scan, offsetof(DETOK, scan));
-DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove));
-DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr));
-DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr));
-DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr));
-DEFINE(detok_ptr_coef_bands_x, offsetof(DETOK, ptr_coef_bands_x));
-
-DEFINE(detok_A, offsetof(DETOK, A));
-DEFINE(detok_L, offsetof(DETOK, L));
-
-DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr));
-DEFINE(detok_current_bc, offsetof(DETOK, current_bc));
-DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs));
-DEFINE(detok_eob, offsetof(DETOK, eob));
-
-DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
-DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
-DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
-DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
-DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
-
-DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
-DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length));
-#endif
-
-//add asserts for any offset that is not supported by assembly code
-//add asserts for any size that is not supported by assembly code
-/*
- * return 0;
- * }
- */
diff --git a/vp8/common/asm_com_offsets.c b/vp8/common/asm_com_offsets.c
new file mode 100644
index 0000000..d299dd2
--- /dev/null
+++ b/vp8/common/asm_com_offsets.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include <stddef.h>
+
+#include "vpx_scale/yv12config.h"
+
+#define ct_assert(name,cond) \
+ static void assert_##name(void) UNUSED;\
+ static void assert_##name(void) {switch(0){case 0:case !!(cond):;}}
+
+#define DEFINE(sym, val) int sym = val;
+
+/*
+#define BLANK() asm volatile("\n->" : : )
+*/
+
+/*
+ * int main(void)
+ * {
+ */
+
+//vpx_scale
+DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
+DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
+DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
+DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
+DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
+DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
+DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
+DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
+DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
+DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
+
+//add asserts for any offset that is not supported by assembly code
+//add asserts for any size that is not supported by assembly code
+/*
+ * return 0;
+ * }
+ */
diff --git a/vp8/common/filter_c.c b/vp8/common/filter.c
similarity index 66%
rename from vp8/common/filter_c.c
rename to vp8/common/filter.c
index 399a847..6e364a9 100644
--- a/vp8/common/filter_c.c
+++ b/vp8/common/filter.c
@@ -10,13 +10,10 @@
#include <stdlib.h>
+#include "filter.h"
+#include "vpx_ports/mem.h"
-#define BLOCK_HEIGHT_WIDTH 4
-#define VP8_FILTER_WEIGHT 128
-#define VP8_FILTER_SHIFT 7
-
-
-static const int bilinear_filters[8][2] =
+DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
{
{ 128, 0 },
{ 112, 16 },
@@ -28,8 +25,7 @@
{ 16, 112 }
};
-
-static const short sub_pel_filters[8][6] =
+DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
{
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
@@ -40,9 +36,6 @@
{ 0, -6, 50, 93, -9, 0 },
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
-
-
-
};
void vp8_filter_block2d_first_pass
@@ -146,7 +139,7 @@
const short *VFilter
)
{
- int FData[9*4]; /* Temp data bufffer used in filtering */
+ int FData[9*4]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
@@ -195,8 +188,8 @@
const short *HFilter;
const short *VFilter;
- HFilter = sub_pel_filters[xoffset]; /* 6 tap */
- VFilter = sub_pel_filters[yoffset]; /* 6 tap */
+ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
}
@@ -212,10 +205,10 @@
{
const short *HFilter;
const short *VFilter;
- int FData[13*16]; /* Temp data bufffer used in filtering */
+ int FData[13*16]; /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; /* 6 tap */
- VFilter = sub_pel_filters[yoffset]; /* 6 tap */
+ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
@@ -238,10 +231,10 @@
{
const short *HFilter;
const short *VFilter;
- int FData[13*16]; /* Temp data bufffer used in filtering */
+ int FData[13*16]; /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; /* 6 tap */
- VFilter = sub_pel_filters[yoffset]; /* 6 tap */
+ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
@@ -264,11 +257,11 @@
{
const short *HFilter;
const short *VFilter;
- int FData[21*24]; /* Temp data bufffer used in filtering */
+ int FData[21*24]; /* Temp data buffer used in filtering */
- HFilter = sub_pel_filters[xoffset]; /* 6 tap */
- VFilter = sub_pel_filters[yoffset]; /* 6 tap */
+ HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
+ VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
@@ -283,57 +276,50 @@
*
* ROUTINE : filter_block2d_bil_first_pass
*
- * INPUTS : UINT8 *src_ptr : Pointer to source block.
- * UINT32 src_pixels_per_line : Stride of input block.
- * UINT32 pixel_step : Offset between filter input samples (see notes).
- * UINT32 output_height : Input block height.
- * UINT32 output_width : Input block width.
- * INT32 *vp8_filter : Array of 2 bi-linear filter taps.
+ * INPUTS : UINT8 *src_ptr : Pointer to source block.
+ * UINT32 src_stride : Stride of source block.
+ * UINT32 height : Block height.
+ * UINT32 width : Block width.
+ * INT16 *vp8_filter : Array of 2 bi-linear filter taps.
*
- * OUTPUTS : INT32 *output_ptr : Pointer to filtered block.
+ * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
*
* RETURNS : void
*
- * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
- * either horizontal or vertical direction to produce the
- * filtered output block. Used to implement first-pass
- * of 2-D separable filter.
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
+ * in the horizontal direction to produce the filtered output
+ * block. Used to implement first-pass of 2-D separable filter.
*
* SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
* Two filter taps should sum to VP8_FILTER_WEIGHT.
- * pixel_step defines whether the filter is applied
- * horizontally (pixel_step=1) or vertically (pixel_step=stride).
- * It defines the offset required to move from one input
- * to the next.
*
****************************************************************************/
void vp8_filter_block2d_bil_first_pass
(
- unsigned char *src_ptr,
- unsigned short *output_ptr,
- unsigned int src_pixels_per_line,
- int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const int *vp8_filter
+ unsigned char *src_ptr,
+ unsigned short *dst_ptr,
+ unsigned int src_stride,
+ unsigned int height,
+ unsigned int width,
+ const short *vp8_filter
)
{
unsigned int i, j;
- for (i = 0; i < output_height; i++)
+ for (i = 0; i < height; i++)
{
- for (j = 0; j < output_width; j++)
+ for (j = 0; j < width; j++)
{
/* Apply bilinear filter */
- output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
- ((int)src_ptr[pixel_step] * vp8_filter[1]) +
- (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
+ dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
+ ((int)src_ptr[1] * vp8_filter[1]) +
+ (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
src_ptr++;
}
/* Next row... */
- src_ptr += src_pixels_per_line - output_width;
- output_ptr += output_width;
+ src_ptr += src_stride - width;
+ dst_ptr += width;
}
}
@@ -341,60 +327,51 @@
*
* ROUTINE : filter_block2d_bil_second_pass
*
- * INPUTS : INT32 *src_ptr : Pointer to source block.
- * UINT32 src_pixels_per_line : Stride of input block.
- * UINT32 pixel_step : Offset between filter input samples (see notes).
- * UINT32 output_height : Input block height.
- * UINT32 output_width : Input block width.
- * INT32 *vp8_filter : Array of 2 bi-linear filter taps.
+ * INPUTS : UINT16 *src_ptr : Pointer to source block.
+ * INT32 dst_pitch : Destination block pitch.
+ * UINT32 height : Block height.
+ * UINT32 width : Block width.
+ * INT16 *vp8_filter : Array of 2 bi-linear filter taps.
*
- * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
+ * OUTPUTS : UINT8 *dst_ptr : Pointer to filtered block.
*
* RETURNS : void
*
- * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
- * either horizontal or vertical direction to produce the
- * filtered output block. Used to implement second-pass
- * of 2-D separable filter.
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
+ * in the vertical direction to produce the filtered output
+ * block. Used to implement second-pass of 2-D separable filter.
*
* SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
* Two filter taps should sum to VP8_FILTER_WEIGHT.
- * pixel_step defines whether the filter is applied
- * horizontally (pixel_step=1) or vertically (pixel_step=stride).
- * It defines the offset required to move from one input
- * to the next.
*
****************************************************************************/
void vp8_filter_block2d_bil_second_pass
(
unsigned short *src_ptr,
- unsigned char *output_ptr,
- int output_pitch,
- unsigned int src_pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const int *vp8_filter
+ unsigned char *dst_ptr,
+ int dst_pitch,
+ unsigned int height,
+ unsigned int width,
+ const short *vp8_filter
)
{
unsigned int i, j;
int Temp;
- for (i = 0; i < output_height; i++)
+ for (i = 0; i < height; i++)
{
- for (j = 0; j < output_width; j++)
+ for (j = 0; j < width; j++)
{
/* Apply filter */
- Temp = ((int)src_ptr[0] * vp8_filter[0]) +
- ((int)src_ptr[pixel_step] * vp8_filter[1]) +
+ Temp = ((int)src_ptr[0] * vp8_filter[0]) +
+ ((int)src_ptr[width] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2);
- output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
+ dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
src_ptr++;
}
/* Next row... */
- src_ptr += src_pixels_per_line - output_width;
- output_ptr += output_pitch;
+ dst_ptr += dst_pitch;
}
}
@@ -404,11 +381,14 @@
* ROUTINE : filter_block2d_bil
*
* INPUTS : UINT8 *src_ptr : Pointer to source block.
- * UINT32 src_pixels_per_line : Stride of input block.
- * INT32 *HFilter : Array of 2 horizontal filter taps.
- * INT32 *VFilter : Array of 2 vertical filter taps.
+ * UINT32 src_pitch : Stride of source block.
+ * UINT32 dst_pitch : Stride of destination block.
+ * INT16 *HFilter : Array of 2 horizontal filter taps.
+ * INT16 *VFilter : Array of 2 vertical filter taps.
+ * INT32 Width : Block width
+ * INT32 Height : Block height
*
- * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
+ * OUTPUTS : UINT8 *dst_ptr : Pointer to filtered block.
*
* RETURNS : void
*
@@ -422,23 +402,23 @@
void vp8_filter_block2d_bil
(
unsigned char *src_ptr,
- unsigned char *output_ptr,
- unsigned int src_pixels_per_line,
+ unsigned char *dst_ptr,
+ unsigned int src_pitch,
unsigned int dst_pitch,
- const int *HFilter,
- const int *VFilter,
+ const short *HFilter,
+ const short *VFilter,
int Width,
int Height
)
{
- unsigned short FData[17*16]; /* Temp data bufffer used in filtering */
+ unsigned short FData[17*16]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
- vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);
+ vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
/* then 1-D vertically... */
- vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
+ vp8_filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
}
@@ -452,11 +432,11 @@
int dst_pitch
)
{
- const int *HFilter;
- const int *VFilter;
+ const short *HFilter;
+ const short *VFilter;
- HFilter = bilinear_filters[xoffset];
- VFilter = bilinear_filters[yoffset];
+ HFilter = vp8_bilinear_filters[xoffset];
+ VFilter = vp8_bilinear_filters[yoffset];
#if 0
{
int i;
@@ -490,11 +470,11 @@
int dst_pitch
)
{
- const int *HFilter;
- const int *VFilter;
+ const short *HFilter;
+ const short *VFilter;
- HFilter = bilinear_filters[xoffset];
- VFilter = bilinear_filters[yoffset];
+ HFilter = vp8_bilinear_filters[xoffset];
+ VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
@@ -510,11 +490,11 @@
int dst_pitch
)
{
- const int *HFilter;
- const int *VFilter;
+ const short *HFilter;
+ const short *VFilter;
- HFilter = bilinear_filters[xoffset];
- VFilter = bilinear_filters[yoffset];
+ HFilter = vp8_bilinear_filters[xoffset];
+ VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
@@ -530,11 +510,11 @@
int dst_pitch
)
{
- const int *HFilter;
- const int *VFilter;
+ const short *HFilter;
+ const short *VFilter;
- HFilter = bilinear_filters[xoffset];
- VFilter = bilinear_filters[yoffset];
+ HFilter = vp8_bilinear_filters[xoffset];
+ VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
}
diff --git a/vp8/common/filter.h b/vp8/common/filter.h
new file mode 100644
index 0000000..0f225c2
--- /dev/null
+++ b/vp8/common/filter.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef FILTER_H
+#define FILTER_H
+
+#define BLOCK_HEIGHT_WIDTH 4
+#define VP8_FILTER_WEIGHT 128
+#define VP8_FILTER_SHIFT 7
+
+extern const short vp8_bilinear_filters[8][2];
+extern const short vp8_sub_pel_filters[8][6];
+
+#endif /* FILTER_H */
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
index e9741e2..02e45d1 100644
--- a/vp8/decoder/arm/arm_dsystemdependent.c
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -14,7 +14,6 @@
#include "blockd.h"
#include "pragmas.h"
#include "postproc.h"
-#include "dboolhuff.h"
#include "dequantize.h"
#include "onyxd_int.h"
@@ -35,12 +34,6 @@
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
-#if 0 /*For use with RTCD, when implemented*/
- pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
- pbi->dboolhuff.debool = vp8dx_decode_bool_c;
- pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-#endif
}
#endif
@@ -54,12 +47,6 @@
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
-#if 0 /*For use with RTCD, when implemented*/
- pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
- pbi->dboolhuff.debool = vp8dx_decode_bool_c;
- pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-#endif
}
#endif
#endif
diff --git a/vp8/decoder/arm/armv6/dboolhuff_v6.asm b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
deleted file mode 100644
index 6515804..0000000
--- a/vp8/decoder/arm/armv6/dboolhuff_v6.asm
+++ /dev/null
@@ -1,163 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_decode_value_v6|
- EXPORT |vp8dx_start_decode_v6|
- EXPORT |vp8dx_stop_decode_v6|
- EXPORT |vp8dx_decode_bool_v6|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- INCLUDE vpx_asm_offsets.asm
-
-br RN r0
-prob RN r1
-bits RN r1
- AREA |.text|, CODE, READONLY ; name this block of code
-
-; int z = 0;
-; int bit;
-; for ( bit=bits-1; bit>=0; bit-- )
-; {
-; z |= (vp8dx_decode_bool(br, 0x80)<<bit);
-; }
-; return z;
-
-;int vp8_decode_value_v6 ( BOOL_DECODER *br, int bits )
-|vp8_decode_value_v6| PROC
- stmdb sp!, {r4 - r6, lr}
- mov r4, br
- mov r5, bits
- mov r6, #0
-
- subs r5, r5, #1
- bmi decode_value_exit
-
-decode_value_loop
- mov prob, #0x80
- mov br, r4
- bl vp8dx_decode_bool_v6_internal ; needed for conversion to s file
- orr r6, r6, r0, lsl r5
- subs r5, r5, #1
- bpl decode_value_loop
-
-decode_value_exit
- mov r0, r6
- ldmia sp!, {r4 - r6, pc}
- ENDP ; |vp8_decode_value_v6|
-
-
-;void vp8dx_start_decode_v6 ( BOOL_DECODER *br, unsigned char *source )
-|vp8dx_start_decode_v6| PROC
- stmdb sp!, {r4 - r5, lr}
- mov r2, #0
- mov r3, #255
-
- str r2, [br, #bool_decoder_lowvalue]
- str r3, [br, #bool_decoder_range]
- str r1, [br, #bool_decoder_buffer]
-
- mov r3, #8
- mov r2, #4
- str r3, [br, #bool_decoder_count]
- str r2, [br, #bool_decoder_pos]
-
- ldrb r2, [r1, #3]
- ldrb r3, [r1, #2]
- ldrb r4, [r1, #1]
- ldrb r5, [r1]
-
- orr r1, r2, r3, lsl #8
- orr r1, r1, r4, lsl #16
- orr r1, r1, r5, lsl #24
-
- str r1, [br, #bool_decoder_value]
-
- ldmia sp!, {r4 - r5, pc}
- ENDP ; |vp8dx_start_decode_v6|
-
-
-;void vp8dx_stop_decode_v6 ( BOOL_DECODER *bc );
-|vp8dx_stop_decode_v6| PROC
- mov pc, lr
- ENDP ; |vp8dx_stop_decode_v6|
-
-
-; bigsplit RN r1
-; buffer_v RN r1
-; count_v RN r4
-; range_v RN r2
-; value_v RN r3
-; pos_v RN r5
-; split RN r6
-; bit RN lr
-;int vp8dx_decode_bool_v6 ( BOOL_DECODER *br, int probability )
-|vp8dx_decode_bool_v6| PROC
-vp8dx_decode_bool_v6_internal
- stmdb sp!, {r4 - r6, lr}
-
- ldr r2, [br, #bool_decoder_range]
- ldr r3, [br, #bool_decoder_value]
-
- mov r6, r2, lsl #8
- sub r6, r6, #256 ; split = 1 + (((range-1) * probability) >> 8)
- mov r12, #1
- smlawb r6, r6, prob, r12
-
- mov lr, #0
- subs r5, r3, r6, lsl #24
-
- ;cmp r3, r1
- movhs lr, #1
- movhs r3, r5
- subhs r2, r2, r6
- movlo r2, r6
-
- cmp r2, #0x80
- blt range_less_0x80
- ;strd r2, r3, [br, #bool_decoder_range]
- str r2, [br, #bool_decoder_range]
- str r3, [br, #bool_decoder_value]
- mov r0, lr
- ldmia sp!, {r4 - r6, pc}
-
-range_less_0x80
- ldr r5, [br, #bool_decoder_pos]
- ldr r1, [br, #bool_decoder_buffer]
- ldr r4, [br, #bool_decoder_count]
- add r1, r1, r5
-
- clz r12, r2
- sub r12, r12, #24
- subs r4, r4, r12
- ldrleb r6, [r1], #1
- mov r2, r2, lsl r12
- mov r3, r3, lsl r12
- addle r4, r4, #8
- rsble r12, r4, #8
- addle r5, r5, #1
- orrle r3, r3, r6, lsl r12
-
- ;strd r2, r3, [br, #bool_decoder_range]
- ;strd r4, r5, [br, #bool_decoder_count]
- str r2, [br, #bool_decoder_range]
- str r3, [br, #bool_decoder_value]
- str r4, [br, #bool_decoder_count]
- str r5, [br, #bool_decoder_pos]
-
- mov r0, lr
-
- ldmia sp!, {r4 - r6, pc}
- ENDP ; |vp8dx_decode_bool_v6|
-
- END
diff --git a/vp8/decoder/arm/dboolhuff_arm.h b/vp8/decoder/arm/dboolhuff_arm.h
deleted file mode 100644
index 985951c..0000000
--- a/vp8/decoder/arm/dboolhuff_arm.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef DBOOLHUFF_ARM_H
-#define DBOOLHUFF_ARM_H
-
-/* JLK
- * There are currently no arm-optimized versions of
- * these functions. As they are implemented, they
- * can be uncommented below and added to
- * arm/dsystemdependent.c
- *
- * The existing asm code is likely so different as
- * to be useless. However, its been left (for now)
- * for reference.
- */
-#if 0
-#if HAVE_ARMV6
-#undef vp8_dbool_start
-#define vp8_dbool_start vp8dx_start_decode_v6
-
-#undef vp8_dbool_fill
-#define vp8_dbool_fill vp8_bool_decoder_fill_v6
-
-#undef vp8_dbool_debool
-#define vp8_dbool_debool vp8_decode_bool_v6
-
-#undef vp8_dbool_devalue
-#define vp8_dbool_devalue vp8_decode_value_v6
-#endif /* HAVE_ARMV6 */
-
-#if HAVE_ARMV7
-#undef vp8_dbool_start
-#define vp8_dbool_start vp8dx_start_decode_neon
-
-#undef vp8_dbool_fill
-#define vp8_dbool_fill vp8_bool_decoder_fill_neon
-
-#undef vp8_dbool_debool
-#define vp8_dbool_debool vp8_decode_bool_neon
-
-#undef vp8_dbool_devalue
-#define vp8_dbool_devalue vp8_decode_value_neon
-#endif /* HAVE_ARMV7 */
-#endif
-#endif /* DBOOLHUFF_ARM_H */
diff --git a/vp8/decoder/arm/detokenize.asm b/vp8/decoder/arm/detokenize.asm
index 45e068a..0c164f1 100644
--- a/vp8/decoder/arm/detokenize.asm
+++ b/vp8/decoder/arm/detokenize.asm
@@ -13,7 +13,7 @@
AREA |.text|, CODE, READONLY ; name this block of code
- INCLUDE vpx_asm_offsets.asm
+ INCLUDE asm_dec_offsets.asm
l_qcoeff EQU 0
l_i EQU 4
diff --git a/vp8/decoder/arm/neon/dboolhuff_neon.asm b/vp8/decoder/arm/neon/dboolhuff_neon.asm
deleted file mode 100644
index ff3ffda..0000000
--- a/vp8/decoder/arm/neon/dboolhuff_neon.asm
+++ /dev/null
@@ -1,160 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_decode_value_neon|
- EXPORT |vp8dx_start_decode_neon|
- EXPORT |vp8dx_stop_decode_neon|
- EXPORT |vp8dx_decode_bool_neon|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- INCLUDE vpx_asm_offsets.asm
-
- AREA |.text|, CODE, READONLY ; name this block of code
-
-; int z = 0;
-; int bit;
-; for ( bit=bits-1; bit>=0; bit-- )
-; {
-; z |= (vp8dx_decode_bool(br, 0x80)<<bit);
-; }
-; return z;
-
-;int vp8_decode_value_neon ( BOOL_DECODER *br, int bits )
-|vp8_decode_value_neon| PROC
- stmdb sp!, {r4 - r6, lr}
- mov r4, r0
- mov r5, r1
- mov r6, #0
-
- subs r5, r5, #1
- bmi decode_value_exit
-
-decode_value_loop
- mov r1, #0x80
- mov r0, r4
- bl vp8dx_decode_bool_neon_internal ; needed for conversion to s file
- orr r6, r6, r0, lsl r5
- subs r5, r5, #1
- bpl decode_value_loop
-
-decode_value_exit
- mov r0, r6
- ldmia sp!, {r4 - r6, pc}
- ENDP ; |vp8_decode_value_neon|
-
-
-;void vp8dx_start_decode_neon ( BOOL_DECODER *br, unsigned char *source )
-|vp8dx_start_decode_neon| PROC
- stmdb sp!, {r4 - r5, lr}
- mov r2, #0
- mov r3, #255
-
- str r2, [r0, #bool_decoder_lowvalue]
- str r3, [r0, #bool_decoder_range]
- str r1, [r0, #bool_decoder_buffer]
-
- mov r3, #8
- mov r2, #4
- str r3, [r0, #bool_decoder_count]
- str r2, [r0, #bool_decoder_pos]
-
- ldrb r2, [r1, #3]
- ldrb r3, [r1, #2]
- ldrb r4, [r1, #1]
- ldrb r5, [r1]
-
- orr r1, r2, r3, lsl #8
- orr r1, r1, r4, lsl #16
- orr r1, r1, r5, lsl #24
-
- str r1, [r0, #bool_decoder_value]
-
- ldmia sp!, {r4 - r5, pc}
- ENDP ; |vp8dx_start_decode_neon|
-
-
-;void vp8dx_stop_decode_neon ( BOOL_DECODER *bc );
-|vp8dx_stop_decode_neon| PROC
- mov pc, lr
- ENDP ; |vp8dx_stop_decode_neon|
-
-
-; bigsplit RN r1
-; buffer_v RN r1
-; count_v RN r4
-; range_v RN r2
-; value_v RN r3
-; pos_v RN r5
-; split RN r6
-; bit RN lr
-;int vp8dx_decode_bool_neon ( BOOL_DECODER *br, int probability )
-|vp8dx_decode_bool_neon| PROC
-vp8dx_decode_bool_neon_internal
-;LDRD and STRD doubleword data transfers must be eight-byte aligned. Use ALIGN 8
-;before memory allocation
- stmdb sp!, {r4 - r5, lr}
-
- ldr r2, [r0, #bool_decoder_range] ;load range (r2), value(r3)
- ldr r3, [r0, #bool_decoder_value]
- ;ldrd r2, r3, [r0, #bool_decoder_range] ;ldrd costs 2 cycles
- ;
-
- mov r4, r2, lsl #8
- sub r4, r4, #256
- mov r12, #1
-
- smlawb r4, r4, r1, r12 ;split = 1 + (((range-1) * probability) >> 8)
-
- mov lr, r0
- mov r0, #0 ;bit = 0
- ;
- subs r5, r3, r4, lsl #24
-
- subhs r2, r2, r4 ;range = br->range-split
- movlo r2, r4 ;range = split
- movhs r0, #1 ;bit = 1
- movhs r3, r5 ;value = value-bigsplit
-
- cmp r2, #0x80
- blt range_less_0x80
- strd r2, r3, [lr, #bool_decoder_range] ;store result
-
- ldmia sp!, {r4 - r5, pc}
-
-range_less_0x80
-
- ldrd r4, r5, [lr, #bool_decoder_count] ;load count, pos, buffer
- ldr r1, [lr, #bool_decoder_buffer]
-
- clz r12, r2
- add r1, r1, r5
-
- sub r12, r12, #24
- subs r4, r4, r12 ;count -= shift
- mov r2, r2, lsl r12 ;range <<= shift
- mov r3, r3, lsl r12 ;value <<= shift
- addle r4, r4, #8 ;count += 8
- ldrleb r12, [r1], #1 ;br->buffer[br->pos]
-
- rsble r1, r4, #8 ;-count
- addle r5, r5, #1 ;br->pos++
- orrle r3, r3, r12, lsl r1 ;value |= (br->buffer[br->pos]) << (-count)
-
- strd r2, r3, [lr, #bool_decoder_range] ;store result
- strd r4, r5, [lr, #bool_decoder_count]
-
- ldmia sp!, {r4 - r5, pc}
- ENDP ; |vp8dx_decode_bool_neon|
-
- END
diff --git a/vp8/decoder/asm_dec_offsets.c b/vp8/decoder/asm_dec_offsets.c
new file mode 100644
index 0000000..e485cb4
--- /dev/null
+++ b/vp8/decoder/asm_dec_offsets.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include <stddef.h>
+
+#include "onyxd_int.h"
+
+#define DEFINE(sym, val) int sym = val;
+
+/*
+#define BLANK() asm volatile("\n->" : : )
+*/
+
+/*
+ * int main(void)
+ * {
+ */
+
+DEFINE(detok_scan, offsetof(DETOK, scan));
+DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove));
+DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr));
+DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr));
+DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr));
+DEFINE(detok_ptr_coef_bands_x, offsetof(DETOK, ptr_coef_bands_x));
+
+DEFINE(detok_A, offsetof(DETOK, A));
+DEFINE(detok_L, offsetof(DETOK, L));
+
+DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr));
+DEFINE(detok_current_bc, offsetof(DETOK, current_bc));
+DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs));
+DEFINE(detok_eob, offsetof(DETOK, eob));
+
+DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
+DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
+DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
+DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
+DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
+
+DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
+DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length));
+
+//add asserts for any offset that is not supported by assembly code
+//add asserts for any size that is not supported by assembly code
+/*
+ * return 0;
+ * }
+ */
diff --git a/vp8/decoder/dboolhuff.c b/vp8/decoder/dboolhuff.c
index 57cba16..8527d51 100644
--- a/vp8/decoder/dboolhuff.c
+++ b/vp8/decoder/dboolhuff.c
@@ -26,8 +26,9 @@
};
-int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
- unsigned int source_sz)
+int vp8dx_start_decode(BOOL_DECODER *br,
+ const unsigned char *source,
+ unsigned int source_sz)
{
br->user_buffer_end = source+source_sz;
br->user_buffer = source;
@@ -39,13 +40,13 @@
return 1;
/* Populate the buffer */
- vp8dx_bool_decoder_fill_c(br);
+ vp8dx_bool_decoder_fill(br);
return 0;
}
-void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
+void vp8dx_bool_decoder_fill(BOOL_DECODER *br)
{
const unsigned char *bufptr;
const unsigned char *bufend;
@@ -62,69 +63,3 @@
br->value = value;
br->count = count;
}
-
-#if 0
-/*
- * Until optimized versions of these functions are available, we
- * keep the implementation in the header to allow inlining.
- *
- * The RTCD-style invocations are still in place so this can
- * be switched by just uncommenting these functions here and
- * the DBOOLHUFF_INVOKE calls in the header.
- */
-int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
-{
- unsigned int bit=0;
- VP8_BD_VALUE value;
- unsigned int split;
- VP8_BD_VALUE bigsplit;
- int count;
- unsigned int range;
-
- value = br->value;
- count = br->count;
- range = br->range;
-
- split = 1 + (((range-1) * probability) >> 8);
- bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
-
- range = split;
- if(value >= bigsplit)
- {
- range = br->range-split;
- value = value-bigsplit;
- bit = 1;
- }
-
- /*if(range>=0x80)
- {
- br->value = value;
- br->range = range;
- return bit;
- }*/
-
- {
- register unsigned int shift = vp8dx_bitreader_norm[range];
- range <<= shift;
- value <<= shift;
- count -= shift;
- }
- br->value = value;
- br->count = count;
- br->range = range;
- if (count < 0)
- vp8dx_bool_decoder_fill_c(br);
- return bit;
-}
-
-int vp8dx_decode_value_c(BOOL_DECODER *br, int bits)
-{
- int z = 0;
- int bit;
- for ( bit=bits-1; bit>=0; bit-- )
- {
- z |= (vp8dx_decode_bool(br, 0x80)<<bit);
- }
- return z;
-}
-#endif
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index d14f4dc..a83e3f0 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -25,10 +25,6 @@
Even relatively modest values like 100 would work fine.*/
# define VP8_LOTS_OF_BITS (0x40000000)
-
-
-struct vp8_dboolhuff_rtcd_vtable;
-
typedef struct
{
const unsigned char *user_buffer_end;
@@ -36,82 +32,15 @@
VP8_BD_VALUE value;
int count;
unsigned int range;
-#if CONFIG_RUNTIME_CPU_DETECT
- struct vp8_dboolhuff_rtcd_vtable *rtcd;
-#endif
} BOOL_DECODER;
-#define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \
- const unsigned char *source, unsigned int source_sz)
-#define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
-#define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
-#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits)
-
-#if ARCH_ARM
-#include "arm/dboolhuff_arm.h"
-#endif
-
-#ifndef vp8_dbool_start
-#define vp8_dbool_start vp8dx_start_decode_c
-#endif
-
-#ifndef vp8_dbool_fill
-#define vp8_dbool_fill vp8dx_bool_decoder_fill_c
-#endif
-
-#ifndef vp8_dbool_debool
-#define vp8_dbool_debool vp8dx_decode_bool_c
-#endif
-
-#ifndef vp8_dbool_devalue
-#define vp8_dbool_devalue vp8dx_decode_value_c
-#endif
-
-extern prototype_dbool_start(vp8_dbool_start);
-extern prototype_dbool_fill(vp8_dbool_fill);
-extern prototype_dbool_debool(vp8_dbool_debool);
-extern prototype_dbool_devalue(vp8_dbool_devalue);
-
-typedef prototype_dbool_start((*vp8_dbool_start_fn_t));
-typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t));
-typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t));
-typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t));
-
-typedef struct vp8_dboolhuff_rtcd_vtable {
- vp8_dbool_start_fn_t start;
- vp8_dbool_fill_fn_t fill;
- vp8_dbool_debool_fn_t debool;
- vp8_dbool_devalue_fn_t devalue;
-} vp8_dboolhuff_rtcd_vtable_t;
-
-/* There are no processor-specific versions of these
- * functions right now. Disable RTCD to avoid using
- * function pointers which gives a speed boost
- */
-/*#ifdef ENABLE_RUNTIME_CPU_DETECT
-#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn
-#define IF_RTCD(x) (x)
-#else*/
-#define DBOOLHUFF_INVOKE(ctx,fn) vp8_dbool_##fn
-#define IF_RTCD(x) NULL
-/*#endif*/
-
DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
-/* wrapper functions to hide RTCD. static means inline means hopefully no
- * penalty
- */
-static int vp8dx_start_decode(BOOL_DECODER *br,
- struct vp8_dboolhuff_rtcd_vtable *rtcd,
- const unsigned char *source, unsigned int source_sz) {
-#if CONFIG_RUNTIME_CPU_DETECT
- br->rtcd = rtcd;
-#endif
- return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz);
-}
-static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
- DBOOLHUFF_INVOKE(br->rtcd, fill)(br);
-}
+int vp8dx_start_decode(BOOL_DECODER *br,
+ const unsigned char *source,
+ unsigned int source_sz);
+
+void vp8dx_bool_decoder_fill(BOOL_DECODER *br);
/*The refill loop is used in several places, so define it in a macro to make
sure they're all consistent.
@@ -138,12 +67,6 @@
static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
- /*
- * Until optimized versions of this function are available, we
- * keep the implementation in the header to allow inlining.
- *
- *return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability);
- */
unsigned int bit = 0;
VP8_BD_VALUE value;
unsigned int split;
@@ -167,13 +90,6 @@
bit = 1;
}
- /*if(range>=0x80)
- {
- br->value = value;
- br->range = range;
- return bit
- }*/
-
{
register unsigned int shift = vp8dx_bitreader_norm[range];
range <<= shift;
@@ -190,12 +106,6 @@
static int vp8_decode_value(BOOL_DECODER *br, int bits)
{
- /*
- * Until optimized versions of this function are available, we
- * keep the implementation in the header to allow inlining.
- *
- *return DBOOLHUFF_INVOKE(br->rtcd, devalue)(br, bits);
- */
int z = 0;
int bit;
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index d356fca..f4bb664 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -475,8 +475,7 @@
"Truncated packet or corrupt partition "
"%d length", i + 1);
- if (vp8dx_start_decode(bool_decoder, IF_RTCD(&pbi->dboolhuff),
- partition, partition_size))
+ if (vp8dx_start_decode(bool_decoder, partition, partition_size))
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder %d", i + 1);
@@ -653,8 +652,7 @@
init_frame(pbi);
- if (vp8dx_start_decode(bc, IF_RTCD(&pbi->dboolhuff),
- data, data_end - data))
+ if (vp8dx_start_decode(bc, data, data_end - data))
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
if (pc->frame_type == KEY_FRAME) {
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 2e28472..709ec59 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -27,12 +27,6 @@
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_c;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
- pbi->dboolhuff.start = vp8dx_start_decode_c;
- pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
-#if 0 /*For use with RTCD, when implemented*/
- pbi->dboolhuff.debool = vp8dx_decode_bool_c;
- pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-#endif
#endif
#if ARCH_X86 || ARCH_X86_64
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 34c4244..09a8976 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -126,7 +126,6 @@
#if CONFIG_RUNTIME_CPU_DETECT
vp8_dequant_rtcd_vtable_t dequant;
- struct vp8_dboolhuff_rtcd_vtable dboolhuff;
#endif
diff --git a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
index e78dc33..3c05f57 100644
--- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -14,7 +14,7 @@
EXPORT |vp8_stop_encode|
EXPORT |vp8_encode_value|
- INCLUDE vpx_vp8_enc_asm_offsets.asm
+ INCLUDE asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
index b2abadf..d939287 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -11,7 +11,7 @@
EXPORT |vp8cx_pack_tokens_armv5|
- INCLUDE vpx_vp8_enc_asm_offsets.asm
+ INCLUDE asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
index f9c3852..ac2bba6 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -11,7 +11,7 @@
EXPORT |vp8cx_pack_mb_row_tokens_armv5|
- INCLUDE vpx_vp8_enc_asm_offsets.asm
+ INCLUDE asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
index 42dae13..c2eccdb 100644
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -11,7 +11,7 @@
EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
- INCLUDE vpx_vp8_enc_asm_offsets.asm
+ INCLUDE asm_enc_offsets.asm
ARM
REQUIRE8
diff --git a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c b/vp8/encoder/asm_enc_offsets.c
similarity index 72%
rename from vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
rename to vp8/encoder/asm_enc_offsets.c
index 4703a84..cd49532 100644
--- a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
+++ b/vp8/encoder/asm_enc_offsets.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -12,9 +12,9 @@
#include "vpx_ports/config.h"
#include <stddef.h>
-#include "../treewriter.h"
-#include "../tokenize.h"
-#include "../onyx_int.h"
+#include "treewriter.h"
+#include "tokenize.h"
+#include "onyx_int.h"
#define ct_assert(name,cond) \
static void assert_##name(void) UNUSED;\
@@ -31,6 +31,7 @@
* {
*/
+//pack tokens
DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue));
DEFINE(vp8_writer_range, offsetof(vp8_writer, range));
DEFINE(vp8_writer_value, offsetof(vp8_writer, value));
@@ -40,19 +41,19 @@
DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token));
DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra));
-DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree));
+DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree));
DEFINE(tokenextra_skip_eob_node, offsetof(TOKENEXTRA, skip_eob_node));
DEFINE(TOKENEXTRA_SZ, sizeof(TOKENEXTRA));
-DEFINE(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct));
+DEFINE(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct));
DEFINE(vp8_token_value, offsetof(vp8_token, value));
DEFINE(vp8_token_len, offsetof(vp8_token, Len));
-DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree));
-DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob));
-DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len));
-DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val));
+DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree));
+DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob));
+DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len));
+DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val));
DEFINE(vp8_comp_tplist, offsetof(VP8_COMP, tplist));
DEFINE(vp8_comp_common, offsetof(VP8_COMP, common));
@@ -62,12 +63,14 @@
DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop));
DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST));
-DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
+DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
-// These two sizes are used in vp7cx_pack_tokens. They are hard coded
-// so if the size changes this will have to be adjusted.
+// These two sizes are used in vp8cx_pack_tokens. They are hard coded
+// so if the size changes this will have to be adjusted.
+#if HAVE_ARMV5TE
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)
+#endif
//add asserts for any offset that is not supported by assembly code
//add asserts for any size that is not supported by assembly code
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index cde7de9..c871e60 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1165,7 +1165,7 @@
Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);
- Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4);
+ Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, Error16x16);
rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16;
}
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 1ece1bf..632bca4 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -584,7 +584,7 @@
*Rate = vp8_rdcost_mby(mb);
}
-static void rd_pick_intra4x4block(
+static int rd_pick_intra4x4block(
VP8_COMP *cpi,
MACROBLOCK *x,
BLOCK *be,
@@ -650,16 +650,20 @@
b->bmi.mode = (B_PREDICTION_MODE)(*best_mode);
vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, b->bmi.mode);
+ return best_rd;
+
}
-int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int *rate_y, int *Distortion)
+int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate,
+ int *rate_y, int *Distortion, int best_rd)
{
MACROBLOCKD *const xd = &mb->e_mbd;
int i;
int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
int distortion = 0;
int tot_rate_y = 0;
+ int total_rd = 0;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
@@ -681,7 +685,7 @@
B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
- rd_pick_intra4x4block(
+ total_rd += rd_pick_intra4x4block(
cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L,
ta + vp8_block2above[i],
tl + vp8_block2left[i], &r, &ry, &d);
@@ -690,8 +694,14 @@
distortion += d;
tot_rate_y += ry;
mic->bmi[i].mode = xd->block[i].bmi.mode = best_mode;
+
+ if(total_rd >= best_rd)
+ break;
}
+ if(total_rd >= best_rd)
+ return INT_MAX;
+
*Rate = cost;
*rate_y += tot_rate_y;
*Distortion = distortion;
@@ -1419,6 +1429,7 @@
return bsi.segment_rd;
}
+#endif
static void swap(int *x,int *y)
{
@@ -1706,6 +1717,7 @@
}
}
+#if !(CONFIG_REALTIME_ONLY)
int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra)
{
BLOCK *b = &x->block[0];
@@ -1958,15 +1970,28 @@
switch (this_mode)
{
case B_PRED:
+ {
+ int tmp_rd;
+
// Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED];
- vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion);
+ tmp_rd = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd);
rate2 += rate;
distortion2 += distortion;
- rate2 += uv_intra_rate;
- rate_uv = uv_intra_rate_tokenonly;
- distortion2 += uv_intra_distortion;
- distortion_uv = uv_intra_distortion;
- break;
+
+ if(tmp_rd < best_yrd)
+ {
+ rate2 += uv_intra_rate;
+ rate_uv = uv_intra_rate_tokenonly;
+ distortion2 += uv_intra_distortion;
+ distortion_uv = uv_intra_distortion;
+ }
+ else
+ {
+ this_rd = INT_MAX;
+ disable_skip = 1;
+ }
+ }
+ break;
case SPLITMV:
{
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index d874409..72ba9a0 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -12,7 +12,7 @@
#ifndef __INC_RDOPT_H
#define __INC_RDOPT_H
void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue);
-int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion);
+int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion, int best_rd);
int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *returnrate, int *rate_to, int *returndistortion);
int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_to, int *distortion);
extern int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index cf47626..25909456 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -35,7 +35,7 @@
VP8_COMMON_SRCS-yes += common/entropymode.c
VP8_COMMON_SRCS-yes += common/entropymv.c
VP8_COMMON_SRCS-yes += common/extend.c
-VP8_COMMON_SRCS-yes += common/filter_c.c
+VP8_COMMON_SRCS-yes += common/filter.c
VP8_COMMON_SRCS-yes += common/findnearmv.c
VP8_COMMON_SRCS-yes += common/generic/systemdependent.c
VP8_COMMON_SRCS-yes += common/idctllm.c
@@ -111,6 +111,7 @@
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
endif
+VP8_COMMON_SRCS-$(ARCH_ARM) += common/asm_com_offsets.c
VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c
# common (c)
@@ -118,7 +119,6 @@
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/filter_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/loopfilter_arm.c
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/vpx_asm_offsets.c
# common (armv6)
VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM)
@@ -161,16 +161,3 @@
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/save_neon_reg$(ASM)
VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon_neon.c
-
-
-#
-# Rule to extract assembly constants from C sources
-#
-ifeq ($(ARCH_ARM),yes)
-vpx_asm_offsets.asm: obj_int_extract
-vpx_asm_offsets.asm: $(VP8_PREFIX)common/arm/vpx_asm_offsets.c.o
- ./obj_int_extract rvds $< $(ADS2GAS) > $@
-OBJS-yes += $(VP8_PREFIX)common/arm/vpx_asm_offsets.c.o
-CLEAN-OBJS += vpx_asm_offsets.asm
-$(filter %$(ASM).o,$(OBJS-yes)): vpx_asm_offsets.asm
-endif
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index b23bd95..ea99c61 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -912,8 +912,8 @@
ctx->preview_img.x_chroma_shift = 1;
ctx->preview_img.y_chroma_shift = 1;
- ctx->preview_img.d_w = ctx->cfg.g_w;
- ctx->preview_img.d_h = ctx->cfg.g_h;
+ ctx->preview_img.d_w = sd.y_width;
+ ctx->preview_img.d_h = sd.y_height;
ctx->preview_img.stride[VPX_PLANE_Y] = sd.y_stride;
ctx->preview_img.stride[VPX_PLANE_U] = sd.uv_stride;
ctx->preview_img.stride[VPX_PLANE_V] = sd.uv_stride;
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index 6b624a7..b23ac96 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -14,6 +14,7 @@
#File list for arm
# encoder
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c
+VP8_CX_SRCS-$(ARCH_ARM) += encoder/asm_enc_offsets.c
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/encodemb_arm.c
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/variance_arm.c
@@ -50,17 +51,3 @@
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_memcpy_neon$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
-
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/vpx_vp8_enc_asm_offsets.c
-
-#
-# Rule to extract assembly constants from C sources
-#
-ifeq ($(ARCH_ARM),yes)
-vpx_vp8_enc_asm_offsets.asm: obj_int_extract
-vpx_vp8_enc_asm_offsets.asm: $(VP8_PREFIX)encoder/arm/vpx_vp8_enc_asm_offsets.c.o
- ./obj_int_extract rvds $< $(ADS2GAS) > $@
-OBJS-yes += $(VP8_PREFIX)encoder/arm/vpx_vp8_enc_asm_offsets.c.o
-CLEAN-OBJS += vpx_vp8_enc_asm_offsets.asm
-$(filter %$(ASM).o,$(OBJS-yes)): vpx_vp8_enc_asm_offsets.asm
-endif
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index 0803a9c..080c9af 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -12,6 +12,7 @@
#VP8_DX_SRCS list is modified according to different platforms.
VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c
+VP8_DX_SRCS-$(ARCH_ARM) += decoder/asm_dec_offsets.c
VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dequantize_arm.c
VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK) += decoder/arm/detokenize$(ASM)
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
index 24d46cb..e6bb486 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
@@ -14,7 +14,7 @@
REQUIRE8
PRESERVE8
- INCLUDE vpx_asm_offsets.asm
+ INCLUDE asm_com_offsets.asm
AREA ||.text||, CODE, READONLY, ALIGN=2
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
index 6534827..febccc2 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
@@ -16,7 +16,7 @@
REQUIRE8
PRESERVE8
- INCLUDE vpx_asm_offsets.asm
+ INCLUDE asm_com_offsets.asm
AREA ||.text||, CODE, READONLY, ALIGN=2
;void vpxyv12_copy_frame_yonly(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
index dfc8db5..ec64dbc 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
@@ -14,7 +14,7 @@
REQUIRE8
PRESERVE8
- INCLUDE vpx_asm_offsets.asm
+ INCLUDE asm_com_offsets.asm
AREA ||.text||, CODE, READONLY, ALIGN=2
;Note: This function is used to copy source data in src_buffer[i] at beginning of
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
index e475b92..b0a3b93 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
@@ -14,7 +14,7 @@
REQUIRE8
PRESERVE8
- INCLUDE vpx_asm_offsets.asm
+ INCLUDE asm_com_offsets.asm
AREA ||.text||, CODE, READONLY, ALIGN=2
;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf);