Merge "vp9: add multi-threaded tile decoder"
diff --git a/build/make/Makefile b/build/make/Makefile
index 7a25239..030c1b5 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -114,6 +114,10 @@
$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3
$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1
$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1
+$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx
+$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx
+$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2
+$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2
$(BUILD_PFX)%.c.d: %.c
$(if $(quiet),@echo " [DEP] $@")
diff --git a/build/make/configure.sh b/build/make/configure.sh
index c6c8660..83f480a 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -1108,6 +1108,18 @@
soft_enable sse4_1
fi
+ if enabled gcc && ! disabled avx && ! check_cflags -mavx; then
+ RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx "
+ else
+ soft_enable avx
+ fi
+
+ if enabled gcc && ! disabled avx2 && ! check_cflags -mavx2; then
+ RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx2 "
+ else
+ soft_enable avx2
+ fi
+
case "${AS}" in
auto|"")
which nasm >/dev/null 2>&1 && AS=nasm
diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh
index c531e95..2967b5a 100755
--- a/build/make/rtcd.sh
+++ b/build/make/rtcd.sh
@@ -327,11 +327,11 @@
require c
case $arch in
x86)
- ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1)
+ ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2)
x86
;;
x86_64)
- ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1)
+ ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2)
REQUIRES=${REQUIRES:-mmx sse sse2}
require $(filter $REQUIRES)
x86
diff --git a/configure b/configure
index 297cec4..621161c 100755
--- a/configure
+++ b/configure
@@ -234,6 +234,8 @@
sse3
ssse3
sse4_1
+ avx
+ avx2
altivec
"
@@ -422,7 +424,7 @@
fi
# The write_common_config (config.mk) logic is deferred until after the
- # recursive calls to configure complete, becuase we want our universal
+ # recursive calls to configure complete, because we want our universal
# targets to be executed last.
write_common_config_targets
enabled universal && echo "FAT_ARCHS=${fat_bin_archs}" >> config.mk
@@ -608,7 +610,12 @@
check_add_cflags -Wuninitialized
check_add_cflags -Wunused-variable
case ${CC} in
- *clang*) ;;
+ *clang*)
+ # libvpx and/or clang have issues with aliasing:
+ # https://code.google.com/p/webm/issues/detail?id=603
+ # work around them until they are fixed
+ check_add_cflags -fno-strict-aliasing
+ ;;
*) check_add_cflags -Wunused-but-set-variable ;;
esac
enabled extra_warnings || check_add_cflags -Wno-unused-function
diff --git a/test/test_libvpx.cc b/test/test_libvpx.cc
index a4dbca4..80aca98 100644
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -45,6 +45,10 @@
append_gtest_filter(":-SSSE3/*");
if (!(simd_caps & HAS_SSE4_1))
append_gtest_filter(":-SSE4_1/*");
+ if (!(simd_caps & HAS_AVX))
+ append_gtest_filter(":-AVX/*");
+ if (!(simd_caps & HAS_AVX2))
+ append_gtest_filter(":-AVX2/*");
#endif
#if !CONFIG_SHARED
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 759d842..35a22c7 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -512,15 +512,15 @@
else
{
mbmi->mode = NEARMV;
- vp8_clamp_mv2(&near_mvs[CNT_NEAR], &pbi->mb);
mbmi->mv.as_int = near_mvs[CNT_NEAR].as_int;
+ vp8_clamp_mv2(&mbmi->mv, &pbi->mb);
}
}
else
{
mbmi->mode = NEARESTMV;
- vp8_clamp_mv2(&near_mvs[CNT_NEAREST], &pbi->mb);
mbmi->mv.as_int = near_mvs[CNT_NEAREST].as_int;
+ vp8_clamp_mv2(&mbmi->mv, &pbi->mb);
}
}
else
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 85ac6d2..218e12e 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -16,12 +16,6 @@
#include "vp9/common/vp9_seg_common.h"
-struct loop_filter_info {
- const uint8_t *mblim;
- const uint8_t *lim;
- const uint8_t *hev_thr;
-};
-
// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
// Each 1 bit represents a position in which we want to apply the loop filter.
// Left_ entries refer to whether we apply a filter on the border to the
@@ -259,8 +253,8 @@
if (block_inside_limit < 1)
block_inside_limit = 1;
- vpx_memset(lfi->lim[lvl], block_inside_limit, SIMD_WIDTH);
- vpx_memset(lfi->mblim[lvl], (2 * (lvl + 2) + block_inside_limit),
+ vpx_memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
+ vpx_memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
SIMD_WIDTH);
}
}
@@ -268,7 +262,7 @@
void vp9_loop_filter_init(VP9_COMMON *cm) {
loop_filter_info_n *lfi = &cm->lf_info;
struct loopfilter *lf = &cm->lf;
- int i;
+ int lvl;
// init limits for given sharpness
update_sharpness(lfi, lf->sharpness_level);
@@ -278,8 +272,8 @@
lf_init_lut(lfi);
// init hev threshold const vectors
- for (i = 0; i < 4; i++)
- vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
+ for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
+ vpx_memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
}
void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
@@ -330,16 +324,14 @@
static int build_lfi(const loop_filter_info_n *lfi_n,
const MB_MODE_INFO *mbmi,
- struct loop_filter_info *lfi) {
+ const loop_filter_thresh **lfi) {
const int seg = mbmi->segment_id;
const int ref = mbmi->ref_frame[0];
const int mode = lfi_n->mode_lf_lut[mbmi->mode];
const int filter_level = lfi_n->lvl[seg][ref][mode];
if (filter_level > 0) {
- lfi->mblim = lfi_n->mblim[filter_level];
- lfi->lim = lfi_n->lim[filter_level];
- lfi->hev_thr = lfi_n->hev_thr[filter_level >> 4];
+ *lfi = &lfi_n->lfthr[filter_level];
return 1;
} else {
return 0;
@@ -351,11 +343,13 @@
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
- const struct loop_filter_info *lfi) {
+ const loop_filter_thresh **p_lfi) {
unsigned int mask;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= 1) {
+ const loop_filter_thresh *lfi = *p_lfi;
+
if (mask & 1) {
if (mask_16x16 & 1) {
vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim,
@@ -379,7 +373,7 @@
vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
s += 8;
- lfi++;
+ p_lfi++;
mask_16x16 >>= 1;
mask_8x8 >>= 1;
mask_4x4 >>= 1;
@@ -393,12 +387,14 @@
unsigned int mask_4x4,
unsigned int mask_4x4_int,
int only_4x4_1,
- const struct loop_filter_info *lfi) {
+ const loop_filter_thresh **p_lfi) {
unsigned int mask;
int count;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= count) {
+ const loop_filter_thresh *lfi = *p_lfi;
+
count = 1;
if (mask & 1) {
if (!only_4x4_1) {
@@ -432,7 +428,7 @@
lfi->lim, lfi->hev_thr, 1);
}
s += 8 * count;
- lfi += count;
+ p_lfi += count;
mask_16x16 >>= count;
mask_8x8 >>= count;
mask_4x4 >>= count;
@@ -805,7 +801,7 @@
unsigned int mask_8x8[MI_BLOCK_SIZE] = {0};
unsigned int mask_4x4[MI_BLOCK_SIZE] = {0};
unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0};
- struct loop_filter_info lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
+ const loop_filter_thresh *lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
int r, c;
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
@@ -834,7 +830,7 @@
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
// Filter level can vary per MI
- if (!build_lfi(&cm->lf_info, &mi[0].mbmi, lfi[r] + (c >> ss_x)))
+ if (!build_lfi(&cm->lf_info, &mi[0].mbmi, &lfi[r][c >> ss_x]))
continue;
// Build masks based on the transform size of each block
@@ -925,7 +921,7 @@
struct buf_2d *const dst = &plane->dst;
uint8_t* const dst0 = dst->buf;
unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0};
- struct loop_filter_info lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
+ const loop_filter_thresh *lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
int r, c;
int row_shift = 3 - ss_x;
int row_mask = 0xff >> (ss_x << 2);
@@ -938,8 +934,8 @@
// Determine the vertical edges that need filtering
for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
const MODE_INFO *mi = mi_8x8[c];
- if (!build_lfi(&cm->lf_info, &mi[0].mbmi, lfi[r] + (c >> ss_x)))
- continue;
+
+ build_lfi(&cm->lf_info, &mi[0].mbmi, &lfi[r][c >> ss_x]);
}
if (!plane->plane_type) {
mask_4x4_int[r] = MASK_ROW(lfm->int_4x4_y);
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index c698090..62389ea 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -46,12 +46,13 @@
// Need to align this structure so when it is declared and
// passed it can be loaded into vector registers.
typedef struct {
- DECLARE_ALIGNED(SIMD_WIDTH, uint8_t,
- mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
- DECLARE_ALIGNED(SIMD_WIDTH, uint8_t,
- lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
- DECLARE_ALIGNED(SIMD_WIDTH, uint8_t,
- hev_thr[4][SIMD_WIDTH]);
+ DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]);
+ DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]);
+ DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]);
+} loop_filter_thresh;
+
+typedef struct {
+ loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
uint8_t mode_lf_lut[MB_MODE_COUNT];
} loop_filter_info_n;
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index a869dc0..19032bf 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -127,14 +127,14 @@
return get_tx_probs(bsize, context, tx_probs);
}
-static void update_tx_counts(BLOCK_SIZE bsize, uint8_t context,
- TX_SIZE tx_size, struct tx_counts *tx_counts) {
- if (bsize >= BLOCK_32X32)
- tx_counts->p32x32[context][tx_size]++;
- else if (bsize >= BLOCK_16X16)
- tx_counts->p16x16[context][tx_size]++;
+static unsigned int *get_tx_counts(BLOCK_SIZE bsize, uint8_t context,
+ struct tx_counts *tx_counts) {
+ if (bsize < BLOCK_16X16)
+ return tx_counts->p8x8[context];
+ else if (bsize < BLOCK_32X32)
+ return tx_counts->p16x16[context];
else
- tx_counts->p8x8[context][tx_size]++;
+ return tx_counts->p32x32[context];
}
#endif // VP9_COMMON_VP9_PRED_COMMON_H_
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 2d9fbff..5e049c6 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -22,10 +22,11 @@
# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly.
[ "$CONFIG_USE_X86INC" = "yes" ] && mmx_x86inc=mmx && sse_x86inc=sse &&
- sse2_x86inc=sse2 && ssse3_x86inc=ssse3
+ sse2_x86inc=sse2 && ssse3_x86inc=ssse3 && avx_x86inc=avx && avx2_x86inc=avx2
# this variable is for functions that are 64 bit only.
-[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && ssse3_x86_64=ssse3
+[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 &&
+ ssse3_x86_64=ssse3 && avx_x86_64=avx && avx2_x86_64=avx2
#
# RECON
@@ -671,10 +672,10 @@
prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"
specialize vp9_subtract_block $sse2_x86inc
-prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
+prototype void vp9_quantize_b "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
specialize vp9_quantize_b $ssse3_x86_64
-prototype void vp9_quantize_b_32x32 "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
+prototype void vp9_quantize_b_32x32 "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
specialize vp9_quantize_b_32x32 $ssse3_x86_64
#
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 6bc51e8..475a299 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -72,7 +72,7 @@
}
if (!cm->frame_parallel_decoding_mode)
- update_tx_counts(bsize, context, tx_size, &cm->counts.tx);
+ ++get_tx_counts(bsize, context, &cm->counts.tx)[tx_size];
return tx_size;
}
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index f5e4592..12b3f5c 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -1151,7 +1151,11 @@
setup_tile_info(cm, rb);
sz = vp9_rb_read_literal(rb, 16);
- return sz > 0 ? sz : -1;
+ if (sz == 0)
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid header size");
+
+ return sz;
}
static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data,
@@ -1268,15 +1272,9 @@
YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
if (!first_partition_size) {
- if (!keyframe) {
// showing a frame directly
*p_data_end = data + 1;
return 0;
- } else {
- vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
- "Invalid key frame");
- return -1;
- }
}
if (!pbi->decoded_key_frame && !keyframe)
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 5ff59a8..9408e54 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2493,7 +2493,7 @@
(mbmi->skip_coeff ||
vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)))) {
const uint8_t context = vp9_get_pred_context_tx_size(xd);
- update_tx_counts(bsize, context, mbmi->tx_size, &cm->counts.tx);
+ ++get_tx_counts(bsize, context, &cm->counts.tx)[mbmi->tx_size];
} else {
int x, y;
TX_SIZE sz = tx_mode_to_biggest_tx_size[cm->tx_mode];
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 7ad8d1f..fca7525 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -22,12 +22,14 @@
extern int enc_debug;
#endif
-void vp9_quantize_b_c(int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
- int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr,
- int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
- int16_t *dqcoeff_ptr, int16_t *dequant_ptr,
- int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan) {
+void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ int zbin_oq_value, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
int i, rc, eob;
int zbins[2], nzbins[2], zbin;
int x, y, z, sz;
@@ -86,14 +88,15 @@
*eob_ptr = eob + 1;
}
-void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
+void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
- int16_t *zbin_ptr, int16_t *round_ptr,
- int16_t *quant_ptr, int16_t *quant_shift_ptr,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
- int16_t *dequant_ptr, int zbin_oq_value,
- uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan) {
+ const int16_t *dequant_ptr,
+ int zbin_oq_value, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
int i, rc, eob;
int zbins[2], nzbins[2];
int x, y, z, sz;
@@ -174,25 +177,19 @@
return res;
}
-void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
- int y_blocks) {
- MACROBLOCKD *const xd = &mb->e_mbd;
+void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int y_blocks, int b_idx,
+ const int16_t *scan, const int16_t *iscan) {
+ MACROBLOCKD *const xd = &x->e_mbd;
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
- const int16_t *scan = get_scan_4x4(tx_type);
- const int16_t *iscan = get_iscan_4x4(tx_type);
+ struct macroblock_plane* p = &x->plane[pb_idx.plane];
+ struct macroblockd_plane* pd = &xd->plane[pb_idx.plane];
- vp9_quantize_b(BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block),
- 16, mb->skip_block,
- mb->plane[pb_idx.plane].zbin,
- mb->plane[pb_idx.plane].round,
- mb->plane[pb_idx.plane].quant,
- mb->plane[pb_idx.plane].quant_shift,
- BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block),
- BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block),
- xd->plane[pb_idx.plane].dequant,
- mb->plane[pb_idx.plane].zbin_extra,
- &xd->plane[pb_idx.plane].eobs[pb_idx.block],
- scan, iscan);
+ vp9_quantize_b(BLOCK_OFFSET(p->coeff, pb_idx.block),
+ 16, x->skip_block,
+ p->zbin, p->round, p->quant, p->quant_shift,
+ BLOCK_OFFSET(pd->qcoeff, pb_idx.block),
+ BLOCK_OFFSET(pd->dqcoeff, pb_idx.block),
+ pd->dequant, p->zbin_extra, &pd->eobs[pb_idx.block], scan, iscan);
}
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 459aa33..c078e1d 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -13,8 +13,9 @@
#include "vp9/encoder/vp9_block.h"
-void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
- int y_blocks);
+void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int y_blocks, int b_idx,
+ const int16_t *scan, const int16_t *iscan);
+
struct VP9_COMP;
void vp9_set_quantizer(struct VP9_COMP *cpi, int q);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index d25112b..c134208 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1032,10 +1032,10 @@
ENTROPY_CONTEXT ta[2], tempa[2];
ENTROPY_CONTEXT tl[2], templ[2];
- TX_TYPE tx_type = DCT_DCT;
+
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
- int idx, idy, block;
+ int idx, idy;
uint8_t best_dst[8 * 8];
assert(ib < 4);
@@ -1071,8 +1071,8 @@
const int16_t *nb;
uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
-
- block = ib + idy * 2 + idx;
+ const int block = ib + idy * 2 + idx;
+ TX_TYPE tx_type;
xd->mi_8x8[0]->bmi[block].as_mode = mode;
src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
@@ -1086,13 +1086,15 @@
dst, dst_stride);
tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
+ get_scan_nb_4x4(tx_type, &scan, &nb);
+
if (tx_type != DCT_DCT)
vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, 8);
- vp9_regular_quantize_b_4x4(x, block, tx_type, 16);
- get_scan_nb_4x4(tx_type, &scan, &nb);
+ vp9_regular_quantize_b_4x4(x, 16, block, scan, get_iscan_4x4(tx_type));
+
ratey += cost_coeffs(x, 0, block,
tempa + idx, templ + idy, TX_4X4, scan, nb);
distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
@@ -1558,7 +1560,8 @@
coeff = BLOCK_OFFSET(p->coeff, k);
x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
coeff, 8);
- vp9_regular_quantize_b_4x4(x, k, DCT_DCT, 16);
+ vp9_regular_quantize_b_4x4(x, 16, k, get_scan_4x4(DCT_DCT),
+ get_iscan_4x4(DCT_DCT));
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
16, &ssz);
thissse += ssz;
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 0f12d88..4d39670 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -995,8 +995,9 @@
if (data) {
int res;
vpx_scaling_mode_t scalemode = *(vpx_scaling_mode_t *)data;
- res = vp9_set_internal_size(ctx->cpi, scalemode.h_scaling_mode,
- scalemode.v_scaling_mode);
+ res = vp9_set_internal_size(ctx->cpi,
+ (VPX_SCALING)scalemode.h_scaling_mode,
+ (VPX_SCALING)scalemode.v_scaling_mode);
if (!res) {
return VPX_CODEC_OK;
diff --git a/vpx/vp8.h b/vpx/vp8.h
index 57d3cae..056fa7a 100644
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -100,14 +100,17 @@
/*!\brief reference frame data struct
*
- * define the data struct to access vp8 reference frames
+ * Define the data struct to access vp8 reference frames.
*/
-
typedef struct vpx_ref_frame {
vpx_ref_frame_type_t frame_type; /**< which reference frame */
vpx_image_t img; /**< reference frame data in image format */
} vpx_ref_frame_t;
+/*!\brief VP9 specific reference frame data struct
+ *
+ * Define the data struct to access vp9 reference frames.
+ */
typedef struct vp9_ref_frame {
int idx; /**< frame index to get (input) */
vpx_image_t img; /**< img structure to populate (output) */
@@ -117,7 +120,6 @@
*
* defines the data type for each of VP8 decoder control function requires
*/
-
VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *)
VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *)
VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *)
@@ -127,7 +129,6 @@
VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int)
VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *)
-
/*! @} - end defgroup vp8 */
#ifdef __cplusplus
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 92fdb00..9f68c38 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -7,7 +7,8 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-
+#ifndef VP8CX_H
+#define VP8CX_H
/*!\defgroup vp8_encoder WebM VP8 Encoder
* \ingroup vp8
@@ -20,8 +21,6 @@
* \brief Provides definitions for using the VP8 encoder algorithm within the
* vpx Codec Interface.
*/
-#ifndef VP8CX_H
-#define VP8CX_H
#ifdef __cplusplus
extern "C" {
@@ -223,16 +222,17 @@
*/
typedef struct vpx_roi_map {
- unsigned char *roi_map; /**< specify an id between 0 and 3 for each 16x16 region within a frame */
- unsigned int rows; /**< number of rows */
- unsigned int cols; /**< number of cols */
+ /*! An id between 0 and 3 for each 16x16 region within a frame. */
+ unsigned char *roi_map;
+ unsigned int rows; /**< Number of rows. */
+ unsigned int cols; /**< Number of columns. */
// TODO(paulwilkins): broken for VP9 which has 8 segments
// q and loop filter deltas for each segment
// (see MAX_MB_SEGMENTS)
- int delta_q[4];
- int delta_lf[4];
- // Static breakout threshold for each segment
- unsigned int static_threshold[4];
+ int delta_q[4]; /**< Quantizer deltas. */
+ int delta_lf[4]; /**< Loop filter deltas. */
+ /*! Static breakout threshold for each segment. */
+ unsigned int static_threshold[4];
} vpx_roi_map_t;
/*!\brief vpx active region map
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index 50a223f..d3093c4 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -45,7 +45,8 @@
#include "vp8.h"
-/*!\brief VP8 decoder control functions
+/*!\enum vp8_dec_control_id
+ * \brief VP8 decoder control functions
*
* This set of macros define the control functions available for the VP8
* decoder interface.
@@ -78,12 +79,17 @@
VP8_DECODER_CTRL_ID_MAX
};
+/*!\brief Structure to hold decryption state
+ *
+ * Defines a structure to hold the decryption state and access function.
+ */
typedef struct vp8_decrypt_init {
/** Decrypt n bytes of data from input -> output, using the decrypt_state
* passed in VP8D_SET_DECRYPTOR.
*/
void (*decrypt_cb)(void *decrypt_state, const unsigned char *input,
unsigned char *output, int count);
+ /*! Decryption state. */
void *decrypt_state;
} vp8_decrypt_init;
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index 2e6f1e7..3ea36d6 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -36,12 +36,13 @@
* Once initialized, the instance is manged using other functions from
* the vpx_codec_* family.
*/
+#ifndef VPX_CODEC_H
+#define VPX_CODEC_H
+
#ifdef __cplusplus
extern "C" {
#endif
-#ifndef VPX_CODEC_H
-#define VPX_CODEC_H
#include "vpx_integer.h"
#include "vpx_image.h"
@@ -550,9 +551,8 @@
/*!@} - end defgroup cap_xma*/
/*!@} - end defgroup codec*/
-
-
-#endif
#ifdef __cplusplus
}
#endif
+#endif
+
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index e7701e5..2dcd024 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -7,7 +7,8 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-
+#ifndef VPX_DECODER_H
+#define VPX_DECODER_H
/*!\defgroup decoder Decoder Algorithm Interface
* \ingroup codec
@@ -28,8 +29,6 @@
extern "C" {
#endif
-#ifndef VPX_DECODER_H
-#define VPX_DECODER_H
#include "vpx_codec.h"
/*!\brief Current ABI version number
@@ -328,9 +327,8 @@
/*!@} - end defgroup cap_put_slice*/
/*!@} - end defgroup decoder*/
-
-#endif
-
#ifdef __cplusplus
}
#endif
+#endif
+
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 56fd2d9..56752cf 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -7,7 +7,8 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-
+#ifndef VPX_ENCODER_H
+#define VPX_ENCODER_H
/*!\defgroup encoder Encoder Algorithm Interface
* \ingroup codec
@@ -28,8 +29,6 @@
extern "C" {
#endif
-#ifndef VPX_ENCODER_H
-#define VPX_ENCODER_H
#include "vpx_codec.h"
/*! Temporal Scalability: Maximum length of the sequence defining frame
@@ -930,8 +929,8 @@
/*!@} - end defgroup encoder*/
-
-#endif
#ifdef __cplusplus
}
#endif
+#endif
+
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index b009c35..2990583 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -88,12 +88,14 @@
#endif
#endif /* end others */
-#define HAS_MMX 0x01
-#define HAS_SSE 0x02
-#define HAS_SSE2 0x04
-#define HAS_SSE3 0x08
-#define HAS_SSSE3 0x10
-#define HAS_SSE4_1 0x20
+#define HAS_MMX 0x01
+#define HAS_SSE 0x02
+#define HAS_SSE2 0x04
+#define HAS_SSE3 0x08
+#define HAS_SSSE3 0x10
+#define HAS_SSE4_1 0x20
+#define HAS_AVX 0x40
+#define HAS_AVX2 0x80
#ifndef BIT
#define BIT(n) (1<<n)
#endif
@@ -132,12 +134,16 @@
if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
- if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
+ if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
- if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
+ if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
+ if (reg_ecx & BIT(28)) flags |= HAS_AVX;
+
+ if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
+
return flags & mask;
}