Merge "Flexible support for various pattern searches"
diff --git a/configure b/configure
index 24be893..64f0165 100755
--- a/configure
+++ b/configure
@@ -314,6 +314,7 @@
gprof
gcov
pic
+ use_x86inc
optimizations
ccache
runtime_cpu_detect
diff --git a/test/sad_test.cc b/test/sad_test.cc
index bf3e0b8..b85da2f 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -428,6 +428,7 @@
#if HAVE_SSE
#if CONFIG_VP9_ENCODER
+#if CONFIG_USE_X86INC
const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
const sad_m_by_n_fn_t sad_4x8_sse_vp9 = vp9_sad4x8_sse;
INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values(
@@ -441,6 +442,7 @@
make_tuple(4, 4, sad_4x4x4d_sse)));
#endif
#endif
+#endif
#if HAVE_SSE2
#if CONFIG_VP8_ENCODER
@@ -451,6 +453,7 @@
const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt;
#endif
#if CONFIG_VP9_ENCODER
+#if CONFIG_USE_X86INC
const sad_m_by_n_fn_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
const sad_m_by_n_fn_t sad_64x32_sse2_vp9 = vp9_sad64x32_sse2;
const sad_m_by_n_fn_t sad_32x64_sse2_vp9 = vp9_sad32x64_sse2;
@@ -463,6 +466,7 @@
const sad_m_by_n_fn_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
const sad_m_by_n_fn_t sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
#endif
+#endif
const sad_m_by_n_test_param_t sse2_tests[] = {
#if CONFIG_VP8_ENCODER
make_tuple(16, 16, sad_16x16_wmt),
@@ -472,6 +476,7 @@
make_tuple(4, 4, sad_4x4_wmt),
#endif
#if CONFIG_VP9_ENCODER
+#if CONFIG_USE_X86INC
make_tuple(64, 64, sad_64x64_sse2_vp9),
make_tuple(64, 32, sad_64x32_sse2_vp9),
make_tuple(32, 64, sad_32x64_sse2_vp9),
@@ -484,6 +489,7 @@
make_tuple(8, 8, sad_8x8_sse2_vp9),
make_tuple(8, 4, sad_8x4_sse2_vp9),
#endif
+#endif
};
INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 238290b..a09558f 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -57,12 +57,6 @@
unsigned char vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
-static INLINE const vp9_prob *vp9_get_pred_probs_switchable_interp(
- const VP9_COMMON *cm, const MACROBLOCKD *xd) {
- const int pred_context = vp9_get_pred_context_switchable_interp(xd);
- return &cm->fc.switchable_interp_prob[pred_context][0];
-}
-
unsigned char vp9_get_pred_context_intra_inter(const MACROBLOCKD *xd);
static INLINE vp9_prob vp9_get_pred_prob_intra_inter(const VP9_COMMON *cm,
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 798df0e..f61d26d 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -480,82 +480,82 @@
specialize vp9_sub_pixel_avg_variance4x4 sse ssse3
prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad64x64 sse2
+specialize vp9_sad64x64 $sse2_x86inc
prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x64 sse2
+specialize vp9_sad32x64 $sse2_x86inc
prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad64x32 sse2
+specialize vp9_sad64x32 $sse2_x86inc
prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x16 sse2
+specialize vp9_sad32x16 $sse2_x86inc
prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad16x32 sse2
+specialize vp9_sad16x32 $sse2_x86inc
prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad32x32 sse2
+specialize vp9_sad32x32 $sse2_x86inc
prototype unsigned int vp9_sad16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad16x16 mmx sse2
+specialize vp9_sad16x16 mmx $sse2_x86inc
prototype unsigned int vp9_sad16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad16x8 mmx sse2
+specialize vp9_sad16x8 mmx $sse2_x86inc
prototype unsigned int vp9_sad8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad8x16 mmx sse2
+specialize vp9_sad8x16 mmx $sse2_x86inc
prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad8x8 mmx sse2
+specialize vp9_sad8x8 mmx $sse2_x86inc
prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad8x4 sse2
+specialize vp9_sad8x4 $sse2_x86inc
prototype unsigned int vp9_sad4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad4x8 sse
+specialize vp9_sad4x8 $sse_x86inc
prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
-specialize vp9_sad4x4 mmx sse
+specialize vp9_sad4x4 mmx $sse_x86inc
prototype unsigned int vp9_sad64x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad64x64_avg sse2
+specialize vp9_sad64x64_avg $sse2_x86inc
prototype unsigned int vp9_sad32x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x64_avg sse2
+specialize vp9_sad32x64_avg $sse2_x86inc
prototype unsigned int vp9_sad64x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad64x32_avg sse2
+specialize vp9_sad64x32_avg $sse2_x86inc
prototype unsigned int vp9_sad32x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x16_avg sse2
+specialize vp9_sad32x16_avg $sse2_x86inc
prototype unsigned int vp9_sad16x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x32_avg sse2
+specialize vp9_sad16x32_avg $sse2_x86inc
prototype unsigned int vp9_sad32x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad32x32_avg sse2
+specialize vp9_sad32x32_avg $sse2_x86inc
prototype unsigned int vp9_sad16x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x16_avg sse2
+specialize vp9_sad16x16_avg $sse2_x86inc
prototype unsigned int vp9_sad16x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad16x8_avg sse2
+specialize vp9_sad16x8_avg $sse2_x86inc
prototype unsigned int vp9_sad8x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x16_avg sse2
+specialize vp9_sad8x16_avg $sse2_x86inc
prototype unsigned int vp9_sad8x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x8_avg sse2
+specialize vp9_sad8x8_avg $sse2_x86inc
prototype unsigned int vp9_sad8x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad8x4_avg sse2
+specialize vp9_sad8x4_avg $sse2_x86inc
prototype unsigned int vp9_sad4x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad4x8_avg sse
+specialize vp9_sad4x8_avg $sse_x86inc
prototype unsigned int vp9_sad4x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"
-specialize vp9_sad4x4_avg sse
+specialize vp9_sad4x4_avg $sse_x86inc
prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance_halfpixvar16x16_h sse2
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 2d9be6a..074b3e9 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -370,9 +370,9 @@
VP9D_COMP *pbi, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- const vp9_prob *probs = vp9_get_pred_probs_switchable_interp(cm, xd);
- const int type = treed_read(r, vp9_switchable_interp_tree, probs);
const int ctx = vp9_get_pred_context_switchable_interp(xd);
+ const int type = treed_read(r, vp9_switchable_interp_tree,
+ cm->fc.switchable_interp_prob[ctx]);
++cm->counts.switchable_interp[ctx][type];
return type;
}
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index e79bc81..6a50c80 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -477,12 +477,13 @@
}
}
- if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ if (pc->mcomp_filter_type == SWITCHABLE) {
+ const int ctx = vp9_get_pred_context_switchable_interp(xd);
write_token(bc, vp9_switchable_interp_tree,
- vp9_get_pred_probs_switchable_interp(&cpi->common, xd),
+ pc->fc.switchable_interp_prob[ctx],
&vp9_switchable_interp_encodings[mi->interp_filter]);
} else {
- assert(mi->interp_filter == cpi->common.mcomp_filter_type);
+ assert(mi->interp_filter == pc->mcomp_filter_type);
}
if (bsize < BLOCK_8X8) {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 8bd3500..82859c5 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -333,13 +333,14 @@
static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
BLOCK_SIZE_TYPE bsize, int output_enabled) {
int i, x_idx, y;
- MACROBLOCK * const x = &cpi->mb;
- MACROBLOCKD * const xd = &x->e_mbd;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *mi = &ctx->mic;
- MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi;
+ MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
int mb_mode_index = ctx->best_mode_index;
- const int mis = cpi->common.mode_info_stride;
+ const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
@@ -351,15 +352,12 @@
// Restore the coding context of the MB to that that was in place
// when the mode was picked for it
- for (y = 0; y < mi_height; y++) {
- for (x_idx = 0; x_idx < mi_width; x_idx++) {
+ for (y = 0; y < mi_height; y++)
+ for (x_idx = 0; x_idx < mi_width; x_idx++)
if ((xd->mb_to_right_edge >> (3 + LOG2_MI_SIZE)) + mi_width > x_idx
- && (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + mi_height > y) {
- MODE_INFO *mi_addr = xd->mode_info_context + x_idx + y * mis;
- *mi_addr = *mi;
- }
- }
- }
+ && (xd->mb_to_bottom_edge >> (3 + LOG2_MI_SIZE)) + mi_height > y)
+ xd->mode_info_context[x_idx + y * mis] = *mi;
+
// FIXME(rbultje) I'm pretty sure this should go to the end of this block
// (i.e. after the output_enabled)
if (bsize < BLOCK_32X32) {
@@ -383,7 +381,7 @@
cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i];
}
- if (cpi->common.frame_type == KEY_FRAME) {
+ if (cm->frame_type == KEY_FRAME) {
// Restore the coding modes to that held in the coding context
// if (mb_mode == I4X4_PRED)
// for (i = 0; i < 16; i++)
@@ -436,19 +434,17 @@
xd->mode_info_context[mis * j + i].mbmi = *mbmi;
}
- if (cpi->common.mcomp_filter_type == SWITCHABLE
- && is_inter_mode(mbmi->mode)) {
- ++cpi->common.counts.switchable_interp[
- vp9_get_pred_context_switchable_interp(xd)][mbmi->interp_filter];
+ if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) {
+ const int ctx = vp9_get_pred_context_switchable_interp(xd);
+ ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
}
cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff;
cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;
cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;
- for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
+ for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
- }
}
}
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 8990aa7..238c981 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2427,13 +2427,12 @@
return scaled_ref_frame;
}
-static INLINE int get_switchable_rate(MACROBLOCK *x) {
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
-
- const int c = vp9_get_pred_context_switchable_interp(xd);
- const int m = mbmi->interp_filter;
- return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
+static INLINE int get_switchable_rate(const MACROBLOCK *x) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
+ const int ctx = vp9_get_pred_context_switchable_interp(xd);
+ return SWITCHABLE_INTERP_RATE_FACTOR *
+ x->switchable_interp_costs[ctx][mbmi->interp_filter];
}
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 572a28d..5a96fba 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -80,7 +80,6 @@
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
@@ -89,6 +88,7 @@
ifeq ($(USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
endif