Merge "Add a best_yrd shortcut in splitmv mode search."
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 3320a16..3b72129 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -551,7 +551,7 @@
const ConvolveFunctions convolve8_neon(
vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
- vp9_convolve8_c, vp9_convolve8_avg_c);
+ vp9_convolve8_neon, vp9_convolve8_avg_neon);
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_neon),
diff --git a/vp9/common/arm/neon/vp9_convolve_neon.c b/vp9/common/arm/neon/vp9_convolve_neon.c
new file mode 100644
index 0000000..6e37ff6
--- /dev/null
+++ b/vp9/common/arm/neon/vp9_convolve_neon.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_common.h"
+
+void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ /* Given our constraints (w <= 64, h <= 64, taps == 8) the intermediate
+ * buffer needs at most 64 + 7 rows of 64 bytes; round up to 72 rows.
+ */
+ uint8_t temp[64 * 72];
+
+ // Account for the vertical phase needing 3 lines prior and 4 lines post
+ int intermediate_height = h + 7;
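+ /* Worked example: for the largest block h = 64, so intermediate_height is
+ * 64 + 7 = 71 rows; the horizontal pass may round this up to 72 rows,
+ * which is exactly the height of the temp buffer above.
+ */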
+
+ if (x_step_q4 != 16 || y_step_q4 != 16)
+ return vp9_convolve8_c(src, src_stride,
+ dst, dst_stride,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+
+ /* Filter starting 3 lines back. The NEON implementation will ignore the
+ * given height and filter a multiple of 4 lines. Since the output goes
+ * into the temp buffer, which has plenty of extra room and is later
+ * discarded, this is safe if somewhat less than ideal.
+ */
+ vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
+ temp, 64,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, intermediate_height);
+
+ /* Step into the temp buffer 3 lines to get the actual frame data */
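+ /* Output row r of dst is then filtered from temp rows r .. r + 7, i.e.
+ * original source rows r - 3 .. r + 4, the footprint of the 8-tap filter.
+ */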
+ vp9_convolve8_vert_neon(temp + 64 * 3, 64,
+ dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+}
+
+void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ uint8_t temp[64 * 72];
+ int intermediate_height = h + 7;
+
+ if (x_step_q4 != 16 || y_step_q4 != 16)
+ return vp9_convolve8_avg_c(src, src_stride,
+ dst, dst_stride,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+
+ /* This implementation has the same rounding behavior as above. In
+ * addition, we only want to average the values once, after both passes.
+ */
+ vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
+ temp, 64,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, intermediate_height);
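+ /* The horizontal pass above must not average: temp holds no previous data
+ * to blend with. Only this final pass reads dst and averages into it.
+ */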
+ vp9_convolve8_avg_vert_neon(temp + 64 * 3,
+ 64, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+}
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index ed6af66..c4bdb6b 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -12,17 +12,12 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_entropymv.h"
-//#define MV_COUNT_TESTING
-
#define MV_COUNT_SAT 20
#define MV_MAX_UPDATE_FACTOR 128
/* Integer pel reference mv threshold for use of high-precision 1/8 mv */
#define COMPANDED_MVREF_THRESH 8
-/* Smooth or bias the mv-counts before prob computation */
-/* #define SMOOTH_MV_COUNTS */
-
const vp9_tree_index vp9_mv_joint_tree[2 * MV_JOINTS - 2] = {
-MV_JOINT_ZERO, 2,
-MV_JOINT_HNZVZ, 4,
@@ -160,24 +155,6 @@
}
}
-#ifdef SMOOTH_MV_COUNTS
-static void smooth_counts(nmv_component_counts *mvcomp) {
- static const int flen = 3; // (filter_length + 1) / 2
- static const int fval[] = {8, 3, 1};
- static const int fvalbits = 4;
- int i;
- unsigned int smvcount[MV_VALS];
- vpx_memcpy(smvcount, mvcomp->mvcount, sizeof(smvcount));
- smvcount[MV_MAX] = (smvcount[MV_MAX - 1] + smvcount[MV_MAX + 1]) >> 1;
- for (i = flen - 1; i <= MV_VALS - flen; ++i) {
- int j, s = smvcount[i] * fval[0];
- for (j = 1; j < flen; ++j)
- s += (smvcount[i - j] + smvcount[i + j]) * fval[j];
- mvcomp->mvcount[i] = (s + (1 << (fvalbits - 1))) >> fvalbits;
- }
-}
-#endif
-
static void counts_to_context(nmv_component_counts *mvcomp, int usehp) {
int v;
vpx_memset(mvcomp->sign, 0, sizeof(nmv_component_counts) - sizeof(mvcomp->mvcount));
@@ -187,11 +164,9 @@
}
}
-void vp9_inc_mv(const MV *mv, const MV *ref, nmv_context_counts *mvctx,
- int usehp) {
+void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx) {
const MV_JOINT_TYPE j = vp9_get_mv_joint(mv);
mvctx->joints[j]++;
- usehp = usehp && vp9_use_mv_hp(ref);
if (mv_joint_vertical(j))
inc_mv_component_count(mv->row, &mvctx->comps[0], 1);
@@ -246,108 +221,41 @@
void vp9_adapt_mv_probs(VP9_COMMON *cm, int usehp) {
int i, j;
-#ifdef MV_COUNT_TESTING
- printf("joints count: ");
- for (j = 0; j < MV_JOINTS; ++j) printf("%d ", cm->fc.NMVcount.joints[j]);
- printf("\n"); fflush(stdout);
- printf("signs count:\n");
- for (i = 0; i < 2; ++i)
- printf("%d/%d ", cm->fc.NMVcount.comps[i].sign[0], cm->fc.NMVcount.comps[i].sign[1]);
- printf("\n"); fflush(stdout);
- printf("classes count:\n");
+
+ nmv_context *ctx = &cm->fc.nmvc;
+ nmv_context *pre_ctx = &cm->fc.pre_nmvc;
+ nmv_context_counts *cts = &cm->fc.NMVcount;
+
+ vp9_counts_process(cts, usehp);
+
+ adapt_probs(0, vp9_mv_joint_tree, ctx->joints, pre_ctx->joints, cts->joints);
+
for (i = 0; i < 2; ++i) {
- for (j = 0; j < MV_CLASSES; ++j)
- printf("%d ", cm->fc.NMVcount.comps[i].classes[j]);
- printf("\n"); fflush(stdout);
+ adapt_prob(&ctx->comps[i].sign, pre_ctx->comps[i].sign, cts->comps[i].sign);
+ adapt_probs(0, vp9_mv_class_tree, ctx->comps[i].classes,
+ pre_ctx->comps[i].classes, cts->comps[i].classes);
+ adapt_probs(0, vp9_mv_class0_tree, ctx->comps[i].class0,
+ pre_ctx->comps[i].class0, cts->comps[i].class0);
+
+ for (j = 0; j < MV_OFFSET_BITS; ++j)
+ adapt_prob(&ctx->comps[i].bits[j], pre_ctx->comps[i].bits[j],
+ cts->comps[i].bits[j]);
}
- printf("class0 count:\n");
+
for (i = 0; i < 2; ++i) {
for (j = 0; j < CLASS0_SIZE; ++j)
- printf("%d ", cm->fc.NMVcount.comps[i].class0[j]);
- printf("\n"); fflush(stdout);
- }
- printf("bits count:\n");
- for (i = 0; i < 2; ++i) {
- for (j = 0; j < MV_OFFSET_BITS; ++j)
- printf("%d/%d ", cm->fc.NMVcount.comps[i].bits[j][0],
- cm->fc.NMVcount.comps[i].bits[j][1]);
- printf("\n"); fflush(stdout);
- }
- printf("class0_fp count:\n");
- for (i = 0; i < 2; ++i) {
- for (j = 0; j < CLASS0_SIZE; ++j) {
- printf("{");
- for (k = 0; k < 4; ++k)
- printf("%d ", cm->fc.NMVcount.comps[i].class0_fp[j][k]);
- printf("}, ");
- }
- printf("\n"); fflush(stdout);
- }
- printf("fp count:\n");
- for (i = 0; i < 2; ++i) {
- for (j = 0; j < 4; ++j)
- printf("%d ", cm->fc.NMVcount.comps[i].fp[j]);
- printf("\n"); fflush(stdout);
- }
- if (usehp) {
- printf("class0_hp count:\n");
- for (i = 0; i < 2; ++i)
- printf("%d/%d ", cm->fc.NMVcount.comps[i].class0_hp[0],
- cm->fc.NMVcount.comps[i].class0_hp[1]);
- printf("\n"); fflush(stdout);
- printf("hp count:\n");
- for (i = 0; i < 2; ++i)
- printf("%d/%d ", cm->fc.NMVcount.comps[i].hp[0],
- cm->fc.NMVcount.comps[i].hp[1]);
- printf("\n"); fflush(stdout);
- }
-#endif
-#ifdef SMOOTH_MV_COUNTS
- smooth_counts(&cm->fc.NMVcount.comps[0]);
- smooth_counts(&cm->fc.NMVcount.comps[1]);
-#endif
- vp9_counts_process(&cm->fc.NMVcount, usehp);
+ adapt_probs(0, vp9_mv_fp_tree, ctx->comps[i].class0_fp[j],
+ pre_ctx->comps[i].class0_fp[j], cts->comps[i].class0_fp[j]);
- adapt_probs(0, vp9_mv_joint_tree,
- cm->fc.nmvc.joints, cm->fc.pre_nmvc.joints,
- cm->fc.NMVcount.joints);
+ adapt_probs(0, vp9_mv_fp_tree, ctx->comps[i].fp, pre_ctx->comps[i].fp,
+ cts->comps[i].fp);
+ }
- for (i = 0; i < 2; ++i) {
- adapt_prob(&cm->fc.nmvc.comps[i].sign,
- cm->fc.pre_nmvc.comps[i].sign,
- cm->fc.NMVcount.comps[i].sign);
- adapt_probs(0, vp9_mv_class_tree,
- cm->fc.nmvc.comps[i].classes, cm->fc.pre_nmvc.comps[i].classes,
- cm->fc.NMVcount.comps[i].classes);
- adapt_probs(0, vp9_mv_class0_tree,
- cm->fc.nmvc.comps[i].class0, cm->fc.pre_nmvc.comps[i].class0,
- cm->fc.NMVcount.comps[i].class0);
- for (j = 0; j < MV_OFFSET_BITS; ++j) {
- adapt_prob(&cm->fc.nmvc.comps[i].bits[j],
- cm->fc.pre_nmvc.comps[i].bits[j],
- cm->fc.NMVcount.comps[i].bits[j]);
- }
- }
- for (i = 0; i < 2; ++i) {
- for (j = 0; j < CLASS0_SIZE; ++j) {
- adapt_probs(0, vp9_mv_fp_tree,
- cm->fc.nmvc.comps[i].class0_fp[j],
- cm->fc.pre_nmvc.comps[i].class0_fp[j],
- cm->fc.NMVcount.comps[i].class0_fp[j]);
- }
- adapt_probs(0, vp9_mv_fp_tree,
- cm->fc.nmvc.comps[i].fp,
- cm->fc.pre_nmvc.comps[i].fp,
- cm->fc.NMVcount.comps[i].fp);
- }
if (usehp) {
for (i = 0; i < 2; ++i) {
- adapt_prob(&cm->fc.nmvc.comps[i].class0_hp,
- cm->fc.pre_nmvc.comps[i].class0_hp,
- cm->fc.NMVcount.comps[i].class0_hp);
- adapt_prob(&cm->fc.nmvc.comps[i].hp,
- cm->fc.pre_nmvc.comps[i].hp,
- cm->fc.NMVcount.comps[i].hp);
+ adapt_prob(&ctx->comps[i].class0_hp, pre_ctx->comps[i].class0_hp,
+ cts->comps[i].class0_hp);
+ adapt_prob(&ctx->comps[i].hp, pre_ctx->comps[i].hp, cts->comps[i].hp);
}
}
}
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
index 895df30..d7d6576 100644
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -125,8 +125,7 @@
nmv_component_counts comps[2];
} nmv_context_counts;
-void vp9_inc_mv(const MV *mv, const MV *ref, nmv_context_counts *mvctx,
- int usehp);
+void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx);
void vp9_counts_process(nmv_context_counts *NMVcount, int usehp);
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index abde63d..3103be2 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -124,9 +124,7 @@
// Rows
for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = input[j];
- vp9_idct4_1d(temp_in, outptr);
+ vp9_idct4_1d(input, outptr);
input += 4;
outptr += 4;
}
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index 5c8e346..3d33dbd 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -367,10 +367,9 @@
return pred_context;
}
// Returns a context number for the given MB prediction signal
-unsigned char vp9_get_pred_context_tx_size(const VP9_COMMON *cm,
- const MACROBLOCKD *xd) {
+unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd) {
const MODE_INFO *const mi = xd->mode_info_context;
- const MODE_INFO *const above_mi = mi - cm->mode_info_stride;
+ const MODE_INFO *const above_mi = mi - xd->mode_info_stride;
const MODE_INFO *const left_mi = mi - 1;
const int left_in_image = xd->left_available && left_mi->mbmi.mb_in_image;
const int above_in_image = xd->up_available && above_mi->mbmi.mb_in_image;
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 7fc9a1c..cb4c1d3 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -110,19 +110,18 @@
return cm->fc.single_ref_prob[pred_context][1];
}
-unsigned char vp9_get_pred_context_tx_size(const VP9_COMMON *cm,
- const MACROBLOCKD *xd);
+unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd);
-static INLINE const vp9_prob *vp9_get_pred_probs_tx_size(const VP9_COMMON *cm,
- const MACROBLOCKD * xd) {
+static INLINE const vp9_prob *vp9_get_pred_probs_tx_size(
+ const MACROBLOCKD *xd, const struct tx_probs *tx_probs) {
const MODE_INFO *const mi = xd->mode_info_context;
- const int pred_context = vp9_get_pred_context_tx_size(cm, xd);
+ const int pred_context = vp9_get_pred_context_tx_size(xd);
if (mi->mbmi.sb_type < BLOCK_SIZE_MB16X16)
- return cm->fc.tx_probs.p8x8[pred_context];
+ return tx_probs->p8x8[pred_context];
else if (mi->mbmi.sb_type < BLOCK_SIZE_SB32X32)
- return cm->fc.tx_probs.p16x16[pred_context];
+ return tx_probs->p16x16[pred_context];
else
- return cm->fc.tx_probs.p32x32[pred_context];
+ return tx_probs->p32x32[pred_context];
}
#endif // VP9_COMMON_VP9_PRED_COMMON_H_
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 812b015..c36efbd 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -271,7 +271,7 @@
specialize vp9_convolve_avg sse2
prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8 ssse3
+specialize vp9_convolve8 ssse3 neon
prototype void vp9_convolve8_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8_horiz ssse3 neon
@@ -280,7 +280,7 @@
specialize vp9_convolve8_vert ssse3 neon
prototype void vp9_convolve8_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_avg ssse3
+specialize vp9_convolve8_avg ssse3 neon
prototype void vp9_convolve8_avg_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8_avg_horiz ssse3 neon
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 5fb572e..88ede1a 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -26,16 +26,6 @@
#include "vp9/decoder/vp9_dsubexp.h"
#include "vp9/decoder/vp9_treereader.h"
-// #define DEBUG_DEC_MV
-#ifdef DEBUG_DEC_MV
-int dec_mvcount = 0;
-#endif
-
-// #define DEC_DEBUG
-#ifdef DEC_DEBUG
-extern int dec_debug;
-#endif
-
static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) {
return (MB_PREDICTION_MODE)treed_read(r, vp9_intra_mode_tree, p);
}
@@ -50,8 +40,8 @@
static TX_SIZE read_selected_txfm_size(VP9_COMMON *cm, MACROBLOCKD *xd,
BLOCK_SIZE_TYPE bsize, vp9_reader *r) {
- const int context = vp9_get_pred_context_tx_size(cm, xd);
- const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(cm, xd);
+ const int context = vp9_get_pred_context_tx_size(xd);
+ const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(xd, &cm->fc.tx_probs);
TX_SIZE txfm_size = vp9_read(r, tx_probs[0]);
if (txfm_size != TX_4X4 && bsize >= BLOCK_SIZE_MB16X16) {
txfm_size += vp9_read(r, tx_probs[1]);
@@ -255,7 +245,7 @@
if (mv_joint_horizontal(j))
diff.col = read_mv_component(r, &ctx->comps[1], usehp);
- vp9_inc_mv(&diff, ref, counts, usehp);
+ vp9_inc_mv(&diff, counts);
mv->row = ref->row + diff.row;
mv->col = ref->col + diff.col;
@@ -486,11 +476,6 @@
ref0 = mbmi->ref_frame[0];
ref1 = mbmi->ref_frame[1];
-#ifdef DEC_DEBUG
- if (dec_debug)
- printf("%d %d\n", xd->mode_info_context->mbmi.mv[0].as_mv.row,
- xd->mode_info_context->mbmi.mv[0].as_mv.col);
-#endif
vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context,
ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias);
@@ -510,13 +495,6 @@
best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int;
}
-#ifdef DEC_DEBUG
- if (dec_debug)
- printf("[D %d %d] %d %d %d %d\n", ref_frame,
- mbmi->mb_mode_context[ref_frame],
- mv_ref_p[0], mv_ref_p[1], mv_ref_p[2], mv_ref_p[3]);
-#endif
-
mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE
? read_switchable_filter_type(pbi, r)
: cm->mcomp_filter_type;
@@ -645,6 +623,31 @@
}
}
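+// Reads the compound prediction mode and, depending on it, any updates to
+// the compound-inter, single-reference and compound-reference probabilities.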
+static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
+ int i;
+
+ cm->comp_pred_mode = cm->allow_comp_inter_inter ? read_comp_pred_mode(r)
+ : SINGLE_PREDICTION_ONLY;
+
+ if (cm->comp_pred_mode == HYBRID_PREDICTION)
+ for (i = 0; i < COMP_INTER_CONTEXTS; i++)
+ if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
+
+ if (cm->comp_pred_mode != COMP_PREDICTION_ONLY)
+ for (i = 0; i < REF_CONTEXTS; i++) {
+ if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]);
+ if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]);
+ }
+
+ if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
+ for (i = 0; i < REF_CONTEXTS; i++)
+ if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
+}
+
void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
int k;
@@ -669,31 +672,8 @@
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.intra_inter_prob[i]);
- if (cm->allow_comp_inter_inter) {
- cm->comp_pred_mode = read_comp_pred_mode(r);
- if (cm->comp_pred_mode == HYBRID_PREDICTION)
- for (i = 0; i < COMP_INTER_CONTEXTS; i++)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
- vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
- } else {
- cm->comp_pred_mode = SINGLE_PREDICTION_ONLY;
- }
+ read_comp_pred(cm, r);
- if (cm->comp_pred_mode != COMP_PREDICTION_ONLY)
- for (i = 0; i < REF_CONTEXTS; i++) {
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
- vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]);
-
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
- vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]);
- }
-
- if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
- for (i = 0; i < REF_CONTEXTS; i++)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
- vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
-
- // VP9_INTRA_MODES
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
for (i = 0; i < VP9_INTRA_MODES - 1; ++i)
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 928fb70..bd2928e 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -202,9 +202,9 @@
static void write_selected_txfm_size(const VP9_COMP *cpi, TX_SIZE tx_size,
BLOCK_SIZE_TYPE bsize, vp9_writer *w) {
- const VP9_COMMON *const c = &cpi->common;
+ const VP9_COMMON *const cm = &cpi->common;
const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
- const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(c, xd);
+ const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(xd, &cm->fc.tx_probs);
vp9_write(w, tx_size != TX_4X4, tx_probs[0]);
if (bsize >= BLOCK_SIZE_MB16X16 && tx_size != TX_4X4) {
vp9_write(w, tx_size != TX_8X8, tx_probs[1]);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 56390ab..e773c38 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2589,7 +2589,7 @@
!(mbmi->ref_frame[0] != INTRA_FRAME &&
(mbmi->mb_skip_coeff ||
vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP)))) {
- const int context = vp9_get_pred_context_tx_size(cm, xd);
+ const int context = vp9_get_pred_context_tx_size(xd);
if (bsize >= BLOCK_SIZE_SB32X32) {
cm->fc.tx_counts.p32x32[context][mbmi->txfm_size]++;
} else if (bsize >= BLOCK_SIZE_MB16X16) {
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index 8adad9d..f309c1c 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -510,44 +510,41 @@
void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
int_mv *best_ref_mv, int_mv *second_best_ref_mv) {
- MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
- MV mv;
- int bwl = b_width_log2(mbmi->sb_type), bw = 1 << bwl;
- int bhl = b_height_log2(mbmi->sb_type), bh = 1 << bhl;
+ MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
+ MV diff;
+ const int bw = 1 << b_width_log2(mbmi->sb_type);
+ const int bh = 1 << b_height_log2(mbmi->sb_type);
int idx, idy;
if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
- int i;
PARTITION_INFO *pi = x->partition_info;
for (idy = 0; idy < 2; idy += bh) {
for (idx = 0; idx < 2; idx += bw) {
- i = idy * 2 + idx;
+ const int i = idy * 2 + idx;
if (pi->bmi[i].mode == NEWMV) {
- mv.row = (pi->bmi[i].mv.as_mv.row - best_ref_mv->as_mv.row);
- mv.col = (pi->bmi[i].mv.as_mv.col - best_ref_mv->as_mv.col);
- vp9_inc_mv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount,
- x->e_mbd.allow_high_precision_mv);
+ diff.row = pi->bmi[i].mv.as_mv.row - best_ref_mv->as_mv.row;
+ diff.col = pi->bmi[i].mv.as_mv.col - best_ref_mv->as_mv.col;
+ vp9_inc_mv(&diff, &cpi->NMVcount);
+
if (x->e_mbd.mode_info_context->mbmi.ref_frame[1] > INTRA_FRAME) {
- mv.row = pi->bmi[i].second_mv.as_mv.row -
+ diff.row = pi->bmi[i].second_mv.as_mv.row -
second_best_ref_mv->as_mv.row;
- mv.col = pi->bmi[i].second_mv.as_mv.col -
+ diff.col = pi->bmi[i].second_mv.as_mv.col -
second_best_ref_mv->as_mv.col;
- vp9_inc_mv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount,
- x->e_mbd.allow_high_precision_mv);
+ vp9_inc_mv(&diff, &cpi->NMVcount);
}
}
}
}
} else if (mbmi->mode == NEWMV) {
- mv.row = mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row;
- mv.col = mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col;
- vp9_inc_mv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount,
- x->e_mbd.allow_high_precision_mv);
+ diff.row = mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row;
+ diff.col = mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col;
+ vp9_inc_mv(&diff, &cpi->NMVcount);
+
if (mbmi->ref_frame[1] > INTRA_FRAME) {
- mv.row = mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row;
- mv.col = mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col;
- vp9_inc_mv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount,
- x->e_mbd.allow_high_precision_mv);
+ diff.row = mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row;
+ diff.col = mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col;
+ vp9_inc_mv(&diff, &cpi->NMVcount);
}
}
}
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 9042111..2afcd27 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -796,6 +796,8 @@
sf->skip_encode_sb = 1;
sf->use_uv_intra_rd_estimate = 1;
sf->using_small_partition_info = 1;
+ sf->disable_splitmv =
+ (MIN(cpi->common.width, cpi->common.height) >= 720) ? 1 : 0;
}
if (speed == 3) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -811,6 +813,7 @@
FLAG_SKIP_COMP_REFMISMATCH;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
+ sf->disable_splitmv = 1;
}
if (speed == 4) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -829,6 +832,8 @@
sf->optimize_coefficients = 0;
// sf->reduce_first_step_size = 1;
// sf->reference_masking = 1;
+
+ sf->disable_splitmv = 1;
}
/*
if (speed == 2) {
@@ -872,8 +877,6 @@
cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4;
- vp9_init_quantizer(cpi);
-
if (cpi->sf.iterative_sub_pixel == 1) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_step_iteratively;
} else if (cpi->sf.quarter_pixel_search) {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 7f5f0de..cb7a586 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -53,12 +53,12 @@
#define SPLITMV 0x10000
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
- {ZEROMV, LAST_FRAME, NONE},
- {DC_PRED, INTRA_FRAME, NONE},
-
{NEARESTMV, LAST_FRAME, NONE},
{NEARMV, LAST_FRAME, NONE},
+ {ZEROMV, LAST_FRAME, NONE},
+ {DC_PRED, INTRA_FRAME, NONE},
+
{ZEROMV, GOLDEN_FRAME, NONE},
{NEARESTMV, GOLDEN_FRAME, NONE},
@@ -869,7 +869,7 @@
int n, m;
int s0, s1;
- const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(cm, xd);
+ const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(xd, &cm->fc.tx_probs);
for (n = TX_4X4; n <= max_txfm_size; n++) {
r[n][1] = r[n][0];
@@ -976,7 +976,7 @@
double scale_rd[TX_SIZE_MAX_SB] = {1.73, 1.44, 1.20, 1.00};
// double scale_r[TX_SIZE_MAX_SB] = {2.82, 2.00, 1.41, 1.00};
- const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(cm, xd);
+ const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(xd, &cm->fc.tx_probs);
// for (n = TX_4X4; n <= max_txfm_size; n++)
// r[n][0] = (r[n][0] * scale_r[n]);
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 02eb7f6..196846e 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -85,6 +85,7 @@
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)