Merge "Removing experimental code from vp9_entropymv.c."
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 3320a16..3b72129 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -551,7 +551,7 @@
const ConvolveFunctions convolve8_neon(
vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
- vp9_convolve8_c, vp9_convolve8_avg_c);
+ vp9_convolve8_neon, vp9_convolve8_avg_neon);
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
make_tuple(4, 4, &convolve8_neon),
diff --git a/vp9/common/arm/neon/vp9_convolve_neon.c b/vp9/common/arm/neon/vp9_convolve_neon.c
new file mode 100644
index 0000000..6e37ff6
--- /dev/null
+++ b/vp9/common/arm/neon/vp9_convolve_neon.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_common.h"
+
+void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
+ * intermediate buffer to 64 + 7 rows (+ 1 to make the row count divisible
+ * by 4), i.e. 64 * 72 bytes.
+ */
+ uint8_t temp[64 * 72];
+
+ // Account for the vertical phase needing 3 lines prior and 4 lines post
+ int intermediate_height = h + 7;
+
+ if (x_step_q4 != 16 || y_step_q4 != 16)
+ return vp9_convolve8_c(src, src_stride,
+ dst, dst_stride,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+
+ /* Filter starting 3 lines back. The neon implementation will ignore the
+ * given height and filter a multiple of 4 lines. Since this goes into
+ * the temp buffer, which has plenty of extra room and is subsequently
+ * discarded, this is safe if somewhat less than ideal.
+ */
+ vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
+ temp, 64,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, intermediate_height);
+
+ /* Step into the temp buffer 3 lines to get the actual frame data */
+ vp9_convolve8_vert_neon(temp + 64 * 3, 64,
+ dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+}
+
+void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ uint8_t temp[64 * 72];
+ int intermediate_height = h + 7;
+
+ if (x_step_q4 != 16 || y_step_q4 != 16)
+ return vp9_convolve8_avg_c(src, src_stride,
+ dst, dst_stride,
+ filter_x, x_step_q4,
+ filter_y, y_step_q4,
+ w, h);
+
+ /* This implementation has the same constraints as above. In addition, we
+ * only want to average the values after both passes have run, so the
+ * horizontal pass uses the non-averaging kernel.
+ */
+ vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
+ temp, 64,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, intermediate_height);
+ vp9_convolve8_avg_vert_neon(temp + 64 * 3, 64,
+ dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+}
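For reference, the wrapper above builds a full 2D convolution out of the two existing 1D NEON kernels via an intermediate buffer. The self-contained sketch below (not part of the patch) shows the same scheme under simplified assumptions: a [1 2 1]/4 kernel stands in for VP9's 8-tap subpel filters, so the vertical pass needs only 1 row of context above and below instead of 3 and 4, and the caller is assumed to provide a valid 1-pixel border around src.

#include <stdint.h>

/* Horizontal [1 2 1]/4 pass; reads 1 pixel left and right of each output. */
static void filter_h(const uint8_t *src, int src_stride,
                     uint8_t *dst, int dst_stride, int w, int h) {
  int r, c;
  for (r = 0; r < h; ++r)
    for (c = 0; c < w; ++c)
      dst[r * dst_stride + c] =
          (src[r * src_stride + c - 1] + 2 * src[r * src_stride + c] +
           src[r * src_stride + c + 1] + 2) >> 2;
}

/* Vertical [1 2 1]/4 pass; reads 1 row above and below each output. */
static void filter_v(const uint8_t *src, int src_stride,
                     uint8_t *dst, int dst_stride, int w, int h) {
  int r, c;
  for (r = 0; r < h; ++r)
    for (c = 0; c < w; ++c)
      dst[r * dst_stride + c] =
          (src[(r - 1) * src_stride + c] + 2 * src[r * src_stride + c] +
           src[(r + 1) * src_stride + c] + 2) >> 2;
}

/* Two-pass 2D filter mirroring vp9_convolve8_neon: run the horizontal pass
 * starting 1 row early into a temp buffer tall enough for the vertical
 * taps, then run the vertical pass 1 row into that buffer (the analogue of
 * temp + 64 * 3 above). */
static void convolve_two_pass(const uint8_t *src, int src_stride,
                              uint8_t *dst, int dst_stride, int w, int h) {
  uint8_t temp[64 * 66];  /* w <= 64, h <= 64 assumed, as in vp9 */
  const int intermediate_height = h + 2;
  filter_h(src - src_stride, src_stride, temp, 64, w, intermediate_height);
  filter_v(temp + 64, 64, dst, dst_stride, w, h);
}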
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index f56586a..0795975 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -389,9 +389,7 @@
static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
- const TX_SIZE size = mbmi->txfm_size;
- const TX_SIZE max_size = max_uv_txsize_lookup[mbmi->sb_type];
- return (size > max_size ? max_size : size);
+ return MIN(mbmi->txfm_size, max_uv_txsize_lookup[mbmi->sb_type]);
}
struct plane_block_idx {
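The table-driven clamp returns the same answers as the cascade this change also removes from vp9_rdopt.c further below. A tiny standalone check (the enum ordering and the TX_8X8 cap for 16x16 blocks are assumptions matching vp9's definitions; with 4:2:0 subsampling a 16x16 luma block carries 8x8 chroma):

#include <assert.h>

enum { TX_4X4, TX_8X8, TX_16X16, TX_32X32 };
#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
  const int max_uv_for_16x16 = TX_8X8;  /* assumed lookup-table value */
  /* a TX_16X16 luma transform clamps to TX_8X8 for chroma... */
  assert(MIN(TX_16X16, max_uv_for_16x16) == TX_8X8);
  /* ...while sizes already small enough pass through unchanged */
  assert(MIN(TX_4X4, max_uv_for_16x16) == TX_4X4);
  return 0;
}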
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 812b015..c36efbd 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -271,7 +271,7 @@
specialize vp9_convolve_avg sse2
prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8 ssse3
+specialize vp9_convolve8 ssse3 neon
prototype void vp9_convolve8_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8_horiz ssse3 neon
@@ -280,7 +280,7 @@
specialize vp9_convolve8_vert ssse3 neon
prototype void vp9_convolve8_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
-specialize vp9_convolve8_avg ssse3
+specialize vp9_convolve8_avg ssse3 neon
prototype void vp9_convolve8_avg_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"
specialize vp9_convolve8_avg_horiz ssse3 neon
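For context: prototype declares a function's signature and specialize lists its optimized variants; the rtcd generator then emits per-variant declarations plus a runtime-dispatched entry point in vp9_rtcd.h. A rough, hedged sketch of the shape of that generated code (HAS_NEON's value and setup_convolve_rtcd are illustrative; the real header varies by configuration):

#include <stddef.h>
#include <stdint.h>

void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4, int w, int h);
void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int x_step_q4,
                        const int16_t *filter_y, int y_step_q4, int w, int h);

/* runtime-dispatched entry point used by the rest of the codec */
void (*vp9_convolve8)(const uint8_t *src, ptrdiff_t src_stride,
                      uint8_t *dst, ptrdiff_t dst_stride,
                      const int16_t *filter_x, int x_step_q4,
                      const int16_t *filter_y, int y_step_q4, int w, int h);

#define HAS_NEON 0x01  /* illustrative flag value */

/* chosen once at startup from detected CPU features */
static void setup_convolve_rtcd(int cpu_flags) {
  vp9_convolve8 = vp9_convolve8_c;
  if (cpu_flags & HAS_NEON) vp9_convolve8 = vp9_convolve8_neon;
}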
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 5fb572e..a193c9f 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -26,16 +26,6 @@
#include "vp9/decoder/vp9_dsubexp.h"
#include "vp9/decoder/vp9_treereader.h"
-// #define DEBUG_DEC_MV
-#ifdef DEBUG_DEC_MV
-int dec_mvcount = 0;
-#endif
-
-// #define DEC_DEBUG
-#ifdef DEC_DEBUG
-extern int dec_debug;
-#endif
-
static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) {
return (MB_PREDICTION_MODE)treed_read(r, vp9_intra_mode_tree, p);
}
@@ -486,11 +476,6 @@
ref0 = mbmi->ref_frame[0];
ref1 = mbmi->ref_frame[1];
-#ifdef DEC_DEBUG
- if (dec_debug)
- printf("%d %d\n", xd->mode_info_context->mbmi.mv[0].as_mv.row,
- xd->mode_info_context->mbmi.mv[0].as_mv.col);
-#endif
vp9_find_mv_refs(cm, xd, mi, xd->prev_mode_info_context,
ref0, mbmi->ref_mvs[ref0], cm->ref_frame_sign_bias);
@@ -510,13 +495,6 @@
best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int;
}
-#ifdef DEC_DEBUG
- if (dec_debug)
- printf("[D %d %d] %d %d %d %d\n", ref_frame,
- mbmi->mb_mode_context[ref_frame],
- mv_ref_p[0], mv_ref_p[1], mv_ref_p[2], mv_ref_p[3]);
-#endif
-
mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE
? read_switchable_filter_type(pbi, r)
: cm->mcomp_filter_type;
@@ -645,6 +623,31 @@
}
}
+static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
+ int i;
+
+ cm->comp_pred_mode = cm->allow_comp_inter_inter ? read_comp_pred_mode(r)
+ : SINGLE_PREDICTION_ONLY;
+
+ if (cm->comp_pred_mode == HYBRID_PREDICTION)
+ for (i = 0; i < COMP_INTER_CONTEXTS; i++)
+ if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
+
+ if (cm->comp_pred_mode != COMP_PREDICTION_ONLY)
+ for (i = 0; i < REF_CONTEXTS; i++) {
+ if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]);
+ if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]);
+ }
+
+ if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
+ for (i = 0; i < REF_CONTEXTS; i++)
+ if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
+}
+
void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
int k;
@@ -669,31 +672,8 @@
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
vp9_diff_update_prob(r, &cm->fc.intra_inter_prob[i]);
- if (cm->allow_comp_inter_inter) {
- cm->comp_pred_mode = read_comp_pred_mode(r);
- if (cm->comp_pred_mode == HYBRID_PREDICTION)
- for (i = 0; i < COMP_INTER_CONTEXTS; i++)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
- vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
- } else {
- cm->comp_pred_mode = SINGLE_PREDICTION_ONLY;
- }
+ read_comp_pred(cm, r);
- if (cm->comp_pred_mode != COMP_PREDICTION_ONLY)
- for (i = 0; i < REF_CONTEXTS; i++) {
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
- vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]);
-
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
- vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]);
- }
-
- if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
- for (i = 0; i < REF_CONTEXTS; i++)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
- vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
-
- // VP9_INTRA_MODES
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
for (i = 0; i < VP9_INTRA_MODES - 1; ++i)
if (vp9_read(r, VP9_MODE_UPDATE_PROB))
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index ae9f0aa..b4c06f5 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -143,6 +143,11 @@
int rd_search;
int skip_encode;
+ // Used to store the sub-partitions' motion search results.
+ int fast_ms;
+ int_mv pred_mv;
+ int subblock_ref;
+
// TODO(jingning): Need to refactor the structure arrays that buffers the
// coding mode decisions of each partition type.
PICK_MODE_CONTEXT ab4x4_context[4][4][4];
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 48c1b33..3dd235a 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1466,6 +1466,138 @@
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
}
+
+ x->fast_ms = 0;
+ x->pred_mv.as_int = 0;
+ x->subblock_ref = 0;
+
+ // Use the four subblocks' motion estimation results to speed up the
+ // current partition's check.
+ if (cpi->sf.using_small_partition_info) {
+ // Only use the 8x8 result for non-HD videos.
+ // int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0;
+ int use_8x8 = 1;
+
+ if (cm->frame_type && !cpi->is_src_frame_alt_ref &&
+ ((use_8x8 && bsize == BLOCK_SIZE_MB16X16) ||
+ bsize == BLOCK_SIZE_SB32X32 || bsize == BLOCK_SIZE_SB64X64)) {
+ int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0;
+
+ if (bsize == BLOCK_SIZE_MB16X16) {
+ ref0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.
+ ref_frame[0];
+ ref1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.
+ ref_frame[0];
+ ref2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.
+ ref_frame[0];
+ ref3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.
+ ref_frame[0];
+ } else if (bsize == BLOCK_SIZE_SB32X32) {
+ ref0 = x->mb_context[xd->sb_index][0].mic.mbmi.ref_frame[0];
+ ref1 = x->mb_context[xd->sb_index][1].mic.mbmi.ref_frame[0];
+ ref2 = x->mb_context[xd->sb_index][2].mic.mbmi.ref_frame[0];
+ ref3 = x->mb_context[xd->sb_index][3].mic.mbmi.ref_frame[0];
+ } else if (bsize == BLOCK_SIZE_SB64X64) {
+ ref0 = x->sb32_context[0].mic.mbmi.ref_frame[0];
+ ref1 = x->sb32_context[1].mic.mbmi.ref_frame[0];
+ ref2 = x->sb32_context[2].mic.mbmi.ref_frame[0];
+ ref3 = x->sb32_context[3].mic.mbmi.ref_frame[0];
+ }
+
+ // Only proceed when all four subblocks picked an inter reference frame.
+ if (ref0 && ref1 && ref2 && ref3) {
+ int16_t mvr0 = 0, mvc0 = 0, mvr1 = 0, mvc1 = 0, mvr2 = 0, mvc2 = 0,
+ mvr3 = 0, mvc3 = 0;
+ int d01, d23, d02, d13; // mv distances between pairs of subblocks
+
+ // Get each subblock's motion vectors.
+ if (bsize == BLOCK_SIZE_MB16X16) {
+ mvr0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
+ as_mv.row;
+ mvc0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
+ as_mv.col;
+ mvr1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
+ as_mv.row;
+ mvc1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
+ as_mv.col;
+ mvr2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
+ as_mv.row;
+ mvc2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
+ as_mv.col;
+ mvr3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
+ as_mv.row;
+ mvc3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
+ as_mv.col;
+ } else if (bsize == BLOCK_SIZE_SB32X32) {
+ mvr0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.row;
+ mvc0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.col;
+ mvr1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.row;
+ mvc1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.col;
+ mvr2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.row;
+ mvc2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.col;
+ mvr3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.row;
+ mvc3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.col;
+ } else if (bsize == BLOCK_SIZE_SB64X64) {
+ mvr0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.row;
+ mvc0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.col;
+ mvr1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.row;
+ mvc1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.col;
+ mvr2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.row;
+ mvc2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.col;
+ mvr3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.row;
+ mvc3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.col;
+ }
+
+ // Flip the mv sign if the reference frame's sign bias is set.
+ if (cm->ref_frame_sign_bias[ref0]) {
+ mvr0 *= -1;
+ mvc0 *= -1;
+ }
+
+ if (cm->ref_frame_sign_bias[ref1]) {
+ mvr1 *= -1;
+ mvc1 *= -1;
+ }
+
+ if (cm->ref_frame_sign_bias[ref2]) {
+ mvr2 *= -1;
+ mvc2 *= -1;
+ }
+
+ if (cm->ref_frame_sign_bias[ref3]) {
+ mvr3 *= -1;
+ mvc3 *= -1;
+ }
+
+ // Calculate mv distances.
+ d01 = MAX(abs(mvr0 - mvr1), abs(mvc0 - mvc1));
+ d23 = MAX(abs(mvr2 - mvr3), abs(mvc2 - mvc3));
+ d02 = MAX(abs(mvr0 - mvr2), abs(mvc0 - mvc2));
+ d13 = MAX(abs(mvr1 - mvr3), abs(mvc1 - mvc3));
+
+ if (d01 < 24 && d23 < 24 && d02 < 24 && d13 < 24) {
+ // Set fast motion search level.
+ x->fast_ms = 1;
+
+ // Calculate the prediction MV.
+ x->pred_mv.as_mv.row = (mvr0 + mvr1 + mvr2 + mvr3) >> 2;
+ x->pred_mv.as_mv.col = (mvc0 + mvc1 + mvc2 + mvc3) >> 2;
+
+ if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 &&
+ d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
+ // Set fast motion search level.
+ x->fast_ms = 2;
+
+ if (!d01 && !d23 && !d02 && !d13) {
+ x->fast_ms = 3;
+ x->subblock_ref = ref0;
+ }
+ }
+ }
+ }
+ }
+ }
+
if (!cpi->sf.use_partitions_less_than
|| (cpi->sf.use_partitions_less_than
&& bsize <= cpi->sf.less_than_block_size)) {
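Distilled into one function (a hedged restatement, not patch code; fast_ms_level and its signature are hypothetical), the heuristic grades how tightly the four subblocks' motion vectors cluster and escalates the fast_ms level accordingly:

#include <stdint.h>
#include <stdlib.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* mvr/mvc hold the four sign-corrected subblock mvs (1/8-pel units).
 * Level 1: all pairwise distances < 24 -> shrink the search range.
 * Level 2: references match and distances < 2 -> shrink it further.
 * Level 3: vectors identical -> also pin the subblock reference frame. */
static int fast_ms_level(const int16_t mvr[4], const int16_t mvc[4],
                         int refs_match, int16_t *pred_row,
                         int16_t *pred_col) {
  const int d01 = MAX(abs(mvr[0] - mvr[1]), abs(mvc[0] - mvc[1]));
  const int d23 = MAX(abs(mvr[2] - mvr[3]), abs(mvc[2] - mvc[3]));
  const int d02 = MAX(abs(mvr[0] - mvr[2]), abs(mvc[0] - mvc[2]));
  const int d13 = MAX(abs(mvr[1] - mvr[3]), abs(mvc[1] - mvc[3]));
  int level = 0;

  if (d01 < 24 && d23 < 24 && d02 < 24 && d13 < 24) {
    level = 1;
    /* predict the current partition's mv as the subblock average */
    *pred_row = (int16_t)((mvr[0] + mvr[1] + mvr[2] + mvr[3]) >> 2);
    *pred_col = (int16_t)((mvc[0] + mvc[1] + mvc[2] + mvc[3]) >> 2);
    if (refs_match && d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
      level = 2;
      if (!d01 && !d23 && !d02 && !d13)
        level = 3;
    }
  }
  return level;
}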
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index efee6e6..500bdfe 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -722,7 +722,8 @@
sf->last_chroma_intra_mode = TM_PRED;
sf->use_rd_breakout = 0;
sf->skip_encode_sb = 0;
-
+ sf->use_uv_intra_rd_estimate = 0;
+ sf->using_small_partition_info = 0;
// Skip any mode not chosen at size < X for all sizes > X
// Hence BLOCK_SIZE_SB64X64 (skip is off)
sf->unused_mode_skip_lvl = BLOCK_SIZE_SB64X64;
@@ -793,6 +794,10 @@
sf->last_chroma_intra_mode = DC_PRED;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
+ sf->use_uv_intra_rd_estimate = 1;
+ sf->using_small_partition_info = 1;
+ sf->disable_splitmv =
+ (MIN(cpi->common.width, cpi->common.height) >= 720) ? 1 : 0;
}
if (speed == 3) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -808,6 +813,7 @@
FLAG_SKIP_COMP_REFMISMATCH;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
+ sf->disable_splitmv = 1;
}
if (speed == 4) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -826,6 +832,8 @@
sf->optimize_coefficients = 0;
// sf->reduce_first_step_size = 1;
// sf->reference_masking = 1;
+
+ sf->disable_splitmv = 1;
}
/*
if (speed == 2) {
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 7bc757e..19b1e3a 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -268,6 +268,7 @@
int adjust_partitioning_from_last_frame;
int last_partitioning_redo_frequency;
int disable_splitmv;
+ int using_small_partition_info;
// Implements various heuristics to skip searching modes
// The heuristics selected are based on flags
@@ -275,6 +276,7 @@
unsigned int mode_search_skip_flags;
MB_PREDICTION_MODE last_chroma_intra_mode;
int use_rd_breakout;
+ int use_uv_intra_rd_estimate;
} SPEED_FEATURES;
typedef struct VP9_COMP {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 58eeed2..6df1701 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1524,6 +1524,24 @@
return best_rd;
}
+static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE_TYPE bsize) {
+ int64_t this_rd;
+
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+ super_block_uvrd(&cpi->common, x, rate_tokenonly,
+ distortion, skippable, NULL, bsize);
+ *rate = *rate_tokenonly +
+ x->intra_uv_mode_cost[x->e_mbd.frame_type][DC_PRED];
+ this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
+
+ x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+
+ return this_rd;
+}
+
static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
int mode_context) {
MACROBLOCK *const x = &cpi->mb;
@@ -1641,7 +1659,7 @@
int64_t best_yrd,
int i,
int *labelyrate,
- int64_t *distortion,
+ int64_t *distortion, int64_t *sse,
ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl) {
int k;
@@ -1666,7 +1684,7 @@
uint8_t* const dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
xd->plane[0].dst.buf,
xd->plane[0].dst.stride);
- int64_t thisdistortion = 0;
+ int64_t thisdistortion = 0, thissse = 0;
int thisrate = 0;
vp9_build_inter_predictor(pre,
@@ -1710,6 +1728,7 @@
thisdistortion += vp9_block_error(coeff,
BLOCK_OFFSET(xd->plane[0].dqcoeff,
k, 16), 16, &ssz);
+ thissse += ssz;
thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
ta + (k & 1),
tl + (k >> 1), TX_4X4, 16);
@@ -1717,6 +1736,7 @@
}
*distortion += thisdistortion;
*labelyrate += thisrate;
+ *sse = thissse >> 2;
*distortion >>= 2;
return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
@@ -1729,6 +1749,7 @@
int64_t segment_rd;
int r;
int64_t d;
+ int64_t sse;
int segment_yrate;
MB_PREDICTION_MODE modes[4];
int_mv mvs[4], second_mvs[4];
@@ -1777,7 +1798,7 @@
int_mv seg_mvs[4][MAX_REF_FRAMES],
int mi_row, int mi_col) {
int i, j, br = 0, rate = 0, sbr = 0, idx, idy;
- int64_t bd = 0, sbd = 0;
+ int64_t bd = 0, sbd = 0, subblock_sse = 0, block_sse = 0;
MB_PREDICTION_MODE this_mode;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
const int label_count = 4;
@@ -1830,7 +1851,7 @@
// search for the best motion vector on this segment
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int64_t this_rd;
- int64_t distortion;
+ int64_t distortion, sse;
int labelyrate;
ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
const struct buf_2d orig_src = x->plane[0].src;
@@ -1957,14 +1978,15 @@
this_rd = encode_inter_mb_segment(cpi, x,
bsi->segment_rd - this_segment_rd,
- i, &labelyrate,
- &distortion, t_above_s, t_left_s);
+ i, &labelyrate, &distortion, &sse,
+ t_above_s, t_left_s);
this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
rate += labelyrate;
if (this_rd < best_label_rd) {
sbr = rate;
sbd = distortion;
+ subblock_sse = sse;
bestlabelyrate = labelyrate;
mode_selected = this_mode;
best_label_rd = this_rd;
@@ -1984,6 +2006,7 @@
br += sbr;
bd += sbd;
+ block_sse += subblock_sse;
segmentyrate += bestlabelyrate;
this_segment_rd += best_label_rd;
@@ -2007,6 +2030,7 @@
bsi->d = bd;
bsi->segment_yrate = segmentyrate;
bsi->segment_rd = this_segment_rd;
+ bsi->sse = block_sse;
// store everything needed to come back to this!!
for (i = 0; i < 4; i++) {
@@ -2025,7 +2049,8 @@
int *returntotrate,
int *returnyrate,
int64_t *returndistortion,
- int *skippable, int mvthresh,
+ int *skippable, int64_t *psse,
+ int mvthresh,
int_mv seg_mvs[4][MAX_REF_FRAMES],
int mi_row, int mi_col) {
int i;
@@ -2074,6 +2099,7 @@
*returndistortion = bsi.d;
*returnyrate = bsi.segment_yrate;
*skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
+ *psse = bsi.sse;
mbmi->mode = bsi.modes[3];
return bsi.segment_rd;
@@ -2316,6 +2342,7 @@
int mi_row, int mi_col,
int_mv *tmp_mv, int *rate_mv) {
MACROBLOCKD *xd = &x->e_mbd;
+ VP9_COMMON *cm = &cpi->common;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
int bestsme = INT_MAX;
@@ -2346,18 +2373,37 @@
vp9_clamp_mv_min_max(x, &ref_mv);
- // Work out the size of the first step in the mv step search.
- // 0 here is maximum length first step. 1 is MAX >> 1 etc.
- if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
- step_param = vp9_init_search_range(cpi, cpi->max_mv_magnitude);
- } else {
- step_param = vp9_init_search_range(
- cpi, MIN(cpi->common.width, cpi->common.height));
- }
+ // Adjust search parameters based on the small partitions' results.
+ if (x->fast_ms) {
+ // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
+ // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
+ // adjust search range
+ step_param = 6;
+ if (x->fast_ms > 1)
+ step_param = 8;
- // mvp_full.as_int = ref_mv[0].as_int;
- mvp_full.as_int =
- mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
+ // Get prediction MV.
+ mvp_full.as_int = x->pred_mv.as_int;
+
+ // Adjust MV sign if needed.
+ if (cm->ref_frame_sign_bias[ref]) {
+ mvp_full.as_mv.col *= -1;
+ mvp_full.as_mv.row *= -1;
+ }
+ } else {
+ // Work out the size of the first step in the mv step search.
+ // 0 here is maximum length first step. 1 is MAX >> 1 etc.
+ if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
+ step_param = vp9_init_search_range(cpi, cpi->max_mv_magnitude);
+ } else {
+ step_param = vp9_init_search_range(
+ cpi, MIN(cpi->common.width, cpi->common.height));
+ }
+
+ // mvp_full.as_int = ref_mv[0].as_int;
+ mvp_full.as_int =
+ mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
+ }
mvp_full.as_mv.col >>= 3;
mvp_full.as_mv.row >>= 3;
@@ -3093,20 +3139,41 @@
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
}
- if (!cpi->sf.use_avoid_tested_higherror
+
+ // If intra is not masked off then get uv intra mode rd.
+ if (x->fast_ms < 2 && (!cpi->sf.use_avoid_tested_higherror
|| (cpi->sf.use_avoid_tested_higherror
- && (ref_frame_mask & (1 << INTRA_FRAME)))) {
+ && (ref_frame_mask & (1 << INTRA_FRAME))))) {
+ // Note that the enumerator TXFM_MODE "matches" TX_SIZE.
+ // E.g. ONLY_4X4 = TX_4X4, ALLOW_8X8 = TX_8X8, etc., such that the MIN
+ // operation below correctly constrains max_uvtxfm_size.
+ TX_SIZE max_uvtxfm_size =
+ MIN(max_uv_txsize_lookup[bsize], (TX_SIZE)cm->txfm_mode);
+ TX_SIZE min_uvtxfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL)
+ ? max_uvtxfm_size : TX_4X4;
+
mbmi->mode = DC_PRED;
mbmi->ref_frame[0] = INTRA_FRAME;
- for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 :
- (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 :
- (bsize < BLOCK_SIZE_SB64X64 ? TX_16X16 : TX_32X32)));
- i++) {
+
+ // Test all possible UV transform sizes that may be used in the main loop
+ for (i = min_uvtxfm_size; i <= max_uvtxfm_size; ++i) {
mbmi->txfm_size = i;
- rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[i], &rate_uv_tokenonly[i],
- &dist_uv[i], &skip_uv[i],
- (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
- bsize);
+
+ // Use an estimated rd for uv_intra based on DC_PRED if the
+ // appropriate speed flag is set.
+ if (cpi->sf.use_uv_intra_rd_estimate) {
+ rd_sbuv_dcpred(cpi, x, &rate_uv_intra[i],
+ &rate_uv_tokenonly[i], &dist_uv[i], &skip_uv[i],
+ (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
+ bsize);
+ // Else do a proper rd search for each possible transform size that may
+ // be considered in the main rd loop.
+ } else {
+ rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[i],
+ &rate_uv_tokenonly[i], &dist_uv[i], &skip_uv[i],
+ (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8
+ : bsize);
+ }
mode_uv[i] = mbmi->uv_mode;
}
}
@@ -3155,6 +3222,12 @@
x->skip = 0;
+ // Skip some checks based on the small partitions' results.
+ if (x->fast_ms > 1 && !ref_frame)
+ continue;
+ if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
+ continue;
+
if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_SIZE_SB8X8) {
if (!(ref_frame_mask & (1 << ref_frame))) {
continue;
@@ -3330,13 +3403,7 @@
if (rate_y == INT_MAX)
continue;
- uv_tx = mbmi->txfm_size;
- if (bsize < BLOCK_SIZE_MB16X16 && uv_tx == TX_8X8)
- uv_tx = TX_4X4;
- if (bsize < BLOCK_SIZE_SB32X32 && uv_tx == TX_16X16)
- uv_tx = TX_8X8;
- else if (bsize < BLOCK_SIZE_SB64X64 && uv_tx == TX_32X32)
- uv_tx = TX_16X16;
+ uv_tx = MIN(mbmi->txfm_size, max_uv_txsize_lookup[bsize]);
rate_uv = rate_uv_intra[uv_tx];
distortion_uv = dist_uv[uv_tx];
@@ -3354,7 +3421,7 @@
int64_t this_rd_thresh;
int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
- int64_t tmp_best_distortion = INT_MAX;
+ int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
int tmp_best_skippable = 0;
int switchable_filter_index;
int_mv *second_ref = is_comp_pred ?
@@ -3397,7 +3464,7 @@
second_ref,
best_yrd,
&rate, &rate_y, &distortion,
- &skippable,
+ &skippable, &total_sse,
(int)this_rd_thresh, seg_mvs,
mi_row, mi_col);
if (tmp_rd == INT64_MAX) {
@@ -3423,6 +3490,7 @@
tmp_best_rate = rate;
tmp_best_ratey = rate_y;
tmp_best_distortion = distortion;
+ tmp_best_sse = total_sse;
tmp_best_skippable = skippable;
tmp_best_mbmode = *mbmi;
tmp_best_partition = *x->partition_info;
@@ -3456,7 +3524,7 @@
second_ref,
best_yrd,
&rate, &rate_y, &distortion,
- &skippable,
+ &skippable, &total_sse,
(int)this_rd_thresh, seg_mvs,
mi_row, mi_col);
if (tmp_rd == INT64_MAX)
@@ -3467,6 +3535,7 @@
tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
}
tmp_rd = tmp_best_rdu;
+ total_sse = tmp_best_sse;
rate = tmp_best_rate;
rate_y = tmp_best_ratey;
distortion = tmp_best_distortion;
@@ -3499,11 +3568,12 @@
BLOCK_SIZE_SB8X8);
vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
- &uv_skippable, NULL,
+ &uv_skippable, &uv_sse,
BLOCK_SIZE_SB8X8, TX_4X4);
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
+ total_sse += uv_sse;
txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
for (i = 0; i < NB_TXFM_MODES; ++i)
@@ -3565,7 +3635,7 @@
}
}
} else if (mb_skip_allowed && ref_frame != INTRA_FRAME &&
- this_mode != SPLITMV && !xd->lossless) {
+ !xd->lossless) {
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
// Add in the cost of the no skip flag.
@@ -3764,6 +3834,20 @@
break;
}
+ // If we used an estimate for the uv intra rd in the loop above...
+ if (cpi->sf.use_uv_intra_rd_estimate) {
+ // Redo the full UV intra rd mode selection if the best mode above was intra.
+ if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
+ TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
+ rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[uv_tx_size],
+ &rate_uv_tokenonly[uv_tx_size],
+ &dist_uv[uv_tx_size],
+ &skip_uv[uv_tx_size],
+ (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8
+ : bsize);
+ }
+ }
+
// If indicated then mark the index of the chosen mode to be inspected at
// other block sizes.
if (bsize <= cpi->sf.unused_mode_skip_lvl) {
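One motivation for threading sse/total_sse through the split-mv path shows up in the skip-test hunk above: the this_mode != SPLITMV exclusion is dropped, so the comparison now needs a valid total_sse for split modes as well. The test weighs coding the residual against skipping it, where skipping has zero rate but leaves distortion equal to the prediction SSE. A hedged sketch (rdcost is a simplified stand-in for vp9's fixed-point RDCOST macro):

#include <stdint.h>

/* lambda-weighted rate plus distortion; the real macro uses fixed point */
static int64_t rdcost(int64_t lambda, int64_t rate, int64_t dist) {
  return lambda * rate + dist;
}

/* Coding the residual buys distortion2 at rate_y + rate_uv; skipping is
 * free in rate but keeps the full prediction SSE as distortion. */
static int prefer_skip(int64_t lambda, int rate_y, int rate_uv,
                       int64_t distortion2, int64_t total_sse) {
  return rdcost(lambda, 0, total_sse) <=
         rdcost(lambda, rate_y + rate_uv, distortion2);
}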
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 02eb7f6..196846e 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -85,6 +85,7 @@
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c
index 60df0af..cc8da2a 100644
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -103,9 +103,9 @@
const int c_h = (ybf->y_crop_height + subsampling_y) >> subsampling_y;
const int c_et = ext_size >> subsampling_y;
const int c_el = ext_size >> subsampling_x;
- const int c_eb = (ybf->border + ybf->y_height - ybf->y_crop_height +
+ const int c_eb = (ext_size + ybf->y_height - ybf->y_crop_height +
subsampling_y) >> subsampling_y;
- const int c_er = (ybf->border + ybf->y_width - ybf->y_crop_width +
+ const int c_er = (ext_size + ybf->y_width - ybf->y_crop_width +
subsampling_x) >> subsampling_x;
assert(ybf->y_height - ybf->y_crop_height < 16);
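With concrete (illustrative) numbers, the fix is easy to check: take ext_size = 32, ybf->border = 96, y_height = 720, y_crop_height = 717 and subsampling_y = 1. The old expression extended the chroma bottom by (96 + 720 - 717 + 1) >> 1 = 50 rows even though the caller asked for a 32-pixel extension; the corrected expression gives (32 + 720 - 717 + 1) >> 1 = 18 rows, i.e. c_et = 32 >> 1 = 16 plus the rounded-up crop remainder.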