Merge "Remove unused 16x3/3x16 sad SSE2 functions."
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 16244e0..385dcc1 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -468,8 +468,8 @@
}
#if CONFIG_ALPHA
// TODO(jkoleszar): Using the Y w/h for now
- mb->plane[3].subsampling_x = 0;
- mb->plane[3].subsampling_y = 0;
+ xd->plane[3].subsampling_x = 0;
+ xd->plane[3].subsampling_y = 0;
#endif
}
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 31cf69a..37bdad2 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -55,32 +55,30 @@
return data > max ? max : data;
}
-static void setup_txfm_mode(VP9_COMMON *pc, int lossless, vp9_reader *r) {
- if (lossless) {
- pc->txfm_mode = ONLY_4X4;
- } else {
- pc->txfm_mode = vp9_read_literal(r, 2);
- if (pc->txfm_mode == ALLOW_32X32)
- pc->txfm_mode += vp9_read_bit(r);
+static TXFM_MODE read_tx_mode(vp9_reader *r) {
+ TXFM_MODE txfm_mode = vp9_read_literal(r, 2);
+ if (txfm_mode == ALLOW_32X32)
+ txfm_mode += vp9_read_bit(r);
+ return txfm_mode;
+}
- if (pc->txfm_mode == TX_MODE_SELECT) {
- int i, j;
- for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
- for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
- vp9_diff_update_prob(r, &pc->fc.tx_probs_8x8p[i][j]);
+static void read_tx_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
+ int i, j;
- for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
- for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
- vp9_diff_update_prob(r, &pc->fc.tx_probs_16x16p[i][j]);
+ for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
+ for (j = 0; j < TX_SIZE_MAX_SB - 3; ++j)
+ if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ vp9_diff_update_prob(r, &fc->tx_probs_8x8p[i][j]);
- for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
- for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j)
- if (vp9_read(r, VP9_MODE_UPDATE_PROB))
- vp9_diff_update_prob(r, &pc->fc.tx_probs_32x32p[i][j]);
- }
- }
+ for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
+ for (j = 0; j < TX_SIZE_MAX_SB - 2; ++j)
+ if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ vp9_diff_update_prob(r, &fc->tx_probs_16x16p[i][j]);
+
+ for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
+ for (j = 0; j < TX_SIZE_MAX_SB - 1; ++j)
+ if (vp9_read(r, VP9_MODE_UPDATE_PROB))
+ vp9_diff_update_prob(r, &fc->tx_probs_32x32p[i][j]);
}
static void mb_init_dequantizer(VP9_COMMON *pc, MACROBLOCKD *xd) {
@@ -370,33 +368,21 @@
static void read_coef_probs_common(FRAME_CONTEXT *fc, TX_SIZE tx_size,
vp9_reader *r) {
vp9_coeff_probs_model *coef_probs = fc->coef_probs[tx_size];
+ int i, j, k, l, m;
- if (vp9_read_bit(r)) {
- int i, j, k, l, m;
- for (i = 0; i < BLOCK_TYPES; i++) {
- for (j = 0; j < REF_TYPES; j++) {
- for (k = 0; k < COEF_BANDS; k++) {
- for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
- if (l >= 3 && k == 0)
- continue;
-
- for (m = 0; m < UNCONSTRAINED_NODES; m++) {
- vp9_prob *const p = coef_probs[i][j][k][l] + m;
-
- if (vp9_read(r, VP9_COEF_UPDATE_PROB))
- vp9_diff_update_prob(r, p);
- }
- }
- }
- }
- }
- }
+ if (vp9_read_bit(r))
+ for (i = 0; i < BLOCK_TYPES; i++)
+ for (j = 0; j < REF_TYPES; j++)
+ for (k = 0; k < COEF_BANDS; k++)
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++)
+ if (k > 0 || l < 3)
+ for (m = 0; m < UNCONSTRAINED_NODES; m++)
+ if (vp9_read(r, VP9_COEF_UPDATE_PROB))
+ vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]);
}
-static void read_coef_probs(VP9D_COMP *pbi, vp9_reader *r) {
- const TXFM_MODE txfm_mode = pbi->common.txfm_mode;
- FRAME_CONTEXT *const fc = &pbi->common.fc;
-
+static void read_coef_probs(FRAME_CONTEXT *fc, TXFM_MODE txfm_mode,
+ vp9_reader *r) {
read_coef_probs_common(fc, TX_4X4, r);
if (txfm_mode > ONLY_4X4)
@@ -923,6 +909,26 @@
return vp9_rb_read_literal(rb, 16);
}
+static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data,
+ size_t partition_size) {
+ VP9_COMMON *const cm = &pbi->common;
+ MACROBLOCKD *const xd = &pbi->mb;
+ vp9_reader r;
+
+ if (vp9_reader_init(&r, data, partition_size))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate bool decoder 0");
+
+ cm->txfm_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r);
+ if (cm->txfm_mode == TX_MODE_SELECT)
+ read_tx_probs(&cm->fc, &r);
+ read_coef_probs(&cm->fc, cm->txfm_mode, &r);
+
+ vp9_prepare_read_mode_info(pbi, &r);
+
+ return vp9_reader_has_error(&r);
+}
+
void vp9_init_dequantizer(VP9_COMMON *pc) {
int q;
@@ -939,7 +945,7 @@
int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
int i;
- vp9_reader header_bc, residual_bc;
+ vp9_reader residual_bc;
VP9_COMMON *const pc = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
@@ -973,10 +979,6 @@
xd->frame_type = pc->frame_type;
xd->mode_info_stride = pc->mode_info_stride;
- if (vp9_reader_init(&header_bc, data, first_partition_size))
- vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate bool decoder 0");
-
mb_init_dequantizer(pc, &pbi->mb); // MB level dequantizer setup
if (!keyframe)
@@ -986,15 +988,13 @@
update_frame_context(&pc->fc);
- setup_txfm_mode(pc, xd->lossless, &header_bc);
-
- read_coef_probs(pbi, &header_bc);
-
// Initialize xd pointers. Any reference should do for xd->pre, so use 0.
setup_pre_planes(xd, 0, &pc->yv12_fb[pc->active_ref_idx[0]], 0, 0,
NULL, NULL);
setup_dst_planes(xd, new_fb, 0, 0);
+ new_fb->corrupted |= read_compressed_header(pbi, data, first_partition_size);
+
// Create the segmentation map structure and set to 0
if (!pc->last_frame_seg_map)
CHECK_MEM_ERROR(pc, pc->last_frame_seg_map,
@@ -1008,14 +1008,12 @@
set_prev_mi(pc);
- vp9_prepare_read_mode_info(pbi, &header_bc);
-
decode_tiles(pbi, data, first_partition_size, &residual_bc);
pc->last_width = pc->width;
pc->last_height = pc->height;
- new_fb->corrupted = vp9_reader_has_error(&header_bc) | xd->corrupted;
+ new_fb->corrupted |= xd->corrupted;
if (!pbi->decoded_key_frame) {
if (keyframe && !new_fb->corrupted)
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 9c04d8a..adaf667 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -389,7 +389,7 @@
THR_TM /*TM_PRED*/,
THR_B_PRED /*I4X4_PRED*/,
};
- cpi->mode_chosen_counts[kf_mode_index[mb_mode]]++;
+ cpi->mode_chosen_counts[kf_mode_index[mi->mbmi.mode]]++;
#endif
} else {
// Note how often each mode chosen as best
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index cc3c5c0..67d0c4c 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -720,6 +720,7 @@
sf->disable_splitmv = 0;
sf->mode_search_skip_flags = 0;
sf->last_chroma_intra_mode = TM_PRED;
+ sf->use_rd_breakout = 0;
// Skip any mode not chosen at size < X for all sizes > X
// Hence BLOCK_SIZE_SB64X64 (skip is off)
@@ -767,6 +768,7 @@
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA;
sf->last_chroma_intra_mode = H_PRED;
+ sf->use_rd_breakout = 1;
}
if (speed == 2) {
sf->adjust_thresholds_by_speed = 1;
@@ -790,6 +792,7 @@
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_COMP_REFMISMATCH;
sf->last_chroma_intra_mode = DC_PRED;
+ sf->use_rd_breakout = 1;
}
if (speed == 3) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -804,6 +807,7 @@
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_COMP_REFMISMATCH;
+ sf->use_rd_breakout = 1;
}
if (speed == 4) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -818,6 +822,7 @@
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_COMP_REFMISMATCH;
+ sf->use_rd_breakout = 1;
}
/*
if (speed == 2) {
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 0ecbf35..08f6bb6 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -275,6 +275,7 @@
// defined in the MODE_SEARCH_SKIP_HEURISTICS enum
unsigned int mode_search_skip_flags;
MB_PREDICTION_MODE last_chroma_intra_mode;
+ int use_rd_breakout;
} SPEED_FEATURES;
enum BlockSize {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 6116f3f..4cb38f7 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -473,6 +473,31 @@
*out_dist_sum = dist_sum << 4;
}
+static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
+ MACROBLOCK *x, MACROBLOCKD *xd,
+ int *out_rate_sum, int64_t *out_dist_sum) {
+ // Note our transform coeffs are 8 times an orthogonal transform.
+ // Hence quantizer step is also 8 times. To get effective quantizer
+ // we need to divide by 8 before sending to modeling function.
+ struct macroblock_plane *const p = &x->plane[0];
+ struct macroblockd_plane *const pd = &xd->plane[0];
+
+ // TODO(dkovalev): the same code exists in get_plane_block_size
+ const int bw = plane_block_width(bsize, pd);
+ const int bh = plane_block_height(bsize, pd);
+ const enum BlockSize bs = get_block_size(bw, bh);
+ unsigned int sse;
+ int rate;
+ int64_t dist;
+ (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride, &sse);
+ // sse works better than var, since there is no dc prediction used
+ model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist);
+
+ *out_rate_sum = rate;
+ *out_dist_sum = dist << 4;
+}
+
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
TX_SIZE tx_size,
MACROBLOCK *x, MACROBLOCKD *xd,
@@ -1643,14 +1668,16 @@
return cost;
}
-static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
+static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
MACROBLOCK *x,
+ int64_t best_yrd,
int i,
int *labelyrate,
int64_t *distortion,
ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl) {
int k;
+ VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
const int bw = plane_block_width(bsize, &xd->plane[0]);
@@ -1672,9 +1699,6 @@
int64_t thisdistortion = 0;
int thisrate = 0;
- *labelyrate = 0;
- *distortion = 0;
-
vp9_build_inter_predictor(pre,
xd->plane[0].pre[0].stride,
dst,
@@ -1684,9 +1708,6 @@
bw, bh, 0 /* no avg */, &xd->subpix,
MV_PRECISION_Q3);
- // TODO(debargha): Make this work properly with the
- // implicit-compoundinter-weight experiment when implicit
- // weighting for splitmv modes is turned on.
if (xd->mode_info_context->mbmi.ref_frame[1] > 0) {
uint8_t* const second_pre =
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
@@ -1699,10 +1720,28 @@
&xd->subpix, MV_PRECISION_Q3);
}
+ // Turning this section off for now since it hurts quality and does not
+ // improve speed much
+ /*
+ if (cpi->sf.use_rd_breakout &&
+ best_yrd < INT64_MAX) {
+ int64_t thisrd;
+ model_rd_for_sb_y(cpi, bsize, x, xd, &thisrate, &thisdistortion);
+ thisrd = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion);
+ if (thisrd / 2 > best_yrd) {
+ *distortion = thisdistortion;
+ *labelyrate = thisrate;
+ return thisrd;
+ }
+ }
+ */
+
vp9_subtract_block(bh, bw, src_diff, 8,
src, src_stride,
dst, xd->plane[0].dst.stride);
+ *labelyrate = 0;
+ *distortion = 0;
k = i;
for (idy = 0; idy < bh / 4; ++idy) {
for (idx = 0; idx < bw / 4; ++idx) {
@@ -1788,7 +1827,7 @@
MB_PREDICTION_MODE this_mode;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
const int label_count = 4;
- int64_t this_segment_rd = 0, other_segment_rd;
+ int64_t this_segment_rd = 0;
int label_mv_thresh;
int segmentyrate = 0;
int best_eobs[4] = { 0 };
@@ -1811,15 +1850,13 @@
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
- other_segment_rd = this_segment_rd;
-
for (idy = 0; idy < 2; idy += bh) {
for (idx = 0; idx < 2; idx += bw) {
// TODO(jingning,rbultje): rewrite the rate-distortion optimization
// loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
- int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
+ int64_t best_label_rd = INT64_MAX;
MB_PREDICTION_MODE mode_selected = ZEROMV;
int bestlabelyrate = 0;
i = idy * 2 + idx;
@@ -1960,8 +1997,9 @@
mv_check_bounds(x, &second_mode_mv[this_mode]))
continue;
- this_rd = encode_inter_mb_segment(&cpi->common,
- x, i, &labelyrate,
+ this_rd = encode_inter_mb_segment(cpi, x,
+ bsi->segment_rd - this_segment_rd,
+ i, &labelyrate,
&distortion, t_above_s, t_left_s);
this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
rate += labelyrate;
@@ -1990,7 +2028,11 @@
bd += sbd;
segmentyrate += bestlabelyrate;
this_segment_rd += best_label_rd;
- other_segment_rd += best_other_rd;
+
+ if (this_segment_rd > bsi->segment_rd) {
+ bsi->segment_rd = INT64_MAX;
+ return;
+ }
for (j = 1; j < bh; ++j)
vpx_memcpy(&x->partition_info->bmi[i + j * 2],
@@ -2003,33 +2045,31 @@
}
} /* for each label */
- if (this_segment_rd < bsi->segment_rd) {
- bsi->r = br;
- bsi->d = bd;
- bsi->segment_yrate = segmentyrate;
- bsi->segment_rd = this_segment_rd;
+ bsi->r = br;
+ bsi->d = bd;
+ bsi->segment_yrate = segmentyrate;
+ bsi->segment_rd = this_segment_rd;
- // store everything needed to come back to this!!
- for (i = 0; i < 4; i++) {
- bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
- if (mbmi->ref_frame[1] > 0)
- bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
- bsi->modes[i] = x->partition_info->bmi[i].mode;
- bsi->eobs[i] = best_eobs[i];
- }
+ // store everything needed to come back to this!!
+ for (i = 0; i < 4; i++) {
+ bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
+ if (mbmi->ref_frame[1] > 0)
+ bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
+ bsi->modes[i] = x->partition_info->bmi[i].mode;
+ bsi->eobs[i] = best_eobs[i];
}
}
-static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
- int_mv *best_ref_mv,
- int_mv *second_best_ref_mv,
- int64_t best_rd,
- int *returntotrate,
- int *returnyrate,
- int64_t *returndistortion,
- int *skippable, int mvthresh,
- int_mv seg_mvs[4][MAX_REF_FRAMES],
- int mi_row, int mi_col) {
+static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
+ int_mv *best_ref_mv,
+ int_mv *second_best_ref_mv,
+ int64_t best_rd,
+ int *returntotrate,
+ int *returnyrate,
+ int64_t *returndistortion,
+ int *skippable, int mvthresh,
+ int_mv seg_mvs[4][MAX_REF_FRAMES],
+ int mi_row, int mi_col) {
int i;
BEST_SEG_INFO bsi;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
@@ -2078,7 +2118,7 @@
*skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
mbmi->mode = bsi.modes[3];
- return (int)(bsi.segment_rd);
+ return bsi.segment_rd;
}
static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
@@ -2585,6 +2625,7 @@
int best_needs_copy = 0;
uint8_t *orig_dst[MAX_MB_PLANE];
int orig_dst_stride[MAX_MB_PLANE];
+ int rs = 0;
switch (this_mode) {
int rate_mv;
@@ -2658,6 +2699,14 @@
*rate2 += cost_mv_ref(cpi, this_mode,
mbmi->mb_mode_context[mbmi->ref_frame[0]]);
+ if (!(*mode_excluded)) {
+ if (is_comp_pred) {
+ *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
+ } else {
+ *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
+ }
+ }
+
pred_exists = 0;
interpolating_intpel_seen = 0;
// Are all MVs integer pel for Y and UV
@@ -2668,6 +2717,7 @@
(mbmi->mv[1].as_mv.col & 15) == 0;
// Search for best switchable filter by checking the variance of
// pred error irrespective of whether the filter will be used
+ *best_filter = EIGHTTAP;
if (cpi->sf.use_8tap_always) {
*best_filter = EIGHTTAP;
vp9_zero(cpi->rd_filter_cache);
@@ -2678,7 +2728,7 @@
cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
- int rs, j;
+ int j;
int64_t rs_rd;
const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
const int is_intpel_interp = intpel_mv &&
@@ -2730,6 +2780,15 @@
tmp_dist_sum = dist_sum;
}
}
+ if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
+ if (rd / 2 > ref_best_rd) {
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = orig_dst[i];
+ xd->plane[i].dst.stride = orig_dst_stride[i];
+ }
+ return INT64_MAX;
+ }
+ }
newbest = i == 0 || rd < best_rd;
if (newbest) {
@@ -2753,11 +2812,11 @@
xd->plane[i].dst.stride = orig_dst_stride[i];
}
}
-
// Set the appripriate filter
mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
cm->mcomp_filter_type : *best_filter;
vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+ rs = (cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(cm, x) : 0);
if (pred_exists) {
if (best_needs_copy) {
@@ -2773,6 +2832,23 @@
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
}
+
+ if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
+ int tmp_rate;
+ int64_t tmp_dist;
+ model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
+ rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
+ // if current pred_error modeled rd is substantially more than the best
+ // so far, do not bother doing full rd
+ if (rd / 2 > ref_best_rd) {
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = orig_dst[i];
+ xd->plane[i].dst.stride = orig_dst_stride[i];
+ }
+ return INT64_MAX;
+ }
+ }
+
if (cpi->common.mcomp_filter_type == SWITCHABLE)
*rate2 += get_switchable_rate(cm, x);
@@ -2817,7 +2893,7 @@
*distortion = sse + sse2;
*rate2 = 500;
- // for best_yrd calculation
+ // for best yrd calculation
*rate_uv = 0;
*distortion_uv = sse2;
@@ -2858,14 +2934,6 @@
*skippable = skippable_y && skippable_uv;
}
- if (!(*mode_excluded)) {
- if (is_comp_pred) {
- *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
- } else {
- *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
- }
- }
-
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = orig_dst[i];
xd->plane[i].dst.stride = orig_dst_stride[i];
@@ -2962,6 +3030,7 @@
cpi->gld_fb_idx,
cpi->alt_fb_idx};
int64_t best_rd = INT64_MAX;
+ int64_t best_yrd = INT64_MAX;
int64_t best_txfm_rd[NB_TXFM_MODES];
int64_t best_txfm_diff[NB_TXFM_MODES];
int64_t best_pred_diff[NB_PREDICTION_TYPES];
@@ -3357,16 +3426,20 @@
int newbest, rs;
int64_t rs_rd;
mbmi->interp_filter =
- vp9_switchable_interp[switchable_filter_index];
+ vp9_switchable_interp[switchable_filter_index];
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
&mbmi->ref_mvs[mbmi->ref_frame[0]][0],
- second_ref, INT64_MAX,
+ second_ref,
+ best_yrd,
&rate, &rate_y, &distortion,
&skippable,
(int)this_rd_thresh, seg_mvs,
mi_row, mi_col);
+ if (tmp_rd == INT64_MAX) {
+ continue;
+ }
cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
rs = get_switchable_rate(cm, x);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
@@ -3374,6 +3447,7 @@
MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd);
if (cm->mcomp_filter_type == SWITCHABLE)
tmp_rd += rs_rd;
+
newbest = (tmp_rd < tmp_best_rd);
if (newbest) {
tmp_best_filter = mbmi->interp_filter;
@@ -3392,8 +3466,21 @@
for (i = 0; i < 4; i++)
tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
pred_exists = 1;
+ if (switchable_filter_index == 0 &&
+ cpi->sf.use_rd_breakout &&
+ best_rd < INT64_MAX) {
+ if (tmp_best_rdu / 2 > best_rd) {
+ // skip searching the other filters if the first is
+ // already substantially larger than the best so far
+ tmp_best_filter = mbmi->interp_filter;
+ tmp_best_rdu = INT64_MAX;
+ break;
+ }
+ }
}
} // switchable_filter_index loop
+ if (tmp_best_rdu == INT64_MAX)
+ continue;
mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
tmp_best_filter : cm->mcomp_filter_type);
@@ -3403,11 +3490,14 @@
// switchable list (bilinear, 6-tap) is indicated at the frame level
tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
&mbmi->ref_mvs[mbmi->ref_frame[0]][0],
- second_ref, INT64_MAX,
+ second_ref,
+ best_yrd,
&rate, &rate_y, &distortion,
&skippable,
(int)this_rd_thresh, seg_mvs,
mi_row, mi_col);
+ if (tmp_rd == INT64_MAX)
+ continue;
} else {
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
int rs = get_switchable_rate(cm, x);
@@ -3430,29 +3520,32 @@
if (cpi->common.mcomp_filter_type == SWITCHABLE)
rate2 += get_switchable_rate(cm, x);
- // If even the 'Y' rd value of split is higher than best so far
- // then dont bother looking at UV
- vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
- BLOCK_SIZE_SB8X8);
- vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
- super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
- &uv_skippable, NULL, BLOCK_SIZE_SB8X8, TX_4X4);
- rate2 += rate_uv;
- distortion2 += distortion_uv;
- skippable = skippable && uv_skippable;
-
- txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
- for (i = 0; i < NB_TXFM_MODES; ++i)
- txfm_cache[i] = txfm_cache[ONLY_4X4];
-
if (!mode_excluded) {
if (is_comp_pred)
mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
else
mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
}
-
compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred);
+
+ if (RDCOST(x->rdmult, x->rddiv, rate2, distortion2) <
+ best_rd) {
+ // Only look at UV when the 'Y' rd value of split is still lower than
+ // the best so far; otherwise don't bother computing it
+ vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
+ BLOCK_SIZE_SB8X8);
+ vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
+ super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
+ &uv_skippable, NULL,
+ BLOCK_SIZE_SB8X8, TX_4X4);
+ rate2 += rate_uv;
+ distortion2 += distortion_uv;
+ skippable = skippable && uv_skippable;
+
+ txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ for (i = 0; i < NB_TXFM_MODES; ++i)
+ txfm_cache[i] = txfm_cache[ONLY_4X4];
+ }
} else {
compmode_cost = vp9_cost_bit(comp_mode_p,
mbmi->ref_frame[1] > INTRA_FRAME);
@@ -3494,7 +3587,7 @@
if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
- // for best_yrd calculation
+ // for best yrd calculation
rate_uv = 0;
if (mb_skip_allowed) {
@@ -3592,6 +3685,8 @@
*returnrate = rate2;
*returndistortion = distortion2;
best_rd = this_rd;
+ best_yrd = best_rd -
+ RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
best_mbmode = *mbmi;
best_skip2 = this_skip2;
best_partition = *x->partition_info;