Merge "Some rate control adjustments to control overshoot"
diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c
index dea29c0..d5cd46c 100644
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -677,7 +677,7 @@
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
- vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
+ vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0)) {
die_codec(&codec, "Failed to set SVC");
@@ -712,6 +712,8 @@
layer_id.temporal_layer_id);
}
flags = layer_flags[frame_cnt % flag_periodicity];
+ if (layering_mode == 0)
+ flags = 0;
frame_avail = vpx_img_read(&raw, infile);
if (frame_avail)
++rc.layer_input_frames[layer_id.temporal_layer_id];
diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c
index 7094a01..3cd9f44 100644
--- a/vp9/common/vp9_blockd.c
+++ b/vp9/common/vp9_blockd.c
@@ -50,39 +50,25 @@
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
const int step = 1 << (tx_size << 1);
- int i;
+ int i = 0, r, c;
// If mb_to_right_edge is < 0 we are in a situation in which
// the current block size extends into the UMV and we won't
// visit the sub blocks that are wholly within the UMV.
- if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
- int r, c;
+ const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 :
+ xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
+ xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
- int max_blocks_wide = num_4x4_w;
- int max_blocks_high = num_4x4_h;
-
- // xd->mb_to_right_edge is in units of pixels * 8. This converts
- // it to 4x4 block sizes.
- if (xd->mb_to_right_edge < 0)
- max_blocks_wide += (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
-
- if (xd->mb_to_bottom_edge < 0)
- max_blocks_high += (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
-
- i = 0;
- // Unlike the normal case - in here we have to keep track of the
- // row and column of the blocks we use so that we know if we are in
- // the unrestricted motion border.
- for (r = 0; r < num_4x4_h; r += (1 << tx_size)) {
- for (c = 0; c < num_4x4_w; c += (1 << tx_size)) {
- if (r < max_blocks_high && c < max_blocks_wide)
- visit(plane, i, plane_bsize, tx_size, arg);
- i += step;
- }
+ // Keep track of the row and column of the blocks we use so that we know
+ // if we are in the unrestricted motion border.
+ for (r = 0; r < max_blocks_high; r += (1 << tx_size)) {
+ for (c = 0; c < num_4x4_w; c += (1 << tx_size)) {
+ // Skip visiting the sub blocks that are wholly within the UMV.
+ if (c < max_blocks_wide)
+ visit(plane, i, plane_bsize, tx_size, arg);
+ i += step;
}
- } else {
- for (i = 0; i < num_4x4_w * num_4x4_h; i += step)
- visit(plane, i, plane_bsize, tx_size, arg);
}
}
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
index 048a0ed..4df1b58 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -65,7 +65,7 @@
void vp9_cyclic_refresh_postencode(struct VP9_COMP *const cpi);
// Set golden frame update interval, for non-svc 1 pass CBR mode.
-void vp9_cyclic_refresh_set_golden_update(struct VP9_COMP *cpi);
+void vp9_cyclic_refresh_set_golden_update(struct VP9_COMP *const cpi);
// Check if we should not update golden reference, based on past refresh stats.
void vp9_cyclic_refresh_check_golden_update(struct VP9_COMP *const cpi);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 5c841cf..cf8ac0a 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -514,12 +514,6 @@
}
}
-#if CONFIG_VP9_HIGHBITDEPTH
-#define GLOBAL_MOTION 0
-#else
-#define GLOBAL_MOTION 1
-#endif
-
// This function chooses partitioning based on the variance between source and
// reconstructed last, where variance is computed for down-sampled inputs.
static void choose_partitioning(VP9_COMP *cpi,
@@ -564,7 +558,7 @@
MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
unsigned int uv_sad;
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
-#if GLOBAL_MOTION
+
const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
unsigned int y_sad, y_sad_g;
BLOCK_SIZE bsize;
@@ -576,9 +570,9 @@
bsize = BLOCK_64X32;
else
bsize = BLOCK_32X32;
-#endif
+
assert(yv12 != NULL);
-#if GLOBAL_MOTION
+
if (yv12_g && yv12_g != yv12) {
vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
&cm->frame_refs[GOLDEN_FRAME - 1].sf);
@@ -589,7 +583,7 @@
} else {
y_sad_g = UINT_MAX;
}
-#endif
+
vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
&cm->frame_refs[LAST_FRAME - 1].sf);
mbmi->ref_frame[0] = LAST_FRAME;
@@ -597,7 +591,7 @@
mbmi->sb_type = BLOCK_64X64;
mbmi->mv[0].as_int = 0;
mbmi->interp_filter = BILINEAR;
-#if GLOBAL_MOTION
+
y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
if (y_sad_g < y_sad) {
vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
@@ -608,29 +602,21 @@
} else {
x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv;
}
-#endif
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
for (i = 1; i <= 2; ++i) {
struct macroblock_plane *p = &x->plane[i];
struct macroblockd_plane *pd = &xd->plane[i];
-#if GLOBAL_MOTION
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
-#else
- const BLOCK_SIZE bs = get_plane_block_size(BLOCK_64X64, pd);
-#endif
+
if (bs == BLOCK_INVALID)
- uv_sad = INT_MAX;
+ uv_sad = UINT_MAX;
else
uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride);
-#if GLOBAL_MOTION
- x->color_sensitivity[i - 1] = uv_sad * 4 > y_sad;
-#else
- x->color_sensitivity[i - 1] = (uv_sad > 512);
-#endif
+ x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
}
d = xd->plane[0].dst.buf;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 0730467..12882e4 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1807,6 +1807,13 @@
int best_sad;
MV this_mv;
+#if CONFIG_VP9_HIGHBITDEPTH
+ tmp_mv->row = 0;
+ tmp_mv->col = 0;
+ return cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
+ xd->plane[0].pre[0].buf, ref_stride);
+#endif
+
// Set up prediction 1-D reference set
ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
for (idx = 0; idx < search_width; idx += 16) {
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index dedec5f..f1b19e0 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -137,10 +137,6 @@
int cost_list[5];
const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
ref);
- if (cpi->common.show_frame &&
- (x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[LAST_FRAME])
- return rv;
-
if (scaled_ref_frame) {
int i;
// Swap out the reference frame for a version that's been scaled to
@@ -784,15 +780,43 @@
continue;
if (this_mode == NEWMV) {
- if (ref_frame > LAST_FRAME)
- continue;
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
best_rdc.rdcost < (int64_t)(1 << num_pels_log2_lookup[bsize]))
continue;
- if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame],
- &rate_mv, best_rdc.rdcost))
+
+ if (ref_frame > LAST_FRAME) {
+ int tmp_sad;
+ int dis, cost_list[5];
+
+ if (bsize < BLOCK_16X16)
+ continue;
+
+ tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
+ if (tmp_sad > x->pred_mv_sad[LAST_FRAME])
+ continue;
+
+ frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
+ rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
+ &mbmi->ref_mvs[ref_frame][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
+ frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
+
+ cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv,
+ &mbmi->ref_mvs[ref_frame][0].as_mv,
+ cpi->common.allow_high_precision_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[bsize],
+ cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list),
+ x->nmvjointcost, x->mvcost, &dis,
+ &x->pred_sse[ref_frame], NULL, 0, 0);
+ } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame],
+ &rate_mv, best_rdc.rdcost)) {
continue;
+ }
}
if (this_mode != NEARESTMV &&
@@ -817,7 +841,7 @@
}
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
- pred_filter_search &&
+ pred_filter_search && (ref_frame == LAST_FRAME) &&
((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
(mbmi->mv[0].as_mv.col & 0x07) != 0)) {
int pf_rate[3];
@@ -1052,6 +1076,7 @@
mode_idx[INTRA_FRAME][mbmi->mode];
PREDICTION_MODE this_mode;
for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
+ if (best_ref_frame != ref_frame) continue;
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
THR_MODES thr_mode_idx = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
int *freq_fact = &tile_data->thresh_freq_fact[bsize][thr_mode_idx];
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index c6273f1..7783f7b 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -377,7 +377,7 @@
rcf = rc->rate_correction_factors[INTER_NORMAL];
}
rcf *= rcf_mult[rc->frame_size_selector];
- return rcf > MAX_BPB_FACTOR ? MAX_BPB_FACTOR : rcf;
+ return fclamp(rcf, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
}
static void set_rate_correction_factor(VP9_COMP *cpi, double factor) {
@@ -386,6 +386,8 @@
// Normalize RCF to account for the size-dependent scaling factor.
factor /= rcf_mult[cpi->rc.frame_size_selector];
+ factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
+
if (cpi->common.frame_type == KEY_FRAME) {
rc->rate_correction_factors[KF_STD] = factor;
} else if (cpi->oxcf.pass == 2) {
diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
index f499499..618b5f7 100644
--- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -61,7 +61,7 @@
const int ref_stride, const int height) {
int idx;
__m128i zero = _mm_setzero_si128();
- __m128i src_line = _mm_load_si128((const __m128i *)ref);
+ __m128i src_line = _mm_loadu_si128((const __m128i *)ref);
__m128i s0 = _mm_unpacklo_epi8(src_line, zero);
__m128i s1 = _mm_unpackhi_epi8(src_line, zero);
__m128i t0, t1;
@@ -69,14 +69,14 @@
ref += ref_stride;
for (idx = 1; idx < height_1; idx += 2) {
- src_line = _mm_load_si128((const __m128i *)ref);
+ src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
s1 = _mm_adds_epu16(s1, t1);
ref += ref_stride;
- src_line = _mm_load_si128((const __m128i *)ref);
+ src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
@@ -84,7 +84,7 @@
ref += ref_stride;
}
- src_line = _mm_load_si128((const __m128i *)ref);
+ src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
s0 = _mm_adds_epu16(s0, t0);
@@ -101,9 +101,9 @@
s1 = _mm_srai_epi16(s1, 3);
}
- _mm_store_si128((__m128i *)hbuf, s0);
+ _mm_storeu_si128((__m128i *)hbuf, s0);
hbuf += 8;
- _mm_store_si128((__m128i *)hbuf, s1);
+ _mm_storeu_si128((__m128i *)hbuf, s1);
}
int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {