rtc: Fixes to dynamic resize for rtc
Several rate control fixes for dynamic resize
(resize on delta frame) for RTC encoding:
1) set cm->prev_frame before av1_set_size_dependent_vars(),
so that cm->prev_frame->width/height are correct at that point.
This is needed since av1_set_size_dependent_vars() sets the Q,
and the adjustment to Q (in adjust_q_cbr) compares the current
frame size against cm->prev_frame->width/height.
2) pass the width/height into adjust_q_cbr(), since
this is called in resize_reset_rc (via av1_rc_regulate_q)
and new width/height (resized one) should be passed in.
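3) adjustments in resize_reset_rc(): on resize back up, reduce
the increase applied to the rate_correction_factor (multiplier
2.0 -> 1.5), pass p_rc->avg_frame_qindex[INTER_FRAME] to the
layer context, and move the reset for temporal layers to the end
of the function, so that the layers pick up the final value of
rate_correction_factors[INTER_NORMAL] (the entry that is
modified earlier in the function).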
These fixes significantly reduce overshoot when resolution
goes back up.
Extended the example in svc_encoder_rtc to test dynamic
resize (using AOME_SET_SCALEMODE): scale down by 2x2, then
by 4x4, then back up to 2x2, and finally back to the original
resolution.
Change-Id: Ic2f54ce0a9a510bbe32894930c660bf8338b6bfa
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 9a4b423..57feafb 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2429,6 +2429,7 @@
set_size_independent_vars(cpi);
av1_setup_frame_size(cpi);
+ cm->prev_frame = get_primary_ref_frame_buf(cm);
av1_set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
av1_set_mv_search_params(cpi);
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index 3a6ae92..8e1008e 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -458,7 +458,8 @@
}
}
-static int adjust_q_cbr(const AV1_COMP *cpi, int q, int active_worst_quality) {
+static int adjust_q_cbr(const AV1_COMP *cpi, int q, int active_worst_quality,
+ int width, int height) {
const RATE_CONTROL *const rc = &cpi->rc;
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const AV1_COMMON *const cm = &cpi->common;
@@ -474,8 +475,8 @@
// then set this flag to indicate change in target bits per macroblock.
const int change_target_bits_mb =
cm->prev_frame &&
- (cm->width != cm->prev_frame->width ||
- cm->height != cm->prev_frame->height || change_avg_frame_bandwidth);
+ (width != cm->prev_frame->width || height != cm->prev_frame->height ||
+ change_avg_frame_bandwidth);
// Apply some control/clamp to QP under certain conditions.
if (cm->current_frame.frame_type != KEY_FRAME && !cpi->ppi->use_svc &&
rc->frames_since_key > 1 && !change_target_bits_mb &&
@@ -528,8 +529,7 @@
// For single spatial layer: if resolution has increased push q closer
// to the active_worst to avoid excess overshoot.
if (cpi->svc.number_spatial_layers <= 1 && cm->prev_frame &&
- (cm->width * cm->height >
- 1.5 * cm->prev_frame->width * cm->prev_frame->height))
+ (width * height > 1.5 * cm->prev_frame->width * cm->prev_frame->height))
q = (q + active_worst_quality) >> 1;
return AOMMAX(AOMMIN(q, cpi->rc.worst_quality), cpi->rc.best_quality);
}
@@ -872,7 +872,7 @@
find_closest_qindex_by_rate(target_bits_per_mb, cpi, correction_factor,
active_best_quality, active_worst_quality);
if (cpi->oxcf.rc_cfg.mode == AOM_CBR && has_no_stats_stage(cpi))
- return adjust_q_cbr(cpi, q, active_worst_quality);
+ return adjust_q_cbr(cpi, q, active_worst_quality, width, height);
return q;
}
@@ -2948,12 +2948,11 @@
RATE_CONTROL *const rc = &cpi->rc;
PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
SVC *const svc = &cpi->svc;
- double tot_scale_change = 1.0;
int target_bits_per_frame;
int active_worst_quality;
int qindex;
- tot_scale_change = (double)(resize_width * resize_height) /
- (double)(prev_width * prev_height);
+ double tot_scale_change = (double)(resize_width * resize_height) /
+ (double)(prev_width * prev_height);
// Reset buffer level to optimal, update target size.
p_rc->buffer_level = p_rc->optimal_buffer_level;
p_rc->bits_off_target = p_rc->optimal_buffer_level;
@@ -2971,20 +2970,8 @@
// If resize is down, check if projected q index is close to worst_quality,
// and if so, reduce the rate correction factor (since likely can afford
// lower q for resized frame).
- if (tot_scale_change < 1.0 && qindex > 90 * cpi->rc.worst_quality / 100)
+ if (tot_scale_change < 1.0 && qindex > 90 * rc->worst_quality / 100)
p_rc->rate_correction_factors[INTER_NORMAL] *= 0.85;
- // Apply the same rate control reset to all temporal layers.
- for (int tl = 0; tl < svc->number_temporal_layers; tl++) {
- LAYER_CONTEXT *lc = NULL;
- lc = &svc->layer_context[svc->spatial_layer_id *
- svc->number_temporal_layers +
- tl];
- lc->rc.resize_state = rc->resize_state;
- lc->p_rc.buffer_level = lc->p_rc.optimal_buffer_level;
- lc->p_rc.bits_off_target = lc->p_rc.optimal_buffer_level;
- lc->p_rc.rate_correction_factors[INTER_FRAME] =
- p_rc->rate_correction_factors[INTER_FRAME];
- }
// If resize is back up: check if projected q index is too much above the
// previous index, and if so, reduce the rate correction factor
// (since prefer to keep q for resized frame at least closet to previous q).
@@ -2995,7 +2982,21 @@
qindex > 130 * p_rc->last_q[INTER_FRAME] / 100)
p_rc->rate_correction_factors[INTER_NORMAL] *= 0.8;
if (qindex <= 120 * p_rc->last_q[INTER_FRAME] / 100)
- p_rc->rate_correction_factors[INTER_NORMAL] *= 2.0;
+ p_rc->rate_correction_factors[INTER_NORMAL] *= 1.5;
+ }
+ // Apply the same rate control reset to all temporal layers.
+ for (int tl = 0; tl < svc->number_temporal_layers; tl++) {
+ LAYER_CONTEXT *lc = NULL;
+ lc = &svc->layer_context[svc->spatial_layer_id *
+ svc->number_temporal_layers +
+ tl];
+ lc->rc.resize_state = rc->resize_state;
+ lc->p_rc.buffer_level = lc->p_rc.optimal_buffer_level;
+ lc->p_rc.bits_off_target = lc->p_rc.optimal_buffer_level;
+ lc->p_rc.rate_correction_factors[INTER_NORMAL] =
+ p_rc->rate_correction_factors[INTER_NORMAL];
+ lc->p_rc.avg_frame_qindex[INTER_FRAME] =
+ p_rc->avg_frame_qindex[INTER_FRAME];
}
}
diff --git a/examples/svc_encoder_rtc.c b/examples/svc_encoder_rtc.c
index 96b41f6..bceb7d2 100644
--- a/examples/svc_encoder_rtc.c
+++ b/examples/svc_encoder_rtc.c
@@ -1478,15 +1478,44 @@
if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
if (test_dynamic_scaling_single_layer) {
- if (frame_cnt >= 200 && frame_cnt <= 400) {
+ // Example to scale source down by 2x2, then 4x4, and then back up to
+ // 2x2, and then back to original.
+ int frame_2x2 = 200;
+ int frame_4x4 = 400;
+ int frame_2x2up = 600;
+ int frame_orig = 800;
+ if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
// Scale source down by 2x2.
struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
- } else {
+ } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
+ // Scale source down by 4x4.
+ struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
+ aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
+ } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
+ // Source back up to 2x2.
+ struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
+ aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
+ } else if (frame_cnt >= frame_orig) {
// Source back up to original resolution (no scaling).
struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
}
+ if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
+ frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
+ // For dynamic resize testing on single layer: refresh all references
+ // on the resized frame: this is to avoid decode error:
+ // if resize goes down by >= 4x4 then libaom decoder will throw an
+ // error that some reference (even though not used) is beyond the
+ // limit size (must be smaller than 4x4).
+ for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
+ if (use_svc_control) {
+ aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
+ &ref_frame_config);
+ aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
+ &ref_frame_comp_pred);
+ }
+ }
}
// Do the layer encode.