rtc: Add dynamic resize mode for 1 pass CBR
Make it work for 1 pass CBR mode.
Add a DYNAMIC_RESIZE mode and allow for 3/4 and
1/2 resize factors, where the resize logic is based
on the rate control.
Also force cyclic_refresh off on the resize trigger
frame. And disable golden and altref when the
resolution of the reference frame is different from
the source (to avoid slowness in scale_references).
Re-enable/update two unittests for dynamic resize.
Change-Id: I10edf3ca9c2e90ba8ed8bbfac598c18aa442a5bf
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index fc1005a..95751fc 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -543,11 +543,6 @@
ERROR("Only --enable_chroma_deltaq=0 can be used with --lossless=1.");
}
- if (cfg->rc_resize_mode != RESIZE_NONE &&
- extra_cfg->aq_mode == CYCLIC_REFRESH_AQ) {
- ERROR("--aq_mode=3 is only supported for --resize-mode=0.");
- }
-
RANGE_CHECK(extra_cfg, max_reference_frames, 3, 7);
RANGE_CHECK(extra_cfg, enable_reduced_reference_set, 0, 1);
RANGE_CHECK_HI(extra_cfg, chroma_subsampling_x, 1);
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index 77813a5..a4ca9e5 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -488,4 +488,5 @@
memset(cr->map, 0, cm->mi_params.mi_rows * cm->mi_params.mi_cols);
cr->sb_index = 0;
cpi->refresh_frame.golden_frame = true;
+ cr->apply_cyclic_refresh = 0;
}
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index d157685..cc2b935 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2097,6 +2097,27 @@
av1_update_noise_estimate(cpi);
}
+ // For 1 spatial layer encoding: if the (non-LAST) reference has different
+ // resolution from the source then disable that reference. This is to avoid
+ // significant increase in encode time from scaling the references in
+ // av1_scale_references. Note GOLDEN is forced to update on the first
+ // (trigger) resized frame and ALTREF will be refreshed ~4 frames later, so
+ // both references become available again after a few frames.
+ if (svc->number_spatial_layers == 1) {
+ if (cpi->ref_frame_flags & av1_ref_frame_flag_list[GOLDEN_FRAME]) {
+ const YV12_BUFFER_CONFIG *const ref =
+ get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
+ if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height)
+ cpi->ref_frame_flags ^= AOM_GOLD_FLAG;
+ }
+ if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME]) {
+ const YV12_BUFFER_CONFIG *const ref =
+ get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
+ if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height)
+ cpi->ref_frame_flags ^= AOM_ALT_FLAG;
+ }
+ }
+
// For SVC the inter-layer/spatial prediction is not done for newmv
// (zero_mode is forced), and since the scaled references are only
// use for newmv search, we can avoid scaling here.
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 25230a9..ac189a4 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -133,9 +133,10 @@
} UENUM1BYTE(DELTAQ_MODE);
enum {
- RESIZE_NONE = 0, // No frame resizing allowed.
- RESIZE_FIXED = 1, // All frames are coded at the specified scale.
- RESIZE_RANDOM = 2, // All frames are coded at a random scale.
+ RESIZE_NONE = 0, // No frame resizing allowed.
+ RESIZE_FIXED = 1, // All frames are coded at the specified scale.
+ RESIZE_RANDOM = 2, // All frames are coded at a random scale.
+ RESIZE_DYNAMIC = 3, // Frames coded at lower scale based on rate control.
RESIZE_MODES
} UENUM1BYTE(RESIZE_MODE);
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index 07994cd..04f51ab 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -344,6 +344,11 @@
oxcf->input_cfg.init_framerate, rc->min_gf_interval);
rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
rc->avg_frame_low_motion = 0;
+
+ rc->resize_state = ORIG;
+ rc->resize_avg_qp = 0;
+ rc->resize_buffer_underflow = 0;
+ rc->resize_count = 0;
}
int av1_rc_drop_frame(AV1_COMP *cpi) {
@@ -2394,6 +2399,142 @@
return gf_update;
}
+/*!\brief Check for resize based on Q, for 1 pass real-time mode.
+ *
+ * Decide on resize from buffer underflow count and average QP of past frames.
+ * Only allow for resize at most 1/2 scale down for now. Scaling factor
+ * for each step may be 3/4 or 1/2.
+ *
+ * \ingroup rate_control
+ * \param[in] cpi Top level encoder structure
+ *
+ * \return Nothing is returned. Updates \c rc->resize_state and the resize
+ * counters in \c rc, and resets rate control state when a resize is chosen.
+ */
+static void dynamic_resize_one_pass_cbr(AV1_COMP *cpi) {
+ const AV1_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ RESIZE_ACTION resize_action = NO_RESIZE;
+ const int avg_qp_thr1 = 70;
+ const int avg_qp_thr2 = 50;
+ // Don't allow for resized frame to go below 160x90, resize in steps of 3/4.
+ const int min_width = (160 * 4) / 3;
+ const int min_height = (90 * 4) / 3;
+ int down_size_on = 1;
+ // Don't resize on key frame; reset the counters on key frame.
+ if (cm->current_frame.frame_type == KEY_FRAME) {
+ rc->resize_avg_qp = 0;
+ rc->resize_count = 0;
+ rc->resize_buffer_underflow = 0;
+ return;
+ }
+ // No resizing down if frame size is below some limit.
+ if ((cm->width * cm->height) < min_width * min_height) down_size_on = 0;
+
+ // Resize based on average buffer underflow and QP over some window.
+ // Ignore samples close to key frame, since QP is usually high after key.
+ if (cpi->rc.frames_since_key > cpi->framerate) {
+ const int window = AOMMIN(30, (int)(2 * cpi->framerate));
+ rc->resize_avg_qp += rc->last_q[INTER_FRAME];
+ if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
+ ++rc->resize_buffer_underflow;
+ ++rc->resize_count;
+ // Check for resize action every "window" frames.
+ if (rc->resize_count >= window) {
+ int avg_qp = rc->resize_avg_qp / rc->resize_count;
+ // Resize down if buffer level has underflowed sufficient amount in past
+ // window, and we are at original or 3/4 of original resolution.
+ // Resize back up if average QP is low, and we are currently in a resized
+ // down state, i.e. 1/2 or 3/4 of original resolution.
+ // Resizing down is skipped when down_size_on is 0 (frame already small).
+ if (rc->resize_buffer_underflow > (rc->resize_count >> 2) &&
+ down_size_on) {
+ if (rc->resize_state == THREE_QUARTER) {
+ resize_action = DOWN_ONEHALF;
+ rc->resize_state = ONE_HALF;
+ } else if (rc->resize_state == ORIG) {
+ resize_action = DOWN_THREEFOUR;
+ rc->resize_state = THREE_QUARTER;
+ }
+ } else if (rc->resize_state != ORIG &&
+ avg_qp < avg_qp_thr1 * cpi->rc.worst_quality / 100) {
+ if (rc->resize_state == THREE_QUARTER ||
+ avg_qp < avg_qp_thr2 * cpi->rc.worst_quality / 100) {
+ resize_action = UP_ORIG;
+ rc->resize_state = ORIG;
+ } else if (rc->resize_state == ONE_HALF) {
+ resize_action = UP_THREEFOUR;
+ rc->resize_state = THREE_QUARTER;
+ }
+ }
+ // Reset for next window measurement.
+ rc->resize_avg_qp = 0;
+ rc->resize_count = 0;
+ rc->resize_buffer_underflow = 0;
+ }
+ }
+ // If decision is to resize, reset some quantities, and check if we should
+ // reduce rate correction factor.
+ if (resize_action != NO_RESIZE) {
+ SVC *const svc = &cpi->svc;
+ LAYER_CONTEXT *lc = NULL;
+ int resize_width = cpi->oxcf.frm_dim_cfg.width;
+ int resize_height = cpi->oxcf.frm_dim_cfg.height;
+ int resize_scale_num = 1;
+ int resize_scale_den = 1;
+ int target_bits_per_frame;
+ int active_worst_quality;
+ int qindex, tl, tot_scale_change;
+ if (resize_action == DOWN_THREEFOUR || resize_action == UP_THREEFOUR) {
+ resize_scale_num = 3;
+ resize_scale_den = 4;
+ } else if (resize_action == DOWN_ONEHALF) {
+ resize_scale_num = 1;
+ resize_scale_den = 2;
+ }
+ resize_width = resize_width * resize_scale_num / resize_scale_den;
+ resize_height = resize_height * resize_scale_num / resize_scale_den;
+ tot_scale_change = (resize_scale_den * resize_scale_den) /
+ (resize_scale_num * resize_scale_num);
+ // Reset buffer level to optimal, update target size.
+ rc->buffer_level = rc->optimal_buffer_level;
+ rc->bits_off_target = rc->optimal_buffer_level;
+ rc->this_frame_target =
+ av1_calc_pframe_target_size_one_pass_cbr(cpi, INTER_FRAME);
+ // Get the projected qindex, based on the scaled target frame size (scaled
+ // so target_bits_per_mb in av1_rc_regulate_q will be correct target).
+ target_bits_per_frame = (resize_action >= 0)
+ ? rc->this_frame_target * tot_scale_change
+ : rc->this_frame_target / tot_scale_change;
+ active_worst_quality = calc_active_worst_quality_no_stats_cbr(cpi);
+ qindex =
+ av1_rc_regulate_q(cpi, target_bits_per_frame, rc->best_quality,
+ active_worst_quality, resize_width, resize_height);
+ // If resize is down, check if projected q index is close to worst_quality,
+ // and if so, reduce the rate correction factor (since likely can afford
+ // lower q for resized frame).
+ if (resize_action > 0 && qindex > 90 * cpi->rc.worst_quality / 100)
+ rc->rate_correction_factors[INTER_NORMAL] *= 0.85;
+ // Apply the same rate control reset to all temporal layers.
+ for (tl = 0; tl < svc->number_temporal_layers; tl++) {
+ lc = &svc->layer_context[svc->spatial_layer_id *
+ svc->number_temporal_layers +
+ tl];
+ lc->rc.buffer_level = lc->rc.optimal_buffer_level;
+ lc->rc.bits_off_target = lc->rc.optimal_buffer_level;
+ lc->rc.rate_correction_factors[INTER_FRAME] =
+ rc->rate_correction_factors[INTER_FRAME];
+ }
+ // If resize is back up, check if projected q index is too much above the
+ // current base_qindex, and if so, reduce the rate correction factor
+ // (since prefer to keep q for resized frame at least close to previous q).
+ if (resize_action < 0 && qindex > 130 * rc->last_q[INTER_FRAME] / 100) {
+ rc->rate_correction_factors[INTER_NORMAL] *= 0.9;
+ }
+ }
+ return;
+}
+
void av1_get_one_pass_rt_params(AV1_COMP *cpi,
EncodeFrameParams *const frame_params,
unsigned int frame_flags) {
@@ -2444,6 +2585,25 @@
// Check for scene change, for non-SVC for now.
if (!cpi->use_svc && cpi->sf.rt_sf.check_scene_detection)
rc_scene_detection_onepass_rt(cpi);
+ // Check for dynamic resize, for single spatial layer for now.
+ // For temporal layers only check on base temporal layer.
+ if (cpi->oxcf.resize_cfg.resize_mode == RESIZE_DYNAMIC) {
+ ResizePendingParams *const resize_pending_params =
+ &cpi->resize_pending_params;
+ if (svc->number_spatial_layers == 1 && svc->temporal_layer_id == 0)
+ dynamic_resize_one_pass_cbr(cpi);
+ if (rc->resize_state == THREE_QUARTER) {
+ resize_pending_params->width = (3 + cpi->oxcf.frm_dim_cfg.width * 3) >> 2;
+ resize_pending_params->height =
+ (3 + cpi->oxcf.frm_dim_cfg.height * 3) >> 2;
+ } else if (rc->resize_state == ONE_HALF) {
+ resize_pending_params->width = (1 + cpi->oxcf.frm_dim_cfg.width) >> 1;
+ resize_pending_params->height = (1 + cpi->oxcf.frm_dim_cfg.height) >> 1;
+ } else {
+ resize_pending_params->width = cpi->oxcf.frm_dim_cfg.width;
+ resize_pending_params->height = cpi->oxcf.frm_dim_cfg.height;
+ }
+ }
// Set the GF interval and update flag.
gf_update = set_gf_interval_update_onepass_rt(cpi, frame_params->frame_type);
// Set target size.
diff --git a/av1/encoder/ratectrl.h b/av1/encoder/ratectrl.h
index d88dd91..7f26f38 100644
--- a/av1/encoder/ratectrl.h
+++ b/av1/encoder/ratectrl.h
@@ -76,6 +76,16 @@
FRAME_UPDATE_TYPES
} UENUM1BYTE(FRAME_UPDATE_TYPE);
+typedef enum {
+ NO_RESIZE = 0, // Keep current size. Sign of others: > 0 down, < 0 up.
+ DOWN_THREEFOUR = 1, // From orig to 3/4.
+ DOWN_ONEHALF = 2, // From orig or 3/4 to 1/2.
+ UP_THREEFOUR = -1, // From 1/2 to 3/4.
+ UP_ORIG = -2, // From 1/2 or 3/4 to orig.
+} RESIZE_ACTION;
+
+typedef enum { ORIG = 0, THREE_QUARTER = 1, ONE_HALF = 2 } RESIZE_STATE;
+
/*!\endcond */
/*!
* \brief Rate Control parameters and status
@@ -281,6 +291,12 @@
int use_arf_in_this_kf_group;
// Track amount of low motion in scene
int avg_frame_low_motion;
+
+ // For dynamic resize, 1 pass cbr.
+ RESIZE_STATE resize_state;
+ int resize_avg_qp;
+ int resize_buffer_underflow;
+ int resize_count;
/*!\endcond */
} RATE_CONTROL;
diff --git a/examples/svc_encoder_rtc.c b/examples/svc_encoder_rtc.c
index 236afc8..1fc8425 100644
--- a/examples/svc_encoder_rtc.c
+++ b/examples/svc_encoder_rtc.c
@@ -20,6 +20,7 @@
#include "aom/aom_encoder.h"
#include "aom/aomcx.h"
#include "av1/common/enums.h"
+#include "av1/encoder/encoder.h"
#include "common/tools_common.h"
#include "common/video_writer.h"
#include "aom_ports/aom_timer.h"
@@ -700,6 +701,7 @@
cfg.rc_buf_initial_sz = 600;
cfg.rc_buf_optimal_sz = 600;
cfg.rc_buf_sz = 1000;
+ cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
// Use 1 thread as default.
cfg.g_threads = (unsigned int)strtoul(argv[11], NULL, 0);
diff --git a/test/resize_test.cc b/test/resize_test.cc
index 6942c20..1bccd20 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -13,6 +13,7 @@
#include <vector>
#include "aom_dsp/aom_dsp_common.h"
#include "common/tools_common.h"
+#include "av1/encoder/encoder.h"
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
@@ -597,15 +598,25 @@
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
// Run at low bitrate, with resize_allowed = 1, and verify that we get
// one resize down event.
-TEST_P(ResizeRealtimeTest, DISABLED_TestInternalResizeDown) {
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 299);
+TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
+ ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+ 0, 400);
DefaultConfig();
- cfg_.g_w = 352;
- cfg_.g_h = 288;
+ cfg_.g_w = 640;
+ cfg_.g_h = 480;
change_bitrate_ = false;
+ set_scale_mode_ = false;
+ set_scale_mode2_ = false;
mismatch_psnr_ = 0.0;
mismatch_nframes_ = 0;
+ DefaultConfig();
+ // Disable dropped frames.
+ cfg_.rc_dropframe_thresh = 0;
+ // Starting bitrate low.
+ cfg_.rc_target_bitrate = 150;
+ cfg_.rc_resize_mode = RESIZE_DYNAMIC;
+ cfg_.g_forced_max_frame_width = 1280;
+ cfg_.g_forced_max_frame_height = 1280;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
unsigned int last_w = cfg_.g_w;
@@ -635,19 +646,25 @@
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
// Start at low target bitrate, raise the bitrate in the middle of the clip,
// scaling-up should occur after bitrate changed.
-TEST_P(ResizeRealtimeTest, DISABLED_TestInternalResizeDownUpChangeBitRate) {
- ::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 359);
+TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
+ ::libaom_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+ 0, 400);
DefaultConfig();
- cfg_.g_w = 352;
- cfg_.g_h = 288;
+ cfg_.g_w = 640;
+ cfg_.g_h = 480;
change_bitrate_ = true;
+ set_scale_mode_ = false;
+ set_scale_mode2_ = false;
mismatch_psnr_ = 0.0;
mismatch_nframes_ = 0;
+ DefaultConfig();
// Disable dropped frames.
cfg_.rc_dropframe_thresh = 0;
// Starting bitrate low.
- cfg_.rc_target_bitrate = 80;
+ cfg_.rc_target_bitrate = 150;
+ cfg_.rc_resize_mode = RESIZE_DYNAMIC;
+ cfg_.g_forced_max_frame_width = 1280;
+ cfg_.g_forced_max_frame_height = 1280;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
unsigned int last_w = cfg_.g_w;