Make rate_correction_factor support hierarchical structure.
Rate correction factor is used to estimate the required bits to
encode a frame.
The factor is determined by the factor of its nearest frame of the
same frame type. For example, if the frame is an internal arf, its
factor relies on the previous internal arf's factor.
Here we compute the rate correction factor for each layer separately,
and use the average over that layer's recent frames as the base for the
current frame's factor.
Let each encoding layer have its own rate_correction_factor.
Previously, 4 different layers were supported: key frame, arf_std,
arf_low, inter_normal.
This change allows it to support 6 layers:
key frame (layer 0), arf (layer 1), internal arfs (layers 2 - 4), and
leaf frames (layer 5).
Performance:
new_spd4 Avg_psnr ovr_psnr ssim vmaf
Lowres -0.11 -0.14 0.17 -0.45
Midres -0.03 -0.09 0.37 -0.29
Hdres -0.05 -0.08 0.20 -0.27
Ugc360p 0.01 -0.16 0.20 -0.25
new_spd2 Avg_psnr ovr_psnr ssim vmaf
Lowres -0.06 -0.10 0.17 -0.38
Midres -0.04 -0.05 0.51 -0.22
Hdres 0.05 0.04 0.35 -0.07
Ugc360p 0.05 -0.09 0.32 -0.21
new_spd1 Avg_psnr ovr_psnr ssim vmaf
Lowres -0.19 -0.29 -0.07 -0.40
Midres -0.16 -0.41 0.24 -0.63
Hdres (still running)
Ugc360p -0.10 -0.43 0.03 -0.49
STATS_CHANGED
Change-Id: I78ee63a5162e21771e2290e137046899499e71fd
diff --git a/av1/encoder/firstpass.h b/av1/encoder/firstpass.h
index 6ca1215..a1798c9 100644
--- a/av1/encoder/firstpass.h
+++ b/av1/encoder/firstpass.h
@@ -30,8 +30,6 @@
#define VLOW_MOTION_THRESHOLD 950
-#define MAX_ARF_LAYERS 5
-
typedef struct {
// Frame number in display order, if stats are for a single frame.
// No real meaning for a collection of frames.
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index f6db83f..24cafb4 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -322,6 +322,12 @@
rc->rate_correction_factors[i] = 0.7;
}
rc->rate_correction_factors[KF_STD] = 1.0;
+ for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
+ rc->frame_count_pyramid[i] = 0;
+ for (int j = 0; j < 8; ++j) {
+ rc->rate_correction_factors_pyramid[i][j] = 0;
+ }
+ }
rc->min_gf_interval = oxcf->min_gf_interval;
rc->max_gf_interval = oxcf->max_gf_interval;
if (rc->min_gf_interval == 0)
@@ -399,20 +405,55 @@
return AOMMAX(AOMMIN(q, cpi->rc.worst_quality), cpi->rc.best_quality);
}
-static const RATE_FACTOR_LEVEL rate_factor_levels[FRAME_UPDATE_TYPES] = {
- KF_STD, // KF_UPDATE
- INTER_NORMAL, // LF_UPDATE
- GF_ARF_STD, // GF_UPDATE
- GF_ARF_STD, // ARF_UPDATE
- INTER_NORMAL, // OVERLAY_UPDATE
- INTER_NORMAL, // INTNL_OVERLAY_UPDATE
- GF_ARF_LOW, // INTNL_ARF_UPDATE
-};
-
static RATE_FACTOR_LEVEL get_rate_factor_level(const GF_GROUP *const gf_group) {
const FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_group->index];
assert(update_type < FRAME_UPDATE_TYPES);
- return rate_factor_levels[update_type];
+ if (update_type == OVERLAY_UPDATE || update_type == INTNL_OVERLAY_UPDATE) {
+ return INTER_LEAF;
+ }
+
+ if (update_type == KF_UPDATE) return KF_STD;
+ if (update_type == GF_UPDATE) return GF_ARF_LAYER_1;
+
+ const int layer_depth = gf_group->layer_depth[gf_group->index];
+ return layer_depth;
+}
+
+// The number of frames for each layer in the hierarchical structure.
+// index 0: key frame.
+// layer 1: arfs. layer 5: leaf nodes.
+static int num_frames_in_layers[MAX_ARF_LAYERS + 1] = { 1, 1, 1, 2, 4, 8 };
+
+// Store previous frames' rate_correction_factor at each layer.
+// The average factor is computed and used.
+static void set_rate_factors_in_pyramid(RATE_CONTROL *const rc,
+ const int frame_update_type,
+ const int layer_depth,
+ const double factor) {
+ if (frame_update_type == OVERLAY_UPDATE ||
+ frame_update_type == INTNL_OVERLAY_UPDATE) {
+ return;
+ }
+
+ int frame_count = rc->frame_count_pyramid[layer_depth];
+ if (frame_count >= num_frames_in_layers[layer_depth]) {
+ for (int i = 0; i < frame_count - 1; ++i) {
+ rc->rate_correction_factors_pyramid[layer_depth][i] =
+ rc->rate_correction_factors_pyramid[layer_depth][i + 1];
+ }
+ rc->rate_correction_factors_pyramid[layer_depth][frame_count - 1] = factor;
+ } else {
+ rc->rate_correction_factors_pyramid[layer_depth][frame_count] = factor;
+ ++frame_count;
+ ++rc->frame_count_pyramid[layer_depth];
+ }
+ double avg_rate_correction_factor = 0;
+ for (int i = 0; i < frame_count; ++i) {
+ avg_rate_correction_factor +=
+ rc->rate_correction_factors_pyramid[layer_depth][i];
+ }
+ avg_rate_correction_factor /= frame_count;
+ rc->rate_correction_factors[layer_depth] = avg_rate_correction_factor;
}
static double get_rate_correction_factor(const AV1_COMP *cpi, int width,
@@ -429,9 +470,9 @@
if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
!rc->is_src_frame_alt_ref && !cpi->use_svc &&
(cpi->oxcf.rc_mode != AOM_CBR || cpi->oxcf.gf_cbr_boost_pct > 20))
- rcf = rc->rate_correction_factors[GF_ARF_STD];
+ rcf = rc->rate_correction_factors[GF_ARF_LAYER_1];
else
- rcf = rc->rate_correction_factors[INTER_NORMAL];
+ rcf = rc->rate_correction_factors[INTER_LEAF];
}
rcf *= resize_rate_factor(cpi, width, height);
return fclamp(rcf, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
@@ -449,15 +490,19 @@
if (cpi->common.current_frame.frame_type == KEY_FRAME) {
rc->rate_correction_factors[KF_STD] = factor;
} else if (is_stat_consumption_stage(cpi)) {
- const RATE_FACTOR_LEVEL rf_lvl = get_rate_factor_level(&cpi->gf_group);
- rc->rate_correction_factors[rf_lvl] = factor;
+ const GF_GROUP *const gf_group = &cpi->gf_group;
+ const FRAME_UPDATE_TYPE frame_update_type =
+ gf_group->update_type[gf_group->index];
+ const int layer_depth = gf_group->layer_depth[gf_group->index];
+ assert(layer_depth >= 0 && layer_depth <= MAX_ARF_LAYERS);
+ set_rate_factors_in_pyramid(rc, frame_update_type, layer_depth, factor);
} else {
if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
!rc->is_src_frame_alt_ref && !cpi->use_svc &&
(cpi->oxcf.rc_mode != AOM_CBR || cpi->oxcf.gf_cbr_boost_pct > 20))
- rc->rate_correction_factors[GF_ARF_STD] = factor;
+ rc->rate_correction_factors[GF_ARF_LAYER_1] = factor;
else
- rc->rate_correction_factors[INTER_NORMAL] = factor;
+ rc->rate_correction_factors[INTER_LEAF] = factor;
}
}
@@ -1038,22 +1083,18 @@
}
static const double rate_factor_deltas[RATE_FACTOR_LEVELS] = {
- 1.00, // INTER_NORMAL
- 1.50, // GF_ARF_LOW
- 2.00, // GF_ARF_STD
2.00, // KF_STD
+ 2.00, // GF_ARF_LAYER_1
+ 1.75, // GF_ARF_LAYER_2
+ 1.50, // GF_ARF_LAYER_3
+ 1.25, // GF_ARF_LAYER_4
+ 1.00, // INTER_LEAF
};
int av1_frame_type_qdelta(const AV1_COMP *cpi, int q) {
const RATE_FACTOR_LEVEL rf_lvl = get_rate_factor_level(&cpi->gf_group);
const FRAME_TYPE frame_type = (rf_lvl == KF_STD) ? KEY_FRAME : INTER_FRAME;
- double rate_factor;
-
- rate_factor = rate_factor_deltas[rf_lvl];
- if (rf_lvl == GF_ARF_LOW) {
- rate_factor -= (cpi->gf_group.layer_depth[cpi->gf_group.index] - 2) * 0.2;
- rate_factor = AOMMAX(rate_factor, 1.0);
- }
+ const double rate_factor = rate_factor_deltas[rf_lvl];
return av1_compute_qdelta_by_rate(&cpi->rc, frame_type, q, rate_factor,
cpi->common.seq_params.bit_depth);
}
diff --git a/av1/encoder/ratectrl.h b/av1/encoder/ratectrl.h
index 530c02a..a03c5df 100644
--- a/av1/encoder/ratectrl.h
+++ b/av1/encoder/ratectrl.h
@@ -44,6 +44,8 @@
#define MIN_PYRAMID_LVL 0
#define MAX_PYRAMID_LVL 4
+#define MAX_ARF_LAYERS 5
+
#define MIN_GF_INTERVAL 4
#define MAX_GF_INTERVAL 16
#define FIXED_GF_INTERVAL 8 // Used in some testing modes only
@@ -55,10 +57,12 @@
} size_params_type;
enum {
- INTER_NORMAL,
- GF_ARF_LOW,
- GF_ARF_STD,
- KF_STD,
+ KF_STD = 0, // Key frame (layer 0)
+ GF_ARF_LAYER_1 = 1, // Alt-ref frame (layer 1)
+ GF_ARF_LAYER_2 = 2, // Frames at hierarchical layer 2
+ GF_ARF_LAYER_3 = 3, // Frames at hierarchical layer 3
+ GF_ARF_LAYER_4 = 4, // Frames at hierarchical layer 4
+ INTER_LEAF = 5, // Leaf frames (layer 5)
RATE_FACTOR_LEVELS
} UENUM1BYTE(RATE_FACTOR_LEVEL);
@@ -87,6 +91,13 @@
int gfu_boost;
int kf_boost;
+ // The number of frames so far at each layer.
+ int frame_count_pyramid[RATE_FACTOR_LEVELS];
+ // Rate correction factor at each layer for a group of pictures.
+ // Layer 0 stands for key frame, not used.
+ // Layer 1 has 1 frame only. Layer 5 has 8 frames (leaf frames).
+ double rate_correction_factors_pyramid[RATE_FACTOR_LEVELS][8];
+ // The average factor for each layer.
double rate_correction_factors[RATE_FACTOR_LEVELS];
int frames_since_golden;