Speed up GOP length decision for speed=5
For speed 5, the GOP length is decided based on the GF boost
and an approximate tpl model.
            Instruction Count        BD-Rate Loss(%)
cpu-used       Reduction(%)    avg.psnr  ovr.psnr   ssim
   5             8.364         0.1476    0.1294    0.0998
STATS_CHANGED
Change-Id: I5a05be4fc2154e28238b470747d0b5b17d8f966b
diff --git a/av1/encoder/gop_structure.c b/av1/encoder/gop_structure.c
index 757232d..f768a57 100644
--- a/av1/encoder/gop_structure.c
+++ b/av1/encoder/gop_structure.c
@@ -45,7 +45,7 @@
gf_group->cur_frame_idx[*frame_ind] = *cur_frame_idx;
gf_group->layer_depth[*frame_ind] = MAX_ARF_LAYERS;
gf_group->arf_boost[*frame_ind] = av1_calc_arf_boost(
- twopass, rc, frame_info, start, end - start, 0, NULL, NULL);
+ twopass, rc, frame_info, start, end - start, 0, NULL, NULL, 0);
gf_group->frame_type[*frame_ind] = INTER_FRAME;
gf_group->refbuf_state[*frame_ind] = REFBUF_UPDATE;
gf_group->max_layer_depth =
@@ -67,7 +67,7 @@
// Get the boost factor for intermediate ARF frames.
gf_group->arf_boost[*frame_ind] = av1_calc_arf_boost(
- twopass, rc, frame_info, m, end - m, m - start, NULL, NULL);
+ twopass, rc, frame_info, m, end - m, m - start, NULL, NULL, 0);
++(*frame_ind);
// Frames displayed before this internal ARF.
diff --git a/av1/encoder/gop_structure.h b/av1/encoder/gop_structure.h
index 6cfca22..56978ab 100644
--- a/av1/encoder/gop_structure.h
+++ b/av1/encoder/gop_structure.h
@@ -69,7 +69,7 @@
int av1_calc_arf_boost(const TWO_PASS *twopass, const RATE_CONTROL *rc,
FRAME_INFO *frame_info, int offset, int f_frames,
int b_frames, int *num_fpstats_used,
- int *num_fpstats_required);
+ int *num_fpstats_required, int project_gfu_boost);
/*!\endcond */
#ifdef __cplusplus
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index 2beb048..209c744 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -642,11 +642,12 @@
}
#define GF_MAX_BOOST 90.0
+#define GF_MIN_BOOST 50
#define MIN_DECAY_FACTOR 0.01
int av1_calc_arf_boost(const TWO_PASS *twopass, const RATE_CONTROL *rc,
FRAME_INFO *frame_info, int offset, int f_frames,
int b_frames, int *num_fpstats_used,
- int *num_fpstats_required) {
+ int *num_fpstats_required, int project_gfu_boost) {
int i;
GF_GROUP_STATS gf_stats;
init_gf_stats(&gf_stats);
@@ -719,16 +720,16 @@
}
arf_boost += (int)boost_score;
- if (num_fpstats_required) {
+ if (project_gfu_boost) {
+ assert(num_fpstats_required != NULL);
+ assert(num_fpstats_used != NULL);
*num_fpstats_required = f_frames + b_frames;
- if (num_fpstats_used) {
- arf_boost = get_projected_gfu_boost(rc, arf_boost, *num_fpstats_required,
- *num_fpstats_used);
- }
+ arf_boost = get_projected_gfu_boost(rc, arf_boost, *num_fpstats_required,
+ *num_fpstats_used);
}
- if (arf_boost < ((b_frames + f_frames) * 50))
- arf_boost = ((b_frames + f_frames) * 50);
+ if (arf_boost < ((b_frames + f_frames) * GF_MIN_BOOST))
+ arf_boost = ((b_frames + f_frames) * GF_MIN_BOOST);
return arf_boost;
}
@@ -2539,21 +2540,20 @@
// Calculate the boost for alt ref.
rc->gfu_boost = av1_calc_arf_boost(
twopass, rc, frame_info, alt_offset, forward_frames, ext_len,
- cpi->ppi->lap_enabled ? &rc->num_stats_used_for_gfu_boost : NULL,
- cpi->ppi->lap_enabled ? &rc->num_stats_required_for_gfu_boost : NULL);
+ &rc->num_stats_used_for_gfu_boost,
+ &rc->num_stats_required_for_gfu_boost, cpi->ppi->lap_enabled);
} else {
reset_fpf_position(twopass, start_pos);
gf_group->max_layer_depth_allowed = 0;
set_baseline_gf_interval(cpi, i, active_max_gf_interval, use_alt_ref,
is_final_pass);
- rc->gfu_boost = AOMMIN(
- MAX_GF_BOOST,
- av1_calc_arf_boost(
- twopass, rc, frame_info, alt_offset, ext_len, 0,
- cpi->ppi->lap_enabled ? &rc->num_stats_used_for_gfu_boost : NULL,
- cpi->ppi->lap_enabled ? &rc->num_stats_required_for_gfu_boost
- : NULL));
+ rc->gfu_boost =
+ AOMMIN(MAX_GF_BOOST,
+ av1_calc_arf_boost(twopass, rc, frame_info, alt_offset, ext_len,
+ 0, &rc->num_stats_used_for_gfu_boost,
+ &rc->num_stats_required_for_gfu_boost,
+ cpi->ppi->lap_enabled));
}
#define LAST_ALR_BOOST_FACTOR 0.2f
@@ -3674,7 +3674,7 @@
}
if (max_gop_length > 16 && oxcf->algo_cfg.enable_tpl_model &&
- !cpi->sf.tpl_sf.disable_gop_length_decision) {
+ !(cpi->sf.tpl_sf.gop_length_decision_method == 2)) {
int this_idx = rc->frames_since_key + rc->gf_intervals[rc->cur_gf_index] -
rc->regions_offset - 1;
int this_region =
@@ -3692,20 +3692,37 @@
// max_gop_length = 32 with look-ahead gf intervals.
define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 0);
this_frame = this_frame_copy;
- int is_temporal_filter_enabled =
- (rc->frames_since_key > 0 && gf_group->arf_index > -1);
- if (is_temporal_filter_enabled) {
- int arf_src_index = gf_group->arf_src_offset[gf_group->arf_index];
- FRAME_UPDATE_TYPE arf_update_type =
- gf_group->update_type[gf_group->arf_index];
- int is_forward_keyframe = 0;
- av1_temporal_filter(cpi, arf_src_index, arf_update_type,
- is_forward_keyframe, NULL);
- aom_extend_frame_borders(&cpi->alt_ref_buffer,
- av1_num_planes(&cpi->common));
+
+ int is_temporal_filter_enabled = 0;
+ int shorten_gf_interval = 0;
+ if (!cpi->sf.tpl_sf.gop_length_decision_method) {
+ is_temporal_filter_enabled =
+ (rc->frames_since_key > 0 && gf_group->arf_index > -1);
+ if (is_temporal_filter_enabled) {
+ int arf_src_index = gf_group->arf_src_offset[gf_group->arf_index];
+ FRAME_UPDATE_TYPE arf_update_type =
+ gf_group->update_type[gf_group->arf_index];
+ int is_forward_keyframe = 0;
+ av1_temporal_filter(cpi, arf_src_index, arf_update_type,
+ is_forward_keyframe, NULL);
+ aom_extend_frame_borders(&cpi->alt_ref_buffer,
+ av1_num_planes(&cpi->common));
+ }
+ shorten_gf_interval =
+ !av1_tpl_setup_stats(cpi, 1, frame_params, frame_input);
+ // Tpl stats are reused when the ARF is temporally filtered and the gf
+ // interval is not shortened.
+ if (is_temporal_filter_enabled && !shorten_gf_interval)
+ cpi->tpl_data.skip_tpl_setup_stats = 1;
+ } else {
+ // GOP length is decided based on GF boost and approximate tpl model
+ shorten_gf_interval =
+ (rc->gfu_boost <
+ rc->num_stats_used_for_gfu_boost * GF_MIN_BOOST * 1.4) &&
+ !av1_tpl_setup_stats(cpi, 2, frame_params, frame_input);
}
- if (!av1_tpl_setup_stats(cpi, 1, frame_params, frame_input)) {
- // Tpl decides that a shorter gf interval is better.
+ if (shorten_gf_interval) {
+ // A shorter gf interval is better.
// TODO(jingning): Remove redundant computations here.
max_gop_length = 16;
calculate_gf_length(cpi, max_gop_length, 1);
@@ -3713,10 +3730,6 @@
(ori_gf_int - rc->gf_intervals[rc->cur_gf_index] < 4)) {
rc->gf_intervals[rc->cur_gf_index] = ori_gf_int;
}
- } else {
- // Tpl stats is reused only when the ARF frame is temporally filtered
- if (is_temporal_filter_enabled)
- cpi->tpl_data.skip_tpl_setup_stats = 1;
}
}
}
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 099b7c3..25d8f53 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -991,6 +991,7 @@
sf->tpl_sf.prune_starting_mv = 3;
sf->tpl_sf.use_y_only_rate_distortion = 1;
sf->tpl_sf.subpel_force_stop = FULL_PEL;
+ sf->tpl_sf.gop_length_decision_method = 1;
sf->winner_mode_sf.dc_blk_pred_level = 1;
}
@@ -1021,7 +1022,7 @@
sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
sf->mv_sf.use_bsize_dependent_search_method = 1;
- sf->tpl_sf.disable_gop_length_decision = 1;
+ sf->tpl_sf.gop_length_decision_method = 2;
sf->tpl_sf.disable_filtered_key_tpl = 1;
sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
@@ -1426,7 +1427,7 @@
}
static AOM_INLINE void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) {
- tpl_sf->disable_gop_length_decision = 0;
+ tpl_sf->gop_length_decision_method = 0;
tpl_sf->prune_intra_modes = 0;
tpl_sf->prune_starting_mv = 0;
tpl_sf->reduce_first_step_size = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index c4f8e81..b85bcb2 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -375,8 +375,12 @@
/*!\cond */
typedef struct TPL_SPEED_FEATURES {
- // Enable/disable GOP length adaptive decision.
- int disable_gop_length_decision;
+ // GOP length adaptive decision.
+ // If set to 0, tpl model decides whether a shorter gf interval is better.
+ // If set to 1, approximate tpl model and GF boost decide whether a
+ // shorter gf interval is better. If set to 2, gop length adaptive decision is
+ // disabled.
+ int gop_length_decision_method;
// Prune the intra modes search by tpl.
// If set to 0, we will search all intra modes from DC_PRED to PAETH_PRED.
// If set to 1, we only search DC_PRED, V_PRED, and H_PRED.
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 91af86b..f1f9cc0 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1452,6 +1452,7 @@
int bottom_index, top_index;
EncodeFrameParams this_frame_params = *frame_params;
TplParams *const tpl_data = &cpi->tpl_data;
+ int approx_gop_eval = (gop_eval == 2);
if (cpi->superres_mode != AOM_SUPERRES_NONE) {
assert(cpi->superres_mode != AOM_SUPERRES_AUTO);
@@ -1499,8 +1500,13 @@
av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
cm->features.allow_high_precision_mv, cpi->td.mb.mv_costs);
+ // With approx_gop_eval set (gop_eval == 2), tpl stats are computed only
+ // for the base layer and the next layer ARF.
+ int frame_idx_end =
+ approx_gop_eval ? AOMMIN(tpl_gf_group_frames - 1, gf_group->arf_index + 1)
+ : tpl_gf_group_frames - 1;
// Backward propagation from tpl_group_frames to 1.
- for (int frame_idx = cpi->gf_frame_index; frame_idx < tpl_gf_group_frames;
+ for (int frame_idx = cpi->gf_frame_index; frame_idx <= frame_idx_end;
++frame_idx) {
if (gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE ||
gf_group->update_type[frame_idx] == OVERLAY_UPDATE)
@@ -1520,8 +1526,8 @@
av1_num_planes(cm));
}
- for (int frame_idx = tpl_gf_group_frames - 1;
- frame_idx >= cpi->gf_frame_index; --frame_idx) {
+ for (int frame_idx = frame_idx_end; frame_idx >= cpi->gf_frame_index;
+ --frame_idx) {
if (gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE ||
gf_group->update_type[frame_idx] == OVERLAY_UPDATE)
continue;
@@ -1588,6 +1594,7 @@
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, av1_tpl_setup_stats_time);
#endif
+ if (approx_gop_eval) return beta[0] > 1.1;
// Allow larger GOP size if the base layer ARF has higher dependency factor
// than the intermediate ARF and both ARFs have reasonably high dependency