Speed-up GOP length decision for speed=4
For speed=4, GOP length is decided based on tpl
stats of ARFs from base layer, base+1 layer and
base+2 layer.
          Instruction Count       BD-Rate Loss(%)
cpu-used    Reduction(%)     avg.psnr  ovr.psnr    ssim
   4           1.898         -0.0482   -0.0503   -0.0876
STATS_CHANGED
Change-Id: Ie2e11ef553af42c803b7d7202ac1002a6364890f
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index e9f4531..e425a73 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -1005,6 +1005,62 @@
return 0;
}
+static int is_shorter_gf_interval_better(AV1_COMP *cpi,
+ EncodeFrameParams *frame_params,
+ const EncodeFrameInput *frame_input) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ int gop_length_decision_method = cpi->sf.tpl_sf.gop_length_decision_method;
+ int shorten_gf_interval;  // Result; assigned on every path below.
+ // Returns nonzero when the selected method prefers a shorter GF interval.
+ if (gop_length_decision_method == 2) {
+ // Method 2: shorten only if the GF boost is below GF_MIN_BOOST * 1.4 per
+ // stat used and the tpl pass with gop_eval 3 (run only then) returns 0.
+ shorten_gf_interval =
+ (rc->gfu_boost <
+ rc->num_stats_used_for_gfu_boost * GF_MIN_BOOST * 1.4) &&
+ !av1_tpl_setup_stats(cpi, 3, frame_params, frame_input);
+ } else {
+ int do_complete_tpl = 1;  // Cleared when the gop_eval 2 pass is conclusive.
+ GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ int is_temporal_filter_enabled =
+ (rc->frames_since_key > 0 && gf_group->arf_index > -1);
+
+ if (is_temporal_filter_enabled) {  // Filter the ARF ahead of the tpl passes.
+ int arf_src_index = gf_group->arf_src_offset[gf_group->arf_index];
+ FRAME_UPDATE_TYPE arf_update_type =
+ gf_group->update_type[gf_group->arf_index];
+ int is_forward_keyframe = 0;
+ av1_temporal_filter(cpi, arf_src_index, arf_update_type,
+ is_forward_keyframe, NULL);
+ aom_extend_frame_borders(&cpi->alt_ref_buffer,
+ av1_num_planes(&cpi->common));
+ }
+
+ if (gop_length_decision_method == 1) {
+ // Method 1: first try the tpl pass with gop_eval 2 (ARFs from base,
+ // (base+1) and (base+2) layers). It returns 2 when it cannot decide.
+ int gop_length_eval =
+ av1_tpl_setup_stats(cpi, 2, frame_params, frame_input);
+
+ if (gop_length_eval != 2) {  // Conclusive: 0 means shorten, 1 means keep.
+ do_complete_tpl = 0;
+ shorten_gf_interval = !gop_length_eval;
+ }
+ }
+
+ if (do_complete_tpl) {
+ // Complete tpl stats (gop_eval 1) decide unless method 1 already did.
+ shorten_gf_interval =
+ !av1_tpl_setup_stats(cpi, 1, frame_params, frame_input);
+ // The complete tpl stats are flagged for reuse only when the ARF was
+ // temporally filtered and the GF interval is not shortened.
+ if (is_temporal_filter_enabled && !shorten_gf_interval)
+ cpi->tpl_data.skip_tpl_setup_stats = 1;
+ }
+ }
+ return shorten_gf_interval;  // Nonzero: a shorter GF interval is better.
+}
+
#define MIN_FWD_KF_INTERVAL 8
#define MIN_SHRINK_LEN 6 // the minimum length of gf if we are shrinking
#define SMOOTH_FILT_LEN 7
@@ -3694,7 +3750,7 @@
}
if (max_gop_length > 16 && oxcf->algo_cfg.enable_tpl_model &&
- !(cpi->sf.tpl_sf.gop_length_decision_method == 2)) {
+ cpi->sf.tpl_sf.gop_length_decision_method != 3) {
int this_idx = rc->frames_since_key + rc->gf_intervals[rc->cur_gf_index] -
rc->regions_offset - 1;
int this_region =
@@ -3713,35 +3769,7 @@
define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 0);
this_frame = this_frame_copy;
- int is_temporal_filter_enabled = 0;
- int shorten_gf_interval = 0;
- if (!cpi->sf.tpl_sf.gop_length_decision_method) {
- is_temporal_filter_enabled =
- (rc->frames_since_key > 0 && gf_group->arf_index > -1);
- if (is_temporal_filter_enabled) {
- int arf_src_index = gf_group->arf_src_offset[gf_group->arf_index];
- FRAME_UPDATE_TYPE arf_update_type =
- gf_group->update_type[gf_group->arf_index];
- int is_forward_keyframe = 0;
- av1_temporal_filter(cpi, arf_src_index, arf_update_type,
- is_forward_keyframe, NULL);
- aom_extend_frame_borders(&cpi->alt_ref_buffer,
- av1_num_planes(&cpi->common));
- }
- shorten_gf_interval =
- !av1_tpl_setup_stats(cpi, 1, frame_params, frame_input);
- // Tpl stats is reused when the ARF is temporally filtered and gf
- // interval is not shortened.
- if (is_temporal_filter_enabled && !shorten_gf_interval)
- cpi->tpl_data.skip_tpl_setup_stats = 1;
- } else {
- // GOP length is decided based on GF boost and approximate tpl model
- shorten_gf_interval =
- (rc->gfu_boost <
- rc->num_stats_used_for_gfu_boost * GF_MIN_BOOST * 1.4) &&
- !av1_tpl_setup_stats(cpi, 2, frame_params, frame_input);
- }
- if (shorten_gf_interval) {
+ if (is_shorter_gf_interval_better(cpi, frame_params, frame_input)) {
// A shorter gf interval is better.
// TODO(jingning): Remove redundant computations here.
max_gop_length = 16;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 8720f45..bd7ca67 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1007,6 +1007,7 @@
sf->tpl_sf.prune_starting_mv = 2;
sf->tpl_sf.subpel_force_stop = HALF_PEL;
sf->tpl_sf.search_method = FAST_BIGDIA;
+ sf->tpl_sf.gop_length_decision_method = 1;
sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1;
sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
@@ -1058,7 +1059,7 @@
sf->tpl_sf.prune_starting_mv = 3;
sf->tpl_sf.use_y_only_rate_distortion = 1;
sf->tpl_sf.subpel_force_stop = FULL_PEL;
- sf->tpl_sf.gop_length_decision_method = 1;
+ sf->tpl_sf.gop_length_decision_method = 2;
sf->winner_mode_sf.dc_blk_pred_level = 1;
}
@@ -1089,7 +1090,7 @@
sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
sf->mv_sf.use_bsize_dependent_search_method = 1;
- sf->tpl_sf.gop_length_decision_method = 2;
+ sf->tpl_sf.gop_length_decision_method = 3;
sf->tpl_sf.disable_filtered_key_tpl = 1;
sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index b85bcb2..750c6c6 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -377,9 +377,11 @@
typedef struct TPL_SPEED_FEATURES {
// GOP length adaptive decision.
// If set to 0, tpl model decides whether a shorter gf interval is better.
- // If set to 1, approximate tpl model and GF boost decide whether a
- // shorter gf interval is better. If set to 2, gop length adaptive decision is
- // disabled.
+ // If set to 1, tpl stats of ARFs from base layer, (base+1) layer and
+ // (base+2) layer decide whether a shorter gf interval is better.
+ // If set to 2, tpl stats of ARFs from base layer, (base+1) layer and GF boost
+ // decide whether a shorter gf interval is better.
+ // If set to 3, gop length adaptive decision is disabled.
int gop_length_decision_method;
// Prune the intra modes search by tpl.
// If set to 0, we will search all intra modes from DC_PRED to PAETH_PRED.
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index dd53ac2..9d4f1d7 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1434,6 +1434,26 @@
}
}
+static AOM_INLINE int eval_gop_length(double *beta, int gop_eval) {
+ switch (gop_eval) {  // Returns 1: keep GOP length, 0: shorten, 2: undecided.
+ case 1:
+ // Complete tpl stats. Allow the larger GOP size only if the base layer
+ // ARF's dependency factor (beta[0]) is at least 0.7 above the intermediate
+ // ARF's (beta[1]) and is itself greater than 8.0.
+ return (beta[0] >= beta[1] + 0.7) && beta[0] > 8.0;
+ case 2:  // Three-way result; values between the two tests stay undecided.
+ if ((beta[0] >= beta[1] + 0.4) && beta[0] > 1.6)
+ return 1; // Don't shorten the gf interval
+ else if ((beta[0] < beta[1] + 0.1) || beta[0] <= 1.4)
+ return 0; // Shorten the gf interval
+ else
+ return 2; // Cannot decide the gf interval, so the caller redoes the
+ // tpl stats calculation with complete stats.
+ case 3: return beta[0] > 1.1;  // Only beta[0] is consulted here.
+ default: return 2;  // Any other gop_eval (e.g. 0) yields 2.
+ }
+}
+
int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval,
const EncodeFrameParams *const frame_params,
const EncodeFrameInput *const frame_input) {
@@ -1447,7 +1467,14 @@
int bottom_index, top_index;
EncodeFrameParams this_frame_params = *frame_params;
TplParams *const tpl_data = &cpi->tpl_data;
- int approx_gop_eval = (gop_eval == 2);
+ int approx_gop_eval = (gop_eval > 1);
+ int num_arf_layers = MAX_ARF_LAYERS;
+
+ // When gop_eval is set to 2, tpl stats calculation is done for ARFs from base
+ // layer, (base+1) layer and (base+2) layer. When gop_eval is set to 3,
+ // tpl stats calculation is limited to ARFs from base layer and (base+1)
+ // layer.
+ if (approx_gop_eval) num_arf_layers = (gop_eval == 2) ? 3 : 2;
if (cpi->superres_mode != AOM_SUPERRES_NONE) {
assert(cpi->superres_mode != AOM_SUPERRES_AUTO);
@@ -1495,18 +1522,20 @@
av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
cm->features.allow_high_precision_mv, cpi->td.mb.mv_costs);
- // When approx_gop_eval = 1 tpl stats calculation is done for base layer
- // and the next layer ARF.
- int frame_idx_end =
- approx_gop_eval ? AOMMIN(tpl_gf_group_frames - 1, gf_group->arf_index + 1)
- : tpl_gf_group_frames - 1;
+ const int gop_length = get_gop_length(gf_group);
// Backward propagation from tpl_group_frames to 1.
- for (int frame_idx = cpi->gf_frame_index; frame_idx <= frame_idx_end;
+ for (int frame_idx = cpi->gf_frame_index; frame_idx < tpl_gf_group_frames;
++frame_idx) {
if (gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE ||
gf_group->update_type[frame_idx] == OVERLAY_UPDATE)
continue;
+ // When approx_gop_eval = 1, skip tpl stats calculation for higher layer
+ // frames and for frames beyond gop length.
+ if (approx_gop_eval && (gf_group->layer_depth[frame_idx] > num_arf_layers ||
+ frame_idx >= gop_length))
+ continue;
+
init_mc_flow_dispenser(cpi, frame_idx, pframe_qindex);
if (mt_info->num_workers > 1) {
tpl_row_mt->sync_read_ptr = av1_tpl_row_mt_sync_read;
@@ -1521,12 +1550,16 @@
av1_num_planes(cm));
}
- for (int frame_idx = frame_idx_end; frame_idx >= cpi->gf_frame_index;
- --frame_idx) {
+ for (int frame_idx = tpl_gf_group_frames - 1;
+ frame_idx >= cpi->gf_frame_index; --frame_idx) {
if (gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE ||
gf_group->update_type[frame_idx] == OVERLAY_UPDATE)
continue;
+ if (approx_gop_eval && (gf_group->layer_depth[frame_idx] > num_arf_layers ||
+ frame_idx >= gop_length))
+ continue;
+
mc_flow_synthesizer(tpl_data, frame_idx, cm->mi_params.mi_rows,
cm->mi_params.mi_cols);
}
@@ -1589,12 +1622,7 @@
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, av1_tpl_setup_stats_time);
#endif
- if (approx_gop_eval) return beta[0] > 1.1;
-
- // Allow larger GOP size if the base layer ARF has higher dependency factor
- // than the intermediate ARF and both ARFs have reasonably high dependency
- // factors.
- return (beta[0] >= beta[1] + 0.7) && beta[0] > 8.0;
+ return eval_gop_length(beta, gop_eval);
}
void av1_tpl_rdmult_setup(AV1_COMP *cpi) {