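Add timing for more encoder stages
Add component timers for additional encoder stages: av1_encode_strategy,
av1_get_second_pass_params, denoise_and_encode, frame filtering,
av1_tpl_setup_stats, the individual partition search stages (prune, none,
split, rectangular, AB, 4-way), handle_inter_mode, motion mode evaluation
for winner candidates, and av1_search_palette_mode.
The timer around superblock-row encoding is renamed from encode_sb_time to
encode_sb_row_time; encode_sb_time now times the encode step at the end of
partition search. Per-frame timing output is printed after the
av1_encode_strategy() call, in pass 2 only.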
Change-Id: I5d68d8ff9b6e1cba17a0f2359ff5c01dc1ea56e9
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 77c7faf..78c4f33 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -866,6 +866,9 @@
EncodeFrameInput *const frame_input,
EncodeFrameParams *const frame_params,
EncodeFrameResults *const frame_results) {
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ if (cpi->oxcf.pass == 2) start_timing(cpi, denoise_and_encode_time);
+#endif
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
AV1_COMMON *const cm = &cpi->common;
const GF_GROUP *const gf_group = &cpi->gf_group;
@@ -908,6 +911,10 @@
arf_src_index = gf_group->arf_src_offset[gf_group->index];
}
}
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ if (cpi->oxcf.pass == 2) start_timing(cpi, apply_filtering_time);
+#endif
// Save the pointer to the original source image.
YV12_BUFFER_CONFIG *source_buffer = frame_input->source;
// apply filtering to frame
@@ -929,6 +936,9 @@
cpi->show_existing_alt_ref = show_existing_alt_ref;
}
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ if (cpi->oxcf.pass == 2) end_timing(cpi, apply_filtering_time);
+#endif
// perform tpl after filtering
int allow_tpl = oxcf->gf_cfg.lag_in_frames > 1 &&
@@ -973,7 +983,9 @@
cm->features.interp_filter, 0, false, true);
cpi->unscaled_source = source_buffer;
}
-
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ if (cpi->oxcf.pass == 2) end_timing(cpi, denoise_and_encode_time);
+#endif
return AOM_CODEC_OK;
}
#endif // !CONFIG_REALTIME_ONLY
@@ -1124,7 +1136,13 @@
oxcf->mode == REALTIME &&
gf_cfg->lag_in_frames == 0;
if (!use_one_pass_rt_params && !is_stat_generation_stage(cpi)) {
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, av1_get_second_pass_params_time);
+#endif
av1_get_second_pass_params(cpi, &frame_params, &frame_input, *frame_flags);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, av1_get_second_pass_params_time);
+#endif
}
#endif
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 63aec92..f8b113b 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -775,7 +775,7 @@
const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
#if CONFIG_COLLECT_COMPONENT_TIMING
- start_timing(cpi, encode_sb_time);
+ start_timing(cpi, encode_sb_row_time);
#endif
// Initialize the left context for the new SB row
@@ -859,7 +859,7 @@
sb_cols_in_tile);
}
#if CONFIG_COLLECT_COMPONENT_TIMING
- end_timing(cpi, encode_sb_time);
+ end_timing(cpi, encode_sb_row_time);
#endif
}
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index c45d91e..672c3bb 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3169,23 +3169,6 @@
}
}
-#if CONFIG_COLLECT_COMPONENT_TIMING
- end_timing(cpi, encode_frame_to_data_rate_time);
-
- // Print out timing information.
- int i;
- fprintf(stderr, "\n Frame number: %d, Frame type: %s, Show Frame: %d\n",
- cm->current_frame.frame_number,
- get_frame_type_enum(cm->current_frame.frame_type), cm->show_frame);
- for (i = 0; i < kTimingComponents; i++) {
- cpi->component_time[i] += cpi->frame_component_time[i];
- fprintf(stderr, " %s: %" PRId64 " us (total: %" PRId64 " us)\n",
- get_component_name(i), cpi->frame_component_time[i],
- cpi->component_time[i]);
- cpi->frame_component_time[i] = 0;
- }
-#endif
-
cpi->last_frame_type = current_frame->frame_type;
av1_rc_postencode_update(cpi, *size);
@@ -3210,6 +3193,10 @@
// it is not shown, we still need update the count down.
if (cm->show_frame) ++current_frame->frame_number;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, encode_frame_to_data_rate_time);
+#endif
+
return AOM_CODEC_OK;
}
@@ -3531,9 +3518,36 @@
if (assign_cur_frame_new_fb(cm) == NULL) return AOM_CODEC_ERROR;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ // Only accumulate 2nd pass time.
+ if (cpi->oxcf.pass == 2) start_timing(cpi, av1_encode_strategy_time);
+#endif
+
const int result =
av1_encode_strategy(cpi, size, dest, frame_flags, time_stamp, time_end,
timestamp_ratio, flush);
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ if (cpi->oxcf.pass == 2) end_timing(cpi, av1_encode_strategy_time);
+
+ // Print out timing information.
+ // Note: The "cpi->frame_component_time[0] > 100 us" check avoids printing
+ // timing for show_existing_frame and lag-in-frames.
+ if (cpi->oxcf.pass == 2 && cpi->frame_component_time[0] > 100) {
+ int i;
+ fprintf(stderr, "\n Frame number: %d, Frame type: %s, Show Frame: %d\n",
+ cm->current_frame.frame_number,
+ get_frame_type_enum(cm->current_frame.frame_type), cm->show_frame);
+ for (i = 0; i < kTimingComponents; i++) {
+ cpi->component_time[i] += cpi->frame_component_time[i];
+ fprintf(stderr, " %s: %" PRId64 " us (total: %" PRId64 " us)\n",
+ get_component_name(i), cpi->frame_component_time[i],
+ cpi->component_time[i]);
+ cpi->frame_component_time[i] = 0;
+ }
+ }
+#endif
+
if (result == -1) {
// Returning -1 indicates no frame encoded; more input is required
return -1;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 5b5cf0c..51f8ee4 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1495,6 +1495,11 @@
#include "aom_ports/aom_timer.h"
// Adjust the following to add new components.
enum {
+ av1_encode_strategy_time,
+ av1_get_second_pass_params_time,
+ denoise_and_encode_time,
+ apply_filtering_time,
+ av1_tpl_setup_stats_time,
encode_frame_to_data_rate_time,
encode_with_recode_loop_time,
loop_filter_time,
@@ -1504,13 +1509,25 @@
av1_encode_frame_time,
av1_compute_global_motion_time,
av1_setup_motion_field_time,
- encode_sb_time,
+ encode_sb_row_time,
+
rd_pick_partition_time,
+ av1_prune_partitions_time,
+ none_partition_search_time,
+ split_partition_search_time,
+ rectangular_partition_search_time,
+ ab_partitions_search_time,
+ rd_pick_4partition_time,
+ encode_sb_time,
+
rd_pick_sb_modes_time,
av1_rd_pick_intra_mode_sb_time,
av1_rd_pick_inter_mode_sb_time,
+ handle_inter_mode_time,
+ evaluate_motion_mode_for_winner_candidates_time,
handle_intra_mode_time,
do_tx_search_time,
+ av1_search_palette_mode_time,
handle_newmv_time,
compound_type_rd_time,
interpolation_filter_search_time,
@@ -1520,6 +1537,12 @@
static INLINE char const *get_component_name(int index) {
switch (index) {
+ case av1_encode_strategy_time: return "av1_encode_strategy_time";
+ case av1_get_second_pass_params_time:
+ return "av1_get_second_pass_params_time";
+ case denoise_and_encode_time: return "denoise_and_encode_time";
+ case apply_filtering_time: return "apply_filtering_time";
+ case av1_tpl_setup_stats_time: return "av1_tpl_setup_stats_time";
case encode_frame_to_data_rate_time:
return "encode_frame_to_data_rate_time";
case encode_with_recode_loop_time: return "encode_with_recode_loop_time";
@@ -1531,15 +1554,29 @@
case av1_compute_global_motion_time:
return "av1_compute_global_motion_time";
case av1_setup_motion_field_time: return "av1_setup_motion_field_time";
- case encode_sb_time: return "encode_sb_time";
+ case encode_sb_row_time: return "encode_sb_row_time";
+
case rd_pick_partition_time: return "rd_pick_partition_time";
+ case av1_prune_partitions_time: return "av1_prune_partitions_time";
+ case none_partition_search_time: return "none_partition_search_time";
+ case split_partition_search_time: return "split_partition_search_time";
+ case rectangular_partition_search_time:
+ return "rectangular_partition_search_time";
+ case ab_partitions_search_time: return "ab_partitions_search_time";
+ case rd_pick_4partition_time: return "rd_pick_4partition_time";
+ case encode_sb_time: return "encode_sb_time";
+
case rd_pick_sb_modes_time: return "rd_pick_sb_modes_time";
case av1_rd_pick_intra_mode_sb_time:
return "av1_rd_pick_intra_mode_sb_time";
case av1_rd_pick_inter_mode_sb_time:
return "av1_rd_pick_inter_mode_sb_time";
+ case handle_inter_mode_time: return "handle_inter_mode_time";
+ case evaluate_motion_mode_for_winner_candidates_time:
+ return "evaluate_motion_mode_for_winner_candidates_time";
case handle_intra_mode_time: return "handle_intra_mode_time";
case do_tx_search_time: return "do_tx_search_time";
+ case av1_search_palette_mode_time: return "av1_search_palette_mode_time";
case handle_newmv_time: return "handle_newmv_time";
case compound_type_rd_time: return "compound_type_rd_time";
case interpolation_filter_search_time:
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index 583e680..fdf7792 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -3537,6 +3537,9 @@
xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, av1_prune_partitions_time);
+#endif
int *partition_horz_allowed = &part_search_state.partition_rect_allowed[HORZ];
int *partition_vert_allowed = &part_search_state.partition_rect_allowed[VERT];
int *prune_horz = &part_search_state.prune_rect_part[HORZ];
@@ -3555,6 +3558,9 @@
&x->sb_enc, bsize, blk_params.has_rows && blk_params.has_cols,
&part_search_state.partition_none_allowed, partition_horz_allowed,
partition_vert_allowed, &part_search_state.do_square_split);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, av1_prune_partitions_time);
+#endif
// Partition search
BEGIN_PARTITION_SEARCH:
@@ -3572,18 +3578,28 @@
// Partition block source pixel variance.
unsigned int pb_source_variance = UINT_MAX;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, none_partition_search_time);
+#endif
// PARTITION_NONE search stage.
int64_t part_none_rd = INT64_MAX;
none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx,
&part_search_state, &best_rdc, &pb_source_variance,
none_rd, &part_none_rd);
-
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, none_partition_search_time);
+#endif
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, split_partition_search_time);
+#endif
// PARTITION_SPLIT search stage.
int64_t part_split_rd = INT64_MAX;
split_partition_search(cpi, td, tile_data, tp, x, pc_tree, sms_tree, &x_ctx,
&part_search_state, &best_rdc, multi_pass_mode,
&part_split_rd);
-
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, split_partition_search_time);
+#endif
// Terminate partition search for child partition,
// when NONE and SPLIT partition rd_costs are INT64_MAX.
if (cpi->sf.part_sf.early_term_after_none_split &&
@@ -3595,11 +3611,16 @@
// Prune partitions based on PARTITION_NONE and PARTITION_SPLIT.
prune_partitions_after_split(cpi, x, sms_tree, &part_search_state, &best_rdc,
part_none_rd, part_split_rd);
-
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, rectangular_partition_search_time);
+#endif
// Rectangular partitions search stage.
rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx,
&part_search_state, &best_rdc,
rect_part_win_info);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, rectangular_partition_search_time);
+#endif
if (pb_source_variance == UINT_MAX) {
av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
@@ -3619,11 +3640,16 @@
part_search_state.do_rectangular_split &&
bsize > cpi->sf.part_sf.ext_partition_eval_thresh &&
blk_params.has_rows && blk_params.has_cols;
-
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, ab_partitions_search_time);
+#endif
// AB partitions search stage.
ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
&part_search_state, &best_rdc, rect_part_win_info,
pb_source_variance, ext_partition_allowed);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, ab_partitions_search_time);
+#endif
// 4-way partitions search stage.
int part4_search_allowed[NUM_PART4_TYPES] = { 1, 1 };
@@ -3640,6 +3666,9 @@
part4_search_allowed);
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, rd_pick_4partition_time);
+#endif
// PARTITION_HORZ_4
assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
!part4_search_allowed[HORZ4]));
@@ -3669,6 +3698,9 @@
pc_tree->vertical4, &part_search_state, &best_rdc,
inc_step, PARTITION_VERT_4);
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, rd_pick_4partition_time);
+#endif
if (bsize == cm->seq_params.sb_size &&
!part_search_state.found_best_partition) {
@@ -3729,6 +3761,9 @@
// Reset the PC_TREE deallocation flag.
int pc_tree_dealloc = 0;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, encode_sb_time);
+#endif
// If a valid partition is found and reconstruction is required for future
// sub-blocks in the same group.
if (part_search_state.found_best_partition && pc_tree->index != 3) {
@@ -3749,6 +3784,9 @@
pc_tree, NULL);
}
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, encode_sb_time);
+#endif
// If the tree still exists (non-superblock), dealloc most nodes, only keep
// nodes for the best partition and PARTITION_NONE.
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index eb06253..aa16d78 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5569,12 +5569,17 @@
int64_t skip_rd[2] = { search_state.best_skip_rd[0],
search_state.best_skip_rd[1] };
int64_t this_yrd = INT64_MAX;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, handle_inter_mode_time);
+#endif
int64_t this_rd = handle_inter_mode(
cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
&this_yrd);
-
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, handle_inter_mode_time);
+#endif
if (sf->inter_sf.prune_comp_search_by_single_result > 0 &&
is_inter_singleref_mode(this_mode)) {
collect_single_states(x, &search_state, mbmi);
@@ -5620,6 +5625,9 @@
x->rdmult, &search_state, compmode_cost);
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
+#endif
if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
// For the single ref winner candidates, evaluate other motion modes (non
// simple translation).
@@ -5628,6 +5636,9 @@
&best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
&search_state, &best_inter_yrd);
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
+#endif
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, do_tx_search_time);
@@ -5646,7 +5657,6 @@
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, handle_intra_mode_time);
#endif
-
// Gate intra mode evaluation if best of inter is skip except when source
// variance is extremely low
if (sf->intra_sf.skip_intra_in_interframe &&
@@ -5707,9 +5717,15 @@
RD_STATS this_rd_cost;
int this_skippable = 0;
if (try_palette) {
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, av1_search_palette_mode_time);
+#endif
this_skippable = av1_search_palette_mode(
&search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
ctx, &this_rd_cost, search_state.best_rd);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, av1_search_palette_mode_time);
+#endif
if (this_rd_cost.rdcost < search_state.best_rd) {
search_state.best_mode_index = THR_DC;
mbmi->mv[0].as_int = 0;
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index bcf762e..1aa308a 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1120,6 +1120,9 @@
int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval,
const EncodeFrameParams *const frame_params,
const EncodeFrameInput *const frame_input) {
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, av1_tpl_setup_stats_time);
+#endif
AV1_COMMON *cm = &cpi->common;
MultiThreadInfo *const mt_info = &cpi->mt_info;
AV1TplRowMultiThreadInfo *const tpl_row_mt = &mt_info->tpl_row_mt;
@@ -1234,6 +1237,10 @@
(double)mc_dep_cost_base / intra_cost_base;
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, av1_tpl_setup_stats_time);
+#endif
+
// Allow larger GOP size if the base layer ARF has higher dependency factor
// than the intermediate ARF and both ARFs have reasonably high dependency
// factors.