Add more component timing information
Added more component timing information in the encoder under
CONFIG_COLLECT_COMPONENT_TIMING flag.
Change-Id: Ia24a2d2228de5cfccde748b95737b4140396fc7e
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 487b3ac..9c794c7 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5722,6 +5722,10 @@
const int mib_size_log2 = cm->seq_params.mib_size_log2;
const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, encode_sb_time);
+#endif
+
// Initialize the left context for the new SB row
av1_zero_left_context(xd);
@@ -5737,9 +5741,6 @@
// Code each SB in the row
for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
-#if CONFIG_COLLECT_COMPONENT_TIMING
- start_timing(cpi, encode_sb_time);
-#endif
(*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
sb_col_in_tile);
if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
@@ -5873,6 +5874,9 @@
reset_partition(pc_root, sb_size);
x->use_cb_search_range = 0;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, first_partition_search_pass_time);
+#endif
init_first_partition_pass_stats_tables(x->first_partition_pass_stats);
// Do the first pass if we need two pass partition search
if (cpi->sf.two_pass_partition_search &&
@@ -5883,6 +5887,10 @@
first_partition_search_pass(cpi, td, tile_data, mi_row, mi_col, tp);
}
#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, first_partition_search_pass_time);
+#endif
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, rd_pick_partition_time);
#endif
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
@@ -5907,10 +5915,10 @@
}
(*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
sb_col_in_tile, sb_cols_in_tile);
-#if CONFIG_COLLECT_COMPONENT_TIMING
- end_timing(cpi, encode_sb_time);
-#endif
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, encode_sb_time);
+#endif
}
static void init_encode_frame_mb_context(AV1_COMP *cpi) {
@@ -6513,6 +6521,9 @@
x->tx_search_count = 0;
#endif // CONFIG_SPEED_STATS
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, av1_compute_global_motion_time);
+#endif
av1_zero(rdc->global_motion_used);
av1_zero(cpi->gmparams_cost);
if (cpi->common.current_frame.frame_type == INTER_FRAME && cpi->source &&
@@ -6645,8 +6656,17 @@
}
memcpy(cm->cur_frame->global_motion, cm->global_motion,
REF_FRAMES * sizeof(WarpedMotionParams));
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, av1_compute_global_motion_time);
+#endif
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, av1_setup_motion_field_time);
+#endif
av1_setup_motion_field(cm);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, av1_setup_motion_field_time);
+#endif
cpi->all_one_sided_refs =
frame_is_intra_only(cm) ? 0 : av1_refs_are_one_sided(cm);
@@ -6781,20 +6801,6 @@
} else {
encode_frame_internal(cpi);
}
-
-#if CONFIG_COLLECT_COMPONENT_TIMING
- int i;
- fprintf(stderr, "\n Frame number: %d, Frame type: %s,\n",
- cm->current_frame.frame_number,
- get_frame_type_enum(cm->current_frame.frame_type));
- for (i = 0; i < kTimingComponents; i++) {
- cpi->component_time[i] += cpi->frame_component_time[i];
- fprintf(stderr, " %s: %" PRId64 " us (total: %" PRId64 " us)\n",
- get_component_name(i), cpi->frame_component_time[i],
- cpi->component_time[i]);
- cpi->frame_component_time[i] = 0;
- }
-#endif
}
static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 39cb372..40ce05b 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -4028,17 +4028,12 @@
struct loopfilter *lf = &cm->lf;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, loop_filter_time);
+#endif
if (use_loopfilter) {
- struct aom_usec_timer timer;
-
aom_clear_system_state();
-
- aom_usec_timer_start(&timer);
-
av1_pick_filter_level(cpi->source, cpi, cpi->sf.lpf_pick);
-
- aom_usec_timer_mark(&timer);
- cpi->time_pick_lpf += aom_usec_timer_elapsed(&timer);
} else {
lf->filter_level[0] = 0;
lf->filter_level[1] = 0;
@@ -4059,17 +4054,26 @@
#endif
0, num_planes, 0);
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, loop_filter_time);
+#endif
if (use_restoration)
av1_loop_restoration_save_boundary_lines(&cm->cur_frame->buf, cm, 0);
if (use_cdef) {
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, cdef_time);
+#endif
// Find CDEF parameters
av1_cdef_search(&cm->cur_frame->buf, cpi->source, cm, xd,
cpi->sf.fast_cdef_search);
// Apply the filter
av1_cdef_frame(&cm->cur_frame->buf, cm, xd);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, cdef_time);
+#endif
} else {
cm->cdef_info.cdef_bits = 0;
cm->cdef_info.cdef_strengths[0] = 0;
@@ -4079,6 +4083,9 @@
superres_post_encode(cpi);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, loop_restoration_time);
+#endif
if (use_restoration) {
av1_loop_restoration_save_boundary_lines(&cm->cur_frame->buf, cm, 1);
av1_pick_filter_restoration(cpi->source, cpi);
@@ -4098,6 +4105,9 @@
cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, loop_restoration_time);
+#endif
}
static int get_refresh_frame_flags(const AV1_COMP *const cpi) {
@@ -4398,6 +4408,10 @@
int loop = 0;
int overshoot_seen = 0;
int undershoot_seen = 0;
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ printf("\n Encoding a frame:");
+#endif
do {
aom_clear_system_state();
@@ -4462,9 +4476,14 @@
segfeatures_copy(&cm->cur_frame->seg, &cm->seg);
if (allow_recode) save_coding_context(cpi);
-
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, av1_encode_frame_time);
+#endif
// transform / motion compensation build reconstruction frame
av1_encode_frame(cpi);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, av1_encode_frame_time);
+#endif
// Update some stats from cyclic refresh, and check if we should not update
// golden reference, for 1 pass CBR.
@@ -4516,6 +4535,9 @@
++cpi->tot_recode_hits;
#endif
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ if (loop) printf("\n Recoding:");
+#endif
} while (loop);
return AOM_CODEC_OK;
@@ -4815,6 +4837,10 @@
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
struct segmentation *const seg = &cm->seg;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, encode_frame_to_data_rate_time);
+#endif
+
// frame type has been decided outside of this function call
cm->cur_frame->frame_type = current_frame->frame_type;
@@ -4987,8 +5013,14 @@
}
cm->timing_info_present &= !seq_params->reduced_still_picture_hdr;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, encode_with_recode_loop_time);
+#endif
if (encode_with_recode_loop(cpi, size, dest) != AOM_CODEC_OK)
return AOM_CODEC_ERROR;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, encode_with_recode_loop_time);
+#endif
#ifdef OUTPUT_YUV_SKINMAP
if (cpi->common.current_frame.frame_number > 1) {
@@ -5047,8 +5079,14 @@
finalize_encoded_frame(cpi);
// Build the bitstream
int largest_tile_id = 0; // Output from pack_bitstream
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, av1_pack_bitstream_final_time);
+#endif
if (av1_pack_bitstream(cpi, dest, size, &largest_tile_id) != AOM_CODEC_OK)
return AOM_CODEC_ERROR;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, av1_pack_bitstream_final_time);
+#endif
cpi->seq_params_locked = 1;
@@ -5109,6 +5147,23 @@
#endif // EXT_TILE_DEBUG
#undef EXT_TILE_DEBUG
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, encode_frame_to_data_rate_time);
+
+ // Print out timing information.
+ int i;
+ fprintf(stderr, "\n Frame number: %d, Frame type: %s,\n",
+ cm->current_frame.frame_number,
+ get_frame_type_enum(cm->current_frame.frame_type));
+ for (i = 0; i < kTimingComponents; i++) {
+ cpi->component_time[i] += cpi->frame_component_time[i];
+ fprintf(stderr, " %s: %" PRId64 " us (total: %" PRId64 " us)\n",
+ get_component_name(i), cpi->frame_component_time[i],
+ cpi->component_time[i]);
+ cpi->frame_component_time[i] = 0;
+ }
+#endif
+
cm->last_frame_type = current_frame->frame_type;
av1_rc_postencode_update(cpi, *size);
@@ -5234,7 +5289,6 @@
int64_t end_time) {
AV1_COMMON *const cm = &cpi->common;
const SequenceHeader *const seq_params = &cm->seq_params;
- struct aom_usec_timer timer;
int res = 0;
const int subsampling_x = sd->subsampling_x;
const int subsampling_y = sd->subsampling_y;
@@ -5242,8 +5296,10 @@
check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
+#if CONFIG_INTERNAL_STATS
+ struct aom_usec_timer timer;
aom_usec_timer_start(&timer);
-
+#endif
#if CONFIG_DENOISE
if (cpi->oxcf.noise_level > 0)
if (apply_denoise_2d(cpi, sd, cpi->oxcf.noise_block_size,
@@ -5254,9 +5310,10 @@
if (av1_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
use_highbitdepth, frame_flags))
res = -1;
+#if CONFIG_INTERNAL_STATS
aom_usec_timer_mark(&timer);
cpi->time_receive_data += aom_usec_timer_elapsed(&timer);
-
+#endif
if ((seq_params->profile == PROFILE_0) && !seq_params->monochrome &&
(subsampling_x != 1 || subsampling_y != 1)) {
aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM,
@@ -5393,7 +5450,6 @@
const aom_rational_t *timebase) {
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
AV1_COMMON *const cm = &cpi->common;
- struct aom_usec_timer cmptimer;
#if CONFIG_BITSTREAM_DEBUG
assert(cpi->oxcf.max_threads == 0 &&
@@ -5409,8 +5465,10 @@
cm->showable_frame = 0;
*size = 0;
+#if CONFIG_INTERNAL_STATS
+ struct aom_usec_timer cmptimer;
aom_usec_timer_start(&cmptimer);
-
+#endif
set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV, 0);
// Normal defaults
@@ -5433,10 +5491,10 @@
// Returning -1 indicates no frame encoded; more input is required
return -1;
}
-
+#if CONFIG_INTERNAL_STATS
aom_usec_timer_mark(&cmptimer);
cpi->time_compress_data += aom_usec_timer_elapsed(&cmptimer);
-
+#endif
if (cpi->b_calculate_psnr) {
if (cm->show_existing_frame || (oxcf->pass != 1 && cm->show_frame)) {
generate_psnr_packet(cpi);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 794b31f..0be5037 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -638,25 +638,61 @@
#include "aom_ports/aom_timer.h"
// Adjust the following to add new components.
enum {
+ encode_frame_to_data_rate_time,
+ encode_with_recode_loop_time,
+ loop_filter_time,
+ cdef_time,
+ loop_restoration_time,
+ av1_pack_bitstream_final_time,
+ av1_encode_frame_time,
+ av1_compute_global_motion_time,
+ av1_setup_motion_field_time,
encode_sb_time,
+ first_partition_search_pass_time,
rd_pick_partition_time,
rd_pick_sb_modes_time,
av1_rd_pick_intra_mode_sb_time,
av1_rd_pick_inter_mode_sb_time,
+ handle_intra_mode_time,
handle_inter_mode_time,
+ do_tx_search_time,
+ handle_newmv_time,
+ compound_type_rd_time,
+ interpolation_filter_search_time,
motion_mode_rd_time,
kTimingComponents,
} UENUM1BYTE(TIMING_COMPONENT);
static INLINE char const *get_component_name(int index) {
switch (index) {
- case 0: return "encode_sb_time";
- case 1: return "rd_pick_partition_time";
- case 2: return "rd_pick_sb_modes_time";
- case 3: return "av1_rd_pick_intra_mode_sb_time";
- case 4: return "av1_rd_pick_inter_mode_sb_time";
- case 5: return "handle_inter_mode_time";
- case 6: return "motion_mode_rd_time";
+ case encode_frame_to_data_rate_time:
+ return "encode_frame_to_data_rate_time";
+ case encode_with_recode_loop_time: return "encode_with_recode_loop_time";
+ case loop_filter_time: return "loop_filter_time";
+ case cdef_time: return "cdef_time";
+ case loop_restoration_time: return "loop_restoration_time";
+ case av1_pack_bitstream_final_time: return "av1_pack_bitstream_final_time";
+ case av1_encode_frame_time: return "av1_encode_frame_time";
+ case av1_compute_global_motion_time:
+ return "av1_compute_global_motion_time";
+ case av1_setup_motion_field_time: return "av1_setup_motion_field_time";
+ case encode_sb_time: return "encode_sb_time";
+ case first_partition_search_pass_time:
+ return "first_partition_search_pass_time";
+ case rd_pick_partition_time: return "rd_pick_partition_time";
+ case rd_pick_sb_modes_time: return "rd_pick_sb_modes_time";
+ case av1_rd_pick_intra_mode_sb_time:
+ return "av1_rd_pick_intra_mode_sb_time";
+ case av1_rd_pick_inter_mode_sb_time:
+ return "av1_rd_pick_inter_mode_sb_time";
+ case handle_intra_mode_time: return "handle_intra_mode_time";
+ case handle_inter_mode_time: return "handle_inter_mode_time";
+ case do_tx_search_time: return "do_tx_search_time";
+ case handle_newmv_time: return "handle_newmv_time";
+ case compound_type_rd_time: return "compound_type_rd_time";
+ case interpolation_filter_search_time:
+ return "interpolation_filter_search_time";
+ case motion_mode_rd_time: return "motion_mode_rd_time";
default: assert(0);
}
return "error";
@@ -795,9 +831,11 @@
fractional_mv_step_fp *find_fractional_mv_step;
av1_diamond_search_fn_t diamond_search_sad;
aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES_ALL];
+
+#if CONFIG_INTERNAL_STATS
uint64_t time_receive_data;
uint64_t time_compress_data;
- uint64_t time_pick_lpf;
+#endif
#if CONFIG_FP_MB_STATS
int use_fp_mb_stats;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0fdaa85..3f72773 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -10200,6 +10200,9 @@
rate_mv = backup_rate_mv;
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, handle_newmv_time);
+#endif
if (cpi->sf.prune_single_motion_modes_by_simple_trans &&
args->single_ref_first_pass == 0 && !is_comp_pred) {
const int ref0 = mbmi->ref_frame[0];
@@ -10216,6 +10219,9 @@
backup_mv[1] = cur_mv[1];
backup_rate_mv = rate_mv;
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, handle_newmv_time);
+#endif
if (newmv_ret_val != 0) {
continue;
@@ -10310,6 +10316,9 @@
continue;
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, compound_type_rd_time);
+#endif
int skip_build_pred = 0;
if (is_comp_pred) {
if (mode_search_mask[comp_loop_idx] == (1 << COMPOUND_AVERAGE)) {
@@ -10377,11 +10386,20 @@
}
}
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, compound_type_rd_time);
+#endif
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, interpolation_filter_search_time);
+#endif
ret_val = interpolation_filter_search(
x, cpi, tile_data, bsize, mi_row, mi_col, &tmp_dst, &orig_dst,
args->single_filter, &rd, &rs, &skip_txfm_sb, &skip_sse_sb,
&skip_build_pred, args, ref_best_rd);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, interpolation_filter_search_time);
+#endif
if (args->modelled_rd != NULL && !is_comp_pred) {
args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
}
@@ -12772,6 +12790,9 @@
release_compound_type_rd_buffers(&rd_buffers);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, do_tx_search_time);
+#endif
if (!do_tx_search) {
inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
search_state.best_rd = INT64_MAX;
@@ -12844,7 +12865,13 @@
}
}
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, do_tx_search_time);
+#endif
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, handle_intra_mode_time);
+#endif
for (int j = 0; j < intra_mode_num; ++j) {
const int mode_index = intra_mode_idx_ls[j];
const MV_REFERENCE_FRAME ref_frame =
@@ -12887,6 +12914,10 @@
#endif
}
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, handle_intra_mode_time);
+#endif
+
// In effect only when speed >= 2.
sf_refine_fast_tx_type_search(
cpi, x, mi_row, mi_col, rd_cost, bsize, ctx, search_state.best_mode_index,