Add timers to collect encoding component timing information Added timers to collect encoding component timing information, which was under a config flag. More components could be added. Change-Id: I9f40ae86ad46c46309151002d4e313a9ac0e213b
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index 417325a..e5bf57c 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -530,6 +530,10 @@ const DELTAQ_MODE deltaq_mode = cpi->oxcf.deltaq_mode; int i, orig_rdmult; +#if CONFIG_COLLECT_COMPONENT_TIMING + start_timing(cpi, rd_pick_sb_modes_time); +#endif + if (best_rd < 0) { ctx->rdcost = INT64_MAX; ctx->skip = 0; @@ -636,9 +640,18 @@ // Find best coding mode & reconstruct the MB so it is available // as a predictor for MBs that follow in the SB if (frame_is_intra_only(cm)) { +#if CONFIG_COLLECT_COMPONENT_TIMING + start_timing(cpi, av1_rd_pick_intra_mode_sb_time); +#endif av1_rd_pick_intra_mode_sb(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx, best_rd); +#if CONFIG_COLLECT_COMPONENT_TIMING + end_timing(cpi, av1_rd_pick_intra_mode_sb_time); +#endif } else { +#if CONFIG_COLLECT_COMPONENT_TIMING + start_timing(cpi, av1_rd_pick_inter_mode_sb_time); +#endif if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col, rd_cost, bsize, ctx, best_rd); @@ -659,6 +672,9 @@ ctx->seg_feat = 0; #endif } +#if CONFIG_COLLECT_COMPONENT_TIMING + end_timing(cpi, av1_rd_pick_inter_mode_sb_time); +#endif } // Examine the resulting rate and for AQ mode 2 make a segment choice. @@ -679,6 +695,10 @@ ctx->rate = rd_cost->rate; ctx->dist = rd_cost->dist; ctx->rdcost = rd_cost->rdcost; + +#if CONFIG_COLLECT_COMPONENT_TIMING + end_timing(cpi, rd_pick_sb_modes_time); +#endif } static void update_inter_mode_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts, @@ -5731,6 +5751,9 @@ // Code each SB in the row for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0; mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) { +#if CONFIG_COLLECT_COMPONENT_TIMING + start_timing(cpi, encode_sb_time); +#endif (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, sb_col_in_tile); if (tile_data->allow_update_cdf && (cpi->row_mt == 1) && @@ -5872,10 +5895,15 @@ cm->current_frame.frame_type != KEY_FRAME) { first_partition_search_pass(cpi, td, tile_data, mi_row, mi_col, tp); } - +#if CONFIG_COLLECT_COMPONENT_TIMING + start_timing(cpi, rd_pick_partition_time); +#endif rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sb_size, BLOCK_4X4, &dummy_rdc, INT64_MAX, pc_root, NULL); +#if CONFIG_COLLECT_COMPONENT_TIMING + end_timing(cpi, rd_pick_partition_time); +#endif } // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile. if (cpi->sf.inter_mode_rd_model_estimation == 1 && cm->tile_cols == 1 && @@ -5892,6 +5920,9 @@ } (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, sb_col_in_tile, sb_cols_in_tile); +#if CONFIG_COLLECT_COMPONENT_TIMING + end_timing(cpi, encode_sb_time); +#endif } } @@ -6765,6 +6796,20 @@ } else { encode_frame_internal(cpi); } + +#if CONFIG_COLLECT_COMPONENT_TIMING + int i; + fprintf(stderr, "\n Frame number: %d, Frame type: %s,\n", + cm->current_frame.frame_number, + get_frame_type_enum(cm->current_frame.frame_type)); + for (i = 0; i < kTimingComponents; i++) { + cpi->component_time[i] += cpi->frame_component_time[i]; + fprintf(stderr, " %s: %" PRId64 " us (total: %" PRId64 " us)\n", + get_component_name(i), cpi->frame_component_time[i], + cpi->component_time[i]); + cpi->frame_component_time[i] = 0; + } +#endif } static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h index 02d5fac..fab81ff 100644 --- a/av1/encoder/encoder.h +++ b/av1/encoder/encoder.h
@@ -628,6 +628,35 @@ } PartitionStats; #endif +#if CONFIG_COLLECT_COMPONENT_TIMING +#include "aom_ports/aom_timer.h" +// Adjust the following to add new components. +enum { + encode_sb_time, + rd_pick_partition_time, + rd_pick_sb_modes_time, + av1_rd_pick_intra_mode_sb_time, + av1_rd_pick_inter_mode_sb_time, + handle_inter_mode_time, + motion_mode_rd_time, + kTimingComponents, +} UENUM1BYTE(TIMING_COMPONENT); + +static INLINE char const *get_component_name(int index) { + switch (index) { + case 0: return "encode_sb_time"; + case 1: return "rd_pick_partition_time"; + case 2: return "rd_pick_sb_modes_time"; + case 3: return "av1_rd_pick_intra_mode_sb_time"; + case 4: return "av1_rd_pick_inter_mode_sb_time"; + case 5: return "handle_inter_mode_time"; + case 6: return "motion_mode_rd_time"; + default: assert(0); + } + return "error"; +} +#endif + typedef struct AV1_COMP { QUANTS quants; ThreadData td; @@ -911,6 +940,14 @@ #if CONFIG_COLLECT_PARTITION_STATS PartitionStats partition_stats; #endif + +#if CONFIG_COLLECT_COMPONENT_TIMING + // component_time[] are initialized to zero while encoder starts. + uint64_t component_time[kTimingComponents]; + struct aom_usec_timer component_timer[kTimingComponents]; + // frame_component_time[] are initialized to zero at beginning of each frame. + uint64_t frame_component_time[kTimingComponents]; +#endif } AV1_COMP; typedef struct { @@ -1244,6 +1281,27 @@ } #endif +#if CONFIG_COLLECT_COMPONENT_TIMING +static INLINE void start_timing(AV1_COMP *cpi, int component) { + aom_usec_timer_start(&cpi->component_timer[component]); +} +static INLINE void end_timing(AV1_COMP *cpi, int component) { + aom_usec_timer_mark(&cpi->component_timer[component]); + cpi->frame_component_time[component] += + aom_usec_timer_elapsed(&cpi->component_timer[component]); +} +static INLINE char const *get_frame_type_enum(int type) { + switch (type) { + case 0: return "KEY_FRAME"; + case 1: return "INTER_FRAME"; + case 2: return "INTRA_ONLY_FRAME"; + case 3: return "S_FRAME"; + default: assert(0); + } + return "error"; +} +#endif + #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 2550d7b..b9f6c82 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -9948,7 +9948,7 @@ } inter_mode_info; static int64_t handle_inter_mode( - const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x, + AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x, BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, int *disable_skip, int mi_row, int mi_col, HandleInterModeArgs *args, int64_t ref_best_rd, uint8_t *const tmp_buf, @@ -10332,10 +10332,16 @@ rd_stats_y->dist = plane_dist[0]; rd_stats_uv->dist = plane_dist[1] + plane_dist[2]; } else { +#if CONFIG_COLLECT_COMPONENT_TIMING + start_timing(cpi, motion_mode_rd_time); +#endif ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y, rd_stats_uv, disable_skip, mi_row, mi_col, args, ref_best_rd, refs, &rate_mv, &orig_dst, best_est_rd, do_tx_search, inter_modes_info); +#if CONFIG_COLLECT_COMPONENT_TIMING + end_timing(cpi, motion_mode_rd_time); +#endif } mode_info[ref_mv_idx].mv.as_int = mbmi->mv[0].as_int; mode_info[ref_mv_idx].rate_mv = rate_mv; @@ -12523,10 +12529,17 @@ if (midx < MAX_SINGLE_REF_MODES) { args.simple_rd_state = x->simple_rd_state[midx]; } + +#if CONFIG_COLLECT_COMPONENT_TIMING + start_timing(cpi, handle_inter_mode_time); +#endif this_rd = handle_inter_mode( cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &disable_skip, mi_row, mi_col, &args, ref_best_rd, tmp_buf, &rd_buffers, &best_est_rd, do_tx_search, inter_modes_info); +#if CONFIG_COLLECT_COMPONENT_TIMING + end_timing(cpi, handle_inter_mode_time); +#endif rate2 = rd_stats.rate; skippable = rd_stats.skip; distortion2 = rd_stats.dist;
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake index b1c835c..13e2d0b 100644 --- a/build/cmake/aom_config_defaults.cmake +++ b/build/cmake/aom_config_defaults.cmake
@@ -133,7 +133,8 @@ "Disable full_pixel_motion_search_based_split on BLOCK_8X8.") set_aom_config_var(CONFIG_COLLECT_PARTITION_STATS 0 NUMBER "Collect stats on partition decisions.") - +set_aom_config_var(CONFIG_COLLECT_COMPONENT_TIMING 0 NUMBER + "Collect encoding component timing information.") # # Variables in this section control optional features of the build system. #