Add timers to collect encoding component timing information

Added timers to collect encoding component timing information, which
was under a config flag. More components could be added.

Change-Id: I9f40ae86ad46c46309151002d4e313a9ac0e213b
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 417325a..e5bf57c 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -530,6 +530,10 @@
   const DELTAQ_MODE deltaq_mode = cpi->oxcf.deltaq_mode;
   int i, orig_rdmult;
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  start_timing(cpi, rd_pick_sb_modes_time);
+#endif
+
   if (best_rd < 0) {
     ctx->rdcost = INT64_MAX;
     ctx->skip = 0;
@@ -636,9 +640,18 @@
   // Find best coding mode & reconstruct the MB so it is available
   // as a predictor for MBs that follow in the SB
   if (frame_is_intra_only(cm)) {
+#if CONFIG_COLLECT_COMPONENT_TIMING
+    start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
+#endif
     av1_rd_pick_intra_mode_sb(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx,
                               best_rd);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+    end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
+#endif
   } else {
+#if CONFIG_COLLECT_COMPONENT_TIMING
+    start_timing(cpi, av1_rd_pick_inter_mode_sb_time);
+#endif
     if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
       av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
                                          rd_cost, bsize, ctx, best_rd);
@@ -659,6 +672,9 @@
       ctx->seg_feat = 0;
 #endif
     }
+#if CONFIG_COLLECT_COMPONENT_TIMING
+    end_timing(cpi, av1_rd_pick_inter_mode_sb_time);
+#endif
   }
 
   // Examine the resulting rate and for AQ mode 2 make a segment choice.
@@ -679,6 +695,10 @@
   ctx->rate = rd_cost->rate;
   ctx->dist = rd_cost->dist;
   ctx->rdcost = rd_cost->rdcost;
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  end_timing(cpi, rd_pick_sb_modes_time);
+#endif
 }
 
 static void update_inter_mode_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts,
@@ -5731,6 +5751,9 @@
   // Code each SB in the row
   for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
        mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
+#if CONFIG_COLLECT_COMPONENT_TIMING
+    start_timing(cpi, encode_sb_time);
+#endif
     (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
                                    sb_col_in_tile);
     if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
@@ -5872,10 +5895,15 @@
           cm->current_frame.frame_type != KEY_FRAME) {
         first_partition_search_pass(cpi, td, tile_data, mi_row, mi_col, tp);
       }
-
+#if CONFIG_COLLECT_COMPONENT_TIMING
+      start_timing(cpi, rd_pick_partition_time);
+#endif
       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                         sb_size, BLOCK_4X4, &dummy_rdc, INT64_MAX, pc_root,
                         NULL);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+      end_timing(cpi, rd_pick_partition_time);
+#endif
     }
     // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
     if (cpi->sf.inter_mode_rd_model_estimation == 1 && cm->tile_cols == 1 &&
@@ -5892,6 +5920,9 @@
     }
     (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
                                     sb_col_in_tile, sb_cols_in_tile);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+    end_timing(cpi, encode_sb_time);
+#endif
   }
 }
 
@@ -6765,6 +6796,20 @@
   } else {
     encode_frame_internal(cpi);
   }
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  int i;
+  fprintf(stderr, "\n Frame number: %d, Frame type: %s,\n",
+          cm->current_frame.frame_number,
+          get_frame_type_enum(cm->current_frame.frame_type));
+  for (i = 0; i < kTimingComponents; i++) {
+    cpi->component_time[i] += cpi->frame_component_time[i];
+    fprintf(stderr, " %s:  %" PRId64 " us (total: %" PRId64 " us)\n",
+            get_component_name(i), cpi->frame_component_time[i],
+            cpi->component_time[i]);
+    cpi->frame_component_time[i] = 0;
+  }
+#endif
 }
 
 static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 02d5fac..fab81ff 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -628,6 +628,35 @@
 } PartitionStats;
 #endif
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+#include "aom_ports/aom_timer.h"
+// Adjust the following to add new components.
+enum {
+  encode_sb_time,
+  rd_pick_partition_time,
+  rd_pick_sb_modes_time,
+  av1_rd_pick_intra_mode_sb_time,
+  av1_rd_pick_inter_mode_sb_time,
+  handle_inter_mode_time,
+  motion_mode_rd_time,
+  kTimingComponents,
+} UENUM1BYTE(TIMING_COMPONENT);
+
+static INLINE char const *get_component_name(int index) {
+  switch (index) {
+    case 0: return "encode_sb_time";
+    case 1: return "rd_pick_partition_time";
+    case 2: return "rd_pick_sb_modes_time";
+    case 3: return "av1_rd_pick_intra_mode_sb_time";
+    case 4: return "av1_rd_pick_inter_mode_sb_time";
+    case 5: return "handle_inter_mode_time";
+    case 6: return "motion_mode_rd_time";
+    default: assert(0);
+  }
+  return "error";
+}
+#endif
+
 typedef struct AV1_COMP {
   QUANTS quants;
   ThreadData td;
@@ -911,6 +940,14 @@
 #if CONFIG_COLLECT_PARTITION_STATS
   PartitionStats partition_stats;
 #endif
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  // component_time[] are initialized to zero while encoder starts.
+  uint64_t component_time[kTimingComponents];
+  struct aom_usec_timer component_timer[kTimingComponents];
+  // frame_component_time[] are initialized to zero at beginning of each frame.
+  uint64_t frame_component_time[kTimingComponents];
+#endif
 } AV1_COMP;
 
 typedef struct {
@@ -1244,6 +1281,27 @@
 }
 #endif
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+static INLINE void start_timing(AV1_COMP *cpi, int component) {
+  aom_usec_timer_start(&cpi->component_timer[component]);
+}
+static INLINE void end_timing(AV1_COMP *cpi, int component) {
+  aom_usec_timer_mark(&cpi->component_timer[component]);
+  cpi->frame_component_time[component] +=
+      aom_usec_timer_elapsed(&cpi->component_timer[component]);
+}
+static INLINE char const *get_frame_type_enum(int type) {
+  switch (type) {
+    case 0: return "KEY_FRAME";
+    case 1: return "INTER_FRAME";
+    case 2: return "INTRA_ONLY_FRAME";
+    case 3: return "S_FRAME";
+    default: assert(0);
+  }
+  return "error";
+}
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 2550d7b..b9f6c82 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -9948,7 +9948,7 @@
 } inter_mode_info;
 
 static int64_t handle_inter_mode(
-    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
+    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
     BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
     RD_STATS *rd_stats_uv, int *disable_skip, int mi_row, int mi_col,
     HandleInterModeArgs *args, int64_t ref_best_rd, uint8_t *const tmp_buf,
@@ -10332,10 +10332,16 @@
         rd_stats_y->dist = plane_dist[0];
         rd_stats_uv->dist = plane_dist[1] + plane_dist[2];
       } else {
+#if CONFIG_COLLECT_COMPONENT_TIMING
+        start_timing(cpi, motion_mode_rd_time);
+#endif
         ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
                                  rd_stats_uv, disable_skip, mi_row, mi_col,
                                  args, ref_best_rd, refs, &rate_mv, &orig_dst,
                                  best_est_rd, do_tx_search, inter_modes_info);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+        end_timing(cpi, motion_mode_rd_time);
+#endif
       }
       mode_info[ref_mv_idx].mv.as_int = mbmi->mv[0].as_int;
       mode_info[ref_mv_idx].rate_mv = rate_mv;
@@ -12523,10 +12529,17 @@
         if (midx < MAX_SINGLE_REF_MODES) {
           args.simple_rd_state = x->simple_rd_state[midx];
         }
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
+        start_timing(cpi, handle_inter_mode_time);
+#endif
         this_rd = handle_inter_mode(
             cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
             &disable_skip, mi_row, mi_col, &args, ref_best_rd, tmp_buf,
             &rd_buffers, &best_est_rd, do_tx_search, inter_modes_info);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+        end_timing(cpi, handle_inter_mode_time);
+#endif
         rate2 = rd_stats.rate;
         skippable = rd_stats.skip;
         distortion2 = rd_stats.dist;
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index b1c835c..13e2d0b 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -133,7 +133,8 @@
                    "Disable full_pixel_motion_search_based_split on BLOCK_8X8.")
 set_aom_config_var(CONFIG_COLLECT_PARTITION_STATS 0 NUMBER
                    "Collect stats on partition decisions.")
-
+set_aom_config_var(CONFIG_COLLECT_COMPONENT_TIMING 0 NUMBER
+                   "Collect encoding component timing information.")
 #
 # Variables in this section control optional features of the build system.
 #