Add more component timing information Added more component timing information in the encoder under CONFIG_COLLECT_COMPONENT_TIMING flag. Change-Id: Ia24a2d2228de5cfccde748b95737b4140396fc7e

commit: d1f32e6a82fdcb9c0734f5d555d2a9cd0cdf32d8 [log] [tgz]
author: Yunqing Wang <yunqingwang@google.com> Wed Feb 20 10:37:51 2019 -0800
committer: Yunqing Wang <yunqingwang@google.com> Thu Feb 21 02:16:52 2019 +0000
tree: 0a2e44a5893c331bf3bb08924f42927416fe78ff
parent: 5758a2a169d5e7a8e72b8fc5022aa4d964ebbfb5 [diff]
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 487b3ac..9c794c7 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c

@@ -5722,6 +5722,10 @@
   const int mib_size_log2 = cm->seq_params.mib_size_log2;
   const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  start_timing(cpi, encode_sb_time);
+#endif
+
   // Initialize the left context for the new SB row
   av1_zero_left_context(xd);
 
@@ -5737,9 +5741,6 @@
   // Code each SB in the row
   for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
        mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
-#if CONFIG_COLLECT_COMPONENT_TIMING
-    start_timing(cpi, encode_sb_time);
-#endif
     (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
                                    sb_col_in_tile);
     if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
@@ -5873,6 +5874,9 @@
 
       reset_partition(pc_root, sb_size);
       x->use_cb_search_range = 0;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+      start_timing(cpi, first_partition_search_pass_time);
+#endif
       init_first_partition_pass_stats_tables(x->first_partition_pass_stats);
       // Do the first pass if we need two pass partition search
       if (cpi->sf.two_pass_partition_search &&
@@ -5883,6 +5887,10 @@
         first_partition_search_pass(cpi, td, tile_data, mi_row, mi_col, tp);
       }
 #if CONFIG_COLLECT_COMPONENT_TIMING
+      end_timing(cpi, first_partition_search_pass_time);
+#endif
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
       start_timing(cpi, rd_pick_partition_time);
 #endif
       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
@@ -5907,10 +5915,10 @@
     }
     (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
                                     sb_col_in_tile, sb_cols_in_tile);
-#if CONFIG_COLLECT_COMPONENT_TIMING
-    end_timing(cpi, encode_sb_time);
-#endif
   }
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  end_timing(cpi, encode_sb_time);
+#endif
 }
 
 static void init_encode_frame_mb_context(AV1_COMP *cpi) {
@@ -6513,6 +6521,9 @@
   x->tx_search_count = 0;
 #endif  // CONFIG_SPEED_STATS
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  start_timing(cpi, av1_compute_global_motion_time);
+#endif
   av1_zero(rdc->global_motion_used);
   av1_zero(cpi->gmparams_cost);
   if (cpi->common.current_frame.frame_type == INTER_FRAME && cpi->source &&
@@ -6645,8 +6656,17 @@
   }
   memcpy(cm->cur_frame->global_motion, cm->global_motion,
          REF_FRAMES * sizeof(WarpedMotionParams));
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  end_timing(cpi, av1_compute_global_motion_time);
+#endif
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  start_timing(cpi, av1_setup_motion_field_time);
+#endif
   av1_setup_motion_field(cm);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  end_timing(cpi, av1_setup_motion_field_time);
+#endif
 
   cpi->all_one_sided_refs =
       frame_is_intra_only(cm) ? 0 : av1_refs_are_one_sided(cm);
@@ -6781,20 +6801,6 @@
   } else {
     encode_frame_internal(cpi);
   }
-
-#if CONFIG_COLLECT_COMPONENT_TIMING
-  int i;
-  fprintf(stderr, "\n Frame number: %d, Frame type: %s,\n",
-          cm->current_frame.frame_number,
-          get_frame_type_enum(cm->current_frame.frame_type));
-  for (i = 0; i < kTimingComponents; i++) {
-    cpi->component_time[i] += cpi->frame_component_time[i];
-    fprintf(stderr, " %s:  %" PRId64 " us (total: %" PRId64 " us)\n",
-            get_component_name(i), cpi->frame_component_time[i],
-            cpi->component_time[i]);
-    cpi->frame_component_time[i] = 0;
-  }
-#endif
 }
 
 static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,

diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 39cb372..40ce05b 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c

@@ -4028,17 +4028,12 @@
 
   struct loopfilter *lf = &cm->lf;
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  start_timing(cpi, loop_filter_time);
+#endif
   if (use_loopfilter) {
-    struct aom_usec_timer timer;
-
     aom_clear_system_state();
-
-    aom_usec_timer_start(&timer);
-
     av1_pick_filter_level(cpi->source, cpi, cpi->sf.lpf_pick);
-
-    aom_usec_timer_mark(&timer);
-    cpi->time_pick_lpf += aom_usec_timer_elapsed(&timer);
   } else {
     lf->filter_level[0] = 0;
     lf->filter_level[1] = 0;
@@ -4059,17 +4054,26 @@
 #endif
                             0, num_planes, 0);
   }
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  end_timing(cpi, loop_filter_time);
+#endif
 
   if (use_restoration)
     av1_loop_restoration_save_boundary_lines(&cm->cur_frame->buf, cm, 0);
 
   if (use_cdef) {
+#if CONFIG_COLLECT_COMPONENT_TIMING
+    start_timing(cpi, cdef_time);
+#endif
     // Find CDEF parameters
     av1_cdef_search(&cm->cur_frame->buf, cpi->source, cm, xd,
                     cpi->sf.fast_cdef_search);
 
     // Apply the filter
     av1_cdef_frame(&cm->cur_frame->buf, cm, xd);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+    end_timing(cpi, cdef_time);
+#endif
   } else {
     cm->cdef_info.cdef_bits = 0;
     cm->cdef_info.cdef_strengths[0] = 0;
@@ -4079,6 +4083,9 @@
 
   superres_post_encode(cpi);
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  start_timing(cpi, loop_restoration_time);
+#endif
   if (use_restoration) {
     av1_loop_restoration_save_boundary_lines(&cm->cur_frame->buf, cm, 1);
     av1_pick_filter_restoration(cpi->source, cpi);
@@ -4098,6 +4105,9 @@
     cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
     cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
   }
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  end_timing(cpi, loop_restoration_time);
+#endif
 }
 
 static int get_refresh_frame_flags(const AV1_COMP *const cpi) {
@@ -4398,6 +4408,10 @@
   int loop = 0;
   int overshoot_seen = 0;
   int undershoot_seen = 0;
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  printf("\n Encoding a frame:");
+#endif
   do {
     aom_clear_system_state();
 
@@ -4462,9 +4476,14 @@
     segfeatures_copy(&cm->cur_frame->seg, &cm->seg);
 
     if (allow_recode) save_coding_context(cpi);
-
+#if CONFIG_COLLECT_COMPONENT_TIMING
+    start_timing(cpi, av1_encode_frame_time);
+#endif
     // transform / motion compensation build reconstruction frame
     av1_encode_frame(cpi);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+    end_timing(cpi, av1_encode_frame_time);
+#endif
 
     // Update some stats from cyclic refresh, and check if we should not update
     // golden reference, for 1 pass CBR.
@@ -4516,6 +4535,9 @@
       ++cpi->tot_recode_hits;
 #endif
     }
+#if CONFIG_COLLECT_COMPONENT_TIMING
+    if (loop) printf("\n Recoding:");
+#endif
   } while (loop);
 
   return AOM_CODEC_OK;
@@ -4815,6 +4837,10 @@
   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
   struct segmentation *const seg = &cm->seg;
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  start_timing(cpi, encode_frame_to_data_rate_time);
+#endif
+
   // frame type has been decided outside of this function call
   cm->cur_frame->frame_type = current_frame->frame_type;
 
@@ -4987,8 +5013,14 @@
   }
   cm->timing_info_present &= !seq_params->reduced_still_picture_hdr;
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  start_timing(cpi, encode_with_recode_loop_time);
+#endif
   if (encode_with_recode_loop(cpi, size, dest) != AOM_CODEC_OK)
     return AOM_CODEC_ERROR;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  end_timing(cpi, encode_with_recode_loop_time);
+#endif
 
 #ifdef OUTPUT_YUV_SKINMAP
   if (cpi->common.current_frame.frame_number > 1) {
@@ -5047,8 +5079,14 @@
   finalize_encoded_frame(cpi);
   // Build the bitstream
   int largest_tile_id = 0;  // Output from pack_bitstream
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  start_timing(cpi, av1_pack_bitstream_final_time);
+#endif
   if (av1_pack_bitstream(cpi, dest, size, &largest_tile_id) != AOM_CODEC_OK)
     return AOM_CODEC_ERROR;
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  end_timing(cpi, av1_pack_bitstream_final_time);
+#endif
 
   cpi->seq_params_locked = 1;
 
@@ -5109,6 +5147,23 @@
 #endif  // EXT_TILE_DEBUG
 #undef EXT_TILE_DEBUG
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  end_timing(cpi, encode_frame_to_data_rate_time);
+
+  // Print out timing information.
+  int i;
+  fprintf(stderr, "\n Frame number: %d, Frame type: %s,\n",
+          cm->current_frame.frame_number,
+          get_frame_type_enum(cm->current_frame.frame_type));
+  for (i = 0; i < kTimingComponents; i++) {
+    cpi->component_time[i] += cpi->frame_component_time[i];
+    fprintf(stderr, " %s:  %" PRId64 " us (total: %" PRId64 " us)\n",
+            get_component_name(i), cpi->frame_component_time[i],
+            cpi->component_time[i]);
+    cpi->frame_component_time[i] = 0;
+  }
+#endif
+
   cm->last_frame_type = current_frame->frame_type;
 
   av1_rc_postencode_update(cpi, *size);
@@ -5234,7 +5289,6 @@
                           int64_t end_time) {
   AV1_COMMON *const cm = &cpi->common;
   const SequenceHeader *const seq_params = &cm->seq_params;
-  struct aom_usec_timer timer;
   int res = 0;
   const int subsampling_x = sd->subsampling_x;
   const int subsampling_y = sd->subsampling_y;
@@ -5242,8 +5296,10 @@
 
   check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
 
+#if CONFIG_INTERNAL_STATS
+  struct aom_usec_timer timer;
   aom_usec_timer_start(&timer);
-
+#endif
 #if CONFIG_DENOISE
   if (cpi->oxcf.noise_level > 0)
     if (apply_denoise_2d(cpi, sd, cpi->oxcf.noise_block_size,
@@ -5254,9 +5310,10 @@
   if (av1_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
                          use_highbitdepth, frame_flags))
     res = -1;
+#if CONFIG_INTERNAL_STATS
   aom_usec_timer_mark(&timer);
   cpi->time_receive_data += aom_usec_timer_elapsed(&timer);
-
+#endif
   if ((seq_params->profile == PROFILE_0) && !seq_params->monochrome &&
       (subsampling_x != 1 || subsampling_y != 1)) {
     aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM,
@@ -5393,7 +5450,6 @@
                             const aom_rational_t *timebase) {
   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
   AV1_COMMON *const cm = &cpi->common;
-  struct aom_usec_timer cmptimer;
 
 #if CONFIG_BITSTREAM_DEBUG
   assert(cpi->oxcf.max_threads == 0 &&
@@ -5409,8 +5465,10 @@
 
   cm->showable_frame = 0;
   *size = 0;
+#if CONFIG_INTERNAL_STATS
+  struct aom_usec_timer cmptimer;
   aom_usec_timer_start(&cmptimer);
-
+#endif
   set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV, 0);
 
   // Normal defaults
@@ -5433,10 +5491,10 @@
     // Returning -1 indicates no frame encoded; more input is required
     return -1;
   }
-
+#if CONFIG_INTERNAL_STATS
   aom_usec_timer_mark(&cmptimer);
   cpi->time_compress_data += aom_usec_timer_elapsed(&cmptimer);
-
+#endif
   if (cpi->b_calculate_psnr) {
     if (cm->show_existing_frame || (oxcf->pass != 1 && cm->show_frame)) {
       generate_psnr_packet(cpi);

diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 794b31f..0be5037 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h

@@ -638,25 +638,61 @@
 #include "aom_ports/aom_timer.h"
 // Adjust the following to add new components.
 enum {
+  encode_frame_to_data_rate_time,
+  encode_with_recode_loop_time,
+  loop_filter_time,
+  cdef_time,
+  loop_restoration_time,
+  av1_pack_bitstream_final_time,
+  av1_encode_frame_time,
+  av1_compute_global_motion_time,
+  av1_setup_motion_field_time,
   encode_sb_time,
+  first_partition_search_pass_time,
   rd_pick_partition_time,
   rd_pick_sb_modes_time,
   av1_rd_pick_intra_mode_sb_time,
   av1_rd_pick_inter_mode_sb_time,
+  handle_intra_mode_time,
   handle_inter_mode_time,
+  do_tx_search_time,
+  handle_newmv_time,
+  compound_type_rd_time,
+  interpolation_filter_search_time,
   motion_mode_rd_time,
   kTimingComponents,
 } UENUM1BYTE(TIMING_COMPONENT);
 
 static INLINE char const *get_component_name(int index) {
   switch (index) {
-    case 0: return "encode_sb_time";
-    case 1: return "rd_pick_partition_time";
-    case 2: return "rd_pick_sb_modes_time";
-    case 3: return "av1_rd_pick_intra_mode_sb_time";
-    case 4: return "av1_rd_pick_inter_mode_sb_time";
-    case 5: return "handle_inter_mode_time";
-    case 6: return "motion_mode_rd_time";
+    case encode_frame_to_data_rate_time:
+      return "encode_frame_to_data_rate_time";
+    case encode_with_recode_loop_time: return "encode_with_recode_loop_time";
+    case loop_filter_time: return "loop_filter_time";
+    case cdef_time: return "cdef_time";
+    case loop_restoration_time: return "loop_restoration_time";
+    case av1_pack_bitstream_final_time: return "av1_pack_bitstream_final_time";
+    case av1_encode_frame_time: return "av1_encode_frame_time";
+    case av1_compute_global_motion_time:
+      return "av1_compute_global_motion_time";
+    case av1_setup_motion_field_time: return "av1_setup_motion_field_time";
+    case encode_sb_time: return "encode_sb_time";
+    case first_partition_search_pass_time:
+      return "first_partition_search_pass_time";
+    case rd_pick_partition_time: return "rd_pick_partition_time";
+    case rd_pick_sb_modes_time: return "rd_pick_sb_modes_time";
+    case av1_rd_pick_intra_mode_sb_time:
+      return "av1_rd_pick_intra_mode_sb_time";
+    case av1_rd_pick_inter_mode_sb_time:
+      return "av1_rd_pick_inter_mode_sb_time";
+    case handle_intra_mode_time: return "handle_intra_mode_time";
+    case handle_inter_mode_time: return "handle_inter_mode_time";
+    case do_tx_search_time: return "do_tx_search_time";
+    case handle_newmv_time: return "handle_newmv_time";
+    case compound_type_rd_time: return "compound_type_rd_time";
+    case interpolation_filter_search_time:
+      return "interpolation_filter_search_time";
+    case motion_mode_rd_time: return "motion_mode_rd_time";
     default: assert(0);
   }
   return "error";
@@ -795,9 +831,11 @@
   fractional_mv_step_fp *find_fractional_mv_step;
   av1_diamond_search_fn_t diamond_search_sad;
   aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES_ALL];
+
+#if CONFIG_INTERNAL_STATS
   uint64_t time_receive_data;
   uint64_t time_compress_data;
-  uint64_t time_pick_lpf;
+#endif
 
 #if CONFIG_FP_MB_STATS
   int use_fp_mb_stats;

diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0fdaa85..3f72773 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c

@@ -10200,6 +10200,9 @@
           rate_mv = backup_rate_mv;
         }
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+        start_timing(cpi, handle_newmv_time);
+#endif
         if (cpi->sf.prune_single_motion_modes_by_simple_trans &&
             args->single_ref_first_pass == 0 && !is_comp_pred) {
           const int ref0 = mbmi->ref_frame[0];
@@ -10216,6 +10219,9 @@
           backup_mv[1] = cur_mv[1];
           backup_rate_mv = rate_mv;
         }
+#if CONFIG_COLLECT_COMPONENT_TIMING
+        end_timing(cpi, handle_newmv_time);
+#endif
 
         if (newmv_ret_val != 0) {
           continue;
@@ -10310,6 +10316,9 @@
         continue;
       }
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+      start_timing(cpi, compound_type_rd_time);
+#endif
       int skip_build_pred = 0;
       if (is_comp_pred) {
         if (mode_search_mask[comp_loop_idx] == (1 << COMPOUND_AVERAGE)) {
@@ -10377,11 +10386,20 @@
           }
         }
       }
+#if CONFIG_COLLECT_COMPONENT_TIMING
+      end_timing(cpi, compound_type_rd_time);
+#endif
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+      start_timing(cpi, interpolation_filter_search_time);
+#endif
       ret_val = interpolation_filter_search(
           x, cpi, tile_data, bsize, mi_row, mi_col, &tmp_dst, &orig_dst,
           args->single_filter, &rd, &rs, &skip_txfm_sb, &skip_sse_sb,
           &skip_build_pred, args, ref_best_rd);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+      end_timing(cpi, interpolation_filter_search_time);
+#endif
       if (args->modelled_rd != NULL && !is_comp_pred) {
         args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
       }
@@ -12772,6 +12790,9 @@
 
   release_compound_type_rd_buffers(&rd_buffers);
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  start_timing(cpi, do_tx_search_time);
+#endif
   if (!do_tx_search) {
     inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
     search_state.best_rd = INT64_MAX;
@@ -12844,7 +12865,13 @@
       }
     }
   }
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  end_timing(cpi, do_tx_search_time);
+#endif
 
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  start_timing(cpi, handle_intra_mode_time);
+#endif
   for (int j = 0; j < intra_mode_num; ++j) {
     const int mode_index = intra_mode_idx_ls[j];
     const MV_REFERENCE_FRAME ref_frame =
@@ -12887,6 +12914,10 @@
 #endif
     }
   }
+#if CONFIG_COLLECT_COMPONENT_TIMING
+  end_timing(cpi, handle_intra_mode_time);
+#endif
+
   // In effect only when speed >= 2.
   sf_refine_fast_tx_type_search(
       cpi, x, mi_row, mi_col, rd_cost, bsize, ctx, search_state.best_mode_index,
commit	d1f32e6a82fdcb9c0734f5d555d2a9cd0cdf32d8	[log] [tgz]
author	Yunqing Wang <yunqingwang@google.com>	Wed Feb 20 10:37:51 2019 -0800
committer	Yunqing Wang <yunqingwang@google.com>	Thu Feb 21 02:16:52 2019 +0000
tree	0a2e44a5893c331bf3bb08924f42927416fe78ff
parent	5758a2a169d5e7a8e72b8fc5022aa4d964ebbfb5 [diff]