RTC: Gather more detailed timing info
Change-Id: I4baf81f79bc850c6576caeabd9c8737c86b1adc2
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 6e77c46..95dba0f 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1905,12 +1905,13 @@
#endif // !CONFIG_REALTIME_ONLY
#define COLLECT_PICK_MODE_STAT 0
+#define COLLECT_NON_SQR_STAT 0
#if COLLECT_PICK_MODE_STAT
#include "aom_ports/aom_timer.h"
typedef struct _mode_search_stat {
int32_t num_blocks[BLOCK_SIZES];
- int64_t avg_block_times[BLOCK_SIZES];
+ int64_t total_block_times[BLOCK_SIZES];
int32_t num_searches[BLOCK_SIZES][MB_MODE_COUNT];
int32_t num_nonskipped_searches[BLOCK_SIZES][MB_MODE_COUNT];
int64_t search_times[BLOCK_SIZES][MB_MODE_COUNT];
@@ -1921,8 +1922,15 @@
int64_t txfm_time[BLOCK_SIZES][MB_MODE_COUNT];
struct aom_usec_timer timer1;
struct aom_usec_timer timer2;
- struct aom_usec_timer timer3;
+ struct aom_usec_timer bsize_timer;
} mode_search_stat;
+
+static void AOM_INLINE print_stage_time(const char *stage_name,
+ int64_t stage_time,
+ int64_t total_time) {
+ printf(" %s: %ld (%f%%)\n", stage_name, stage_time,
+ 100 * stage_time / (float)total_time);
+}
#endif // COLLECT_PICK_MODE_STAT
static void compute_intra_yprediction(const AV1_COMMON *cm,
@@ -2797,7 +2805,7 @@
const TxfmSearchParams *txfm_params = &x->txfm_search_params;
TxfmSearchInfo *txfm_info = &x->txfm_search_info;
#if COLLECT_PICK_MODE_STAT
- aom_usec_timer_start(&ms_stat.timer2);
+ aom_usec_timer_start(&ms_stat.bsize_timer);
#endif
int64_t thresh_sad_pred = INT64_MAX;
const int mi_row = xd->mi_row;
@@ -3172,9 +3180,6 @@
pd->dst.stride = bw;
}
}
-#if COLLECT_PICK_MODE_STAT
- ms_stat.num_nonskipped_searches[bsize][this_mode]++;
-#endif
if (idx == 0 && !skip_pred_mv) {
// Set color sensitivity on first tested mode only.
@@ -3302,6 +3307,10 @@
continue;
}
+#if COLLECT_PICK_MODE_STAT
+ ms_stat.num_nonskipped_searches[bsize][this_mode]++;
+#endif
+
const int skip_ctx = av1_get_skip_txfm_context(xd);
const int skip_txfm_cost = mode_costs->skip_txfm_cost[skip_ctx][1];
const int no_skip_txfm_cost = mode_costs->skip_txfm_cost[skip_ctx][0];
@@ -3493,6 +3502,7 @@
#if COLLECT_PICK_MODE_STAT
aom_usec_timer_start(&ms_stat.timer1);
ms_stat.num_searches[bsize][DC_PRED]++;
+ ms_stat.num_nonskipped_searches[bsize][DC_PRED]++;
#endif
if (!x->force_zeromv_skip)
@@ -3648,52 +3658,64 @@
store_coding_context(x, ctx);
#endif // CONFIG_INTERNAL_STATS
#if COLLECT_PICK_MODE_STAT
- aom_usec_timer_mark(&ms_stat.timer2);
- ms_stat.avg_block_times[bsize] += aom_usec_timer_elapsed(&ms_stat.timer2);
- //
+ aom_usec_timer_mark(&ms_stat.bsize_timer);
+ ms_stat.total_block_times[bsize] +=
+ aom_usec_timer_elapsed(&ms_stat.bsize_timer);
if ((mi_row + mi_size_high[bsize] >= (cpi->common.mi_params.mi_rows)) &&
(mi_col + mi_size_wide[bsize] >= (cpi->common.mi_params.mi_cols))) {
- int i, j;
- BLOCK_SIZE bss[5] = { BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64,
- BLOCK_128X128 };
int64_t total_time = 0l;
int32_t total_blocks = 0;
+ for (BLOCK_SIZE bs = 0; bs < BLOCK_SIZES; bs++) {
+ total_time += ms_stat.total_block_times[bs];
+ total_blocks += ms_stat.num_blocks[bs];
+ }
printf("\n");
- for (i = 0; i < 5; i++) {
- printf("BS(%d) Num %d, Avg_time %f:\n", bss[i],
- ms_stat.num_blocks[bss[i]],
- ms_stat.num_blocks[bss[i]] > 0
- ? (float)ms_stat.avg_block_times[bss[i]] /
- ms_stat.num_blocks[bss[i]]
- : 0);
- total_time += ms_stat.avg_block_times[bss[i]];
- total_blocks += ms_stat.num_blocks[bss[i]];
- for (j = 0; j < MB_MODE_COUNT; j++) {
- if (ms_stat.nonskipped_search_times[bss[i]][j] == 0) {
+ for (BLOCK_SIZE bs = 0; bs < BLOCK_SIZES; bs++) {
+ if (ms_stat.num_blocks[bs] == 0) {
+ continue;
+ }
+ if (!COLLECT_NON_SQR_STAT && block_size_wide[bs] != block_size_high[bs]) {
+ continue;
+ }
+
+ printf("BLOCK_%dX%d Num %d, Time: %ld (%f%%), Avg_time %f:\n",
+ block_size_wide[bs], block_size_high[bs], ms_stat.num_blocks[bs],
+ ms_stat.total_block_times[bs],
+ 100 * ms_stat.total_block_times[bs] / (float)total_time,
+ (float)ms_stat.total_block_times[bs] / ms_stat.num_blocks[bs]);
+ for (int j = 0; j < MB_MODE_COUNT; j++) {
+ if (ms_stat.nonskipped_search_times[bs][j] == 0) {
continue;
}
+ int64_t total_mode_time = ms_stat.nonskipped_search_times[bs][j];
printf(" Mode %d, %d/%d tps %f\n", j,
- ms_stat.num_nonskipped_searches[bss[i]][j],
- ms_stat.num_searches[bss[i]][j],
- ms_stat.num_nonskipped_searches[bss[i]][j] > 0
- ? (float)ms_stat.nonskipped_search_times[bss[i]][j] /
- ms_stat.num_nonskipped_searches[bss[i]][j]
+ ms_stat.num_nonskipped_searches[bs][j],
+ ms_stat.num_searches[bs][j],
+ ms_stat.num_nonskipped_searches[bs][j] > 0
+ ? (float)ms_stat.nonskipped_search_times[bs][j] /
+ ms_stat.num_nonskipped_searches[bs][j]
: 0l);
if (j >= INTER_MODE_START) {
- printf(" Motion Search Time: %ld\n", ms_stat.ms_time[bss[i]][j]);
- printf(" Filter Search Time: %ld\n", ms_stat.ifs_time[bss[i]][j]);
- printf(" Model RD Time: %ld\n",
- ms_stat.model_rd_time[bss[i]][j]);
- printf(" Tranfm Search Time: %ld\n", ms_stat.txfm_time[bss[i]][j]);
+ total_mode_time = ms_stat.ms_time[bs][j] + ms_stat.ifs_time[bs][j] +
+ ms_stat.model_rd_time[bs][j] +
+ ms_stat.txfm_time[bs][j];
+ print_stage_time("Motion Search Time", ms_stat.ms_time[bs][j],
+ total_time);
+ print_stage_time("Filter Search Time", ms_stat.ifs_time[bs][j],
+ total_time);
+ print_stage_time("Model RD Time", ms_stat.model_rd_time[bs][j],
+ total_time);
+ print_stage_time("Tranfm Search Time", ms_stat.txfm_time[bs][j],
+ total_time);
}
+ print_stage_time("Total Mode Time", total_mode_time, total_time);
}
printf("\n");
}
printf("Total time = %ld. Total blocks = %d\n", total_time, total_blocks);
}
- //
#endif // COLLECT_PICK_MODE_STAT
*rd_cost = best_rdc;
}