Fix partition timing data collection
The code under CONFIG_COLLECT_PARTITION_STATS was broken by a previous
refactoring. This commit restores its functionality.
Change-Id: I009f136ac4c9450c141784e130f1d77962b6021a
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h
index a67c734..462dd7e 100644
--- a/av1/encoder/encodeframe_utils.h
+++ b/av1/encoder/encodeframe_utils.h
@@ -96,6 +96,24 @@
BLOCK_SIZE split_bsize2;
} PartitionBlkParams;
+#if CONFIG_COLLECT_PARTITION_STATS
+typedef struct PartitionTimingStats {
+ // Number of times each partition type was picked as the final decision in
+ // the current call to \ref av1_rd_pick_partition
+ int partition_decisions[EXT_PARTITION_TYPES];
+ // Number of timed searches started for each partition type in the current
+ // call to \ref av1_rd_pick_partition
+ int partition_attempts[EXT_PARTITION_TYPES];
+ // Accumulated aom_usec_timer_elapsed() readings for each partition type in
+ // the current call to \ref av1_rd_pick_partition
+ int64_t partition_times[EXT_PARTITION_TYPES];
+ // Timer used to time the partitions.
+ struct aom_usec_timer timer;
+ // Nonzero while a timed partition search is in progress
+ int timer_is_on;
+} PartitionTimingStats;
+#endif // CONFIG_COLLECT_PARTITION_STATS
+
// Structure holding state variables for partition search.
typedef struct {
// Intra partitioning related info.
@@ -149,6 +167,10 @@
// This flag will be set if best partition is found from the search.
bool found_best_partition;
+
+#if CONFIG_COLLECT_PARTITION_STATS
+ PartitionTimingStats part_timing_stats;
+#endif // CONFIG_COLLECT_PARTITION_STATS
} PartitionSearchState;
static AOM_INLINE void update_global_motion_used(PREDICTION_MODE mode,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 7000e98..44cefbb 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1054,9 +1054,9 @@
setup_tpl_buffers(cm, &cpi->tpl_data, cpi->oxcf.gf_cfg.lag_in_frames);
}
-#if CONFIG_COLLECT_PARTITION_STATS == 2
+#if CONFIG_COLLECT_PARTITION_STATS
av1_zero(cpi->partition_stats);
-#endif
+#endif // CONFIG_COLLECT_PARTITION_STATS
#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \
cpi->fn_ptr[BT].sdf = SDF; \
@@ -1501,7 +1501,8 @@
#if CONFIG_COLLECT_PARTITION_STATS == 2
if (!is_stat_generation_stage(cpi)) {
- av1_print_partition_stats(&cpi->partition_stats);
+ av1_print_fr_partition_timing_stats(&cpi->partition_stats,
+ "fr_part_timing_data.csv");
}
#endif
}
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 7b85348..95efd65 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1484,15 +1484,15 @@
/*!\cond */
-#if CONFIG_COLLECT_PARTITION_STATS == 2
-typedef struct PartitionStats {
+#if CONFIG_COLLECT_PARTITION_STATS
+// Partition timing stats aggregated over many partition searches. The first
+// index of each table is the block-size index returned by
+// av1_get_bsize_idx_for_part_stats(); the second index is the partition type.
+// partition_redo counts how often the partition search had to be restarted
+// because no valid partition was found.
+typedef struct FramePartitionTimingStats {
int partition_decisions[6][EXT_PARTITION_TYPES];
int partition_attempts[6][EXT_PARTITION_TYPES];
int64_t partition_times[6][EXT_PARTITION_TYPES];
int partition_redo;
-} PartitionStats;
-#endif
+} FramePartitionTimingStats;
+#endif // CONFIG_COLLECT_PARTITION_STATS
#if CONFIG_COLLECT_COMPONENT_TIMING
#include "aom_ports/aom_timer.h"
@@ -2499,9 +2499,12 @@
*/
int is_screen_content_type;
-#if CONFIG_COLLECT_PARTITION_STATS == 2
- PartitionStats partition_stats;
-#endif
+#if CONFIG_COLLECT_PARTITION_STATS
+ /*!
+ * Accumulates the partition timing stat over the whole frame.
+ */
+ FramePartitionTimingStats partition_stats;
+#endif // CONFIG_COLLECT_PARTITION_STATS
#if CONFIG_COLLECT_COMPONENT_TIMING
/*!
@@ -3181,8 +3184,9 @@
}
#if CONFIG_COLLECT_PARTITION_STATS == 2
-static INLINE void av1_print_partition_stats(PartitionStats *part_stats) {
- FILE *f = fopen("partition_stats.csv", "w");
+static INLINE void av1_print_fr_partition_timing_stats(
+ const FramePartitionTimingStats *part_stats, const char *filename) {
+ FILE *f = fopen(filename, "w");
if (!f) {
return;
}
@@ -3199,7 +3203,7 @@
}
fprintf(f, "\n");
- const int bsizes[6] = { 128, 64, 32, 16, 8, 4 };
+ static const int bsizes[6] = { 128, 64, 32, 16, 8, 4 };
for (int bsize_idx = 0; bsize_idx < 6; bsize_idx++) {
fprintf(f, "%d,%d,", bsizes[bsize_idx], part_stats->partition_redo);
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index edb5ab4..16a7332 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -11,6 +11,7 @@
#include "aom_ports/system_state.h"
+#include "av1/common/av1_common_int.h"
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/reconintra.h"
@@ -2286,6 +2287,63 @@
return true;
}
+#if CONFIG_COLLECT_PARTITION_STATS
+// Clears the whole PartitionTimingStats (counters, accumulated times and
+// timer state) with av1_zero so a new partition search starts from scratch.
+static void init_partition_block_timing_stats(
+ PartitionTimingStats *part_timing_stats) {
+ av1_zero(*part_timing_stats);
+}
+
+// Begins timing one search of partition_type. Asserts that no timed search is
+// already in progress, counts the attempt, starts the timer and flags it as
+// running.
+static INLINE void start_partition_block_timer(
+    PartitionTimingStats *part_timing_stats, PARTITION_TYPE partition_type) {
+  assert(!part_timing_stats->timer_is_on);
+  ++part_timing_stats->partition_attempts[partition_type];
+  struct aom_usec_timer *const timer = &part_timing_stats->timer;
+  aom_usec_timer_start(timer);
+  part_timing_stats->timer_is_on = 1;
+}
+
+// Finishes timing one search of partition_type: marks the timer, adds the
+// elapsed reading to the per-type total and flags the timer as stopped.
+// Does nothing when no timed search is in progress.
+static INLINE void end_partition_block_timer(
+    PartitionTimingStats *part_timing_stats, PARTITION_TYPE partition_type) {
+  if (!part_timing_stats->timer_is_on) return;
+
+  struct aom_usec_timer *const timer = &part_timing_stats->timer;
+  aom_usec_timer_mark(timer);
+  const int64_t elapsed = aom_usec_timer_elapsed(timer);
+  part_timing_stats->partition_times[partition_type] += elapsed;
+  part_timing_stats->timer_is_on = 0;
+}
+
+// Appends one CSV row for a single partition search to filename. The row
+// holds bsize, show_frame and intra_only, followed by the per-partition-type
+// decision counts, attempt counts and accumulated times.
+// If the file cannot be opened for appending, nothing is written.
+static INLINE void print_partition_timing_stats(
+    const PartitionTimingStats *part_timing_stats, int intra_only,
+    int show_frame, const BLOCK_SIZE bsize, const char *filename) {
+  FILE *f = fopen(filename, "a");
+  // Stats output is a debugging aid: never hand a NULL stream to fprintf().
+  if (!f) return;
+
+  fprintf(f, "%d,%d,%d,", bsize, show_frame, intra_only);
+  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
+    fprintf(f, "%d,", part_timing_stats->partition_decisions[idx]);
+  }
+  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
+    fprintf(f, "%d,", part_timing_stats->partition_attempts[idx]);
+  }
+  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
+    // int64_t is not necessarily long; cast so the argument matches %lld.
+    fprintf(f, "%lld,", (long long)part_timing_stats->partition_times[idx]);
+  }
+  fprintf(f, "\n");
+  fclose(f);
+}
+
+// Adds the per-block attempt counts, decision counts and times in
+// part_timing_stats onto the row of fr_part_timing_stats selected by the
+// block-size index of bsize.
+static INLINE void accumulate_partition_timing_stats(
+    FramePartitionTimingStats *fr_part_timing_stats,
+    const PartitionTimingStats *part_timing_stats, BLOCK_SIZE bsize) {
+  const int row = av1_get_bsize_idx_for_part_stats(bsize);
+  for (int type = 0; type < EXT_PARTITION_TYPES; ++type) {
+    fr_part_timing_stats->partition_attempts[row][type] +=
+        part_timing_stats->partition_attempts[type];
+    fr_part_timing_stats->partition_decisions[row][type] +=
+        part_timing_stats->partition_decisions[type];
+    fr_part_timing_stats->partition_times[row][type] +=
+        part_timing_stats->partition_times[type];
+  }
+}
+#endif // CONFIG_COLLECT_PARTITION_STATS
+
// Initialize state variables of partition search used in
// av1_rd_pick_partition().
static void init_partition_search_state_params(
@@ -2382,6 +2440,10 @@
// Reset the flag indicating whether a partition leading to a rdcost lower
// than the bound best_rdc has been found.
part_search_state->found_best_partition = false;
+
+#if CONFIG_COLLECT_PARTITION_STATS
+ init_partition_block_timing_stats(&part_search_state->part_timing_stats);
+#endif // CONFIG_COLLECT_PARTITION_STATS
}
// Override partition cost buffer for the edge blocks.
@@ -2557,10 +2619,10 @@
sum_rdc->rate = part_search_state->partition_cost[partition_type];
sum_rdc->rdcost = RDCOST(x->rdmult, sum_rdc->rate, 0);
#if CONFIG_COLLECT_PARTITION_STATS
- if (best_rdc.rdcost - sum_rdc->rdcost >= 0) {
- partition_attempts[partition_type] += 1;
- aom_usec_timer_start(&partition_timer);
- partition_timer_on = 1;
+ PartitionTimingStats *part_timing_stats =
+ &part_search_state->part_timing_stats;
+ if (best_rdc->rdcost - sum_rdc->rdcost >= 0) {
+ start_partition_block_timer(part_timing_stats, partition_type);
}
#endif
@@ -2595,11 +2657,8 @@
mi_pos_rect[i][sub_part_idx][1], blk_params.subsize, partition_type);
}
#if CONFIG_COLLECT_PARTITION_STATS
- if (partition_timer_on) {
- aom_usec_timer_mark(&partition_timer);
- int64_t time = aom_usec_timer_elapsed(&partition_timer);
- partition_times[partition_type] += time;
- partition_timer_on = 0;
+ if (part_timing_stats->timer_is_on) {
+ end_partition_block_timer(part_timing_stats, partition_type);
}
#endif
// Update HORZ / VERT best partition.
@@ -2636,16 +2695,15 @@
const int bsize = blk_params.bsize;
#if CONFIG_COLLECT_PARTITION_STATS
+ PartitionTimingStats *part_timing_stats =
+ &part_search_state->part_timing_stats;
{
RD_STATS tmp_sum_rdc;
av1_init_rd_stats(&tmp_sum_rdc);
- tmp_sum_rdc.rate =
- x->partition_cost[part_search_state->pl_ctx_idx][part_type];
+ tmp_sum_rdc.rate = part_search_state->partition_cost[part_type];
tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
if (best_rdc->rdcost - tmp_sum_rdc.rdcost >= 0) {
- partition_attempts[part_type] += 1;
- aom_usec_timer_start(&partition_timer);
- partition_timer_on = 1;
+ start_partition_block_timer(part_timing_stats, part_type);
}
}
#endif
@@ -2656,11 +2714,8 @@
bsize, part_type, ab_subsize, ab_mi_pos, mode_cache);
#if CONFIG_COLLECT_PARTITION_STATS
- if (partition_timer_on) {
- aom_usec_timer_mark(&partition_timer);
- int64_t time = aom_usec_timer_elapsed(&partition_timer);
- partition_times[part_type] += time;
- partition_timer_on = 0;
+ if (part_timing_stats->timer_is_on) {
+ end_partition_block_timer(part_timing_stats, part_type);
}
#endif
av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
@@ -2916,10 +2971,10 @@
// Set mi positions for sub-block sizes.
set_mi_pos_partition4(inc_step, mi_pos, blk_params.mi_row, blk_params.mi_col);
#if CONFIG_COLLECT_PARTITION_STATS
- if (best_rdc.rdcost - part_search_state->sum_rdc.rdcost >= 0) {
- partition_attempts[partition_type] += 1;
- aom_usec_timer_start(&partition_timer);
- partition_timer_on = 1;
+ PartitionTimingStats *part_timing_stats =
+ &part_search_state->part_timing_stats;
+ if (best_rdc->rdcost - part_search_state->sum_rdc.rdcost >= 0) {
+ start_partition_block_timer(part_timing_stats, partition_type);
}
#endif
// Loop over sub-block partitions.
@@ -2945,11 +3000,8 @@
pc_tree->partitioning = partition_type;
}
#if CONFIG_COLLECT_PARTITION_STATS
- if (partition_timer_on) {
- aom_usec_timer_mark(&partition_timer);
- int64_t time = aom_usec_timer_elapsed(&partition_timer);
- partition_times[partition_type] += time;
- partition_timer_on = 0;
+ if (part_timing_stats->timer_is_on) {
+ end_partition_block_timer(part_timing_stats, partition_type);
}
#endif
av1_restore_context(x, x_ctx, blk_params.mi_row, blk_params.mi_col,
@@ -3230,10 +3282,10 @@
#if CONFIG_COLLECT_PARTITION_STATS
// Timer start for partition None.
- if (best_remain_rdcost >= 0) {
- partition_attempts[PARTITION_NONE] += 1;
- aom_usec_timer_start(&partition_timer);
- partition_timer_on = 1;
+ PartitionTimingStats *part_timing_stats =
+ &part_search_state->part_timing_stats;
+ if (best_remain_rdcost.rdcost >= 0) {
+ start_partition_block_timer(part_timing_stats, PARTITION_NONE);
}
#endif
// PARTITION_NONE evaluation and cost update.
@@ -3244,11 +3296,8 @@
#if CONFIG_COLLECT_PARTITION_STATS
// Timer end for partition None.
- if (partition_timer_on) {
- aom_usec_timer_mark(&partition_timer);
- int64_t time = aom_usec_timer_elapsed(&partition_timer);
- partition_times[PARTITION_NONE] += time;
- partition_timer_on = 0;
+ if (part_timing_stats->timer_is_on) {
+ end_partition_block_timer(part_timing_stats, PARTITION_NONE);
}
#endif
*pb_source_variance = x->source_variance;
@@ -3320,10 +3369,10 @@
int idx;
#if CONFIG_COLLECT_PARTITION_STATS
+ PartitionTimingStats *part_timing_stats =
+ &part_search_state->part_timing_stats;
if (best_rdc->rdcost - sum_rdc.rdcost >= 0) {
- partition_attempts[PARTITION_SPLIT] += 1;
- aom_usec_timer_start(&partition_timer);
- partition_timer_on = 1;
+ start_partition_block_timer(part_timing_stats, PARTITION_SPLIT);
}
#endif
// Recursive partition search on 4 sub-blocks.
@@ -3380,11 +3429,8 @@
}
}
#if CONFIG_COLLECT_PARTITION_STATS
- if (partition_timer_on) {
- aom_usec_timer_mark(&partition_timer);
- int64_t time = aom_usec_timer_elapsed(&partition_timer);
- partition_times[PARTITION_SPLIT] += time;
- partition_timer_on = 0;
+ if (part_timing_stats->timer_is_on) {
+ end_partition_block_timer(part_timing_stats, PARTITION_SPLIT);
}
#endif
const int reached_last_index = (idx == SUB_PARTITIONS_SPLIT);
@@ -3483,15 +3529,12 @@
(void)*tp_orig;
#if CONFIG_COLLECT_PARTITION_STATS
- int partition_decisions[EXT_PARTITION_TYPES] = { 0 };
- int partition_attempts[EXT_PARTITION_TYPES] = { 0 };
- int64_t partition_times[EXT_PARTITION_TYPES] = { 0 };
- struct aom_usec_timer partition_timer = { 0 };
- int partition_timer_on = 0;
-#if CONFIG_COLLECT_PARTITION_STATS == 2
- PartitionStats *part_stats = &cpi->partition_stats;
-#endif
-#endif
+ // Stats at the current quad tree
+ PartitionTimingStats *part_timing_stats =
+ &part_search_state.part_timing_stats;
+ // Stats aggregated at frame level
+ FramePartitionTimingStats *fr_part_timing_stats = &cpi->partition_stats;
+#endif // CONFIG_COLLECT_PARTITION_STATS
// Override partition costs at the edges of the frame in the same
// way as in read_partition (see decodeframe.c).
@@ -3707,9 +3750,9 @@
// Did not find a valid partition, go back and search again, with less
// constraint on which partition types to search.
x->must_find_valid_partition = 1;
-#if CONFIG_COLLECT_PARTITION_STATS == 2
- part_stats->partition_redo += 1;
-#endif
+#if CONFIG_COLLECT_PARTITION_STATS
+ fr_part_timing_stats->partition_redo += 1;
+#endif // CONFIG_COLLECT_PARTITION_STATS
goto BEGIN_PARTITION_SEARCH;
}
@@ -3722,41 +3765,19 @@
#if CONFIG_COLLECT_PARTITION_STATS
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) {
- partition_decisions[pc_tree->partitioning] += 1;
+ part_timing_stats->partition_decisions[pc_tree->partitioning] += 1;
}
-#endif
-#if CONFIG_COLLECT_PARTITION_STATS == 1
// If CONFIG_COLLECT_PARTITION_STATS is 1, then print out the stats for each
// prediction block.
- FILE *f = fopen("data.csv", "a");
- fprintf(f, "%d,%d,%d,", bsize, cm->show_frame, frame_is_intra_only(cm));
- for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
- fprintf(f, "%d,", partition_decisions[idx]);
- }
- for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
- fprintf(f, "%d,", partition_attempts[idx]);
- }
- for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
- fprintf(f, "%ld,", partition_times[idx]);
- }
- fprintf(f, "\n");
- fclose(f);
-#endif
-
-#if CONFIG_COLLECT_PARTITION_STATS == 2
+ print_partition_timing_stats(part_timing_stats, frame_is_intra_only(cm),
+ cm->show_frame, bsize,
+ "part_timing_data.csv");
// If CONFIG_COLLECTION_PARTITION_STATS is 2, then we print out the stats for
// the whole clip. So we need to pass the information upstream to the encoder.
- const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize);
- int *agg_attempts = part_stats->partition_attempts[bsize_idx];
- int *agg_decisions = part_stats->partition_decisions[bsize_idx];
- int64_t *agg_times = part_stats->partition_times[bsize_idx];
- for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
- agg_attempts[idx] += partition_attempts[idx];
- agg_decisions[idx] += partition_decisions[idx];
- agg_times[idx] += partition_times[idx];
- }
-#endif
+ accumulate_partition_timing_stats(fr_part_timing_stats, part_timing_stats,
+ bsize);
+#endif // CONFIG_COLLECT_PARTITION_STATS
// Reset the PC_TREE deallocation flag.
int pc_tree_dealloc = 0;
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 3bf1eeb..24020a8 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -128,7 +128,7 @@
set_aom_config_var(CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 1
"Disable full_pixel_motion_search_based_split on BLOCK_8X8.")
set_aom_config_var(CONFIG_COLLECT_PARTITION_STATS 0
- "Collect stats on partition decisions.")
+ "Collect partition timing stats. Can be 1 or 2.")
set_aom_config_var(CONFIG_COLLECT_COMPONENT_TIMING 0
"Collect encoding component timing information.")
set_aom_config_var(CONFIG_LPF_MASK 0