Add code to collect partition_stats

This commit adds codes used to generate prediction block partition
stats to facilitate partition pruning development.

The code is under CONFIG_COLLECT_PARTITION_STATS flag, which is off by
default.

Change-Id: Ie896d36b83ce68e4f02a50aa065a58e9d5b07f88
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 7271c37..f08b86b 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -3827,6 +3827,13 @@
 
   (void)*tp_orig;
 
+#if CONFIG_COLLECT_PARTITION_STATS
+  PartitionStats *part_stats = &cpi->partition_stats;
+  const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize);
+  int *partition_decisions = part_stats->partition_decisions[bsize_idx];
+  int *partition_attempts = part_stats->partition_attempts[bsize_idx];
+#endif
+
   // Override partition costs at the edges of the frame in the same
   // way as in read_partition (see decodeframe.c)
   if (!(has_rows && has_cols)) {
@@ -4083,6 +4090,11 @@
     const int64_t best_remain_rdcost =
         (best_rdc.rdcost == INT64_MAX) ? INT64_MAX
                                        : (best_rdc.rdcost - partition_rd_cost);
+#if CONFIG_COLLECT_PARTITION_STATS
+    if (!frame_is_intra_only(cm) && best_remain_rdcost >= 0) {
+      partition_attempts[PARTITION_NONE] += 1;
+    }
+#endif
     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
                      PARTITION_NONE, bsize, ctx_none, best_remain_rdcost);
     pb_source_variance = x->source_variance;
@@ -4220,6 +4232,11 @@
     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
 
     int idx;
+#if CONFIG_COLLECT_PARTITION_STATS
+    if (!frame_is_intra_only(cm) && best_rdc.rdcost - sum_rdc.rdcost >= 0) {
+      partition_attempts[PARTITION_SPLIT] += 1;
+    }
+#endif
     for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
       const int x_idx = (idx & 1) * mi_step;
       const int y_idx = (idx >> 1) * mi_step;
@@ -4403,6 +4420,11 @@
     const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
                                            ? INT64_MAX
                                            : (best_rdc.rdcost - sum_rdc.rdcost);
+#if CONFIG_COLLECT_PARTITION_STATS
+    if (!frame_is_intra_only(cm) && best_remain_rdcost >= 0) {
+      partition_attempts[PARTITION_HORZ] += 1;
+    }
+#endif
     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
                      PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
                      best_remain_rdcost);
@@ -4480,6 +4502,11 @@
     const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
                                            ? INT64_MAX
                                            : (best_rdc.rdcost - sum_rdc.rdcost);
+#if CONFIG_COLLECT_PARTITION_STATS
+    if (!frame_is_intra_only(cm) && best_remain_rdcost >= 0) {
+      partition_attempts[PARTITION_VERT] += 1;
+    }
+#endif
     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
                      PARTITION_VERT, subsize, &pc_tree->vertical[0],
                      best_remain_rdcost);
@@ -4699,6 +4726,18 @@
         pc_tree->horizontala[2].ref_selected[0] = split_mbmi[2]->ref_frame[0];
       }
     }
+#if CONFIG_COLLECT_PARTITION_STATS
+    {
+      RD_STATS tmp_sum_rdc;
+      av1_init_rd_stats(&tmp_sum_rdc);
+      tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_A];
+      tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
+      if (!frame_is_intra_only(cm) &&
+          best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
+        partition_attempts[PARTITION_HORZ_A] += 1;
+      }
+    }
+#endif
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
                        pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_HORZ_A, mi_row, mi_col, bsize2, mi_row,
@@ -4758,6 +4797,18 @@
         pc_tree->horizontalb[2].ref_selected[0] = split_mbmi[3]->ref_frame[0];
       }
     }
+#if CONFIG_COLLECT_PARTITION_STATS
+    {
+      RD_STATS tmp_sum_rdc;
+      av1_init_rd_stats(&tmp_sum_rdc);
+      tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_B];
+      tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
+      if (!frame_is_intra_only(cm) &&
+          best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
+        partition_attempts[PARTITION_HORZ_B] += 1;
+      }
+    }
+#endif
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
                        pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_HORZ_B, mi_row, mi_col, subsize,
@@ -4815,6 +4866,18 @@
         pc_tree->verticala[2].ref_selected[0] = split_mbmi[1]->ref_frame[0];
       }
     }
+#if CONFIG_COLLECT_PARTITION_STATS
+    {
+      RD_STATS tmp_sum_rdc;
+      av1_init_rd_stats(&tmp_sum_rdc);
+      tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_A];
+      tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
+      if (!frame_is_intra_only(cm) &&
+          best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
+        partition_attempts[PARTITION_VERT_A] += 1;
+      }
+    }
+#endif
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
                        pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_VERT_A, mi_row, mi_col, bsize2,
@@ -4871,6 +4934,18 @@
         pc_tree->verticalb[2].ref_selected[0] = split_mbmi[3]->ref_frame[0];
       }
     }
+#if CONFIG_COLLECT_PARTITION_STATS
+    {
+      RD_STATS tmp_sum_rdc;
+      av1_init_rd_stats(&tmp_sum_rdc);
+      tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_B];
+      tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
+      if (!frame_is_intra_only(cm) &&
+          best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
+        partition_attempts[PARTITION_VERT_B] += 1;
+      }
+    }
+#endif
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
                        pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_VERT_B, mi_row, mi_col, subsize, mi_row,
@@ -4929,6 +5004,11 @@
     sum_rdc.rate = partition_cost[PARTITION_HORZ_4];
     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
 
+#if CONFIG_COLLECT_PARTITION_STATS
+    if (!frame_is_intra_only(cm) && best_rdc.rdcost - sum_rdc.rdcost >= 0) {
+      partition_attempts[PARTITION_HORZ_4] += 1;
+    }
+#endif
     for (int i = 0; i < 4; ++i) {
       const int this_mi_row = mi_row + i * quarter_step;
 
@@ -4975,6 +5055,11 @@
     sum_rdc.rate = partition_cost[PARTITION_VERT_4];
     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
 
+#if CONFIG_COLLECT_PARTITION_STATS
+    if (!frame_is_intra_only(cm) && best_rdc.rdcost - sum_rdc.rdcost >= 0) {
+      partition_attempts[PARTITION_VERT_4] += 1;
+    }
+#endif
     for (int i = 0; i < 4; ++i) {
       const int this_mi_col = mi_col + i * quarter_step;
 
@@ -5012,6 +5097,11 @@
     // Did not find a valid partition, go back and search again, with less
     // constraint on which partition types to search.
     x->must_find_valid_partition = 1;
+#if CONFIG_COLLECT_PARTITION_STATS
+    if (!frame_is_intra_only(cm)) {
+      part_stats->partition_redo += 1;
+    }
+#endif
     goto BEGIN_PARTITION_SEARCH;
   }
 
@@ -5022,6 +5112,13 @@
   (void)best_rd;
   *rd_cost = best_rdc;
 
+#if CONFIG_COLLECT_PARTITION_STATS
+  if (!frame_is_intra_only(cm) && best_rdc.rate < INT_MAX &&
+      best_rdc.dist < INT64_MAX) {
+    partition_decisions[pc_tree->partitioning] += 1;
+  }
+#endif
+
   if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
       pc_tree->index != 3) {
     if (bsize == cm->seq_params.sb_size) {
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index d79aab0..b1a681d 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2784,6 +2784,10 @@
     cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
   }
 
+#if CONFIG_COLLECT_PARTITION_STATS
+  av1_zero(cpi->partition_stats);
+#endif
+
 #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \
   cpi->fn_ptr[BT].sdf = SDF;                                    \
   cpi->fn_ptr[BT].sdaf = SDAF;                                  \
@@ -3101,6 +3105,12 @@
       fprintf(stdout, "tx_search_count = %d\n", cpi->tx_search_count);
     }
 #endif  // CONFIG_SPEED_STATS
+
+#if CONFIG_COLLECT_PARTITION_STATS
+    if (cpi->oxcf.pass != 1) {
+      av1_print_partition_stats(&cpi->partition_stats);
+    }
+#endif
   }
 
   for (int frame = 0; frame < MAX_LAG_BUFFERS; ++frame) {
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 1dcc279..6cd3a08 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -599,6 +599,15 @@
   YV12_BUFFER_CONFIG buf;
 } EncRefCntBuffer;
 
+#if CONFIG_COLLECT_PARTITION_STATS
+typedef struct PartitionStats {
+  int partition_decisions[6][EXT_PARTITION_TYPES];
+  int partition_attempts[6][EXT_PARTITION_TYPES];
+
+  int partition_redo;
+} PartitionStats;
+#endif
+
 typedef struct AV1_COMP {
   QUANTS quants;
   ThreadData td;
@@ -870,6 +879,9 @@
 #endif
   // Set if screen content is set or relevant tools are enabled
   int is_screen_content_type;
+#if CONFIG_COLLECT_PARTITION_STATS
+  PartitionStats partition_stats;
+#endif
 } AV1_COMP;
 
 typedef struct {
@@ -1119,6 +1131,52 @@
 // field.
 aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi);
 
+#if CONFIG_COLLECT_PARTITION_STATS
+static INLINE void av1_print_partition_stats(PartitionStats *part_stats) {
+  FILE *f = fopen("partition_stats.csv", "w");
+  if (!f) {
+    return;
+  }
+
+  fprintf(f, "bsize,redo,");
+  for (int part = 0; part < EXT_PARTITION_TYPES; part++) {
+    fprintf(f, "decision_%d,", part);
+  }
+  for (int part = 0; part < EXT_PARTITION_TYPES; part++) {
+    fprintf(f, "attempt_%d,", part);
+  }
+  fprintf(f, "\n");
+
+  const int bsizes[6] = { 128, 64, 32, 16, 8, 4 };
+
+  for (int bsize_idx = 0; bsize_idx < 6; bsize_idx++) {
+    fprintf(f, "%d,%d,", bsizes[bsize_idx], part_stats->partition_redo);
+    for (int part = 0; part < EXT_PARTITION_TYPES; part++) {
+      fprintf(f, "%d,", part_stats->partition_decisions[bsize_idx][part]);
+    }
+    for (int part = 0; part < EXT_PARTITION_TYPES; part++) {
+      fprintf(f, "%d,", part_stats->partition_attempts[bsize_idx][part]);
+    }
+    fprintf(f, "\n");
+  }
+  fclose(f);
+}
+
+static INLINE int av1_get_bsize_idx_for_part_stats(BLOCK_SIZE bsize) {
+  assert(bsize == BLOCK_128X128 || bsize == BLOCK_64X64 ||
+         bsize == BLOCK_32X32 || bsize == BLOCK_16X16 || bsize == BLOCK_8X8);
+  switch (bsize) {
+    case BLOCK_128X128: return 0;
+    case BLOCK_64X64: return 1;
+    case BLOCK_32X32: return 2;
+    case BLOCK_16X16: return 3;
+    case BLOCK_8X8: return 4;
+    case BLOCK_4X4: return 5;
+    default: assert(0 && "Invalid bsize for partition_stats."); return -1;
+  }
+}
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 5f8288a..a51a4f1 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -130,7 +130,9 @@
 set_aom_config_var(CONFIG_SHARP_SETTINGS 0 NUMBER "AV1 experiment flag.")
 set_aom_config_var(CONFIG_ONE_PASS_SVM 0 NUMBER "AV1 experiment flag.")
 set_aom_config_var(CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 1 NUMBER
-                   "Disable full_pixel_motion_search_based_split on BLOCK_8X8")
+                   "Disable full_pixel_motion_search_based_split on BLOCK_8X8.")
+set_aom_config_var(CONFIG_COLLECT_PARTITION_STATS 0 NUMBER
+                   "Collect stats on partition decisions.")
 
 #
 # Variables in this section control optional features of the build system.