Added per pixel inter rd hit count stats

Added some code to output normalized rd hit count stats.
In effect this approximates to the average number of rd
operations/tests per pixel for the sequence.

The results are not quite accurate and I have not bothered
to account for partial SB64s at frame edges and for key frames
However they do give some idea of the number of modes /
prediction methods being tested for each pixel across the
different partition sizes. This indicates how much scope their
is for further gains either by reducing the number of partitions
examined or the modes per partition through heuristics.

Patch 3 moved place where count incremented so partial rd
tests that are aborted with INT_MAX return are also counted.

Example numbers for first 50 frames of Akiyo.
Speed 0 ~84.4 rd operations / pixel
Speed 1 ~28.8
Speed 2 ~11.9

Change-Id: Ib956e787e12f7fa8b12d3a1a2f6cda19a65a6cb8
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 22ad682..e9c214f 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1725,6 +1725,10 @@
 
   vp9_zero(cpi->y_uv_mode_count)
 
+#ifdef MODE_TEST_HIT_STATS
+  vp9_zero(cpi->mode_test_hits)
+#endif
+
   return (VP9_PTR) cpi;
 }
 
@@ -1806,6 +1810,34 @@
 
 #endif
 
+#ifdef MODE_TEST_HIT_STATS
+    if (cpi->pass != 1) {
+      double norm_per_pixel_mode_tests = 0;
+      double norm_counts[BLOCK_SIZES];
+      int i;
+      int sb64_per_frame;
+      int norm_factors[BLOCK_SIZES] =
+        {256, 128, 128, 64, 32, 32, 16, 8, 8, 4, 2, 2, 1};
+      FILE *f = fopen("mode_hit_stats.stt", "a");
+
+      // On average, how many mode tests do we do
+      for (i = 0; i < BLOCK_SIZES; ++i) {
+        norm_counts[i] = (double)cpi->mode_test_hits[i] /
+                         (double)norm_factors[i];
+        norm_per_pixel_mode_tests += norm_counts[i];
+      }
+      // Convert to a number per 64x64 and per frame
+      sb64_per_frame = ((cpi->common.height + 63) / 64) *
+                       ((cpi->common.width + 63) / 64);
+      norm_per_pixel_mode_tests =
+        norm_per_pixel_mode_tests /
+        (double)(cpi->common.current_video_frame * sb64_per_frame);
+
+      fprintf(f, "%6.4f\n", norm_per_pixel_mode_tests);
+      fclose(f);
+    }
+#endif
+
 #ifdef ENTROPY_STATS
     {
       int i, j, k;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 0933185..252e982 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -36,6 +36,8 @@
 #define DISABLE_RC_LONG_TERM_MEM 0
 #endif
 
+// #define MODE_TEST_HIT_STATS
+
 // #define SPEEDSTATS 1
 #if CONFIG_MULTIPLE_ARF
 // Set MIN_GF_INTERVAL to 1 for the full decomposition.
@@ -664,6 +666,12 @@
 #ifdef ENTROPY_STATS
   int64_t mv_ref_stats[INTER_MODE_CONTEXTS][INTER_MODES - 1][2];
 #endif
+
+
+#ifdef MODE_TEST_HIT_STATS
+  // Debug / test stats
+  int64_t mode_test_hits[BLOCK_SIZES];
+#endif
 } VP9_COMP;
 
 static int get_ref_frame_idx(VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 00bff0e..39b6544 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3341,6 +3341,12 @@
       continue;
     }
 
+#ifdef MODE_TEST_HIT_STATS
+    // TEST/DEBUG CODE
+    // Keep a rcord of the number of test hits at each size
+    cpi->mode_test_hits[bsize]++;
+#endif
+
     if (this_mode == I4X4_PRED) {
       int rate;