Set up txb coeff processing timer

Allow the codec to time transform block (txb) coefficient processing so
that the average per-block cost can be used as a software speed check.

Change-Id: Ibdaf15ab5b7f1ea8264604cc00ef45e3ae3114c7
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index a920b4a..3952256 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -90,6 +90,10 @@
 
 #define NUM_PING_PONG_BUFFERS 2
 
+// TODO(jingning): Turn this on to enable the transform coefficient
+// processing timer.
+#define TXCOEFF_TIMER 0
+
 typedef enum {
   SINGLE_REFERENCE = 0,
   COMPOUND_REFERENCE = 1,
@@ -569,6 +573,12 @@
 #if CONFIG_MFMV
   TPL_MV_REF *tpl_mvs;
 #endif
+
+#if TXCOEFF_TIMER
+  int64_t cum_txcoeff_timer;
+  int64_t txcoeff_timer;
+  int txb_count;
+#endif
 } AV1_COMMON;
 
 // TODO(hkuang): Don't need to lock the whole pool after implementing atomic
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 0fc9b84..f582aab 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -23,6 +23,7 @@
 #include "aom_dsp/bitreader.h"
 #include "aom_dsp/bitreader_buffer.h"
 #include "aom_mem/aom_mem.h"
+#include "aom_ports/aom_timer.h"
 #include "aom_ports/mem.h"
 #include "aom_ports/mem_ops.h"
 #include "aom_scale/aom_scale.h"
@@ -196,6 +197,10 @@
 
   if (!mbmi->skip) {
     struct macroblockd_plane *const pd = &xd->plane[plane];
+#if TXCOEFF_TIMER
+    struct aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+#endif
 #if CONFIG_LV_MAP
     int16_t max_scan_line = 0;
     int eob;
@@ -213,6 +218,13 @@
         av1_decode_block_tokens(cm, xd, plane, scan_order, col, row, tx_size,
                                 tx_type, &max_scan_line, r, mbmi->segment_id);
 #endif  // CONFIG_LV_MAP
+
+#if TXCOEFF_TIMER
+    aom_usec_timer_mark(&timer);
+    const int64_t elapsed_time = aom_usec_timer_elapsed(&timer);
+    cm->txcoeff_timer += elapsed_time;
+    ++cm->txb_count;
+#endif
     if (eob) {
       uint8_t *dst =
           &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
@@ -247,6 +259,10 @@
 
   if (tx_size == plane_tx_size) {
     PLANE_TYPE plane_type = get_plane_type(plane);
+#if TXCOEFF_TIMER
+    struct aom_usec_timer timer;
+    aom_usec_timer_start(&timer);
+#endif
 #if CONFIG_LV_MAP
     int16_t max_scan_line = 0;
     int eob;
@@ -264,6 +280,14 @@
         cm, xd, plane, sc, blk_col, blk_row, plane_tx_size, tx_type,
         &max_scan_line, r, mbmi->segment_id);
 #endif  // CONFIG_LV_MAP
+
+#if TXCOEFF_TIMER
+    aom_usec_timer_mark(&timer);
+    const int64_t elapsed_time = aom_usec_timer_elapsed(&timer);
+    cm->txcoeff_timer += elapsed_time;
+    ++cm->txb_count;
+#endif
+
     inverse_transform_block(xd, plane, tx_type, plane_tx_size,
                             &pd->dst.buf[(blk_row * pd->dst.stride + blk_col)
                                          << tx_size_wide_log2[0]],
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index f9425a5..d1b0589 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -366,6 +366,20 @@
   av1_decode_frame_from_obus(pbi, source, source + size, psource);
 #endif
 
+#if TXCOEFF_TIMER
+  // Fold this frame's coefficient-processing time into the running total.
+  cm->cum_txcoeff_timer += cm->txcoeff_timer;
+  // Both timers are int64_t, which is not `long` on every target; cast to
+  // long long so the arguments match the %lld conversions.
+  fprintf(stderr,
+          "txb coeff block number: %d, frame time: %lld, cum time %lld in us\n",
+          cm->txb_count, (long long)cm->txcoeff_timer,
+          (long long)cm->cum_txcoeff_timer);
+  // Reset the per-frame counters; the cumulative total is kept.
+  cm->txcoeff_timer = 0;
+  cm->txb_count = 0;
+#endif
+
   swap_frame_buffers(pbi);
 
 #if CONFIG_EXT_TILE