Clean up and turn on rd_debug for non-sub8x8 inter blocks

Change-Id: Ib56273c23b7c7cb7c44d5ed95078156f1b09144c
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 84de8e4..1766a7c 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -225,9 +225,11 @@
   int skip;
 #if CONFIG_RD_DEBUG
   int txb_coeff_cost[MAX_MB_PLANE];
+#if CONFIG_VAR_TX
   int txb_coeff_cost_map[MAX_MB_PLANE][TXB_COEFF_COST_MAP_SIZE]
                         [TXB_COEFF_COST_MAP_SIZE];
-#endif
+#endif  // CONFIG_VAR_TX
+#endif  // CONFIG_RD_DEBUG
 } RD_STATS;
 
 // This structure now relates to 8x8 block regions.
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 7030210..4192801 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1742,7 +1742,9 @@
 static int rd_token_stats_mismatch(RD_STATS *rd_stats, TOKEN_STATS *token_stats,
                                    int plane) {
   if (rd_stats->txb_coeff_cost[plane] != token_stats->cost) {
+#if CONFIG_VAR_TX
     int r, c;
+#endif
     printf("\nplane %d rd_stats->txb_coeff_cost %d token_stats->cost %d\n",
            plane, rd_stats->txb_coeff_cost[plane], token_stats->cost);
 #if CONFIG_VAR_TX
@@ -1794,9 +1796,6 @@
   MODE_INFO *m;
   int plane;
   int bh, bw;
-#if CONFIG_RD_DEBUG
-  int64_t txb_coeff_cost[MAX_MB_PLANE] = { 0 };
-#endif
 #if CONFIG_PVQ
   MB_MODE_INFO *mbmi;
   BLOCK_SIZE bsize;
@@ -1888,9 +1887,9 @@
   if (!m->mbmi.skip) {
     assert(*tok < tok_end);
     for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+      MB_MODE_INFO *mbmi = &m->mbmi;
 #if CONFIG_VAR_TX
       const struct macroblockd_plane *const pd = &xd->plane[plane];
-      MB_MODE_INFO *mbmi = &m->mbmi;
       BLOCK_SIZE bsize = mbmi->sb_type;
       const BLOCK_SIZE plane_bsize =
           get_plane_block_size(AOMMAX(bsize, BLOCK_8X8), pd);
@@ -1930,7 +1929,8 @@
           }
         }
 #if CONFIG_RD_DEBUG
-        if (rd_token_stats_mismatch(&m->mbmi.rd_stats, &token_stats, plane)) {
+        if (mbmi->sb_type >= BLOCK_8X8 &&
+            rd_token_stats_mismatch(&m->mbmi.rd_stats, &token_stats, plane)) {
           dump_mode_info(m);
           assert(0);
         }
@@ -1949,8 +1949,17 @@
       TX_SIZE tx =
           plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane]) : m->mbmi.tx_size;
       TOKEN_STATS token_stats;
-      token_stats.cost = 0;
+      init_token_stats(&token_stats);
       pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx, &token_stats);
+#if CONFIG_RD_DEBUG
+      if (is_inter_block(mbmi) && mbmi->sb_type >= BLOCK_8X8 &&
+          rd_token_stats_mismatch(&m->mbmi.rd_stats, &token_stats, plane)) {
+        dump_mode_info(m);
+        assert(0);
+      }
+#else
+      (void)mbmi;
+#endif
 #endif  // CONFIG_VAR_TX
 
       assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index f293a04..0567de5 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1229,10 +1229,11 @@
   }
 #if !CONFIG_PVQ
   this_rd_stats.rate = rate_block(plane, block, coeff_ctx, tx_size, args);
-#if RD_DEBUG
+#if CONFIG_RD_DEBUG
   av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
                             this_rd_stats.rate);
 #endif
+
   args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0);
   args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0);
 #else
@@ -3608,10 +3609,6 @@
       mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
   mbmi->tx_size = best_tx;
   mbmi->min_tx_size = best_min_tx_size;
-#if CONFIG_RD_DEBUG
-  // record plane y's transform block coefficient cost
-  mbmi->rd_stats = *rd_stats;
-#endif
   memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
 }
 
@@ -7833,10 +7830,6 @@
       is_cost_valid_uv =
           super_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty);
 #endif  // CONFIG_VAR_TX
-#if CONFIG_RD_DEBUG
-      // record uv planes' transform block coefficient cost
-      if (is_cost_valid_uv) av1_merge_rd_stats(&mbmi->rd_stats, rd_stats_uv);
-#endif
       if (!is_cost_valid_uv) {
 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
         continue;
@@ -7847,6 +7840,14 @@
       }
       /* clang-format on */
       av1_merge_rd_stats(rd_stats, rd_stats_uv);
+#if CONFIG_RD_DEBUG
+      // record transform block coefficient cost
+      // TODO(angiebird): So far the rd_debug tool only detects discrepancies
+      // in coefficient cost. Therefore, it is fine to copy rd_stats into mbmi
+      // here because we have already collected the coefficient cost. Move this
+      // to another place when we need to compare non-coefficient costs.
+      mbmi->rd_stats = *rd_stats;
+#endif
 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
       if (rd_stats->skip) {
         rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 462274e..8d1df95 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -30,17 +30,23 @@
 static INLINE void av1_update_txb_coeff_cost(RD_STATS *rd_stats, int plane,
                                              TX_SIZE tx_size, int blk_row,
                                              int blk_col, int txb_coeff_cost) {
-  const int txb_h = tx_size_high_unit[tx_size];
-  const int txb_w = tx_size_wide_unit[tx_size];
-  int idx, idy;
-
+  (void)blk_row;
+  (void)blk_col;
+  (void)tx_size;
   rd_stats->txb_coeff_cost[plane] += txb_coeff_cost;
 
-  for (idy = 0; idy < txb_h; ++idy)
-    for (idx = 0; idx < txb_w; ++idx)
-      rd_stats->txb_coeff_cost_map[plane][blk_row + idy][blk_col + idx] = 0;
+#if CONFIG_VAR_TX
+  {
+    const int txb_h = tx_size_high_unit[tx_size];
+    const int txb_w = tx_size_wide_unit[tx_size];
+    int idx, idy;
+    for (idy = 0; idy < txb_h; ++idy)
+      for (idx = 0; idx < txb_w; ++idx)
+        rd_stats->txb_coeff_cost_map[plane][blk_row + idy][blk_col + idx] = 0;
 
-  rd_stats->txb_coeff_cost_map[plane][blk_row][blk_col] = txb_coeff_cost;
+    rd_stats->txb_coeff_cost_map[plane][blk_row][blk_col] = txb_coeff_cost;
+  }
+#endif
 
   assert(blk_row < 16);
   assert(blk_col < 16);
@@ -57,11 +63,15 @@
   rd_stats->skip = 1;
 #if CONFIG_RD_DEBUG
   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    int r, c;
     rd_stats->txb_coeff_cost[plane] = 0;
-    for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
-      for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
-        rd_stats->txb_coeff_cost_map[plane][r][c] = 0;
+#if CONFIG_VAR_TX
+    {
+      int r, c;
+      for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
+        for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
+          rd_stats->txb_coeff_cost_map[plane][r][c] = 0;
+    }
+#endif
   }
 #endif
 }
@@ -76,11 +86,15 @@
   rd_stats->skip = 0;
 #if CONFIG_RD_DEBUG
   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    int r, c;
     rd_stats->txb_coeff_cost[plane] = INT_MAX;
-    for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
-      for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
-        rd_stats->txb_coeff_cost_map[plane][r][c] = INT_MAX;
+#if CONFIG_VAR_TX
+    {
+      int r, c;
+      for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
+        for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
+          rd_stats->txb_coeff_cost_map[plane][r][c] = INT_MAX;
+    }
+#endif
   }
 #endif
 }
@@ -96,17 +110,21 @@
   rd_stats_dst->skip &= rd_stats_src->skip;
 #if CONFIG_RD_DEBUG
   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    int r, c;
-    int ref_txb_coeff_cost = 0;
     rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
-    // TODO(angiebird): optimize this part
-    for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
-      for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
-        rd_stats_dst->txb_coeff_cost_map[plane][r][c] +=
-            rd_stats_src->txb_coeff_cost_map[plane][r][c];
-        ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c];
-      }
-    assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]);
+#if CONFIG_VAR_TX
+    {
+      // TODO(angiebird): optimize this part
+      int r, c;
+      int ref_txb_coeff_cost = 0;
+      for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
+        for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
+          rd_stats_dst->txb_coeff_cost_map[plane][r][c] +=
+              rd_stats_src->txb_coeff_cost_map[plane][r][c];
+          ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c];
+        }
+      assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]);
+    }
+#endif
   }
 #endif
 }