Add mismatch debug tool

Currently only inter blocks are supported.

Change-Id: I6307664452588e654853b4ff18b83cce0343268b
diff --git a/aom_util/aom_util.cmake b/aom_util/aom_util.cmake
index 144d773..34e2261 100644
--- a/aom_util/aom_util.cmake
+++ b/aom_util/aom_util.cmake
@@ -14,14 +14,9 @@
 set(AOM_UTIL_SOURCES
     "${AOM_ROOT}/aom_util/aom_thread.c"
     "${AOM_ROOT}/aom_util/aom_thread.h"
-    "${AOM_ROOT}/aom_util/endian_inl.h")
-
-if (CONFIG_BITSTREAM_DEBUG)
-  set(AOM_UTIL_SOURCES
-      ${AOM_UTIL_SOURCES}
-      "${AOM_ROOT}/aom_util/debug_util.c"
-      "${AOM_ROOT}/aom_util/debug_util.h")
-endif ()
+    "${AOM_ROOT}/aom_util/endian_inl.h"
+    "${AOM_ROOT}/aom_util/debug_util.c"
+    "${AOM_ROOT}/aom_util/debug_util.h")
 
 # Creates the aom_util build target and makes libaom depend on it. The libaom
 # target must exist before this function is called.
diff --git a/aom_util/aom_util.mk b/aom_util/aom_util.mk
index 14b484a..c9e26a0 100644
--- a/aom_util/aom_util.mk
+++ b/aom_util/aom_util.mk
@@ -13,6 +13,6 @@
 UTIL_SRCS-yes += aom_util.mk
 UTIL_SRCS-yes += aom_thread.c
 UTIL_SRCS-yes += aom_thread.h
-UTIL_SRCS-$(CONFIG_BITSTREAM_DEBUG) += debug_util.c
-UTIL_SRCS-$(CONFIG_BITSTREAM_DEBUG) += debug_util.h
+UTIL_SRCS-yes += debug_util.c
+UTIL_SRCS-yes += debug_util.h
 UTIL_SRCS-yes += endian_inl.h
diff --git a/aom_util/debug_util.c b/aom_util/debug_util.c
index ea73df8..4f9bdc1 100644
--- a/aom_util/debug_util.c
+++ b/aom_util/debug_util.c
@@ -14,17 +14,6 @@
 #include <string.h>
 #include "aom_util/debug_util.h"
 
-#define QUEUE_MAX_SIZE 2000000
-static int result_queue[QUEUE_MAX_SIZE];
-static int nsymbs_queue[QUEUE_MAX_SIZE];
-static aom_cdf_prob cdf_queue[QUEUE_MAX_SIZE][16];
-
-static int queue_r = 0;
-static int queue_w = 0;
-static int queue_prev_w = -1;
-static int skip_r = 0;
-static int skip_w = 0;
-
 static int frame_idx_w = 0;
 
 static int frame_idx_r = 0;
@@ -37,6 +26,18 @@
 
 int bitstream_queue_get_frame_read(void) { return frame_idx_r; }
 
+#if CONFIG_BITSTREAM_DEBUG
+#define QUEUE_MAX_SIZE 2000000
+static int result_queue[QUEUE_MAX_SIZE];
+static int nsymbs_queue[QUEUE_MAX_SIZE];
+static aom_cdf_prob cdf_queue[QUEUE_MAX_SIZE][16];
+
+static int queue_r = 0;
+static int queue_w = 0;
+static int queue_prev_w = -1;
+static int skip_r = 0;
+static int skip_w = 0;
+
 void bitstream_queue_set_skip_write(int skip) { skip_w = skip; }
 
 void bitstream_queue_set_skip_read(int skip) { skip_r = skip; }
@@ -74,3 +75,178 @@
     }
   }
 }
+#endif  // CONFIG_BITSTREAM_DEBUG
+
+#if CONFIG_MISMATCH_DEBUG
+static int frame_buf_idx_r = 0;  // slot the mismatch_check_* functions read
+static int frame_buf_idx_w = 0;  // slot the mismatch_record_* functions write
+enum { max_frame_buf_num = 5 };  // slot count; also sizes both arrays below
+#define MAX_FRAME_STRIDE 1280
+#define MAX_FRAME_HEIGHT 720
+static uint8_t frame_pre[max_frame_buf_num][3]  // prediction only
+                        [MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT];
+static uint8_t frame_tx[max_frame_buf_num][3]  // prediction + txfm
+                       [MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT];
+static const int frame_stride = MAX_FRAME_STRIDE;
+static const int frame_height = MAX_FRAME_HEIGHT;
+static const int frame_size = MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT;
+void mismatch_move_frame_idx_w(void) {
+  frame_buf_idx_w = (frame_buf_idx_w + 1) % max_frame_buf_num;  // next slot
+  if (frame_buf_idx_w == frame_buf_idx_r) {  // write slot reached read slot
+    printf("frame_buf overflow\n");
+    assert(0);
+  }
+}
+
+void mismatch_reset_frame(void) {
+  for (int plane = 0; plane < 3; ++plane) {  // zero both buffers' write slot
+    memset(frame_pre[frame_buf_idx_w][plane], 0,
+           sizeof(frame_pre[frame_buf_idx_w][plane][0]) * frame_size);
+    memset(frame_tx[frame_buf_idx_w][plane], 0,
+           sizeof(frame_tx[frame_buf_idx_w][plane][0]) * frame_size);
+  }
+}
+
+void mismatch_move_frame_idx_r(void) {
+  if (frame_buf_idx_w == frame_buf_idx_r) {  // read slot already at write slot
+    printf("frame_buf underflow\n");
+    assert(0);
+  }
+  frame_buf_idx_r = (frame_buf_idx_r + 1) % max_frame_buf_num;  // next slot
+}
+
+void mismatch_record_block_pre(const uint8_t *src, int src_stride, int plane,
+                               int pixel_c, int pixel_r, int blk_w, int blk_h) {
+  if (pixel_c + blk_w > frame_stride || pixel_r + blk_h > frame_height) {
+    printf("frame_buf undersized\n");  // block extends past the buffer edge
+    assert(0);
+  }
+  for (int r = 0; r < blk_h; ++r) {  // copy src into frame_pre's write slot
+    for (int c = 0; c < blk_w; ++c) {
+      frame_pre[frame_buf_idx_w][plane][(r + pixel_r) * frame_stride + c +
+                                        pixel_c] = src[r * src_stride + c];
+    }
+  }
+#if 0  // enable to log the call that covers (ref_pixel_c, ref_pixel_r)
+  int ref_frame_idx = 3;
+  int ref_plane = 1;
+  int ref_pixel_c = 162;
+  int ref_pixel_r = 16;
+  if (frame_idx_w == ref_frame_idx && plane == ref_plane &&
+      ref_pixel_c >= pixel_c && ref_pixel_c < pixel_c + blk_w &&
+      ref_pixel_r >= pixel_r && ref_pixel_r < pixel_r + blk_h) {
+    printf(
+        "\nrecord_block_pre frame_idx %d plane %d pixel_c %d pixel_r %d blk_w "
+        "%d blk_h %d\n",
+        frame_idx_w, plane, pixel_c, pixel_r, blk_w, blk_h);
+  }
+#endif
+}
+void mismatch_record_block_tx(const uint8_t *src, int src_stride, int plane,
+                              int pixel_c, int pixel_r, int blk_w, int blk_h) {
+  if (pixel_c + blk_w > frame_stride || pixel_r + blk_h > frame_height) {
+    printf("frame_buf undersized\n");  // block extends past the buffer edge
+    assert(0);
+  }
+  for (int r = 0; r < blk_h; ++r) {  // copy src into frame_tx's write slot
+    for (int c = 0; c < blk_w; ++c) {
+      frame_tx[frame_buf_idx_w][plane][(r + pixel_r) * frame_stride + c +
+                                       pixel_c] = src[r * src_stride + c];
+    }
+  }
+#if 0  // enable to log the call that covers (ref_pixel_c, ref_pixel_r)
+  int ref_frame_idx = 3;
+  int ref_plane = 1;
+  int ref_pixel_c = 162;
+  int ref_pixel_r = 16;
+  if (frame_idx_w == ref_frame_idx && plane == ref_plane &&
+      ref_pixel_c >= pixel_c && ref_pixel_c < pixel_c + blk_w &&
+      ref_pixel_r >= pixel_r && ref_pixel_r < pixel_r + blk_h) {
+    printf(
+        "\nrecord_block_tx frame_idx %d plane %d pixel_c %d pixel_r %d blk_w "
+        "%d blk_h %d\n",
+        frame_idx_w, plane, pixel_c, pixel_r, blk_w, blk_h);
+  }
+#endif
+}
+// Prints both blocks and asserts when src differs from the recorded frame_pre.
+void mismatch_check_block_pre(const uint8_t *src, int src_stride, int plane,
+                              int pixel_c, int pixel_r, int blk_w, int blk_h) {
+  if (pixel_c + blk_w > frame_stride || pixel_r + blk_h > frame_height) {
+    printf("frame_buf undersized\n");  // block extends past the buffer edge
+    assert(0);
+  }
+  const uint8_t *const enc = frame_pre[frame_buf_idx_r][plane];  // read slot
+  int mismatch = 0;
+  for (int r = 0; r < blk_h; ++r) {
+    for (int c = 0; c < blk_w; ++c) {
+      if (enc[(r + pixel_r) * frame_stride + c + pixel_c] !=
+          src[r * src_stride + c]) {
+        mismatch = 1;
+      }
+    }
+  }
+  if (mismatch) {
+    printf(
+        "\ncheck_block_pre failed frame_idx %d plane %d pixel_c %d pixel_r "
+        "%d blk_w %d blk_h %d\n",
+        frame_idx_r, plane, pixel_c, pixel_r, blk_w, blk_h);
+    printf("enc\n");
+    for (int rr = 0; rr < blk_h; ++rr) {
+      for (int cc = 0; cc < blk_w; ++cc) {
+        printf("%d ", enc[(rr + pixel_r) * frame_stride + cc + pixel_c]);
+      }
+      printf("\n");
+    }
+
+    printf("dec\n");
+    for (int rr = 0; rr < blk_h; ++rr) {
+      for (int cc = 0; cc < blk_w; ++cc) {
+        printf("%d ", src[rr * src_stride + cc]);
+      }
+      printf("\n");
+    }
+    assert(0);
+  }
+}
+// Prints both blocks and asserts when src differs from the recorded frame_tx.
+void mismatch_check_block_tx(const uint8_t *src, int src_stride, int plane,
+                             int pixel_c, int pixel_r, int blk_w, int blk_h) {
+  if (pixel_c + blk_w > frame_stride || pixel_r + blk_h > frame_height) {
+    printf("frame_buf undersized\n");  // block extends past the buffer edge
+    assert(0);
+  }
+  const uint8_t *const enc = frame_tx[frame_buf_idx_r][plane];  // read slot
+  int mismatch = 0;
+  for (int r = 0; r < blk_h; ++r) {
+    for (int c = 0; c < blk_w; ++c) {
+      if (enc[(r + pixel_r) * frame_stride + c + pixel_c] !=
+          src[r * src_stride + c]) {
+        mismatch = 1;
+      }
+    }
+  }
+  if (mismatch) {
+    printf(
+        "\ncheck_block_tx failed frame_idx %d plane %d pixel_c %d pixel_r "
+        "%d blk_w %d blk_h %d\n",
+        frame_idx_r, plane, pixel_c, pixel_r, blk_w, blk_h);
+    printf("enc\n");
+    for (int rr = 0; rr < blk_h; ++rr) {
+      for (int cc = 0; cc < blk_w; ++cc) {
+        printf("%d ", enc[(rr + pixel_r) * frame_stride + cc + pixel_c]);
+      }
+      printf("\n");
+    }
+
+    printf("dec\n");
+    for (int rr = 0; rr < blk_h; ++rr) {
+      for (int cc = 0; cc < blk_w; ++cc) {
+        printf("%d ", src[rr * src_stride + cc]);
+      }
+      printf("\n");
+    }
+    assert(0);
+  }
+}
+#endif  // CONFIG_MISMATCH_DEBUG
diff --git a/aom_util/debug_util.h b/aom_util/debug_util.h
index 3740620..11c1296 100644
--- a/aom_util/debug_util.h
+++ b/aom_util/debug_util.h
@@ -19,6 +19,12 @@
 extern "C" {
 #endif
 
+void bitstream_queue_set_frame_write(int frame_idx);
+int bitstream_queue_get_frame_write(void);
+void bitstream_queue_set_frame_read(int frame_idx);
+int bitstream_queue_get_frame_read(void);
+
+#if CONFIG_BITSTREAM_DEBUG
 /* This is a debug tool used to detect bitstream error. On encoder side, it
  * pushes each bit and probability into a queue before the bit is written into
  * the Arithmetic coder. On decoder side, whenever a bit is read out from the
@@ -35,10 +41,21 @@
 void bitstream_queue_push(int result, const aom_cdf_prob *cdf, int nsymbs);
 void bitstream_queue_set_skip_write(int skip);
 void bitstream_queue_set_skip_read(int skip);
-void bitstream_queue_set_frame_write(int frame_idx);
-int bitstream_queue_get_frame_write(void);
-void bitstream_queue_set_frame_read(int frame_idx);
-int bitstream_queue_get_frame_read(void);
+#endif  // CONFIG_BITSTREAM_DEBUG
+
+#if CONFIG_MISMATCH_DEBUG
+void mismatch_move_frame_idx_w(void);  // advance the record (write) slot
+void mismatch_move_frame_idx_r(void);  // advance the check (read) slot
+void mismatch_reset_frame(void);       // zero the current write slot
+void mismatch_record_block_pre(const uint8_t *src, int src_stride, int plane,
+                               int pixel_c, int pixel_r, int blk_w, int blk_h);
+void mismatch_record_block_tx(const uint8_t *src, int src_stride, int plane,
+                              int pixel_c, int pixel_r, int blk_w, int blk_h);
+void mismatch_check_block_pre(const uint8_t *src, int src_stride, int plane,
+                              int pixel_c, int pixel_r, int blk_w, int blk_h);
+void mismatch_check_block_tx(const uint8_t *src, int src_stride, int plane,
+                             int pixel_c, int pixel_r, int blk_w, int blk_h);
+#endif  // CONFIG_MISMATCH_DEBUG
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index fe9dd7b..1cd667a 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -470,6 +470,17 @@
          block_size_allowed;
 }
 
+#if CONFIG_MISMATCH_DEBUG
+static INLINE void mi_to_pixel_loc(int *pixel_c, int *pixel_r, int mi_col,
+                                   int mi_row, int tx_blk_col, int tx_blk_row,
+                                   int subsampling_x, int subsampling_y) {
+  const int tx_c = tx_blk_col << tx_size_wide_log2[0];  // tx-block x offset
+  const int tx_r = tx_blk_row << tx_size_high_log2[0];  // tx-block y offset
+  *pixel_c = ((mi_col >> subsampling_x) << MI_SIZE_LOG2) + tx_c;
+  *pixel_r = ((mi_row >> subsampling_y) << MI_SIZE_LOG2) + tx_r;
+}
+#endif
+
 enum mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 };
 
 struct buf_2d {
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 22517f4..7200a91 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -29,9 +29,9 @@
 #include "aom_scale/aom_scale.h"
 #include "aom_util/aom_thread.h"
 
-#if CONFIG_BITSTREAM_DEBUG
+#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
 #include "aom_util/debug_util.h"
-#endif  // CONFIG_BITSTREAM_DEBUG
+#endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
 
 #include "av1/common/alloccommon.h"
 #include "av1/common/cdef.h"
@@ -229,7 +229,10 @@
                                   aom_reader *r, MB_MODE_INFO *const mbmi,
                                   int plane, BLOCK_SIZE plane_bsize,
                                   int blk_row, int blk_col, int block,
-                                  TX_SIZE tx_size, int *eob_total) {
+                                  TX_SIZE tx_size, int *eob_total, int mi_row,
+                                  int mi_col) {
+  (void)mi_row;
+  (void)mi_col;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
   const int tx_row = blk_row >> (1 - pd->subsampling_y);
@@ -278,11 +281,20 @@
     ++cm->txb_count;
 #endif
 
-    inverse_transform_block(
-        xd, plane, tx_type, tx_size,
+    uint8_t *dst =
         &pd->dst
-             .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]],
-        pd->dst.stride, max_scan_line, eob, cm->reduced_tx_set_used);
+             .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
+    inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
+                            max_scan_line, eob, cm->reduced_tx_set_used);
+#if CONFIG_MISMATCH_DEBUG
+    int pixel_c, pixel_r;
+    int blk_w = block_size_wide[plane_bsize];
+    int blk_h = block_size_high[plane_bsize];
+    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, blk_col, blk_row,
+                    pd->subsampling_x, pd->subsampling_y);
+    mismatch_check_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r, blk_w,
+                            blk_h);
+#endif
     *eob_total += eob;
   } else {
     const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
@@ -302,7 +314,8 @@
         if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
 
         decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize, offsetr,
-                              offsetc, block, sub_txs, eob_total);
+                              offsetc, block, sub_txs, eob_total, mi_row,
+                              mi_col);
         block += sub_step;
       }
     }
@@ -496,6 +509,21 @@
     if (mbmi->motion_mode == OBMC_CAUSAL) {
       av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
     }
+
+#if CONFIG_MISMATCH_DEBUG
+    for (int plane = 0; plane < 3; ++plane) {
+      const struct macroblockd_plane *pd = &xd->plane[plane];
+      int pixel_c, pixel_r;
+      mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
+                      pd->subsampling_x, pd->subsampling_y);
+      if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
+                               pd->subsampling_y))
+        continue;
+      mismatch_check_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c,
+                               pixel_r, pd->width, pd->height);
+    }
+#endif
+
     // Reconstruction
     if (!mbmi->skip) {
       int eobtotal = 0;
@@ -552,7 +580,7 @@
               for (blk_col = col; blk_col < unit_width; blk_col += bw_var_tx) {
                 decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize,
                                       blk_row, blk_col, block, max_tx_size,
-                                      &eobtotal);
+                                      &eobtotal, mi_row, mi_col);
                 block += step;
               }
             }
@@ -3315,6 +3343,9 @@
 #if CONFIG_BITSTREAM_DEBUG
   bitstream_queue_set_frame_read(cm->current_video_frame * 2 + cm->show_frame);
 #endif
+#if CONFIG_MISMATCH_DEBUG
+  mismatch_move_frame_idx_r();
+#endif
 
   for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
     cm->global_motion[i] = default_warp_params;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 170d403..7f511bf 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -23,6 +23,10 @@
 #include "aom_ports/aom_timer.h"
 #include "aom_ports/system_state.h"
 
+#if CONFIG_MISMATCH_DEBUG
+#include "aom_util/debug_util.h"
+#endif  // CONFIG_MISMATCH_DEBUG
+
 #if CONFIG_CFL
 #include "av1/common/cfl.h"
 #endif
@@ -4150,6 +4154,10 @@
 #endif  // CONFIG_FRAME_SIGN_BIAS
 #endif  // CONFIG_FRAME_MARKER
 
+#if CONFIG_MISMATCH_DEBUG
+  mismatch_reset_frame();
+#endif
+
   // In the longer term the encoder should be generalized to match the
   // decoder such that we allow compound where one of the 3 buffers has a
   // different sign bias and that buffer is then the fixed ref. However, this
@@ -4709,7 +4717,23 @@
       av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
     }
 
-    av1_encode_sb((AV1_COMMON *)cm, x, block_size, mi_row, mi_col);
+#if CONFIG_MISMATCH_DEBUG
+    if (dry_run == OUTPUT_ENABLED) {
+      for (int plane = 0; plane < 3; ++plane) {
+        const struct macroblockd_plane *pd = &xd->plane[plane];
+        int pixel_c, pixel_r;
+        mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
+                        pd->subsampling_x, pd->subsampling_y);
+        if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
+                                 pd->subsampling_y))
+          continue;
+        mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c,
+                                  pixel_r, pd->width, pd->height);
+      }
+    }
+#endif
+
+    av1_encode_sb((AV1_COMMON *)cm, x, block_size, mi_row, mi_col, dry_run);
     if (mbmi->skip) mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
     av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, block_size, rate,
                           tile_data->allow_update_cdf);
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index c98bfef..915b388 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -18,6 +18,10 @@
 #include "aom_mem/aom_mem.h"
 #include "aom_ports/mem.h"
 
+#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
+#include "aom_util/debug_util.h"
+#endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
+
 #include "av1/common/idct.h"
 #include "av1/common/reconinter.h"
 #include "av1/common/reconintra.h"
@@ -33,7 +37,6 @@
 #include "av1/common/daala_inv_txfm.h"
 #endif
 #include "av1/encoder/rd.h"
-#include "av1/encoder/tokenize.h"
 
 #if CONFIG_CFL
 #include "av1/common/cfl.h"
@@ -610,7 +613,11 @@
 }
 
 static void encode_block(int plane, int block, int blk_row, int blk_col,
-                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg,
+                         int mi_row, int mi_col, RUN_TYPE dry_run) {
+  (void)mi_row;
+  (void)mi_col;
+  (void)dry_run;
   struct encode_b_args *const args = arg;
   AV1_COMMON *cm = args->cm;
   MACROBLOCK *const x = args->x;
@@ -654,11 +661,25 @@
                                 pd->dst.stride, p->eobs[block],
                                 cm->reduced_tx_set_used);
   }
+#if CONFIG_MISMATCH_DEBUG
+  if (dry_run == OUTPUT_ENABLED) {
+    int pixel_c, pixel_r;
+    int blk_w = block_size_wide[plane_bsize];
+    int blk_h = block_size_high[plane_bsize];
+    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, blk_col, blk_row,
+                    pd->subsampling_x, pd->subsampling_y);
+    mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
+                             blk_w, blk_h);
+  }
+#endif
 }
 
 static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
                                BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
-                               void *arg) {
+                               void *arg, int mi_row, int mi_col,
+                               RUN_TYPE dry_run) {
+  (void)mi_row;
+  (void)mi_col;
   struct encode_b_args *const args = arg;
   MACROBLOCK *const x = args->x;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -682,7 +703,8 @@
       || pd->subsampling_x || pd->subsampling_y
 #endif  // DISABLE_VARTX_FOR_CHROMA
       ) {
-    encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
+    encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg,
+                 mi_row, mi_col, dry_run);
   } else {
     assert(tx_size < TX_SIZES_ALL);
     const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
@@ -702,7 +724,7 @@
         if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
 
         encode_block_inter(plane, block, offsetr, offsetc, plane_bsize, sub_txs,
-                           arg);
+                           arg, mi_row, mi_col, dry_run);
         block += step;
       }
     }
@@ -768,7 +790,8 @@
 }
 
 void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
-                   int mi_col) {
+                   int mi_col, RUN_TYPE dry_run) {
+  (void)dry_run;
   MACROBLOCKD *const xd = &x->e_mbd;
   struct optimize_ctx ctx;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -841,7 +864,7 @@
         for (blk_row = idy; blk_row < unit_height; blk_row += bh) {
           for (blk_col = idx; blk_col < unit_width; blk_col += bw) {
             encode_block_inter(plane, block, blk_row, blk_col, plane_bsize,
-                               max_tx_size, &arg);
+                               max_tx_size, &arg, mi_row, mi_col, dry_run);
             block += step;
           }
         }
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index adea7cd..cf7d3dd 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -15,7 +15,7 @@
 #include "./aom_config.h"
 #include "av1/common/onyxc_int.h"
 #include "av1/encoder/block.h"
-
+#include "av1/encoder/tokenize.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -44,7 +44,7 @@
 } AV1_XFORM_QUANT;
 
 void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
-                   int mi_col);
+                   int mi_col, RUN_TYPE dry_run);
 void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize);
 void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
                      int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 96a6fd8..0aac2cb 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -68,9 +68,9 @@
 #include "aom_ports/mem.h"
 #include "aom_ports/system_state.h"
 #include "aom_scale/aom_scale.h"
-#if CONFIG_BITSTREAM_DEBUG
+#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
 #include "aom_util/debug_util.h"
-#endif  // CONFIG_BITSTREAM_DEBUG
+#endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
 
 #if CONFIG_ENTROPY_STATS
 FRAME_COUNTS aggregate_fc;
@@ -5877,6 +5877,9 @@
 #if !CONFIG_XIPHRC
 static void Pass2Encode(AV1_COMP *cpi, size_t *size, uint8_t *dest,
                         unsigned int *frame_flags) {
+#if CONFIG_MISMATCH_DEBUG
+  mismatch_move_frame_idx_w();
+#endif
   encode_frame_to_data_rate(cpi, size, dest, 0, frame_flags);
 
   // Do not do post-encoding update for those frames that do not have a spot in
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 62db8dd..64ec98b 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -85,6 +85,7 @@
 # Debugging flags.
 set(CONFIG_BITSTREAM_DEBUG 0 CACHE NUMBER "Bitstream debugging flag.")
 set(CONFIG_DEBUG 0 CACHE NUMBER "Debug build flag.")
+set(CONFIG_MISMATCH_DEBUG 0 CACHE NUMBER "Mismatch debugging flag.")
 
 # Testing flags.
 set(CONFIG_DECODE_PERF_TESTS 0 CACHE NUMBER "Enables decoder performance test.")
diff --git a/configure b/configure
index d82d5e4..6cbfbed 100755
--- a/configure
+++ b/configure
@@ -374,6 +374,7 @@
     decode_perf_tests
     encode_perf_tests
     bitstream_debug
+    mismatch_debug
     symbolrate
     coefficient_range_checking
     lowbitdepth
@@ -433,6 +434,7 @@
     encode_perf_tests
     coefficient_range_checking
     bitstream_debug
+    mismatch_debug
     symbolrate
     lowbitdepth
     highbitdepth