Fix bitstream debug tool w/ CONFIG_BYPASS_IMPROVEMENT

Fix the bitstream debugging tool (CONFIG_BITSTREAM_DEBUG)
when bypass improvement (CONFIG_BYPASS_IMPROVEMENT) is enabled.

Fixes #138
diff --git a/aom_dsp/bitreader.h b/aom_dsp/bitreader.h
index aff9f25..ee999ba 100644
--- a/aom_dsp/bitreader.h
+++ b/aom_dsp/bitreader.h
@@ -170,7 +170,7 @@
       assert(0);
     }
   }
-#endif
+#endif  // CONFIG_BITSTREAM_DEBUG
 
 #if CONFIG_ACCOUNTING
   if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
@@ -183,9 +183,51 @@
   return bit;
 }
 
+#if CONFIG_BITSTREAM_DEBUG
+// Pop a literal (one or more equi-probably symbols) and check
+// with decoded literal value.
+static INLINE void bitstream_queue_pop_literal(int data, int bits) {
+  for (int b = bits - 1; b >= 0; b--) {
+    int bit = 1 & (data >> b);
+    int i;
+    int ref_bit, ref_nsymbs;
+    aom_cdf_prob ref_cdf[16];
+    const int queue_r = bitstream_queue_get_read();
+    const int frame_idx = aom_bitstream_queue_get_frame_read();
+    bitstream_queue_pop(&ref_bit, ref_cdf, &ref_nsymbs);
+    if (ref_nsymbs != 2) {
+      fprintf(stderr,
+              "\n *** [bit] nsymbs error, frame_idx_r %d nsymbs %d ref_nsymbs "
+              "%d queue_r %d\n",
+              frame_idx, 2, ref_nsymbs, queue_r);
+      assert(0);
+    }
+    if ((ref_nsymbs != 2) || (ref_cdf[0] != 128) || (ref_cdf[1] != 32767)) {
+      fprintf(stderr,
+              "\n *** [bit] cdf error, frame_idx_r %d cdf {%d, %d} ref_cdf {%d",
+              frame_idx, 128, 32767, ref_cdf[0]);
+      for (i = 1; i < ref_nsymbs; ++i) fprintf(stderr, ", %d", ref_cdf[i]);
+      fprintf(stderr, "} queue_r %d literal %d size %d bit %d\n", queue_r, data,
+              bits, b);
+      assert(0);
+    }
+    if (bit != ref_bit) {
+      fprintf(stderr,
+              "\n *** [bit] symb error, frame_idx_r %d symb %d ref_symb %d "
+              "queue_r %d literal %d size %d bit %d\n",
+              frame_idx, bit, ref_bit, queue_r, data, bits, b);
+      assert(0);
+    }
+  }
+}
+#endif  // CONFIG_BITSTREAM_DEBUG
+
 #if CONFIG_BYPASS_IMPROVEMENT
 static INLINE int aom_read_bypass_(aom_reader *r ACCT_STR_PARAM) {
   int ret = od_ec_decode_literal_bypass(&r->ec, 1);
+#if CONFIG_BITSTREAM_DEBUG
+  bitstream_queue_pop_literal(ret, 1);
+#endif  // CONFIG_BITSTREAM_DEBUG
 #if CONFIG_ACCOUNTING
   if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
 #if CONFIG_THROUGHPUT_ANALYSIS
@@ -222,6 +264,9 @@
     literal += od_ec_decode_literal_bypass(&r->ec, n);
     n_bits -= n;
   }
+#if CONFIG_BITSTREAM_DEBUG
+  bitstream_queue_pop_literal(literal, bits);
+#endif  // CONFIG_BITSTREAM_DEBUG
 #if CONFIG_ACCOUNTING
   if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
 #if CONFIG_THROUGHPUT_ANALYSIS
@@ -239,11 +284,16 @@
 }
 
 #if CONFIG_BYPASS_IMPROVEMENT
-// Deocode unary coded symbol with truncation at max_bits.
-static INLINE int aom_read_unary_(aom_reader *r, int max_bits ACCT_STR_PARAM) {
-  int ret = od_ec_decode_unary_bypass(&r->ec, max_bits);
+// Deocode unary coded symbol with truncation at max_nbits.
+static INLINE int aom_read_unary_(aom_reader *r, int max_nbits ACCT_STR_PARAM) {
+  int ret = od_ec_decode_unary_bypass(&r->ec, max_nbits);
+#if CONFIG_BITSTREAM_DEBUG
+  int nbits = ret < max_nbits ? ret + 1 : max_nbits;
+  int data = ret == max_nbits ? 0 : 1;
+  bitstream_queue_pop_literal(data, nbits);
+#endif  // CONFIG_BITSTREAM_DEBUG
 #if CONFIG_ACCOUNTING
-  int n_bits = ret < max_bits ? ret + 1 : max_bits;
+  int n_bits = ret < max_nbits ? ret + 1 : max_nbits;
   if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
 #if CONFIG_THROUGHPUT_ANALYSIS
   aom_update_symb_counts(r, 1, 0, n_bits);
@@ -298,7 +348,7 @@
       assert(0);
     }
   }
-#endif
+#endif  // CONFIG_BITSTREAM_DEBUG
 
 #if CONFIG_ACCOUNTING
   if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
diff --git a/aom_dsp/bitwriter.h b/aom_dsp/bitwriter.h
index 8eca1a6..c73e3ff 100644
--- a/aom_dsp/bitwriter.h
+++ b/aom_dsp/bitwriter.h
@@ -69,18 +69,33 @@
 
 int aom_stop_encode(aom_writer *w);
 
+#if CONFIG_BITSTREAM_DEBUG
+// Push a literal (one or more equi-probably symbols) into
+// the bitstream debug queue, in the same order it is
+// encoded into the stream (msb to lsb).
+static INLINE void bitstream_queue_push_literal(int data, int bits) {
+  aom_cdf_prob cdf[2] = { 128, 32767 };
+  for (int bit = bits - 1; bit >= 0; bit--) {
+    bitstream_queue_push(1 & (data >> bit), cdf, 2);
+  }
+}
+#endif  // CONFIG_BITSTREAM_DEBUG
+
 static INLINE void aom_write(aom_writer *w, int bit, int probability) {
   int p = (0x7FFFFF - (probability << 15) + probability) >> 8;
 #if CONFIG_BITSTREAM_DEBUG
   aom_cdf_prob cdf[2] = { (aom_cdf_prob)p, 32767 };
   bitstream_queue_push(bit, cdf, 2);
-#endif
+#endif  // CONFIG_BITSTREAM_DEBUG
 
   od_ec_encode_bool_q15(&w->ec, bit, p);
 }
 
 static INLINE void aom_write_bit(aom_writer *w, int bit) {
 #if CONFIG_BYPASS_IMPROVEMENT
+#if CONFIG_BITSTREAM_DEBUG
+  bitstream_queue_push_literal(bit, 1);
+#endif  // CONFIG_BITSTREAM_DEBUG
   od_ec_encode_literal_bypass(&w->ec, bit, 1);
 #else
   aom_write(w, bit, 128);  // aom_prob_half
@@ -89,6 +104,9 @@
 
 static INLINE void aom_write_literal(aom_writer *w, int data, int bits) {
 #if CONFIG_BYPASS_IMPROVEMENT
+#if CONFIG_BITSTREAM_DEBUG
+  bitstream_queue_push_literal(data, bits);
+#endif  // CONFIG_BITSTREAM_DEBUG
   int n;
   while (bits > 0) {
     n = bits >= 8 ? 8 : bits;
@@ -107,7 +125,7 @@
                                  const aom_cdf_prob *cdf, int nsymbs) {
 #if CONFIG_BITSTREAM_DEBUG
   bitstream_queue_push(symb, cdf, nsymbs);
-#endif
+#endif  // CONFIG_BITSTREAM_DEBUG
 
   od_ec_encode_cdf_q15(&w->ec, symb, cdf, nsymbs);
 }