Merge "Moving txfm_size bits before prediction mode bits." into experimental
diff --git a/configure b/configure
index 8c22505..f22a2dd 100755
--- a/configure
+++ b/configure
@@ -240,7 +240,6 @@
 EXPERIMENT_LIST="
     csm
     implicit_segmentation
-    loop_dering
     oneshotq
     multiple_arf
     non420
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index e53e107..9626540 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -91,6 +91,7 @@
   return mode >= NEARESTMV && mode <= SPLITMV;
 }
 
+#define INTRA_MODE_COUNT (TM_PRED + 1)
 
 // Segment level features.
 typedef enum {
@@ -126,25 +127,7 @@
 
 #define WHT_UPSCALE_FACTOR 2
 
-typedef enum {
-  B_DC_PRED,          /* average of above and left pixels */
-  B_V_PRED,          /* vertical prediction */
-  B_H_PRED,          /* horizontal prediction */
-  B_D45_PRED,
-  B_D135_PRED,
-  B_D117_PRED,
-  B_D153_PRED,
-  B_D27_PRED,
-  B_D63_PRED,
-  B_TM_PRED,
-
-  B_MODE_COUNT
-} B_PREDICTION_MODE;
-
-#define VP9_BINTRAMODES (B_MODE_COUNT)
-
-#define VP9_KF_BINTRAMODES (VP9_BINTRAMODES)   /* 10 */
-#define VP9_NKF_BINTRAMODES (VP9_BINTRAMODES)  /* 10 */
+#define VP9_BINTRAMODES INTRA_MODE_COUNT
 
 /* For keyframes, intra block modes are predicted by the (already decoded)
    modes for the Y blocks to the left and above us; for interframes, there
@@ -254,7 +237,7 @@
 
 typedef struct {
   MB_MODE_INFO mbmi;
-  union b_mode_info bmi[16];
+  union b_mode_info bmi[4];
 } MODE_INFO;
 
 struct scale_factors {
@@ -532,25 +515,6 @@
   return subsize;
 }
 
-// convert MB_PREDICTION_MODE to B_PREDICTION_MODE
-static MB_PREDICTION_MODE pred_mode_conv(MB_PREDICTION_MODE mode) {
-  switch (mode) {
-    case DC_PRED: return DC_PRED;
-    case V_PRED: return V_PRED;
-    case H_PRED: return H_PRED;
-    case TM_PRED: return TM_PRED;
-    case D45_PRED: return D45_PRED;
-    case D135_PRED: return D135_PRED;
-    case D117_PRED: return D117_PRED;
-    case D153_PRED: return D153_PRED;
-    case D27_PRED: return D27_PRED;
-    case D63_PRED: return D63_PRED;
-    default:
-       assert(0);
-       return MB_MODE_COUNT;  // Dummy value
-  }
-}
-
 // transform mapping
 static TX_TYPE txfm_map(MB_PREDICTION_MODE bmode) {
   switch (bmode) {
@@ -573,7 +537,6 @@
   }
 }
 
-
 static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
   TX_TYPE tx_type = DCT_DCT;
   if (xd->lossless)
@@ -582,7 +545,7 @@
     tx_type = txfm_map(
         xd->mode_info_context->bmi[ib].as_mode.first);
   } else if (xd->mode_info_context->mbmi.mode <= TM_PRED) {
-    tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+    tx_type = txfm_map(xd->mode_info_context->mbmi.mode);
   }
   return tx_type;
 }
@@ -590,7 +553,7 @@
 static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
   TX_TYPE tx_type = DCT_DCT;
   if (xd->mode_info_context->mbmi.mode <= TM_PRED) {
-    tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+    tx_type = txfm_map(xd->mode_info_context->mbmi.mode);
   }
   return tx_type;
 }
@@ -598,7 +561,7 @@
 static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, int ib) {
   TX_TYPE tx_type = DCT_DCT;
   if (xd->mode_info_context->mbmi.mode <= TM_PRED) {
-    tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
+    tx_type = txfm_map(xd->mode_info_context->mbmi.mode);
   }
   return tx_type;
 }
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index bd062ff..b6252d9 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -60,8 +60,4 @@
   return (value + 15) & ~15;
 }
 
-// TODO(dkovalev): remove later
-#define HEADER_SIZE_IN_BYTES 4
-
-
 #endif  // VP9_COMMON_VP9_COMMON_H_
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index ddc7bbd..622f1dc 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -29,7 +29,7 @@
 
 static const unsigned int y_mode_cts  [VP9_YMODES] = {
   /* DC V   H  D45 135 117 153 D27 D63 TM i4X4 */
-  98, 19, 15, 14, 14, 14, 14, 12, 12, 13, 70
+  98, 19, 15, 14, 14, 14, 14, 12, 12, 13, 0
 };
 
 static const unsigned int uv_mode_cts [VP9_YMODES] [VP9_UV_MODES] = {
@@ -62,7 +62,7 @@
   { 122, 41, 35, 20, 20, 20, 20, 20, 20, 18}, /* I4X4 */
 };
 
-static const unsigned int bmode_cts[VP9_NKF_BINTRAMODES] = {
+static const unsigned int bmode_cts[VP9_BINTRAMODES] = {
   /* DC    V     H    D45   D135  D117  D153   D27   D63   TM  */
   43891, 10036, 3920, 3363, 2546, 5119, 2471, 1723, 3221, 17694
 };
@@ -89,29 +89,16 @@
 };
 
 /* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
-
-const vp9_tree_index vp9_kf_bmode_tree[VP9_KF_BINTRAMODES * 2 - 2] = {
-  -B_DC_PRED, 2,                      /* 0 = DC_NODE */
-  -B_TM_PRED, 4,                      /* 1 = TM_NODE */
-  -B_V_PRED, 6,                       /* 2 = V_NODE */
-  8, 12,                              /* 3 = COM_NODE */
-  -B_H_PRED, 10,                      /* 4 = H_NODE */
-  -B_D135_PRED, -B_D117_PRED,         /* 5 = D135_NODE */
-  -B_D45_PRED, 14,                    /* 6 = D45_NODE */
-  -B_D63_PRED, 16,                    /* 7 = D63_NODE */
-  -B_D153_PRED, -B_D27_PRED           /* 8 = D153_NODE */
-};
-
-const vp9_tree_index vp9_bmode_tree[VP9_NKF_BINTRAMODES * 2 - 2] = {
-  -B_DC_PRED, 2,                      /* 0 = DC_NODE */
-  -B_TM_PRED, 4,                      /* 1 = TM_NODE */
-  -B_V_PRED, 6,                       /* 2 = V_NODE */
-  8, 12,                              /* 3 = COM_NODE */
-  -B_H_PRED, 10,                      /* 4 = H_NODE */
-  -B_D135_PRED, -B_D117_PRED,         /* 5 = D135_NODE */
-  -B_D45_PRED, 14,                    /* 6 = D45_NODE */
-  -B_D63_PRED, 16,                    /* 7 = D63_NODE */
-  -B_D153_PRED, -B_D27_PRED           /* 8 = D153_NODE */
+const vp9_tree_index vp9_bmode_tree[VP9_BINTRAMODES * 2 - 2] = {
+  -DC_PRED, 2,                      /* 0 = DC_NODE */
+  -TM_PRED, 4,                      /* 1 = TM_NODE */
+  -V_PRED, 6,                       /* 2 = V_NODE */
+  8, 12,                            /* 3 = COM_NODE */
+  -H_PRED, 10,                      /* 4 = H_NODE */
+  -D135_PRED, -D117_PRED,           /* 5 = D135_NODE */
+  -D45_PRED, 14,                    /* 6 = D45_NODE */
+  -D63_PRED, 16,                    /* 7 = D63_NODE */
+  -D153_PRED, -D27_PRED             /* 8 = D153_NODE */
 };
 
 /* Again, these trees use the same probability indices as their
@@ -173,8 +160,8 @@
   -PARTITION_VERT, -PARTITION_SPLIT
 };
 
-struct vp9_token vp9_bmode_encodings[VP9_NKF_BINTRAMODES];
-struct vp9_token vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES];
+struct vp9_token vp9_bmode_encodings[VP9_BINTRAMODES];
+struct vp9_token vp9_kf_bmode_encodings[VP9_BINTRAMODES];
 struct vp9_token vp9_ymode_encodings[VP9_YMODES];
 struct vp9_token vp9_sb_ymode_encodings[VP9_I32X32_MODES];
 struct vp9_token vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES];
@@ -222,14 +209,14 @@
 
 
 static void intra_bmode_probs_from_distribution(
-  vp9_prob p[VP9_NKF_BINTRAMODES - 1],
-  unsigned int branch_ct[VP9_NKF_BINTRAMODES - 1][2],
-  const unsigned int events[VP9_NKF_BINTRAMODES]) {
+  vp9_prob p[VP9_BINTRAMODES - 1],
+  unsigned int branch_ct[VP9_BINTRAMODES - 1][2],
+  const unsigned int events[VP9_BINTRAMODES]) {
   vp9_tree_probs_from_distribution(vp9_bmode_tree, p, branch_ct, events, 0);
 }
 
-void vp9_default_bmode_probs(vp9_prob p[VP9_NKF_BINTRAMODES - 1]) {
-  unsigned int branch_ct[VP9_NKF_BINTRAMODES - 1][2];
+void vp9_default_bmode_probs(vp9_prob p[VP9_BINTRAMODES - 1]) {
+  unsigned int branch_ct[VP9_BINTRAMODES - 1][2];
   intra_bmode_probs_from_distribution(p, branch_ct, bmode_cts);
 }
 
@@ -267,7 +254,7 @@
 const int vp9_is_interpolating_filter[SWITCHABLE + 1] = {0, 1, 1, 1, -1};
 
 void vp9_entropy_mode_init() {
-  vp9_tokens_from_tree(vp9_kf_bmode_encodings,   vp9_kf_bmode_tree);
+  vp9_tokens_from_tree(vp9_kf_bmode_encodings,   vp9_bmode_tree);
   vp9_tokens_from_tree(vp9_bmode_encodings,   vp9_bmode_tree);
   vp9_tokens_from_tree(vp9_ymode_encodings,   vp9_ymode_tree);
   vp9_tokens_from_tree(vp9_kf_ymode_encodings, vp9_kf_ymode_tree);
@@ -410,7 +397,7 @@
                       fc->uv_mode_counts[i], fc->pre_uv_mode_prob[i],
                       fc->uv_mode_prob[i], 0);
 
-  update_mode_probs(VP9_NKF_BINTRAMODES, vp9_bmode_tree,
+  update_mode_probs(VP9_BINTRAMODES, vp9_bmode_tree,
                     fc->bmode_counts, fc->pre_bmode_prob,
                     fc->bmode_prob, 0);
 
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index ec3bfed..8fbc6f2 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -18,13 +18,12 @@
 
 extern int vp9_mv_cont(const int_mv *l, const int_mv *a);
 
-extern const vp9_prob vp9_kf_default_bmode_probs[VP9_KF_BINTRAMODES]
-                                                [VP9_KF_BINTRAMODES]
-                                                [VP9_KF_BINTRAMODES -1 ];
+
+extern const vp9_prob vp9_kf_default_bmode_probs[VP9_BINTRAMODES]
+                                                [VP9_BINTRAMODES]
+                                                [VP9_BINTRAMODES -1 ];
 
 extern const vp9_tree_index vp9_bmode_tree[];
-extern const vp9_tree_index vp9_kf_bmode_tree[];
-
 extern const vp9_tree_index  vp9_ymode_tree[];
 extern const vp9_tree_index  vp9_kf_ymode_tree[];
 extern const vp9_tree_index  vp9_uv_mode_tree[];
@@ -34,8 +33,8 @@
 extern const vp9_tree_index  vp9_sb_mv_ref_tree[];
 extern const vp9_tree_index  vp9_sub_mv_ref_tree[];
 
-extern struct vp9_token vp9_bmode_encodings[VP9_NKF_BINTRAMODES];
-extern struct vp9_token vp9_kf_bmode_encodings[VP9_KF_BINTRAMODES];
+extern struct vp9_token vp9_bmode_encodings[VP9_BINTRAMODES];
+extern struct vp9_token vp9_kf_bmode_encodings[VP9_BINTRAMODES];
 extern struct vp9_token vp9_ymode_encodings[VP9_YMODES];
 extern struct vp9_token vp9_sb_ymode_encodings[VP9_I32X32_MODES];
 extern struct vp9_token vp9_sb_kf_ymode_encodings[VP9_I32X32_MODES];
@@ -68,7 +67,7 @@
                               MB_PREDICTION_MODE m,
                               const int context);
 
-void vp9_default_bmode_probs(vp9_prob dest[VP9_NKF_BINTRAMODES - 1]);
+void vp9_default_bmode_probs(vp9_prob dest[VP9_BINTRAMODES - 1]);
 
 void vp9_adapt_mode_probs(struct VP9Common *);
 
diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h
index 855ecab..bc5afb1 100644
--- a/vp9/common/vp9_findnearmv.h
+++ b/vp9/common/vp9_findnearmv.h
@@ -88,7 +88,7 @@
     --cur_mb;
 
     if (cur_mb->mbmi.mode <= TM_PRED) {
-      return pred_mode_conv(cur_mb->mbmi.mode);
+      return cur_mb->mbmi.mode;
     } else if (cur_mb->mbmi.mode == I4X4_PRED) {
       return ((cur_mb->bmi + 1 + b)->as_mode.first);
     } else {
@@ -106,7 +106,7 @@
     cur_mb -= mi_stride;
 
     if (cur_mb->mbmi.mode <= TM_PRED) {
-      return pred_mode_conv(cur_mb->mbmi.mode);
+      return cur_mb->mbmi.mode;
     } else if (cur_mb->mbmi.mode == I4X4_PRED) {
       return ((cur_mb->bmi + 2 + b)->as_mode.first);
     } else {
diff --git a/vp9/common/vp9_header.h b/vp9/common/vp9_header.h
new file mode 100644
index 0000000..96b04e7
--- /dev/null
+++ b/vp9/common/vp9_header.h
@@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_HEADER_H_
+#define VP9_COMMON_VP9_HEADER_H_
+
+/* 24 bits total */
+typedef struct {
+  unsigned int type: 1;
+  unsigned int version: 3;
+  unsigned int show_frame: 1;
+
+  /* Allow just under 2^19 bytes (4 megabits) for first partition */
+
+  unsigned int first_partition_length_in_bytes: 19;
+
+#ifdef PACKET_TESTING
+  unsigned int frame_number;
+  unsigned int update_gold: 1;
+  unsigned int uses_gold: 1;
+  unsigned int update_last: 1;
+  unsigned int uses_last: 1;
+#endif
+} VP9_HEADER;
+
+#ifdef PACKET_TESTING
+#define VP9_HEADER_SIZE 8
+#else
+#define VP9_HEADER_SIZE 3
+#endif
+
+#endif  // VP9_COMMON_VP9_HEADER_H_
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index f61230c..026ba91 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -19,22 +19,32 @@
 #include "vp9/common/vp9_idct.h"
 
 void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
+   0.5 shifts per pixel. */
   int i;
   int16_t output[16];
-  int a1, b1, c1, d1;
+  int a1, b1, c1, d1, e1;
   int16_t *ip = input;
   int16_t *op = output;
 
   for (i = 0; i < 4; i++) {
-    a1 = (ip[0] + ip[3]) >> WHT_UPSCALE_FACTOR;
-    b1 = (ip[1] + ip[2]) >> WHT_UPSCALE_FACTOR;
-    c1 = (ip[1] - ip[2]) >> WHT_UPSCALE_FACTOR;
-    d1 = (ip[0] - ip[3]) >> WHT_UPSCALE_FACTOR;
+    a1 = ip[0] >> WHT_UPSCALE_FACTOR;
+    c1 = ip[1] >> WHT_UPSCALE_FACTOR;
+    d1 = ip[2] >> WHT_UPSCALE_FACTOR;
+    b1 = ip[3] >> WHT_UPSCALE_FACTOR;
 
-    op[0] = (a1 + b1 + 1) >> 1;
-    op[1] = (c1 + d1) >> 1;
-    op[2] = (a1 - b1) >> 1;
-    op[3] = (d1 - c1) >> 1;
+    c1 = a1 - c1;
+    b1 += d1;
+    e1 = (c1 - b1) >> 1;
+    a1 -= e1;
+    d1 += e1;
+    b1 = a1 - b1;
+    c1 -= d1;
+
+    op[0] = a1;
+    op[1] = b1;
+    op[2] = c1;
+    op[3] = d1;
 
     ip += 4;
     op += 4;
@@ -42,20 +52,23 @@
 
   ip = output;
   for (i = 0; i < 4; i++) {
-    a1 = ip[4 * 0] + ip[4 * 3];
-    b1 = ip[4 * 1] + ip[4 * 2];
-    c1 = ip[4 * 1] - ip[4 * 2];
-    d1 = ip[4 * 0] - ip[4 * 3];
+    a1 = ip[4 * 0];
+    c1 = ip[4 * 1];
+    d1 = ip[4 * 2];
+    b1 = ip[4 * 3];
 
+    c1 = a1 - c1;
+    b1 += d1;
+    e1 = (c1 - b1) >> 1;
+    a1 -= e1;
+    d1 += e1;
+    b1 = a1 - b1;
+    c1 -= d1;
 
-    dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] +
-                                       ((a1 + b1 + 1) >> 1));
-    dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] +
-                                       ((c1 + d1) >> 1));
-    dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] +
-                                       ((a1 - b1) >> 1));
-    dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] +
-                                       ((d1 - c1) >> 1));
+    dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1);
+    dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + b1);
+    dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + c1);
+    dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + d1);
 
     ip++;
     dest++;
@@ -64,23 +77,24 @@
 
 void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
   int i;
+  int a1, e1;
   int16_t tmp[4];
   int16_t *ip = in;
   int16_t *op = tmp;
 
-  op[0] = ((ip[0] >> WHT_UPSCALE_FACTOR) + 1) >> 1;
-  op[1] = op[2] = op[3] = (ip[0] >> WHT_UPSCALE_FACTOR) >> 1;
+  a1 = ip[0] >> WHT_UPSCALE_FACTOR;
+  e1 = a1 >> 1;
+  op[0] = op[1] = op[2] = a1 - e1;
+  op[3] = e1;
 
   ip = tmp;
   for (i = 0; i < 4; i++) {
-    dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] +
-                                       ((ip[0] + 1) >> 1));
-    dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] +
-                                       (ip[0] >> 1));
-    dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] +
-                                       (ip[0] >> 1));
-    dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] +
-                                       (ip[0] >> 1));
+    e1 = ip[0] >> 1;
+    a1 = ip[0] - e1;
+    dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1);
+    dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + a1);
+    dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + a1);
+    dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1);
     ip++;
     dest++;
   }
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index b668212..ab14994 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -187,7 +187,7 @@
                    int do_left_mb_v, int do_above_mb_h,
                    int do_left_mbuv_v, int do_above_mbuv_h,
                    uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr,
-                   int y_stride, int uv_stride, int dering) {
+                   int y_stride, int uv_stride) {
   loop_filter_info_n *lfi_n = &cm->lf_info;
   struct loop_filter_info lfi;
   int mode = mi->mbmi.mode;
@@ -254,21 +254,6 @@
                            y_stride, uv_stride, &lfi);
       }
     }
-    if (dering) {
-#if CONFIG_LOOP_DERING
-      vp9_post_proc_down_and_across(y_ptr, y_ptr,
-        y_stride, y_stride,
-        16, 16, dering);
-      if (u_ptr && v_ptr) {
-        vp9_post_proc_down_and_across(u_ptr, u_ptr,
-          uv_stride, uv_stride,
-          8, 8, dering);
-        vp9_post_proc_down_and_across(v_ptr, v_ptr,
-          uv_stride, uv_stride,
-          8, 8, dering);
-      }
-#endif
-    }
   }
 }
 
@@ -276,7 +261,7 @@
                      int mb_row, int mb_col,
                      uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr,
                      int y_stride, int uv_stride,
-                     int y_only, int dering) {
+                     int y_only) {
   BLOCK_SIZE_TYPE sb_type = mode_info_context->mbmi.sb_type;
   const int wbl = b_width_log2(sb_type), hbl = b_height_log2(sb_type);
   TX_SIZE tx_size = mode_info_context->mbmi.txfm_size;
@@ -298,7 +283,7 @@
       y_ptr,
       y_only? 0 : u_ptr,
       y_only? 0 : v_ptr,
-      y_stride, uv_stride, dering);
+      y_stride, uv_stride);
   // process 2nd MB top-right
   mi = mode_info_context + 2;
   do_left_v = !(wbl >= 3 /* 32x16 or >=32x32 */ && (tx_size >= TX_32X32 ||
@@ -313,7 +298,7 @@
       y_ptr + 16,
       y_only ? 0 : (u_ptr + 8),
       y_only ? 0 : (v_ptr + 8),
-      y_stride, uv_stride, dering);
+      y_stride, uv_stride);
 
   // process 3rd MB bottom-left
   mi = mode_info_context + (mis << 1);
@@ -329,7 +314,7 @@
       y_ptr + 16 * y_stride,
       y_only ? 0 : (u_ptr + 8 * uv_stride),
       y_only ? 0 : (v_ptr + 8 * uv_stride),
-      y_stride, uv_stride, dering);
+      y_stride, uv_stride);
 
   // process 4th MB bottom right
   mi = mode_info_context + ((mis + 1) << 1);
@@ -346,39 +331,38 @@
       y_ptr + 16 * y_stride + 16,
       y_only ? 0 : (u_ptr + 8 * uv_stride + 8),
       y_only ? 0 : (v_ptr + 8 * uv_stride + 8),
-      y_stride, uv_stride, dering);
+      y_stride, uv_stride);
 }
 
 static void lpf_sb64(VP9_COMMON *cm, const MODE_INFO *mode_info_context,
                      int mb_row, int mb_col,
                      uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr,
                      int y_stride, int uv_stride,
-                     int y_only, int dering) {
+                     int y_only) {
   lpf_sb32(cm, mode_info_context, mb_row, mb_col,
       y_ptr, u_ptr, v_ptr,
-      y_stride, uv_stride, y_only, dering);
+      y_stride, uv_stride, y_only);
   lpf_sb32(cm, mode_info_context + 4, mb_row, mb_col + 2,
       y_ptr + 32, u_ptr + 16, v_ptr + 16,
-      y_stride, uv_stride, y_only, dering);
+      y_stride, uv_stride, y_only);
   lpf_sb32(cm, mode_info_context + cm->mode_info_stride * 4,
       mb_row + 2, mb_col,
       y_ptr + 32 * y_stride,
       u_ptr + 16 * uv_stride,
       v_ptr + 16 * uv_stride,
-      y_stride, uv_stride, y_only, dering);
+      y_stride, uv_stride, y_only);
   lpf_sb32(cm, mode_info_context + cm->mode_info_stride * 4 + 4,
       mb_row + 2, mb_col + 2,
       y_ptr + 32 * y_stride + 32,
       u_ptr + 16 * uv_stride + 16,
       v_ptr + 16 * uv_stride + 16,
-      y_stride, uv_stride, y_only, dering);
+      y_stride, uv_stride, y_only);
 }
 
 void vp9_loop_filter_frame(VP9_COMMON *cm,
                            MACROBLOCKD *xd,
                            int frame_filter_level,
-                           int y_only,
-                           int dering) {
+                           int y_only) {
   YV12_BUFFER_CONFIG *post = cm->frame_to_show;
   int mb_row, mb_col;
   const int sb64_rows = cm->mb_rows / 4;
@@ -432,7 +416,7 @@
     for (mb_col = 0; mb_col < sb64_cols * 4; mb_col += 4) {
       lpf_sb64(cm, mode_info_context, mb_row, mb_col,
                y_ptr, u_ptr, v_ptr,
-               y_stride, uv_stride, y_only, dering);
+               y_stride, uv_stride, y_only);
       y_ptr += 64;
       u_ptr = y_only? 0 : u_ptr + 32;
       v_ptr = y_only? 0 : v_ptr + 32;
@@ -442,13 +426,13 @@
       // process 2 SB32s in the extra SB32 col
       lpf_sb32(cm, mode_info_context, mb_row, mb_col,
                y_ptr, u_ptr, v_ptr,
-               y_stride, uv_stride, y_only, dering);
+               y_stride, uv_stride, y_only);
       lpf_sb32(cm, mode_info_context + mis * 4,
                mb_row + 2, mb_col,
                y_ptr + 32 * y_stride,
                u_ptr + 16 * uv_stride,
                v_ptr + 16 * uv_stride,
-               y_stride, uv_stride, y_only, dering);
+               y_stride, uv_stride, y_only);
       y_ptr += 32;
       u_ptr = y_only? 0 : u_ptr + 16;
       v_ptr = y_only? 0 : v_ptr + 16;
@@ -469,7 +453,7 @@
                y_ptr + (k * 16) * y_stride,
                y_only ? 0 : (u_ptr + (k * 8) * uv_stride),
                y_only ? 0 : (v_ptr + (k * 8) * uv_stride),
-               y_stride, uv_stride, dering);
+               y_stride, uv_stride);
       }
 
       y_ptr += 16;
@@ -491,7 +475,7 @@
     for (mb_col = 0; mb_col < sb32_cols * 2; mb_col += 2) {
       lpf_sb32(cm, mode_info_context, mb_row, mb_col,
                y_ptr, u_ptr, v_ptr,
-               y_stride, uv_stride, y_only, dering);
+               y_stride, uv_stride, y_only);
       y_ptr += 32;
       u_ptr = y_only? 0 : u_ptr + 16;
       v_ptr = y_only? 0 : v_ptr + 16;
@@ -509,7 +493,7 @@
              y_ptr,
              y_only? NULL : u_ptr,
              y_only? NULL : v_ptr,
-             y_stride, uv_stride, dering);
+             y_stride, uv_stride);
       // process 2nd MB
       mi = mode_info_context + (mis << 1);
       do_left_v = (mb_col > 0);
@@ -521,7 +505,7 @@
              y_ptr + 16 * y_stride,
              y_only ? NULL : (u_ptr + 8 * uv_stride),
              y_only ? NULL : (v_ptr + 8 * uv_stride),
-             y_stride, uv_stride, dering);
+             y_stride, uv_stride);
       y_ptr += 16;
       u_ptr = y_only? 0 : u_ptr + 8;
       v_ptr = y_only? 0 : v_ptr + 8;
@@ -547,7 +531,7 @@
              y_ptr,
              y_only? 0 : u_ptr,
              y_only? 0 : v_ptr,
-             y_stride, uv_stride, dering);
+             y_stride, uv_stride);
       y_ptr += 16;
       u_ptr = y_only? 0 : u_ptr + 8;
       v_ptr = y_only? 0 : v_ptr + 8;
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 589984f..65f522b 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -73,8 +73,7 @@
 void vp9_loop_filter_frame(struct VP9Common *cm,
                            struct macroblockd *mbd,
                            int filter_level,
-                           int y_only,
-                           int dering);
+                           int y_only);
 
 void vp9_loop_filter_partial_frame(struct VP9Common *cm,
                                    struct macroblockd *mbd,
diff --git a/vp9/common/vp9_modecontext.c b/vp9/common/vp9_modecontext.c
index 697683a..5f084ea 100644
--- a/vp9/common/vp9_modecontext.c
+++ b/vp9/common/vp9_modecontext.c
@@ -11,9 +11,9 @@
 
 #include "vp9/common/vp9_entropymode.h"
 
-const vp9_prob vp9_kf_default_bmode_probs[VP9_KF_BINTRAMODES]
-                                         [VP9_KF_BINTRAMODES]
-                                         [VP9_KF_BINTRAMODES-1] = {
+const vp9_prob vp9_kf_default_bmode_probs[VP9_BINTRAMODES]
+                                         [VP9_BINTRAMODES]
+                                         [VP9_BINTRAMODES-1] = {
   {  // Above 0
     { 231,    9,  124,  138,   96,  200,   76,   42,   88, },  // left 0
     { 152,   11,  187,  112,  170,  139,  130,   91,  113, },  // left 1
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 7140ee6..c277ea3 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -24,6 +24,10 @@
 #include "vp9/common/vp9_postproc.h"
 #endif
 
+/*#ifdef PACKET_TESTING*/
+#include "vp9/common/vp9_header.h"
+/*#endif*/
+
 /* Create/destroy static data structures. */
 
 void vp9_initialize_common(void);
@@ -47,7 +51,7 @@
 #define MAX_LAG_BUFFERS 25
 
 typedef struct frame_contexts {
-  vp9_prob bmode_prob[VP9_NKF_BINTRAMODES - 1];
+  vp9_prob bmode_prob[VP9_BINTRAMODES - 1];
   vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
   vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1];
   vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1];
@@ -55,12 +59,12 @@
 
   nmv_context nmvc;
   nmv_context pre_nmvc;
-  vp9_prob pre_bmode_prob[VP9_NKF_BINTRAMODES - 1];
+  vp9_prob pre_bmode_prob[VP9_BINTRAMODES - 1];
   vp9_prob pre_ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
   vp9_prob pre_sb_ymode_prob[VP9_I32X32_MODES - 1];
   vp9_prob pre_uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1];
   vp9_prob pre_partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
-  unsigned int bmode_counts[VP9_NKF_BINTRAMODES];
+  unsigned int bmode_counts[VP9_BINTRAMODES];
   unsigned int ymode_counts[VP9_YMODES];   /* interframe intra mode probs */
   unsigned int sb_ymode_counts[VP9_I32X32_MODES];
   unsigned int uv_mode_counts[VP9_YMODES][VP9_UV_MODES];
@@ -202,7 +206,6 @@
   int filter_level;
   int last_sharpness_level;
   int sharpness_level;
-  int dering_enabled;
 
   int refresh_frame_context;    /* Two state 0 = NO, 1 = YES */
 
@@ -218,9 +221,9 @@
 
   /* keyframe block modes are predicted by their above, left neighbors */
 
-  vp9_prob kf_bmode_prob[VP9_KF_BINTRAMODES]
-                        [VP9_KF_BINTRAMODES]
-                        [VP9_KF_BINTRAMODES - 1];
+  vp9_prob kf_bmode_prob[VP9_BINTRAMODES]
+                        [VP9_BINTRAMODES]
+                        [VP9_BINTRAMODES - 1];
   vp9_prob kf_ymode_prob[8][VP9_YMODES - 1]; /* keyframe "" */
   vp9_prob sb_kf_ymode_prob[8][VP9_I32X32_MODES - 1];
   int kf_ymode_probs_index;
@@ -255,6 +258,9 @@
   int near_boffset[3];
   int version;
 
+#ifdef PACKET_TESTING
+  VP9_HEADER oh;
+#endif
   double bitrate;
   double framerate;
 
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index d2c52ed..3aae669 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -53,7 +53,7 @@
   { RGB_TO_YUV(0xCC33FF) },   /* Magenta */
 };
 
-static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] = {
+static const unsigned char B_PREDICTION_MODE_colors[INTRA_MODE_COUNT][3] = {
   { RGB_TO_YUV(0x6633ff) },   /* Purple */
   { RGB_TO_YUV(0xcc33ff) },   /* Magenta */
   { RGB_TO_YUV(0xff33cc) },   /* Pink */
diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h
index b88761b..f5f5f42 100644
--- a/vp9/common/vp9_reconintra.h
+++ b/vp9/common/vp9_reconintra.h
@@ -14,12 +14,12 @@
 #include "vpx/vpx_integer.h"
 #include "vp9/common/vp9_blockd.h"
 
-B_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
-                                              int stride, int n,
-                                              int tx, int ty);
+MB_PREDICTION_MODE vp9_find_dominant_direction(uint8_t *ptr,
+                                               int stride, int n,
+                                               int tx, int ty);
 
-B_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, int block,
-                                         uint8_t *ptr, int stride);
+MB_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, int block,
+                                          uint8_t *ptr, int stride);
 
 void vp9_predict_intra_block(MACROBLOCKD *xd,
                             int block_idx,
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 9f6a0f2..6b511b5 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -35,13 +35,13 @@
 extern int dec_debug;
 #endif
 
-static B_PREDICTION_MODE read_bmode(vp9_reader *r, const vp9_prob *p) {
-  B_PREDICTION_MODE m = treed_read(r, vp9_bmode_tree, p);
+static MB_PREDICTION_MODE read_bmode(vp9_reader *r, const vp9_prob *p) {
+  MB_PREDICTION_MODE m = treed_read(r, vp9_bmode_tree, p);
   return m;
 }
 
-static B_PREDICTION_MODE read_kf_bmode(vp9_reader *r, const vp9_prob *p) {
-  return (B_PREDICTION_MODE)treed_read(r, vp9_kf_bmode_tree, p);
+static MB_PREDICTION_MODE read_kf_bmode(vp9_reader *r, const vp9_prob *p) {
+  return (MB_PREDICTION_MODE)treed_read(r, vp9_bmode_tree, p);
 }
 
 static MB_PREDICTION_MODE read_ymode(vp9_reader *r, const vp9_prob *p) {
@@ -591,18 +591,9 @@
     int_mv nearest, nearby, best_mv;
     int_mv nearest_second, nearby_second, best_mv_second;
     vp9_prob mv_ref_p[VP9_MVREFS - 1];
-
     const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame;
-    struct scale_factors *sf0 = &xd->scale_factor[0];
-    *sf0 = cm->active_ref_scale[mbmi->ref_frame - 1];
 
     {
-      // Select the appropriate reference frame for this MB
-      const int ref_fb_idx = cm->active_ref_idx[ref_frame - 1];
-
-      setup_pre_planes(xd, &cm->yv12_fb[ref_fb_idx], NULL,
-                       mi_row, mi_col, xd->scale_factor, xd->scale_factor_uv);
-
 #ifdef DEC_DEBUG
       if (dec_debug)
         printf("%d %d\n", xd->mode_info_context->mbmi.mv[0].as_mv.row,
@@ -661,12 +652,6 @@
         mbmi->second_ref_frame = 1;
       if (mbmi->second_ref_frame > 0) {
         const MV_REFERENCE_FRAME second_ref_frame = mbmi->second_ref_frame;
-        struct scale_factors *sf1 = &xd->scale_factor[1];
-        const int second_ref_fb_idx = cm->active_ref_idx[second_ref_frame - 1];
-        *sf1 = cm->active_ref_scale[second_ref_frame - 1];
-
-        setup_pre_planes(xd, NULL, &cm->yv12_fb[second_ref_fb_idx],
-                         mi_row, mi_col, xd->scale_factor, xd->scale_factor_uv);
 
         vp9_find_mv_refs(cm, xd, mi,
                          use_prev_in_find_mv_refs ?
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 44510b8..70e0c87 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -10,31 +10,29 @@
 
 #include <assert.h>
 
-#include "./vp9_rtcd.h"
-
-#include "vpx_scale/vpx_scale.h"
-#include "vpx_mem/vpx_mem.h"
-
+#include "vp9/decoder/vp9_onyxd_int.h"
 #include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_header.h"
 #include "vp9/common/vp9_reconintra.h"
 #include "vp9/common/vp9_reconinter.h"
 #include "vp9/common/vp9_entropy.h"
+#include "vp9/decoder/vp9_decodframe.h"
+#include "vp9/decoder/vp9_detokenize.h"
 #include "vp9/common/vp9_invtrans.h"
 #include "vp9/common/vp9_alloccommon.h"
 #include "vp9/common/vp9_entropymode.h"
 #include "vp9/common/vp9_quant_common.h"
-#include "vp9/common/vp9_extend.h"
-#include "vp9/common/vp9_modecont.h"
-#include "vp9/common/vp9_seg_common.h"
-#include "vp9/common/vp9_tile_common.h"
+#include "vpx_scale/vpx_scale.h"
 
 #include "vp9/decoder/vp9_decodemv.h"
+#include "vp9/common/vp9_extend.h"
+#include "vp9/common/vp9_modecont.h"
+#include "vpx_mem/vpx_mem.h"
 #include "vp9/decoder/vp9_dboolhuff.h"
-#include "vp9/decoder/vp9_read_bit_buffer.h"
-#include "vp9/decoder/vp9_onyxd_int.h"
-#include "vp9/decoder/vp9_decodframe.h"
-#include "vp9/decoder/vp9_detokenize.h"
 
+#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_tile_common.h"
+#include "./vp9_rtcd.h"
 
 // #define DEC_DEBUG
 #ifdef DEC_DEBUG
@@ -700,13 +698,6 @@
   pc->filter_level = vp9_read_literal(r, 6);
   pc->sharpness_level = vp9_read_literal(r, 3);
 
-#if CONFIG_LOOP_DERING
-  if (vp9_read_bit(r))
-    pc->dering_enabled = 1 + vp9_read_literal(r, 4);
-  else
-    pc->dering_enabled = 0;
-#endif
-
   // Read in loop filter deltas applied at the MB level based on mode or ref
   // frame.
   xd->mode_ref_lf_delta_update = 0;
@@ -949,7 +940,7 @@
 int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
   vp9_reader header_bc, residual_bc;
   VP9_COMMON *const pc = &pbi->common;
-  MACROBLOCKD *const xd = &pbi->mb;
+  MACROBLOCKD *const xd  = &pbi->mb;
   const uint8_t *data = pbi->source;
   const uint8_t *data_end = data + pbi->source_sz;
   size_t first_partition_size = 0;
@@ -959,40 +950,24 @@
   xd->corrupted = 0;  // start with no corruption of current frame
   new_fb->corrupted = 0;
 
-
-
   if (data_end - data < 3) {
     vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet");
   } else {
-    struct vp9_read_bit_buffer rb = {data, 0};
-
     int scaling_active;
     pc->last_frame_type = pc->frame_type;
-
-    pc->frame_type = (FRAME_TYPE) vp9_rb_read_bit(&rb);
-    pc->version = vp9_rb_read_literal(&rb, 3);
-    pc->show_frame = vp9_rb_read_bit(&rb);
-    scaling_active = vp9_rb_read_bit(&rb);
-    pc->subsampling_x = vp9_rb_read_bit(&rb);
-    pc->subsampling_y = vp9_rb_read_bit(&rb);
-
-    pc->clr_type = (YUV_TYPE)vp9_rb_read_bit(&rb);
-    pc->error_resilient_mode = vp9_rb_read_bit(&rb);
-    if (!pc->error_resilient_mode) {
-      pc->refresh_frame_context = vp9_rb_read_bit(&rb);
-      pc->frame_parallel_decoding_mode = vp9_rb_read_bit(&rb);
-    } else {
-      pc->refresh_frame_context = 0;
-      pc->frame_parallel_decoding_mode = 1;
-    }
-
-    first_partition_size = vp9_rb_read_literal(&rb, 16);
+    pc->frame_type = (FRAME_TYPE)(data[0] & 1);
+    pc->version = (data[0] >> 1) & 7;
+    pc->show_frame = (data[0] >> 4) & 1;
+    scaling_active = (data[0] >> 5) & 1;
+    pc->subsampling_x = (data[0] >> 6) & 1;
+    pc->subsampling_y = (data[0] >> 7) & 1;
+    first_partition_size = read_le16(data + 1);
 
     if (!read_is_valid(data, first_partition_size, data_end))
       vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                          "Truncated packet or corrupt partition 0 length");
 
-    data += HEADER_SIZE_IN_BYTES;  // header size
+    data += 3;
 
     vp9_setup_version(pc);
 
@@ -1026,6 +1001,9 @@
     vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate bool decoder 0");
 
+  pc->clr_type = (YUV_TYPE)vp9_read_bit(&header_bc);
+  pc->error_resilient_mode = vp9_read_bit(&header_bc);
+
   setup_loopfilter(pc, xd, &header_bc);
 
   setup_quantization(pbi, &header_bc);
@@ -1072,6 +1050,14 @@
     vp9_setup_interp_filters(xd, pc->mcomp_filter_type, pc);
   }
 
+  if (!pc->error_resilient_mode) {
+    pc->refresh_frame_context = vp9_read_bit(&header_bc);
+    pc->frame_parallel_decoding_mode = vp9_read_bit(&header_bc);
+  } else {
+    pc->refresh_frame_context = 0;
+    pc->frame_parallel_decoding_mode = 1;
+  }
+
   pc->frame_context_idx = vp9_read_literal(&header_bc, NUM_FRAME_CONTEXTS_LG2);
   pc->fc = pc->frame_contexts[pc->frame_context_idx];
 
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index 5d5a543..24f9ca3 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -349,8 +349,7 @@
 
     if (cm->filter_level) {
       /* Apply the loop filter if appropriate. */
-      vp9_loop_filter_frame(cm, &pbi->mb, cm->filter_level, 0,
-                            cm->dering_enabled);
+      vp9_loop_filter_frame(cm, &pbi->mb, cm->filter_level, 0);
     }
 
 #if WRITE_RECON_BUFFER == 2
diff --git a/vp9/decoder/vp9_read_bit_buffer.h b/vp9/decoder/vp9_read_bit_buffer.h
deleted file mode 100644
index 7141722..0000000
--- a/vp9/decoder/vp9_read_bit_buffer.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_READ_BIT_BUFFER_
-#define VP9_READ_BIT_BUFFER_
-
-struct vp9_read_bit_buffer {
-  const uint8_t *const bit_buffer;
-  size_t bit_offset;
-};
-
-static int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb) {
-  const int off = rb->bit_offset;
-  const int p = off / CHAR_BIT;
-  const int q = /*CHAR_BIT - 1 -*/ off % CHAR_BIT;
-  const int bit = (rb->bit_buffer[p] & (1 << q)) >> q;
-  rb->bit_offset = off + 1;
-  return bit;
-}
-
-static int vp9_rb_read_literal(struct vp9_read_bit_buffer *rb, int bits) {
-  int value = 0, bit;
-  for (bit = bits - 1; bit >= 0; bit--)
-    value |= vp9_rb_read_bit(rb) << bit;
-  return value;
-}
-
-#endif  // VP9_READ_BIT_BUFFER_
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 48f70e2..1525340 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -12,15 +12,20 @@
 #include <stdio.h>
 #include <limits.h>
 
-#include "vpx/vpx_encoder.h"
-#include "vpx_mem/vpx_mem.h"
-
+#include "vp9/common/vp9_header.h"
+#include "vp9/encoder/vp9_encodemv.h"
 #include "vp9/common/vp9_entropymode.h"
 #include "vp9/common/vp9_entropymv.h"
 #include "vp9/common/vp9_findnearmv.h"
 #include "vp9/common/vp9_tile_common.h"
+#include "vp9/encoder/vp9_mcomp.h"
 #include "vp9/common/vp9_systemdependent.h"
 #include "vp9/common/vp9_pragmas.h"
+#include "vpx/vpx_encoder.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/encoder/vp9_bitstream.h"
+#include "vp9/encoder/vp9_segmentation.h"
+
 #include "vp9/common/vp9_seg_common.h"
 #include "vp9/common/vp9_pred_common.h"
 #include "vp9/common/vp9_entropy.h"
@@ -28,12 +33,6 @@
 #include "vp9/common/vp9_mvref_common.h"
 #include "vp9/common/vp9_treecoder.h"
 
-#include "vp9/encoder/vp9_encodemv.h"
-#include "vp9/encoder/vp9_mcomp.h"
-#include "vp9/encoder/vp9_bitstream.h"
-#include "vp9/encoder/vp9_segmentation.h"
-#include "vp9/encoder/vp9_write_bit_buffer.h"
-
 #if defined(SECTIONBITS_OUTPUT)
 unsigned __int64 Sectionbits[500];
 #endif
@@ -370,7 +369,7 @@
 }
 
 static void write_kf_bmode(vp9_writer *bc, int m, const vp9_prob *p) {
-  write_token(bc, vp9_kf_bmode_tree, p, vp9_kf_bmode_encodings + m);
+  write_token(bc, vp9_bmode_tree, p, vp9_kf_bmode_encodings + m);
 }
 
 static int prob_update_savings(const unsigned int *ct,
@@ -854,8 +853,8 @@
   }
 
   if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
-    const B_PREDICTION_MODE A = above_block_mode(m, 0, mis);
-    const B_PREDICTION_MODE L = xd->left_available ?
+    const MB_PREDICTION_MODE A = above_block_mode(m, 0, mis);
+    const MB_PREDICTION_MODE L = xd->left_available ?
                                  left_block_mode(m, 0) : DC_PRED;
     write_kf_bmode(bc, ym, c->kf_bmode_prob[A][L]);
   }
@@ -867,8 +866,8 @@
     for (idy = 0; idy < 2; idy += bh) {
       for (idx = 0; idx < 2; idx += bw) {
         int i = idy * 2 + idx;
-        const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
-        const B_PREDICTION_MODE L = (xd->left_available || idx) ?
+        const MB_PREDICTION_MODE A = above_block_mode(m, i, mis);
+        const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
                                      left_block_mode(m, i) : DC_PRED;
         write_kf_bmode(bc, m->bmi[i].as_mode.first,
                        c->kf_bmode_prob[A][L]);
@@ -1279,6 +1278,10 @@
   }
 }
 
+#ifdef PACKET_TESTING
+FILE *vpxlogc = 0;
+#endif
+
 static void decide_kf_ymode_entropy(VP9_COMP *cpi) {
   int mode_cost[MB_MODE_COUNT];
   int bestcost = INT_MAX;
@@ -1331,14 +1334,6 @@
   // Encode the loop filter level and type
   vp9_write_literal(w, pc->filter_level, 6);
   vp9_write_literal(w, pc->sharpness_level, 3);
-#if CONFIG_LOOP_DERING
-  if (pc->dering_enabled) {
-    vp9_write_bit(w, 1);
-    vp9_write_literal(w, pc->dering_enabled - 1, 4);
-  } else {
-    vp9_write_bit(w, 0);
-  }
-#endif
 
   // Write out loop filter deltas applied at the MB level based on mode or
   // ref frame (if they are enabled).
@@ -1489,34 +1484,44 @@
 
 void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) {
   int i;
+  VP9_HEADER oh;
   VP9_COMMON *const pc = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   vp9_writer header_bc, residual_bc;
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   int extra_bytes_packed = 0;
+
   uint8_t *cx_data = dest;
 
-  cx_data += HEADER_SIZE_IN_BYTES;
+  oh.show_frame = (int) pc->show_frame;
+  oh.type = (int)pc->frame_type;
+  oh.version = pc->version;
+  oh.first_partition_length_in_bytes = 0;
+
+  cx_data += 3;
 
 #if defined(SECTIONBITS_OUTPUT)
-  Sectionbits[active_section = 1] += HEADER_SIZE_IN_BYTES * 8 * 256;
+  Sectionbits[active_section = 1] += sizeof(VP9_HEADER) * 8 * 256;
 #endif
 
   compute_update_table();
 
-  if (pc->frame_type == KEY_FRAME) {
+  /* Every keyframe sends the start code, width, height, scale factor, clamp
+   * and color type.
+   */
+  if (oh.type == KEY_FRAME) {
     // Start / synch code
     cx_data[0] = 0x49;
     cx_data[1] = 0x83;
     cx_data[2] = 0x42;
-    extra_bytes_packed += 3;
-    cx_data += 3;
+    extra_bytes_packed = 3;
+    cx_data += extra_bytes_packed;
   }
 
   if (pc->width != pc->display_width || pc->height != pc->display_height) {
     write_le16(cx_data, pc->display_width);
     write_le16(cx_data + 2, pc->display_height);
-    extra_bytes_packed += 4;
     cx_data += 4;
+    extra_bytes_packed += 4;
   }
 
   write_le16(cx_data, pc->width);
@@ -1526,6 +1531,12 @@
 
   vp9_start_encode(&header_bc, cx_data);
 
+  // TODO(jkoleszar): remove these two unused bits?
+  vp9_write_bit(&header_bc, pc->clr_type);
+
+  // error resilient mode
+  vp9_write_bit(&header_bc, pc->error_resilient_mode);
+
   encode_loopfilter(pc, xd, &header_bc);
 
   encode_quantization(pc, &header_bc);
@@ -1574,8 +1585,9 @@
     vp9_write_literal(&header_bc, cpi->alt_fb_idx, NUM_REF_FRAMES_LG2);
 
     // Indicate the sign bias for each reference frame buffer.
-    for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i)
+    for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) {
       vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[LAST_FRAME + i]);
+    }
 
     // Signal whether to allow high MV precision
     vp9_write_bit(&header_bc, (xd->allow_high_precision_mv) ? 1 : 0);
@@ -1605,6 +1617,11 @@
       vp9_write_literal(&header_bc, (pc->mcomp_filter_type), 2);
   }
 
+  if (!pc->error_resilient_mode) {
+    vp9_write_bit(&header_bc, pc->refresh_frame_context);
+    vp9_write_bit(&header_bc, pc->frame_parallel_decoding_mode);
+  }
+
   vp9_write_literal(&header_bc, pc->frame_context_idx,
                     NUM_FRAME_CONTEXTS_LG2);
 
@@ -1803,35 +1820,27 @@
 
   vp9_stop_encode(&header_bc);
 
+  oh.first_partition_length_in_bytes = header_bc.pos;
+
   /* update frame tag */
   {
-    const int first_partition_length_in_bytes = header_bc.pos;
     int scaling = (pc->width != pc->display_width ||
                    pc->height != pc->display_height);
+    int v = (oh.first_partition_length_in_bytes << 8) |
+            (pc->subsampling_y << 7) |
+            (pc->subsampling_x << 6) |
+            (scaling << 5) |
+            (oh.show_frame << 4) |
+            (oh.version << 1) |
+            oh.type;
 
-    struct vp9_write_bit_buffer wb = {dest, 0};
-
-    assert(first_partition_length_in_bytes <= 0xffff);
-
-    vp9_wb_write_bit(&wb, pc->frame_type);
-    vp9_wb_write_literal(&wb, pc->version, 3);
-    vp9_wb_write_bit(&wb, pc->show_frame);
-    vp9_wb_write_bit(&wb, scaling);
-    vp9_wb_write_bit(&wb, pc->subsampling_x);
-    vp9_wb_write_bit(&wb, pc->subsampling_y);
-
-    vp9_wb_write_bit(&wb, pc->clr_type);
-    vp9_wb_write_bit(&wb, pc->error_resilient_mode);
-    if (!pc->error_resilient_mode) {
-      vp9_wb_write_bit(&wb, pc->refresh_frame_context);
-      vp9_wb_write_bit(&wb, pc->frame_parallel_decoding_mode);
-    }
-
-
-    vp9_wb_write_literal(&wb, first_partition_length_in_bytes, 16);
+    assert(oh.first_partition_length_in_bytes <= 0xffff);
+    dest[0] = v;
+    dest[1] = v >> 8;
+    dest[2] = v >> 16;
   }
 
-  *size = HEADER_SIZE_IN_BYTES + extra_bytes_packed + header_bc.pos;
+  *size = VP9_HEADER_SIZE + extra_bytes_packed + header_bc.pos;
 
   if (pc->frame_type == KEY_FRAME) {
     decide_kf_ymode_entropy(cpi);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index b007c65..211eca4 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -116,8 +116,8 @@
 
   int mbmode_cost[2][MB_MODE_COUNT];
   int intra_uv_mode_cost[2][MB_MODE_COUNT];
-  int bmode_costs[VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES];
-  int inter_bmode_costs[B_MODE_COUNT];
+  int bmode_costs[VP9_BINTRAMODES][VP9_BINTRAMODES][VP9_BINTRAMODES];
+  int inter_bmode_costs[INTRA_MODE_COUNT];
   int switchable_interp_costs[VP9_SWITCHABLE_FILTERS + 1]
                              [VP9_SWITCHABLE_FILTERS];
 
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index ebf40e4..d226444 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -591,23 +591,33 @@
   }
 }
 
+/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
+   pixel. */
 void vp9_short_walsh4x4_c(short *input, short *output, int pitch) {
   int i;
-  int a1, b1, c1, d1;
+  int a1, b1, c1, d1, e1;
   short *ip = input;
   short *op = output;
   int pitch_short = pitch >> 1;
 
   for (i = 0; i < 4; i++) {
-    a1 = ip[0 * pitch_short] + ip[3 * pitch_short];
-    b1 = ip[1 * pitch_short] + ip[2 * pitch_short];
-    c1 = ip[1 * pitch_short] - ip[2 * pitch_short];
-    d1 = ip[0 * pitch_short] - ip[3 * pitch_short];
+    a1 = ip[0 * pitch_short];
+    b1 = ip[1 * pitch_short];
+    c1 = ip[2 * pitch_short];
+    d1 = ip[3 * pitch_short];
 
-    op[0] = (a1 + b1 + 1) >> 1;
-    op[4] = (c1 + d1) >> 1;
-    op[8] = (a1 - b1) >> 1;
-    op[12] = (d1 - c1) >> 1;
+    b1 = a1 - b1;
+    c1 += d1;
+    e1 = (c1 - b1) >> 1;
+    a1 += e1;
+    d1 -= e1;
+    c1 = a1 - c1;
+    b1 -= d1;
+
+    op[0] = a1;
+    op[4] = c1;
+    op[8] = d1;
+    op[12] = b1;
 
     ip++;
     op++;
@@ -616,15 +626,23 @@
   op = output;
 
   for (i = 0; i < 4; i++) {
-    a1 = ip[0] + ip[3];
-    b1 = ip[1] + ip[2];
-    c1 = ip[1] - ip[2];
-    d1 = ip[0] - ip[3];
+    a1 = ip[0];
+    b1 = ip[1];
+    c1 = ip[2];
+    d1 = ip[3];
 
-    op[0] = ((a1 + b1 + 1) >> 1) << WHT_UPSCALE_FACTOR;
-    op[1] = ((c1 + d1) >> 1) << WHT_UPSCALE_FACTOR;
-    op[2] = ((a1 - b1) >> 1) << WHT_UPSCALE_FACTOR;
-    op[3] = ((d1 - c1) >> 1) << WHT_UPSCALE_FACTOR;
+    b1 = a1 - b1;
+    c1 += d1;
+    e1 = (c1 - b1) >> 1;
+    a1 += e1;
+    d1 -= e1;
+    c1 = a1 - c1;
+    b1 -= d1;
+
+    op[0] = a1 << WHT_UPSCALE_FACTOR;
+    op[1] = c1 << WHT_UPSCALE_FACTOR;
+    op[2] = d1 << WHT_UPSCALE_FACTOR;
+    op[3] = b1 << WHT_UPSCALE_FACTOR;
 
     ip += 4;
     op += 4;
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index 91866b2..5d7c244 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -32,7 +32,6 @@
     int i;
 
     for (i = 0; i < 16; i++) {
-      x->e_mbd.mode_info_context->bmi[i].as_mode.first = B_DC_PRED;
       encode_intra4x4block(x, i, BLOCK_SIZE_MB16X16);
     }
   }
@@ -40,10 +39,10 @@
   return vp9_get_mb_ss(x->plane[0].src_diff);
 }
 
+// This function is used only by the first-pass encoding.
 static void encode_intra4x4block(MACROBLOCK *x, int ib,
                                  BLOCK_SIZE_TYPE bsize) {
   MACROBLOCKD * const xd = &x->e_mbd;
-  TX_TYPE tx_type;
   uint8_t* const src =
       raster_block_offset_uint8(xd, bsize, 0, ib,
                                 x->plane[0].src.buf, x->plane[0].src.stride);
@@ -58,26 +57,17 @@
 
   assert(ib < (1 << (bwl + bhl)));
 
-  vp9_intra4x4_predict(&x->e_mbd, ib, bsize,
-                       xd->mode_info_context->bmi[ib].as_mode.first,
+  vp9_intra4x4_predict(&x->e_mbd, ib, bsize, DC_PRED,
                        dst, xd->plane[0].dst.stride);
   vp9_subtract_block(4, 4, src_diff, 4 << bwl,
                      src, x->plane[0].src.stride,
                      dst, xd->plane[0].dst.stride);
 
-  tx_type = get_tx_type_4x4(&x->e_mbd, ib);
-  if (tx_type != DCT_DCT) {
-    vp9_short_fht4x4(src_diff, coeff, 4 << bwl, tx_type);
-    x->quantize_b_4x4(x, ib, tx_type, 16);
-    vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), dst,
-                         xd->plane[0].dst.stride, tx_type);
-  } else {
-    x->fwd_txm4x4(src_diff, coeff, 8 << bwl);
-    x->quantize_b_4x4(x, ib, tx_type, 16);
-    vp9_inverse_transform_b_4x4_add(&x->e_mbd, xd->plane[0].eobs[ib],
-                                BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
-                                dst, xd->plane[0].dst.stride);
-  }
+  x->fwd_txm4x4(src_diff, coeff, 8 << bwl);
+  x->quantize_b_4x4(x, ib, DCT_DCT, 16);
+  vp9_inverse_transform_b_4x4_add(&x->e_mbd, xd->plane[0].eobs[ib],
+                              BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
+                              dst, xd->plane[0].dst.stride);
 }
 
 void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 3f2061c..b7f60b1 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -641,7 +641,7 @@
   else
     b_mode = mode;
 
-  assert(b_mode >= B_DC_PRED && b_mode <= B_TM_PRED);
+  assert(b_mode >= DC_PRED && b_mode <= TM_PRED);
 
   plane_b_size = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
   vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, b_mode,
diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c
index c4b42d6..171b44b 100644
--- a/vp9/encoder/vp9_modecosts.c
+++ b/vp9/encoder/vp9_modecosts.c
@@ -18,11 +18,11 @@
 void vp9_init_mode_costs(VP9_COMP *c) {
   VP9_COMMON *x = &c->common;
   const vp9_tree_p T = vp9_bmode_tree;
-  const vp9_tree_p KT = vp9_kf_bmode_tree;
+  const vp9_tree_p KT = vp9_bmode_tree;
   int i, j;
 
-  for (i = 0; i < VP9_KF_BINTRAMODES; i++) {
-    for (j = 0; j < VP9_KF_BINTRAMODES; j++) {
+  for (i = 0; i < VP9_BINTRAMODES; i++) {
+    for (j = 0; j < VP9_BINTRAMODES; j++) {
       vp9_cost_tokens((int *)c->mb.bmode_costs[i][j],
                       x->kf_bmode_prob[i][j], KT);
     }
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index d1b98fa..d42bcbb 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -258,6 +258,9 @@
     init_done = 1;
   }
 }
+#ifdef PACKET_TESTING
+extern FILE *vpxlogc;
+#endif
 
 static void setup_features(VP9_COMP *cpi) {
   MACROBLOCKD *xd = &cpi->mb.e_mbd;
@@ -2416,8 +2419,7 @@
 
   if (cm->filter_level > 0) {
     vp9_set_alt_lf_level(cpi, cm->filter_level);
-    vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level, 0,
-                          cm->dering_enabled);
+    vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level, 0);
   }
 
   vp9_extend_frame_borders(cm->frame_to_show,
@@ -3333,7 +3335,7 @@
     recon_err = vp9_calc_ss_err(cpi->Source,
                                 &cm->yv12_fb[cm->new_fb_idx]);
 
-    if (cpi->twopass.total_left_stats->coded_error != 0.0)
+    if (cpi->twopass.total_left_stats.coded_error != 0.0)
       fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d"
               "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
               "%6d %6d %5d %5d %5d %8.2f %10d %10.3f"
@@ -3356,9 +3358,9 @@
               cm->frame_type, cpi->gfu_boost,
               cpi->twopass.est_max_qcorrection_factor,
               (int)cpi->twopass.bits_left,
-              cpi->twopass.total_left_stats->coded_error,
+              cpi->twopass.total_left_stats.coded_error,
               (double)cpi->twopass.bits_left /
-              cpi->twopass.total_left_stats->coded_error,
+              cpi->twopass.total_left_stats.coded_error,
               cpi->tot_recode_hits, recon_err, cpi->kf_boost,
               cpi->kf_zeromotion_pct);
     else
@@ -3385,7 +3387,7 @@
               cm->frame_type, cpi->gfu_boost,
               cpi->twopass.est_max_qcorrection_factor,
               (int)cpi->twopass.bits_left,
-              cpi->twopass.total_left_stats->coded_error,
+              cpi->twopass.total_left_stats.coded_error,
               cpi->tot_recode_hits, recon_err, cpi->kf_boost,
               cpi->kf_zeromotion_pct);
 
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index cb0e5da..15f9571 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -84,7 +84,7 @@
   vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1];
   vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
   vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1];
-  vp9_prob bmode_prob[VP9_NKF_BINTRAMODES - 1];
+  vp9_prob bmode_prob[VP9_BINTRAMODES - 1];
   vp9_prob partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
 
   vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
@@ -415,7 +415,7 @@
 
   int sb_ymode_count [VP9_I32X32_MODES];
   int ymode_count[VP9_YMODES];        /* intra MB type cts this frame */
-  int bmode_count[VP9_NKF_BINTRAMODES];
+  int bmode_count[VP9_BINTRAMODES];
   int y_uv_mode_count[VP9_YMODES][VP9_UV_MODES];
   unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
 
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index d69b3a3..a87d058 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -163,7 +163,7 @@
 
   // Get baseline error score
   vp9_set_alt_lf_level(cpi, filt_mid);
-  vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_mid, 1, 0);
+  vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_mid, 1);
 
   best_err = vp9_calc_ss_err(sd, cm->frame_to_show);
   filt_best = filt_mid;
@@ -188,7 +188,7 @@
     if ((filt_direction <= 0) && (filt_low != filt_mid)) {
       // Get Low filter error score
       vp9_set_alt_lf_level(cpi, filt_low);
-      vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_low, 1, 0);
+      vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_low, 1);
 
       filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
 
@@ -208,7 +208,7 @@
     // Now look at filt_high
     if ((filt_direction >= 0) && (filt_high != filt_mid)) {
       vp9_set_alt_lf_level(cpi, filt_high);
-      vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_high, 1, 0);
+      vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_high, 1);
 
       filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
 
@@ -233,30 +233,4 @@
   }
 
   cm->filter_level = filt_best;
-
-#if CONFIG_LOOP_DERING
-  /* Decide whether to turn on deringing filter */
-  {  // NOLINT
-    int best_dering = 0;
-    int this_dering;
-    int last_err_diff = INT_MAX;
-
-    for (this_dering = 1; this_dering <= 16; this_dering++) {
-      vp9_set_alt_lf_level(cpi, filt_best);
-      vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_high, 1, this_dering);
-      filt_err = vp9_calc_ss_err(sd, cm->frame_to_show);
-      vp8_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
-      if (filt_err < best_err) {
-        best_err = filt_err;
-        best_dering = this_dering;
-        last_err_diff = INT_MAX;
-      } else {
-        if (filt_err - best_err > last_err_diff)
-          break;
-        last_err_diff = filt_err - best_err;
-      }
-    }
-    cm->dering_enabled = best_dering;
-  }
-#endif
 }
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 45b0c0c..2eb3f9b 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -576,7 +576,7 @@
 }
 
 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
-                                     B_PREDICTION_MODE *best_mode,
+                                     MB_PREDICTION_MODE *best_mode,
                                      int *bmode_costs,
                                      ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                      int *bestrate, int *bestratey,
@@ -737,7 +737,7 @@
     for (idx = 0; idx < 2; idx += bw) {
       MODE_INFO *const mic = xd->mode_info_context;
       const int mis = xd->mode_info_stride;
-      B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
+      MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
       int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
       int UNINITIALIZED_IS_SAFE(d);
       i = idy * 2 + idx;
@@ -940,7 +940,7 @@
 
 static int labels2mode(MACROBLOCK *x,
                        int const *labelings, int which_label,
-                       B_PREDICTION_MODE this_mode,
+                       MB_PREDICTION_MODE this_mode,
                        int_mv *this_mv, int_mv *this_second_mv,
                        int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                        int_mv seg_mvs[MAX_REF_FRAMES - 1],
@@ -1129,7 +1129,7 @@
   int r;
   int d;
   int segment_yrate;
-  B_PREDICTION_MODE modes[4];
+  MB_PREDICTION_MODE modes[4];
   int_mv mvs[4], second_mvs[4];
   int eobs[4];
   int mvthresh;
@@ -1890,10 +1890,8 @@
                                  int *rate_y, int *distortion_y,
                                  int *rate_uv, int *distortion_uv,
                                  int *mode_excluded, int *disable_skip,
-                                 int mode_index,
                                  INTERPOLATIONFILTERTYPE *best_filter,
-                                 int_mv frame_mv[MB_MODE_COUNT]
-                                                [MAX_REF_FRAMES],
+                                 int_mv *frame_mv,
                                  YV12_BUFFER_CONFIG **scaled_ref_frame,
                                  int mi_row, int mi_col,
                                  int_mv single_newmv[MAX_REF_FRAMES]) {
@@ -1968,8 +1966,8 @@
           scaled_first_yv12 = xd->plane[0].pre[0];
 
           // Initialize mv using single prediction mode result.
-          frame_mv[NEWMV][refs[0]].as_int = single_newmv[refs[0]].as_int;
-          frame_mv[NEWMV][refs[1]].as_int = single_newmv[refs[1]].as_int;
+          frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
+          frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
 
           // Allow joint search multiple times iteratively for each ref frame
           // and break out the search loop if it couldn't find better mv.
@@ -1994,7 +1992,7 @@
             vp9_build_inter_predictor(ref_yv12[!id].buf,
                                       ref_yv12[!id].stride,
                                       second_pred, pw,
-                                      &frame_mv[NEWMV][refs[!id]],
+                                      &frame_mv[refs[!id]],
                                       &xd->scale_factor[!id],
                                       pw, ph, 0,
                                       &xd->subpix);
@@ -2005,7 +2003,7 @@
             vp9_clamp_mv_min_max(x, &ref_mv[id]);
 
             // Use mv result from single mode as mvp.
-            tmp_mv.as_int = frame_mv[NEWMV][refs[id]].as_int;
+            tmp_mv.as_int = frame_mv[refs[id]].as_int;
 
             tmp_mv.as_mv.col >>= 3;
             tmp_mv.as_mv.row >>= 3;
@@ -2040,7 +2038,7 @@
               xd->plane[0].pre[0] = scaled_first_yv12;
 
             if (bestsme < last_besterr[id]) {
-            frame_mv[NEWMV][refs[id]].as_int =
+              frame_mv[refs[id]].as_int =
                   xd->mode_info_context->bmi[0].as_mv[1].as_int = tmp_mv.as_int;
               last_besterr[id] = bestsme;
             } else {
@@ -2066,14 +2064,14 @@
           vpx_free(second_pred);
         }
 
-        if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV ||
-            frame_mv[NEWMV][refs[1]].as_int == INVALID_MV)
+        if (frame_mv[refs[0]].as_int == INVALID_MV ||
+            frame_mv[refs[1]].as_int == INVALID_MV)
           return INT64_MAX;
-        *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[0]],
+        *rate2 += vp9_mv_bit_cost(&frame_mv[refs[0]],
                                   &ref_mv[0],
                                   x->nmvjointcost, x->mvcost, 96,
                                   x->e_mbd.allow_high_precision_mv);
-        *rate2 += vp9_mv_bit_cost(&frame_mv[NEWMV][refs[1]],
+        *rate2 += vp9_mv_bit_cost(&frame_mv[refs[1]],
                                   &ref_mv[1],
                                   x->nmvjointcost, x->mvcost, 96,
                                   x->e_mbd.allow_high_precision_mv);
@@ -2140,7 +2138,7 @@
                                        x->nmvjointcost, x->mvcost,
                                        &dis, &sse);
         }
-        frame_mv[NEWMV][refs[0]].as_int =
+        frame_mv[refs[0]].as_int =
           xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
         single_newmv[refs[0]].as_int = tmp_mv.as_int;
 
@@ -2165,7 +2163,7 @@
       break;
   }
   for (i = 0; i < num_refs; ++i) {
-    cur_mv[i] = frame_mv[this_mode][refs[i]];
+    cur_mv[i] = frame_mv[refs[i]];
     // Clip "next_nearest" so that it does not extend to far out of image
     if (this_mode == NEWMV)
       assert(!clamp_mv2(&cur_mv[i], xd));
@@ -2869,7 +2867,7 @@
                                   &rate_y, &distortion_y,
                                   &rate_uv, &distortion_uv,
                                   &mode_excluded, &disable_skip,
-                                  mode_index, &tmp_best_filter, frame_mv,
+                                  &tmp_best_filter, frame_mv[this_mode],
                                   scaled_ref_frame, mi_row, mi_col,
                                   single_newmv);
       if (this_rd == INT64_MAX)
diff --git a/vp9/encoder/vp9_write_bit_buffer.h b/vp9/encoder/vp9_write_bit_buffer.h
deleted file mode 100644
index 88da236..0000000
--- a/vp9/encoder/vp9_write_bit_buffer.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_BIT_WRITE_BUFFER_H_
-#define VP9_BIT_WRITE_BUFFER_H_
-
-#include "vpx/vpx_integer.h"
-
-struct vp9_write_bit_buffer {
-  uint8_t *const bit_buffer;
-  size_t bit_offset;
-};
-
-static void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) {
-  const int off = wb->bit_offset;
-  const int p = off / CHAR_BIT;
-  const int q = /*CHAR_BIT - 1 -*/ off % CHAR_BIT;
-  wb->bit_buffer[p] &= ~(1 << q);
-  wb->bit_buffer[p] |= bit << q;
-  wb->bit_offset = off + 1;
-}
-
-static void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb,
-                              int data, int bits) {
-  int bit;
-  for (bit = bits - 1; bit >= 0; bit--)
-    vp9_wb_write_bit(wb, (data >> bit) & 1);
-}
-
-
-#endif  // VP9_BIT_WRITE_BUFFER_H_
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 7328914..147804d 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -38,6 +38,7 @@
 VP9_COMMON_SRCS-yes += common/vp9_enums.h
 VP9_COMMON_SRCS-yes += common/vp9_extend.h
 VP9_COMMON_SRCS-yes += common/vp9_findnearmv.h
+VP9_COMMON_SRCS-yes += common/vp9_header.h
 VP9_COMMON_SRCS-yes += common/vp9_idct.h
 VP9_COMMON_SRCS-yes += common/vp9_invtrans.h
 VP9_COMMON_SRCS-yes += common/vp9_loopfilter.h
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 25fd4b3..ee11304 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -224,7 +224,7 @@
     si->is_kf = 0;
 
     if (data_sz >= 10 && !(data[0] & 0x01)) { /* I-Frame */
-      const uint8_t *c = data + HEADER_SIZE_IN_BYTES;
+      const uint8_t *c = data + 3;
       si->is_kf = 1;
 
       /* vet via sync code */
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 882449d..42ab02d 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -19,8 +19,6 @@
 
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
 VP9_CX_SRCS-yes += encoder/vp9_boolhuff.c
-VP9_CX_SRCS-yes += encoder/vp9_boolhuff.h
-VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.h
 VP9_CX_SRCS-yes += encoder/vp9_dct.c
 VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c
 VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h
@@ -29,6 +27,7 @@
 VP9_CX_SRCS-yes += encoder/vp9_encodemv.c
 VP9_CX_SRCS-yes += encoder/vp9_firstpass.c
 VP9_CX_SRCS-yes += encoder/vp9_block.h
+VP9_CX_SRCS-yes += encoder/vp9_boolhuff.h
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.h
 VP9_CX_SRCS-yes += encoder/vp9_encodeintra.h
 VP9_CX_SRCS-yes += encoder/vp9_encodemb.h
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
index 92d5a86..3be0b6d 100644
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -19,12 +19,11 @@
 
 VP9_DX_SRCS-yes += decoder/vp9_asm_dec_offsets.c
 VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.c
-VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.h
-VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.h
 VP9_DX_SRCS-yes += decoder/vp9_decodemv.c
 VP9_DX_SRCS-yes += decoder/vp9_decodframe.c
 VP9_DX_SRCS-yes += decoder/vp9_decodframe.h
 VP9_DX_SRCS-yes += decoder/vp9_detokenize.c
+VP9_DX_SRCS-yes += decoder/vp9_dboolhuff.h
 VP9_DX_SRCS-yes += decoder/vp9_decodemv.h
 VP9_DX_SRCS-yes += decoder/vp9_detokenize.h
 VP9_DX_SRCS-yes += decoder/vp9_onyxd.h