Merge "Exit with error when vpxenc cannot correctly scale the input."
diff --git a/build/make/Android.mk b/build/make/Android.mk
index 826ff2f..816334e 100644
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -38,8 +38,9 @@
 # For this we import the 'cpufeatures' module from the NDK sources.
 # libvpx can also be configured without this runtime detection method.
 # Configuring with --disable-runtime-cpu-detect will assume presence of NEON.
-# Configuring with --disable-runtime-cpu-detect --disable-neon will remove any
-# NEON dependency.
+# Configuring with --disable-runtime-cpu-detect --disable-neon \
+#     --disable-neon-asm
+# will remove any NEON dependency.
 
 # To change to building armeabi, run ./libvpx/configure again, but with
 # --target=arm5te-android-gcc and modify the Application.mk file to
diff --git a/examples/set_maps.c b/examples/set_maps.c
index 4343832..4ba38ee 100644
--- a/examples/set_maps.c
+++ b/examples/set_maps.c
@@ -64,7 +64,8 @@
 static void set_roi_map(const vpx_codec_enc_cfg_t *cfg,
                         vpx_codec_ctx_t *codec) {
   unsigned int i;
-  vpx_roi_map_t roi = {0};
+  vpx_roi_map_t roi;
+  memset(&roi, 0, sizeof(roi));
 
   roi.rows = (cfg->g_h + 15) / 16;
   roi.cols = (cfg->g_w + 15) / 16;
@@ -97,7 +98,7 @@
 static void set_active_map(const vpx_codec_enc_cfg_t *cfg,
                            vpx_codec_ctx_t *codec) {
   unsigned int i;
-  vpx_active_map_t map = {0};
+  vpx_active_map_t map = {0, 0, 0};
 
   map.rows = (cfg->g_h + 15) / 16;
   map.cols = (cfg->g_w + 15) / 16;
@@ -114,7 +115,7 @@
 
 static void unset_active_map(const vpx_codec_enc_cfg_t *cfg,
                              vpx_codec_ctx_t *codec) {
-  vpx_active_map_t map = {0};
+  vpx_active_map_t map = {0, 0, 0};
 
   map.rows = (cfg->g_h + 15) / 16;
   map.cols = (cfg->g_w + 15) / 16;
@@ -153,22 +154,23 @@
 
 int main(int argc, char **argv) {
   FILE *infile = NULL;
-  vpx_codec_ctx_t codec = {0};
-  vpx_codec_enc_cfg_t cfg = {0};
+  vpx_codec_ctx_t codec;
+  vpx_codec_enc_cfg_t cfg;
   int frame_count = 0;
-  vpx_image_t raw = {0};
+  vpx_image_t raw;
   vpx_codec_err_t res;
-  VpxVideoInfo info = {0};
+  VpxVideoInfo info;
   VpxVideoWriter *writer = NULL;
   const VpxInterface *encoder = NULL;
   const int fps = 2;        // TODO(dkovalev) add command line argument
   const double bits_per_pixel_per_frame = 0.067;
 
   exec_name = argv[0];
-
   if (argc != 6)
     die("Invalid number of arguments");
 
+  memset(&info, 0, sizeof(info));
+
   encoder = get_vpx_encoder_by_name(argv[1]);
   if (!encoder)
     die("Unsupported codec.");
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index cbb4036..a6dcc98 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -341,6 +341,9 @@
   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
     const InterpKernel *filters =
         vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
+    const InterpKernel *const eighttap_smooth =
+        vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
+
     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
         filter_block2d_8_c(in, kInputStride,
@@ -348,7 +351,7 @@
                            ref, kOutputStride,
                            Width(), Height());
 
-        if (filters == vp9_sub_pel_filters_8lp || (filter_x && filter_y))
+        if (filters == eighttap_smooth || (filter_x && filter_y))
           REGISTER_STATE_CHECK(
               UUT_->hv8_(in, kInputStride, out, kOutputStride,
                          filters[filter_x], 16, filters[filter_y], 16,
@@ -396,6 +399,8 @@
   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
     const InterpKernel *filters =
         vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank));
+    const InterpKernel *const eighttap_smooth =
+        vp9_get_interp_kernel(EIGHTTAP_SMOOTH);
 
     for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
       for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
@@ -404,7 +409,7 @@
                                    ref, kOutputStride,
                                    Width(), Height());
 
-        if (filters == vp9_sub_pel_filters_8lp || (filter_x && filter_y))
+        if (filters == eighttap_smooth || (filter_x && filter_y))
           REGISTER_STATE_CHECK(
               UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
                              filters[filter_x], 16, filters[filter_y], 16,
@@ -544,6 +549,7 @@
 TEST_P(ConvolveTest, CheckScalingFiltering) {
   uint8_t* const in = input();
   uint8_t* const out = output();
+  const InterpKernel *const eighttap = vp9_get_interp_kernel(EIGHTTAP);
 
   SetConstantInput(127);
 
@@ -551,8 +557,8 @@
     for (int step = 1; step <= 32; ++step) {
       /* Test the horizontal and vertical filters in combination. */
       REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
-                                      vp9_sub_pel_filters_8[frac], step,
-                                      vp9_sub_pel_filters_8[frac], step,
+                                      eighttap[frac], step,
+                                      eighttap[frac], step,
                                       Width(), Height()));
 
       CheckGuardBlocks();
diff --git a/vp8/common/arm/dequantize_arm.c b/vp8/common/arm/dequantize_arm.c
index 3e37e08..1f8157f 100644
--- a/vp8/common/arm/dequantize_arm.c
+++ b/vp8/common/arm/dequantize_arm.c
@@ -12,26 +12,9 @@
 #include "vpx_config.h"
 #include "vp8/common/blockd.h"
 
-#if HAVE_NEON_ASM
-extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ);
-#endif
-
 #if HAVE_MEDIA
 extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
-#endif
 
-#if HAVE_NEON_ASM
-
-void vp8_dequantize_b_neon(BLOCKD *d, short *DQC)
-{
-    short *DQ  = d->dqcoeff;
-    short *Q   = d->qcoeff;
-
-    vp8_dequantize_b_loop_neon(Q, DQC, DQ);
-}
-#endif
-
-#if HAVE_MEDIA
 void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
 {
     short *DQ  = d->dqcoeff;
diff --git a/vp8/common/arm/neon/dequantizeb_neon.c b/vp8/common/arm/neon/dequantizeb_neon.c
index 60f69c8..54e709d 100644
--- a/vp8/common/arm/neon/dequantizeb_neon.c
+++ b/vp8/common/arm/neon/dequantizeb_neon.c
@@ -10,18 +10,16 @@
 
 #include <arm_neon.h>
 
-void vp8_dequantize_b_loop_neon(
-        int16_t *Q,
-        int16_t *DQC,
-        int16_t *DQ) {
+#include "vp8/common/blockd.h"
+
+void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) {
     int16x8x2_t qQ, qDQC, qDQ;
 
-    qQ   = vld2q_s16(Q);
+    qQ   = vld2q_s16(d->qcoeff);
     qDQC = vld2q_s16(DQC);
 
     qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
     qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
 
-    vst2q_s16(DQ, qDQ);
-    return;
+    vst2q_s16(d->dqcoeff, qDQ);
 }
diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c
index 765fc3a..e55a33c 100644
--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@@ -14,7 +14,7 @@
 #include "vp8/common/blockd.h"
 #include "vpx_mem/vpx_mem.h"
 
-#if HAVE_NEON_ARM
+#if HAVE_NEON_ASM
 extern void vp8_build_intra_predictors_mby_neon_func(
     unsigned char *y_buffer,
     unsigned char *ypred_ptr,
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index 83e8fa0..cbfd76a 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -29,9 +29,8 @@
 # Dequant
 #
 add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc";
-specialize qw/vp8_dequantize_b mmx media neon_asm/;
+specialize qw/vp8_dequantize_b mmx media neon/;
 $vp8_dequantize_b_media=vp8_dequantize_b_v6;
-$vp8_dequantize_b_neon_asm=vp8_dequantize_b_neon;
 
 add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride";
 specialize qw/vp8_dequant_idct_add mmx media neon dspr2/;
@@ -553,7 +552,7 @@
 # Denoiser filter
 #
 if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
-    add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude";
+    add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
     specialize qw/vp8_denoiser_filter sse2 neon/;
 }
 
diff --git a/vp8/encoder/arm/neon/denoising_neon.c b/vp8/encoder/arm/neon/denoising_neon.c
index 1bebe8f..32ce65a 100644
--- a/vp8/encoder/arm/neon/denoising_neon.c
+++ b/vp8/encoder/arm/neon/denoising_neon.c
@@ -50,7 +50,8 @@
                              unsigned char *running_avg_y,
                              int running_avg_y_stride,
                              unsigned char *sig, int sig_stride,
-                             unsigned int motion_magnitude) {
+                             unsigned int motion_magnitude,
+                             int increase_denoising) {
     /* If motion_magnitude is small, making the denoiser more aggressive by
      * increasing the adjustment for each level, level1 adjustment is
      * increased, the deltas stay the same.
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index dd733e5..34879cf 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -125,6 +125,7 @@
 
     int optimize;
     int q_index;
+    int increase_denoising;
 
 #if CONFIG_TEMPORAL_DENOISING
     MB_PREDICTION_MODE best_sse_inter_mode;
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index bfce280..1e645fb 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -21,6 +21,7 @@
  */
 static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20;
 static const unsigned int SSE_THRESHOLD = 16 * 16 * 40;
+static const unsigned int SSE_THRESHOLD_HIGH = 16 * 16 * 60;
 
 /*
  * The filter function was modified to reduce the computational complexity.
@@ -54,20 +55,29 @@
 int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride,
                           unsigned char *running_avg_y, int avg_y_stride,
                           unsigned char *sig, int sig_stride,
-                          unsigned int motion_magnitude)
+                          unsigned int motion_magnitude,
+                          int increase_denoising)
 {
     unsigned char *running_avg_y_start = running_avg_y;
     unsigned char *sig_start = sig;
-    int r, c, i;
+    int sum_diff_thresh;
+    int r, c;
     int sum_diff = 0;
     int adj_val[3] = {3, 4, 6};
-
+    int shift_inc1 = 0;
+    int shift_inc2 = 1;
     /* If motion_magnitude is small, making the denoiser more aggressive by
-     * increasing the adjustment for each level. */
+     * increasing the adjustment for each level. Add another increment for
+     * blocks that have the increase_denoising flag set. */
     if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD)
     {
-        for (i = 0; i < 3; i++)
-            adj_val[i] += 1;
+      if (increase_denoising) {
+        shift_inc1 = 1;
+        shift_inc2 = 2;
+      }
+      adj_val[0] += shift_inc2;
+      adj_val[1] += shift_inc2;
+      adj_val[2] += shift_inc2;
     }
 
     for (r = 0; r < 16; ++r)
@@ -81,8 +91,9 @@
             diff = mc_running_avg_y[c] - sig[c];
             absdiff = abs(diff);
 
-            /* When |diff| < 4, use pixel value from last denoised raw. */
-            if (absdiff <= 3)
+            // When |diff| <= 3 + shift_inc1, use pixel value from
+            // last denoised raw.
+            if (absdiff <= 3 + shift_inc1)
             {
                 running_avg_y[c] = mc_running_avg_y[c];
                 sum_diff += diff;
@@ -123,7 +134,9 @@
         running_avg_y += avg_y_stride;
     }
 
-    if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
+    sum_diff_thresh= SUM_DIFF_THRESHOLD;
+    if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
+    if (abs(sum_diff) > sum_diff_thresh)
         return COPY_BLOCK;
 
     vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride);
@@ -187,7 +200,7 @@
     int mv_row;
     int mv_col;
     unsigned int motion_magnitude2;
-
+    unsigned int sse_thresh;
     MV_REFERENCE_FRAME frame = x->best_reference_frame;
     MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
 
@@ -272,7 +285,10 @@
     mv_row = x->best_sse_mv.as_mv.row;
     mv_col = x->best_sse_mv.as_mv.col;
     motion_magnitude2 = mv_row * mv_row + mv_col * mv_col;
-    if (best_sse > SSE_THRESHOLD || motion_magnitude2
+    sse_thresh = SSE_THRESHOLD;
+    if (x->increase_denoising) sse_thresh = SSE_THRESHOLD_HIGH;
+
+    if (best_sse > sse_thresh || motion_magnitude2
            > 8 * NOISE_MOTION_THRESHOLD)
     {
         decision = COPY_BLOCK;
@@ -290,7 +306,8 @@
         /* Filter. */
         decision = vp8_denoiser_filter(mc_running_avg_y, mc_avg_y_stride,
                                          running_avg_y, avg_y_stride,
-                                         x->thismb, 16, motion_magnitude2);
+                                         x->thismb, 16, motion_magnitude2,
+                                         x->increase_denoising);
     }
     if (decision == COPY_BLOCK)
     {
diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h
index cc9913a..ae744d2 100644
--- a/vp8/encoder/denoising.h
+++ b/vp8/encoder/denoising.h
@@ -18,6 +18,7 @@
 #endif
 
 #define SUM_DIFF_THRESHOLD (16 * 16 * 2)
+#define SUM_DIFF_THRESHOLD_HIGH (16 * 16 * 3)
 #define MOTION_MAGNITUDE_THRESHOLD (8*3)
 
 enum vp8_denoiser_decision
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 39a3baf..cf6a82f 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -1177,6 +1177,7 @@
             x->best_reference_frame = best_mbmode.ref_frame;
             best_sse = best_rd_sse;
         }
+        x->increase_denoising = 0;
         vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
                                 recon_yoffset, recon_uvoffset);
 
diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c
index d1f76b2c..5112f89 100644
--- a/vp8/encoder/x86/denoising_sse2.c
+++ b/vp8/encoder/x86/denoising_sse2.c
@@ -26,19 +26,24 @@
                              int mc_avg_y_stride,
                              unsigned char *running_avg_y, int avg_y_stride,
                              unsigned char *sig, int sig_stride,
-                             unsigned int motion_magnitude)
+                             unsigned int motion_magnitude,
+                             int increase_denoising)
 {
     unsigned char *running_avg_y_start = running_avg_y;
     unsigned char *sig_start = sig;
+    int sum_diff_thresh;
     int r;
+    int shift_inc  = (increase_denoising &&
+        motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0;
     __m128i acc_diff = _mm_setzero_si128();
     const __m128i k_0 = _mm_setzero_si128();
-    const __m128i k_4 = _mm_set1_epi8(4);
+    const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
     const __m128i k_8 = _mm_set1_epi8(8);
     const __m128i k_16 = _mm_set1_epi8(16);
     /* Modify each level's adjustment according to motion_magnitude. */
     const __m128i l3 = _mm_set1_epi8(
-                      (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 : 6);
+                       (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
+                        7 + shift_inc : 6);
     /* Difference between level 3 and level 2 is 2. */
     const __m128i l32 = _mm_set1_epi8(2);
     /* Difference between level 2 and level 1 is 1. */
@@ -105,7 +110,9 @@
                  + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11]
                  + s.e[12] + s.e[13] + s.e[14] + s.e[15];
 
-        if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
+        sum_diff_thresh = SUM_DIFF_THRESHOLD;
+        if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
+        if (abs(sum_diff) > sum_diff_thresh)
         {
             return COPY_BLOCK;
         }
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 911608c..8282547 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -158,10 +158,10 @@
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM)
 
 # common (neon)
-VP8_COMMON_SRCS-$(ARCH_NEON_ASM)  += common/arm/reconintra_arm.c
+#VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/reconintra_arm.c
 VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/loopfilter_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
+#VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/idct_blk_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/idct_dequant_full_2x_neon$(ASM)
diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c
index 8f150a4..d2522bb 100644
--- a/vp9/common/vp9_debugmodes.c
+++ b/vp9/common/vp9_debugmodes.c
@@ -24,10 +24,9 @@
  */
 static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor,
                           size_t member_offset) {
-  int mi_row;
-  int mi_col;
+  int mi_row, mi_col;
   int mi_index = 0;
-  MODE_INFO **mi_8x8 = cm->mi_grid_visible;
+  MODE_INFO **mi = cm->mi_grid_visible;
   int rows = cm->mi_rows;
   int cols = cm->mi_cols;
   char prefix = descriptor[0];
@@ -38,7 +37,7 @@
     fprintf(file, "%c ", prefix);
     for (mi_col = 0; mi_col < cols; mi_col++) {
       fprintf(file, "%2d ",
-              *((int*) ((char *) (&mi_8x8[mi_index]->mbmi) +
+              *((int*) ((char *) (&mi[mi_index]->mbmi) +
                         member_offset)));
       mi_index++;
     }
@@ -52,7 +51,7 @@
   int mi_col;
   int mi_index = 0;
   FILE *mvs = fopen(file, "a");
-  MODE_INFO **mi_8x8 = cm->mi_grid_visible;
+  MODE_INFO **mi = cm->mi_grid_visible;
   int rows = cm->mi_rows;
   int cols = cm->mi_cols;
 
@@ -67,8 +66,8 @@
   for (mi_row = 0; mi_row < rows; mi_row++) {
     fprintf(mvs, "V ");
     for (mi_col = 0; mi_col < cols; mi_col++) {
-      fprintf(mvs, "%4d:%4d ", mi_8x8[mi_index]->mbmi.mv[0].as_mv.row,
-                               mi_8x8[mi_index]->mbmi.mv[0].as_mv.col);
+      fprintf(mvs, "%4d:%4d ", mi[mi_index]->mbmi.mv[0].as_mv.row,
+                               mi[mi_index]->mbmi.mv[0].as_mv.col);
       mi_index++;
     }
     fprintf(mvs, "\n");
diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
index 7474a88..afcdf22 100644
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -32,7 +32,8 @@
 };
 
 // Lagrangian interpolation filter
-const InterpKernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS] = {
+DECLARE_ALIGNED(256, const InterpKernel,
+                vp9_sub_pel_filters_8[SUBPEL_SHIFTS]) = {
   { 0,   0,   0, 128,   0,   0,   0,  0},
   { 0,   1,  -5, 126,   8,  -3,   1,  0},
   { -1,   3, -10, 122,  18,  -6,   2,  0},
@@ -52,7 +53,8 @@
 };
 
 // DCT based filter
-const InterpKernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS] = {
+DECLARE_ALIGNED(256, const InterpKernel,
+                vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
   {0,   0,   0, 128,   0,   0,   0, 0},
   {-1,   3,  -7, 127,   8,  -3,   1, 0},
   {-2,   5, -13, 125,  17,  -6,   3, -1},
@@ -72,7 +74,8 @@
 };
 
 // freqmultiplier = 0.5
-const InterpKernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS] = {
+DECLARE_ALIGNED(256, const InterpKernel,
+                vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
   { 0,  0,  0, 128,  0,  0,  0,  0},
   {-3, -1, 32,  64, 38,  1, -3,  0},
   {-2, -2, 29,  63, 41,  2, -3,  0},
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index 29d3867..8c359c7 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -41,12 +41,6 @@
 
 DECLARE_ALIGNED(256, extern const InterpKernel,
                 vp9_bilinear_filters[SUBPEL_SHIFTS]);
-DECLARE_ALIGNED(256, extern const InterpKernel,
-                vp9_sub_pel_filters_8[SUBPEL_SHIFTS]);
-DECLARE_ALIGNED(256, extern const InterpKernel,
-                vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]);
-DECLARE_ALIGNED(256, extern const InterpKernel,
-                vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]);
 
 // The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
 // filter kernel as a 2 tap filter.
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 5b43e23..efd0249 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -619,12 +619,12 @@
 // by mi_row, mi_col.
 // TODO(JBB): This function only works for yv12.
 void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
-                    MODE_INFO **mi_8x8, const int mode_info_stride,
+                    MODE_INFO **mi, const int mode_info_stride,
                     LOOP_FILTER_MASK *lfm) {
   int idx_32, idx_16, idx_8;
   const loop_filter_info_n *const lfi_n = &cm->lf_info;
-  MODE_INFO **mip = mi_8x8;
-  MODE_INFO **mip2 = mi_8x8;
+  MODE_INFO **mip = mi;
+  MODE_INFO **mip2 = mi;
 
   // These are offsets to the next mi in the 64x64 block. It is what gets
   // added to the mi ptr as we go through each loop.  It helps us to avoids
@@ -1192,32 +1192,33 @@
 }
 
 void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
-                          VP9_COMMON *cm, MACROBLOCKD *xd,
+                          VP9_COMMON *cm,
+                          struct macroblockd_plane planes[MAX_MB_PLANE],
                           int start, int stop, int y_only) {
   const int num_planes = y_only ? 1 : MAX_MB_PLANE;
-  int mi_row, mi_col;
+  const int use_420 = y_only || (planes[1].subsampling_y == 1 &&
+                                 planes[1].subsampling_x == 1);
   LOOP_FILTER_MASK lfm;
-  int use_420 = y_only || (xd->plane[1].subsampling_y == 1 &&
-      xd->plane[1].subsampling_x == 1);
+  int mi_row, mi_col;
 
   for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
-    MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride;
+    MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
 
     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
       int plane;
 
-      vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
+      vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
 
       // TODO(JBB): Make setup_mask work for non 420.
       if (use_420)
-        vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride,
+        vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
                        &lfm);
 
       for (plane = 0; plane < num_planes; ++plane) {
         if (use_420)
-          vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
+          vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm);
         else
-          filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col,
+          filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
                                     mi_row, mi_col);
       }
     }
@@ -1239,7 +1240,7 @@
   }
   end_mi_row = start_mi_row + mi_rows_to_filter;
   vp9_loop_filter_frame_init(cm, frame_filter_level);
-  vp9_loop_filter_rows(frame, cm, xd,
+  vp9_loop_filter_rows(frame, cm, xd->plane,
                        start_mi_row, end_mi_row,
                        y_only);
 }
@@ -1247,7 +1248,7 @@
 int vp9_loop_filter_worker(void *arg1, void *arg2) {
   LFWorkerData *const lf_data = (LFWorkerData*)arg1;
   (void)arg2;
-  vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
+  vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
                        lf_data->start, lf_data->stop, lf_data->y_only);
   return 1;
 }
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 83463c5..6fa2773 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -112,15 +112,15 @@
 
 // Apply the loop filter to [start, stop) macro block rows in frame_buffer.
 void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
-                          struct VP9Common *cm, struct macroblockd *xd,
+                          struct VP9Common *cm,
+                          struct macroblockd_plane planes[MAX_MB_PLANE],
                           int start, int stop, int y_only);
 
 typedef struct LoopFilterWorkerData {
   const YV12_BUFFER_CONFIG *frame_buffer;
   struct VP9Common *cm;
-  struct macroblockd xd;  // TODO(jzern): most of this is unnecessary to the
-                          // loopfilter. the planes are necessary as their state
-                          // is changed during decode.
+  struct macroblockd_plane planes[MAX_MB_PLANE];
+
   int start;
   int stop;
   int y_only;
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index 5601a93..9f32104 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -24,61 +24,7 @@
 #include "vp9/common/vp9_systemdependent.h"
 #include "vp9/common/vp9_textblit.h"
 
-#define RGB_TO_YUV(t)                                            \
-  ( (0.257*(float)(t >> 16))  + (0.504*(float)(t >> 8 & 0xff)) + \
-    (0.098*(float)(t & 0xff)) + 16),                             \
-  (-(0.148*(float)(t >> 16))  - (0.291*(float)(t >> 8 & 0xff)) + \
-    (0.439*(float)(t & 0xff)) + 128),                            \
-  ( (0.439*(float)(t >> 16))  - (0.368*(float)(t >> 8 & 0xff)) - \
-    (0.071*(float)(t & 0xff)) + 128)
-
-/* global constants */
-#if 0 && CONFIG_POSTPROC_VISUALIZER
-static const unsigned char PREDICTION_MODE_colors[MB_MODE_COUNT][3] = {
-  { RGB_TO_YUV(0x98FB98) },   /* PaleGreen */
-  { RGB_TO_YUV(0x00FF00) },   /* Green */
-  { RGB_TO_YUV(0xADFF2F) },   /* GreenYellow */
-  { RGB_TO_YUV(0x8F0000) },   /* Dark Red */
-  { RGB_TO_YUV(0x008F8F) },   /* Dark Cyan */
-  { RGB_TO_YUV(0x008F8F) },   /* Dark Cyan */
-  { RGB_TO_YUV(0x008F8F) },   /* Dark Cyan */
-  { RGB_TO_YUV(0x8F0000) },   /* Dark Red */
-  { RGB_TO_YUV(0x8F0000) },   /* Dark Red */
-  { RGB_TO_YUV(0x228B22) },   /* ForestGreen */
-  { RGB_TO_YUV(0x006400) },   /* DarkGreen */
-  { RGB_TO_YUV(0x98F5FF) },   /* Cadet Blue */
-  { RGB_TO_YUV(0x6CA6CD) },   /* Sky Blue */
-  { RGB_TO_YUV(0x00008B) },   /* Dark blue */
-  { RGB_TO_YUV(0x551A8B) },   /* Purple */
-  { RGB_TO_YUV(0xFF0000) }    /* Red */
-  { RGB_TO_YUV(0xCC33FF) },   /* Magenta */
-};
-
-static const unsigned char B_PREDICTION_MODE_colors[INTRA_MODES][3] = {
-  { RGB_TO_YUV(0x6633ff) },   /* Purple */
-  { RGB_TO_YUV(0xcc33ff) },   /* Magenta */
-  { RGB_TO_YUV(0xff33cc) },   /* Pink */
-  { RGB_TO_YUV(0xff3366) },   /* Coral */
-  { RGB_TO_YUV(0x3366ff) },   /* Blue */
-  { RGB_TO_YUV(0xed00f5) },   /* Dark Blue */
-  { RGB_TO_YUV(0x2e00b8) },   /* Dark Purple */
-  { RGB_TO_YUV(0xff6633) },   /* Orange */
-  { RGB_TO_YUV(0x33ccff) },   /* Light Blue */
-  { RGB_TO_YUV(0x8ab800) },   /* Green */
-  { RGB_TO_YUV(0xffcc33) },   /* Light Orange */
-  { RGB_TO_YUV(0x33ffcc) },   /* Aqua */
-  { RGB_TO_YUV(0x66ff33) },   /* Light Green */
-  { RGB_TO_YUV(0xccff33) },   /* Yellow */
-};
-
-static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] = {
-  { RGB_TO_YUV(0x00ff00) },   /* Blue */
-  { RGB_TO_YUV(0x0000ff) },   /* Green */
-  { RGB_TO_YUV(0xffff00) },   /* Yellow */
-  { RGB_TO_YUV(0xff0000) },   /* Red */
-};
-#endif
-
+#if CONFIG_VP9_POSTPROC
 static const short kernel5[] = {
   1, 1, 4, 1, 1
 };
@@ -448,163 +394,6 @@
   }
 }
 
-/* Blend the macro block with a solid colored square.  Leave the
- * edges unblended to give distinction to macro blocks in areas
- * filled with the same color block.
- */
-void vp9_blend_mb_inner_c(uint8_t *y, uint8_t *u, uint8_t *v,
-                          int y1, int u1, int v1, int alpha, int stride) {
-  int i, j;
-  int y1_const = y1 * ((1 << 16) - alpha);
-  int u1_const = u1 * ((1 << 16) - alpha);
-  int v1_const = v1 * ((1 << 16) - alpha);
-
-  y += 2 * stride + 2;
-  for (i = 0; i < 12; i++) {
-    for (j = 0; j < 12; j++) {
-      y[j] = (y[j] * alpha + y1_const) >> 16;
-    }
-    y += stride;
-  }
-
-  stride >>= 1;
-
-  u += stride + 1;
-  v += stride + 1;
-
-  for (i = 0; i < 6; i++) {
-    for (j = 0; j < 6; j++) {
-      u[j] = (u[j] * alpha + u1_const) >> 16;
-      v[j] = (v[j] * alpha + v1_const) >> 16;
-    }
-    u += stride;
-    v += stride;
-  }
-}
-
-/* Blend only the edge of the macro block.  Leave center
- * unblended to allow for other visualizations to be layered.
- */
-void vp9_blend_mb_outer_c(uint8_t *y, uint8_t *u, uint8_t *v,
-                          int y1, int u1, int v1, int alpha, int stride) {
-  int i, j;
-  int y1_const = y1 * ((1 << 16) - alpha);
-  int u1_const = u1 * ((1 << 16) - alpha);
-  int v1_const = v1 * ((1 << 16) - alpha);
-
-  for (i = 0; i < 2; i++) {
-    for (j = 0; j < 16; j++) {
-      y[j] = (y[j] * alpha + y1_const) >> 16;
-    }
-    y += stride;
-  }
-
-  for (i = 0; i < 12; i++) {
-    y[0]  = (y[0] * alpha  + y1_const) >> 16;
-    y[1]  = (y[1] * alpha  + y1_const) >> 16;
-    y[14] = (y[14] * alpha + y1_const) >> 16;
-    y[15] = (y[15] * alpha + y1_const) >> 16;
-    y += stride;
-  }
-
-  for (i = 0; i < 2; i++) {
-    for (j = 0; j < 16; j++) {
-      y[j] = (y[j] * alpha + y1_const) >> 16;
-    }
-    y += stride;
-  }
-
-  stride >>= 1;
-
-  for (j = 0; j < 8; j++) {
-    u[j] = (u[j] * alpha + u1_const) >> 16;
-    v[j] = (v[j] * alpha + v1_const) >> 16;
-  }
-  u += stride;
-  v += stride;
-
-  for (i = 0; i < 6; i++) {
-    u[0] = (u[0] * alpha + u1_const) >> 16;
-    v[0] = (v[0] * alpha + v1_const) >> 16;
-
-    u[7] = (u[7] * alpha + u1_const) >> 16;
-    v[7] = (v[7] * alpha + v1_const) >> 16;
-
-    u += stride;
-    v += stride;
-  }
-
-  for (j = 0; j < 8; j++) {
-    u[j] = (u[j] * alpha + u1_const) >> 16;
-    v[j] = (v[j] * alpha + v1_const) >> 16;
-  }
-}
-
-void vp9_blend_b_c(uint8_t *y, uint8_t *u, uint8_t *v,
-                   int y1, int u1, int v1, int alpha, int stride) {
-  int i, j;
-  int y1_const = y1 * ((1 << 16) - alpha);
-  int u1_const = u1 * ((1 << 16) - alpha);
-  int v1_const = v1 * ((1 << 16) - alpha);
-
-  for (i = 0; i < 4; i++) {
-    for (j = 0; j < 4; j++) {
-      y[j] = (y[j] * alpha + y1_const) >> 16;
-    }
-    y += stride;
-  }
-
-  stride >>= 1;
-
-  for (i = 0; i < 2; i++) {
-    for (j = 0; j < 2; j++) {
-      u[j] = (u[j] * alpha + u1_const) >> 16;
-      v[j] = (v[j] * alpha + v1_const) >> 16;
-    }
-    u += stride;
-    v += stride;
-  }
-}
-
-static void constrain_line(int x0, int *x1, int y0, int *y1,
-                           int width, int height) {
-  int dx;
-  int dy;
-
-  if (*x1 > width) {
-    dx = *x1 - x0;
-    dy = *y1 - y0;
-
-    *x1 = width;
-    if (dx)
-      *y1 = ((width - x0) * dy) / dx + y0;
-  }
-  if (*x1 < 0) {
-    dx = *x1 - x0;
-    dy = *y1 - y0;
-
-    *x1 = 0;
-    if (dx)
-      *y1 = ((0 - x0) * dy) / dx + y0;
-  }
-  if (*y1 > height) {
-    dx = *x1 - x0;
-    dy = *y1 - y0;
-
-    *y1 = height;
-    if (dy)
-      *x1 = ((height - y0) * dx) / dy + x0;
-  }
-  if (*y1 < 0) {
-    dx = *x1 - x0;
-    dy = *y1 - y0;
-
-    *y1 = 0;
-    if (dy)
-      *x1 = ((0 - y0) * dx) / dy + x0;
-  }
-}
-
 int vp9_post_proc_frame(struct VP9Common *cm,
                         YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *ppflags) {
   const int q = MIN(63, cm->lf.filter_level * 10 / 6);
@@ -643,328 +432,6 @@
                         ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
   }
 
-#if 0 && CONFIG_POSTPROC_VISUALIZER
-  if (flags & VP9D_DEBUG_TXT_FRAME_INFO) {
-    char message[512];
-    snprintf(message, sizeof(message) -1,
-             "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
-             (cm->frame_type == KEY_FRAME),
-             cm->refresh_golden_frame,
-             cm->base_qindex,
-             cm->filter_level,
-             flags,
-             cm->mb_cols, cm->mb_rows);
-    vp9_blit_text(message, ppbuf->y_buffer, ppbuf->y_stride);
-  }
-
-  if (flags & VP9D_DEBUG_TXT_MBLK_MODES) {
-    int i, j;
-    uint8_t *y_ptr;
-    int mb_rows = ppbuf->y_height >> 4;
-    int mb_cols = ppbuf->y_width  >> 4;
-    int mb_index = 0;
-    MODE_INFO *mi = cm->mi;
-
-    y_ptr = post->y_buffer + 4 * post->y_stride + 4;
-
-    /* vp9_filter each macro block */
-    for (i = 0; i < mb_rows; i++) {
-      for (j = 0; j < mb_cols; j++) {
-        char zz[4];
-
-        snprintf(zz, sizeof(zz) - 1, "%c", mi[mb_index].mbmi.mode + 'a');
-
-        vp9_blit_text(zz, y_ptr, post->y_stride);
-        mb_index++;
-        y_ptr += 16;
-      }
-
-      mb_index++; /* border */
-      y_ptr += post->y_stride  * 16 - post->y_width;
-    }
-  }
-
-  if (flags & VP9D_DEBUG_TXT_DC_DIFF) {
-    int i, j;
-    uint8_t *y_ptr;
-    int mb_rows = ppbuf->y_height >> 4;
-    int mb_cols = ppbuf->y_width  >> 4;
-    int mb_index = 0;
-    MODE_INFO *mi = cm->mi;
-
-    y_ptr = post->y_buffer + 4 * post->y_stride + 4;
-
-    /* vp9_filter each macro block */
-    for (i = 0; i < mb_rows; i++) {
-      for (j = 0; j < mb_cols; j++) {
-        char zz[4];
-        int dc_diff = !(mi[mb_index].mbmi.mode != I4X4_PRED &&
-                        mi[mb_index].mbmi.mode != SPLITMV &&
-                        mi[mb_index].mbmi.skip);
-
-        if (cm->frame_type == KEY_FRAME)
-          snprintf(zz, sizeof(zz) - 1, "a");
-        else
-          snprintf(zz, sizeof(zz) - 1, "%c", dc_diff + '0');
-
-        vp9_blit_text(zz, y_ptr, post->y_stride);
-        mb_index++;
-        y_ptr += 16;
-      }
-
-      mb_index++; /* border */
-      y_ptr += post->y_stride  * 16 - post->y_width;
-    }
-  }
-
-  if (flags & VP9D_DEBUG_TXT_RATE_INFO) {
-    char message[512];
-    snprintf(message, sizeof(message),
-             "Bitrate: %10.2f framerate: %10.2f ",
-             cm->bitrate, cm->framerate);
-    vp9_blit_text(message, ppbuf->y_buffer, ppbuf->y_stride);
-  }
-
-  /* Draw motion vectors */
-  if ((flags & VP9D_DEBUG_DRAW_MV) && ppflags->display_mv_flag) {
-    int width  = ppbuf->y_width;
-    int height = ppbuf->y_height;
-    uint8_t *y_buffer = ppbuf->y_buffer;
-    int y_stride = ppbuf->y_stride;
-    MODE_INFO *mi = cm->mi;
-    int x0, y0;
-
-    for (y0 = 0; y0 < height; y0 += 16) {
-      for (x0 = 0; x0 < width; x0 += 16) {
-        int x1, y1;
-
-        if (!(ppflags->display_mv_flag & (1 << mi->mbmi.mode))) {
-          mi++;
-          continue;
-        }
-
-        if (mi->mbmi.mode == SPLITMV) {
-          switch (mi->mbmi.partitioning) {
-            case PARTITIONING_16X8 : {  /* mv_top_bottom */
-              union b_mode_info *bmi = &mi->bmi[0];
-              MV *mv = &bmi->mv.as_mv;
-
-              x1 = x0 + 8 + (mv->col >> 3);
-              y1 = y0 + 4 + (mv->row >> 3);
-
-              constrain_line(x0 + 8, &x1, y0 + 4, &y1, width, height);
-              vp9_blit_line(x0 + 8,  x1, y0 + 4,  y1, y_buffer, y_stride);
-
-              bmi = &mi->bmi[8];
-
-              x1 = x0 + 8 + (mv->col >> 3);
-              y1 = y0 + 12 + (mv->row >> 3);
-
-              constrain_line(x0 + 8, &x1, y0 + 12, &y1, width, height);
-              vp9_blit_line(x0 + 8,  x1, y0 + 12,  y1, y_buffer, y_stride);
-
-              break;
-            }
-            case PARTITIONING_8X16 : {  /* mv_left_right */
-              union b_mode_info *bmi = &mi->bmi[0];
-              MV *mv = &bmi->mv.as_mv;
-
-              x1 = x0 + 4 + (mv->col >> 3);
-              y1 = y0 + 8 + (mv->row >> 3);
-
-              constrain_line(x0 + 4, &x1, y0 + 8, &y1, width, height);
-              vp9_blit_line(x0 + 4,  x1, y0 + 8,  y1, y_buffer, y_stride);
-
-              bmi = &mi->bmi[2];
-
-              x1 = x0 + 12 + (mv->col >> 3);
-              y1 = y0 + 8 + (mv->row >> 3);
-
-              constrain_line(x0 + 12, &x1, y0 + 8, &y1, width, height);
-              vp9_blit_line(x0 + 12,  x1, y0 + 8,  y1, y_buffer, y_stride);
-
-              break;
-            }
-            case PARTITIONING_8X8 : {  /* mv_quarters   */
-              union b_mode_info *bmi = &mi->bmi[0];
-              MV *mv = &bmi->mv.as_mv;
-
-              x1 = x0 + 4 + (mv->col >> 3);
-              y1 = y0 + 4 + (mv->row >> 3);
-
-              constrain_line(x0 + 4, &x1, y0 + 4, &y1, width, height);
-              vp9_blit_line(x0 + 4,  x1, y0 + 4,  y1, y_buffer, y_stride);
-
-              bmi = &mi->bmi[2];
-
-              x1 = x0 + 12 + (mv->col >> 3);
-              y1 = y0 + 4 + (mv->row >> 3);
-
-              constrain_line(x0 + 12, &x1, y0 + 4, &y1, width, height);
-              vp9_blit_line(x0 + 12,  x1, y0 + 4,  y1, y_buffer, y_stride);
-
-              bmi = &mi->bmi[8];
-
-              x1 = x0 + 4 + (mv->col >> 3);
-              y1 = y0 + 12 + (mv->row >> 3);
-
-              constrain_line(x0 + 4, &x1, y0 + 12, &y1, width, height);
-              vp9_blit_line(x0 + 4,  x1, y0 + 12,  y1, y_buffer, y_stride);
-
-              bmi = &mi->bmi[10];
-
-              x1 = x0 + 12 + (mv->col >> 3);
-              y1 = y0 + 12 + (mv->row >> 3);
-
-              constrain_line(x0 + 12, &x1, y0 + 12, &y1, width, height);
-              vp9_blit_line(x0 + 12,  x1, y0 + 12,  y1, y_buffer, y_stride);
-              break;
-            }
-            case PARTITIONING_4X4:
-            default : {
-              union b_mode_info *bmi = mi->bmi;
-              int bx0, by0;
-
-              for (by0 = y0; by0 < (y0 + 16); by0 += 4) {
-                for (bx0 = x0; bx0 < (x0 + 16); bx0 += 4) {
-                  MV *mv = &bmi->mv.as_mv;
-
-                  x1 = bx0 + 2 + (mv->col >> 3);
-                  y1 = by0 + 2 + (mv->row >> 3);
-
-                  constrain_line(bx0 + 2, &x1, by0 + 2, &y1, width, height);
-                  vp9_blit_line(bx0 + 2,  x1, by0 + 2,  y1, y_buffer, y_stride);
-
-                  bmi++;
-                }
-              }
-            }
-          }
-        } else if (is_inter_mode(mi->mbmi.mode)) {
-          MV *mv = &mi->mbmi.mv.as_mv;
-          const int lx0 = x0 + 8;
-          const int ly0 = y0 + 8;
-
-          x1 = lx0 + (mv->col >> 3);
-          y1 = ly0 + (mv->row >> 3);
-
-          if (x1 != lx0 && y1 != ly0) {
-            constrain_line(lx0, &x1, ly0 - 1, &y1, width, height);
-            vp9_blit_line(lx0,  x1, ly0 - 1,  y1, y_buffer, y_stride);
-
-            constrain_line(lx0, &x1, ly0 + 1, &y1, width, height);
-            vp9_blit_line(lx0,  x1, ly0 + 1,  y1, y_buffer, y_stride);
-          } else {
-            vp9_blit_line(lx0,  x1, ly0,  y1, y_buffer, y_stride);
-          }
-        }
-
-        mi++;
-      }
-      mi++;
-    }
-  }
-
-  /* Color in block modes */
-  if ((flags & VP9D_DEBUG_CLR_BLK_MODES)
-      && (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag)) {
-    int y, x;
-    int width  = ppbuf->y_width;
-    int height = ppbuf->y_height;
-    uint8_t *y_ptr = ppbuf->y_buffer;
-    uint8_t *u_ptr = ppbuf->u_buffer;
-    uint8_t *v_ptr = ppbuf->v_buffer;
-    int y_stride = ppbuf->y_stride;
-    MODE_INFO *mi = cm->mi;
-
-    for (y = 0; y < height; y += 16) {
-      for (x = 0; x < width; x += 16) {
-        int Y = 0, U = 0, V = 0;
-
-        if (mi->mbmi.mode == I4X4_PRED &&
-            ((ppflags->display_mb_modes_flag & I4X4_PRED) ||
-             ppflags->display_b_modes_flag)) {
-          int by, bx;
-          uint8_t *yl, *ul, *vl;
-          union b_mode_info *bmi = mi->bmi;
-
-          yl = y_ptr + x;
-          ul = u_ptr + (x >> 1);
-          vl = v_ptr + (x >> 1);
-
-          for (by = 0; by < 16; by += 4) {
-            for (bx = 0; bx < 16; bx += 4) {
-              if ((ppflags->display_b_modes_flag & (1 << mi->mbmi.mode))
-                  || (ppflags->display_mb_modes_flag & I4X4_PRED)) {
-                Y = B_PREDICTION_MODE_colors[bmi->as_mode][0];
-                U = B_PREDICTION_MODE_colors[bmi->as_mode][1];
-                V = B_PREDICTION_MODE_colors[bmi->as_mode][2];
-
-                vp9_blend_b(yl + bx, ul + (bx >> 1), vl + (bx >> 1), Y, U, V,
-                    0xc000, y_stride);
-              }
-              bmi++;
-            }
-
-            yl += y_stride * 4;
-            ul += y_stride * 1;
-            vl += y_stride * 1;
-          }
-        } else if (ppflags->display_mb_modes_flag & (1 << mi->mbmi.mode)) {
-          Y = PREDICTION_MODE_colors[mi->mbmi.mode][0];
-          U = PREDICTION_MODE_colors[mi->mbmi.mode][1];
-          V = PREDICTION_MODE_colors[mi->mbmi.mode][2];
-
-          vp9_blend_mb_inner(y_ptr + x, u_ptr + (x >> 1), v_ptr + (x >> 1),
-                             Y, U, V, 0xc000, y_stride);
-        }
-
-        mi++;
-      }
-      y_ptr += y_stride * 16;
-      u_ptr += y_stride * 4;
-      v_ptr += y_stride * 4;
-
-      mi++;
-    }
-  }
-
-  /* Color in frame reference blocks */
-  if ((flags & VP9D_DEBUG_CLR_FRM_REF_BLKS) &&
-      ppflags->display_ref_frame_flag) {
-    int y, x;
-    int width  = ppbuf->y_width;
-    int height = ppbuf->y_height;
-    uint8_t *y_ptr = ppbuf->y_buffer;
-    uint8_t *u_ptr = ppbuf->u_buffer;
-    uint8_t *v_ptr = ppbuf->v_buffer;
-    int y_stride = ppbuf->y_stride;
-    MODE_INFO *mi = cm->mi;
-
-    for (y = 0; y < height; y += 16) {
-      for (x = 0; x < width; x += 16) {
-        int Y = 0, U = 0, V = 0;
-
-        if (ppflags->display_ref_frame_flag & (1 << mi->mbmi.ref_frame)) {
-          Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
-          U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
-          V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
-
-          vp9_blend_mb_outer(y_ptr + x, u_ptr + (x >> 1), v_ptr + (x >> 1),
-                             Y, U, V, 0xc000, y_stride);
-        }
-
-        mi++;
-      }
-      y_ptr += y_stride * 16;
-      u_ptr += y_stride * 4;
-      v_ptr += y_stride * 4;
-
-      mi++;
-    }
-  }
-#endif
-
   *dest = *ppbuf;
 
   /* handle problem with extending borders */
@@ -975,3 +442,4 @@
 
   return 0;
 }
+#endif
diff --git a/vp9/common/vp9_ppflags.h b/vp9/common/vp9_ppflags.h
index e8b04d2..1644a1b 100644
--- a/vp9/common/vp9_ppflags.h
+++ b/vp9/common/vp9_ppflags.h
@@ -33,12 +33,6 @@
   int post_proc_flag;
   int deblocking_level;
   int noise_level;
-#if CONFIG_POSTPROC_VISUALIZER
-  int display_ref_frame_flag;
-  int display_mb_modes_flag;
-  int display_b_modes_flag;
-  int display_mv_flag;
-#endif  // CONFIG_POSTPROC_VISUALIZER
 } vp9_ppflags_t;
 
 #ifdef __cplusplus
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index e722d6a..edc36d7 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -409,7 +409,7 @@
   }
 }
 
-void vp9_setup_dst_planes(MACROBLOCKD *xd,
+void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
                           const YV12_BUFFER_CONFIG *src,
                           int mi_row, int mi_col) {
   uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
@@ -419,7 +419,7 @@
   int i;
 
   for (i = 0; i < MAX_MB_PLANE; ++i) {
-    struct macroblockd_plane *const pd = &xd->plane[i];
+    struct macroblockd_plane *const pd = &planes[i];
     setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL,
                      pd->subsampling_x, pd->subsampling_y);
   }
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 86f3158..58c596e 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -57,7 +57,8 @@
   dst->stride = stride;
 }
 
-void vp9_setup_dst_planes(MACROBLOCKD *xd, const YV12_BUFFER_CONFIG *src,
+void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
+                          const YV12_BUFFER_CONFIG *src,
                           int mi_row, int mi_col);
 
 void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx,
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 57ee4cd..3124158 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -316,7 +316,7 @@
   // as they are always compared to values that are in 1/8th pel units
   set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
 
-  vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col);
+  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
   return &xd->mi[0]->mbmi;
 }
 
@@ -686,7 +686,7 @@
     LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
     lf_data->frame_buffer = get_frame_new_buffer(cm);
     lf_data->cm = cm;
-    lf_data->xd = pbi->mb;
+    vp9_copy(lf_data->planes, pbi->mb.plane);
     lf_data->stop = 0;
     lf_data->y_only = 0;
     vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
@@ -1350,7 +1350,7 @@
     *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
     // If multiple threads are used to decode tiles, then we use those threads
     // to do parallel loopfiltering.
-    vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0, 0);
+    vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0);
   } else {
     if (do_loopfilter_inline && pbi->lf_worker.data1 == NULL) {
       CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 6f310c7..98b890b 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -32,7 +32,7 @@
 #include "vp9/decoder/vp9_detokenize.h"
 #include "vp9/decoder/vp9_dthread.h"
 
-void vp9_initialize_dec() {
+static void initialize_dec() {
   static int init_done = 0;
 
   if (!init_done) {
@@ -58,7 +58,7 @@
   }
 
   cm->error.setjmp = 1;
-  vp9_initialize_dec();
+  initialize_dec();
 
   vp9_rtcd();
 
@@ -304,11 +304,14 @@
                       int64_t *time_stamp, int64_t *time_end_stamp,
                       vp9_ppflags_t *flags) {
   int ret = -1;
+#if !CONFIG_VP9_POSTPROC
+  (void)flags;  // Only used by the postproc path; never dereference here.
+#endif
 
   if (pbi->ready_for_new_data == 1)
     return ret;
 
-  /* ie no raw frame to show!!! */
+  /* no raw frame to show!!! */
   if (pbi->common.show_frame == 0)
     return ret;
 
@@ -319,8 +322,8 @@
 #if CONFIG_VP9_POSTPROC
   ret = vp9_post_proc_frame(&pbi->common, sd, flags);
 #else
-    *sd = *pbi->common.frame_to_show;
-    ret = 0;
+  *sd = *pbi->common.frame_to_show;
+  ret = 0;
 #endif /*!CONFIG_POSTPROC*/
   vp9_clear_system_state();
   return ret;
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index d6110c4..d6cb507 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -53,8 +53,6 @@
   int inv_tile_order;
 } VP9Decoder;
 
-void vp9_initialize_dec();
-
 int vp9_receive_compressed_data(struct VP9Decoder *pbi,
                                 size_t size, const uint8_t **dest,
                                 int64_t time_stamp);
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index 5fe5ed7..bc6c418 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -89,7 +89,8 @@
 
 // Implement row loopfiltering for each thread.
 static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
-                                VP9_COMMON *const cm, MACROBLOCKD *const xd,
+                                VP9_COMMON *const cm,
+                                struct macroblockd_plane planes[MAX_MB_PLANE],
                                 int start, int stop, int y_only,
                                 VP9LfSync *const lf_sync, int num_lf_workers) {
   const int num_planes = y_only ? 1 : MAX_MB_PLANE;
@@ -107,11 +108,11 @@
 
       sync_read(lf_sync, r, c);
 
-      vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
+      vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
       vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
 
       for (plane = 0; plane < num_planes; ++plane) {
-        vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
+        vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm);
       }
 
       sync_write(lf_sync, r, c, sb_cols);
@@ -124,7 +125,7 @@
   TileWorkerData *const tile_data = (TileWorkerData*)arg1;
   LFWorkerData *const lf_data = &tile_data->lfdata;
 
-  loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
+  loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
                       lf_data->start, lf_data->stop, lf_data->y_only,
                       lf_data->lf_sync, lf_data->num_lf_workers);
   return 1;
@@ -135,7 +136,7 @@
 void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
                               VP9Decoder *pbi, VP9_COMMON *cm,
                               int frame_filter_level,
-                              int y_only, int partial_frame) {
+                              int y_only) {
   VP9LfSync *const lf_sync = &pbi->lf_row_sync;
   // Number of superblock rows and cols
   const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
@@ -186,7 +187,7 @@
     // Loopfilter data
     lf_data->frame_buffer = frame;
     lf_data->cm = cm;
-    lf_data->xd = pbi->mb;
+    vp9_copy(lf_data->planes, pbi->mb.plane);
     lf_data->start = i;
     lf_data->stop = sb_rows;
     lf_data->y_only = y_only;   // always do all planes in decoder
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
index c3b7a29..a727e2a 100644
--- a/vp9/decoder/vp9_dthread.h
+++ b/vp9/decoder/vp9_dthread.h
@@ -52,6 +52,6 @@
                               struct VP9Decoder *pbi,
                               struct VP9Common *cm,
                               int frame_filter_level,
-                              int y_only, int partial_frame);
+                              int y_only);
 
 #endif  // VP9_DECODER_VP9_DTHREAD_H_
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 2ccf4f8..0504a49 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -20,43 +20,6 @@
 extern "C" {
 #endif
 
-// Structure to hold snapshot of coding context during the mode picking process
-typedef struct {
-  MODE_INFO mic;
-  uint8_t *zcoeff_blk;
-  int16_t *coeff[MAX_MB_PLANE][3];
-  int16_t *qcoeff[MAX_MB_PLANE][3];
-  int16_t *dqcoeff[MAX_MB_PLANE][3];
-  uint16_t *eobs[MAX_MB_PLANE][3];
-
-  // dual buffer pointers, 0: in use, 1: best in store
-  int16_t *coeff_pbuf[MAX_MB_PLANE][3];
-  int16_t *qcoeff_pbuf[MAX_MB_PLANE][3];
-  int16_t *dqcoeff_pbuf[MAX_MB_PLANE][3];
-  uint16_t *eobs_pbuf[MAX_MB_PLANE][3];
-
-  int is_coded;
-  int num_4x4_blk;
-  int skip;
-  int_mv best_ref_mv[2];
-  int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
-  int rate;
-  int distortion;
-  int best_mode_index;
-  int rddiv;
-  int rdmult;
-  int hybrid_pred_diff;
-  int comp_pred_diff;
-  int single_pred_diff;
-  int64_t tx_rd_diff[TX_MODES];
-  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
-
-  // motion vector cache for adaptive motion search control in partition
-  // search loop
-  int_mv pred_mv[MAX_REF_FRAMES];
-  INTERP_FILTER pred_interp_filter;
-} PICK_MODE_CONTEXT;
-
 struct macroblock_plane {
   DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]);
   int16_t *qcoeff;
@@ -73,18 +36,6 @@
   // Zbin Over Quant value
   int16_t zbin_extra;
 };
-typedef struct PC_TREE {
-  int index;
-  PARTITION_TYPE partitioning;
-  BLOCK_SIZE block_size;
-  PICK_MODE_CONTEXT none;
-  PICK_MODE_CONTEXT horizontal[2];
-  PICK_MODE_CONTEXT vertical[2];
-  union {
-    struct PC_TREE *split[4];
-    PICK_MODE_CONTEXT *leaf_split[4];
-  };
-} PC_TREE;
 
 /* The [2] dimension is for whether we skip the EOB node (i.e. if previous
  * coefficient in this block was zero) or not. */
@@ -157,15 +108,9 @@
   // Used to store sub partition's choices.
   int_mv pred_mv[MAX_REF_FRAMES];
 
-  PICK_MODE_CONTEXT *leaf_tree;
-  PC_TREE *pc_tree;
-  PC_TREE *pc_root;
-  int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
-
   void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride);
 };
 
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_context_tree.c b/vp9/encoder/vp9_context_tree.c
index ac9b562..9b7a932 100644
--- a/vp9/encoder/vp9_context_tree.c
+++ b/vp9/encoder/vp9_context_tree.c
@@ -9,6 +9,7 @@
  */
 
 #include "vp9/encoder/vp9_context_tree.h"
+#include "vp9/encoder/vp9_encoder.h"
 
 static const BLOCK_SIZE square[] = {
   BLOCK_8X8,
@@ -86,7 +87,7 @@
 // partition level. There are contexts for none, horizontal, vertical, and
 // split.  Along with a block_size value and a selected block_size which
 // represents the state of our search.
-void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x) {
+void vp9_setup_pc_tree(VP9_COMMON *cm, VP9_COMP *cpi) {
   int i, j;
   const int leaf_nodes = 64;
   const int tree_nodes = 64 + 16 + 4 + 1;
@@ -96,23 +97,24 @@
   int square_index = 1;
   int nodes;
 
-  vpx_free(x->leaf_tree);
-  CHECK_MEM_ERROR(cm, x->leaf_tree, vpx_calloc(leaf_nodes,
-                                               sizeof(*x->leaf_tree)));
-  vpx_free(x->pc_tree);
-  CHECK_MEM_ERROR(cm, x->pc_tree, vpx_calloc(tree_nodes, sizeof(*x->pc_tree)));
+  vpx_free(cpi->leaf_tree);
+  CHECK_MEM_ERROR(cm, cpi->leaf_tree, vpx_calloc(leaf_nodes,
+                                                 sizeof(*cpi->leaf_tree)));
+  vpx_free(cpi->pc_tree);
+  CHECK_MEM_ERROR(cm, cpi->pc_tree, vpx_calloc(tree_nodes,
+                                               sizeof(*cpi->pc_tree)));
 
-  this_pc = &x->pc_tree[0];
-  this_leaf = &x->leaf_tree[0];
+  this_pc = &cpi->pc_tree[0];
+  this_leaf = &cpi->leaf_tree[0];
 
   // 4x4 blocks smaller than 8x8 but in the same 8x8 block share the same
   // context so we only need to allocate 1 for each 8x8 block.
   for (i = 0; i < leaf_nodes; ++i)
-    alloc_mode_context(cm, 1, &x->leaf_tree[i]);
+    alloc_mode_context(cm, 1, &cpi->leaf_tree[i]);
 
   // Sets up all the leaf nodes in the tree.
   for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
-    PC_TREE *const tree = &x->pc_tree[pc_tree_index];
+    PC_TREE *const tree = &cpi->pc_tree[pc_tree_index];
     tree->block_size = square[0];
     alloc_tree_contexts(cm, tree, 4);
     tree->leaf_split[0] = this_leaf++;
@@ -124,7 +126,7 @@
   // from leafs to the root.
   for (nodes = 16; nodes > 0; nodes >>= 2) {
     for (i = 0; i < nodes; ++i) {
-      PC_TREE *const tree = &x->pc_tree[pc_tree_index];
+      PC_TREE *const tree = &cpi->pc_tree[pc_tree_index];
       alloc_tree_contexts(cm, tree, 4 << (2 * square_index));
       tree->block_size = square[square_index];
       for (j = 0; j < 4; j++)
@@ -133,24 +135,24 @@
     }
     ++square_index;
   }
-  x->pc_root = &x->pc_tree[tree_nodes - 1];
-  x->pc_root[0].none.best_mode_index = 2;
+  cpi->pc_root = &cpi->pc_tree[tree_nodes - 1];
+  cpi->pc_root[0].none.best_mode_index = 2;
 }
 
-void vp9_free_pc_tree(MACROBLOCK *x) {
+void vp9_free_pc_tree(VP9_COMP *cpi) {
   const int tree_nodes = 64 + 16 + 4 + 1;
   int i;
 
   // Set up all 4x4 mode contexts
   for (i = 0; i < 64; ++i)
-    free_mode_context(&x->leaf_tree[i]);
+    free_mode_context(&cpi->leaf_tree[i]);
 
   // Sets up all the leaf nodes in the tree.
   for (i = 0; i < tree_nodes; ++i)
-    free_tree_contexts(&x->pc_tree[i]);
+    free_tree_contexts(&cpi->pc_tree[i]);
 
-  vpx_free(x->pc_tree);
-  x->pc_tree = NULL;
-  vpx_free(x->leaf_tree);
-  x->leaf_tree = NULL;
+  vpx_free(cpi->pc_tree);
+  cpi->pc_tree = NULL;
+  vpx_free(cpi->leaf_tree);
+  cpi->leaf_tree = NULL;
 }
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h
index 66a6f00..af4358b 100644
--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -11,9 +11,55 @@
 #ifndef VP9_ENCODER_VP9_CONTEXT_TREE_H_
 #define VP9_ENCODER_VP9_CONTEXT_TREE_H_
 
-#include "vp9/encoder/vp9_encoder.h"
+#include "vp9/common/vp9_onyxc_int.h"
 
-void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x);
-void vp9_free_pc_tree(MACROBLOCK *x);
+struct VP9_COMP;
+
+// Structure to hold snapshot of coding context during the mode picking process
+typedef struct {
+  MODE_INFO mic;
+  uint8_t *zcoeff_blk;
+  int16_t *coeff[MAX_MB_PLANE][3];
+  int16_t *qcoeff[MAX_MB_PLANE][3];
+  int16_t *dqcoeff[MAX_MB_PLANE][3];
+  uint16_t *eobs[MAX_MB_PLANE][3];
+
+  // dual buffer pointers, 0: in use, 1: best in store
+  int16_t *coeff_pbuf[MAX_MB_PLANE][3];
+  int16_t *qcoeff_pbuf[MAX_MB_PLANE][3];
+  int16_t *dqcoeff_pbuf[MAX_MB_PLANE][3];
+  uint16_t *eobs_pbuf[MAX_MB_PLANE][3];
+
+  int is_coded;
+  int num_4x4_blk;
+  int skip;
+  int best_mode_index;
+  int hybrid_pred_diff;
+  int comp_pred_diff;
+  int single_pred_diff;
+  int64_t tx_rd_diff[TX_MODES];
+  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
+
+  // motion vector cache for adaptive motion search control in partition
+  // search loop
+  int_mv pred_mv[MAX_REF_FRAMES];
+  INTERP_FILTER pred_interp_filter;
+} PICK_MODE_CONTEXT;
+
+typedef struct PC_TREE {
+  int index;
+  PARTITION_TYPE partitioning;
+  BLOCK_SIZE block_size;
+  PICK_MODE_CONTEXT none;
+  PICK_MODE_CONTEXT horizontal[2];
+  PICK_MODE_CONTEXT vertical[2];
+  union {
+    struct PC_TREE *split[4];
+    PICK_MODE_CONTEXT *leaf_split[4];
+  };
+} PC_TREE;
+
+void vp9_setup_pc_tree(struct VP9Common *cm, struct VP9_COMP *cpi);
+void vp9_free_pc_tree(struct VP9_COMP *cpi);
 
 #endif /* VP9_ENCODER_VP9_CONTEXT_TREE_H_ */
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index fba9465..f7cb05b 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -201,7 +201,7 @@
   mbmi = &xd->mi[0]->mbmi;
 
   // Set up destination pointers.
-  vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col);
+  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
 
   // Set up limit values for MV components.
   // Mv beyond the range do not produce new/different prediction block.
@@ -1538,7 +1538,7 @@
       pl = partition_plane_context(xd, mi_row, mi_col, bsize);
 
       if (none_rate < INT_MAX) {
-        none_rate += x->partition_cost[pl][PARTITION_NONE];
+        none_rate += cpi->partition_cost[pl][PARTITION_NONE];
         none_rd = RDCOST(x->rdmult, x->rddiv, none_rate, none_dist);
       }
 
@@ -1636,7 +1636,7 @@
 
   pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   if (last_part_rate < INT_MAX) {
-    last_part_rate += x->partition_cost[pl][partition];
+    last_part_rate += cpi->partition_cost[pl][partition];
     last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist);
   }
 
@@ -1689,11 +1689,11 @@
 
       pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
                                    split_subsize);
-      chosen_rate += x->partition_cost[pl][PARTITION_NONE];
+      chosen_rate += cpi->partition_cost[pl][PARTITION_NONE];
     }
     pl = partition_plane_context(xd, mi_row, mi_col, bsize);
     if (chosen_rate < INT_MAX) {
-      chosen_rate += x->partition_cost[pl][PARTITION_SPLIT];
+      chosen_rate += cpi->partition_cost[pl][PARTITION_SPLIT];
       chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist);
     }
   }
@@ -1805,15 +1805,11 @@
                                     BLOCK_SIZE *max_block_size) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
-  MODE_INFO **mi_8x8 = xd->mi;
-  const int left_in_image = xd->left_available && mi_8x8[-1];
-  const int above_in_image = xd->up_available &&
-                             mi_8x8[-xd->mi_stride];
-  MODE_INFO **above_sb64_mi_8x8;
-  MODE_INFO **left_sb64_mi_8x8;
-
-  int row8x8_remaining = tile->mi_row_end - mi_row;
-  int col8x8_remaining = tile->mi_col_end - mi_col;
+  MODE_INFO **mi = xd->mi;
+  const int left_in_image = xd->left_available && mi[-1];
+  const int above_in_image = xd->up_available && mi[-xd->mi_stride];
+  const int row8x8_remaining = tile->mi_row_end - mi_row;
+  const int col8x8_remaining = tile->mi_col_end - mi_col;
   int bh, bw;
   BLOCK_SIZE min_size = BLOCK_4X4;
   BLOCK_SIZE max_size = BLOCK_64X64;
@@ -1833,15 +1829,13 @@
     }
     // Find the min and max partition sizes used in the left SB64
     if (left_in_image) {
-      left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE];
-      get_sb_partition_size_range(cpi, left_sb64_mi_8x8,
-                                  &min_size, &max_size);
+      MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE];
+      get_sb_partition_size_range(cpi, left_sb64_mi, &min_size, &max_size);
     }
     // Find the min and max partition sizes used in the above SB64.
     if (above_in_image) {
-      above_sb64_mi_8x8 = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE];
-      get_sb_partition_size_range(cpi, above_sb64_mi_8x8,
-                                  &min_size, &max_size);
+      MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE];
+      get_sb_partition_size_range(cpi, above_sb64_mi, &min_size, &max_size);
     }
     // adjust observed min and max
     if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
@@ -2021,7 +2015,7 @@
     if (this_rate != INT_MAX) {
       if (bsize >= BLOCK_8X8) {
         pl = partition_plane_context(xd, mi_row, mi_col, bsize);
-        this_rate += x->partition_cost[pl][PARTITION_NONE];
+        this_rate += cpi->partition_cost[pl][PARTITION_NONE];
       }
       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
       if (sum_rd < best_rd) {
@@ -2109,7 +2103,7 @@
 
     if (sum_rd < best_rd && i == 4) {
       pl = partition_plane_context(xd, mi_row, mi_col, bsize);
-      sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
+      sum_rate += cpi->partition_cost[pl][PARTITION_SPLIT];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
         best_rate = sum_rate;
@@ -2163,7 +2157,7 @@
     }
     if (sum_rd < best_rd) {
       pl = partition_plane_context(xd, mi_row, mi_col, bsize);
-      sum_rate += x->partition_cost[pl][PARTITION_HORZ];
+      sum_rate += cpi->partition_cost[pl][PARTITION_HORZ];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
         best_rd = sum_rd;
@@ -2212,7 +2206,7 @@
     }
     if (sum_rd < best_rd) {
       pl = partition_plane_context(xd, mi_row, mi_col, bsize);
-      sum_rate += x->partition_cost[pl][PARTITION_VERT];
+      sum_rate += cpi->partition_cost[pl][PARTITION_VERT];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
         best_rate = sum_rate;
@@ -2274,17 +2268,16 @@
     int64_t dummy_dist;
 
     int i;
-    MACROBLOCK *x = &cpi->mb;
 
     if (sf->adaptive_pred_interp_filter) {
       for (i = 0; i < 64; ++i)
-        x->leaf_tree[i].pred_interp_filter = SWITCHABLE;
+        cpi->leaf_tree[i].pred_interp_filter = SWITCHABLE;
 
       for (i = 0; i < 64; ++i) {
-        x->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
-        x->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
-        x->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
-        x->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
+        cpi->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
+        cpi->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
+        cpi->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
+        cpi->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
       }
     }
 
@@ -2296,26 +2289,26 @@
          sf->partition_search_type == VAR_BASED_PARTITION ||
          sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
       const int idx_str = cm->mi_stride * mi_row + mi_col;
-      MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
-      MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
+      MODE_INFO **mi = cm->mi_grid_visible + idx_str;
+      MODE_INFO **prev_mi = cm->prev_mi_grid_visible + idx_str;
       cpi->mb.source_variance = UINT_MAX;
       if (sf->partition_search_type == FIXED_PARTITION) {
         set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
-        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
+        set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col,
                                sf->always_this_block_size);
-        rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1, x->pc_root);
+        rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+                         &dummy_rate, &dummy_dist, 1, cpi->pc_root);
       } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
         BLOCK_SIZE bsize;
         set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
         bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
-        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
-        rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1, x->pc_root);
+        set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
+        rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+                         &dummy_rate, &dummy_dist, 1, cpi->pc_root);
       } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
         choose_partitioning(cpi, tile, mi_row, mi_col);
-        rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1, x->pc_root);
+        rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+                         &dummy_rate, &dummy_dist, 1, cpi->pc_root);
       } else {
         if ((cm->current_video_frame
             % sf->last_partitioning_redo_frequency) == 0
@@ -2325,7 +2318,7 @@
             || cpi->rc.is_src_frame_alt_ref
             || ((sf->use_lastframe_partitioning ==
                  LAST_FRAME_PARTITION_LOW_MOTION) &&
-                 sb_has_motion(cm, prev_mi_8x8))) {
+                 sb_has_motion(cm, prev_mi))) {
           // If required set upper and lower partition size limits
           if (sf->auto_min_max_partition_size) {
             set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
@@ -2334,16 +2327,17 @@
                                     &sf->max_partition_size);
           }
           rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
-                            &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root);
+                            &dummy_rate, &dummy_dist, 1, INT64_MAX,
+                            cpi->pc_root);
         } else {
           if (sf->constrain_copy_partition &&
-              sb_has_motion(cm, prev_mi_8x8))
-            constrain_copy_partitioning(cpi, tile, mi_8x8, prev_mi_8x8,
+              sb_has_motion(cm, prev_mi))
+            constrain_copy_partitioning(cpi, tile, mi, prev_mi,
                                         mi_row, mi_col, BLOCK_16X16);
           else
-            copy_partitioning(cm, mi_8x8, prev_mi_8x8);
-          rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                           &dummy_rate, &dummy_dist, 1, x->pc_root);
+            copy_partitioning(cm, mi, prev_mi);
+          rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+                           &dummy_rate, &dummy_dist, 1, cpi->pc_root);
         }
       }
     } else {
@@ -2355,7 +2349,7 @@
                                 &sf->max_partition_size);
       }
       rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
-                        &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root);
+                        &dummy_rate, &dummy_dist, 1, INT64_MAX, cpi->pc_root);
     }
   }
 }
@@ -2633,7 +2627,7 @@
 
     if (this_rate != INT_MAX) {
       int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
-      this_rate += x->partition_cost[pl][PARTITION_NONE];
+      this_rate += cpi->partition_cost[pl][PARTITION_NONE];
       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
       if (sum_rd < best_rd) {
         int64_t stop_thresh = 4096;
@@ -2671,7 +2665,7 @@
   sum_rd = 0;
   if (do_split) {
     int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
-    sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
+    sum_rate += cpi->partition_cost[pl][PARTITION_SPLIT];
     subsize = get_subsize(bsize, PARTITION_SPLIT);
     for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
       const int x_idx = (i & 1) * ms;
@@ -2730,7 +2724,7 @@
         sum_rd = INT64_MAX;
       } else {
         int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
-        this_rate += x->partition_cost[pl][PARTITION_HORZ];
+        this_rate += cpi->partition_cost[pl][PARTITION_HORZ];
         sum_rate += this_rate;
         sum_dist += this_dist;
         sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
@@ -2764,7 +2758,7 @@
         sum_rd = INT64_MAX;
       } else {
         int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
-        this_rate += x->partition_cost[pl][PARTITION_VERT];
+        this_rate += cpi->partition_cost[pl][PARTITION_VERT];
         sum_rate += this_rate;
         sum_dist += this_dist;
         sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
@@ -2822,7 +2816,7 @@
 
 static void nonrd_use_partition(VP9_COMP *cpi,
                                 const TileInfo *const tile,
-                                MODE_INFO **mi_8x8,
+                                MODE_INFO **mi,
                                 TOKENEXTRA **tp,
                                 int mi_row, int mi_col,
                                 BLOCK_SIZE bsize, int output_enabled,
@@ -2841,7 +2835,7 @@
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  subsize = (bsize >= BLOCK_8X8) ? mi_8x8[0]->mbmi.sb_type : BLOCK_4X4;
+  subsize = (bsize >= BLOCK_8X8) ? mi[0]->mbmi.sb_type : BLOCK_4X4;
   partition = partition_lookup[bsl][subsize];
 
   switch (partition) {
@@ -2869,7 +2863,7 @@
       if (mi_row + hbs < cm->mi_rows) {
         nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
                             &rate, &dist, subsize);
-        pc_tree->horizontal[1].mic.mbmi = mi_8x8[0]->mbmi;
+        pc_tree->horizontal[1].mic.mbmi = mi[0]->mbmi;
         if (rate != INT_MAX && dist != INT64_MAX &&
             *totrate != INT_MAX && *totdist != INT64_MAX) {
           *totrate += rate;
@@ -2879,10 +2873,10 @@
       break;
     case PARTITION_SPLIT:
       subsize = get_subsize(bsize, PARTITION_SPLIT);
-      nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+      nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
                           subsize, output_enabled, totrate, totdist,
                           pc_tree->split[0]);
-      nonrd_use_partition(cpi, tile, mi_8x8 + hbs, tp,
+      nonrd_use_partition(cpi, tile, mi + hbs, tp,
                           mi_row, mi_col + hbs, subsize, output_enabled,
                           &rate, &dist, pc_tree->split[1]);
       if (rate != INT_MAX && dist != INT64_MAX &&
@@ -2890,7 +2884,7 @@
         *totrate += rate;
         *totdist += dist;
       }
-      nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis, tp,
+      nonrd_use_partition(cpi, tile, mi + hbs * mis, tp,
                           mi_row + hbs, mi_col, subsize, output_enabled,
                           &rate, &dist, pc_tree->split[2]);
       if (rate != INT_MAX && dist != INT64_MAX &&
@@ -2898,7 +2892,7 @@
         *totrate += rate;
         *totdist += dist;
       }
-      nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis + hbs, tp,
+      nonrd_use_partition(cpi, tile, mi + hbs * mis + hbs, tp,
                           mi_row + hbs, mi_col + hbs, subsize, output_enabled,
                           &rate, &dist, pc_tree->split[3]);
       if (rate != INT_MAX && dist != INT64_MAX &&
@@ -2937,8 +2931,8 @@
     int dummy_rate = 0;
     int64_t dummy_dist = 0;
     const int idx_str = cm->mi_stride * mi_row + mi_col;
-    MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
-    MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
+    MODE_INFO **mi = cm->mi_grid_visible + idx_str;
+    MODE_INFO **prev_mi = cm->prev_mi_grid_visible + idx_str;
     BLOCK_SIZE bsize;
 
     x->in_static_area = 0;
@@ -2949,22 +2943,22 @@
     switch (cpi->sf.partition_search_type) {
       case VAR_BASED_PARTITION:
         choose_partitioning(cpi, tile, mi_row, mi_col);
-        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            1, &dummy_rate, &dummy_dist, x->pc_root);
+        nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+                            1, &dummy_rate, &dummy_dist, cpi->pc_root);
         break;
       case SOURCE_VAR_BASED_PARTITION:
-        set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col);
-        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            1, &dummy_rate, &dummy_dist, x->pc_root);
+        set_source_var_based_partition(cpi, tile, mi, mi_row, mi_col);
+        nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+                            1, &dummy_rate, &dummy_dist, cpi->pc_root);
         break;
       case VAR_BASED_FIXED_PARTITION:
       case FIXED_PARTITION:
         bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
                 cpi->sf.always_this_block_size :
                 get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
-        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
-        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                            1, &dummy_rate, &dummy_dist, x->pc_root);
+        set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
+        nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+                            1, &dummy_rate, &dummy_dist, cpi->pc_root);
         break;
       case REFERENCE_PARTITION:
         if (cpi->sf.partition_check ||
@@ -2975,12 +2969,12 @@
                                &cpi->sf.max_partition_size);
           nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
                                &dummy_rate, &dummy_dist, 1, INT64_MAX,
-                               x->pc_root);
+                               cpi->pc_root);
         } else {
-          copy_partitioning(cm, mi_8x8, prev_mi_8x8);
-          nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+          copy_partitioning(cm, mi, prev_mi);
+          nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
                               BLOCK_64X64, 1, &dummy_rate, &dummy_dist,
-                              x->pc_root);
+                              cpi->pc_root);
         }
         break;
       default:
@@ -3043,7 +3037,7 @@
     int i;
     struct macroblock_plane *const p = x->plane;
     struct macroblockd_plane *const pd = xd->plane;
-    PICK_MODE_CONTEXT *ctx = &x->pc_root->none;
+    PICK_MODE_CONTEXT *ctx = &cpi->pc_root->none;
 
     for (i = 0; i < MAX_MB_PLANE; ++i) {
       p[i].coeff = ctx->coeff_pbuf[i][0];
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 911ce7c..1f68f03 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -187,7 +187,7 @@
   vpx_free(cpi->tok);
   cpi->tok = 0;
 
-  vp9_free_pc_tree(&cpi->mb);
+  vp9_free_pc_tree(cpi);
 
   for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
@@ -455,7 +455,7 @@
     CHECK_MEM_ERROR(cm, cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok)));
   }
 
-  vp9_setup_pc_tree(&cpi->common, &cpi->mb);
+  vp9_setup_pc_tree(&cpi->common, cpi);
 }
 
 static void update_frame_size(VP9_COMP *cpi) {
@@ -1414,6 +1414,7 @@
                             dst->alpha_buffer};
   const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride,
                               dst->alpha_stride};
+  const InterpKernel *const kernel = vp9_get_interp_kernel(EIGHTTAP);
   int x, y, i;
 
   for (y = 0; y < dst_h; y += 16) {
@@ -1429,8 +1430,8 @@
         uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
 
         vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
-                      vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * src_w / dst_w,
-                      vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * src_h / dst_h,
+                      kernel[x_q4 & 0xf], 16 * src_w / dst_w,
+                      kernel[y_q4 & 0xf], 16 * src_h / dst_h,
                       16 / factor, 16 / factor);
       }
     }
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 17c826f..e898113 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -24,6 +24,7 @@
 #include "vp9/common/vp9_onyxc_int.h"
 
 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
+#include "vp9/encoder/vp9_context_tree.h"
 #include "vp9/encoder/vp9_encodemb.h"
 #include "vp9/encoder/vp9_firstpass.h"
 #include "vp9/encoder/vp9_lookahead.h"
@@ -410,8 +411,8 @@
   // Default value is 1. From first pass stats, encode_breakout may be disabled.
   ENCODE_BREAKOUT_TYPE allow_encode_breakout;
 
-  // Get threshold from external input. In real time mode, it can be
-  // overwritten according to encoding speed.
+  // Get threshold from external input. A suggested threshold is 800 for HD
+  // clips, and 300 for < HD clips.
   int encode_breakout;
 
   unsigned char *segmentation_map;
@@ -502,6 +503,11 @@
   int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
   int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
 
+  PICK_MODE_CONTEXT *leaf_tree;
+  PC_TREE *pc_tree;
+  PC_TREE *pc_root;
+  int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
+
 #if CONFIG_MULTIPLE_ARF
   // ARF tracking variables.
   int multi_arf_enabled;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index c6b6197..04f03e2 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -61,6 +61,7 @@
 #define MIN_GF_INTERVAL             4
 #endif
 
+
 // #define LONG_TERM_VBR_CORRECTION
 
 static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
@@ -475,7 +476,7 @@
   TileInfo tile;
   struct macroblock_plane *const p = x->plane;
   struct macroblockd_plane *const pd = xd->plane;
-  const PICK_MODE_CONTEXT *ctx = &x->pc_root->none;
+  const PICK_MODE_CONTEXT *ctx = &cpi->pc_root->none;
   int i;
 
   int recon_yoffset, recon_uvoffset;
@@ -540,7 +541,7 @@
 
   vp9_setup_src_planes(x, cpi->Source, 0, 0);
   vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL);
-  vp9_setup_dst_planes(xd, new_yv12, 0, 0);
+  vp9_setup_dst_planes(xd->plane, new_yv12, 0, 0);
 
   xd->mi = cm->mi_grid_visible;
   xd->mi[0] = cm->mi;
@@ -1007,25 +1008,6 @@
   // This variable monitors how far behind the second ref update is lagging.
   twopass->sr_update_lag = 1;
 
-  // Scan the first pass file and calculate an average Intra / Inter error
-  // score ratio for the sequence.
-  {
-    const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
-    FIRSTPASS_STATS this_frame;
-    double sum_iiratio = 0.0;
-
-    while (input_stats(twopass, &this_frame) != EOF) {
-      const double iiratio = this_frame.intra_error /
-                                 DOUBLE_DIVIDE_CHECK(this_frame.coded_error);
-      sum_iiratio += fclamp(iiratio, 1.0, 20.0);
-    }
-
-    twopass->avg_iiratio = sum_iiratio /
-                               DOUBLE_DIVIDE_CHECK((double)stats->count);
-
-    reset_fpf_position(twopass, start_pos);
-  }
-
   // Scan the first pass file and calculate a modified total error based upon
   // the bias/power function used to allocate bits.
   {
@@ -1421,10 +1403,13 @@
 static void calculate_section_intra_ratio(struct twopass_rc *twopass,
                                           const FIRSTPASS_STATS *start_pos,
                                           int section_length) {
-  FIRSTPASS_STATS next_frame = { 0 };
-  FIRSTPASS_STATS sectionstats = { 0 };
+  FIRSTPASS_STATS next_frame;
+  FIRSTPASS_STATS sectionstats;
   int i;
 
+  vp9_zero(next_frame);
+  vp9_zero(sectionstats);
+
   reset_fpf_position(twopass, start_pos);
 
   for (i = 0; i < section_length; ++i) {
@@ -1497,7 +1482,7 @@
   RATE_CONTROL *const rc = &cpi->rc;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   struct twopass_rc *const twopass = &cpi->twopass;
-  FIRSTPASS_STATS next_frame = { 0 };
+  FIRSTPASS_STATS next_frame;
   const FIRSTPASS_STATS *start_pos;
   int i;
   double boost_score = 0.0;
@@ -1524,10 +1509,10 @@
   int flash_detected;
   int active_max_gf_interval;
 
-  twopass->gf_group_bits = 0;
-
   vp9_clear_system_state();
+  vp9_zero(next_frame);
 
+  twopass->gf_group_bits = 0;
   start_pos = twopass->stats_in;
 
   // Load stats for the current frame.
@@ -2145,12 +2130,10 @@
   FIRSTPASS_STATS this_frame;
   FIRSTPASS_STATS this_frame_copy;
 
-  double this_frame_intra_error;
-  double this_frame_coded_error;
   int target;
   LAYER_CONTEXT *lc = NULL;
-  int is_spatial_svc = (cpi->use_svc && cpi->svc.number_temporal_layers == 1);
-
+  const int is_spatial_svc = (cpi->use_svc &&
+                              cpi->svc.number_temporal_layers == 1);
   if (is_spatial_svc) {
     lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
     frames_left = (int)(twopass->total_stats.count -
@@ -2200,9 +2183,6 @@
   if (EOF == input_stats(twopass, &this_frame))
     return;
 
-  this_frame_intra_error = this_frame.intra_error;
-  this_frame_coded_error = this_frame.coded_error;
-
   // Keyframe and section processing.
   if (rc->frames_to_key == 0 ||
       (cpi->frame_flags & FRAMEFLAGS_KEY)) {
@@ -2210,14 +2190,14 @@
     this_frame_copy = this_frame;
     find_next_key_frame(cpi, &this_frame_copy);
     // Don't place key frame in any enhancement layers in spatial svc
-    if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    if (is_spatial_svc) {
       lc->is_key_frame = 1;
       if (cpi->svc.spatial_layer_id > 0) {
         cm->frame_type = INTER_FRAME;
       }
     }
   } else {
-    if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    if (is_spatial_svc) {
       lc->is_key_frame = 0;
     }
     cm->frame_type = INTER_FRAME;
@@ -2257,9 +2237,6 @@
     assign_std_frame_bits(cpi, &this_frame_copy);
   }
 
-  // Keep a globally available copy of this and the next frame's iiratio.
-  twopass->this_iiratio = (int)(this_frame_intra_error /
-                              DOUBLE_DIVIDE_CHECK(this_frame_coded_error));
   {
     FIRSTPASS_STATS next_frame;
     if (lookup_next_frame_stats(twopass, &next_frame) != EOF) {
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index f7ba423..02a3d1f 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -41,7 +41,6 @@
 struct twopass_rc {
   unsigned int section_intra_rating;
   unsigned int next_iiratio;
-  unsigned int this_iiratio;
   FIRSTPASS_STATS total_stats;
   FIRSTPASS_STATS this_frame_stats;
   const FIRSTPASS_STATS *stats_in;
@@ -50,8 +49,6 @@
   FIRSTPASS_STATS total_left_stats;
   int first_pass_done;
   int64_t bits_left;
-  int64_t clip_bits_total;
-  double avg_iiratio;
   double modified_error_min;
   double modified_error_max;
   double modified_error_total;
@@ -72,7 +69,6 @@
   int64_t gf_group_bits;
   // Bits for the golden frame or ARF - 2 pass only
   int gf_bits;
-  int alt_extra_bits;
 
   int sr_update_lag;
 
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 5e87d28..041e583 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -235,7 +235,7 @@
 
   int mb_col, mb_row, offset = 0;
   int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
-  MV arf_top_mv = {0, 0}, gld_top_mv = {0, 0};
+  MV gld_top_mv = {0, 0};
   MODE_INFO mi_local;
 
   vp9_zero(mi_local);
@@ -253,7 +253,7 @@
   mi_local.mbmi.ref_frame[1] = NONE;
 
   for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
-    MV arf_left_mv = arf_top_mv, gld_left_mv = gld_top_mv;
+    MV gld_left_mv = gld_top_mv;
     int mb_y_in_offset  = mb_y_offset;
     int arf_y_in_offset = arf_y_offset;
     int gld_y_in_offset = gld_y_offset;
@@ -270,10 +270,8 @@
       update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
                               golden_ref, &gld_left_mv, alt_ref,
                               mb_row, mb_col);
-      arf_left_mv = mb_stats->ref[ALTREF_FRAME].m.mv.as_mv;
       gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv;
       if (mb_col == 0) {
-        arf_top_mv = arf_left_mv;
         gld_top_mv = gld_left_mv;
       }
       xd->left_available = 1;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 78fba73..3877e66 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -31,7 +31,7 @@
                                     int_mv *tmp_mv, int *rate_mv) {
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
+  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
   int step_param;
   int sadpb = x->sadperbit16;
   MV mvp_full;
@@ -110,7 +110,7 @@
                                     MV *tmp_mv) {
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
+  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
   int ref = mbmi->ref_frame[0];
   MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
   int dis;
@@ -148,7 +148,8 @@
 
 static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd,
-                              int *out_rate_sum, int64_t *out_dist_sum) {
+                              int *out_rate_sum, int64_t *out_dist_sum,
+                              unsigned int *var_y, unsigned int *sse_y) {
   // Note our transform coeffs are 8 times an orthogonal transform.
   // Hence quantizer step is also 8 times. To get effective quantizer
   // we need to divide by 8 before sending to modeling function.
@@ -162,6 +163,9 @@
   unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
                                            pd->dst.buf, pd->dst.stride, &sse);
 
+  *var_y = var;
+  *sse_y = sse;
+
   // TODO(jingning) This is a temporary solution to account for frames with
   // light changes. Need to customize the rate-distortion modeling for non-RD
   // mode decision.
@@ -198,6 +202,9 @@
 
   int rate = INT_MAX;
   int64_t dist = INT64_MAX;
+  // var_y and sse_y are saved for use by the skipping check.
+  unsigned int var_y = UINT_MAX;
+  unsigned int sse_y = UINT_MAX;
 
   VP9_COMMON *cm = &cpi->common;
   int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
@@ -219,8 +226,7 @@
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 
   x->skip = 0;
-  if (!x->in_active_map)
-    x->skip = 1;
+
   // initialize mode decisions
   *returnrate = INT_MAX;
   *returndistortion = INT64_MAX;
@@ -318,54 +324,37 @@
           pred_filter_search &&
           ((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
            (mbmi->mv[0].as_mv.col & 0x07) != 0)) {
-        int64_t tmp_rdcost1 = INT64_MAX;
-        int64_t tmp_rdcost2 = INT64_MAX;
-        int64_t tmp_rdcost3 = INT64_MAX;
         int pf_rate[3];
         int64_t pf_dist[3];
+        unsigned int pf_var[3];
+        unsigned int pf_sse[3];
+        int64_t best_cost = INT64_MAX;
+        INTERP_FILTER best_filter = SWITCHABLE, filter;
 
-        mbmi->interp_filter = EIGHTTAP;
-        vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
-        model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP],
-                          &pf_dist[EIGHTTAP]);
-        tmp_rdcost1 = RDCOST(x->rdmult, x->rddiv,
-                             vp9_get_switchable_rate(cpi) + pf_rate[EIGHTTAP],
-                             pf_dist[EIGHTTAP]);
-
-        mbmi->interp_filter = EIGHTTAP_SHARP;
-        vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
-        model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SHARP],
-                          &pf_dist[EIGHTTAP_SHARP]);
-        tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv, vp9_get_switchable_rate(cpi) +
-                                 pf_rate[EIGHTTAP_SHARP],
-                             pf_dist[EIGHTTAP_SHARP]);
-
-        mbmi->interp_filter = EIGHTTAP_SMOOTH;
-        vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
-        model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SMOOTH],
-                          &pf_dist[EIGHTTAP_SMOOTH]);
-        tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv, vp9_get_switchable_rate(cpi) +
-                                 pf_rate[EIGHTTAP_SMOOTH],
-                             pf_dist[EIGHTTAP_SMOOTH]);
-
-        if (tmp_rdcost2 < tmp_rdcost1) {
-          if (tmp_rdcost2 < tmp_rdcost3)
-            mbmi->interp_filter = EIGHTTAP_SHARP;
-          else
-            mbmi->interp_filter = EIGHTTAP_SMOOTH;
-        } else {
-          if (tmp_rdcost1 < tmp_rdcost3)
-            mbmi->interp_filter = EIGHTTAP;
-          else
-            mbmi->interp_filter = EIGHTTAP_SMOOTH;
+        for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) {
+          int64_t cost;
+          mbmi->interp_filter = filter;
+          vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+          model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter],
+                            &pf_dist[filter], &pf_var[filter], &pf_sse[filter]);
+          cost = RDCOST(x->rdmult, x->rddiv,
+                        vp9_get_switchable_rate(cpi) + pf_rate[filter],
+                        pf_dist[filter]);
+          if (cost < best_cost) {
+              best_filter = filter;
+              best_cost = cost;
+          }
         }
 
+        mbmi->interp_filter = best_filter;
         rate = pf_rate[mbmi->interp_filter];
         dist = pf_dist[mbmi->interp_filter];
+        var_y = pf_var[mbmi->interp_filter];
+        sse_y = pf_sse[mbmi->interp_filter];
       } else {
         mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP: filter_ref;
         vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
-        model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
+        model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
       }
 
       rate += rate_mv;
@@ -373,7 +362,78 @@
                                 [INTER_OFFSET(this_mode)];
       this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
 
-      if (this_rd < best_rd) {
+      // Skipping checking: test to see if this block can be reconstructed by
+      // prediction only.
+      if (!x->in_active_map) {
+        x->skip = 1;
+      } else if (cpi->allow_encode_breakout && x->encode_breakout) {
+        const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
+        unsigned int var = var_y, sse = sse_y;
+        // Skipping threshold for ac.
+        unsigned int thresh_ac;
+        // Skipping threshold for dc.
+        unsigned int thresh_dc;
+        // Set a maximum for threshold to avoid big PSNR loss in low bit rate
+        // case. Use extreme low threshold for static frames to limit skipping.
+        const unsigned int max_thresh = 36000;
+        // The encode_breakout input
+        const unsigned int min_thresh =
+            MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
+
+        // Calculate threshold according to dequant value.
+        thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
+        thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
+
+        // Adjust ac threshold according to partition size.
+        thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
+            b_height_log2_lookup[bsize]);
+
+        thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
+
+        // Y skipping condition checking for ac and dc.
+        if (var <= thresh_ac && (sse - var) <= thresh_dc) {
+          unsigned int sse_u, sse_v;
+          unsigned int var_u, var_v;
+
+          // Skip u v prediction to reduce computation; this won't affect
+          // the result much.
+          var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
+                                          x->plane[1].src.stride,
+                                          xd->plane[1].dst.buf,
+                                          xd->plane[1].dst.stride, &sse_u);
+
+          // U skipping condition checking
+          if ((var_u * 4 <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
+            var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
+                                            x->plane[2].src.stride,
+                                            xd->plane[2].dst.buf,
+                                            xd->plane[2].dst.stride, &sse_v);
+
+            // V skipping condition checking
+            if ((var_v * 4 <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
+              x->skip = 1;
+
+              // The cost of skip bit needs to be added.
+              rate = rate_mv;
+              rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
+                                           [INTER_OFFSET(this_mode)];
+
+              // More on this part of rate
+              // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
+
+              // Scaling factor for SSE from spatial domain to frequency
+              // domain is 16. Adjust distortion accordingly.
+              // TODO(yunqingwang): In this function, only y-plane dist is
+              // calculated.
+              dist = (sse << 4);  // + ((sse_u + sse_v) << 4);
+              this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+              // *disable_skip = 1;
+            }
+          }
+        }
+      }
+
+      if (this_rd < best_rd || x->skip) {
         best_rd = this_rd;
         *returnrate = rate;
         *returndistortion = dist;
@@ -381,6 +441,9 @@
         best_pred_filter = mbmi->interp_filter;
         best_ref_frame = ref_frame;
       }
+
+      if (x->skip)
+        break;
     }
   }
 
@@ -392,14 +455,15 @@
 
   // Perform intra prediction search, if the best SAD is above a certain
   // threshold.
-  if (best_rd > inter_mode_thresh && bsize < cpi->sf.max_intra_bsize) {
+  if (!x->skip && best_rd > inter_mode_thresh &&
+      bsize < cpi->sf.max_intra_bsize) {
     for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
       vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
                               mbmi->tx_size, this_mode,
                               &p->src.buf[0], p->src.stride,
                               &pd->dst.buf[0], pd->dst.stride, 0, 0, 0);
 
-      model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
+      model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
       rate += cpi->mbmode_cost[this_mode];
       rate += intra_cost_penalty;
       this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 0f1e9a0..f701cf0 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -242,6 +242,31 @@
   cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
 }
 
+static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
+                           int m, int n, int min_plane, int max_plane) {
+  int i;
+
+  for (i = min_plane; i < max_plane; ++i) {
+    struct macroblock_plane *const p = &x->plane[i];
+    struct macroblockd_plane *const pd = &x->e_mbd.plane[i];
+
+    p->coeff    = ctx->coeff_pbuf[i][m];
+    p->qcoeff   = ctx->qcoeff_pbuf[i][m];
+    pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
+    p->eobs     = ctx->eobs_pbuf[i][m];
+
+    ctx->coeff_pbuf[i][m]   = ctx->coeff_pbuf[i][n];
+    ctx->qcoeff_pbuf[i][m]  = ctx->qcoeff_pbuf[i][n];
+    ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
+    ctx->eobs_pbuf[i][m]    = ctx->eobs_pbuf[i][n];
+
+    ctx->coeff_pbuf[i][n]   = p->coeff;
+    ctx->qcoeff_pbuf[i][n]  = p->qcoeff;
+    ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
+    ctx->eobs_pbuf[i][n]    = p->eobs;
+  }
+}
+
 static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
   int i, bsize, segment_id;
 
@@ -297,7 +322,7 @@
     fill_token_costs(x->token_costs, cm->fc.coef_probs);
 
     for (i = 0; i < PARTITION_CONTEXTS; i++)
-      vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
+      vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(cm, i),
                       vp9_partition_tree);
   }
 
@@ -745,7 +770,8 @@
                              int use_fast_coef_casting) {
   MACROBLOCKD *const xd = &x->e_mbd;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
-  struct rdcost_block_args args = { 0 };
+  struct rdcost_block_args args;
+  vp9_zero(args);
   args.x = x;
   args.best_rd = ref_best_rd;
   args.use_fast_coef_costing = use_fast_coef_casting;
@@ -1387,27 +1413,8 @@
       *rate_tokenonly = this_rate_tokenonly;
       *distortion     = this_distortion;
       *skippable      = s;
-      if (!x->select_txfm_size) {
-        int i;
-        struct macroblock_plane *const p = x->plane;
-        struct macroblockd_plane *const pd = xd->plane;
-        for (i = 1; i < MAX_MB_PLANE; ++i) {
-          p[i].coeff    = ctx->coeff_pbuf[i][2];
-          p[i].qcoeff   = ctx->qcoeff_pbuf[i][2];
-          pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
-          p[i].eobs    = ctx->eobs_pbuf[i][2];
-
-          ctx->coeff_pbuf[i][2]   = ctx->coeff_pbuf[i][0];
-          ctx->qcoeff_pbuf[i][2]  = ctx->qcoeff_pbuf[i][0];
-          ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
-          ctx->eobs_pbuf[i][2]    = ctx->eobs_pbuf[i][0];
-
-          ctx->coeff_pbuf[i][0]   = p[i].coeff;
-          ctx->qcoeff_pbuf[i][0]  = p[i].qcoeff;
-          ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
-          ctx->eobs_pbuf[i][0]    = p[i].eobs;
-        }
-      }
+      if (!x->select_txfm_size)
+        swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
     }
   }
 
@@ -2215,10 +2222,6 @@
   ctx->skip = x->skip;
   ctx->best_mode_index = mode_index;
   ctx->mic = *xd->mi[0];
-
-  ctx->best_ref_mv[0].as_int = ref_mv->as_int;
-  ctx->best_ref_mv[1].as_int = second_ref_mv->as_int;
-
   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
   ctx->comp_pred_diff   = (int)comp_pred_diff[COMPOUND_REFERENCE];
   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
@@ -2312,7 +2315,7 @@
   MACROBLOCKD *xd = &x->e_mbd;
   const VP9_COMMON *cm = &cpi->common;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
+  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
   int bestsme = INT_MAX;
   int step_param;
   int sadpb = x->sadperbit16;
@@ -2930,30 +2933,6 @@
   return this_rd;  // if 0, this will be re-calculated by caller
 }
 
-static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
-                           int max_plane) {
-  struct macroblock_plane *const p = x->plane;
-  struct macroblockd_plane *const pd = x->e_mbd.plane;
-  int i;
-
-  for (i = 0; i < max_plane; ++i) {
-    p[i].coeff    = ctx->coeff_pbuf[i][1];
-    p[i].qcoeff  = ctx->qcoeff_pbuf[i][1];
-    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
-    p[i].eobs    = ctx->eobs_pbuf[i][1];
-
-    ctx->coeff_pbuf[i][1]   = ctx->coeff_pbuf[i][0];
-    ctx->qcoeff_pbuf[i][1]  = ctx->qcoeff_pbuf[i][0];
-    ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
-    ctx->eobs_pbuf[i][1]    = ctx->eobs_pbuf[i][0];
-
-    ctx->coeff_pbuf[i][0]   = p[i].coeff;
-    ctx->qcoeff_pbuf[i][0]  = p[i].qcoeff;
-    ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
-    ctx->eobs_pbuf[i][0]    = p[i].eobs;
-  }
-}
-
 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                int *returnrate, int64_t *returndist,
                                BLOCK_SIZE bsize,
@@ -3068,7 +3047,7 @@
   int64_t best_pred_rd[REFERENCE_MODES];
   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
-  MB_MODE_INFO best_mbmode = { 0 };
+  MB_MODE_INFO best_mbmode;
   int mode_index, best_mode_index = -1;
   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
   vp9_prob comp_mode_p;
@@ -3094,7 +3073,7 @@
   const int intra_y_mode_mask =
       cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
   int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
-
+  vp9_zero(best_mbmode);
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 
   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
@@ -3475,7 +3454,7 @@
         best_mbmode = *mbmi;
         best_skip2 = this_skip2;
         if (!x->select_txfm_size)
-          swap_block_ptr(x, ctx, max_plane);
+          swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
                    sizeof(uint8_t) * ctx->num_4x4_blk);
 
@@ -3677,7 +3656,7 @@
   int64_t best_pred_rd[REFERENCE_MODES];
   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
-  MB_MODE_INFO best_mbmode = { 0 };
+  MB_MODE_INFO best_mbmode;
   int ref_index, best_ref_index = 0;
   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
   vp9_prob comp_mode_p;
@@ -3697,6 +3676,7 @@
 
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
   vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
+  vp9_zero(best_mbmode);
 
   for (i = 0; i < 4; i++) {
     int j;
@@ -4129,7 +4109,7 @@
         best_mbmode = *mbmi;
         best_skip2 = this_skip2;
         if (!x->select_txfm_size)
-          swap_block_ptr(x, ctx, max_plane);
+          swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
                    sizeof(uint8_t) * ctx->num_4x4_blk);
 
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 7537d1b..574df62 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -109,7 +109,7 @@
 }
 
 static void count_segs(VP9_COMP *cpi, const TileInfo *const tile,
-                       MODE_INFO **mi_8x8,
+                       MODE_INFO **mi,
                        int *no_pred_segcounts,
                        int (*temporal_predictor_count)[2],
                        int *t_unpred_seg_counts,
@@ -121,7 +121,7 @@
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  xd->mi = mi_8x8;
+  xd->mi = mi;
   segment_id = xd->mi[0]->mbmi.segment_id;
 
   set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
@@ -131,7 +131,7 @@
 
   // Temporal prediction not allowed on key frames
   if (cm->frame_type != KEY_FRAME) {
-    const BLOCK_SIZE bsize = mi_8x8[0]->mbmi.sb_type;
+    const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
     // Test to see if the segment id matches the predicted value.
     const int pred_segment_id = vp9_get_segment_id(cm, cm->last_frame_seg_map,
                                                    bsize, mi_row, mi_col);
@@ -143,14 +143,14 @@
     xd->mi[0]->mbmi.seg_id_predicted = pred_flag;
     temporal_predictor_count[pred_context][pred_flag]++;
 
+    // Update the "unpredicted" segment count
     if (!pred_flag)
-      // Update the "unpredicted" segment count
       t_unpred_seg_counts[segment_id]++;
   }
 }
 
 static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile,
-                          MODE_INFO **mi_8x8,
+                          MODE_INFO **mi,
                           int *no_pred_segcounts,
                           int (*temporal_predictor_count)[2],
                           int *t_unpred_seg_counts,
@@ -164,22 +164,22 @@
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type];
-  bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type];
+  bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type];
+  bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type];
 
   if (bw == bs && bh == bs) {
-    count_segs(cpi, tile, mi_8x8, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, bs, bs, mi_row, mi_col);
   } else if (bw == bs && bh < bs) {
-    count_segs(cpi, tile, mi_8x8, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
-    count_segs(cpi, tile, mi_8x8 + hbs * mis, no_pred_segcounts,
+    count_segs(cpi, tile, mi + hbs * mis, no_pred_segcounts,
                temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
                mi_row + hbs, mi_col);
   } else if (bw < bs && bh == bs) {
-    count_segs(cpi, tile, mi_8x8, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
-    count_segs(cpi, tile, mi_8x8 + hbs,
+    count_segs(cpi, tile, mi + hbs,
                no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts,
                hbs, bs, mi_row, mi_col + hbs);
   } else {
@@ -192,7 +192,7 @@
       const int mi_dc = hbs * (n & 1);
       const int mi_dr = hbs * (n >> 1);
 
-      count_segs_sb(cpi, tile, &mi_8x8[mi_dr * mis + mi_dc],
+      count_segs_sb(cpi, tile, &mi[mi_dr * mis + mi_dc],
                     no_pred_segcounts, temporal_predictor_count,
                     t_unpred_seg_counts,
                     mi_row + mi_dr, mi_col + mi_dc, subsize);
@@ -217,9 +217,6 @@
   vp9_prob t_pred_tree[SEG_TREE_PROBS];
   vp9_prob t_nopred_prob[PREDICTION_PROBS];
 
-  const int mis = cm->mi_stride;
-  MODE_INFO **mi_ptr, **mi;
-
   // Set default state for the segment tree probabilities and the
   // temporal coding probabilities
   vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs));
@@ -229,12 +226,13 @@
   // predicts this one
   for (tile_col = 0; tile_col < 1 << cm->log2_tile_cols; tile_col++) {
     TileInfo tile;
-
+    MODE_INFO **mi_ptr;
     vp9_tile_init(&tile, cm, 0, tile_col);
+
     mi_ptr = cm->mi_grid_visible + tile.mi_col_start;
     for (mi_row = 0; mi_row < cm->mi_rows;
-         mi_row += 8, mi_ptr += 8 * mis) {
-      mi = mi_ptr;
+         mi_row += 8, mi_ptr += 8 * cm->mi_stride) {
+      MODE_INFO **mi = mi_ptr;
       for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
            mi_col += 8, mi += 8)
         count_segs_sb(cpi, &tile, mi, no_pred_segcounts,
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 93e23ee..7b2d1e2 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -146,7 +146,6 @@
                                  int speed) {
   sf->static_segmentation = 0;
   sf->adaptive_rd_thresh = 1;
-  sf->encode_breakout_thresh = 1;
   sf->use_fast_coef_costing = 1;
 
   if (speed == 1) {
@@ -169,7 +168,6 @@
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
-    sf->encode_breakout_thresh = 8;
   }
 
   if (speed >= 2) {
@@ -208,7 +206,6 @@
     sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
-    sf->encode_breakout_thresh = 200;
   }
 
   if (speed >= 3) {
@@ -226,7 +223,6 @@
     sf->optimize_coefficients = 0;
     sf->disable_split_mask = DISABLE_ALL_SPLIT;
     sf->lpf_pick = LPF_PICK_FROM_Q;
-    sf->encode_breakout_thresh = 700;
   }
 
   if (speed >= 4) {
@@ -245,7 +241,6 @@
     }
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY;
     sf->frame_parameter_update = 0;
-    sf->encode_breakout_thresh = 1000;
     sf->search_method = FAST_HEX;
     sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV);
     sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV));
@@ -338,7 +333,6 @@
   sf->use_fast_coef_costing = 0;
   sf->mode_skip_start = MAX_MODES;  // Mode index at which mode skip mask set
   sf->use_nonrd_pick_mode = 0;
-  sf->encode_breakout_thresh = 0;
   for (i = 0; i < BLOCK_SIZES; ++i)
     sf->disable_inter_mode_mask[i] = 0;
   sf->max_intra_bsize = BLOCK_64X64;
@@ -384,10 +378,6 @@
 
   cpi->mb.optimize = sf->optimize_coefficients == 1 && cpi->pass != 1;
 
-  if (cpi->encode_breakout && oxcf->mode == REALTIME &&
-      sf->encode_breakout_thresh > cpi->encode_breakout)
-    cpi->encode_breakout = sf->encode_breakout_thresh;
-
   if (sf->disable_split_mask == DISABLE_ALL_SPLIT)
     sf->adaptive_pred_interp_filter = 0;
 
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 46806c9..d8c1a8b 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -321,10 +321,6 @@
   // This flag controls the use of non-RD mode decision.
   int use_nonrd_pick_mode;
 
-  // This variable sets the encode_breakout threshold. Currently, it is only
-  // enabled in real time mode.
-  int encode_breakout_thresh;
-
   // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
   // modes are disabled in order from LSB to MSB for each BLOCK_SIZE.
   int disable_inter_mode_mask[BLOCK_SIZES];
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 8ce98d9..dcca92d 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -232,7 +232,6 @@
       cpi->common.fc.coef_probs[tx_size][type][ref];
   unsigned int (*const eob_branch)[COEFF_CONTEXTS] =
       cpi->common.counts.eob_branch[tx_size][type][ref];
-
   const uint8_t *const band = get_band_translate(tx_size);
   const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
 
@@ -289,14 +288,17 @@
   MACROBLOCK *x;
   int *skippable;
 };
-
 static void is_skippable(int plane, int block,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                          void *argv) {
   struct is_skippable_args *args = argv;
+  (void)plane_bsize;
+  (void)tx_size;
   args->skippable[0] &= (!args->x->plane[plane].eobs[block]);
 }
 
+// TODO(yaowu): rewrite and optimize this function to remove the usage of
+//              vp9_foreach_transform_block() and simplify is_skippable().
 int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
   int result = 1;
   struct is_skippable_args args = {x, &result};
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 2a3964a..5a8a4f4 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -211,8 +211,8 @@
       ERROR("Option --tune=ssim is not currently supported in VP9.");
 
   if (cfg->g_pass == VPX_RC_LAST_PASS) {
-    size_t           packet_sz = sizeof(FIRSTPASS_STATS);
-    int              n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz);
+    const size_t packet_sz = sizeof(FIRSTPASS_STATS);
+    const int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz);
     const FIRSTPASS_STATS *stats;
 
     if (cfg->rc_twopass_stats_in.buf == NULL)
@@ -464,7 +464,7 @@
 
 static vpx_codec_err_t ctrl_get_param(vpx_codec_alg_priv_t *ctx, int ctrl_id,
                                  va_list args) {
-  void *arg = va_arg(args, void *);
+  void *const arg = va_arg(args, void *);
 
 #define MAP(id, var) case id: *(RECAST(id, arg)) = var; break
 
@@ -525,6 +525,7 @@
 static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
                                     vpx_codec_priv_enc_mr_cfg_t *data) {
   vpx_codec_err_t res = VPX_CODEC_OK;
+  (void)data;
 
   if (ctx->priv == NULL) {
     int i;
@@ -880,14 +881,15 @@
   return res;
 }
 
-static const vpx_codec_cx_pkt_t *encoder_get_cxdata(vpx_codec_alg_priv_t  *ctx,
+static const vpx_codec_cx_pkt_t *encoder_get_cxdata(vpx_codec_alg_priv_t *ctx,
                                                     vpx_codec_iter_t *iter) {
   return vpx_codec_pkt_list_get(&ctx->pkt_list.head, iter);
 }
 
 static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx,
-                                          int ctr_id, va_list args) {
+                                          int ctrl_id, va_list args) {
   vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *);
+  (void)ctrl_id;
 
   if (frame != NULL) {
     YV12_BUFFER_CONFIG sd;
@@ -902,8 +904,9 @@
 }
 
 static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx,
-                                           int ctr_id, va_list args) {
+                                           int ctrl_id, va_list args) {
   vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *);
+  (void)ctrl_id;
 
   if (frame != NULL) {
     YV12_BUFFER_CONFIG sd;
@@ -918,11 +921,12 @@
 }
 
 static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
-                                          int ctr_id, va_list args) {
-  vp9_ref_frame_t *frame = va_arg(args, vp9_ref_frame_t *);
+                                          int ctrl_id, va_list args) {
+  vp9_ref_frame_t *const frame = va_arg(args, vp9_ref_frame_t *);
+  (void)ctrl_id;
 
   if (frame != NULL) {
-    YV12_BUFFER_CONFIG* fb;
+    YV12_BUFFER_CONFIG *fb;
 
     vp9_get_reference_enc(ctx->cpi, frame->idx, &fb);
     yuvconfig2image(&frame->img, fb, NULL);
@@ -955,7 +959,8 @@
 
 static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) {
   YV12_BUFFER_CONFIG sd;
-  vp9_ppflags_t flags = {0};
+  vp9_ppflags_t flags;
+  vp9_zero(flags);
 
   if (ctx->preview_ppcfg.post_proc_flag) {
     flags.post_proc_flag   = ctx->preview_ppcfg.post_proc_flag;
@@ -972,36 +977,47 @@
 }
 
 static vpx_codec_err_t ctrl_update_entropy(vpx_codec_alg_priv_t *ctx,
-                                           int ctr_id, va_list args) {
+                                           int ctrl_id, va_list args) {
   const int update = va_arg(args, int);
+  (void)ctrl_id;
+
   vp9_update_entropy(ctx->cpi, update);
   return VPX_CODEC_OK;
 }
 
 static vpx_codec_err_t ctrl_update_reference(vpx_codec_alg_priv_t *ctx,
-                                             int ctr_id, va_list args) {
+                                             int ctrl_id, va_list args) {
   const int ref_frame_flags = va_arg(args, int);
+  (void)ctrl_id;
+
   vp9_update_reference(ctx->cpi, ref_frame_flags);
   return VPX_CODEC_OK;
 }
 
 static vpx_codec_err_t ctrl_use_reference(vpx_codec_alg_priv_t *ctx,
-                                          int ctr_id, va_list args) {
+                                          int ctrl_id, va_list args) {
   const int reference_flag = va_arg(args, int);
+  (void)ctrl_id;
+
   vp9_use_as_reference(ctx->cpi, reference_flag);
   return VPX_CODEC_OK;
 }
 
 static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx,
-                                        int ctr_id, va_list args) {
+                                        int ctrl_id, va_list args) {
+  (void)ctx;
+  (void)ctrl_id;
+  (void)args;
+
   // TODO(yaowu): Need to re-implement and test for VP9.
   return VPX_CODEC_INVALID_PARAM;
 }
 
 
 static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx,
-                                           int ctr_id, va_list args) {
+                                           int ctrl_id, va_list args) {
   vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *);
+  (void)ctrl_id;
 
   if (map) {
     if (!vp9_set_active_map(ctx->cpi, map->active_map,
@@ -1015,8 +1031,9 @@
 }
 
 static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx,
-                                           int ctr_id, va_list args) {
+                                           int ctrl_id, va_list args) {
   vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *);
+  (void)ctrl_id;
 
   if (mode) {
     const int res = vp9_set_internal_size(ctx->cpi,
@@ -1028,10 +1045,12 @@
   }
 }
 
-static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id,
+static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, int ctrl_id,
                                     va_list args) {
   int data = va_arg(args, int);
   const vpx_codec_enc_cfg_t *cfg = &ctx->cfg;
+  (void)ctrl_id;
+
   vp9_set_svc(ctx->cpi, data);
   // CBR or two pass mode for SVC with both temporal and spatial layers
   // not yet supported.
@@ -1047,11 +1066,12 @@
 }
 
 static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx,
-                                             int ctr_id,
-                                             va_list args) {
+                                             int ctrl_id, va_list args) {
   vpx_svc_layer_id_t *const data = va_arg(args, vpx_svc_layer_id_t *);
   VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
   SVC *const svc = &cpi->svc;
+  (void)ctrl_id;
+
   svc->spatial_layer_id = data->spatial_layer_id;
   svc->temporal_layer_id = data->temporal_layer_id;
   // Checks on valid layer_id input.
@@ -1067,9 +1087,10 @@
 }
 
 static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
-                                               int ctr_id, va_list args) {
+                                               int ctrl_id, va_list args) {
   VP9_COMP *const cpi = ctx->cpi;
   vpx_svc_parameters_t *const params = va_arg(args, vpx_svc_parameters_t *);
+  (void)ctrl_id;
 
   if (params == NULL)
     return VPX_CODEC_INVALID_PARAM;
@@ -1214,6 +1235,7 @@
     NOT_IMPLEMENTED,  // vpx_codec_get_si_fn_t
     NOT_IMPLEMENTED,  // vpx_codec_decode_fn_t
     NOT_IMPLEMENTED,  // vpx_codec_frame_get_fn_t
+    NOT_IMPLEMENTED   // vpx_codec_set_fb_fn_t
   },
   {  // NOLINT
     encoder_usage_cfg_map,  // vpx_codec_enc_cfg_map_t
@@ -1222,6 +1244,6 @@
     encoder_set_config,     // vpx_codec_enc_config_set_fn_t
     NOT_IMPLEMENTED,        // vpx_codec_get_global_headers_fn_t
     encoder_get_preview,    // vpx_codec_get_preview_frame_fn_t
-    NOT_IMPLEMENTED ,       // vpx_codec_enc_mr_get_mem_loc_fn_t
+    NOT_IMPLEMENTED         // vpx_codec_enc_mr_get_mem_loc_fn_t
   }
 };
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 6198250..2802fba 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -32,21 +32,12 @@
   vpx_codec_priv_t        base;
   vpx_codec_dec_cfg_t     cfg;
   vp9_stream_info_t       si;
-  int                     decoder_init;
   struct VP9Decoder *pbi;
   int                     postproc_cfg_set;
   vp8_postproc_cfg_t      postproc_cfg;
-#if CONFIG_POSTPROC_VISUALIZER
-  unsigned int            dbg_postproc_flag;
-  int                     dbg_color_ref_frame_flag;
-  int                     dbg_color_mb_modes_flag;
-  int                     dbg_color_b_modes_flag;
-  int                     dbg_display_mv_flag;
-#endif
   vpx_decrypt_cb          decrypt_cb;
   void                   *decrypt_state;
   vpx_image_t             img;
-  int                     img_setup;
   int                     img_avail;
   int                     invert_tile_order;
 
@@ -226,22 +217,10 @@
 static void set_ppflags(const vpx_codec_alg_priv_t *ctx,
                         vp9_ppflags_t *flags) {
   flags->post_proc_flag =
-#if CONFIG_POSTPROC_VISUALIZER
-      (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) |
-      (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) |
-      (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) |
-      (ctx->dbg_display_mv_flag ? VP9D_DEBUG_DRAW_MV : 0) |
-#endif
       ctx->postproc_cfg.post_proc_flag;
 
   flags->deblocking_level = ctx->postproc_cfg.deblocking_level;
   flags->noise_level = ctx->postproc_cfg.noise_level;
-#if CONFIG_POSTPROC_VISUALIZER
-  flags->display_ref_frame_flag = ctx->dbg_color_ref_frame_flag;
-  flags->display_mb_modes_flag = ctx->dbg_color_mb_modes_flag;
-  flags->display_b_modes_flag = ctx->dbg_color_b_modes_flag;
-  flags->display_mv_flag = ctx->dbg_display_mv_flag;
-#endif
 }
 
 static void init_decoder(vpx_codec_alg_priv_t *ctx) {
@@ -252,8 +231,6 @@
   ctx->pbi->max_threads = ctx->cfg.threads;
   ctx->pbi->inv_tile_order = ctx->invert_tile_order;
 
-  vp9_initialize_dec();
-
   // If postprocessing was enabled by the application and a
   // configuration has not been provided, default it.
   if (!ctx->postproc_cfg_set &&
@@ -285,12 +262,10 @@
   }
 
   // Initialize the decoder instance on the first frame
-  if (!ctx->decoder_init) {
+  if (ctx->pbi == NULL) {
     init_decoder(ctx);
     if (ctx->pbi == NULL)
       return VPX_CODEC_ERROR;
-
-    ctx->decoder_init = 1;
   }
 
   // Set these even if already initialized.  The caller may have changed the
@@ -539,22 +514,7 @@
 
 static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx,
                                             int ctrl_id, va_list args) {
-#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC
-  int data = va_arg(args, int);
-
-#define MAP(id, var) case id: var = data; break;
-
-  switch (ctrl_id) {
-      MAP(VP8_SET_DBG_COLOR_REF_FRAME,   ctx->dbg_color_ref_frame_flag);
-      MAP(VP8_SET_DBG_COLOR_MB_MODES,    ctx->dbg_color_mb_modes_flag);
-      MAP(VP8_SET_DBG_COLOR_B_MODES,     ctx->dbg_color_b_modes_flag);
-      MAP(VP8_SET_DBG_DISPLAY_MV,        ctx->dbg_display_mv_flag);
-  }
-
-  return VPX_CODEC_OK;
-#else
   return VPX_CODEC_INCAPABLE;
-#endif
 }
 
 static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 51ca65e..83c3308 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -170,8 +170,8 @@
  * \ref MUST be non-zero.
  */
 typedef const struct vpx_codec_ctrl_fn_map {
-  int                    ctrl_id;
-  vpx_codec_control_fn_t   fn;
+  int ctrl_id;
+  vpx_codec_control_fn_t fn;
 } vpx_codec_ctrl_fn_map_t;
 
 /*!\brief decode data function pointer prototype