Merge "Fix the printf format string"
diff --git a/test/svc_test.cc b/test/svc_test.cc
index 98a5d94..3ddd9c1 100644
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -41,7 +41,6 @@
   virtual ~SvcTest() {}
 
   virtual void SetUp() {
-    svc_.first_frame_full_size = 1;
     svc_.encoding_mode = INTER_LAYER_PREDICTION_IP;
     svc_.log_level = SVC_LOG_DEBUG;
     svc_.log_print = 0;
@@ -185,7 +184,6 @@
   res = vpx_svc_set_quantizers(&svc_, NULL);
   EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
 
-  svc_.first_frame_full_size = 0;
   svc_.spatial_layers = 2;
   res = vpx_svc_set_quantizers(&svc_, "40");
   EXPECT_EQ(VPX_CODEC_OK, res);
@@ -206,7 +204,6 @@
   res = vpx_svc_set_scale_factors(&svc_, NULL);
   EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
 
-  svc_.first_frame_full_size = 0;
   svc_.spatial_layers = 2;
   res = vpx_svc_set_scale_factors(&svc_, "4/16");
   EXPECT_EQ(VPX_CODEC_OK, res);
@@ -220,11 +217,8 @@
   codec_initialized_ = true;
 }
 
-// test that decoder can handle an SVC frame as the first frame in a sequence
-// this test is disabled since it always fails because of a decoder issue
-// https://code.google.com/p/webm/issues/detail?id=654
-TEST_F(SvcTest, DISABLED_FirstFrameHasLayers) {
-  svc_.first_frame_full_size = 0;
+// Test that decoder can handle an SVC frame as the first frame in a sequence.
+TEST_F(SvcTest, FirstFrameHasLayers) {
   svc_.spatial_layers = 2;
   vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
   vpx_svc_set_quantizers(&svc_, "40,30");
@@ -252,7 +246,6 @@
 }
 
 TEST_F(SvcTest, EncodeThreeFrames) {
-  svc_.first_frame_full_size = 1;
   svc_.spatial_layers = 2;
   vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
   vpx_svc_set_quantizers(&svc_, "40,30");
@@ -265,9 +258,9 @@
   libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
                                      codec_enc_.g_timebase.den,
                                      codec_enc_.g_timebase.num, 0, 30);
-  // FRAME 1
+  // FRAME 0
   video.Begin();
-  // this frame is full size, with only one layer
+  // This frame is a keyframe.
   res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                        video.duration(), VPX_DL_REALTIME);
   ASSERT_EQ(VPX_CODEC_OK, res);
@@ -278,13 +271,13 @@
       vpx_svc_get_frame_size(&svc_));
   ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
 
-  // FRAME 2
+  // FRAME 1
   video.Next();
-  // this is an I-frame
+  // This is a P-frame.
   res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                        video.duration(), VPX_DL_REALTIME);
   ASSERT_EQ(VPX_CODEC_OK, res);
-  EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
+  EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
 
   res_dec = decoder_->DecodeFrame(
       static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
@@ -293,7 +286,7 @@
 
   // FRAME 2
   video.Next();
-  // this is a P-frame
+  // This is a P-frame.
   res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
                        video.duration(), VPX_DL_REALTIME);
   ASSERT_EQ(VPX_CODEC_OK, res);
@@ -306,7 +299,6 @@
 }
 
 TEST_F(SvcTest, GetLayerResolution) {
-  svc_.first_frame_full_size = 0;
   svc_.spatial_layers = 2;
   vpx_svc_set_scale_factors(&svc_, "4/16,8/16");
   vpx_svc_set_quantizers(&svc_, "40,30");
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index bb09b75..4451479 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -61,6 +61,7 @@
 aff51d865c2621b60510459244ea83e958e4baed  vp80-05-sharpness-1439.ivf
 da386e72b19b5485a6af199c5eb60ef25e510dd1  vp80-05-sharpness-1440.ivf
 6759a095203d96ccd267ce09b1b050b8cc4c2f1f  vp80-05-sharpness-1443.ivf
+b95d3cc1d0df991e63e150a801710a72f20d9ba0  vp80-06-smallsize.ivf
 db55ec7fd02c864ba996ff060b25b1e08611330b  vp80-00-comprehensive-001.ivf.md5
 29db0ad011cba1e45f856d5623cd38dac3e3bf19  vp80-00-comprehensive-002.ivf.md5
 e84f258f69e173e7d68f8f8c037a0a3766902182  vp80-00-comprehensive-003.ivf.md5
@@ -122,6 +123,7 @@
 086c56378df81b6cee264d7540a7b8f2b405c7a4  vp80-05-sharpness-1439.ivf.md5
 d32dc2c4165eb266ea4c23c14a45459b363def32  vp80-05-sharpness-1440.ivf.md5
 8c69dc3d8e563f56ffab5ad1e400d9e689dd23df  vp80-05-sharpness-1443.ivf.md5
+d6f246df012c241b5fa6c1345019a3703d85c419  vp80-06-smallsize.ivf.md5
 ce881e567fe1d0fbcb2d3e9e6281a1a8d74d82e0  vp90-2-00-quantizer-00.webm
 ac5eda33407d0521c7afca43a63fd305c0cd9d13  vp90-2-00-quantizer-00.webm.md5
 2ca0463f2cfb93d25d7dded174db70b7cb87cb48  vp90-2-00-quantizer-01.webm
@@ -561,3 +563,7 @@
 1a9c2914ba932a38f0a143efc1ad0e318e78888b  vp90-2-tos_426x178_tile_1x1_181kbps.webm
 a3d2b09f24debad4747a1b3066f572be4273bced  vp90-2-tos_640x266_tile_1x2_336kbps.webm
 c64b03b5c090e6888cb39685c31f00a6b79fa45c  vp90-2-tos_854x356_tile_1x2_656kbps.webm
+0e7cd4135b231c9cea8d76c19f9e84b6fd77acec  vp90-2-08-tile_1x8_frame_parallel.webm
+c9b6850af28579b031791066457f4cb40df6e1c7  vp90-2-08-tile_1x8_frame_parallel.webm.md5
+e448b6e83490bca0f8d58b4f4b1126a17baf4b0c  vp90-2-08-tile_1x8.webm
+5e524165f0397e6141d914f4f0a66267d7658376  vp90-2-08-tile_1x8.webm.md5
diff --git a/test/test.mk b/test/test.mk
index 4f877f4..98e5c7b 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -175,6 +175,7 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-06-smallsize.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf.md5
@@ -236,6 +237,7 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-06-smallsize.ivf.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-00.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-00.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-01.webm
@@ -506,6 +508,8 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x64.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x66.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x66.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-lf-1920x1080.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-lf-1920x1080.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x196.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x196.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x198.webm
@@ -650,6 +654,10 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x4_frame_parallel.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x4.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8_frame_parallel.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8_frame_parallel.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm
diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc
index 4332420..ee610fa 100644
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -55,7 +55,8 @@
   "vp80-05-sharpness-1430.ivf", "vp80-05-sharpness-1431.ivf",
   "vp80-05-sharpness-1433.ivf", "vp80-05-sharpness-1434.ivf",
   "vp80-05-sharpness-1438.ivf", "vp80-05-sharpness-1439.ivf",
-  "vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf"
+  "vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf",
+  "vp80-06-smallsize.ivf"
 };
 #endif
 #if CONFIG_VP9_DECODER
@@ -164,8 +165,10 @@
   "vp90-2-07-frame_parallel.webm",
   "vp90-2-08-tile_1x2_frame_parallel.webm", "vp90-2-08-tile_1x2.webm",
   "vp90-2-08-tile_1x4_frame_parallel.webm", "vp90-2-08-tile_1x4.webm",
+  "vp90-2-08-tile_1x8_frame_parallel.webm", "vp90-2-08-tile_1x8.webm",
   "vp90-2-08-tile-4x4.webm", "vp90-2-08-tile-4x1.webm",
   "vp90-2-09-subpixel-00.ivf",
+  "vp90-2-02-size-lf-1920x1080.webm",
 #if CONFIG_NON420
   "vp91-2-04-yv444.webm"
 #endif
diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc
index a8ce6e4..a78cdea 100644
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -141,10 +141,12 @@
       "68ede6abd66bae0a2edf2eb9232241b6" },
     { "vp90-2-08-tile_1x4_frame_parallel.webm",
       "368ebc6ebf3a5e478d85b2c3149b2848" },
+    { "vp90-2-08-tile_1x8_frame_parallel.webm",
+      "17e439da2388aff3a0f69cb22579c6c1" },
   };
 
   for (int i = 0; i < static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) {
-    for (int t = 2; t <= 4; ++t) {
+    for (int t = 2; t <= 8; ++t) {
       EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str())
           << "threads = " << t;
     }
diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h
index e515c3a..8b6c50b 100644
--- a/vp8/common/setupintrarecon.h
+++ b/vp8/common/setupintrarecon.h
@@ -8,6 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#ifndef SETUPINTRARECON_H
+#define SETUPINTRARECON_H
 
 #include "vpx_scale/yv12config.h"
 extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
@@ -31,3 +33,5 @@
     for (i = 0; i < 8; i++)
         v_buffer[uv_stride *i] = (unsigned char) 129;
 }
+
+#endif
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodeframe.c
similarity index 99%
rename from vp8/decoder/decodframe.c
rename to vp8/decoder/decodeframe.c
index 16da78a..bfde599 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodeframe.c
@@ -680,7 +680,6 @@
                     vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1,
                                                recon_y_stride, recon_uv_stride,
                                                lf_dst[0], lf_dst[1], lf_dst[2]);
-
                 if(mb_row > 1)
                 {
                     yv12_extend_frame_left_right_c(yv12_fb_new,
@@ -691,10 +690,6 @@
                     eb_dst[0] += recon_y_stride  * 16;
                     eb_dst[1] += recon_uv_stride *  8;
                     eb_dst[2] += recon_uv_stride *  8;
-
-                    if(mb_row == 2)
-                        yv12_extend_frame_top_c(yv12_fb_new);
-
                 }
 
                 lf_dst[0] += recon_y_stride  * 16;
@@ -713,13 +708,9 @@
                                                eb_dst[0],
                                                eb_dst[1],
                                                eb_dst[2]);
-
                 eb_dst[0] += recon_y_stride  * 16;
                 eb_dst[1] += recon_uv_stride *  8;
                 eb_dst[2] += recon_uv_stride *  8;
-
-                if(mb_row == 1)
-                    yv12_extend_frame_top_c(yv12_fb_new);
             }
         }
     }
@@ -747,7 +738,7 @@
                                    eb_dst[0],
                                    eb_dst[1],
                                    eb_dst[2]);
-
+    yv12_extend_frame_top_c(yv12_fb_new);
     yv12_extend_frame_bottom_c(yv12_fb_new);
 
 }
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 4a8f467..892ed70 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -22,7 +22,7 @@
 
 VP8_DX_SRCS-yes += decoder/dboolhuff.c
 VP8_DX_SRCS-yes += decoder/decodemv.c
-VP8_DX_SRCS-yes += decoder/decodframe.c
+VP8_DX_SRCS-yes += decoder/decodeframe.c
 VP8_DX_SRCS-yes += decoder/detokenize.c
 VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/ec_types.h
 VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.h
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 28671c3..f567840 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -34,7 +34,7 @@
 void vp9_free_frame_buffers(VP9_COMMON *cm) {
   int i;
 
-  for (i = 0; i < NUM_YV12_BUFFERS; i++)
+  for (i = 0; i < FRAME_BUFFERS; i++)
     vp9_free_frame_buffer(&cm->yv12_fb[i]);
 
   vp9_free_frame_buffer(&cm->post_proc_buffer);
@@ -75,7 +75,6 @@
              cm->mode_info_stride * (cm->mi_rows + 1) *
              sizeof(*cm->mi_grid_base));
 
-  vp9_update_mode_info_border(cm, cm->mip);
   vp9_update_mode_info_border(cm, cm->prev_mip);
 }
 
@@ -141,20 +140,20 @@
 
   vp9_free_frame_buffers(cm);
 
-  for (i = 0; i < NUM_YV12_BUFFERS; i++) {
+  for (i = 0; i < FRAME_BUFFERS; i++) {
     cm->fb_idx_ref_cnt[i] = 0;
     if (vp9_alloc_frame_buffer(&cm->yv12_fb[i], width, height, ss_x, ss_y,
                                VP9BORDERINPIXELS) < 0)
       goto fail;
   }
 
-  cm->new_fb_idx = NUM_YV12_BUFFERS - 1;
+  cm->new_fb_idx = FRAME_BUFFERS - 1;
   cm->fb_idx_ref_cnt[cm->new_fb_idx] = 1;
 
-  for (i = 0; i < ALLOWED_REFS_PER_FRAME; i++)
+  for (i = 0; i < REFS_PER_FRAME; i++)
     cm->active_ref_idx[i] = i;
 
-  for (i = 0; i < NUM_REF_FRAMES; i++) {
+  for (i = 0; i < REF_FRAMES; i++) {
     cm->ref_frame_map[i] = i;
     cm->fb_idx_ref_cnt[i] = 1;
   }
@@ -200,9 +199,6 @@
 
 void vp9_create_common(VP9_COMMON *cm) {
   vp9_machine_specific_config(cm);
-
-  cm->tx_mode = ONLY_4X4;
-  cm->comp_pred_mode = REFERENCE_MODE_SELECT;
 }
 
 void vp9_remove_common(VP9_COMMON *cm) {
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 993ee79..93f96c8 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -26,8 +26,9 @@
 #include "vp9/common/vp9_seg_common.h"
 #include "vp9/common/vp9_treecoder.h"
 
-#define BLOCK_SIZE_GROUPS   4
+#define BLOCK_SIZE_GROUPS 4
 #define MBSKIP_CONTEXTS 3
+#define INTER_MODE_CONTEXTS 7
 
 /* Segment Feature Masks */
 #define MAX_MV_REF_CANDIDATES 2
@@ -37,8 +38,9 @@
 #define REF_CONTEXTS 5
 
 typedef enum {
-  PLANE_TYPE_Y_WITH_DC,
-  PLANE_TYPE_UV,
+  PLANE_TYPE_Y  = 0,
+  PLANE_TYPE_UV = 1,
+  PLANE_TYPES
 } PLANE_TYPE;
 
 typedef char ENTROPY_CONTEXT;
@@ -199,7 +201,6 @@
 
 struct macroblockd_plane {
   int16_t *dqcoeff;
-  uint16_t *eobs;
   PLANE_TYPE plane_type;
   int subsampling_x;
   int subsampling_y;
@@ -234,6 +235,9 @@
   int mb_to_top_edge;
   int mb_to_bottom_edge;
 
+  /* pointers to reference frames */
+  const YV12_BUFFER_CONFIG *ref_buf[2];
+
   int lossless;
   /* Inverse transform function pointers. */
   void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
@@ -265,45 +269,57 @@
   const MODE_INFO *const mi = xd->mi_8x8[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
 
-  if (plane_type != PLANE_TYPE_Y_WITH_DC ||
-      xd->lossless ||
-      is_inter_block(mbmi))
+  if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mbmi))
     return DCT_DCT;
 
-  return mode2txfm_map[mbmi->sb_type < BLOCK_8X8 ?
-                       mi->bmi[ib].as_mode : mbmi->mode];
+  return mode2txfm_map[mbmi->sb_type < BLOCK_8X8 ? mi->bmi[ib].as_mode
+                                                 : mbmi->mode];
 }
 
 static INLINE TX_TYPE get_tx_type_8x8(PLANE_TYPE plane_type,
                                       const MACROBLOCKD *xd) {
-  return plane_type == PLANE_TYPE_Y_WITH_DC ?
-             mode2txfm_map[xd->mi_8x8[0]->mbmi.mode] : DCT_DCT;
+  return plane_type == PLANE_TYPE_Y ? mode2txfm_map[xd->mi_8x8[0]->mbmi.mode]
+                                    : DCT_DCT;
 }
 
 static INLINE TX_TYPE get_tx_type_16x16(PLANE_TYPE plane_type,
                                         const MACROBLOCKD *xd) {
-  return plane_type == PLANE_TYPE_Y_WITH_DC ?
-             mode2txfm_map[xd->mi_8x8[0]->mbmi.mode] : DCT_DCT;
+  return plane_type == PLANE_TYPE_Y ? mode2txfm_map[xd->mi_8x8[0]->mbmi.mode]
+                                    : DCT_DCT;
 }
 
 static void setup_block_dptrs(MACROBLOCKD *xd, int ss_x, int ss_y) {
   int i;
 
   for (i = 0; i < MAX_MB_PLANE; i++) {
-    xd->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y_WITH_DC;
+    xd->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y;
     xd->plane[i].subsampling_x = i ? ss_x : 0;
     xd->plane[i].subsampling_y = i ? ss_y : 0;
   }
 #if CONFIG_ALPHA
   // TODO(jkoleszar): Using the Y w/h for now
+  xd->plane[3].plane_type = PLANE_TYPE_Y;
   xd->plane[3].subsampling_x = 0;
   xd->plane[3].subsampling_y = 0;
 #endif
 }
 
+// Returns the chroma transform size for a block of size |bsize| whose luma
+// transform size is |y_tx_size|: TX_4X4 below BLOCK_8X8, otherwise |y_tx_size|
+// capped at max_txsize_lookup[] for the 4:2:0-subsampled plane block size.
+static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize) {
+  if (bsize < BLOCK_8X8) {
+    return TX_4X4;
+  } else {
+    // TODO(dkovalev): Assuming YUV420 (ss_x == 1, ss_y == 1)
+    const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][1][1];
+    return MIN(y_tx_size, max_txsize_lookup[plane_bsize]);
+  }
+}
 
+// Returns the chroma transform size implied by |mbmi|'s tx_size and sb_type.
 static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
-  return MIN(mbmi->tx_size, max_uv_txsize_lookup[mbmi->sb_type]);
+  return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type);
 }
 
 static BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
diff --git a/vp9/common/vp9_common_data.c b/vp9/common/vp9_common_data.c
index 388f38d..886c0af 100644
--- a/vp9/common/vp9_common_data.c
+++ b/vp9/common/vp9_common_data.c
@@ -108,12 +108,6 @@
   TX_16X16, TX_16X16, TX_16X16,
   TX_32X32, TX_32X32, TX_32X32, TX_32X32
 };
-const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZES] = {
-  TX_4X4,   TX_4X4,   TX_4X4,
-  TX_4X4,   TX_4X4,   TX_4X4,
-  TX_8X8,   TX_8X8,   TX_8X8,
-  TX_16X16, TX_16X16, TX_16X16, TX_32X32
-};
 
 const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
   TX_4X4,  // ONLY_4X4
diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h
index c1f6405..b6fc70a 100644
--- a/vp9/common/vp9_common_data.h
+++ b/vp9/common/vp9_common_data.h
@@ -26,7 +26,6 @@
 extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZES];
 extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES];
 extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
-extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZES];
 extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
 extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
 
diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h
deleted file mode 100644
index 3b512be..0000000
--- a/vp9/common/vp9_default_coef_probs.h
+++ /dev/null
@@ -1,699 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
-*/
-#ifndef VP9_COMMON_DEFAULT_COEF_PROBS_H_
-#define VP9_COMMON_DEFAULT_COEF_PROBS_H_
-
-/*Generated file, included by vp9_entropy.c*/
-static const vp9_coeff_probs_model default_coef_probs_4x4[BLOCK_TYPES] = {
-  { /* block Type 0 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 195,  29, 183 },
-        {  84,  49, 136 },
-        {   8,  42,  71 }
-      }, { /* Coeff Band 1 */
-        {  31, 107, 169 },
-        {  35,  99, 159 },
-        {  17,  82, 140 },
-        {   8,  66, 114 },
-        {   2,  44,  76 },
-        {   1,  19,  32 }
-      }, { /* Coeff Band 2 */
-        {  40, 132, 201 },
-        {  29, 114, 187 },
-        {  13,  91, 157 },
-        {   7,  75, 127 },
-        {   3,  58,  95 },
-        {   1,  28,  47 }
-      }, { /* Coeff Band 3 */
-        {  69, 142, 221 },
-        {  42, 122, 201 },
-        {  15,  91, 159 },
-        {   6,  67, 121 },
-        {   1,  42,  77 },
-        {   1,  17,  31 }
-      }, { /* Coeff Band 4 */
-        { 102, 148, 228 },
-        {  67, 117, 204 },
-        {  17,  82, 154 },
-        {   6,  59, 114 },
-        {   2,  39,  75 },
-        {   1,  15,  29 }
-      }, { /* Coeff Band 5 */
-        { 156,  57, 233 },
-        { 119,  57, 212 },
-        {  58,  48, 163 },
-        {  29,  40, 124 },
-        {  12,  30,  81 },
-        {   3,  12,  31 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 191, 107, 226 },
-        { 124, 117, 204 },
-        {  25,  99, 155 }
-      }, { /* Coeff Band 1 */
-        {  29, 148, 210 },
-        {  37, 126, 194 },
-        {   8,  93, 157 },
-        {   2,  68, 118 },
-        {   1,  39,  69 },
-        {   1,  17,  33 }
-      }, { /* Coeff Band 2 */
-        {  41, 151, 213 },
-        {  27, 123, 193 },
-        {   3,  82, 144 },
-        {   1,  58, 105 },
-        {   1,  32,  60 },
-        {   1,  13,  26 }
-      }, { /* Coeff Band 3 */
-        {  59, 159, 220 },
-        {  23, 126, 198 },
-        {   4,  88, 151 },
-        {   1,  66, 114 },
-        {   1,  38,  71 },
-        {   1,  18,  34 }
-      }, { /* Coeff Band 4 */
-        { 114, 136, 232 },
-        {  51, 114, 207 },
-        {  11,  83, 155 },
-        {   3,  56, 105 },
-        {   1,  33,  65 },
-        {   1,  17,  34 }
-      }, { /* Coeff Band 5 */
-        { 149,  65, 234 },
-        { 121,  57, 215 },
-        {  61,  49, 166 },
-        {  28,  36, 114 },
-        {  12,  25,  76 },
-        {   3,  16,  42 }
-      }
-    }
-  }, { /* block Type 1 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 214,  49, 220 },
-        { 132,  63, 188 },
-        {  42,  65, 137 }
-      }, { /* Coeff Band 1 */
-        {  85, 137, 221 },
-        { 104, 131, 216 },
-        {  49, 111, 192 },
-        {  21,  87, 155 },
-        {   2,  49,  87 },
-        {   1,  16,  28 }
-      }, { /* Coeff Band 2 */
-        {  89, 163, 230 },
-        {  90, 137, 220 },
-        {  29, 100, 183 },
-        {  10,  70, 135 },
-        {   2,  42,  81 },
-        {   1,  17,  33 }
-      }, { /* Coeff Band 3 */
-        { 108, 167, 237 },
-        {  55, 133, 222 },
-        {  15,  97, 179 },
-        {   4,  72, 135 },
-        {   1,  45,  85 },
-        {   1,  19,  38 }
-      }, { /* Coeff Band 4 */
-        { 124, 146, 240 },
-        {  66, 124, 224 },
-        {  17,  88, 175 },
-        {   4,  58, 122 },
-        {   1,  36,  75 },
-        {   1,  18,  37 }
-      }, { /* Coeff Band 5 */
-        { 141,  79, 241 },
-        { 126,  70, 227 },
-        {  66,  58, 182 },
-        {  30,  44, 136 },
-        {  12,  34,  96 },
-        {   2,  20,  47 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 229,  99, 249 },
-        { 143, 111, 235 },
-        {  46, 109, 192 }
-      }, { /* Coeff Band 1 */
-        {  82, 158, 236 },
-        {  94, 146, 224 },
-        {  25, 117, 191 },
-        {   9,  87, 149 },
-        {   3,  56,  99 },
-        {   1,  33,  57 }
-      }, { /* Coeff Band 2 */
-        {  83, 167, 237 },
-        {  68, 145, 222 },
-        {  10, 103, 177 },
-        {   2,  72, 131 },
-        {   1,  41,  79 },
-        {   1,  20,  39 }
-      }, { /* Coeff Band 3 */
-        {  99, 167, 239 },
-        {  47, 141, 224 },
-        {  10, 104, 178 },
-        {   2,  73, 133 },
-        {   1,  44,  85 },
-        {   1,  22,  47 }
-      }, { /* Coeff Band 4 */
-        { 127, 145, 243 },
-        {  71, 129, 228 },
-        {  17,  93, 177 },
-        {   3,  61, 124 },
-        {   1,  41,  84 },
-        {   1,  21,  52 }
-      }, { /* Coeff Band 5 */
-        { 157,  78, 244 },
-        { 140,  72, 231 },
-        {  69,  58, 184 },
-        {  31,  44, 137 },
-        {  14,  38, 105 },
-        {   8,  23,  61 }
-      }
-    }
-  }
-};
-static const vp9_coeff_probs_model default_coef_probs_8x8[BLOCK_TYPES] = {
-  { /* block Type 0 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 125,  34, 187 },
-        {  52,  41, 133 },
-        {   6,  31,  56 }
-      }, { /* Coeff Band 1 */
-        {  37, 109, 153 },
-        {  51, 102, 147 },
-        {  23,  87, 128 },
-        {   8,  67, 101 },
-        {   1,  41,  63 },
-        {   1,  19,  29 }
-      }, { /* Coeff Band 2 */
-        {  31, 154, 185 },
-        {  17, 127, 175 },
-        {   6,  96, 145 },
-        {   2,  73, 114 },
-        {   1,  51,  82 },
-        {   1,  28,  45 }
-      }, { /* Coeff Band 3 */
-        {  23, 163, 200 },
-        {  10, 131, 185 },
-        {   2,  93, 148 },
-        {   1,  67, 111 },
-        {   1,  41,  69 },
-        {   1,  14,  24 }
-      }, { /* Coeff Band 4 */
-        {  29, 176, 217 },
-        {  12, 145, 201 },
-        {   3, 101, 156 },
-        {   1,  69, 111 },
-        {   1,  39,  63 },
-        {   1,  14,  23 }
-      }, { /* Coeff Band 5 */
-        {  57, 192, 233 },
-        {  25, 154, 215 },
-        {   6, 109, 167 },
-        {   3,  78, 118 },
-        {   1,  48,  69 },
-        {   1,  21,  29 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 202, 105, 245 },
-        { 108, 106, 216 },
-        {  18,  90, 144 }
-      }, { /* Coeff Band 1 */
-        {  33, 172, 219 },
-        {  64, 149, 206 },
-        {  14, 117, 177 },
-        {   5,  90, 141 },
-        {   2,  61,  95 },
-        {   1,  37,  57 }
-      }, { /* Coeff Band 2 */
-        {  33, 179, 220 },
-        {  11, 140, 198 },
-        {   1,  89, 148 },
-        {   1,  60, 104 },
-        {   1,  33,  57 },
-        {   1,  12,  21 }
-      }, { /* Coeff Band 3 */
-        {  30, 181, 221 },
-        {   8, 141, 198 },
-        {   1,  87, 145 },
-        {   1,  58, 100 },
-        {   1,  31,  55 },
-        {   1,  12,  20 }
-      }, { /* Coeff Band 4 */
-        {  32, 186, 224 },
-        {   7, 142, 198 },
-        {   1,  86, 143 },
-        {   1,  58, 100 },
-        {   1,  31,  55 },
-        {   1,  12,  22 }
-      }, { /* Coeff Band 5 */
-        {  57, 192, 227 },
-        {  20, 143, 204 },
-        {   3,  96, 154 },
-        {   1,  68, 112 },
-        {   1,  42,  69 },
-        {   1,  19,  32 }
-      }
-    }
-  }, { /* block Type 1 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 212,  35, 215 },
-        { 113,  47, 169 },
-        {  29,  48, 105 }
-      }, { /* Coeff Band 1 */
-        {  74, 129, 203 },
-        { 106, 120, 203 },
-        {  49, 107, 178 },
-        {  19,  84, 144 },
-        {   4,  50,  84 },
-        {   1,  15,  25 }
-      }, { /* Coeff Band 2 */
-        {  71, 172, 217 },
-        {  44, 141, 209 },
-        {  15, 102, 173 },
-        {   6,  76, 133 },
-        {   2,  51,  89 },
-        {   1,  24,  42 }
-      }, { /* Coeff Band 3 */
-        {  64, 185, 231 },
-        {  31, 148, 216 },
-        {   8, 103, 175 },
-        {   3,  74, 131 },
-        {   1,  46,  81 },
-        {   1,  18,  30 }
-      }, { /* Coeff Band 4 */
-        {  65, 196, 235 },
-        {  25, 157, 221 },
-        {   5, 105, 174 },
-        {   1,  67, 120 },
-        {   1,  38,  69 },
-        {   1,  15,  30 }
-      }, { /* Coeff Band 5 */
-        {  65, 204, 238 },
-        {  30, 156, 224 },
-        {   7, 107, 177 },
-        {   2,  70, 124 },
-        {   1,  42,  73 },
-        {   1,  18,  34 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 225,  86, 251 },
-        { 144, 104, 235 },
-        {  42,  99, 181 }
-      }, { /* Coeff Band 1 */
-        {  85, 175, 239 },
-        { 112, 165, 229 },
-        {  29, 136, 200 },
-        {  12, 103, 162 },
-        {   6,  77, 123 },
-        {   2,  53,  84 }
-      }, { /* Coeff Band 2 */
-        {  75, 183, 239 },
-        {  30, 155, 221 },
-        {   3, 106, 171 },
-        {   1,  74, 128 },
-        {   1,  44,  76 },
-        {   1,  17,  28 }
-      }, { /* Coeff Band 3 */
-        {  73, 185, 240 },
-        {  27, 159, 222 },
-        {   2, 107, 172 },
-        {   1,  75, 127 },
-        {   1,  42,  73 },
-        {   1,  17,  29 }
-      }, { /* Coeff Band 4 */
-        {  62, 190, 238 },
-        {  21, 159, 222 },
-        {   2, 107, 172 },
-        {   1,  72, 122 },
-        {   1,  40,  71 },
-        {   1,  18,  32 }
-      }, { /* Coeff Band 5 */
-        {  61, 199, 240 },
-        {  27, 161, 226 },
-        {   4, 113, 180 },
-        {   1,  76, 129 },
-        {   1,  46,  80 },
-        {   1,  23,  41 }
-      }
-    }
-  }
-};
-static const vp9_coeff_probs_model default_coef_probs_16x16[BLOCK_TYPES] = {
-  { /* block Type 0 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        {   7,  27, 153 },
-        {   5,  30,  95 },
-        {   1,  16,  30 }
-      }, { /* Coeff Band 1 */
-        {  50,  75, 127 },
-        {  57,  75, 124 },
-        {  27,  67, 108 },
-        {  10,  54,  86 },
-        {   1,  33,  52 },
-        {   1,  12,  18 }
-      }, { /* Coeff Band 2 */
-        {  43, 125, 151 },
-        {  26, 108, 148 },
-        {   7,  83, 122 },
-        {   2,  59,  89 },
-        {   1,  38,  60 },
-        {   1,  17,  27 }
-      }, { /* Coeff Band 3 */
-        {  23, 144, 163 },
-        {  13, 112, 154 },
-        {   2,  75, 117 },
-        {   1,  50,  81 },
-        {   1,  31,  51 },
-        {   1,  14,  23 }
-      }, { /* Coeff Band 4 */
-        {  18, 162, 185 },
-        {   6, 123, 171 },
-        {   1,  78, 125 },
-        {   1,  51,  86 },
-        {   1,  31,  54 },
-        {   1,  14,  23 }
-      }, { /* Coeff Band 5 */
-        {  15, 199, 227 },
-        {   3, 150, 204 },
-        {   1,  91, 146 },
-        {   1,  55,  95 },
-        {   1,  30,  53 },
-        {   1,  11,  20 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        {  19,  55, 240 },
-        {  19,  59, 196 },
-        {   3,  52, 105 }
-      }, { /* Coeff Band 1 */
-        {  41, 166, 207 },
-        { 104, 153, 199 },
-        {  31, 123, 181 },
-        {  14, 101, 152 },
-        {   5,  72, 106 },
-        {   1,  36,  52 }
-      }, { /* Coeff Band 2 */
-        {  35, 176, 211 },
-        {  12, 131, 190 },
-        {   2,  88, 144 },
-        {   1,  60, 101 },
-        {   1,  36,  60 },
-        {   1,  16,  28 }
-      }, { /* Coeff Band 3 */
-        {  28, 183, 213 },
-        {   8, 134, 191 },
-        {   1,  86, 142 },
-        {   1,  56,  96 },
-        {   1,  30,  53 },
-        {   1,  12,  20 }
-      }, { /* Coeff Band 4 */
-        {  20, 190, 215 },
-        {   4, 135, 192 },
-        {   1,  84, 139 },
-        {   1,  53,  91 },
-        {   1,  28,  49 },
-        {   1,  11,  20 }
-      }, { /* Coeff Band 5 */
-        {  13, 196, 216 },
-        {   2, 137, 192 },
-        {   1,  86, 143 },
-        {   1,  57,  99 },
-        {   1,  32,  56 },
-        {   1,  13,  24 }
-      }
-    }
-  }, { /* block Type 1 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 211,  29, 217 },
-        {  96,  47, 156 },
-        {  22,  43,  87 }
-      }, { /* Coeff Band 1 */
-        {  78, 120, 193 },
-        { 111, 116, 186 },
-        {  46, 102, 164 },
-        {  15,  80, 128 },
-        {   2,  49,  76 },
-        {   1,  18,  28 }
-      }, { /* Coeff Band 2 */
-        {  71, 161, 203 },
-        {  42, 132, 192 },
-        {  10,  98, 150 },
-        {   3,  69, 109 },
-        {   1,  44,  70 },
-        {   1,  18,  29 }
-      }, { /* Coeff Band 3 */
-        {  57, 186, 211 },
-        {  30, 140, 196 },
-        {   4,  93, 146 },
-        {   1,  62, 102 },
-        {   1,  38,  65 },
-        {   1,  16,  27 }
-      }, { /* Coeff Band 4 */
-        {  47, 199, 217 },
-        {  14, 145, 196 },
-        {   1,  88, 142 },
-        {   1,  57,  98 },
-        {   1,  36,  62 },
-        {   1,  15,  26 }
-      }, { /* Coeff Band 5 */
-        {  26, 219, 229 },
-        {   5, 155, 207 },
-        {   1,  94, 151 },
-        {   1,  60, 104 },
-        {   1,  36,  62 },
-        {   1,  16,  28 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 233,  29, 248 },
-        { 146,  47, 220 },
-        {  43,  52, 140 }
-      }, { /* Coeff Band 1 */
-        { 100, 163, 232 },
-        { 179, 161, 222 },
-        {  63, 142, 204 },
-        {  37, 113, 174 },
-        {  26,  89, 137 },
-        {  18,  68,  97 }
-      }, { /* Coeff Band 2 */
-        {  85, 181, 230 },
-        {  32, 146, 209 },
-        {   7, 100, 164 },
-        {   3,  71, 121 },
-        {   1,  45,  77 },
-        {   1,  18,  30 }
-      }, { /* Coeff Band 3 */
-        {  65, 187, 230 },
-        {  20, 148, 207 },
-        {   2,  97, 159 },
-        {   1,  68, 116 },
-        {   1,  40,  70 },
-        {   1,  14,  29 }
-      }, { /* Coeff Band 4 */
-        {  40, 194, 227 },
-        {   8, 147, 204 },
-        {   1,  94, 155 },
-        {   1,  65, 112 },
-        {   1,  39,  66 },
-        {   1,  14,  26 }
-      }, { /* Coeff Band 5 */
-        {  16, 208, 228 },
-        {   3, 151, 207 },
-        {   1,  98, 160 },
-        {   1,  67, 117 },
-        {   1,  41,  74 },
-        {   1,  17,  31 }
-      }
-    }
-  }
-};
-static const vp9_coeff_probs_model default_coef_probs_32x32[BLOCK_TYPES] = {
-  { /* block Type 0 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        {  17,  38, 140 },
-        {   7,  34,  80 },
-        {   1,  17,  29 }
-      }, { /* Coeff Band 1 */
-        {  37,  75, 128 },
-        {  41,  76, 128 },
-        {  26,  66, 116 },
-        {  12,  52,  94 },
-        {   2,  32,  55 },
-        {   1,  10,  16 }
-      }, { /* Coeff Band 2 */
-        {  50, 127, 154 },
-        {  37, 109, 152 },
-        {  16,  82, 121 },
-        {   5,  59,  85 },
-        {   1,  35,  54 },
-        {   1,  13,  20 }
-      }, { /* Coeff Band 3 */
-        {  40, 142, 167 },
-        {  17, 110, 157 },
-        {   2,  71, 112 },
-        {   1,  44,  72 },
-        {   1,  27,  45 },
-        {   1,  11,  17 }
-      }, { /* Coeff Band 4 */
-        {  30, 175, 188 },
-        {   9, 124, 169 },
-        {   1,  74, 116 },
-        {   1,  48,  78 },
-        {   1,  30,  49 },
-        {   1,  11,  18 }
-      }, { /* Coeff Band 5 */
-        {  10, 222, 223 },
-        {   2, 150, 194 },
-        {   1,  83, 128 },
-        {   1,  48,  79 },
-        {   1,  27,  45 },
-        {   1,  11,  17 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        {  36,  41, 235 },
-        {  29,  36, 193 },
-        {  10,  27, 111 }
-      }, { /* Coeff Band 1 */
-        {  85, 165, 222 },
-        { 177, 162, 215 },
-        { 110, 135, 195 },
-        {  57, 113, 168 },
-        {  23,  83, 120 },
-        {  10,  49,  61 }
-      }, { /* Coeff Band 2 */
-        {  85, 190, 223 },
-        {  36, 139, 200 },
-        {   5,  90, 146 },
-        {   1,  60, 103 },
-        {   1,  38,  65 },
-        {   1,  18,  30 }
-      }, { /* Coeff Band 3 */
-        {  72, 202, 223 },
-        {  23, 141, 199 },
-        {   2,  86, 140 },
-        {   1,  56,  97 },
-        {   1,  36,  61 },
-        {   1,  16,  27 }
-      }, { /* Coeff Band 4 */
-        {  55, 218, 225 },
-        {  13, 145, 200 },
-        {   1,  86, 141 },
-        {   1,  57,  99 },
-        {   1,  35,  61 },
-        {   1,  13,  22 }
-      }, { /* Coeff Band 5 */
-        {  15, 235, 212 },
-        {   1, 132, 184 },
-        {   1,  84, 139 },
-        {   1,  57,  97 },
-        {   1,  34,  56 },
-        {   1,  14,  23 }
-      }
-    }
-  }, { /* block Type 1 */
-    { /* Intra */
-      { /* Coeff Band 0 */
-        { 181,  21, 201 },
-        {  61,  37, 123 },
-        {  10,  38,  71 }
-      }, { /* Coeff Band 1 */
-        {  47, 106, 172 },
-        {  95, 104, 173 },
-        {  42,  93, 159 },
-        {  18,  77, 131 },
-        {   4,  50,  81 },
-        {   1,  17,  23 }
-      }, { /* Coeff Band 2 */
-        {  62, 147, 199 },
-        {  44, 130, 189 },
-        {  28, 102, 154 },
-        {  18,  75, 115 },
-        {   2,  44,  65 },
-        {   1,  12,  19 }
-      }, { /* Coeff Band 3 */
-        {  55, 153, 210 },
-        {  24, 130, 194 },
-        {   3,  93, 146 },
-        {   1,  61,  97 },
-        {   1,  31,  50 },
-        {   1,  10,  16 }
-      }, { /* Coeff Band 4 */
-        {  49, 186, 223 },
-        {  17, 148, 204 },
-        {   1,  96, 142 },
-        {   1,  53,  83 },
-        {   1,  26,  44 },
-        {   1,  11,  17 }
-      }, { /* Coeff Band 5 */
-        {  13, 217, 212 },
-        {   2, 136, 180 },
-        {   1,  78, 124 },
-        {   1,  50,  83 },
-        {   1,  29,  49 },
-        {   1,  14,  23 }
-      }
-    }, { /* Inter */
-      { /* Coeff Band 0 */
-        { 197,  13, 247 },
-        {  82,  17, 222 },
-        {  25,  17, 162 }
-      }, { /* Coeff Band 1 */
-        { 126, 186, 247 },
-        { 234, 191, 243 },
-        { 176, 177, 234 },
-        { 104, 158, 220 },
-        {  66, 128, 186 },
-        {  55,  90, 137 }
-      }, { /* Coeff Band 2 */
-        { 111, 197, 242 },
-        {  46, 158, 219 },
-        {   9, 104, 171 },
-        {   2,  65, 125 },
-        {   1,  44,  80 },
-        {   1,  17,  91 }
-      }, { /* Coeff Band 3 */
-        { 104, 208, 245 },
-        {  39, 168, 224 },
-        {   3, 109, 162 },
-        {   1,  79, 124 },
-        {   1,  50, 102 },
-        {   1,  43, 102 }
-      }, { /* Coeff Band 4 */
-        {  84, 220, 246 },
-        {  31, 177, 231 },
-        {   2, 115, 180 },
-        {   1,  79, 134 },
-        {   1,  55,  77 },
-        {   1,  60,  79 }
-      }, { /* Coeff Band 5 */
-        {  43, 243, 240 },
-        {   8, 180, 217 },
-        {   1, 115, 166 },
-        {   1,  84, 121 },
-        {   1,  51,  67 },
-        {   1,  16,   6 }
-      }
-    }
-  }
-};
-
-#endif  // VP9_COMMON_DEFAULT_COEF_PROBS_H_
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index b35c43f..adab33f 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -35,8 +35,7 @@
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
-DECLARE_ALIGNED(16, const uint8_t,
-                vp9_coefband_trans_8x8plus[1024]) = {
+DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_8x8plus[1024]) = {
   0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
   4, 4, 4, 4, 4, 5,
   // beyond MAXBAND_INDEX+1 all values are filled as 5
@@ -109,13 +108,13 @@
   0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
 };
 
-DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[MAX_ENTROPY_TOKENS]) = {
+DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]) = {
   0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5
 };
 
-const vp9_tree_index vp9_coefmodel_tree[6] = {
-  -DCT_EOB_MODEL_TOKEN, 2,                      /* 0 = EOB */
-  -ZERO_TOKEN, 4,                               /* 1 = ZERO */
+const vp9_tree_index vp9_coefmodel_tree[TREE_SIZE(UNCONSTRAINED_NODES + 1)] = {
+  -EOB_MODEL_TOKEN, 2,
+  -ZERO_TOKEN, 4,
   -ONE_TOKEN, -TWO_TOKEN,
 };
 
@@ -131,265 +130,601 @@
 
 // Every odd line in this table can be generated from the even lines
 // by averaging :
-// vp9_pareto8_full[l][node] = ( vp9_pareto8_full[l-1][node] +
-//                               vp9_pareto8_full[l+1][node] ) >> 1;
-const vp9_prob vp9_pareto8_full[256][MODEL_NODES] = {
-    {  3, 86, 128,  6, 86, 23, 88, 29},
-    {  6, 86, 128, 11, 87, 42, 91, 52},
-    {  9, 86, 129, 17, 88, 61, 94, 76},
-    { 12, 86, 129, 22, 88, 77, 97, 93},
-    { 15, 87, 129, 28, 89, 93, 100, 110},
-    { 17, 87, 129, 33, 90, 105, 103, 123},
-    { 20, 88, 130, 38, 91, 118, 106, 136},
-    { 23, 88, 130, 43, 91, 128, 108, 146},
-    { 26, 89, 131, 48, 92, 139, 111, 156},
-    { 28, 89, 131, 53, 93, 147, 114, 163},
-    { 31, 90, 131, 58, 94, 156, 117, 171},
-    { 34, 90, 131, 62, 94, 163, 119, 177},
-    { 37, 90, 132, 66, 95, 171, 122, 184},
-    { 39, 90, 132, 70, 96, 177, 124, 189},
-    { 42, 91, 132, 75, 97, 183, 127, 194},
-    { 44, 91, 132, 79, 97, 188, 129, 198},
-    { 47, 92, 133, 83, 98, 193, 132, 202},
-    { 49, 92, 133, 86, 99, 197, 134, 205},
-    { 52, 93, 133, 90, 100, 201, 137, 208},
-    { 54, 93, 133, 94, 100, 204, 139, 211},
-    { 57, 94, 134, 98, 101, 208, 142, 214},
-    { 59, 94, 134, 101, 102, 211, 144, 216},
-    { 62, 94, 135, 105, 103, 214, 146, 218},
-    { 64, 94, 135, 108, 103, 216, 148, 220},
-    { 66, 95, 135, 111, 104, 219, 151, 222},
-    { 68, 95, 135, 114, 105, 221, 153, 223},
-    { 71, 96, 136, 117, 106, 224, 155, 225},
-    { 73, 96, 136, 120, 106, 225, 157, 226},
-    { 76, 97, 136, 123, 107, 227, 159, 228},
-    { 78, 97, 136, 126, 108, 229, 160, 229},
-    { 80, 98, 137, 129, 109, 231, 162, 231},
-    { 82, 98, 137, 131, 109, 232, 164, 232},
-    { 84, 98, 138, 134, 110, 234, 166, 233},
-    { 86, 98, 138, 137, 111, 235, 168, 234},
-    { 89, 99, 138, 140, 112, 236, 170, 235},
-    { 91, 99, 138, 142, 112, 237, 171, 235},
-    { 93, 100, 139, 145, 113, 238, 173, 236},
-    { 95, 100, 139, 147, 114, 239, 174, 237},
-    { 97, 101, 140, 149, 115, 240, 176, 238},
-    { 99, 101, 140, 151, 115, 241, 177, 238},
-    {101, 102, 140, 154, 116, 242, 179, 239},
-    {103, 102, 140, 156, 117, 242, 180, 239},
-    {105, 103, 141, 158, 118, 243, 182, 240},
-    {107, 103, 141, 160, 118, 243, 183, 240},
-    {109, 104, 141, 162, 119, 244, 185, 241},
-    {111, 104, 141, 164, 119, 244, 186, 241},
-    {113, 104, 142, 166, 120, 245, 187, 242},
-    {114, 104, 142, 168, 121, 245, 188, 242},
-    {116, 105, 143, 170, 122, 246, 190, 243},
-    {118, 105, 143, 171, 122, 246, 191, 243},
-    {120, 106, 143, 173, 123, 247, 192, 244},
-    {121, 106, 143, 175, 124, 247, 193, 244},
-    {123, 107, 144, 177, 125, 248, 195, 244},
-    {125, 107, 144, 178, 125, 248, 196, 244},
-    {127, 108, 145, 180, 126, 249, 197, 245},
-    {128, 108, 145, 181, 127, 249, 198, 245},
-    {130, 109, 145, 183, 128, 249, 199, 245},
-    {132, 109, 145, 184, 128, 249, 200, 245},
-    {134, 110, 146, 186, 129, 250, 201, 246},
-    {135, 110, 146, 187, 130, 250, 202, 246},
-    {137, 111, 147, 189, 131, 251, 203, 246},
-    {138, 111, 147, 190, 131, 251, 204, 246},
-    {140, 112, 147, 192, 132, 251, 205, 247},
-    {141, 112, 147, 193, 132, 251, 206, 247},
-    {143, 113, 148, 194, 133, 251, 207, 247},
-    {144, 113, 148, 195, 134, 251, 207, 247},
-    {146, 114, 149, 197, 135, 252, 208, 248},
-    {147, 114, 149, 198, 135, 252, 209, 248},
-    {149, 115, 149, 199, 136, 252, 210, 248},
-    {150, 115, 149, 200, 137, 252, 210, 248},
-    {152, 115, 150, 201, 138, 252, 211, 248},
-    {153, 115, 150, 202, 138, 252, 212, 248},
-    {155, 116, 151, 204, 139, 253, 213, 249},
-    {156, 116, 151, 205, 139, 253, 213, 249},
-    {158, 117, 151, 206, 140, 253, 214, 249},
-    {159, 117, 151, 207, 141, 253, 215, 249},
-    {161, 118, 152, 208, 142, 253, 216, 249},
-    {162, 118, 152, 209, 142, 253, 216, 249},
-    {163, 119, 153, 210, 143, 253, 217, 249},
-    {164, 119, 153, 211, 143, 253, 217, 249},
-    {166, 120, 153, 212, 144, 254, 218, 250},
-    {167, 120, 153, 212, 145, 254, 219, 250},
-    {168, 121, 154, 213, 146, 254, 220, 250},
-    {169, 121, 154, 214, 146, 254, 220, 250},
-    {171, 122, 155, 215, 147, 254, 221, 250},
-    {172, 122, 155, 216, 147, 254, 221, 250},
-    {173, 123, 155, 217, 148, 254, 222, 250},
-    {174, 123, 155, 217, 149, 254, 222, 250},
-    {176, 124, 156, 218, 150, 254, 223, 250},
-    {177, 124, 156, 219, 150, 254, 223, 250},
-    {178, 125, 157, 220, 151, 254, 224, 251},
-    {179, 125, 157, 220, 151, 254, 224, 251},
-    {180, 126, 157, 221, 152, 254, 225, 251},
-    {181, 126, 157, 221, 152, 254, 225, 251},
-    {183, 127, 158, 222, 153, 254, 226, 251},
-    {184, 127, 158, 223, 154, 254, 226, 251},
-    {185, 128, 159, 224, 155, 255, 227, 251},
-    {186, 128, 159, 224, 155, 255, 227, 251},
-    {187, 129, 160, 225, 156, 255, 228, 251},
-    {188, 130, 160, 225, 156, 255, 228, 251},
-    {189, 131, 160, 226, 157, 255, 228, 251},
-    {190, 131, 160, 226, 158, 255, 228, 251},
-    {191, 132, 161, 227, 159, 255, 229, 251},
-    {192, 132, 161, 227, 159, 255, 229, 251},
-    {193, 133, 162, 228, 160, 255, 230, 252},
-    {194, 133, 162, 229, 160, 255, 230, 252},
-    {195, 134, 163, 230, 161, 255, 231, 252},
-    {196, 134, 163, 230, 161, 255, 231, 252},
-    {197, 135, 163, 231, 162, 255, 231, 252},
-    {198, 135, 163, 231, 162, 255, 231, 252},
-    {199, 136, 164, 232, 163, 255, 232, 252},
-    {200, 136, 164, 232, 164, 255, 232, 252},
-    {201, 137, 165, 233, 165, 255, 233, 252},
-    {201, 137, 165, 233, 165, 255, 233, 252},
-    {202, 138, 166, 233, 166, 255, 233, 252},
-    {203, 138, 166, 233, 166, 255, 233, 252},
-    {204, 139, 166, 234, 167, 255, 234, 252},
-    {205, 139, 166, 234, 167, 255, 234, 252},
-    {206, 140, 167, 235, 168, 255, 235, 252},
-    {206, 140, 167, 235, 168, 255, 235, 252},
-    {207, 141, 168, 236, 169, 255, 235, 252},
-    {208, 141, 168, 236, 170, 255, 235, 252},
-    {209, 142, 169, 237, 171, 255, 236, 252},
-    {209, 143, 169, 237, 171, 255, 236, 252},
-    {210, 144, 169, 237, 172, 255, 236, 252},
-    {211, 144, 169, 237, 172, 255, 236, 252},
-    {212, 145, 170, 238, 173, 255, 237, 252},
-    {213, 145, 170, 238, 173, 255, 237, 252},
-    {214, 146, 171, 239, 174, 255, 237, 253},
-    {214, 146, 171, 239, 174, 255, 237, 253},
-    {215, 147, 172, 240, 175, 255, 238, 253},
-    {215, 147, 172, 240, 175, 255, 238, 253},
-    {216, 148, 173, 240, 176, 255, 238, 253},
-    {217, 148, 173, 240, 176, 255, 238, 253},
-    {218, 149, 173, 241, 177, 255, 239, 253},
-    {218, 149, 173, 241, 178, 255, 239, 253},
-    {219, 150, 174, 241, 179, 255, 239, 253},
-    {219, 151, 174, 241, 179, 255, 239, 253},
-    {220, 152, 175, 242, 180, 255, 240, 253},
-    {221, 152, 175, 242, 180, 255, 240, 253},
-    {222, 153, 176, 242, 181, 255, 240, 253},
-    {222, 153, 176, 242, 181, 255, 240, 253},
-    {223, 154, 177, 243, 182, 255, 240, 253},
-    {223, 154, 177, 243, 182, 255, 240, 253},
-    {224, 155, 178, 244, 183, 255, 241, 253},
-    {224, 155, 178, 244, 183, 255, 241, 253},
-    {225, 156, 178, 244, 184, 255, 241, 253},
-    {225, 157, 178, 244, 184, 255, 241, 253},
-    {226, 158, 179, 244, 185, 255, 242, 253},
-    {227, 158, 179, 244, 185, 255, 242, 253},
-    {228, 159, 180, 245, 186, 255, 242, 253},
-    {228, 159, 180, 245, 186, 255, 242, 253},
-    {229, 160, 181, 245, 187, 255, 242, 253},
-    {229, 160, 181, 245, 187, 255, 242, 253},
-    {230, 161, 182, 246, 188, 255, 243, 253},
-    {230, 162, 182, 246, 188, 255, 243, 253},
-    {231, 163, 183, 246, 189, 255, 243, 253},
-    {231, 163, 183, 246, 189, 255, 243, 253},
-    {232, 164, 184, 247, 190, 255, 243, 253},
-    {232, 164, 184, 247, 190, 255, 243, 253},
-    {233, 165, 185, 247, 191, 255, 244, 253},
-    {233, 165, 185, 247, 191, 255, 244, 253},
-    {234, 166, 185, 247, 192, 255, 244, 253},
-    {234, 167, 185, 247, 192, 255, 244, 253},
-    {235, 168, 186, 248, 193, 255, 244, 253},
-    {235, 168, 186, 248, 193, 255, 244, 253},
-    {236, 169, 187, 248, 194, 255, 244, 253},
-    {236, 169, 187, 248, 194, 255, 244, 253},
-    {236, 170, 188, 248, 195, 255, 245, 253},
-    {236, 170, 188, 248, 195, 255, 245, 253},
-    {237, 171, 189, 249, 196, 255, 245, 254},
-    {237, 172, 189, 249, 196, 255, 245, 254},
-    {238, 173, 190, 249, 197, 255, 245, 254},
-    {238, 173, 190, 249, 197, 255, 245, 254},
-    {239, 174, 191, 249, 198, 255, 245, 254},
-    {239, 174, 191, 249, 198, 255, 245, 254},
-    {240, 175, 192, 249, 199, 255, 246, 254},
-    {240, 176, 192, 249, 199, 255, 246, 254},
-    {240, 177, 193, 250, 200, 255, 246, 254},
-    {240, 177, 193, 250, 200, 255, 246, 254},
-    {241, 178, 194, 250, 201, 255, 246, 254},
-    {241, 178, 194, 250, 201, 255, 246, 254},
-    {242, 179, 195, 250, 202, 255, 246, 254},
-    {242, 180, 195, 250, 202, 255, 246, 254},
-    {242, 181, 196, 250, 203, 255, 247, 254},
-    {242, 181, 196, 250, 203, 255, 247, 254},
-    {243, 182, 197, 251, 204, 255, 247, 254},
-    {243, 183, 197, 251, 204, 255, 247, 254},
-    {244, 184, 198, 251, 205, 255, 247, 254},
-    {244, 184, 198, 251, 205, 255, 247, 254},
-    {244, 185, 199, 251, 206, 255, 247, 254},
-    {244, 185, 199, 251, 206, 255, 247, 254},
-    {245, 186, 200, 251, 207, 255, 247, 254},
-    {245, 187, 200, 251, 207, 255, 247, 254},
-    {246, 188, 201, 252, 207, 255, 248, 254},
-    {246, 188, 201, 252, 207, 255, 248, 254},
-    {246, 189, 202, 252, 208, 255, 248, 254},
-    {246, 190, 202, 252, 208, 255, 248, 254},
-    {247, 191, 203, 252, 209, 255, 248, 254},
-    {247, 191, 203, 252, 209, 255, 248, 254},
-    {247, 192, 204, 252, 210, 255, 248, 254},
-    {247, 193, 204, 252, 210, 255, 248, 254},
-    {248, 194, 205, 252, 211, 255, 248, 254},
-    {248, 194, 205, 252, 211, 255, 248, 254},
-    {248, 195, 206, 252, 212, 255, 249, 254},
-    {248, 196, 206, 252, 212, 255, 249, 254},
-    {249, 197, 207, 253, 213, 255, 249, 254},
-    {249, 197, 207, 253, 213, 255, 249, 254},
-    {249, 198, 208, 253, 214, 255, 249, 254},
-    {249, 199, 209, 253, 214, 255, 249, 254},
-    {250, 200, 210, 253, 215, 255, 249, 254},
-    {250, 200, 210, 253, 215, 255, 249, 254},
-    {250, 201, 211, 253, 215, 255, 249, 254},
-    {250, 202, 211, 253, 215, 255, 249, 254},
-    {250, 203, 212, 253, 216, 255, 249, 254},
-    {250, 203, 212, 253, 216, 255, 249, 254},
-    {251, 204, 213, 253, 217, 255, 250, 254},
-    {251, 205, 213, 253, 217, 255, 250, 254},
-    {251, 206, 214, 254, 218, 255, 250, 254},
-    {251, 206, 215, 254, 218, 255, 250, 254},
-    {252, 207, 216, 254, 219, 255, 250, 254},
-    {252, 208, 216, 254, 219, 255, 250, 254},
-    {252, 209, 217, 254, 220, 255, 250, 254},
-    {252, 210, 217, 254, 220, 255, 250, 254},
-    {252, 211, 218, 254, 221, 255, 250, 254},
-    {252, 212, 218, 254, 221, 255, 250, 254},
-    {253, 213, 219, 254, 222, 255, 250, 254},
-    {253, 213, 220, 254, 222, 255, 250, 254},
-    {253, 214, 221, 254, 223, 255, 250, 254},
-    {253, 215, 221, 254, 223, 255, 250, 254},
-    {253, 216, 222, 254, 224, 255, 251, 254},
-    {253, 217, 223, 254, 224, 255, 251, 254},
-    {253, 218, 224, 254, 225, 255, 251, 254},
-    {253, 219, 224, 254, 225, 255, 251, 254},
-    {254, 220, 225, 254, 225, 255, 251, 254},
-    {254, 221, 226, 254, 225, 255, 251, 254},
-    {254, 222, 227, 255, 226, 255, 251, 254},
-    {254, 223, 227, 255, 226, 255, 251, 254},
-    {254, 224, 228, 255, 227, 255, 251, 254},
-    {254, 225, 229, 255, 227, 255, 251, 254},
-    {254, 226, 230, 255, 228, 255, 251, 254},
-    {254, 227, 230, 255, 229, 255, 251, 254},
-    {255, 228, 231, 255, 230, 255, 251, 254},
-    {255, 229, 232, 255, 230, 255, 251, 254},
-    {255, 230, 233, 255, 231, 255, 252, 254},
-    {255, 231, 234, 255, 231, 255, 252, 254},
-    {255, 232, 235, 255, 232, 255, 252, 254},
-    {255, 233, 236, 255, 232, 255, 252, 254},
-    {255, 235, 237, 255, 233, 255, 252, 254},
-    {255, 236, 238, 255, 234, 255, 252, 254},
-    {255, 238, 240, 255, 235, 255, 252, 255},
-    {255, 239, 241, 255, 235, 255, 252, 254},
-    {255, 241, 243, 255, 236, 255, 252, 254},
-    {255, 243, 245, 255, 237, 255, 252, 254},
-    {255, 246, 247, 255, 239, 255, 253, 255},
-    {255, 246, 247, 255, 239, 255, 253, 255},
+// vp9_pareto8_full[l][node] = (vp9_pareto8_full[l-1][node] +
+//                              vp9_pareto8_full[l+1][node] ) >> 1;
+const vp9_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = {
+  {  3,  86, 128,   6,  86,  23,  88,  29},
+  {  6,  86, 128,  11,  87,  42,  91,  52},
+  {  9,  86, 129,  17,  88,  61,  94,  76},
+  { 12,  86, 129,  22,  88,  77,  97,  93},
+  { 15,  87, 129,  28,  89,  93, 100, 110},
+  { 17,  87, 129,  33,  90, 105, 103, 123},
+  { 20,  88, 130,  38,  91, 118, 106, 136},
+  { 23,  88, 130,  43,  91, 128, 108, 146},
+  { 26,  89, 131,  48,  92, 139, 111, 156},
+  { 28,  89, 131,  53,  93, 147, 114, 163},
+  { 31,  90, 131,  58,  94, 156, 117, 171},
+  { 34,  90, 131,  62,  94, 163, 119, 177},
+  { 37,  90, 132,  66,  95, 171, 122, 184},
+  { 39,  90, 132,  70,  96, 177, 124, 189},
+  { 42,  91, 132,  75,  97, 183, 127, 194},
+  { 44,  91, 132,  79,  97, 188, 129, 198},
+  { 47,  92, 133,  83,  98, 193, 132, 202},
+  { 49,  92, 133,  86,  99, 197, 134, 205},
+  { 52,  93, 133,  90, 100, 201, 137, 208},
+  { 54,  93, 133,  94, 100, 204, 139, 211},
+  { 57,  94, 134,  98, 101, 208, 142, 214},
+  { 59,  94, 134, 101, 102, 211, 144, 216},
+  { 62,  94, 135, 105, 103, 214, 146, 218},
+  { 64,  94, 135, 108, 103, 216, 148, 220},
+  { 66,  95, 135, 111, 104, 219, 151, 222},
+  { 68,  95, 135, 114, 105, 221, 153, 223},
+  { 71,  96, 136, 117, 106, 224, 155, 225},
+  { 73,  96, 136, 120, 106, 225, 157, 226},
+  { 76,  97, 136, 123, 107, 227, 159, 228},
+  { 78,  97, 136, 126, 108, 229, 160, 229},
+  { 80,  98, 137, 129, 109, 231, 162, 231},
+  { 82,  98, 137, 131, 109, 232, 164, 232},
+  { 84,  98, 138, 134, 110, 234, 166, 233},
+  { 86,  98, 138, 137, 111, 235, 168, 234},
+  { 89,  99, 138, 140, 112, 236, 170, 235},
+  { 91,  99, 138, 142, 112, 237, 171, 235},
+  { 93, 100, 139, 145, 113, 238, 173, 236},
+  { 95, 100, 139, 147, 114, 239, 174, 237},
+  { 97, 101, 140, 149, 115, 240, 176, 238},
+  { 99, 101, 140, 151, 115, 241, 177, 238},
+  {101, 102, 140, 154, 116, 242, 179, 239},
+  {103, 102, 140, 156, 117, 242, 180, 239},
+  {105, 103, 141, 158, 118, 243, 182, 240},
+  {107, 103, 141, 160, 118, 243, 183, 240},
+  {109, 104, 141, 162, 119, 244, 185, 241},
+  {111, 104, 141, 164, 119, 244, 186, 241},
+  {113, 104, 142, 166, 120, 245, 187, 242},
+  {114, 104, 142, 168, 121, 245, 188, 242},
+  {116, 105, 143, 170, 122, 246, 190, 243},
+  {118, 105, 143, 171, 122, 246, 191, 243},
+  {120, 106, 143, 173, 123, 247, 192, 244},
+  {121, 106, 143, 175, 124, 247, 193, 244},
+  {123, 107, 144, 177, 125, 248, 195, 244},
+  {125, 107, 144, 178, 125, 248, 196, 244},
+  {127, 108, 145, 180, 126, 249, 197, 245},
+  {128, 108, 145, 181, 127, 249, 198, 245},
+  {130, 109, 145, 183, 128, 249, 199, 245},
+  {132, 109, 145, 184, 128, 249, 200, 245},
+  {134, 110, 146, 186, 129, 250, 201, 246},
+  {135, 110, 146, 187, 130, 250, 202, 246},
+  {137, 111, 147, 189, 131, 251, 203, 246},
+  {138, 111, 147, 190, 131, 251, 204, 246},
+  {140, 112, 147, 192, 132, 251, 205, 247},
+  {141, 112, 147, 193, 132, 251, 206, 247},
+  {143, 113, 148, 194, 133, 251, 207, 247},
+  {144, 113, 148, 195, 134, 251, 207, 247},
+  {146, 114, 149, 197, 135, 252, 208, 248},
+  {147, 114, 149, 198, 135, 252, 209, 248},
+  {149, 115, 149, 199, 136, 252, 210, 248},
+  {150, 115, 149, 200, 137, 252, 210, 248},
+  {152, 115, 150, 201, 138, 252, 211, 248},
+  {153, 115, 150, 202, 138, 252, 212, 248},
+  {155, 116, 151, 204, 139, 253, 213, 249},
+  {156, 116, 151, 205, 139, 253, 213, 249},
+  {158, 117, 151, 206, 140, 253, 214, 249},
+  {159, 117, 151, 207, 141, 253, 215, 249},
+  {161, 118, 152, 208, 142, 253, 216, 249},
+  {162, 118, 152, 209, 142, 253, 216, 249},
+  {163, 119, 153, 210, 143, 253, 217, 249},
+  {164, 119, 153, 211, 143, 253, 217, 249},
+  {166, 120, 153, 212, 144, 254, 218, 250},
+  {167, 120, 153, 212, 145, 254, 219, 250},
+  {168, 121, 154, 213, 146, 254, 220, 250},
+  {169, 121, 154, 214, 146, 254, 220, 250},
+  {171, 122, 155, 215, 147, 254, 221, 250},
+  {172, 122, 155, 216, 147, 254, 221, 250},
+  {173, 123, 155, 217, 148, 254, 222, 250},
+  {174, 123, 155, 217, 149, 254, 222, 250},
+  {176, 124, 156, 218, 150, 254, 223, 250},
+  {177, 124, 156, 219, 150, 254, 223, 250},
+  {178, 125, 157, 220, 151, 254, 224, 251},
+  {179, 125, 157, 220, 151, 254, 224, 251},
+  {180, 126, 157, 221, 152, 254, 225, 251},
+  {181, 126, 157, 221, 152, 254, 225, 251},
+  {183, 127, 158, 222, 153, 254, 226, 251},
+  {184, 127, 158, 223, 154, 254, 226, 251},
+  {185, 128, 159, 224, 155, 255, 227, 251},
+  {186, 128, 159, 224, 155, 255, 227, 251},
+  {187, 129, 160, 225, 156, 255, 228, 251},
+  {188, 130, 160, 225, 156, 255, 228, 251},
+  {189, 131, 160, 226, 157, 255, 228, 251},
+  {190, 131, 160, 226, 158, 255, 228, 251},
+  {191, 132, 161, 227, 159, 255, 229, 251},
+  {192, 132, 161, 227, 159, 255, 229, 251},
+  {193, 133, 162, 228, 160, 255, 230, 252},
+  {194, 133, 162, 229, 160, 255, 230, 252},
+  {195, 134, 163, 230, 161, 255, 231, 252},
+  {196, 134, 163, 230, 161, 255, 231, 252},
+  {197, 135, 163, 231, 162, 255, 231, 252},
+  {198, 135, 163, 231, 162, 255, 231, 252},
+  {199, 136, 164, 232, 163, 255, 232, 252},
+  {200, 136, 164, 232, 164, 255, 232, 252},
+  {201, 137, 165, 233, 165, 255, 233, 252},
+  {201, 137, 165, 233, 165, 255, 233, 252},
+  {202, 138, 166, 233, 166, 255, 233, 252},
+  {203, 138, 166, 233, 166, 255, 233, 252},
+  {204, 139, 166, 234, 167, 255, 234, 252},
+  {205, 139, 166, 234, 167, 255, 234, 252},
+  {206, 140, 167, 235, 168, 255, 235, 252},
+  {206, 140, 167, 235, 168, 255, 235, 252},
+  {207, 141, 168, 236, 169, 255, 235, 252},
+  {208, 141, 168, 236, 170, 255, 235, 252},
+  {209, 142, 169, 237, 171, 255, 236, 252},
+  {209, 143, 169, 237, 171, 255, 236, 252},
+  {210, 144, 169, 237, 172, 255, 236, 252},
+  {211, 144, 169, 237, 172, 255, 236, 252},
+  {212, 145, 170, 238, 173, 255, 237, 252},
+  {213, 145, 170, 238, 173, 255, 237, 252},
+  {214, 146, 171, 239, 174, 255, 237, 253},
+  {214, 146, 171, 239, 174, 255, 237, 253},
+  {215, 147, 172, 240, 175, 255, 238, 253},
+  {215, 147, 172, 240, 175, 255, 238, 253},
+  {216, 148, 173, 240, 176, 255, 238, 253},
+  {217, 148, 173, 240, 176, 255, 238, 253},
+  {218, 149, 173, 241, 177, 255, 239, 253},
+  {218, 149, 173, 241, 178, 255, 239, 253},
+  {219, 150, 174, 241, 179, 255, 239, 253},
+  {219, 151, 174, 241, 179, 255, 239, 253},
+  {220, 152, 175, 242, 180, 255, 240, 253},
+  {221, 152, 175, 242, 180, 255, 240, 253},
+  {222, 153, 176, 242, 181, 255, 240, 253},
+  {222, 153, 176, 242, 181, 255, 240, 253},
+  {223, 154, 177, 243, 182, 255, 240, 253},
+  {223, 154, 177, 243, 182, 255, 240, 253},
+  {224, 155, 178, 244, 183, 255, 241, 253},
+  {224, 155, 178, 244, 183, 255, 241, 253},
+  {225, 156, 178, 244, 184, 255, 241, 253},
+  {225, 157, 178, 244, 184, 255, 241, 253},
+  {226, 158, 179, 244, 185, 255, 242, 253},
+  {227, 158, 179, 244, 185, 255, 242, 253},
+  {228, 159, 180, 245, 186, 255, 242, 253},
+  {228, 159, 180, 245, 186, 255, 242, 253},
+  {229, 160, 181, 245, 187, 255, 242, 253},
+  {229, 160, 181, 245, 187, 255, 242, 253},
+  {230, 161, 182, 246, 188, 255, 243, 253},
+  {230, 162, 182, 246, 188, 255, 243, 253},
+  {231, 163, 183, 246, 189, 255, 243, 253},
+  {231, 163, 183, 246, 189, 255, 243, 253},
+  {232, 164, 184, 247, 190, 255, 243, 253},
+  {232, 164, 184, 247, 190, 255, 243, 253},
+  {233, 165, 185, 247, 191, 255, 244, 253},
+  {233, 165, 185, 247, 191, 255, 244, 253},
+  {234, 166, 185, 247, 192, 255, 244, 253},
+  {234, 167, 185, 247, 192, 255, 244, 253},
+  {235, 168, 186, 248, 193, 255, 244, 253},
+  {235, 168, 186, 248, 193, 255, 244, 253},
+  {236, 169, 187, 248, 194, 255, 244, 253},
+  {236, 169, 187, 248, 194, 255, 244, 253},
+  {236, 170, 188, 248, 195, 255, 245, 253},
+  {236, 170, 188, 248, 195, 255, 245, 253},
+  {237, 171, 189, 249, 196, 255, 245, 254},
+  {237, 172, 189, 249, 196, 255, 245, 254},
+  {238, 173, 190, 249, 197, 255, 245, 254},
+  {238, 173, 190, 249, 197, 255, 245, 254},
+  {239, 174, 191, 249, 198, 255, 245, 254},
+  {239, 174, 191, 249, 198, 255, 245, 254},
+  {240, 175, 192, 249, 199, 255, 246, 254},
+  {240, 176, 192, 249, 199, 255, 246, 254},
+  {240, 177, 193, 250, 200, 255, 246, 254},
+  {240, 177, 193, 250, 200, 255, 246, 254},
+  {241, 178, 194, 250, 201, 255, 246, 254},
+  {241, 178, 194, 250, 201, 255, 246, 254},
+  {242, 179, 195, 250, 202, 255, 246, 254},
+  {242, 180, 195, 250, 202, 255, 246, 254},
+  {242, 181, 196, 250, 203, 255, 247, 254},
+  {242, 181, 196, 250, 203, 255, 247, 254},
+  {243, 182, 197, 251, 204, 255, 247, 254},
+  {243, 183, 197, 251, 204, 255, 247, 254},
+  {244, 184, 198, 251, 205, 255, 247, 254},
+  {244, 184, 198, 251, 205, 255, 247, 254},
+  {244, 185, 199, 251, 206, 255, 247, 254},
+  {244, 185, 199, 251, 206, 255, 247, 254},
+  {245, 186, 200, 251, 207, 255, 247, 254},
+  {245, 187, 200, 251, 207, 255, 247, 254},
+  {246, 188, 201, 252, 207, 255, 248, 254},
+  {246, 188, 201, 252, 207, 255, 248, 254},
+  {246, 189, 202, 252, 208, 255, 248, 254},
+  {246, 190, 202, 252, 208, 255, 248, 254},
+  {247, 191, 203, 252, 209, 255, 248, 254},
+  {247, 191, 203, 252, 209, 255, 248, 254},
+  {247, 192, 204, 252, 210, 255, 248, 254},
+  {247, 193, 204, 252, 210, 255, 248, 254},
+  {248, 194, 205, 252, 211, 255, 248, 254},
+  {248, 194, 205, 252, 211, 255, 248, 254},
+  {248, 195, 206, 252, 212, 255, 249, 254},
+  {248, 196, 206, 252, 212, 255, 249, 254},
+  {249, 197, 207, 253, 213, 255, 249, 254},
+  {249, 197, 207, 253, 213, 255, 249, 254},
+  {249, 198, 208, 253, 214, 255, 249, 254},
+  {249, 199, 209, 253, 214, 255, 249, 254},
+  {250, 200, 210, 253, 215, 255, 249, 254},
+  {250, 200, 210, 253, 215, 255, 249, 254},
+  {250, 201, 211, 253, 215, 255, 249, 254},
+  {250, 202, 211, 253, 215, 255, 249, 254},
+  {250, 203, 212, 253, 216, 255, 249, 254},
+  {250, 203, 212, 253, 216, 255, 249, 254},
+  {251, 204, 213, 253, 217, 255, 250, 254},
+  {251, 205, 213, 253, 217, 255, 250, 254},
+  {251, 206, 214, 254, 218, 255, 250, 254},
+  {251, 206, 215, 254, 218, 255, 250, 254},
+  {252, 207, 216, 254, 219, 255, 250, 254},
+  {252, 208, 216, 254, 219, 255, 250, 254},
+  {252, 209, 217, 254, 220, 255, 250, 254},
+  {252, 210, 217, 254, 220, 255, 250, 254},
+  {252, 211, 218, 254, 221, 255, 250, 254},
+  {252, 212, 218, 254, 221, 255, 250, 254},
+  {253, 213, 219, 254, 222, 255, 250, 254},
+  {253, 213, 220, 254, 222, 255, 250, 254},
+  {253, 214, 221, 254, 223, 255, 250, 254},
+  {253, 215, 221, 254, 223, 255, 250, 254},
+  {253, 216, 222, 254, 224, 255, 251, 254},
+  {253, 217, 223, 254, 224, 255, 251, 254},
+  {253, 218, 224, 254, 225, 255, 251, 254},
+  {253, 219, 224, 254, 225, 255, 251, 254},
+  {254, 220, 225, 254, 225, 255, 251, 254},
+  {254, 221, 226, 254, 225, 255, 251, 254},
+  {254, 222, 227, 255, 226, 255, 251, 254},
+  {254, 223, 227, 255, 226, 255, 251, 254},
+  {254, 224, 228, 255, 227, 255, 251, 254},
+  {254, 225, 229, 255, 227, 255, 251, 254},
+  {254, 226, 230, 255, 228, 255, 251, 254},
+  {254, 227, 230, 255, 229, 255, 251, 254},
+  {255, 228, 231, 255, 230, 255, 251, 254},
+  {255, 229, 232, 255, 230, 255, 251, 254},
+  {255, 230, 233, 255, 231, 255, 252, 254},
+  {255, 231, 234, 255, 231, 255, 252, 254},
+  {255, 232, 235, 255, 232, 255, 252, 254},
+  {255, 233, 236, 255, 232, 255, 252, 254},
+  {255, 235, 237, 255, 233, 255, 252, 254},
+  {255, 236, 238, 255, 234, 255, 252, 254},
+  {255, 238, 240, 255, 235, 255, 252, 255},
+  {255, 239, 241, 255, 235, 255, 252, 254},
+  {255, 241, 243, 255, 236, 255, 252, 254},
+  {255, 243, 245, 255, 237, 255, 252, 254},
+  {255, 246, 247, 255, 239, 255, 253, 255},
+  {255, 246, 247, 255, 239, 255, 253, 255},
+};
+
+static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = {
+  {  // Y plane
+    {  // Intra
+      {  // Band 0
+        { 195,  29, 183 }, {  84,  49, 136 }, {   8,  42,  71 }
+      }, {  // Band 1
+        {  31, 107, 169 }, {  35,  99, 159 }, {  17,  82, 140 },
+        {   8,  66, 114 }, {   2,  44,  76 }, {   1,  19,  32 }
+      }, {  // Band 2
+        {  40, 132, 201 }, {  29, 114, 187 }, {  13,  91, 157 },
+        {   7,  75, 127 }, {   3,  58,  95 }, {   1,  28,  47 }
+      }, {  // Band 3
+        {  69, 142, 221 }, {  42, 122, 201 }, {  15,  91, 159 },
+        {   6,  67, 121 }, {   1,  42,  77 }, {   1,  17,  31 }
+      }, {  // Band 4
+        { 102, 148, 228 }, {  67, 117, 204 }, {  17,  82, 154 },
+        {   6,  59, 114 }, {   2,  39,  75 }, {   1,  15,  29 }
+      }, {  // Band 5
+        { 156,  57, 233 }, { 119,  57, 212 }, {  58,  48, 163 },
+        {  29,  40, 124 }, {  12,  30,  81 }, {   3,  12,  31 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 191, 107, 226 }, { 124, 117, 204 }, {  25,  99, 155 }
+      }, {  // Band 1
+        {  29, 148, 210 }, {  37, 126, 194 }, {   8,  93, 157 },
+        {   2,  68, 118 }, {   1,  39,  69 }, {   1,  17,  33 }
+      }, {  // Band 2
+        {  41, 151, 213 }, {  27, 123, 193 }, {   3,  82, 144 },
+        {   1,  58, 105 }, {   1,  32,  60 }, {   1,  13,  26 }
+      }, {  // Band 3
+        {  59, 159, 220 }, {  23, 126, 198 }, {   4,  88, 151 },
+        {   1,  66, 114 }, {   1,  38,  71 }, {   1,  18,  34 }
+      }, {  // Band 4
+        { 114, 136, 232 }, {  51, 114, 207 }, {  11,  83, 155 },
+        {   3,  56, 105 }, {   1,  33,  65 }, {   1,  17,  34 }
+      }, {  // Band 5
+        { 149,  65, 234 }, { 121,  57, 215 }, {  61,  49, 166 },
+        {  28,  36, 114 }, {  12,  25,  76 }, {   3,  16,  42 }
+      }
+    }
+  }, {  // UV plane
+    {  // Intra
+      {  // Band 0
+        { 214,  49, 220 }, { 132,  63, 188 }, {  42,  65, 137 }
+      }, {  // Band 1
+        {  85, 137, 221 }, { 104, 131, 216 }, {  49, 111, 192 },
+        {  21,  87, 155 }, {   2,  49,  87 }, {   1,  16,  28 }
+      }, {  // Band 2
+        {  89, 163, 230 }, {  90, 137, 220 }, {  29, 100, 183 },
+        {  10,  70, 135 }, {   2,  42,  81 }, {   1,  17,  33 }
+      }, {  // Band 3
+        { 108, 167, 237 }, {  55, 133, 222 }, {  15,  97, 179 },
+        {   4,  72, 135 }, {   1,  45,  85 }, {   1,  19,  38 }
+      }, {  // Band 4
+        { 124, 146, 240 }, {  66, 124, 224 }, {  17,  88, 175 },
+        {   4,  58, 122 }, {   1,  36,  75 }, {   1,  18,  37 }
+      }, {  // Band 5
+        { 141,  79, 241 }, { 126,  70, 227 }, {  66,  58, 182 },
+        {  30,  44, 136 }, {  12,  34,  96 }, {   2,  20,  47 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 229,  99, 249 }, { 143, 111, 235 }, {  46, 109, 192 }
+      }, {  // Band 1
+        {  82, 158, 236 }, {  94, 146, 224 }, {  25, 117, 191 },
+        {   9,  87, 149 }, {   3,  56,  99 }, {   1,  33,  57 }
+      }, {  // Band 2
+        {  83, 167, 237 }, {  68, 145, 222 }, {  10, 103, 177 },
+        {   2,  72, 131 }, {   1,  41,  79 }, {   1,  20,  39 }
+      }, {  // Band 3
+        {  99, 167, 239 }, {  47, 141, 224 }, {  10, 104, 178 },
+        {   2,  73, 133 }, {   1,  44,  85 }, {   1,  22,  47 }
+      }, {  // Band 4
+        { 127, 145, 243 }, {  71, 129, 228 }, {  17,  93, 177 },
+        {   3,  61, 124 }, {   1,  41,  84 }, {   1,  21,  52 }
+      }, {  // Band 5
+        { 157,  78, 244 }, { 140,  72, 231 }, {  69,  58, 184 },
+        {  31,  44, 137 }, {  14,  38, 105 }, {   8,  23,  61 }
+      }
+    }
+  }
+};
+
+static const vp9_coeff_probs_model default_coef_probs_8x8[PLANE_TYPES] = {
+  {  // Y plane
+    {  // Intra
+      {  // Band 0
+        { 125,  34, 187 }, {  52,  41, 133 }, {   6,  31,  56 }
+      }, {  // Band 1
+        {  37, 109, 153 }, {  51, 102, 147 }, {  23,  87, 128 },
+        {   8,  67, 101 }, {   1,  41,  63 }, {   1,  19,  29 }
+      }, {  // Band 2
+        {  31, 154, 185 }, {  17, 127, 175 }, {   6,  96, 145 },
+        {   2,  73, 114 }, {   1,  51,  82 }, {   1,  28,  45 }
+      }, {  // Band 3
+        {  23, 163, 200 }, {  10, 131, 185 }, {   2,  93, 148 },
+        {   1,  67, 111 }, {   1,  41,  69 }, {   1,  14,  24 }
+      }, {  // Band 4
+        {  29, 176, 217 }, {  12, 145, 201 }, {   3, 101, 156 },
+        {   1,  69, 111 }, {   1,  39,  63 }, {   1,  14,  23 }
+      }, {  // Band 5
+        {  57, 192, 233 }, {  25, 154, 215 }, {   6, 109, 167 },
+        {   3,  78, 118 }, {   1,  48,  69 }, {   1,  21,  29 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 202, 105, 245 }, { 108, 106, 216 }, {  18,  90, 144 }
+      }, {  // Band 1
+        {  33, 172, 219 }, {  64, 149, 206 }, {  14, 117, 177 },
+        {   5,  90, 141 }, {   2,  61,  95 }, {   1,  37,  57 }
+      }, {  // Band 2
+        {  33, 179, 220 }, {  11, 140, 198 }, {   1,  89, 148 },
+        {   1,  60, 104 }, {   1,  33,  57 }, {   1,  12,  21 }
+      }, {  // Band 3
+        {  30, 181, 221 }, {   8, 141, 198 }, {   1,  87, 145 },
+        {   1,  58, 100 }, {   1,  31,  55 }, {   1,  12,  20 }
+      }, {  // Band 4
+        {  32, 186, 224 }, {   7, 142, 198 }, {   1,  86, 143 },
+        {   1,  58, 100 }, {   1,  31,  55 }, {   1,  12,  22 }
+      }, {  // Band 5
+        {  57, 192, 227 }, {  20, 143, 204 }, {   3,  96, 154 },
+        {   1,  68, 112 }, {   1,  42,  69 }, {   1,  19,  32 }
+      }
+    }
+  }, {  // UV plane
+    {  // Intra
+      {  // Band 0
+        { 212,  35, 215 }, { 113,  47, 169 }, {  29,  48, 105 }
+      }, {  // Band 1
+        {  74, 129, 203 }, { 106, 120, 203 }, {  49, 107, 178 },
+        {  19,  84, 144 }, {   4,  50,  84 }, {   1,  15,  25 }
+      }, {  // Band 2
+        {  71, 172, 217 }, {  44, 141, 209 }, {  15, 102, 173 },
+        {   6,  76, 133 }, {   2,  51,  89 }, {   1,  24,  42 }
+      }, {  // Band 3
+        {  64, 185, 231 }, {  31, 148, 216 }, {   8, 103, 175 },
+        {   3,  74, 131 }, {   1,  46,  81 }, {   1,  18,  30 }
+      }, {  // Band 4
+        {  65, 196, 235 }, {  25, 157, 221 }, {   5, 105, 174 },
+        {   1,  67, 120 }, {   1,  38,  69 }, {   1,  15,  30 }
+      }, {  // Band 5
+        {  65, 204, 238 }, {  30, 156, 224 }, {   7, 107, 177 },
+        {   2,  70, 124 }, {   1,  42,  73 }, {   1,  18,  34 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 225,  86, 251 }, { 144, 104, 235 }, {  42,  99, 181 }
+      }, {  // Band 1
+        {  85, 175, 239 }, { 112, 165, 229 }, {  29, 136, 200 },
+        {  12, 103, 162 }, {   6,  77, 123 }, {   2,  53,  84 }
+      }, {  // Band 2
+        {  75, 183, 239 }, {  30, 155, 221 }, {   3, 106, 171 },
+        {   1,  74, 128 }, {   1,  44,  76 }, {   1,  17,  28 }
+      }, {  // Band 3
+        {  73, 185, 240 }, {  27, 159, 222 }, {   2, 107, 172 },
+        {   1,  75, 127 }, {   1,  42,  73 }, {   1,  17,  29 }
+      }, {  // Band 4
+        {  62, 190, 238 }, {  21, 159, 222 }, {   2, 107, 172 },
+        {   1,  72, 122 }, {   1,  40,  71 }, {   1,  18,  32 }
+      }, {  // Band 5
+        {  61, 199, 240 }, {  27, 161, 226 }, {   4, 113, 180 },
+        {   1,  76, 129 }, {   1,  46,  80 }, {   1,  23,  41 }
+      }
+    }
+  }
+};
+
+static const vp9_coeff_probs_model default_coef_probs_16x16[PLANE_TYPES] = {
+  {  // Y plane
+    {  // Intra
+      {  // Band 0
+        {   7,  27, 153 }, {   5,  30,  95 }, {   1,  16,  30 }
+      }, {  // Band 1
+        {  50,  75, 127 }, {  57,  75, 124 }, {  27,  67, 108 },
+        {  10,  54,  86 }, {   1,  33,  52 }, {   1,  12,  18 }
+      }, {  // Band 2
+        {  43, 125, 151 }, {  26, 108, 148 }, {   7,  83, 122 },
+        {   2,  59,  89 }, {   1,  38,  60 }, {   1,  17,  27 }
+      }, {  // Band 3
+        {  23, 144, 163 }, {  13, 112, 154 }, {   2,  75, 117 },
+        {   1,  50,  81 }, {   1,  31,  51 }, {   1,  14,  23 }
+      }, {  // Band 4
+        {  18, 162, 185 }, {   6, 123, 171 }, {   1,  78, 125 },
+        {   1,  51,  86 }, {   1,  31,  54 }, {   1,  14,  23 }
+      }, {  // Band 5
+        {  15, 199, 227 }, {   3, 150, 204 }, {   1,  91, 146 },
+        {   1,  55,  95 }, {   1,  30,  53 }, {   1,  11,  20 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        {  19,  55, 240 }, {  19,  59, 196 }, {   3,  52, 105 }
+      }, {  // Band 1
+        {  41, 166, 207 }, { 104, 153, 199 }, {  31, 123, 181 },
+        {  14, 101, 152 }, {   5,  72, 106 }, {   1,  36,  52 }
+      }, {  // Band 2
+        {  35, 176, 211 }, {  12, 131, 190 }, {   2,  88, 144 },
+        {   1,  60, 101 }, {   1,  36,  60 }, {   1,  16,  28 }
+      }, {  // Band 3
+        {  28, 183, 213 }, {   8, 134, 191 }, {   1,  86, 142 },
+        {   1,  56,  96 }, {   1,  30,  53 }, {   1,  12,  20 }
+      }, {  // Band 4
+        {  20, 190, 215 }, {   4, 135, 192 }, {   1,  84, 139 },
+        {   1,  53,  91 }, {   1,  28,  49 }, {   1,  11,  20 }
+      }, {  // Band 5
+        {  13, 196, 216 }, {   2, 137, 192 }, {   1,  86, 143 },
+        {   1,  57,  99 }, {   1,  32,  56 }, {   1,  13,  24 }
+      }
+    }
+  }, {  // UV plane
+    {  // Intra
+      {  // Band 0
+        { 211,  29, 217 }, {  96,  47, 156 }, {  22,  43,  87 }
+      }, {  // Band 1
+        {  78, 120, 193 }, { 111, 116, 186 }, {  46, 102, 164 },
+        {  15,  80, 128 }, {   2,  49,  76 }, {   1,  18,  28 }
+      }, {  // Band 2
+        {  71, 161, 203 }, {  42, 132, 192 }, {  10,  98, 150 },
+        {   3,  69, 109 }, {   1,  44,  70 }, {   1,  18,  29 }
+      }, {  // Band 3
+        {  57, 186, 211 }, {  30, 140, 196 }, {   4,  93, 146 },
+        {   1,  62, 102 }, {   1,  38,  65 }, {   1,  16,  27 }
+      }, {  // Band 4
+        {  47, 199, 217 }, {  14, 145, 196 }, {   1,  88, 142 },
+        {   1,  57,  98 }, {   1,  36,  62 }, {   1,  15,  26 }
+      }, {  // Band 5
+        {  26, 219, 229 }, {   5, 155, 207 }, {   1,  94, 151 },
+        {   1,  60, 104 }, {   1,  36,  62 }, {   1,  16,  28 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 233,  29, 248 }, { 146,  47, 220 }, {  43,  52, 140 }
+      }, {  // Band 1
+        { 100, 163, 232 }, { 179, 161, 222 }, {  63, 142, 204 },
+        {  37, 113, 174 }, {  26,  89, 137 }, {  18,  68,  97 }
+      }, {  // Band 2
+        {  85, 181, 230 }, {  32, 146, 209 }, {   7, 100, 164 },
+        {   3,  71, 121 }, {   1,  45,  77 }, {   1,  18,  30 }
+      }, {  // Band 3
+        {  65, 187, 230 }, {  20, 148, 207 }, {   2,  97, 159 },
+        {   1,  68, 116 }, {   1,  40,  70 }, {   1,  14,  29 }
+      }, {  // Band 4
+        {  40, 194, 227 }, {   8, 147, 204 }, {   1,  94, 155 },
+        {   1,  65, 112 }, {   1,  39,  66 }, {   1,  14,  26 }
+      }, {  // Band 5
+        {  16, 208, 228 }, {   3, 151, 207 }, {   1,  98, 160 },
+        {   1,  67, 117 }, {   1,  41,  74 }, {   1,  17,  31 }
+      }
+    }
+  }
+};
+
+static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = {
+  {  // Y plane
+    {  // Intra
+      {  // Band 0
+        {  17,  38, 140 }, {   7,  34,  80 }, {   1,  17,  29 }
+      }, {  // Band 1
+        {  37,  75, 128 }, {  41,  76, 128 }, {  26,  66, 116 },
+        {  12,  52,  94 }, {   2,  32,  55 }, {   1,  10,  16 }
+      }, {  // Band 2
+        {  50, 127, 154 }, {  37, 109, 152 }, {  16,  82, 121 },
+        {   5,  59,  85 }, {   1,  35,  54 }, {   1,  13,  20 }
+      }, {  // Band 3
+        {  40, 142, 167 }, {  17, 110, 157 }, {   2,  71, 112 },
+        {   1,  44,  72 }, {   1,  27,  45 }, {   1,  11,  17 }
+      }, {  // Band 4
+        {  30, 175, 188 }, {   9, 124, 169 }, {   1,  74, 116 },
+        {   1,  48,  78 }, {   1,  30,  49 }, {   1,  11,  18 }
+      }, {  // Band 5
+        {  10, 222, 223 }, {   2, 150, 194 }, {   1,  83, 128 },
+        {   1,  48,  79 }, {   1,  27,  45 }, {   1,  11,  17 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        {  36,  41, 235 }, {  29,  36, 193 }, {  10,  27, 111 }
+      }, {  // Band 1
+        {  85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 },
+        {  57, 113, 168 }, {  23,  83, 120 }, {  10,  49,  61 }
+      }, {  // Band 2
+        {  85, 190, 223 }, {  36, 139, 200 }, {   5,  90, 146 },
+        {   1,  60, 103 }, {   1,  38,  65 }, {   1,  18,  30 }
+      }, {  // Band 3
+        {  72, 202, 223 }, {  23, 141, 199 }, {   2,  86, 140 },
+        {   1,  56,  97 }, {   1,  36,  61 }, {   1,  16,  27 }
+      }, {  // Band 4
+        {  55, 218, 225 }, {  13, 145, 200 }, {   1,  86, 141 },
+        {   1,  57,  99 }, {   1,  35,  61 }, {   1,  13,  22 }
+      }, {  // Band 5
+        {  15, 235, 212 }, {   1, 132, 184 }, {   1,  84, 139 },
+        {   1,  57,  97 }, {   1,  34,  56 }, {   1,  14,  23 }
+      }
+    }
+  }, {  // UV plane
+    {  // Intra
+      {  // Band 0
+        { 181,  21, 201 }, {  61,  37, 123 }, {  10,  38,  71 }
+      }, {  // Band 1
+        {  47, 106, 172 }, {  95, 104, 173 }, {  42,  93, 159 },
+        {  18,  77, 131 }, {   4,  50,  81 }, {   1,  17,  23 }
+      }, {  // Band 2
+        {  62, 147, 199 }, {  44, 130, 189 }, {  28, 102, 154 },
+        {  18,  75, 115 }, {   2,  44,  65 }, {   1,  12,  19 }
+      }, {  // Band 3
+        {  55, 153, 210 }, {  24, 130, 194 }, {   3,  93, 146 },
+        {   1,  61,  97 }, {   1,  31,  50 }, {   1,  10,  16 }
+      }, {  // Band 4
+        {  49, 186, 223 }, {  17, 148, 204 }, {   1,  96, 142 },
+        {   1,  53,  83 }, {   1,  26,  44 }, {   1,  11,  17 }
+      }, {  // Band 5
+        {  13, 217, 212 }, {   2, 136, 180 }, {   1,  78, 124 },
+        {   1,  50,  83 }, {   1,  29,  49 }, {   1,  14,  23 }
+      }
+    }, {  // Inter
+      {  // Band 0
+        { 197,  13, 247 }, {  82,  17, 222 }, {  25,  17, 162 }
+      }, {  // Band 1
+        { 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 },
+        { 104, 158, 220 }, {  66, 128, 186 }, {  55,  90, 137 }
+      }, {  // Band 2
+        { 111, 197, 242 }, {  46, 158, 219 }, {   9, 104, 171 },
+        {   2,  65, 125 }, {   1,  44,  80 }, {   1,  17,  91 }
+      }, {  // Band 3
+        { 104, 208, 245 }, {  39, 168, 224 }, {   3, 109, 162 },
+        {   1,  79, 124 }, {   1,  50, 102 }, {   1,  43, 102 }
+      }, {  // Band 4
+        {  84, 220, 246 }, {  31, 177, 231 }, {   2, 115, 180 },
+        {   1,  79, 134 }, {   1,  55,  77 }, {   1,  60,  79 }
+      }, {  // Band 5
+        {  43, 243, 240 }, {   8, 180, 217 }, {   1, 115, 166 },
+        {   1,  84, 121 }, {   1,  51,  67 }, {   1,  16,   6 }
+      }
+    }
+  }
 };
 
 static void extend_to_full_distribution(vp9_prob *probs, vp9_prob p) {
@@ -403,8 +738,6 @@
   extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]);
 }
 
-#include "vp9/common/vp9_default_coef_probs.h"
-
 void vp9_default_coef_probs(VP9_COMMON *cm) {
   vp9_copy(cm->fc.coef_probs[TX_4X4], default_coef_probs_4x4);
   vp9_copy(cm->fc.coef_probs[TX_8X8], default_coef_probs_8x8);
@@ -423,29 +756,30 @@
                              unsigned int count_sat,
                              unsigned int update_factor) {
   const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
-
-  vp9_coeff_probs_model *dst_coef_probs = cm->fc.coef_probs[tx_size];
-  const vp9_coeff_probs_model *pre_coef_probs = pre_fc->coef_probs[tx_size];
-  vp9_coeff_count_model *coef_counts = cm->counts.coef[tx_size];
-  unsigned int (*eob_branch_count)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] =
+  vp9_coeff_probs_model *const probs = cm->fc.coef_probs[tx_size];
+  const vp9_coeff_probs_model *const pre_probs = pre_fc->coef_probs[tx_size];
+  vp9_coeff_count_model *counts = cm->counts.coef[tx_size];
+  unsigned int (*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
       cm->counts.eob_branch[tx_size];
   int i, j, k, l, m;
-  unsigned int branch_ct[UNCONSTRAINED_NODES][2];
 
-  for (i = 0; i < BLOCK_TYPES; ++i)
+  for (i = 0; i < PLANE_TYPES; ++i)
     for (j = 0; j < REF_TYPES; ++j)
       for (k = 0; k < COEF_BANDS; ++k)
-        for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
-          if (l >= 3 && k == 0)
-            continue;
-          vp9_tree_probs_from_distribution(vp9_coefmodel_tree, branch_ct,
-                                           coef_counts[i][j][k][l]);
-          branch_ct[0][1] = eob_branch_count[i][j][k][l] - branch_ct[0][0];
+        for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+          const int n0 = counts[i][j][k][l][ZERO_TOKEN];
+          const int n1 = counts[i][j][k][l][ONE_TOKEN];
+          const int n2 = counts[i][j][k][l][TWO_TOKEN];
+          const int neob = counts[i][j][k][l][EOB_MODEL_TOKEN];
+          const unsigned int branch_ct[UNCONSTRAINED_NODES][2] = {
+            { neob, eob_counts[i][j][k][l] - neob },
+            { n0, n1 + n2 },
+            { n1, n2 }
+          };
           for (m = 0; m < UNCONSTRAINED_NODES; ++m)
-            dst_coef_probs[i][j][k][l][m] = merge_probs(
-                                                pre_coef_probs[i][j][k][l][m],
-                                                branch_ct[m],
-                                                count_sat, update_factor);
+            probs[i][j][k][l][m] = merge_probs(pre_probs[i][j][k][l][m],
+                                               branch_ct[m],
+                                               count_sat, update_factor);
         }
 }
 
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index 941b251..721917f 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -21,30 +21,27 @@
 
 #define DIFF_UPDATE_PROB 252
 
-/* Coefficient token alphabet */
+// Coefficient token alphabet
+#define ZERO_TOKEN      0   // 0     Extra Bits 0+0
+#define ONE_TOKEN       1   // 1     Extra Bits 0+1
+#define TWO_TOKEN       2   // 2     Extra Bits 0+1
+#define THREE_TOKEN     3   // 3     Extra Bits 0+1
+#define FOUR_TOKEN      4   // 4     Extra Bits 0+1
+#define CATEGORY1_TOKEN 5   // 5-6   Extra Bits 1+1
+#define CATEGORY2_TOKEN 6   // 7-10  Extra Bits 2+1
+#define CATEGORY3_TOKEN 7   // 11-18 Extra Bits 3+1
+#define CATEGORY4_TOKEN 8   // 19-34 Extra Bits 4+1
+#define CATEGORY5_TOKEN 9   // 35-66 Extra Bits 5+1
+#define CATEGORY6_TOKEN 10  // 67+   Extra Bits 14+1
+#define EOB_TOKEN       11  // EOB   Extra Bits 0+0
 
-#define ZERO_TOKEN              0       /* 0         Extra Bits 0+0 */
-#define ONE_TOKEN               1       /* 1         Extra Bits 0+1 */
-#define TWO_TOKEN               2       /* 2         Extra Bits 0+1 */
-#define THREE_TOKEN             3       /* 3         Extra Bits 0+1 */
-#define FOUR_TOKEN              4       /* 4         Extra Bits 0+1 */
-#define DCT_VAL_CATEGORY1       5       /* 5-6       Extra Bits 1+1 */
-#define DCT_VAL_CATEGORY2       6       /* 7-10      Extra Bits 2+1 */
-#define DCT_VAL_CATEGORY3       7       /* 11-18     Extra Bits 3+1 */
-#define DCT_VAL_CATEGORY4       8       /* 19-34     Extra Bits 4+1 */
-#define DCT_VAL_CATEGORY5       9       /* 35-66     Extra Bits 5+1 */
-#define DCT_VAL_CATEGORY6       10      /* 67+       Extra Bits 14+1 */
-#define DCT_EOB_TOKEN           11      /* EOB       Extra Bits 0+0 */
-#define MAX_ENTROPY_TOKENS      12
-#define ENTROPY_NODES           11
-#define EOSB_TOKEN              127     /* Not signalled, encoder only */
+#define ENTROPY_TOKENS 12
 
-#define INTER_MODE_CONTEXTS     7
+#define ENTROPY_NODES 11
 
-extern DECLARE_ALIGNED(16, const uint8_t,
-                       vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
+extern DECLARE_ALIGNED(16, const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]);
 
-#define DCT_EOB_MODEL_TOKEN     3      /* EOB       Extra Bits 0+0 */
+#define EOB_MODEL_TOKEN 3
 extern const vp9_tree_index vp9_coefmodel_tree[];
 
 typedef struct {
@@ -55,15 +52,13 @@
 } vp9_extra_bit;
 
 // indexed by token value
-extern const vp9_extra_bit vp9_extra_bits[MAX_ENTROPY_TOKENS];
+extern const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS];
 
 #define MAX_PROB                255
 #define DCT_MAX_VALUE           16384
 
 /* Coefficients are predicted via a 3-dimensional probability table. */
 
-/* Outside dimension.  0 = Y with DC, 1 = UV */
-#define BLOCK_TYPES 2
 #define REF_TYPES 2  // intra=0, inter=1
 
 /* Middle dimension reflects the coefficient position within the transform. */
@@ -85,13 +80,14 @@
    coefficient band (and since zigzag positions 0, 1, and 2 are in
    distinct bands). */
 
-#define PREV_COEF_CONTEXTS          6
+#define COEFF_CONTEXTS 6
+#define BAND_COEFF_CONTEXTS(band) ((band) == 0 ? 3 : COEFF_CONTEXTS)
 
 // #define ENTROPY_STATS
 
-typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
-                                    [MAX_ENTROPY_TOKENS];
-typedef unsigned int vp9_coeff_stats[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
+typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
+                                    [ENTROPY_TOKENS];
+typedef unsigned int vp9_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
                                     [ENTROPY_NODES][2];
 
 #define SUBEXP_PARAM                4   /* Subexponential code parameter */
@@ -130,21 +126,20 @@
 // 1, 3, 5, 7, ..., 253, 255
 // In between probabilities are interpolated linearly
 
-#define COEFPROB_MODELS             128
+#define COEFF_PROB_MODELS 256
 
 #define UNCONSTRAINED_NODES         3
 
 #define PIVOT_NODE                  2   // which node is pivot
 
 #define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
-extern const vp9_prob vp9_pareto8_full[256][MODEL_NODES];
+extern const vp9_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
 
 typedef vp9_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS]
-                                      [PREV_COEF_CONTEXTS]
-                                      [UNCONSTRAINED_NODES];
+                                      [COEFF_CONTEXTS][UNCONSTRAINED_NODES];
 
 typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS]
-                                          [PREV_COEF_CONTEXTS]
+                                          [COEFF_CONTEXTS]
                                           [UNCONSTRAINED_NODES + 1];
 
 void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full);
@@ -182,7 +177,7 @@
   const MODE_INFO *const mi = xd->mi_8x8[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
 
-  if (is_inter_block(mbmi) || type != PLANE_TYPE_Y_WITH_DC || xd->lossless) {
+  if (is_inter_block(mbmi) || type != PLANE_TYPE_Y || xd->lossless) {
     return &vp9_default_scan_orders[tx_size];
   } else {
     const MB_PREDICTION_MODE mode =
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 2652421..83281b2 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -459,7 +459,7 @@
   if (cm->frame_type == KEY_FRAME ||
       cm->error_resilient_mode || cm->reset_frame_context == 3) {
     // Reset all frame contexts.
-    for (i = 0; i < NUM_FRAME_CONTEXTS; ++i)
+    for (i = 0; i < FRAME_CONTEXTS; ++i)
       cm->frame_contexts[i] = cm->fc;
   } else if (cm->reset_frame_context == 2) {
     // Reset only the frame context specified in the frame header.
@@ -471,9 +471,6 @@
   vpx_memset(cm->mip, 0,
              cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO));
 
-  vp9_update_mode_info_border(cm, cm->mip);
-  vp9_update_mode_info_border(cm, cm->prev_mip);
-
   vp9_zero(cm->ref_frame_sign_bias);
 
   cm->frame_context_idx = 0;
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index ff2bc45..40d8ffd 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -353,10 +353,17 @@
 
     // TODO(yunqingwang): count in loopfilter functions should be removed.
     if (mask & 1) {
-      if (mask_16x16_0 & 1) {
-        // if (mask_16x16_0 & 1) is 1, then (mask_16x16_1 & 1) is 1.
-        vp9_mb_lpf_vertical_edge_w_16(s, pitch, lfi0->mblim, lfi0->lim,
-                                      lfi0->hev_thr);
+      if ((mask_16x16_0 | mask_16x16_1) & 1) {
+        if ((mask_16x16_0 & mask_16x16_1) & 1) {
+          vp9_mb_lpf_vertical_edge_w_16(s, pitch, lfi0->mblim, lfi0->lim,
+                                     lfi0->hev_thr);
+        } else if (mask_16x16_0 & 1) {
+          vp9_mb_lpf_vertical_edge_w(s, pitch, lfi0->mblim, lfi0->lim,
+                                     lfi0->hev_thr);
+        } else {
+          vp9_mb_lpf_vertical_edge_w(s + 8 *pitch, pitch, lfi1->mblim,
+                                     lfi1->lim, lfi1->hev_thr);
+        }
       }
 
       if ((mask_8x8_0 | mask_8x8_1) & 1) {
@@ -432,10 +439,14 @@
     count = 1;
     if (mask & 1) {
       if (mask_16x16 & 1) {
-        // If (mask_16x16 & 1) is 1, then (mask_16x16 & 3) is 3.
-        vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
-                                     lfi->hev_thr, 2);
-        count = 2;
+        if ((mask_16x16 & 3) == 3) {
+          vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
+                                       lfi->hev_thr, 2);
+          count = 2;
+        } else {
+          vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
+                                       lfi->hev_thr, 1);
+        }
       } else if (mask_8x8 & 1) {
         if ((mask_8x8 & 3) == 3) {
           // Next block's thresholds
diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c
index ef8de20..f2e910f 100644
--- a/vp9/common/vp9_loopfilter_filters.c
+++ b/vp9/common/vp9_loopfilter_filters.c
@@ -121,32 +121,15 @@
   }
 }
 
-void vp9_loop_filter_horizontal_edge_16_c(uint8_t *s, int p /* pitch */,
+void vp9_loop_filter_horizontal_edge_16_c(uint8_t *s, int p,
                                           const uint8_t *blimit0,
                                           const uint8_t *limit0,
                                           const uint8_t *thresh0,
                                           const uint8_t *blimit1,
                                           const uint8_t *limit1,
                                           const uint8_t *thresh1) {
-  int i, j;
-  const uint8_t *blimit = blimit0;
-  const uint8_t *limit = limit0;
-  const uint8_t *thresh = thresh0;
-
-  for (i = 0; i < 2; ++i) {
-    for (j = 0; j < 8; ++j) {
-      const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
-      const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
-      const int8_t mask = filter_mask(*limit, *blimit,
-                                      p3, p2, p1, p0, q0, q1, q2, q3);
-      const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
-      filter4(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
-      ++s;
-    }
-    blimit = blimit1;
-    limit = limit1;
-    thresh = thresh1;
-  }
+  vp9_loop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1);
+  vp9_loop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1);
 }
 
 void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch,
@@ -176,25 +159,9 @@
                                         const uint8_t *blimit1,
                                         const uint8_t *limit1,
                                         const uint8_t *thresh1) {
-  int i, j;
-  const uint8_t *blimit = blimit0;
-  const uint8_t *limit = limit0;
-  const uint8_t *thresh = thresh0;
-
-  for (i = 0; i < 2; ++i) {
-    for (j = 0; j < 8; ++j) {
-      const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
-      const uint8_t q0 = s[0],  q1 = s[1],  q2 = s[2],  q3 = s[3];
-      const int8_t mask = filter_mask(*limit, *blimit,
-                                      p3, p2, p1, p0, q0, q1, q2, q3);
-      const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
-      filter4(mask, hev, s - 2, s - 1, s, s + 1);
-      s += pitch;
-    }
-    blimit = blimit1;
-    limit = limit1;
-    thresh = thresh1;
-  }
+  vp9_loop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1);
+  vp9_loop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1,
+                                  thresh1, 1);
 }
 
 static INLINE void filter8(int8_t mask, uint8_t hev, uint8_t flat,
@@ -241,35 +208,15 @@
   }
 }
 
-void vp9_mbloop_filter_horizontal_edge_16_c(uint8_t *s, int p /* pitch */,
+void vp9_mbloop_filter_horizontal_edge_16_c(uint8_t *s, int p,
                                             const uint8_t *blimit0,
                                             const uint8_t *limit0,
                                             const uint8_t *thresh0,
                                             const uint8_t *blimit1,
                                             const uint8_t *limit1,
                                             const uint8_t *thresh1) {
-  int i, j;
-  const uint8_t *blimit = blimit0;
-  const uint8_t *limit = limit0;
-  const uint8_t *thresh = thresh0;
-
-  for (i = 0; i < 2; ++i) {
-    for (j = 0; j < 8; ++j) {
-      const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
-      const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
-
-      const int8_t mask = filter_mask(*limit, *blimit,
-                                      p3, p2, p1, p0, q0, q1, q2, q3);
-      const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
-      const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
-      filter8(mask, hev, flat, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
-                               s,         s + 1 * p, s + 2 * p, s + 3 * p);
-      ++s;
-    }
-    blimit = blimit1;
-    limit = limit1;
-    thresh = thresh1;
-  }
+  vp9_mbloop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1);
+  vp9_mbloop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1);
 }
 
 void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch,
@@ -299,27 +246,9 @@
                                           const uint8_t *blimit1,
                                           const uint8_t *limit1,
                                           const uint8_t *thresh1) {
-  int i, j;
-  const uint8_t *blimit = blimit0;
-  const uint8_t *limit = limit0;
-  const uint8_t *thresh = thresh0;
-
-  for (i = 0; i < 2; ++i) {
-    for (j = 0; j < 8; ++j) {
-      const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
-      const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
-      const int8_t mask = filter_mask(*limit, *blimit,
-                                      p3, p2, p1, p0, q0, q1, q2, q3);
-      const int8_t hev = hev_mask(thresh[0], p1, p0, q0, q1);
-      const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
-      filter8(mask, hev, flat, s - 4, s - 3, s - 2, s - 1,
-                               s,     s + 1, s + 2, s + 3);
-      s += pitch;
-    }
-    blimit = blimit1;
-    limit = limit1;
-    thresh = thresh1;
-  }
+  vp9_mbloop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1);
+  vp9_mbloop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1,
+                                    thresh1, 1);
 }
 
 static INLINE void filter16(int8_t mask, uint8_t hev,
@@ -402,13 +331,14 @@
   }
 }
 
-void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int p,
-                                  const uint8_t *blimit,
-                                  const uint8_t *limit,
-                                  const uint8_t *thresh) {
+static void mb_lpf_vertical_edge_w(uint8_t *s, int p,
+                                   const uint8_t *blimit,
+                                   const uint8_t *limit,
+                                   const uint8_t *thresh,
+                                   int count) {
   int i;
 
-  for (i = 0; i < 8; ++i) {
+  for (i = 0; i < count; ++i) {
     const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
     const uint8_t q0 = s[0], q1 = s[1],  q2 = s[2], q3 = s[3];
     const int8_t mask = filter_mask(*limit, *blimit,
@@ -425,25 +355,16 @@
   }
 }
 
+void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int p,
+                                  const uint8_t *blimit,
+                                  const uint8_t *limit,
+                                  const uint8_t *thresh) {
+  mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
+}
+
 void vp9_mb_lpf_vertical_edge_w_16_c(uint8_t *s, int p,
                                      const uint8_t *blimit,
                                      const uint8_t *limit,
                                      const uint8_t *thresh) {
-  int i;
-
-  for (i = 0; i < 16; ++i) {
-    const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
-    const uint8_t q0 = s[0], q1 = s[1],  q2 = s[2], q3 = s[3];
-    const int8_t mask = filter_mask(*limit, *blimit,
-                                    p3, p2, p1, p0, q0, q1, q2, q3);
-    const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
-    const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
-    const int8_t flat2 = flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0,
-                                    q0, s[4], s[5], s[6], s[7]);
-
-    filter16(mask, hev, flat, flat2,
-             s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1,
-             s,     s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7);
-    s += p;
-  }
+  mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
 }
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 751accf..a6e5b27 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -25,18 +25,18 @@
 #include "vp9/common/vp9_postproc.h"
 #endif
 
-#define ALLOWED_REFS_PER_FRAME 3
+#define REFS_PER_FRAME 3
 
-#define NUM_REF_FRAMES_LOG2 3
-#define NUM_REF_FRAMES (1 << NUM_REF_FRAMES_LOG2)
+#define REF_FRAMES_LOG2 3
+#define REF_FRAMES (1 << REF_FRAMES_LOG2)
 
 // 1 scratch frame for the new frame, 3 for scaled references on the encoder
 // TODO(jkoleszar): These 3 extra references could probably come from the
 // normal reference pool.
-#define NUM_YV12_BUFFERS (NUM_REF_FRAMES + 4)
+#define FRAME_BUFFERS (REF_FRAMES + 4)
 
-#define NUM_FRAME_CONTEXTS_LOG2 2
-#define NUM_FRAME_CONTEXTS (1 << NUM_FRAME_CONTEXTS_LOG2)
+#define FRAME_CONTEXTS_LOG2 2
+#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2)
 
 extern const struct {
   PARTITION_CONTEXT above;
@@ -47,7 +47,7 @@
   vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
   vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
   vp9_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1];
-  vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
+  vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES];
   vp9_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
                                  [SWITCHABLE_FILTERS - 1];
   vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
@@ -64,9 +64,9 @@
   unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
   unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
   unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES];
-  vp9_coeff_count_model coef[TX_SIZES][BLOCK_TYPES];
-  unsigned int eob_branch[TX_SIZES][BLOCK_TYPES][REF_TYPES]
-                         [COEF_BANDS][PREV_COEF_CONTEXTS];
+  vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES];
+  unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES]
+                         [COEF_BANDS][COEFF_CONTEXTS];
   unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS]
                                 [SWITCHABLE_FILTERS];
   unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES];
@@ -113,17 +113,17 @@
 
   YV12_BUFFER_CONFIG *frame_to_show;
 
-  YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
-  int fb_idx_ref_cnt[NUM_YV12_BUFFERS]; /* reference counts */
-  int ref_frame_map[NUM_REF_FRAMES]; /* maps fb_idx to reference slot */
+  YV12_BUFFER_CONFIG yv12_fb[FRAME_BUFFERS];
+  int fb_idx_ref_cnt[FRAME_BUFFERS]; /* reference counts */
+  int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */
 
   // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and
   // roll new_fb_idx into it.
 
-  // Each frame can reference ALLOWED_REFS_PER_FRAME buffers
-  int active_ref_idx[ALLOWED_REFS_PER_FRAME];
-  struct scale_factors active_ref_scale[ALLOWED_REFS_PER_FRAME];
-  struct scale_factors_common active_ref_scale_comm[ALLOWED_REFS_PER_FRAME];
+  // Each frame can reference REFS_PER_FRAME buffers
+  int active_ref_idx[REFS_PER_FRAME];
+  struct scale_factors active_ref_scale[REFS_PER_FRAME];
+  struct scale_factors_common active_ref_scale_comm[REFS_PER_FRAME];
   int new_fb_idx;
 
   YV12_BUFFER_CONFIG post_proc_buffer;
@@ -195,10 +195,10 @@
   int allow_comp_inter_inter;
   MV_REFERENCE_FRAME comp_fixed_ref;
   MV_REFERENCE_FRAME comp_var_ref[2];
-  REFERENCE_MODE comp_pred_mode;
+  REFERENCE_MODE reference_mode;
 
   FRAME_CONTEXT fc;  /* this frame entropy */
-  FRAME_CONTEXT frame_contexts[NUM_FRAME_CONTEXTS];
+  FRAME_CONTEXT frame_contexts[FRAME_CONTEXTS];
   unsigned int  frame_context_idx; /* Context to use/update */
   FRAME_COUNTS counts;
 
@@ -228,18 +228,20 @@
 
 static int get_free_fb(VP9_COMMON *cm) {
   int i;
-  for (i = 0; i < NUM_YV12_BUFFERS; i++)
+  for (i = 0; i < FRAME_BUFFERS; i++)
     if (cm->fb_idx_ref_cnt[i] == 0)
       break;
 
-  assert(i < NUM_YV12_BUFFERS);
+  assert(i < FRAME_BUFFERS);
   cm->fb_idx_ref_cnt[i] = 1;
   return i;
 }
 
 static void ref_cnt_fb(int *buf, int *idx, int new_idx) {
-  if (buf[*idx] > 0)
-    buf[*idx]--;
+  const int ref_index = *idx;
+
+  if (ref_index >= 0 && buf[ref_index] > 0)
+    buf[ref_index]--;
 
   *idx = new_idx;
 
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index 22b66b5..b16417d 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -25,48 +25,35 @@
 }
 
 // Returns a context number for the given MB prediction signal
-unsigned char vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
-  const MODE_INFO *const above_mi = get_above_mi(xd);
-  const MODE_INFO *const left_mi = get_left_mi(xd);
-  const int above_in_image = above_mi != NULL;
-  const int left_in_image = left_mi != NULL;
+int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
   // Note:
   // The mode info data structure has a one element border above and to the
   // left of the entries correpsonding to real macroblocks.
   // The prediction flags in these dummy entries are initialised to 0.
-  // left
-  const int left_mv_pred = left_in_image ? is_inter_block(&left_mi->mbmi)
-                                         : 0;
-  const int left_interp = left_in_image && left_mv_pred
-                              ? left_mi->mbmi.interp_filter
-                              : SWITCHABLE_FILTERS;
+  const MODE_INFO *const left_mi = get_left_mi(xd);
+  const int has_left = left_mi != NULL ? is_inter_block(&left_mi->mbmi) : 0;
+  const int left_type = has_left ? left_mi->mbmi.interp_filter
+                                 : SWITCHABLE_FILTERS;
 
-  // above
-  const int above_mv_pred = above_in_image ? is_inter_block(&above_mi->mbmi)
-                                           : 0;
-  const int above_interp = above_in_image && above_mv_pred
-                               ? above_mi->mbmi.interp_filter
-                               : SWITCHABLE_FILTERS;
-
-  if (left_interp == above_interp)
-    return left_interp;
-  else if (left_interp == SWITCHABLE_FILTERS &&
-           above_interp != SWITCHABLE_FILTERS)
-    return above_interp;
-  else if (left_interp != SWITCHABLE_FILTERS &&
-           above_interp == SWITCHABLE_FILTERS)
-    return left_interp;
+  const MODE_INFO *const above_mi = get_above_mi(xd);
+  const int has_above = above_mi != NULL ? is_inter_block(&above_mi->mbmi) : 0;
+  const int above_type = has_above ? above_mi->mbmi.interp_filter
+                                   : SWITCHABLE_FILTERS;
+  if (left_type == above_type)
+    return left_type;
+  else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS)
+    return above_type;
+  else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS)
+    return left_type;
   else
     return SWITCHABLE_FILTERS;
 }
 // Returns a context number for the given MB prediction signal
-unsigned char vp9_get_pred_context_intra_inter(const MACROBLOCKD *xd) {
-  const MODE_INFO *const above_mi = get_above_mi(xd);
-  const MODE_INFO *const left_mi = get_left_mi(xd);
-  const MB_MODE_INFO *const above_mbmi = get_above_mbmi(above_mi);
-  const MB_MODE_INFO *const left_mbmi = get_left_mbmi(left_mi);
-  const int above_in_image = above_mi != NULL;
-  const int left_in_image = left_mi != NULL;
+int vp9_get_intra_inter_context(const MACROBLOCKD *xd) {
+  const MB_MODE_INFO *const above_mbmi = get_above_mbmi(get_above_mi(xd));
+  const MB_MODE_INFO *const left_mbmi = get_left_mbmi(get_left_mi(xd));
+  const int above_in_image = above_mbmi != NULL;
+  const int left_in_image = left_mbmi != NULL;
   const int above_intra = above_in_image ? !is_inter_block(above_mbmi) : 1;
   const int left_intra = left_in_image ? !is_inter_block(left_mbmi) : 1;
 
@@ -85,16 +72,14 @@
   else
     return 0;
 }
-// Returns a context number for the given MB prediction signal
-unsigned char vp9_get_pred_context_comp_inter_inter(const VP9_COMMON *cm,
-                                                    const MACROBLOCKD *xd) {
-  int pred_context;
-  const MODE_INFO *const above_mi = get_above_mi(xd);
-  const MODE_INFO *const left_mi = get_left_mi(xd);
-  const MB_MODE_INFO *const above_mbmi = get_above_mbmi(above_mi);
-  const MB_MODE_INFO *const left_mbmi = get_left_mbmi(left_mi);
-  const int above_in_image = above_mi != NULL;
-  const int left_in_image = left_mi != NULL;
+
+int vp9_get_reference_mode_context(const VP9_COMMON *cm,
+                                   const MACROBLOCKD *xd) {
+  int ctx;
+  const MB_MODE_INFO *const above_mbmi = get_above_mbmi(get_above_mi(xd));
+  const MB_MODE_INFO *const left_mbmi = get_left_mbmi(get_left_mi(xd));
+  const int above_in_image = above_mbmi != NULL;
+  const int left_in_image = left_mbmi != NULL;
   // Note:
   // The mode info data structure has a one element border above and to the
   // left of the entries correpsonding to real macroblocks.
@@ -102,32 +87,32 @@
   if (above_in_image && left_in_image) {  // both edges available
     if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi))
       // neither edge uses comp pred (0/1)
-      pred_context = (above_mbmi->ref_frame[0] == cm->comp_fixed_ref) ^
-                     (left_mbmi->ref_frame[0] == cm->comp_fixed_ref);
+      ctx = (above_mbmi->ref_frame[0] == cm->comp_fixed_ref) ^
+            (left_mbmi->ref_frame[0] == cm->comp_fixed_ref);
     else if (!has_second_ref(above_mbmi))
       // one of two edges uses comp pred (2/3)
-      pred_context = 2 + (above_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
-                          !is_inter_block(above_mbmi));
+      ctx = 2 + (above_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
+                 !is_inter_block(above_mbmi));
     else if (!has_second_ref(left_mbmi))
       // one of two edges uses comp pred (2/3)
-      pred_context = 2 + (left_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
-                          !is_inter_block(left_mbmi));
+      ctx = 2 + (left_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
+                 !is_inter_block(left_mbmi));
     else  // both edges use comp pred (4)
-      pred_context = 4;
+      ctx = 4;
   } else if (above_in_image || left_in_image) {  // one edge available
     const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
 
     if (!has_second_ref(edge_mbmi))
       // edge does not use comp pred (0/1)
-      pred_context = edge_mbmi->ref_frame[0] == cm->comp_fixed_ref;
+      ctx = edge_mbmi->ref_frame[0] == cm->comp_fixed_ref;
     else
       // edge uses comp pred (3)
-      pred_context = 3;
+      ctx = 3;
   } else {  // no edges available (1)
-    pred_context = 1;
+    ctx = 1;
   }
-  assert(pred_context >= 0 && pred_context < COMP_INTER_CONTEXTS);
-  return pred_context;
+  assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS);
+  return ctx;
 }
 
 // Returns a context number for the given MB prediction signal
@@ -368,32 +353,23 @@
 // The mode info data structure has a one element border above and to the
 // left of the entries corresponding to real blocks.
 // The prediction flags in these dummy entries are initialized to 0.
-unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd) {
-  const MODE_INFO *const above_mi = get_above_mi(xd);
-  const MODE_INFO *const left_mi = get_left_mi(xd);
-  const MB_MODE_INFO *const above_mbmi = get_above_mbmi(above_mi);
-  const MB_MODE_INFO *const left_mbmi = get_left_mbmi(left_mi);
-  const int above_in_image = above_mi != NULL;
-  const int left_in_image = left_mi != NULL;
+int vp9_get_tx_size_context(const MACROBLOCKD *xd) {
   const int max_tx_size = max_txsize_lookup[xd->mi_8x8[0]->mbmi.sb_type];
-  int above_context = max_tx_size;
-  int left_context = max_tx_size;
+  const MB_MODE_INFO *const above_mbmi = get_above_mbmi(get_above_mi(xd));
+  const MB_MODE_INFO *const left_mbmi = get_left_mbmi(get_left_mi(xd));
+  const int has_above = above_mbmi != NULL;
+  const int has_left = left_mbmi != NULL;
+  int above_ctx = (has_above && !above_mbmi->skip_coeff) ? above_mbmi->tx_size
+                                                         : max_tx_size;
+  int left_ctx = (has_left && !left_mbmi->skip_coeff) ? left_mbmi->tx_size
+                                                      : max_tx_size;
+  if (!has_left)
+    left_ctx = above_ctx;
 
-  if (above_in_image)
-    above_context = above_mbmi->skip_coeff ? max_tx_size
-                                           : above_mbmi->tx_size;
+  if (!has_above)
+    above_ctx = left_ctx;
 
-  if (left_in_image)
-    left_context = left_mbmi->skip_coeff ? max_tx_size
-                                         : left_mbmi->tx_size;
-
-  if (!left_in_image)
-    left_context = above_context;
-
-  if (!above_in_image)
-    above_context = left_context;
-
-  return above_context + left_context > max_tx_size;
+  return (above_ctx + left_ctx) > max_tx_size;
 }
 
 int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids,
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index b6f18e3..9a8f85c 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -40,44 +40,33 @@
   return seg->pred_probs[vp9_get_pred_context_seg_id(xd)];
 }
 
-static INLINE int vp9_get_pred_context_mbskip(const MACROBLOCKD *xd) {
+static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) {
   const MODE_INFO *const above_mi = get_above_mi(xd);
   const MODE_INFO *const left_mi = get_left_mi(xd);
-  const int above_skip_coeff = (above_mi != NULL) ?
-                               above_mi->mbmi.skip_coeff : 0;
-  const int left_skip_coeff = (left_mi != NULL) ? left_mi->mbmi.skip_coeff : 0;
-
-  return above_skip_coeff + left_skip_coeff;
+  const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip_coeff : 0;
+  const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip_coeff : 0;
+  return above_skip + left_skip;
 }
 
-static INLINE vp9_prob vp9_get_pred_prob_mbskip(const VP9_COMMON *cm,
+static INLINE vp9_prob vp9_get_skip_prob(const VP9_COMMON *cm,
+                                         const MACROBLOCKD *xd) {
+  return cm->fc.mbskip_probs[vp9_get_skip_context(xd)];
+}
+
+int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
+
+int vp9_get_intra_inter_context(const MACROBLOCKD *xd);
+
+static INLINE vp9_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm,
                                                 const MACROBLOCKD *xd) {
-  return cm->fc.mbskip_probs[vp9_get_pred_context_mbskip(xd)];
+  return cm->fc.intra_inter_prob[vp9_get_intra_inter_context(xd)];
 }
 
-static INLINE unsigned char vp9_get_pred_flag_mbskip(const MACROBLOCKD *xd) {
-  return xd->mi_8x8[0]->mbmi.skip_coeff;
-}
+int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd);
 
-unsigned char vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
-
-unsigned char vp9_get_pred_context_intra_inter(const MACROBLOCKD *xd);
-
-static INLINE vp9_prob vp9_get_pred_prob_intra_inter(const VP9_COMMON *cm,
-                                                     const MACROBLOCKD *xd) {
-  const int pred_context = vp9_get_pred_context_intra_inter(xd);
-  return cm->fc.intra_inter_prob[pred_context];
-}
-
-unsigned char vp9_get_pred_context_comp_inter_inter(const VP9_COMMON *cm,
-                                                    const MACROBLOCKD *xd);
-
-
-static INLINE
-vp9_prob vp9_get_pred_prob_comp_inter_inter(const VP9_COMMON *cm,
-                                            const MACROBLOCKD *xd) {
-  const int pred_context = vp9_get_pred_context_comp_inter_inter(cm, xd);
-  return cm->fc.comp_inter_prob[pred_context];
+static INLINE vp9_prob vp9_get_reference_mode_prob(const VP9_COMMON *cm,
+                                                   const MACROBLOCKD *xd) {
+  return cm->fc.comp_inter_prob[vp9_get_reference_mode_context(cm, xd)];
 }
 
 unsigned char vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
@@ -105,7 +94,7 @@
   return cm->fc.single_ref_prob[pred_context][1];
 }
 
-unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd);
+int vp9_get_tx_size_context(const MACROBLOCKD *xd);
 
 static const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
                                     const struct tx_probs *tx_probs) {
@@ -124,8 +113,7 @@
 
 static const vp9_prob *get_tx_probs2(TX_SIZE max_tx_size, const MACROBLOCKD *xd,
                                      const struct tx_probs *tx_probs) {
-  const int ctx = vp9_get_pred_context_tx_size(xd);
-  return get_tx_probs(max_tx_size, ctx, tx_probs);
+  return get_tx_probs(max_tx_size, vp9_get_tx_size_context(xd), tx_probs);
 }
 
 static unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 09a4fc8..237fd01 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -20,18 +20,56 @@
 #include "vp9/common/vp9_reconinter.h"
 #include "vp9/common/vp9_reconintra.h"
 
+static void build_mc_border(const uint8_t *src, uint8_t *dst, int stride,
+                             int x, int y, int b_w, int b_h, int w, int h) {
+  // Fill a b_w x b_h block at dst, edge-replicating outside the w x h frame.
+  const uint8_t *ref_row = src - x - y * stride;  // Frame row 0, column 0.
+  // Clamp the source row to [0, h - 1]; rows outside reuse the edge row.
+  if (y >= h)
+    ref_row += (h - 1) * stride;
+  else if (y > 0)
+    ref_row += y * stride;
+
+  do {
+    int right = 0, copy;
+    int left = x < 0 ? -x : 0;  // Columns to the left of column 0.
+
+    if (left > b_w)
+      left = b_w;
+    // Columns at or beyond column w.
+    if (x + b_w > w)
+      right = x + b_w - w;
+
+    if (right > b_w)
+      right = b_w;
+    // Whatever is left over lies inside [0, w) and is copied verbatim.
+    copy = b_w - left - right;
+
+    if (left)
+      memset(dst, ref_row[0], left);
+    // The caller may pass src == dst, so these ranges can overlap exactly.
+    if (copy)
+      memmove(dst + left, ref_row + x + left, copy);
+
+    if (right)
+      memset(dst + left + copy, ref_row[w - 1], right);
+
+    dst += stride;
+    ++y;
+    // Advance the source row only while the next row is inside the frame.
+    if (y > 0 && y < h)
+      ref_row += stride;
+  } while (--b_h);
+}
 
 static void inter_predictor(const uint8_t *src, int src_stride,
                             uint8_t *dst, int dst_stride,
-                            const MV32 *mv,
+                            const int subpel_x,
+                            const int subpel_y,
                             const struct scale_factors *scale,
                             int w, int h, int ref,
                             const struct subpix_fn_table *subpix,
                             int xs, int ys) {
-  const int subpel_x = mv->col & SUBPEL_MASK;
-  const int subpel_y = mv->row & SUBPEL_MASK;
-
-  src += (mv->row >> SUBPEL_BITS) * src_stride + (mv->col >> SUBPEL_BITS);
   scale->sfc->predict[subpel_x != 0][subpel_y != 0][ref](
       src, src_stride, dst, dst_stride,
       subpix->filter_x[subpel_x], xs,
@@ -51,9 +89,12 @@
                      is_q4 ? src_mv->col : src_mv->col * 2 };
   const struct scale_factors_common *sfc = scale->sfc;
   const MV32 mv = sfc->scale_mv(&mv_q4, scale);
+  const int subpel_x = mv.col & SUBPEL_MASK;
+  const int subpel_y = mv.row & SUBPEL_MASK;
+  src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
 
-  inter_predictor(src, src_stride, dst, dst_stride, &mv, scale,
-                  w, h, ref, subpix, sfc->x_step_q4, sfc->y_step_q4);
+  inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
+                  scale, w, h, ref, subpix, sfc->x_step_q4, sfc->y_step_q4);
 }
 
 static INLINE int round_mv_comp_q4(int value) {
@@ -102,24 +143,14 @@
 // calculate the subsampled BLOCK_SIZE, but that type isn't defined for
 // sizes smaller than 16x16 yet.
 static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
-                                   BLOCK_SIZE bsize, int pred_w, int pred_h,
+                                   int bw, int bh,
+                                   int x, int y, int w, int h,
                                    int mi_x, int mi_y) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-  const int bwl = b_width_log2(plane_bsize);
-  const int bw = 4 << bwl;
-  const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
-  const int x = 4 * (block & ((1 << bwl) - 1));
-  const int y = 4 * (block >> bwl);
   const MODE_INFO *mi = xd->mi_8x8[0];
   const int is_compound = has_second_ref(&mi->mbmi);
   int ref;
 
-  assert(x < bw);
-  assert(y < bh);
-  assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_w == bw);
-  assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_h == bh);
-
   for (ref = 0; ref < 1 + is_compound; ++ref) {
     struct scale_factors *const scale = &xd->scale_factor[ref];
     struct buf_2d *const pre_buf = &pd->pre[ref];
@@ -146,7 +177,7 @@
 
     uint8_t *pre;
     MV32 scaled_mv;
-    int xs, ys;
+    int xs, ys, subpel_x, subpel_y;
 
     if (vp9_is_scaled(scale->sfc)) {
       pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, scale);
@@ -160,11 +191,13 @@
       scaled_mv.col = mv_q4.col;
       xs = ys = 16;
     }
+    subpel_x = scaled_mv.col & SUBPEL_MASK;
+    subpel_y = scaled_mv.row & SUBPEL_MASK;
+    pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride
+           + (scaled_mv.col >> SUBPEL_BITS);
 
     inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
-                    &scaled_mv, scale,
-                    4 << pred_w, 4 << pred_h, ref,
-                    &xd->subpix, xs, ys);
+                    subpel_x, subpel_y, scale, w, h, ref, &xd->subpix, xs, ys);
   }
 }
 
@@ -172,20 +205,26 @@
                                               int mi_row, int mi_col,
                                               int plane_from, int plane_to) {
   int plane;
+  const int mi_x = mi_col * MI_SIZE;
+  const int mi_y = mi_row * MI_SIZE;
   for (plane = plane_from; plane <= plane_to; ++plane) {
-    const int mi_x = mi_col * MI_SIZE;
-    const int mi_y = mi_row * MI_SIZE;
-    const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
-    const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
+                                                        &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
 
     if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) {
       int i = 0, x, y;
       assert(bsize == BLOCK_8X8);
-      for (y = 0; y < 1 << bhl; ++y)
-        for (x = 0; x < 1 << bwl; ++x)
-          build_inter_predictors(xd, plane, i++, bsize, 0, 0, mi_x, mi_y);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+           build_inter_predictors(xd, plane, i++, bw, bh,
+                                  4 * x, 4 * y, 4, 4, mi_x, mi_y);
     } else {
-      build_inter_predictors(xd, plane, 0, bsize, bwl, bhl, mi_x, mi_y);
+      build_inter_predictors(xd, plane, 0, bw, bh,
+                             0, 0, bw, bh, mi_x, mi_y);
     }
   }
 }
@@ -208,24 +247,14 @@
 // TODO(jingning): This function serves as a placeholder for decoder prediction
 // using on demand border extension. It should be moved to /decoder/ directory.
 static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
-                                       BLOCK_SIZE bsize, int pred_w, int pred_h,
+                                       int bw, int bh,
+                                       int x, int y, int w, int h,
                                        int mi_x, int mi_y) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-  const int bwl = b_width_log2(plane_bsize);
-  const int bw = 4 << bwl;
-  const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
-  const int x = 4 * (block & ((1 << bwl) - 1));
-  const int y = 4 * (block >> bwl);
   const MODE_INFO *mi = xd->mi_8x8[0];
   const int is_compound = has_second_ref(&mi->mbmi);
   int ref;
 
-  assert(x < bw);
-  assert(y < bh);
-  assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_w == bw);
-  assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_h == bh);
-
   for (ref = 0; ref < 1 + is_compound; ++ref) {
     struct scale_factors *const scale = &xd->scale_factor[ref];
     struct buf_2d *const pre_buf = &pd->pre[ref];
@@ -250,47 +279,115 @@
                                                pd->subsampling_x,
                                                pd->subsampling_y);
 
-    uint8_t *pre;
     MV32 scaled_mv;
-    int xs, ys;
+    int xs, ys, x0, y0, x0_16, y0_16, x1, y1, frame_width,
+        frame_height, subpel_x, subpel_y;
+    uint8_t *ref_frame, *buf_ptr;
+    const YV12_BUFFER_CONFIG *ref_buf = xd->ref_buf[ref];
+
+    // Get reference frame pointer, width and height.
+    if (plane == 0) {
+      frame_width = ref_buf->y_crop_width;
+      frame_height = ref_buf->y_crop_height;
+      ref_frame = ref_buf->y_buffer;
+    } else {
+      frame_width = ref_buf->uv_crop_width;
+      frame_height = ref_buf->uv_crop_height;
+      ref_frame = plane == 1 ? ref_buf->u_buffer : ref_buf->v_buffer;
+    }
+
+    // Get block position in current frame.
+    x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
+    y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
+
+    // Precision of x0_16 and y0_16 is 1/16th pixel.
+    x0_16 = x0 << SUBPEL_BITS;
+    y0_16 = y0 << SUBPEL_BITS;
 
     if (vp9_is_scaled(scale->sfc)) {
-      pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, scale);
       scale->sfc->set_scaled_offsets(scale, mi_y + y, mi_x + x);
       scaled_mv = scale->sfc->scale_mv(&mv_q4, scale);
       xs = scale->sfc->x_step_q4;
       ys = scale->sfc->y_step_q4;
+      // Get block position in the scaled reference frame.
+      x0 = scale->sfc->scale_value_x(x0, scale->sfc);
+      y0 = scale->sfc->scale_value_y(y0, scale->sfc);
+      x0_16 = scale->sfc->scale_value_x(x0_16, scale->sfc);
+      y0_16 = scale->sfc->scale_value_y(y0_16, scale->sfc);
     } else {
-      pre = pre_buf->buf + (y * pre_buf->stride + x);
       scaled_mv.row = mv_q4.row;
       scaled_mv.col = mv_q4.col;
       xs = ys = 16;
     }
+    subpel_x = scaled_mv.col & SUBPEL_MASK;
+    subpel_y = scaled_mv.row & SUBPEL_MASK;
 
-    inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
-                    &scaled_mv, scale,
-                    4 << pred_w, 4 << pred_h, ref,
-                    &xd->subpix, xs, ys);
+    // Get reference block top left coordinate.
+    x0 += scaled_mv.col >> SUBPEL_BITS;
+    y0 += scaled_mv.row >> SUBPEL_BITS;
+    x0_16 += scaled_mv.col;
+    y0_16 += scaled_mv.row;
+
+    // Get reference block bottom right coordinate (x steps by xs, y by ys).
+    x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
+    y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
+
+    // Get reference block pointer.
+    buf_ptr = ref_frame + y0 * pre_buf->stride + x0;
+
+    // Do border extension if there is motion or
+    // width/height is not a multiple of 8 pixels.
+    if (scaled_mv.col || scaled_mv.row ||
+        (frame_width & 0x7) || (frame_height & 0x7)) {
+
+      if (subpel_x) {
+        x0 -= VP9_INTERP_EXTEND - 1;
+        x1 += VP9_INTERP_EXTEND;
+      }
+
+      if (subpel_y) {
+        y0 -= VP9_INTERP_EXTEND - 1;
+        y1 += VP9_INTERP_EXTEND;
+      }
+
+      // Skip border extension if block is inside the frame.
+      if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width ||
+          y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) {
+        uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0;
+        // Extend the border.
+        build_mc_border(buf_ptr1, buf_ptr1, pre_buf->stride, x0, y0, x1 - x0,
+                        y1 - y0, frame_width, frame_height);
+      }
+    }
+
+    inter_predictor(buf_ptr, pre_buf->stride, dst, dst_buf->stride, subpel_x,
+                    subpel_y, scale, w, h, ref, &xd->subpix, xs, ys);
   }
 }
 
 void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
                                        BLOCK_SIZE bsize) {
   int plane;
+  const int mi_x = mi_col * MI_SIZE;
+  const int mi_y = mi_row * MI_SIZE;
   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    const int mi_x = mi_col * MI_SIZE;
-    const int mi_y = mi_row * MI_SIZE;
-    const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
-    const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
+                                                        &xd->plane[plane]);
+    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int bw = 4 * num_4x4_w;
+    const int bh = 4 * num_4x4_h;
 
     if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) {
       int i = 0, x, y;
       assert(bsize == BLOCK_8X8);
-      for (y = 0; y < 1 << bhl; ++y)
-        for (x = 0; x < 1 << bwl; ++x)
-          dec_build_inter_predictors(xd, plane, i++, bsize, 0, 0, mi_x, mi_y);
+      for (y = 0; y < num_4x4_h; ++y)
+        for (x = 0; x < num_4x4_w; ++x)
+          dec_build_inter_predictors(xd, plane, i++, bw, bh,
+                                     4 * x, 4 * y, 4, 4, mi_x, mi_y);
     } else {
-      dec_build_inter_predictors(xd, plane, 0, bsize, bwl, bhl, mi_x, mi_y);
+      dec_build_inter_predictors(xd, plane, 0, bw, bh,
+                                 0, 0, bw, bh, mi_x, mi_y);
     }
   }
 }
@@ -300,7 +397,7 @@
   const int ref = cm->active_ref_idx[i];
   struct scale_factors *const sf = &cm->active_ref_scale[i];
   struct scale_factors_common *const sfc = &cm->active_ref_scale_comm[i];
-  if (ref >= NUM_YV12_BUFFERS) {
+  if (ref >= FRAME_BUFFERS) {
     vp9_zero(*sf);
     vp9_zero(*sfc);
   } else {
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 627ea31..19d5fc3 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -749,6 +749,9 @@
 specialize vp9_diamond_search_sad sse3
 vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4
 
+prototype int vp9_full_range_search "struct macroblock *x, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"
+specialize vp9_full_range_search
+
 prototype void vp9_temporal_filter_apply "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"
 specialize vp9_temporal_filter_apply sse2
 
diff --git a/vp9/common/vp9_treecoder.c b/vp9/common/vp9_treecoder.c
deleted file mode 100644
index e2a5b9f..0000000
--- a/vp9/common/vp9_treecoder.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include <assert.h>
-
-#include "./vpx_config.h"
-#include "vp9/common/vp9_treecoder.h"
-
-static void tree2tok(struct vp9_token *const p, vp9_tree t,
-                    int i, int v, int l) {
-  v += v;
-  ++l;
-
-  do {
-    const vp9_tree_index j = t[i++];
-
-    if (j <= 0) {
-      p[-j].value = v;
-      p[-j].len = l;
-    } else {
-      tree2tok(p, t, j, v, l);
-    }
-  } while (++v & 1);
-}
-
-void vp9_tokens_from_tree(struct vp9_token *p, vp9_tree t) {
-  tree2tok(p, t, 0, 0, 0);
-}
-
-static unsigned int convert_distribution(unsigned int i, vp9_tree tree,
-                                         unsigned int branch_ct[][2],
-                                         const unsigned int num_events[]) {
-  unsigned int left, right;
-
-  if (tree[i] <= 0)
-    left = num_events[-tree[i]];
-  else
-    left = convert_distribution(tree[i], tree, branch_ct, num_events);
-
-  if (tree[i + 1] <= 0)
-    right = num_events[-tree[i + 1]];
-  else
-    right = convert_distribution(tree[i + 1], tree, branch_ct, num_events);
-
-  branch_ct[i >> 1][0] = left;
-  branch_ct[i >> 1][1] = right;
-  return left + right;
-}
-
-void vp9_tree_probs_from_distribution(vp9_tree tree,
-                                      unsigned int branch_ct[/* n-1 */][2],
-                                      const unsigned int num_events[/* n */]) {
-  convert_distribution(0, tree, branch_ct, num_events);
-}
-
-
diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h
index a79b156..ed8c74a 100644
--- a/vp9/common/vp9_treecoder.h
+++ b/vp9/common/vp9_treecoder.h
@@ -34,25 +34,11 @@
 
 typedef const vp9_tree_index vp9_tree[];
 
-struct vp9_token {
-  int value;
-  int len;
-};
-
-/* Construct encoding array from tree. */
-
-void vp9_tokens_from_tree(struct vp9_token*, vp9_tree);
-
 /* Convert array of token occurrence counts into a table of probabilities
    for the associated binary encoding tree.  Also writes count of branches
    taken for each node on the tree; this facilitiates decisions as to
    probability updates. */
 
-void vp9_tree_probs_from_distribution(vp9_tree tree,
-                                      unsigned int branch_ct[ /* n - 1 */ ][2],
-                                      const unsigned int num_events[ /* n */ ]);
-
-
 static INLINE vp9_prob clip_prob(int p) {
   return (p > 255) ? 255u : (p < 1) ? 1u : p;
 }
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 8fdf19a..947c0ba 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -650,6 +650,25 @@
   res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
 }
 
+// Transposes the low 4 lanes of in[0..7] into out[0..3]; zeroes out[4..7].
+static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
+  // Interleave the low four 16-bit lanes of each pair of adjacent rows.
+  const __m128i rows01 = _mm_unpacklo_epi16(in[0], in[1]);
+  const __m128i rows23 = _mm_unpacklo_epi16(in[2], in[3]);
+  const __m128i rows45 = _mm_unpacklo_epi16(in[4], in[5]);
+  const __m128i rows67 = _mm_unpacklo_epi16(in[6], in[7]);
+  // Merge pairs into quads: *_lo holds columns 0-1, *_hi holds columns 2-3.
+  const __m128i top_lo = _mm_unpacklo_epi32(rows01, rows23);
+  const __m128i top_hi = _mm_unpackhi_epi32(rows01, rows23);
+  const __m128i bot_lo = _mm_unpacklo_epi32(rows45, rows67);
+  const __m128i bot_hi = _mm_unpackhi_epi32(rows45, rows67);
+  out[0] = _mm_unpacklo_epi64(top_lo, bot_lo);
+  out[1] = _mm_unpackhi_epi64(top_lo, bot_lo);
+  out[2] = _mm_unpacklo_epi64(top_hi, bot_hi);
+  out[3] = _mm_unpackhi_epi64(top_hi, bot_hi);
+  out[4] = out[5] = out[6] = out[7] = _mm_setzero_si128();
+}
+
 static void idct8_1d_sse2(__m128i *in) {
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
@@ -1139,14 +1158,14 @@
 #define IDCT16_1D \
   /* Stage2 */ \
   { \
-    const __m128i lo_1_15 = _mm_unpacklo_epi16(in1, in15); \
-    const __m128i hi_1_15 = _mm_unpackhi_epi16(in1, in15); \
-    const __m128i lo_9_7 = _mm_unpacklo_epi16(in9, in7);   \
-    const __m128i hi_9_7 = _mm_unpackhi_epi16(in9, in7);   \
-    const __m128i lo_5_11 = _mm_unpacklo_epi16(in5, in11); \
-    const __m128i hi_5_11 = _mm_unpackhi_epi16(in5, in11); \
-    const __m128i lo_13_3 = _mm_unpacklo_epi16(in13, in3); \
-    const __m128i hi_13_3 = _mm_unpackhi_epi16(in13, in3); \
+    const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \
+    const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], in[15]); \
+    const __m128i lo_9_7 = _mm_unpacklo_epi16(in[9], in[7]);   \
+    const __m128i hi_9_7 = _mm_unpackhi_epi16(in[9], in[7]);   \
+    const __m128i lo_5_11 = _mm_unpacklo_epi16(in[5], in[11]); \
+    const __m128i hi_5_11 = _mm_unpackhi_epi16(in[5], in[11]); \
+    const __m128i lo_13_3 = _mm_unpacklo_epi16(in[13], in[3]); \
+    const __m128i hi_13_3 = _mm_unpackhi_epi16(in[13], in[3]); \
     \
     MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_9_7, hi_9_7, \
                            stg2_0, stg2_1, stg2_2, stg2_3, \
@@ -1159,10 +1178,10 @@
     \
   /* Stage3 */ \
   { \
-    const __m128i lo_2_14 = _mm_unpacklo_epi16(in2, in14); \
-    const __m128i hi_2_14 = _mm_unpackhi_epi16(in2, in14); \
-    const __m128i lo_10_6 = _mm_unpacklo_epi16(in10, in6); \
-    const __m128i hi_10_6 = _mm_unpackhi_epi16(in10, in6); \
+    const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], in[14]); \
+    const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], in[14]); \
+    const __m128i lo_10_6 = _mm_unpacklo_epi16(in[10], in[6]); \
+    const __m128i hi_10_6 = _mm_unpackhi_epi16(in[10], in[6]); \
     \
     MULTIPLICATION_AND_ADD(lo_2_14, hi_2_14, lo_10_6, hi_10_6, \
                            stg3_0, stg3_1, stg3_2, stg3_3, \
@@ -1181,10 +1200,10 @@
   \
   /* Stage4 */ \
   { \
-    const __m128i lo_0_8 = _mm_unpacklo_epi16(in0, in8); \
-    const __m128i hi_0_8 = _mm_unpackhi_epi16(in0, in8); \
-    const __m128i lo_4_12 = _mm_unpacklo_epi16(in4, in12); \
-    const __m128i hi_4_12 = _mm_unpackhi_epi16(in4, in12); \
+    const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]); \
+    const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], in[8]); \
+    const __m128i lo_4_12 = _mm_unpacklo_epi16(in[4], in[12]); \
+    const __m128i hi_4_12 = _mm_unpackhi_epi16(in[4], in[12]); \
     \
     const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \
     const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \
@@ -1296,16 +1315,7 @@
 
   const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
 
-  __m128i in0 = zero, in1 = zero, in2 = zero, in3 = zero, in4 = zero,
-          in5 = zero, in6 = zero, in7 = zero, in8 = zero, in9 = zero,
-          in10 = zero, in11 = zero, in12 = zero, in13 = zero,
-          in14 = zero, in15 = zero;
-  __m128i l0 = zero, l1 = zero, l2 = zero, l3 = zero, l4 = zero, l5 = zero,
-          l6 = zero, l7 = zero, l8 = zero, l9 = zero, l10 = zero, l11 = zero,
-          l12 = zero, l13 = zero, l14 = zero, l15 = zero;
-  __m128i r0 = zero, r1 = zero, r2 = zero, r3 = zero, r4 = zero, r5 = zero,
-          r6 = zero, r7 = zero, r8 = zero, r9 = zero, r10 = zero, r11 = zero,
-          r12 = zero, r13 = zero, r14 = zero, r15 = zero;
+  __m128i in[16], l[16], r[16], *curr1;
   __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7,
           stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
           stp1_8_0, stp1_12_0;
@@ -1314,162 +1324,132 @@
   __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   int i;
 
-  // We work on a 8x16 block each time, and loop 4 times for 2-D 16x16 idct.
-  for (i = 0; i < 4; i++) {
-    // 1-D idct
-    if (i < 2) {
-      if (i == 1) input += 128;
+  curr1 = l;
+  for (i = 0; i < 2; i++) {
+      // First 1-D pass: iteration 0 fills l, iteration 1 fills r.
 
       // Load input data.
-      in0 = _mm_load_si128((const __m128i *)input);
-      in8 = _mm_load_si128((const __m128i *)(input + 8 * 1));
-      in1 = _mm_load_si128((const __m128i *)(input + 8 * 2));
-      in9 = _mm_load_si128((const __m128i *)(input + 8 * 3));
-      in2 = _mm_load_si128((const __m128i *)(input + 8 * 4));
-      in10 = _mm_load_si128((const __m128i *)(input + 8 * 5));
-      in3 = _mm_load_si128((const __m128i *)(input + 8 * 6));
-      in11 = _mm_load_si128((const __m128i *)(input + 8 * 7));
-      in4 = _mm_load_si128((const __m128i *)(input + 8 * 8));
-      in12 = _mm_load_si128((const __m128i *)(input + 8 * 9));
-      in5 = _mm_load_si128((const __m128i *)(input + 8 * 10));
-      in13 = _mm_load_si128((const __m128i *)(input + 8 * 11));
-      in6 = _mm_load_si128((const __m128i *)(input + 8 * 12));
-      in14 = _mm_load_si128((const __m128i *)(input + 8 * 13));
-      in7 = _mm_load_si128((const __m128i *)(input + 8 * 14));
-      in15 = _mm_load_si128((const __m128i *)(input + 8 * 15));
+      in[0] = _mm_load_si128((const __m128i *)input);
+      in[8] = _mm_load_si128((const __m128i *)(input + 8 * 1));
+      in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2));
+      in[9] = _mm_load_si128((const __m128i *)(input + 8 * 3));
+      in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4));
+      in[10] = _mm_load_si128((const __m128i *)(input + 8 * 5));
+      in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6));
+      in[11] = _mm_load_si128((const __m128i *)(input + 8 * 7));
+      in[4] = _mm_load_si128((const __m128i *)(input + 8 * 8));
+      in[12] = _mm_load_si128((const __m128i *)(input + 8 * 9));
+      in[5] = _mm_load_si128((const __m128i *)(input + 8 * 10));
+      in[13] = _mm_load_si128((const __m128i *)(input + 8 * 11));
+      in[6] = _mm_load_si128((const __m128i *)(input + 8 * 12));
+      in[14] = _mm_load_si128((const __m128i *)(input + 8 * 13));
+      in[7] = _mm_load_si128((const __m128i *)(input + 8 * 14));
+      in[15] = _mm_load_si128((const __m128i *)(input + 8 * 15));
 
-      TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
-                    in4, in5, in6, in7);
-      TRANSPOSE_8X8(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9,
-                    in10, in11, in12, in13, in14, in15);
-    }
+      array_transpose_8x8(in, in);
+      array_transpose_8x8(in+8, in+8);
 
-    if (i == 2) {
-      TRANSPOSE_8X8(l0, l1, l2, l3, l4, l5, l6, l7, in0, in1, in2, in3, in4,
-                    in5, in6, in7);
-      TRANSPOSE_8X8(r0, r1, r2, r3, r4, r5, r6, r7, in8, in9, in10, in11, in12,
-                    in13, in14, in15);
-    }
+      IDCT16_1D
 
-    if (i == 3) {
-      TRANSPOSE_8X8(l8, l9, l10, l11, l12, l13, l14, l15, in0, in1, in2, in3,
-                    in4, in5, in6, in7);
-      TRANSPOSE_8X8(r8, r9, r10, r11, r12, r13, r14, r15, in8, in9, in10, in11,
-                    in12, in13, in14, in15);
-    }
+      // Stage7
+      curr1[0] = _mm_add_epi16(stp2_0, stp1_15);
+      curr1[1] = _mm_add_epi16(stp2_1, stp1_14);
+      curr1[2] = _mm_add_epi16(stp2_2, stp2_13);
+      curr1[3] = _mm_add_epi16(stp2_3, stp2_12);
+      curr1[4] = _mm_add_epi16(stp2_4, stp2_11);
+      curr1[5] = _mm_add_epi16(stp2_5, stp2_10);
+      curr1[6] = _mm_add_epi16(stp2_6, stp1_9);
+      curr1[7] = _mm_add_epi16(stp2_7, stp1_8);
+      curr1[8] = _mm_sub_epi16(stp2_7, stp1_8);
+      curr1[9] = _mm_sub_epi16(stp2_6, stp1_9);
+      curr1[10] = _mm_sub_epi16(stp2_5, stp2_10);
+      curr1[11] = _mm_sub_epi16(stp2_4, stp2_11);
+      curr1[12] = _mm_sub_epi16(stp2_3, stp2_12);
+      curr1[13] = _mm_sub_epi16(stp2_2, stp2_13);
+      curr1[14] = _mm_sub_epi16(stp2_1, stp1_14);
+      curr1[15] = _mm_sub_epi16(stp2_0, stp1_15);
 
-    IDCT16_1D
+      curr1 = r;
+      input += 128;
+  }
+  for (i = 0; i < 2; i++) {
+      // Second 1-D pass over the transposed halves of l and r.
+      array_transpose_8x8(l+i*8, in);
+      array_transpose_8x8(r+i*8, in+8);
 
-    // Stage7
-    if (i == 0) {
-      // Left 8x16
-      l0 = _mm_add_epi16(stp2_0, stp1_15);
-      l1 = _mm_add_epi16(stp2_1, stp1_14);
-      l2 = _mm_add_epi16(stp2_2, stp2_13);
-      l3 = _mm_add_epi16(stp2_3, stp2_12);
-      l4 = _mm_add_epi16(stp2_4, stp2_11);
-      l5 = _mm_add_epi16(stp2_5, stp2_10);
-      l6 = _mm_add_epi16(stp2_6, stp1_9);
-      l7 = _mm_add_epi16(stp2_7, stp1_8);
-      l8 = _mm_sub_epi16(stp2_7, stp1_8);
-      l9 = _mm_sub_epi16(stp2_6, stp1_9);
-      l10 = _mm_sub_epi16(stp2_5, stp2_10);
-      l11 = _mm_sub_epi16(stp2_4, stp2_11);
-      l12 = _mm_sub_epi16(stp2_3, stp2_12);
-      l13 = _mm_sub_epi16(stp2_2, stp2_13);
-      l14 = _mm_sub_epi16(stp2_1, stp1_14);
-      l15 = _mm_sub_epi16(stp2_0, stp1_15);
-    } else if (i == 1) {
-      // Right 8x16
-      r0 = _mm_add_epi16(stp2_0, stp1_15);
-      r1 = _mm_add_epi16(stp2_1, stp1_14);
-      r2 = _mm_add_epi16(stp2_2, stp2_13);
-      r3 = _mm_add_epi16(stp2_3, stp2_12);
-      r4 = _mm_add_epi16(stp2_4, stp2_11);
-      r5 = _mm_add_epi16(stp2_5, stp2_10);
-      r6 = _mm_add_epi16(stp2_6, stp1_9);
-      r7 = _mm_add_epi16(stp2_7, stp1_8);
-      r8 = _mm_sub_epi16(stp2_7, stp1_8);
-      r9 = _mm_sub_epi16(stp2_6, stp1_9);
-      r10 = _mm_sub_epi16(stp2_5, stp2_10);
-      r11 = _mm_sub_epi16(stp2_4, stp2_11);
-      r12 = _mm_sub_epi16(stp2_3, stp2_12);
-      r13 = _mm_sub_epi16(stp2_2, stp2_13);
-      r14 = _mm_sub_epi16(stp2_1, stp1_14);
-      r15 = _mm_sub_epi16(stp2_0, stp1_15);
-    } else {
+      IDCT16_1D
+
       // 2-D
-      in0 = _mm_add_epi16(stp2_0, stp1_15);
-      in1 = _mm_add_epi16(stp2_1, stp1_14);
-      in2 = _mm_add_epi16(stp2_2, stp2_13);
-      in3 = _mm_add_epi16(stp2_3, stp2_12);
-      in4 = _mm_add_epi16(stp2_4, stp2_11);
-      in5 = _mm_add_epi16(stp2_5, stp2_10);
-      in6 = _mm_add_epi16(stp2_6, stp1_9);
-      in7 = _mm_add_epi16(stp2_7, stp1_8);
-      in8 = _mm_sub_epi16(stp2_7, stp1_8);
-      in9 = _mm_sub_epi16(stp2_6, stp1_9);
-      in10 = _mm_sub_epi16(stp2_5, stp2_10);
-      in11 = _mm_sub_epi16(stp2_4, stp2_11);
-      in12 = _mm_sub_epi16(stp2_3, stp2_12);
-      in13 = _mm_sub_epi16(stp2_2, stp2_13);
-      in14 = _mm_sub_epi16(stp2_1, stp1_14);
-      in15 = _mm_sub_epi16(stp2_0, stp1_15);
+      in[0] = _mm_add_epi16(stp2_0, stp1_15);
+      in[1] = _mm_add_epi16(stp2_1, stp1_14);
+      in[2] = _mm_add_epi16(stp2_2, stp2_13);
+      in[3] = _mm_add_epi16(stp2_3, stp2_12);
+      in[4] = _mm_add_epi16(stp2_4, stp2_11);
+      in[5] = _mm_add_epi16(stp2_5, stp2_10);
+      in[6] = _mm_add_epi16(stp2_6, stp1_9);
+      in[7] = _mm_add_epi16(stp2_7, stp1_8);
+      in[8] = _mm_sub_epi16(stp2_7, stp1_8);
+      in[9] = _mm_sub_epi16(stp2_6, stp1_9);
+      in[10] = _mm_sub_epi16(stp2_5, stp2_10);
+      in[11] = _mm_sub_epi16(stp2_4, stp2_11);
+      in[12] = _mm_sub_epi16(stp2_3, stp2_12);
+      in[13] = _mm_sub_epi16(stp2_2, stp2_13);
+      in[14] = _mm_sub_epi16(stp2_1, stp1_14);
+      in[15] = _mm_sub_epi16(stp2_0, stp1_15);
 
       // Final rounding and shift
-      in0 = _mm_adds_epi16(in0, final_rounding);
-      in1 = _mm_adds_epi16(in1, final_rounding);
-      in2 = _mm_adds_epi16(in2, final_rounding);
-      in3 = _mm_adds_epi16(in3, final_rounding);
-      in4 = _mm_adds_epi16(in4, final_rounding);
-      in5 = _mm_adds_epi16(in5, final_rounding);
-      in6 = _mm_adds_epi16(in6, final_rounding);
-      in7 = _mm_adds_epi16(in7, final_rounding);
-      in8 = _mm_adds_epi16(in8, final_rounding);
-      in9 = _mm_adds_epi16(in9, final_rounding);
-      in10 = _mm_adds_epi16(in10, final_rounding);
-      in11 = _mm_adds_epi16(in11, final_rounding);
-      in12 = _mm_adds_epi16(in12, final_rounding);
-      in13 = _mm_adds_epi16(in13, final_rounding);
-      in14 = _mm_adds_epi16(in14, final_rounding);
-      in15 = _mm_adds_epi16(in15, final_rounding);
+      in[0] = _mm_adds_epi16(in[0], final_rounding);
+      in[1] = _mm_adds_epi16(in[1], final_rounding);
+      in[2] = _mm_adds_epi16(in[2], final_rounding);
+      in[3] = _mm_adds_epi16(in[3], final_rounding);
+      in[4] = _mm_adds_epi16(in[4], final_rounding);
+      in[5] = _mm_adds_epi16(in[5], final_rounding);
+      in[6] = _mm_adds_epi16(in[6], final_rounding);
+      in[7] = _mm_adds_epi16(in[7], final_rounding);
+      in[8] = _mm_adds_epi16(in[8], final_rounding);
+      in[9] = _mm_adds_epi16(in[9], final_rounding);
+      in[10] = _mm_adds_epi16(in[10], final_rounding);
+      in[11] = _mm_adds_epi16(in[11], final_rounding);
+      in[12] = _mm_adds_epi16(in[12], final_rounding);
+      in[13] = _mm_adds_epi16(in[13], final_rounding);
+      in[14] = _mm_adds_epi16(in[14], final_rounding);
+      in[15] = _mm_adds_epi16(in[15], final_rounding);
 
-      in0 = _mm_srai_epi16(in0, 6);
-      in1 = _mm_srai_epi16(in1, 6);
-      in2 = _mm_srai_epi16(in2, 6);
-      in3 = _mm_srai_epi16(in3, 6);
-      in4 = _mm_srai_epi16(in4, 6);
-      in5 = _mm_srai_epi16(in5, 6);
-      in6 = _mm_srai_epi16(in6, 6);
-      in7 = _mm_srai_epi16(in7, 6);
-      in8 = _mm_srai_epi16(in8, 6);
-      in9 = _mm_srai_epi16(in9, 6);
-      in10 = _mm_srai_epi16(in10, 6);
-      in11 = _mm_srai_epi16(in11, 6);
-      in12 = _mm_srai_epi16(in12, 6);
-      in13 = _mm_srai_epi16(in13, 6);
-      in14 = _mm_srai_epi16(in14, 6);
-      in15 = _mm_srai_epi16(in15, 6);
+      in[0] = _mm_srai_epi16(in[0], 6);
+      in[1] = _mm_srai_epi16(in[1], 6);
+      in[2] = _mm_srai_epi16(in[2], 6);
+      in[3] = _mm_srai_epi16(in[3], 6);
+      in[4] = _mm_srai_epi16(in[4], 6);
+      in[5] = _mm_srai_epi16(in[5], 6);
+      in[6] = _mm_srai_epi16(in[6], 6);
+      in[7] = _mm_srai_epi16(in[7], 6);
+      in[8] = _mm_srai_epi16(in[8], 6);
+      in[9] = _mm_srai_epi16(in[9], 6);
+      in[10] = _mm_srai_epi16(in[10], 6);
+      in[11] = _mm_srai_epi16(in[11], 6);
+      in[12] = _mm_srai_epi16(in[12], 6);
+      in[13] = _mm_srai_epi16(in[13], 6);
+      in[14] = _mm_srai_epi16(in[14], 6);
+      in[15] = _mm_srai_epi16(in[15], 6);
 
-      RECON_AND_STORE(dest, in0);
-      RECON_AND_STORE(dest, in1);
-      RECON_AND_STORE(dest, in2);
-      RECON_AND_STORE(dest, in3);
-      RECON_AND_STORE(dest, in4);
-      RECON_AND_STORE(dest, in5);
-      RECON_AND_STORE(dest, in6);
-      RECON_AND_STORE(dest, in7);
-      RECON_AND_STORE(dest, in8);
-      RECON_AND_STORE(dest, in9);
-      RECON_AND_STORE(dest, in10);
-      RECON_AND_STORE(dest, in11);
-      RECON_AND_STORE(dest, in12);
-      RECON_AND_STORE(dest, in13);
-      RECON_AND_STORE(dest, in14);
-      RECON_AND_STORE(dest, in15);
+      RECON_AND_STORE(dest, in[0]);
+      RECON_AND_STORE(dest, in[1]);
+      RECON_AND_STORE(dest, in[2]);
+      RECON_AND_STORE(dest, in[3]);
+      RECON_AND_STORE(dest, in[4]);
+      RECON_AND_STORE(dest, in[5]);
+      RECON_AND_STORE(dest, in[6]);
+      RECON_AND_STORE(dest, in[7]);
+      RECON_AND_STORE(dest, in[8]);
+      RECON_AND_STORE(dest, in[9]);
+      RECON_AND_STORE(dest, in[10]);
+      RECON_AND_STORE(dest, in[11]);
+      RECON_AND_STORE(dest, in[12]);
+      RECON_AND_STORE(dest, in[13]);
+      RECON_AND_STORE(dest, in[14]);
+      RECON_AND_STORE(dest, in[15]);
 
       dest += 8 - (stride * 16);
-    }
   }
 }
 
@@ -2489,15 +2469,7 @@
   const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64);
 
   const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
-
-  __m128i in0 = zero, in1 = zero, in2 = zero, in3 = zero, in4 = zero,
-          in5 = zero, in6 = zero, in7 = zero, in8 = zero, in9 = zero,
-          in10 = zero, in11 = zero, in12 = zero, in13 = zero,
-          in14 = zero, in15 = zero;
-  __m128i l0 = zero, l1 = zero, l2 = zero, l3 = zero, l4 = zero, l5 = zero,
-          l6 = zero, l7 = zero, l8 = zero, l9 = zero, l10 = zero, l11 = zero,
-          l12 = zero, l13 = zero, l14 = zero, l15 = zero;
-
+  __m128i in[16], l[16];
   __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7,
           stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
           stp1_8_0, stp1_12_0;
@@ -2505,25 +2477,26 @@
           stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15;
   __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   int i;
+  in[4] = in[5] = in[6] = in[7] = in[12] = in[13] = in[14] = in[15] = zero;
   // 1-D idct. Load input data.
-  in0 = _mm_load_si128((const __m128i *)input);
-  in8 = _mm_load_si128((const __m128i *)(input + 8 * 1));
-  in1 = _mm_load_si128((const __m128i *)(input + 8 * 2));
-  in9 = _mm_load_si128((const __m128i *)(input + 8 * 3));
-  in2 = _mm_load_si128((const __m128i *)(input + 8 * 4));
-  in10 = _mm_load_si128((const __m128i *)(input + 8 * 5));
-  in3 = _mm_load_si128((const __m128i *)(input + 8 * 6));
-  in11 = _mm_load_si128((const __m128i *)(input + 8 * 7));
+  in[0] = _mm_load_si128((const __m128i *)input);
+  in[8] = _mm_load_si128((const __m128i *)(input + 8 * 1));
+  in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2));
+  in[9] = _mm_load_si128((const __m128i *)(input + 8 * 3));
+  in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4));
+  in[10] = _mm_load_si128((const __m128i *)(input + 8 * 5));
+  in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6));
+  in[11] = _mm_load_si128((const __m128i *)(input + 8 * 7));
 
-  TRANSPOSE_8X4(in0, in1, in2, in3, in0, in1, in2, in3);
-  TRANSPOSE_8X4(in8, in9, in10, in11, in8, in9, in10, in11);
+  TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1], in[2], in[3]);
+  TRANSPOSE_8X4(in[8], in[9], in[10], in[11], in[8], in[9], in[10], in[11]);
 
   // Stage2
   {
-    const __m128i lo_1_15 = _mm_unpackhi_epi16(in0, in11);
-    const __m128i lo_9_7 = _mm_unpackhi_epi16(in8, in3);
-    const __m128i lo_5_11 = _mm_unpackhi_epi16(in2, in9);
-    const __m128i lo_13_3 = _mm_unpackhi_epi16(in10, in1);
+    const __m128i lo_1_15 = _mm_unpackhi_epi16(in[0], in[11]);
+    const __m128i lo_9_7 = _mm_unpackhi_epi16(in[8], in[3]);
+    const __m128i lo_5_11 = _mm_unpackhi_epi16(in[2], in[9]);
+    const __m128i lo_13_3 = _mm_unpackhi_epi16(in[10], in[1]);
 
     tmp0 = _mm_madd_epi16(lo_1_15, stg2_0);
     tmp2 = _mm_madd_epi16(lo_1_15, stg2_1);
@@ -2565,8 +2538,8 @@
 
   // Stage3
   {
-    const __m128i lo_2_14 = _mm_unpacklo_epi16(in1, in11);
-    const __m128i lo_10_6 = _mm_unpacklo_epi16(in9, in3);
+    const __m128i lo_2_14 = _mm_unpacklo_epi16(in[1], in[11]);
+    const __m128i lo_10_6 = _mm_unpacklo_epi16(in[9], in[3]);
 
     tmp0 = _mm_madd_epi16(lo_2_14, stg3_0);
     tmp2 = _mm_madd_epi16(lo_2_14, stg3_1);
@@ -2601,8 +2574,8 @@
 
   // Stage4
   {
-    const __m128i lo_0_8 = _mm_unpacklo_epi16(in0, in8);
-    const __m128i lo_4_12 = _mm_unpacklo_epi16(in2, in10);
+    const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]);
+    const __m128i lo_4_12 = _mm_unpacklo_epi16(in[2], in[10]);
     const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14);
     const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);
 
@@ -2711,106 +2684,99 @@
   }
 
   // Stage7. Left 8x16 only.
-  l0 = _mm_add_epi16(stp2_0, stp1_15);
-  l1 = _mm_add_epi16(stp2_1, stp1_14);
-  l2 = _mm_add_epi16(stp2_2, stp2_13);
-  l3 = _mm_add_epi16(stp2_3, stp2_12);
-  l4 = _mm_add_epi16(stp2_4, stp2_11);
-  l5 = _mm_add_epi16(stp2_5, stp2_10);
-  l6 = _mm_add_epi16(stp2_6, stp1_9);
-  l7 = _mm_add_epi16(stp2_7, stp1_8);
-  l8 = _mm_sub_epi16(stp2_7, stp1_8);
-  l9 = _mm_sub_epi16(stp2_6, stp1_9);
-  l10 = _mm_sub_epi16(stp2_5, stp2_10);
-  l11 = _mm_sub_epi16(stp2_4, stp2_11);
-  l12 = _mm_sub_epi16(stp2_3, stp2_12);
-  l13 = _mm_sub_epi16(stp2_2, stp2_13);
-  l14 = _mm_sub_epi16(stp2_1, stp1_14);
-  l15 = _mm_sub_epi16(stp2_0, stp1_15);
+  l[0] = _mm_add_epi16(stp2_0, stp1_15);
+  l[1] = _mm_add_epi16(stp2_1, stp1_14);
+  l[2] = _mm_add_epi16(stp2_2, stp2_13);
+  l[3] = _mm_add_epi16(stp2_3, stp2_12);
+  l[4] = _mm_add_epi16(stp2_4, stp2_11);
+  l[5] = _mm_add_epi16(stp2_5, stp2_10);
+  l[6] = _mm_add_epi16(stp2_6, stp1_9);
+  l[7] = _mm_add_epi16(stp2_7, stp1_8);
+  l[8] = _mm_sub_epi16(stp2_7, stp1_8);
+  l[9] = _mm_sub_epi16(stp2_6, stp1_9);
+  l[10] = _mm_sub_epi16(stp2_5, stp2_10);
+  l[11] = _mm_sub_epi16(stp2_4, stp2_11);
+  l[12] = _mm_sub_epi16(stp2_3, stp2_12);
+  l[13] = _mm_sub_epi16(stp2_2, stp2_13);
+  l[14] = _mm_sub_epi16(stp2_1, stp1_14);
+  l[15] = _mm_sub_epi16(stp2_0, stp1_15);
 
   // 2-D idct. We do 2 8x16 blocks.
   for (i = 0; i < 2; i++) {
-    if (i == 0)
-      TRANSPOSE_4X8(l0, l1, l2, l3, l4, l5, l6, l7, in0, in1, in2, in3, in4,
-                    in5, in6, in7);
-
-    if (i == 1)
-      TRANSPOSE_4X8(l8, l9, l10, l11, l12, l13, l14, l15, in0, in1, in2, in3,
-                    in4, in5, in6, in7);
-
-    in8 = in9 = in10 = in11 = in12 = in13 = in14 = in15 = zero;
+    array_transpose_4X8(l + 8*i, in);
+    in[8] = in[9] = in[10] = in[11] = in[12] = in[13] = in[14] = in[15] = zero;
 
     IDCT16_1D
 
     // Stage7
-    in0 = _mm_add_epi16(stp2_0, stp1_15);
-    in1 = _mm_add_epi16(stp2_1, stp1_14);
-    in2 = _mm_add_epi16(stp2_2, stp2_13);
-    in3 = _mm_add_epi16(stp2_3, stp2_12);
-    in4 = _mm_add_epi16(stp2_4, stp2_11);
-    in5 = _mm_add_epi16(stp2_5, stp2_10);
-    in6 = _mm_add_epi16(stp2_6, stp1_9);
-    in7 = _mm_add_epi16(stp2_7, stp1_8);
-    in8 = _mm_sub_epi16(stp2_7, stp1_8);
-    in9 = _mm_sub_epi16(stp2_6, stp1_9);
-    in10 = _mm_sub_epi16(stp2_5, stp2_10);
-    in11 = _mm_sub_epi16(stp2_4, stp2_11);
-    in12 = _mm_sub_epi16(stp2_3, stp2_12);
-    in13 = _mm_sub_epi16(stp2_2, stp2_13);
-    in14 = _mm_sub_epi16(stp2_1, stp1_14);
-    in15 = _mm_sub_epi16(stp2_0, stp1_15);
+    in[0] = _mm_add_epi16(stp2_0, stp1_15);
+    in[1] = _mm_add_epi16(stp2_1, stp1_14);
+    in[2] = _mm_add_epi16(stp2_2, stp2_13);
+    in[3] = _mm_add_epi16(stp2_3, stp2_12);
+    in[4] = _mm_add_epi16(stp2_4, stp2_11);
+    in[5] = _mm_add_epi16(stp2_5, stp2_10);
+    in[6] = _mm_add_epi16(stp2_6, stp1_9);
+    in[7] = _mm_add_epi16(stp2_7, stp1_8);
+    in[8] = _mm_sub_epi16(stp2_7, stp1_8);
+    in[9] = _mm_sub_epi16(stp2_6, stp1_9);
+    in[10] = _mm_sub_epi16(stp2_5, stp2_10);
+    in[11] = _mm_sub_epi16(stp2_4, stp2_11);
+    in[12] = _mm_sub_epi16(stp2_3, stp2_12);
+    in[13] = _mm_sub_epi16(stp2_2, stp2_13);
+    in[14] = _mm_sub_epi16(stp2_1, stp1_14);
+    in[15] = _mm_sub_epi16(stp2_0, stp1_15);
 
     // Final rounding and shift
-    in0 = _mm_adds_epi16(in0, final_rounding);
-    in1 = _mm_adds_epi16(in1, final_rounding);
-    in2 = _mm_adds_epi16(in2, final_rounding);
-    in3 = _mm_adds_epi16(in3, final_rounding);
-    in4 = _mm_adds_epi16(in4, final_rounding);
-    in5 = _mm_adds_epi16(in5, final_rounding);
-    in6 = _mm_adds_epi16(in6, final_rounding);
-    in7 = _mm_adds_epi16(in7, final_rounding);
-    in8 = _mm_adds_epi16(in8, final_rounding);
-    in9 = _mm_adds_epi16(in9, final_rounding);
-    in10 = _mm_adds_epi16(in10, final_rounding);
-    in11 = _mm_adds_epi16(in11, final_rounding);
-    in12 = _mm_adds_epi16(in12, final_rounding);
-    in13 = _mm_adds_epi16(in13, final_rounding);
-    in14 = _mm_adds_epi16(in14, final_rounding);
-    in15 = _mm_adds_epi16(in15, final_rounding);
+    in[0] = _mm_adds_epi16(in[0], final_rounding);
+    in[1] = _mm_adds_epi16(in[1], final_rounding);
+    in[2] = _mm_adds_epi16(in[2], final_rounding);
+    in[3] = _mm_adds_epi16(in[3], final_rounding);
+    in[4] = _mm_adds_epi16(in[4], final_rounding);
+    in[5] = _mm_adds_epi16(in[5], final_rounding);
+    in[6] = _mm_adds_epi16(in[6], final_rounding);
+    in[7] = _mm_adds_epi16(in[7], final_rounding);
+    in[8] = _mm_adds_epi16(in[8], final_rounding);
+    in[9] = _mm_adds_epi16(in[9], final_rounding);
+    in[10] = _mm_adds_epi16(in[10], final_rounding);
+    in[11] = _mm_adds_epi16(in[11], final_rounding);
+    in[12] = _mm_adds_epi16(in[12], final_rounding);
+    in[13] = _mm_adds_epi16(in[13], final_rounding);
+    in[14] = _mm_adds_epi16(in[14], final_rounding);
+    in[15] = _mm_adds_epi16(in[15], final_rounding);
 
-    in0 = _mm_srai_epi16(in0, 6);
-    in1 = _mm_srai_epi16(in1, 6);
-    in2 = _mm_srai_epi16(in2, 6);
-    in3 = _mm_srai_epi16(in3, 6);
-    in4 = _mm_srai_epi16(in4, 6);
-    in5 = _mm_srai_epi16(in5, 6);
-    in6 = _mm_srai_epi16(in6, 6);
-    in7 = _mm_srai_epi16(in7, 6);
-    in8 = _mm_srai_epi16(in8, 6);
-    in9 = _mm_srai_epi16(in9, 6);
-    in10 = _mm_srai_epi16(in10, 6);
-    in11 = _mm_srai_epi16(in11, 6);
-    in12 = _mm_srai_epi16(in12, 6);
-    in13 = _mm_srai_epi16(in13, 6);
-    in14 = _mm_srai_epi16(in14, 6);
-    in15 = _mm_srai_epi16(in15, 6);
+    in[0] = _mm_srai_epi16(in[0], 6);
+    in[1] = _mm_srai_epi16(in[1], 6);
+    in[2] = _mm_srai_epi16(in[2], 6);
+    in[3] = _mm_srai_epi16(in[3], 6);
+    in[4] = _mm_srai_epi16(in[4], 6);
+    in[5] = _mm_srai_epi16(in[5], 6);
+    in[6] = _mm_srai_epi16(in[6], 6);
+    in[7] = _mm_srai_epi16(in[7], 6);
+    in[8] = _mm_srai_epi16(in[8], 6);
+    in[9] = _mm_srai_epi16(in[9], 6);
+    in[10] = _mm_srai_epi16(in[10], 6);
+    in[11] = _mm_srai_epi16(in[11], 6);
+    in[12] = _mm_srai_epi16(in[12], 6);
+    in[13] = _mm_srai_epi16(in[13], 6);
+    in[14] = _mm_srai_epi16(in[14], 6);
+    in[15] = _mm_srai_epi16(in[15], 6);
 
-    RECON_AND_STORE(dest, in0);
-    RECON_AND_STORE(dest, in1);
-    RECON_AND_STORE(dest, in2);
-    RECON_AND_STORE(dest, in3);
-    RECON_AND_STORE(dest, in4);
-    RECON_AND_STORE(dest, in5);
-    RECON_AND_STORE(dest, in6);
-    RECON_AND_STORE(dest, in7);
-    RECON_AND_STORE(dest, in8);
-    RECON_AND_STORE(dest, in9);
-    RECON_AND_STORE(dest, in10);
-    RECON_AND_STORE(dest, in11);
-    RECON_AND_STORE(dest, in12);
-    RECON_AND_STORE(dest, in13);
-    RECON_AND_STORE(dest, in14);
-    RECON_AND_STORE(dest, in15);
+    RECON_AND_STORE(dest, in[0]);
+    RECON_AND_STORE(dest, in[1]);
+    RECON_AND_STORE(dest, in[2]);
+    RECON_AND_STORE(dest, in[3]);
+    RECON_AND_STORE(dest, in[4]);
+    RECON_AND_STORE(dest, in[5]);
+    RECON_AND_STORE(dest, in[6]);
+    RECON_AND_STORE(dest, in[7]);
+    RECON_AND_STORE(dest, in[8]);
+    RECON_AND_STORE(dest, in[9]);
+    RECON_AND_STORE(dest, in[10]);
+    RECON_AND_STORE(dest, in[11]);
+    RECON_AND_STORE(dest, in[12]);
+    RECON_AND_STORE(dest, in[13]);
+    RECON_AND_STORE(dest, in[14]);
+    RECON_AND_STORE(dest, in[15]);
 
     dest += 8 - (stride * 16);
   }
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index bcd51f5..56b05ce 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -9,6 +9,7 @@
  */
 
 #include <assert.h>
+#include <stdlib.h>  // qsort()
 
 #include "./vp9_rtcd.h"
 #include "./vpx_scale_rtcd.h"
@@ -41,25 +42,23 @@
   VP9_COMMON *cm;
   vp9_reader bit_reader;
   DECLARE_ALIGNED(16, MACROBLOCKD, xd);
-  DECLARE_ALIGNED(16, unsigned char, token_cache[1024]);
   DECLARE_ALIGNED(16, int16_t,  dqcoeff[MAX_MB_PLANE][64 * 64]);
-  DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
 } TileWorkerData;
 
 static int read_be32(const uint8_t *p) {
   return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
 }
 
-static int is_compound_prediction_allowed(const VP9_COMMON *cm) {
+static int is_compound_reference_allowed(const VP9_COMMON *cm) {
   int i;
-  for (i = 1; i < ALLOWED_REFS_PER_FRAME; ++i)
+  for (i = 1; i < REFS_PER_FRAME; ++i)
     if  (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1])
       return 1;
 
   return 0;
 }
 
-static void setup_compound_prediction(VP9_COMMON *cm) {
+static void setup_compound_reference(VP9_COMMON *cm) {
   if (cm->ref_frame_sign_bias[LAST_FRAME] ==
           cm->ref_frame_sign_bias[GOLDEN_FRAME]) {
     cm->comp_fixed_ref = ALTREF_FRAME;
@@ -94,7 +93,7 @@
   return tx_mode;
 }
 
-static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) {
+static void read_tx_mode_probs(struct tx_probs *tx_probs, vp9_reader *r) {
   int i, j;
 
   for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
@@ -124,33 +123,31 @@
       vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
 }
 
-static INLINE REFERENCE_MODE read_comp_pred_mode(vp9_reader *r) {
-  REFERENCE_MODE mode = vp9_read_bit(r);
-  if (mode)
-    mode += vp9_read_bit(r);
-  return mode;
+// Reads the reference mode from r; SINGLE_REFERENCE if compound is disallowed.
+static REFERENCE_MODE read_reference_mode(VP9_COMMON *cm, vp9_reader *r) {
+  REFERENCE_MODE mode;
+  if (!is_compound_reference_allowed(cm))
+    return SINGLE_REFERENCE;
+  mode = vp9_read_bit(r);
+  if (mode)  // A second bit is read, and added, only when the first is set.
+    mode += vp9_read_bit(r);
+  setup_compound_reference(cm);
+  return mode;
+}
 
-static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
+static void read_reference_mode_probs(VP9_COMMON *cm, vp9_reader *r) {
   int i;
-
-  const int compound_allowed = is_compound_prediction_allowed(cm);
-  cm->comp_pred_mode = compound_allowed ? read_comp_pred_mode(r)
-                                        : SINGLE_REFERENCE;
-  if (compound_allowed)
-    setup_compound_prediction(cm);
-
-  if (cm->comp_pred_mode == REFERENCE_MODE_SELECT)
+  if (cm->reference_mode == REFERENCE_MODE_SELECT)
     for (i = 0; i < COMP_INTER_CONTEXTS; i++)
       vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
 
-  if (cm->comp_pred_mode != COMPOUND_REFERENCE)
+  if (cm->reference_mode != COMPOUND_REFERENCE)
     for (i = 0; i < REF_CONTEXTS; i++) {
       vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]);
       vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]);
     }
 
-  if (cm->comp_pred_mode != SINGLE_REFERENCE)
+  if (cm->reference_mode != SINGLE_REFERENCE)
     for (i = 0; i < REF_CONTEXTS; i++)
       vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
 }
@@ -242,9 +239,8 @@
 
 static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
                                     TX_SIZE tx_size, uint8_t *dst, int stride,
-                                    uint8_t *token_cache) {
+                                    int eob) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int eob = pd->eobs[block];
   if (eob > 0) {
     TX_TYPE tx_type;
     const int plane_type = pd->plane_type;
@@ -275,20 +271,13 @@
 
     if (eob == 1) {
       vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0]));
-      vpx_memset(token_cache, 0, 2 * sizeof(token_cache[0]));
     } else {
-      if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) {
+      if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
         vpx_memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
-        vpx_memset(token_cache, 0,
-                   4 * (4 << tx_size) * sizeof(token_cache[0]));
-      } else if (tx_size == TX_32X32 && eob <= 34) {
+      else if (tx_size == TX_32X32 && eob <= 34)
         vpx_memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
-        vpx_memset(token_cache, 0, 256 * sizeof(token_cache[0]));
-      } else {
+      else
         vpx_memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
-        vpx_memset(token_cache, 0,
-                   (16 << (tx_size << 1)) * sizeof(token_cache[0]));
-      }
     }
   }
 }
@@ -297,7 +286,6 @@
   VP9_COMMON *cm;
   MACROBLOCKD *xd;
   vp9_reader *r;
-  uint8_t *token_cache;
 };
 
 static void predict_and_reconstruct_intra_block(int plane, int block,
@@ -325,10 +313,11 @@
                           dst, pd->dst.stride, dst, pd->dst.stride);
 
   if (!mi->mbmi.skip_coeff) {
-    vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, x, y, tx_size,
-                            args->r, args->token_cache);
+    const int eob = vp9_decode_block_tokens(cm, xd, plane, block,
+                                            plane_bsize, x, y, tx_size,
+                                            args->r);
     inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride,
-                            args->token_cache);
+                            eob);
   }
 }
 
@@ -337,7 +326,6 @@
   MACROBLOCKD *xd;
   vp9_reader *r;
   int *eobtotal;
-  uint8_t *token_cache;
 };
 
 static void reconstruct_inter_block(int plane, int block,
@@ -347,14 +335,14 @@
   VP9_COMMON *const cm = args->cm;
   MACROBLOCKD *const xd = args->xd;
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  int x, y;
+  int x, y, eob;
   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
-  *args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block,
-                                             plane_bsize, x, y, tx_size,
-                                             args->r, args->token_cache);
+  eob = vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, x, y,
+                                tx_size, args->r);
   inverse_transform_block(xd, plane, block, tx_size,
                           &pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
-                          pd->dst.stride, args->token_cache);
+                          pd->dst.stride, eob);
+  *args->eobtotal += eob;
 }
 
 static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
@@ -392,6 +380,8 @@
   const int ref = mbmi->ref_frame[idx] - LAST_FRAME;
   const YV12_BUFFER_CONFIG *cfg = get_frame_ref_buffer(cm, ref);
   const struct scale_factors_common *sfc = &cm->active_ref_scale_comm[ref];
+
+  xd->ref_buf[idx] = cfg;
   if (!vp9_is_valid_scale(sfc))
     vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
                        "Invalid scale factors");
@@ -404,8 +394,7 @@
 static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                            const TileInfo *const tile,
                            int mi_row, int mi_col,
-                           vp9_reader *r, BLOCK_SIZE bsize,
-                           uint8_t *token_cache) {
+                           vp9_reader *r, BLOCK_SIZE bsize) {
   const int less8x8 = bsize < BLOCK_8X8;
   MB_MODE_INFO *mbmi;
 
@@ -427,9 +416,7 @@
   }
 
   if (!is_inter_block(mbmi)) {
-    struct intra_args arg = {
-      cm, xd, r, token_cache
-    };
+    struct intra_args arg = { cm, xd, r };
     foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block,
                               &arg);
   } else {
@@ -447,9 +434,7 @@
     // Reconstruction
     if (!mbmi->skip_coeff) {
       int eobtotal = 0;
-      struct inter_args arg = {
-        cm, xd, r, &eobtotal, token_cache
-      };
+      struct inter_args arg = { cm, xd, r, &eobtotal };
       foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
       if (!less8x8 && eobtotal == 0)
         mbmi->skip_coeff = 1;  // skip loopfilter
@@ -488,8 +473,7 @@
 static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                             const TileInfo *const tile,
                             int mi_row, int mi_col,
-                            vp9_reader* r, BLOCK_SIZE bsize,
-                            uint8_t *token_cache) {
+                            vp9_reader* r, BLOCK_SIZE bsize) {
   const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
   PARTITION_TYPE partition;
   BLOCK_SIZE subsize;
@@ -500,33 +484,27 @@
   partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r);
   subsize = get_subsize(bsize, partition);
   if (subsize < BLOCK_8X8) {
-    decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
+    decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
   } else {
     switch (partition) {
       case PARTITION_NONE:
-        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
+        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
         break;
       case PARTITION_HORZ:
-        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
+        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
         if (mi_row + hbs < cm->mi_rows)
-          decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize,
-                         token_cache);
+          decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
         break;
       case PARTITION_VERT:
-        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
+        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
         if (mi_col + hbs < cm->mi_cols)
-          decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize,
-                         token_cache);
+          decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
         break;
       case PARTITION_SPLIT:
-        decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize,
-                        token_cache);
-        decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize,
-                        token_cache);
-        decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize,
-                        token_cache);
-        decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize,
-                        token_cache);
+        decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize);
+        decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
+        decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
+        decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize);
         break;
       default:
         assert(!"Invalid partition type");
@@ -562,13 +540,12 @@
   int i, j, k, l, m;
 
   if (vp9_read_bit(r))
-    for (i = 0; i < BLOCK_TYPES; i++)
-      for (j = 0; j < REF_TYPES; j++)
-        for (k = 0; k < COEF_BANDS; k++)
-          for (l = 0; l < PREV_COEF_CONTEXTS; l++)
-            if (k > 0 || l < 3)
-              for (m = 0; m < UNCONSTRAINED_NODES; m++)
-                vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]);
+    for (i = 0; i < PLANE_TYPES; ++i)
+      for (j = 0; j < REF_TYPES; ++j)
+        for (k = 0; k < COEF_BANDS; ++k)
+          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
+            for (m = 0; m < UNCONSTRAINED_NODES; ++m)
+              vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]);
 }
 
 static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode,
@@ -712,12 +689,6 @@
 
   if (cm->width != width || cm->height != height) {
     // Change in frame size.
-    if (cm->width == 0 || cm->height == 0) {
-      // Assign new frame buffer on first call.
-      cm->new_fb_idx = NUM_YV12_BUFFERS - 1;
-      cm->fb_idx_ref_cnt[cm->new_fb_idx] = 1;
-    }
-
     // TODO(agrange) Don't test width/height, check overall size.
     if (width > cm->width || height > cm->height) {
       // Rescale frame buffers only if they're not big enough already.
@@ -751,7 +722,7 @@
 
   int width, height;
   int found = 0, i;
-  for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) {
+  for (i = 0; i < REFS_PER_FRAME; ++i) {
     if (vp9_rb_read_bit(rb)) {
       YV12_BUFFER_CONFIG *const cfg = get_frame_ref_buffer(cm, i);
       width = cfg->y_crop_width;
@@ -809,8 +780,7 @@
     vp9_zero(xd->left_seg_context);
     for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
          mi_col += MI_BLOCK_SIZE) {
-      decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64,
-                      pbi->token_cache);
+      decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64);
     }
 
     if (pbi->do_loopfilter_inline) {
@@ -884,6 +854,7 @@
 typedef struct TileBuffer {
   const uint8_t *data;
   size_t size;
+  int col;  // only used with multi-threaded decoding
 } TileBuffer;
 
 static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
@@ -952,9 +923,7 @@
 
   for (i = 0; i < MAX_MB_PLANE; ++i) {
     pd[i].dqcoeff = tile_data->dqcoeff[i];
-    pd[i].eobs    = tile_data->eobs[i];
     vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t));
-    vpx_memset(tile_data->token_cache, 0, sizeof(tile_data->token_cache));
   }
 }
 
@@ -970,22 +939,38 @@
     for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
          mi_col += MI_BLOCK_SIZE) {
       decode_modes_sb(tile_data->cm, &tile_data->xd, tile,
-                      mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64,
-                      tile_data->token_cache);
+                      mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64);
     }
   }
   return !tile_data->xd.corrupted;
 }
 
+// qsort() comparator: orders TileBuffers by size, largest first.
+static int compare_tile_buffers(const void *a, const void *b) {
+  const TileBuffer *const buf1 = (const TileBuffer*)a;
+  const TileBuffer *const buf2 = (const TileBuffer*)b;
+  if (buf1->size < buf2->size) {
+    return 1;
+  } else if (buf1->size == buf2->size) {
+    return 0;
+  } else {
+    return -1;
+  }
+}
+
 static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) {
   VP9_COMMON *const cm = &pbi->common;
+  const uint8_t *bit_reader_end = NULL;
   const uint8_t *const data_end = pbi->source + pbi->source_sz;
   const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
   const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols);
-  int tile_col = 0;
+  TileBuffer tile_buffers[1 << 6];
+  int n;
+  int final_worker = -1;
 
+  assert(tile_cols <= (1 << 6));
   assert(tile_rows == 1);
   (void)tile_rows;
 
@@ -1018,48 +1003,82 @@
   vpx_memset(pbi->above_seg_context, 0,
              sizeof(*pbi->above_seg_context) * aligned_mi_cols);
 
-  while (tile_col < tile_cols) {
+  // Load tile data into tile_buffers
+  for (n = 0; n < tile_cols; ++n) {
+    const size_t size =
+        get_tile(data_end, n == tile_cols - 1, &cm->error, &data);
+    TileBuffer *const buf = &tile_buffers[n];
+    buf->data = data;
+    buf->size = size;
+    buf->col = n;
+    data += size;
+  }
+
+  // Sort the buffers based on size in descending order.
+  qsort(tile_buffers, tile_cols, sizeof(tile_buffers[0]), compare_tile_buffers);
+
+  // Rearrange the tile buffers so that, within each group of num_workers
+  // tiles, the largest (and presumably the most difficult) tile comes last
+  // and is therefore decoded in the main thread. This should help minimize
+  // the number of times the main thread has to wait for a worker to complete.
+  {
+    int group_start = 0;
+    while (group_start < tile_cols) {
+      const TileBuffer largest = tile_buffers[group_start];
+      const int group_end = MIN(group_start + num_workers, tile_cols) - 1;
+      memmove(tile_buffers + group_start, tile_buffers + group_start + 1,
+              (group_end - group_start) * sizeof(tile_buffers[0]));
+      tile_buffers[group_end] = largest;
+      group_start = group_end + 1;
+    }
+  }
+
+  n = 0;
+  while (n < tile_cols) {
     int i;
-    for (i = 0; i < num_workers && tile_col < tile_cols; ++i) {
+    for (i = 0; i < num_workers && n < tile_cols; ++i) {
       VP9Worker *const worker = &pbi->tile_workers[i];
       TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
       TileInfo *const tile = (TileInfo*)worker->data2;
-      const size_t size =
-          get_tile(data_end, tile_col == tile_cols - 1, &cm->error, &data);
+      TileBuffer *const buf = &tile_buffers[n];
 
       tile_data->cm = cm;
       tile_data->xd = pbi->mb;
       tile_data->xd.corrupted = 0;
-      vp9_tile_init(tile, tile_data->cm, 0, tile_col);
+      vp9_tile_init(tile, tile_data->cm, 0, buf->col);
 
-      setup_token_decoder(data, data_end, size, &cm->error,
+      setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
                           &tile_data->bit_reader);
-      setup_tile_context(pbi, &tile_data->xd, 0, tile_col);
+      setup_tile_context(pbi, &tile_data->xd, 0, buf->col);
       setup_tile_macroblockd(tile_data);
 
       worker->had_error = 0;
-      if (i == num_workers - 1 || tile_col == tile_cols - 1) {
+      if (i == num_workers - 1 || n == tile_cols - 1) {
         vp9_worker_execute(worker);
       } else {
         vp9_worker_launch(worker);
       }
 
-      data += size;
-      ++tile_col;
+      if (buf->col == tile_cols - 1) {
+        final_worker = i;
+      }
+
+      ++n;
     }
 
     for (; i > 0; --i) {
       VP9Worker *const worker = &pbi->tile_workers[i - 1];
       pbi->mb.corrupted |= !vp9_worker_sync(worker);
     }
+    if (final_worker > -1) {
+      TileWorkerData *const tile_data =
+          (TileWorkerData*)pbi->tile_workers[final_worker].data1;
+      bit_reader_end = vp9_reader_find_end(&tile_data->bit_reader);
+      final_worker = -1;
+    }
   }
 
-  {
-    const int final_worker = (tile_cols + num_workers - 1) % num_workers;
-    TileWorkerData *const tile_data =
-        (TileWorkerData*)pbi->tile_workers[final_worker].data1;
-    return vp9_reader_find_end(&tile_data->bit_reader);
-  }
+  return bit_reader_end;
 }
 
 static void check_sync_code(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
@@ -1132,9 +1151,9 @@
       }
     }
 
-    pbi->refresh_frame_flags = (1 << NUM_REF_FRAMES) - 1;
+    pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1;
 
-    for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i)
+    for (i = 0; i < REFS_PER_FRAME; ++i)
       cm->active_ref_idx[i] = cm->new_fb_idx;
 
     setup_frame_size(pbi, rb);
@@ -1147,13 +1166,13 @@
     if (cm->intra_only) {
       check_sync_code(cm, rb);
 
-      pbi->refresh_frame_flags = vp9_rb_read_literal(rb, NUM_REF_FRAMES);
+      pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
       setup_frame_size(pbi, rb);
     } else {
-      pbi->refresh_frame_flags = vp9_rb_read_literal(rb, NUM_REF_FRAMES);
+      pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
 
-      for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) {
-        const int ref = vp9_rb_read_literal(rb, NUM_REF_FRAMES_LOG2);
+      for (i = 0; i < REFS_PER_FRAME; ++i) {
+        const int ref = vp9_rb_read_literal(rb, REF_FRAMES_LOG2);
         cm->active_ref_idx[i] = cm->ref_frame_map[ref];
         cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb);
       }
@@ -1163,7 +1182,7 @@
       cm->allow_high_precision_mv = vp9_rb_read_bit(rb);
       cm->mcomp_filter_type = read_interp_filter_type(rb);
 
-      for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) {
+      for (i = 0; i < REFS_PER_FRAME; ++i) {
         vp9_setup_scale_factors(cm, i);
         if (vp9_is_scaled(&cm->active_ref_scale_comm[i]))
           vp9_extend_frame_borders(&cm->yv12_fb[cm->active_ref_idx[i]],
@@ -1182,7 +1201,7 @@
 
   // This flag will be overridden by the call to vp9_setup_past_independence
   // below, forcing the use of context 0 for those frame types.
-  cm->frame_context_idx = vp9_rb_read_literal(rb, NUM_FRAME_CONTEXTS_LOG2);
+  cm->frame_context_idx = vp9_rb_read_literal(rb, FRAME_CONTEXTS_LOG2);
 
   if (frame_is_intra_only(cm) || cm->error_resilient_mode)
     vp9_setup_past_independence(cm);
@@ -1215,7 +1234,7 @@
 
   cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r);
   if (cm->tx_mode == TX_MODE_SELECT)
-    read_tx_probs(&fc->tx_probs, &r);
+    read_tx_mode_probs(&fc->tx_probs, &r);
   read_coef_probs(fc, cm->tx_mode, &r);
 
   for (k = 0; k < MBSKIP_CONTEXTS; ++k)
@@ -1233,7 +1252,8 @@
     for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
       vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]);
 
-    read_comp_pred(cm, &r);
+    cm->reference_mode = read_reference_mode(cm, &r);
+    read_reference_mode_probs(cm, &r);
 
     for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
       for (i = 0; i < INTRA_MODES - 1; ++i)
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 327a916..164576d 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -63,7 +63,7 @@
 
 static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
                                      TX_SIZE max_tx_size, vp9_reader *r) {
-  const int ctx = vp9_get_pred_context_tx_size(xd);
+  const int ctx = vp9_get_tx_size_context(xd);
   const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc.tx_probs);
   TX_SIZE tx_size = vp9_read(r, tx_probs[0]);
   if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
@@ -152,7 +152,7 @@
   if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
     return 1;
   } else {
-    const int ctx = vp9_get_pred_context_mbskip(xd);
+    const int ctx = vp9_get_skip_context(xd);
     const int skip = vp9_read(r, cm->fc.mbskip_probs[ctx]);
     if (!cm->frame_parallel_decoding_mode)
       ++cm->counts.mbskip[ctx][skip];
@@ -258,10 +258,9 @@
   mv->col = ref->col + diff.col;
 }
 
-static REFERENCE_MODE read_reference_mode(VP9_COMMON *cm,
-                                             const MACROBLOCKD *xd,
-                                             vp9_reader *r) {
-  const int ctx = vp9_get_pred_context_comp_inter_inter(cm, xd);
+static REFERENCE_MODE read_reference_mode(VP9_COMMON *cm, const MACROBLOCKD *xd,
+                                          vp9_reader *r) {
+  const int ctx = vp9_get_reference_mode_context(cm, xd);
   const int mode = vp9_read(r, cm->fc.comp_inter_prob[ctx]);
   if (!cm->frame_parallel_decoding_mode)
     ++cm->counts.comp_inter[ctx][mode];
@@ -279,9 +278,9 @@
     ref_frame[0] = vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
     ref_frame[1] = NONE;
   } else {
-    const REFERENCE_MODE mode = (cm->comp_pred_mode == REFERENCE_MODE_SELECT)
+    const REFERENCE_MODE mode = (cm->reference_mode == REFERENCE_MODE_SELECT)
                                       ? read_reference_mode(cm, xd, r)
-                                      : cm->comp_pred_mode;
+                                      : cm->reference_mode;
 
     // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding
     if (mode == COMPOUND_REFERENCE) {
@@ -408,8 +407,8 @@
     return vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) !=
            INTRA_FRAME;
   } else {
-    const int ctx = vp9_get_pred_context_intra_inter(xd);
-    const int is_inter = vp9_read(r, vp9_get_pred_prob_intra_inter(cm, xd));
+    const int ctx = vp9_get_intra_inter_context(xd);
+    const int is_inter = vp9_read(r, cm->fc.intra_inter_prob[ctx]);
     if (!cm->frame_parallel_decoding_mode)
       ++cm->counts.intra_inter[ctx][is_inter];
     return is_inter;
@@ -426,19 +425,18 @@
   const int allow_hp = cm->allow_high_precision_mv;
 
   int_mv nearest[2], nearmv[2], best[2];
-  uint8_t inter_mode_ctx;
-  MV_REFERENCE_FRAME ref0;
-  int is_compound;
+  int inter_mode_ctx, ref, is_compound;
 
-  mbmi->uv_mode = DC_PRED;
   read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
-  ref0 = mbmi->ref_frame[0];
   is_compound = has_second_ref(mbmi);
 
-  vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, ref0, mbmi->ref_mvs[ref0],
-                   mi_row, mi_col);
+  for (ref = 0; ref < 1 + is_compound; ++ref) {
+    const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+    vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, frame, mbmi->ref_mvs[frame],
+                     mi_row, mi_col);
+  }
 
-  inter_mode_ctx = mbmi->mode_context[ref0];
+  inter_mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
 
   if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
     mbmi->mode = ZEROMV;
@@ -452,22 +450,11 @@
       mbmi->mode = read_inter_mode(cm, r, inter_mode_ctx);
   }
 
-  // nearest, nearby
   if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
-    vp9_find_best_ref_mvs(xd, allow_hp,
-                          mbmi->ref_mvs[ref0], &nearest[0], &nearmv[0]);
-    best[0].as_int = nearest[0].as_int;
-  }
-
-  if (is_compound) {
-    const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];
-    vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi,
-                     ref1, mbmi->ref_mvs[ref1], mi_row, mi_col);
-
-    if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
-      vp9_find_best_ref_mvs(xd, allow_hp,
-                            mbmi->ref_mvs[ref1], &nearest[1], &nearmv[1]);
-      best[1].as_int = nearest[1].as_int;
+    for (ref = 0; ref < 1 + is_compound; ++ref) {
+      vp9_find_best_ref_mvs(xd, allow_hp, mbmi->ref_mvs[mbmi->ref_frame[ref]],
+                            &nearest[ref], &nearmv[ref]);
+      best[ref].as_int = nearest[ref].as_int;
     }
   }
 
@@ -486,16 +473,10 @@
         const int j = idy * 2 + idx;
         b_mode = read_inter_mode(cm, r, inter_mode_ctx);
 
-        if (b_mode == NEARESTMV || b_mode == NEARMV) {
-          vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, &nearest[0],
-                                        &nearmv[0], j, 0,
-                                        mi_row, mi_col);
-
-          if (is_compound)
-            vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, &nearest[1],
-                                          &nearmv[1], j, 1,
-                                          mi_row, mi_col);
-        }
+        if (b_mode == NEARESTMV || b_mode == NEARMV)
+          for (ref = 0; ref < 1 + is_compound; ++ref)
+            vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, &nearest[ref],
+                                          &nearmv[ref], j, ref, mi_row, mi_col);
 
         if (!assign_mv(cm, b_mode, block, best, nearest, nearmv,
                        is_compound, allow_hp, r)) {
@@ -503,7 +484,6 @@
           break;
         };
 
-
         mi->bmi[j].as_mv[0].as_int = block[0].as_int;
         if (is_compound)
           mi->bmi[j].as_mv[1].as_int = block[1].as_int;
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 49da1a0..63f1731 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -13,11 +13,8 @@
 
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_seg_common.h"
 
-#include "vp9/decoder/vp9_dboolhuff.h"
 #include "vp9/decoder/vp9_detokenize.h"
-#include "vp9/decoder/vp9_onyxd_int.h"
 
 #define EOB_CONTEXT_NODE            0
 #define ZERO_CONTEXT_NODE           1
@@ -60,57 +57,48 @@
   254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0
 };
 
-static const int token_to_counttoken[MAX_ENTROPY_TOKENS] = {
-  ZERO_TOKEN, ONE_TOKEN, TWO_TOKEN, TWO_TOKEN,
-  TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, TWO_TOKEN,
-  TWO_TOKEN, TWO_TOKEN, TWO_TOKEN, DCT_EOB_MODEL_TOKEN
-};
-
 #define INCREMENT_COUNT(token)                              \
   do {                                                      \
      if (!cm->frame_parallel_decoding_mode)                 \
-       ++coef_counts[band][pt][token_to_counttoken[token]]; \
+       ++coef_counts[band][ctx][token];                     \
   } while (0)
 
-
 #define WRITE_COEF_CONTINUE(val, token)                  \
   {                                                      \
-    v = (val * dqv) >> dq_shift; \
-    dqcoeff_ptr[scan[c]] = (vp9_read_bit(r) ? -v : v); \
-    INCREMENT_COUNT(token);                              \
+    v = (val * dqv) >> dq_shift;                         \
+    dqcoeff[scan[c]] = vp9_read_bit(r) ? -v : v;         \
     token_cache[scan[c]] = vp9_pt_energy_class[token];   \
     ++c;                                                 \
-    pt = get_coef_context(nb, token_cache, c);           \
-    dqv = dq[1];                                          \
+    ctx = get_coef_context(nb, token_cache, c);          \
+    dqv = dq[1];                                         \
     continue;                                            \
   }
 
-
 #define ADJUST_COEF(prob, bits_count)                   \
   do {                                                  \
     val += (vp9_read(r, prob) << bits_count);           \
   } while (0)
 
-static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
-                        vp9_reader *r, int block_idx,
-                        PLANE_TYPE type, int max_eob, int16_t *dqcoeff_ptr,
-                        TX_SIZE tx_size, const int16_t *dq, int pt,
-                        uint8_t *token_cache) {
+static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, int block,
+                        PLANE_TYPE type, int16_t *dqcoeff, TX_SIZE tx_size,
+                        const int16_t *dq, int ctx, vp9_reader *r) {
+  const int max_eob = 16 << (tx_size << 1);
   const FRAME_CONTEXT *const fc = &cm->fc;
   FRAME_COUNTS *const counts = &cm->counts;
   const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi);
   int band, c = 0;
-  const vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES] =
+  const vp9_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
       fc->coef_probs[tx_size][type][ref];
   const vp9_prob *prob;
-  unsigned int (*coef_counts)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES + 1] =
+  unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1] =
       counts->coef[tx_size][type][ref];
-  unsigned int (*eob_branch_count)[PREV_COEF_CONTEXTS] =
+  unsigned int (*eob_branch_count)[COEFF_CONTEXTS] =
       counts->eob_branch[tx_size][type][ref];
+  uint8_t token_cache[32 * 32];
   const uint8_t *cat6;
   const uint8_t *band_translate = get_band_translate(tx_size);
   const int dq_shift = (tx_size == TX_32X32);
-  const scan_order *so = get_scan(xd, tx_size, type, block_idx);
+  const scan_order *so = get_scan(xd, tx_size, type, block);
   const int16_t *scan = so->scan;
   const int16_t *nb = so->neighbors;
   int v;
@@ -119,34 +107,36 @@
   while (c < max_eob) {
     int val;
     band = *band_translate++;
-    prob = coef_probs[band][pt];
+    prob = coef_probs[band][ctx];
     if (!cm->frame_parallel_decoding_mode)
-      ++eob_branch_count[band][pt];
+      ++eob_branch_count[band][ctx];
     if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) {
-      if (!cm->frame_parallel_decoding_mode)
-        ++coef_counts[band][pt][DCT_EOB_MODEL_TOKEN];
+      INCREMENT_COUNT(EOB_MODEL_TOKEN);
       break;
     }
 
     while (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
       INCREMENT_COUNT(ZERO_TOKEN);
       dqv = dq[1];
+      token_cache[scan[c]] = 0;
       ++c;
       if (c >= max_eob)
         return c;  // zero tokens at the end (no eob token)
-      pt = get_coef_context(nb, token_cache, c);
+      ctx = get_coef_context(nb, token_cache, c);
       band = *band_translate++;
-      prob = coef_probs[band][pt];
+      prob = coef_probs[band][ctx];
     }
 
     // ONE_CONTEXT_NODE_0_
     if (!vp9_read(r, prob[ONE_CONTEXT_NODE])) {
+      INCREMENT_COUNT(ONE_TOKEN);
       WRITE_COEF_CONTINUE(1, ONE_TOKEN);
     }
 
+    INCREMENT_COUNT(TWO_TOKEN);
+
     prob = vp9_pareto8_full[prob[PIVOT_NODE] - 1];
 
-    // LOW_VAL_CONTEXT_NODE_0_
     if (!vp9_read(r, prob[LOW_VAL_CONTEXT_NODE])) {
       if (!vp9_read(r, prob[TWO_CONTEXT_NODE])) {
         WRITE_COEF_CONTINUE(2, TWO_TOKEN);
@@ -156,35 +146,35 @@
       }
       WRITE_COEF_CONTINUE(4, FOUR_TOKEN);
     }
-    // HIGH_LOW_CONTEXT_NODE_0_
+
     if (!vp9_read(r, prob[HIGH_LOW_CONTEXT_NODE])) {
       if (!vp9_read(r, prob[CAT_ONE_CONTEXT_NODE])) {
         val = CAT1_MIN_VAL;
         ADJUST_COEF(CAT1_PROB0, 0);
-        WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY1);
+        WRITE_COEF_CONTINUE(val, CATEGORY1_TOKEN);
       }
       val = CAT2_MIN_VAL;
       ADJUST_COEF(CAT2_PROB1, 1);
       ADJUST_COEF(CAT2_PROB0, 0);
-      WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY2);
+      WRITE_COEF_CONTINUE(val, CATEGORY2_TOKEN);
     }
-    // CAT_THREEFOUR_CONTEXT_NODE_0_
+
     if (!vp9_read(r, prob[CAT_THREEFOUR_CONTEXT_NODE])) {
       if (!vp9_read(r, prob[CAT_THREE_CONTEXT_NODE])) {
         val = CAT3_MIN_VAL;
         ADJUST_COEF(CAT3_PROB2, 2);
         ADJUST_COEF(CAT3_PROB1, 1);
         ADJUST_COEF(CAT3_PROB0, 0);
-        WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY3);
+        WRITE_COEF_CONTINUE(val, CATEGORY3_TOKEN);
       }
       val = CAT4_MIN_VAL;
       ADJUST_COEF(CAT4_PROB3, 3);
       ADJUST_COEF(CAT4_PROB2, 2);
       ADJUST_COEF(CAT4_PROB1, 1);
       ADJUST_COEF(CAT4_PROB0, 0);
-      WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY4);
+      WRITE_COEF_CONTINUE(val, CATEGORY4_TOKEN);
     }
-    // CAT_FIVE_CONTEXT_NODE_0_:
+
     if (!vp9_read(r, prob[CAT_FIVE_CONTEXT_NODE])) {
       val = CAT5_MIN_VAL;
       ADJUST_COEF(CAT5_PROB4, 4);
@@ -192,7 +182,7 @@
       ADJUST_COEF(CAT5_PROB2, 2);
       ADJUST_COEF(CAT5_PROB1, 1);
       ADJUST_COEF(CAT5_PROB0, 0);
-      WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY5);
+      WRITE_COEF_CONTINUE(val, CATEGORY5_TOKEN);
     }
     val = 0;
     cat6 = cat6_prob;
@@ -200,7 +190,7 @@
       val = (val << 1) | vp9_read(r, *cat6++);
     val += CAT6_MIN_VAL;
 
-    WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY6);
+    WRITE_COEF_CONTINUE(val, CATEGORY6_TOKEN);
   }
 
   return c;
@@ -208,18 +198,14 @@
 
 int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
                             int plane, int block, BLOCK_SIZE plane_bsize,
-                            int x, int y, TX_SIZE tx_size, vp9_reader *r,
-                            uint8_t *token_cache) {
+                            int x, int y, TX_SIZE tx_size, vp9_reader *r) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id,
-                                 tx_size);
-  const int pt = get_entropy_context(tx_size, pd->above_context + x,
-                                              pd->left_context + y);
-  const int eob = decode_coefs(cm, xd, r, block, pd->plane_type, seg_eob,
+  const int ctx = get_entropy_context(tx_size, pd->above_context + x,
+                                               pd->left_context + y);
+  const int eob = decode_coefs(cm, xd, block, pd->plane_type,
                                BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
-                               pd->dequant, pt, token_cache);
+                               pd->dequant, ctx, r);
   set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y);
-  pd->eobs[block] = eob;
   return eob;
 }
 
diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h
index e858a19..2a88073 100644
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -17,7 +17,6 @@
 
 int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
                             int plane, int block, BLOCK_SIZE plane_bsize,
-                            int x, int y, TX_SIZE tx_size, vp9_reader *r,
-                            uint8_t *token_cache);
+                            int x, int y, TX_SIZE tx_size, vp9_reader *r);
 
 #endif  // VP9_DECODER_VP9_DETOKENIZE_H_
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index 740ad72..e24ba42 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -112,10 +112,8 @@
   struct macroblockd_plane *const pd = xd->plane;
   int i;
 
-  for (i = 0; i < MAX_MB_PLANE; ++i) {
+  for (i = 0; i < MAX_MB_PLANE; ++i)
     pd[i].dqcoeff = pbi->dqcoeff[i];
-    pd[i].eobs    = pbi->eobs[i];
-  }
 }
 
 VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
@@ -127,6 +125,9 @@
 
   vp9_zero(*pbi);
 
+  // Initialize the references to not point to any frame buffers.
+  memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
+
   if (setjmp(cm->error.jmp)) {
     cm->error.setjmp = 0;
     vp9_remove_decompressor(pbi);
@@ -260,7 +261,7 @@
   VP9D_COMP *pbi = (VP9D_COMP *) ptr;
   VP9_COMMON *cm = &pbi->common;
 
-  if (index < 0 || index >= NUM_REF_FRAMES)
+  if (index < 0 || index >= REF_FRAMES)
     return -1;
 
   *fb = &cm->yv12_fb[cm->ref_frame_map[index]];
@@ -378,10 +379,6 @@
                            cm->current_video_frame + 3000);
 #endif
 
-  vp9_extend_frame_inner_borders(cm->frame_to_show,
-                                 cm->subsampling_x,
-                                 cm->subsampling_y);
-
 #if WRITE_RECON_BUFFER == 1
   if (cm->show_frame)
     recon_write_yuv_frame("recon.yuv", cm->frame_to_show,
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index 3d1b97b..e90f892 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -23,7 +23,6 @@
   DECLARE_ALIGNED(16, VP9_COMMON, common);
 
   DECLARE_ALIGNED(16, int16_t,  dqcoeff[MAX_MB_PLANE][64 * 64]);
-  DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
 
   VP9D_CONFIG oxcf;
 
@@ -52,8 +51,6 @@
 
   ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
   PARTITION_CONTEXT *above_seg_context;
-
-  DECLARE_ALIGNED(16, uint8_t, token_cache[1024]);
 } VP9D_COMP;
 
 #endif  // VP9_DECODER_VP9_ONYXD_INT_H_
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 9f79f8c..1bbb12c 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -44,114 +44,11 @@
 int intra_mode_stats[INTRA_MODES]
                     [INTRA_MODES]
                     [INTRA_MODES];
-vp9_coeff_stats tree_update_hist[TX_SIZES][BLOCK_TYPES];
+vp9_coeff_stats tree_update_hist[TX_SIZES][PLANE_TYPES];
 
 extern unsigned int active_section;
 #endif
 
-
-#ifdef MODE_STATS
-int64_t tx_count_32x32p_stats[TX_SIZE_CONTEXTS][TX_SIZES];
-int64_t tx_count_16x16p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 1];
-int64_t tx_count_8x8p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 2];
-int64_t switchable_interp_stats[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
-
-void init_tx_count_stats() {
-  vp9_zero(tx_count_32x32p_stats);
-  vp9_zero(tx_count_16x16p_stats);
-  vp9_zero(tx_count_8x8p_stats);
-}
-
-void init_switchable_interp_stats() {
-  vp9_zero(switchable_interp_stats);
-}
-
-static void update_tx_count_stats(VP9_COMMON *cm) {
-  int i, j;
-  for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
-    for (j = 0; j < TX_SIZES; j++) {
-      tx_count_32x32p_stats[i][j] += cm->fc.tx_count_32x32p[i][j];
-    }
-  }
-  for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
-    for (j = 0; j < TX_SIZES - 1; j++) {
-      tx_count_16x16p_stats[i][j] += cm->fc.tx_count_16x16p[i][j];
-    }
-  }
-  for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
-    for (j = 0; j < TX_SIZES - 2; j++) {
-      tx_count_8x8p_stats[i][j] += cm->fc.tx_count_8x8p[i][j];
-    }
-  }
-}
-
-static void update_switchable_interp_stats(VP9_COMMON *cm) {
-  int i, j;
-  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-    for (j = 0; j < SWITCHABLE_FILTERS; ++j)
-      switchable_interp_stats[i][j] += cm->fc.switchable_interp_count[i][j];
-}
-
-void write_tx_count_stats() {
-  int i, j;
-  FILE *fp = fopen("tx_count.bin", "wb");
-  fwrite(tx_count_32x32p_stats, sizeof(tx_count_32x32p_stats), 1, fp);
-  fwrite(tx_count_16x16p_stats, sizeof(tx_count_16x16p_stats), 1, fp);
-  fwrite(tx_count_8x8p_stats, sizeof(tx_count_8x8p_stats), 1, fp);
-  fclose(fp);
-
-  printf(
-      "vp9_default_tx_count_32x32p[TX_SIZE_CONTEXTS][TX_SIZES] = {\n");
-  for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
-    printf("  { ");
-    for (j = 0; j < TX_SIZES; j++) {
-      printf("%"PRId64", ", tx_count_32x32p_stats[i][j]);
-    }
-    printf("},\n");
-  }
-  printf("};\n");
-  printf(
-      "vp9_default_tx_count_16x16p[TX_SIZE_CONTEXTS][TX_SIZES-1] = {\n");
-  for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
-    printf("  { ");
-    for (j = 0; j < TX_SIZES - 1; j++) {
-      printf("%"PRId64", ", tx_count_16x16p_stats[i][j]);
-    }
-    printf("},\n");
-  }
-  printf("};\n");
-  printf(
-      "vp9_default_tx_count_8x8p[TX_SIZE_CONTEXTS][TX_SIZES-2] = {\n");
-  for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
-    printf("  { ");
-    for (j = 0; j < TX_SIZES - 2; j++) {
-      printf("%"PRId64", ", tx_count_8x8p_stats[i][j]);
-    }
-    printf("},\n");
-  }
-  printf("};\n");
-}
-
-void write_switchable_interp_stats() {
-  int i, j;
-  FILE *fp = fopen("switchable_interp.bin", "wb");
-  fwrite(switchable_interp_stats, sizeof(switchable_interp_stats), 1, fp);
-  fclose(fp);
-
-  printf(
-      "vp9_default_switchable_filter_count[SWITCHABLE_FILTER_CONTEXTS]"
-      "[SWITCHABLE_FILTERS] = {\n");
-  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
-    printf("  { ");
-    for (j = 0; j < SWITCHABLE_FILTERS; j++) {
-      printf("%"PRId64", ", switchable_interp_stats[i][j]);
-    }
-    printf("},\n");
-  }
-  printf("};\n");
-}
-#endif
-
 static struct vp9_token intra_mode_encodings[INTRA_MODES];
 static struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS];
 static struct vp9_token partition_encodings[PARTITION_TYPES];
@@ -224,9 +121,9 @@
   if (vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
     return 1;
   } else {
-    const int skip_coeff = m->mbmi.skip_coeff;
-    vp9_write(w, skip_coeff, vp9_get_pred_prob_mbskip(&cpi->common, xd));
-    return skip_coeff;
+    const int skip = m->mbmi.skip_coeff;
+    vp9_write(w, skip, vp9_get_skip_prob(&cpi->common, xd));
+    return skip;
   }
 }
 
@@ -279,7 +176,7 @@
     // is split into two treed writes.  The first treed write takes care of the
     // unconstrained nodes.  The second treed write takes care of the
     // constrained nodes.
-    if (t >= TWO_TOKEN && t < DCT_EOB_TOKEN) {
+    if (t >= TWO_TOKEN && t < EOB_TOKEN) {
       int len = UNCONSTRAINED_NODES - p->skip_eob_node;
       int bits = v >> (n - len);
       treed_write(w, vp9_coef_tree, p->context_tree, bits, len, i);
@@ -334,12 +231,12 @@
   if (!seg_ref_active) {
     // does the feature use compound prediction or not
     // (if not specified at the frame/segment level)
-    if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) {
+    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
       vp9_write(bc, mi->ref_frame[1] > INTRA_FRAME,
-                vp9_get_pred_prob_comp_inter_inter(cm, xd));
+                vp9_get_reference_mode_prob(cm, xd));
     } else {
       assert((mi->ref_frame[1] <= INTRA_FRAME) ==
-                 (cm->comp_pred_mode == SINGLE_REFERENCE));
+             (cm->reference_mode == SINGLE_REFERENCE));
     }
 
     if (mi->ref_frame[1] > INTRA_FRAME) {
@@ -395,8 +292,7 @@
   skip_coeff = write_skip_coeff(cpi, segment_id, m, bc);
 
   if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
-    vp9_write(bc, rf != INTRA_FRAME,
-              vp9_get_pred_prob_intra_inter(cm, xd));
+    vp9_write(bc, rf != INTRA_FRAME, vp9_get_intra_inter_prob(cm, xd));
 
   if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
       !(rf != INTRA_FRAME &&
@@ -656,17 +552,15 @@
 static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) {
   vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[tx_size];
   vp9_coeff_count *coef_counts = cpi->coef_counts[tx_size];
-  unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] =
+  unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
       cpi->common.counts.eob_branch[tx_size];
   vp9_coeff_stats *coef_branch_ct = cpi->frame_branch_ct[tx_size];
   int i, j, k, l, m;
 
-  for (i = 0; i < BLOCK_TYPES; ++i) {
+  for (i = 0; i < PLANE_TYPES; ++i) {
     for (j = 0; j < REF_TYPES; ++j) {
       for (k = 0; k < COEF_BANDS; ++k) {
-        for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
-          if (l >= 3 && k == 0)
-            continue;
+        for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
           vp9_tree_probs_from_distribution(vp9_coef_tree,
                                            coef_branch_ct[i][j][k][l],
                                            coef_counts[i][j][k][l]);
@@ -679,10 +573,10 @@
 #ifdef ENTROPY_STATS
           if (!cpi->dummy_packing) {
             int t;
-            for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
+            for (t = 0; t < ENTROPY_TOKENS; ++t)
               context_counters[tx_size][i][j][k][l][t] +=
                   coef_counts[i][j][k][l][t];
-            context_counters[tx_size][i][j][k][l][MAX_ENTROPY_TOKENS] +=
+            context_counters[tx_size][i][j][k][l][ENTROPY_TOKENS] +=
                 eob_branch_ct[i][j][k][l];
           }
 #endif
@@ -706,18 +600,15 @@
       /* dry run to see if there is any udpate at all needed */
       int savings = 0;
       int update[2] = {0, 0};
-      for (i = 0; i < BLOCK_TYPES; ++i) {
+      for (i = 0; i < PLANE_TYPES; ++i) {
         for (j = 0; j < REF_TYPES; ++j) {
           for (k = 0; k < COEF_BANDS; ++k) {
-            for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+            for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
               for (t = 0; t < entropy_nodes_update; ++t) {
                 vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
                 const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t];
                 int s;
                 int u = 0;
-
-                if (l >= 3 && k == 0)
-                  continue;
                 if (t == PIVOT_NODE)
                   s = vp9_prob_diff_update_savings_search_model(
                       frame_branch_ct[i][j][k][l][0],
@@ -745,10 +636,10 @@
         return;
       }
       vp9_write_bit(bc, 1);
-      for (i = 0; i < BLOCK_TYPES; ++i) {
+      for (i = 0; i < PLANE_TYPES; ++i) {
         for (j = 0; j < REF_TYPES; ++j) {
           for (k = 0; k < COEF_BANDS; ++k) {
-            for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+            for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
               // calc probs and branch cts for this frame only
               for (t = 0; t < entropy_nodes_update; ++t) {
                 vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
@@ -756,8 +647,6 @@
                 const vp9_prob upd = DIFF_UPDATE_PROB;
                 int s;
                 int u = 0;
-                if (l >= 3 && k == 0)
-                  continue;
                 if (t == PIVOT_NODE)
                   s = vp9_prob_diff_update_savings_search_model(
                       frame_branch_ct[i][j][k][l][0],
@@ -789,25 +678,23 @@
     case 1:
     case 2: {
       const int prev_coef_contexts_to_update =
-          (cpi->sf.use_fast_coef_updates == 2 ?
-           PREV_COEF_CONTEXTS >> 1 : PREV_COEF_CONTEXTS);
+          cpi->sf.use_fast_coef_updates == 2 ? COEFF_CONTEXTS >> 1
+                                             : COEFF_CONTEXTS;
       const int coef_band_to_update =
-          (cpi->sf.use_fast_coef_updates == 2 ?
-           COEF_BANDS >> 1 : COEF_BANDS);
+          cpi->sf.use_fast_coef_updates == 2 ? COEF_BANDS >> 1
+                                             : COEF_BANDS;
       int updates = 0;
       int noupdates_before_first = 0;
-      for (i = 0; i < BLOCK_TYPES; ++i) {
+      for (i = 0; i < PLANE_TYPES; ++i) {
         for (j = 0; j < REF_TYPES; ++j) {
           for (k = 0; k < COEF_BANDS; ++k) {
-            for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+            for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
               // calc probs and branch cts for this frame only
               for (t = 0; t < entropy_nodes_update; ++t) {
                 vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
                 vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
                 int s;
                 int u = 0;
-                if (l >= 3 && k == 0)
-                  continue;
                 if (l >= prev_coef_contexts_to_update ||
                     k >= coef_band_to_update) {
                   u = 0;
@@ -1217,11 +1104,11 @@
 static void write_frame_size_with_refs(VP9_COMP *cpi,
                                        struct vp9_write_bit_buffer *wb) {
   VP9_COMMON *const cm = &cpi->common;
-  int refs[ALLOWED_REFS_PER_FRAME] = {cpi->lst_fb_idx, cpi->gld_fb_idx,
-                                      cpi->alt_fb_idx};
+  int refs[REFS_PER_FRAME] = {cpi->lst_fb_idx, cpi->gld_fb_idx,
+                              cpi->alt_fb_idx};
   int i, found = 0;
 
-  for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) {
+  for (i = 0; i < REFS_PER_FRAME; ++i) {
     YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->ref_frame_map[refs[i]]];
     found = cm->width == cfg->y_crop_width &&
             cm->height == cfg->y_crop_height;
@@ -1286,8 +1173,8 @@
 
     write_frame_size(cpi, wb);
   } else {
-    const int refs[ALLOWED_REFS_PER_FRAME] = {cpi->lst_fb_idx, cpi->gld_fb_idx,
-                                              cpi->alt_fb_idx};
+    const int refs[REFS_PER_FRAME] = {cpi->lst_fb_idx, cpi->gld_fb_idx,
+                                      cpi->alt_fb_idx};
     if (!cm->show_frame)
       vp9_wb_write_bit(wb, cm->intra_only);
 
@@ -1297,13 +1184,13 @@
     if (cm->intra_only) {
       write_sync_code(wb);
 
-      vp9_wb_write_literal(wb, get_refresh_mask(cpi), NUM_REF_FRAMES);
+      vp9_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
       write_frame_size(cpi, wb);
     } else {
       int i;
-      vp9_wb_write_literal(wb, get_refresh_mask(cpi), NUM_REF_FRAMES);
-      for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) {
-        vp9_wb_write_literal(wb, refs[i], NUM_REF_FRAMES_LOG2);
+      vp9_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+      for (i = 0; i < REFS_PER_FRAME; ++i) {
+        vp9_wb_write_literal(wb, refs[i], REF_FRAMES_LOG2);
         vp9_wb_write_bit(wb, cm->ref_frame_sign_bias[LAST_FRAME + i]);
       }
 
@@ -1321,7 +1208,7 @@
     vp9_wb_write_bit(wb, cm->frame_parallel_decoding_mode);
   }
 
-  vp9_wb_write_literal(wb, cm->frame_context_idx, NUM_FRAME_CONTEXTS_LOG2);
+  vp9_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);
 
   encode_loopfilter(&cm->lf, wb);
   encode_quantization(cm, wb);
@@ -1371,9 +1258,9 @@
                                 cpi->intra_inter_count[i]);
 
     if (cm->allow_comp_inter_inter) {
-      const int comp_pred_mode = cpi->common.comp_pred_mode;
-      const int use_compound_pred = comp_pred_mode != SINGLE_REFERENCE;
-      const int use_hybrid_pred = comp_pred_mode == REFERENCE_MODE_SELECT;
+      const int reference_mode = cpi->common.reference_mode;
+      const int use_compound_pred = reference_mode != SINGLE_REFERENCE;
+      const int use_hybrid_pred = reference_mode == REFERENCE_MODE_SELECT;
 
       vp9_write_bit(&header_bc, use_compound_pred);
       if (use_compound_pred) {
@@ -1385,7 +1272,7 @@
       }
     }
 
-    if (cm->comp_pred_mode != COMPOUND_REFERENCE) {
+    if (cm->reference_mode != COMPOUND_REFERENCE) {
       for (i = 0; i < REF_CONTEXTS; i++) {
         vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0],
                                   cpi->single_ref_count[i][0]);
@@ -1394,7 +1281,7 @@
       }
     }
 
-    if (cm->comp_pred_mode != SINGLE_REFERENCE)
+    if (cm->reference_mode != SINGLE_REFERENCE)
       for (i = 0; i < REF_CONTEXTS; i++)
         vp9_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i],
                                   cpi->comp_ref_count[i]);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 71f7e7a..0088338 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -71,6 +71,7 @@
   DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]);
   int16_t *qcoeff;
   int16_t *coeff;
+  uint16_t *eobs;
   struct buf_2d src;
 
   // Quantizer setings
@@ -85,8 +86,8 @@
 
 /* The [2] dimension is for whether we skip the EOB node (i.e. if previous
  * coefficient in this block was zero) or not. */
-typedef unsigned int vp9_coeff_cost[BLOCK_TYPES][REF_TYPES][COEF_BANDS][2]
-                                   [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
+typedef unsigned int vp9_coeff_cost[PLANE_TYPES][REF_TYPES][COEF_BANDS][2]
+                                   [COEFF_CONTEXTS][ENTROPY_TOKENS];
 
 typedef struct macroblock MACROBLOCK;
 struct macroblock {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 89da781..21cace6 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -441,14 +441,14 @@
     p[i].coeff = ctx->coeff_pbuf[i][1];
     p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
     pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
-    pd[i].eobs = ctx->eobs_pbuf[i][1];
+    p[i].eobs = ctx->eobs_pbuf[i][1];
   }
 
   for (i = max_plane; i < MAX_MB_PLANE; ++i) {
     p[i].coeff = ctx->coeff_pbuf[i][2];
     p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
     pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
-    pd[i].eobs = ctx->eobs_pbuf[i][2];
+    p[i].eobs = ctx->eobs_pbuf[i][2];
   }
 
   // Restore the coding context of the MB to that that was in place
@@ -677,7 +677,7 @@
     p[i].coeff = ctx->coeff_pbuf[i][0];
     p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
     pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
-    pd[i].eobs = ctx->eobs_pbuf[i][0];
+    p[i].eobs = ctx->eobs_pbuf[i][0];
   }
   ctx->is_coded = 0;
   x->skip_recode = 0;
@@ -751,15 +751,15 @@
                                                      SEG_LVL_REF_FRAME);
 
     if (!seg_ref_active)
-      cpi->intra_inter_count[vp9_get_pred_context_intra_inter(xd)]
+      cpi->intra_inter_count[vp9_get_intra_inter_context(xd)]
                             [is_inter_block(mbmi)]++;
 
     // If the segment reference feature is enabled we have only a single
     // reference frame allowed for the segment so exclude it from
     // the reference frame counts used to work out probabilities.
     if (is_inter_block(mbmi) && !seg_ref_active) {
-      if (cm->comp_pred_mode == REFERENCE_MODE_SELECT)
-        cpi->comp_inter_count[vp9_get_pred_context_comp_inter_inter(cm, xd)]
+      if (cm->reference_mode == REFERENCE_MODE_SELECT)
+        cpi->comp_inter_count[vp9_get_reference_mode_context(cm, xd)]
                              [has_second_ref(mbmi)]++;
 
       if (has_second_ref(mbmi)) {
@@ -1284,7 +1284,8 @@
       split_dist += dt;
       pl = partition_plane_context(cpi->above_seg_context,
                                    cpi->left_seg_context,
-                                   mi_row + y_idx, mi_col + x_idx, bsize);
+                                   mi_row + y_idx, mi_col + x_idx,
+                                   split_subsize);
       split_rate += x->partition_cost[pl][PARTITION_NONE];
     }
     pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
@@ -1640,7 +1641,8 @@
       }
       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
       if (sum_rd < best_rd) {
-        int64_t stop_thresh = 2048;
+        int64_t stop_thresh = 4096;
+        int64_t stop_thresh_rd;
 
         best_rate = this_rate;
         best_dist = this_dist;
@@ -1652,9 +1654,10 @@
         stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
             b_height_log2_lookup[bsize]);
 
+        stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh);
         // If obtained distortion is very small, choose current partition
         // and stop splitting.
-        if (this_dist < stop_thresh) {
+        if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) {
           do_split = 0;
           do_rect = 0;
         }
@@ -2359,7 +2362,7 @@
 
     /* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */
     select_tx_mode(cpi);
-    cpi->common.comp_pred_mode = pred_type;
+    cpi->common.reference_mode = pred_type;
     cpi->common.mcomp_filter_type = filter_type;
     encode_frame_internal(cpi);
 
@@ -2386,7 +2389,7 @@
       cpi->rd_tx_select_threshes[frame_type][i] /= 2;
     }
 
-    if (cpi->common.comp_pred_mode == REFERENCE_MODE_SELECT) {
+    if (cpi->common.reference_mode == REFERENCE_MODE_SELECT) {
       int single_count_zero = 0;
       int comp_count_zero = 0;
 
@@ -2396,10 +2399,10 @@
       }
 
       if (comp_count_zero == 0) {
-        cpi->common.comp_pred_mode = SINGLE_REFERENCE;
+        cpi->common.reference_mode = SINGLE_REFERENCE;
         vp9_zero(cpi->comp_inter_count);
       } else if (single_count_zero == 0) {
-        cpi->common.comp_pred_mode = COMPOUND_REFERENCE;
+        cpi->common.reference_mode = COMPOUND_REFERENCE;
         vp9_zero(cpi->comp_inter_count);
       }
     }
@@ -2587,32 +2590,23 @@
         !(is_inter_block(mbmi) &&
             (mbmi->skip_coeff ||
              vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)))) {
-      const uint8_t context = vp9_get_pred_context_tx_size(xd);
-      ++get_tx_counts(max_txsize_lookup[bsize],
-                      context, &cm->counts.tx)[mbmi->tx_size];
+      ++get_tx_counts(max_txsize_lookup[bsize], vp9_get_tx_size_context(xd),
+                      &cm->counts.tx)[mbmi->tx_size];
     } else {
       int x, y;
-      TX_SIZE sz = tx_mode_to_biggest_tx_size[cm->tx_mode];
-      assert(sizeof(tx_mode_to_biggest_tx_size) /
-             sizeof(tx_mode_to_biggest_tx_size[0]) == TX_MODES);
+      TX_SIZE tx_size;
       // The new intra coding scheme requires no change of transform size
       if (is_inter_block(&mi->mbmi)) {
-        if (sz == TX_32X32 && bsize < BLOCK_32X32)
-          sz = TX_16X16;
-        if (sz == TX_16X16 && bsize < BLOCK_16X16)
-          sz = TX_8X8;
-        if (sz == TX_8X8 && bsize < BLOCK_8X8)
-          sz = TX_4X4;
-      } else if (bsize >= BLOCK_8X8) {
-        sz = mbmi->tx_size;
+        tx_size = MIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
+                      max_txsize_lookup[bsize]);
       } else {
-        sz = TX_4X4;
+        tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
       }
 
       for (y = 0; y < mi_height; y++)
         for (x = 0; x < mi_width; x++)
           if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
-            mi_8x8[mis * y + x]->mbmi.tx_size = sz;
+            mi_8x8[mis * y + x]->mbmi.tx_size = tx_size;
     }
   }
 }
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 83f87b0..0821c26 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -144,7 +144,7 @@
   const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block);
   int16_t *qcoeff_ptr;
   int16_t *dqcoeff_ptr;
-  int eob = pd->eobs[block], final_eob, sz = 0;
+  int eob = p->eobs[block], final_eob, sz = 0;
   const int i0 = 0;
   int rc, x, next, i;
   int64_t rdmult, rddiv, rd_cost0, rd_cost1;
@@ -176,7 +176,7 @@
   tokens[eob][0].rate = 0;
   tokens[eob][0].error = 0;
   tokens[eob][0].next = default_eob;
-  tokens[eob][0].token = DCT_EOB_TOKEN;
+  tokens[eob][0].token = EOB_TOKEN;
   tokens[eob][0].qc = 0;
   *(tokens[eob] + 1) = *(tokens[eob] + 0);
   next = eob;
@@ -243,21 +243,19 @@
         /* If we reduced this coefficient to zero, check to see if
          *  we need to move the EOB back here.
          */
-        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
-             DCT_EOB_TOKEN : ZERO_TOKEN;
-        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
-             DCT_EOB_TOKEN : ZERO_TOKEN;
+        t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
+        t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
       } else {
         t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
       }
       if (next < default_eob) {
         band = band_translate[i + 1];
-        if (t0 != DCT_EOB_TOKEN) {
+        if (t0 != EOB_TOKEN) {
           pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
           rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                   [tokens[next][0].token];
         }
-        if (t1 != DCT_EOB_TOKEN) {
+        if (t1 != EOB_TOKEN) {
           pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
           rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                   [tokens[next][1].token];
@@ -289,12 +287,12 @@
       t0 = tokens[next][0].token;
       t1 = tokens[next][1].token;
       /* Update the cost of each path if we're past the EOB token. */
-      if (t0 != DCT_EOB_TOKEN) {
+      if (t0 != EOB_TOKEN) {
         tokens[next][0].rate +=
             mb->token_costs[tx_size][type][ref][band][1][0][t0];
         tokens[next][0].token = ZERO_TOKEN;
       }
-      if (t1 != DCT_EOB_TOKEN) {
+      if (t1 != EOB_TOKEN) {
         tokens[next][1].rate +=
             mb->token_costs[tx_size][type][ref][band][1][0][t1];
         tokens[next][1].token = ZERO_TOKEN;
@@ -334,7 +332,7 @@
   }
   final_eob++;
 
-  xd->plane[plane].eobs[block] = final_eob;
+  mb->plane[plane].eobs[block] = final_eob;
   *a = *l = (final_eob > 0);
 }
 
@@ -372,7 +370,7 @@
   int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   const scan_order *so;
-  uint16_t *eob = &pd->eobs[block];
+  uint16_t *eob = &p->eobs[block];
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   int i, j;
   int16_t *src_diff;
@@ -423,6 +421,7 @@
   MACROBLOCK *const x = args->x;
   MACROBLOCKD *const xd = &x->e_mbd;
   struct optimize_ctx *const ctx = args->ctx;
+  struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
   int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   int i, j;
@@ -433,7 +432,7 @@
   // TODO(jingning): per transformed block zero forcing only enabled for
   // luma component. will integrate chroma components as well.
   if (x->zcoeff_blk[tx_size][block] && plane == 0) {
-    pd->eobs[block] = 0;
+    p->eobs[block] = 0;
     ctx->ta[plane][i] = 0;
     ctx->tl[plane][j] = 0;
     return;
@@ -445,28 +444,28 @@
   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
     vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
   } else {
-    ctx->ta[plane][i] = pd->eobs[block] > 0;
-    ctx->tl[plane][j] = pd->eobs[block] > 0;
+    ctx->ta[plane][i] = p->eobs[block] > 0;
+    ctx->tl[plane][j] = p->eobs[block] > 0;
   }
 
-  if (x->skip_encode || pd->eobs[block] == 0)
+  if (x->skip_encode || p->eobs[block] == 0)
     return;
 
   switch (tx_size) {
     case TX_32X32:
-      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
+      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
       break;
     case TX_16X16:
-      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
+      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
       break;
     case TX_8X8:
-      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
+      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
       break;
     case TX_4X4:
       // this is like vp9_short_idct4x4 but has a special case around eob<=1
       // which is significant (not just an optimization) for the lossless
       // case.
-      xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
+      xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
       break;
     default:
       assert(!"Invalid transform size");
@@ -478,6 +477,7 @@
   struct encode_b_args *const args = arg;
   MACROBLOCK *const x = args->x;
   MACROBLOCKD *const xd = &x->e_mbd;
+  struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
   int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   int i, j;
@@ -487,10 +487,10 @@
 
   vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
 
-  if (pd->eobs[block] == 0)
+  if (p->eobs[block] == 0)
     return;
 
-  xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
+  xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
 }
 
 void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
@@ -540,7 +540,7 @@
   const int diff_stride = 4 * (1 << bwl);
   uint8_t *src, *dst;
   int16_t *src_diff;
-  uint16_t *eob = &pd->eobs[block];
+  uint16_t *eob = &p->eobs[block];
   int i, j;
   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
   dst = &pd->dst.buf[4 * (j * pd->dst.stride + i)];
@@ -559,8 +559,9 @@
       mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
       block >>= 6;
       vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode,
-                              dst, pd->dst.stride, dst, pd->dst.stride);
-
+                              x->skip_encode ? src : dst,
+                              x->skip_encode ? p->src.stride : pd->dst.stride,
+                              dst, pd->dst.stride);
       if (!x->skip_recode) {
         vp9_subtract_block(32, 32, src_diff, diff_stride,
                            src, p->src.stride, dst, pd->dst.stride);
@@ -582,7 +583,9 @@
       mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
       block >>= 4;
       vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode,
-                              dst, pd->dst.stride, dst, pd->dst.stride);
+                              x->skip_encode ? src : dst,
+                              x->skip_encode ? p->src.stride : pd->dst.stride,
+                              dst, pd->dst.stride);
       if (!x->skip_recode) {
         vp9_subtract_block(16, 16, src_diff, diff_stride,
                            src, p->src.stride, dst, pd->dst.stride);
@@ -600,7 +603,9 @@
       mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
       block >>= 2;
       vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode,
-                              dst, pd->dst.stride, dst, pd->dst.stride);
+                              x->skip_encode ? src : dst,
+                              x->skip_encode ? p->src.stride : pd->dst.stride,
+                              dst, pd->dst.stride);
       if (!x->skip_recode) {
         vp9_subtract_block(8, 8, src_diff, diff_stride,
                            src, p->src.stride, dst, pd->dst.stride);
@@ -621,7 +626,9 @@
         mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
 
       vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
-                              dst, pd->dst.stride, dst, pd->dst.stride);
+                              x->skip_encode ? src : dst,
+                              x->skip_encode ? p->src.stride : pd->dst.stride,
+                              dst, pd->dst.stride);
 
       if (!x->skip_recode) {
         vp9_subtract_block(4, 4, src_diff, diff_stride,
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 50d8036..5f42d0e 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -545,7 +545,7 @@
     p[i].coeff = ctx->coeff_pbuf[i][1];
     p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
     pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
-    pd[i].eobs = ctx->eobs_pbuf[i][1];
+    p[i].eobs = ctx->eobs_pbuf[i][1];
   }
   x->skip_recode = 0;
 
@@ -1008,10 +1008,8 @@
   int target_norm_bits_per_mb;
 
   double section_err = fpstats->coded_error / fpstats->count;
-  double sr_correction;
   double err_per_mb = section_err / num_mbs;
   double err_correction_factor;
-  double speed_correction = 1.0;
 
   if (section_target_bandwitdh <= 0)
     return cpi->twopass.maxq_max_limit;          // Highest value allowed
@@ -1020,40 +1018,6 @@
                               ? (512 * section_target_bandwitdh) / num_mbs
                               : 512 * (section_target_bandwitdh / num_mbs);
 
-  // Look at the drop in prediction quality between the last frame
-  // and the GF buffer (which contained an older frame).
-  if (fpstats->sr_coded_error > fpstats->coded_error) {
-    double sr_err_diff = (fpstats->sr_coded_error - fpstats->coded_error) /
-                             (fpstats->count * cpi->common.MBs);
-    sr_correction = fclamp(pow(sr_err_diff / 32.0, 0.25), 0.75, 1.25);
-  } else {
-    sr_correction = 0.75;
-  }
-
-  // Calculate a corrective factor based on a rolling ratio of bits spent
-  // vs target bits
-  if (cpi->rc.rolling_target_bits > 0 &&
-      cpi->rc.active_worst_quality < cpi->rc.worst_quality) {
-    double rolling_ratio = (double)cpi->rc.rolling_actual_bits /
-                               (double)cpi->rc.rolling_target_bits;
-
-    if (rolling_ratio < 0.95)
-      cpi->twopass.est_max_qcorrection_factor -= 0.005;
-    else if (rolling_ratio > 1.05)
-      cpi->twopass.est_max_qcorrection_factor += 0.005;
-
-    cpi->twopass.est_max_qcorrection_factor = fclamp(
-        cpi->twopass.est_max_qcorrection_factor, 0.1, 10.0);
-  }
-
-  // Corrections for higher compression speed settings
-  // (reduced compression expected)
-  // FIXME(jimbankoski): Once we settle on vp9 speed features we need to
-  // change this code.
-  if (cpi->compressor_speed == 1)
-    speed_correction = cpi->oxcf.cpu_used <= 5 ?
-                          1.04 + (/*cpi->oxcf.cpu_used*/0 * 0.04) :
-                          1.25;
 
   // Try and pick a max Q that will be high enough to encode the
   // content at the given rate.
@@ -1061,9 +1025,7 @@
     int bits_per_mb_at_this_q;
 
     err_correction_factor = calc_correction_factor(err_per_mb,
-                                                   ERR_DIVISOR, 0.4, 0.90, q) *
-                                sr_correction * speed_correction *
-                                cpi->twopass.est_max_qcorrection_factor;
+                                                   ERR_DIVISOR, 0.4, 0.90, q);
 
     bits_per_mb_at_this_q = vp9_rc_bits_per_mb(INTER_FRAME, q,
                                                err_correction_factor);
@@ -1077,14 +1039,6 @@
       q < cpi->cq_target_quality)
     q = cpi->cq_target_quality;
 
-  // Adjust maxq_min_limit and maxq_max_limit limits based on
-  // average q observed in clip for non kf/gf/arf frames
-  // Give average a chance to settle though.
-  // PGW TODO.. This code is broken for the extended Q range
-  if (cpi->rc.ni_frames > ((int)cpi->twopass.total_stats.count >> 8) &&
-      cpi->rc.ni_frames > 25)
-    adjust_maxq_qrange(cpi);
-
   return q;
 }
 
@@ -1100,9 +1054,6 @@
   double section_err = (fpstats->coded_error / fpstats->count);
   double err_per_mb = section_err / num_mbs;
   double err_correction_factor;
-  double sr_err_diff;
-  double sr_correction;
-  double speed_correction = 1.0;
   double clip_iiratio;
   double clip_iifactor;
 
@@ -1111,31 +1062,6 @@
                             : 512 * (section_target_bandwitdh / num_mbs);
 
 
-  // Corrections for higher compression speed settings
-  // (reduced compression expected)
-  if (cpi->compressor_speed == 1) {
-    if (cpi->oxcf.cpu_used <= 5)
-      speed_correction = 1.04 + (/*cpi->oxcf.cpu_used*/ 0 * 0.04);
-    else
-      speed_correction = 1.25;
-  }
-
-  // Look at the drop in prediction quality between the last frame
-  // and the GF buffer (which contained an older frame).
-  if (fpstats->sr_coded_error > fpstats->coded_error) {
-    sr_err_diff =
-      (fpstats->sr_coded_error - fpstats->coded_error) /
-      (fpstats->count * cpi->common.MBs);
-    sr_correction = (sr_err_diff / 32.0);
-    sr_correction = pow(sr_correction, 0.25);
-    if (sr_correction < 0.75)
-      sr_correction = 0.75;
-    else if (sr_correction > 1.25)
-      sr_correction = 1.25;
-  } else {
-    sr_correction = 0.75;
-  }
-
   // II ratio correction factor for clip as a whole
   clip_iiratio = cpi->twopass.total_stats.intra_error /
                  DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error);
@@ -1149,8 +1075,7 @@
 
     // Error per MB based correction factor
     err_correction_factor =
-      calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, q) *
-      sr_correction * speed_correction * clip_iifactor;
+      calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, q) * clip_iifactor;
 
     bits_per_mb_at_this_q =
       vp9_rc_bits_per_mb(INTER_FRAME, q, err_correction_factor);
@@ -2163,69 +2088,28 @@
 
   if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
     cpi->rc.active_worst_quality = cpi->oxcf.cq_level;
-  } else {
+  } else if (cpi->common.current_video_frame == 0) {
     // Special case code for first frame.
-    if (cpi->common.current_video_frame == 0) {
-      int section_target_bandwidth =
-          (int)(cpi->twopass.bits_left / frames_left);
-      cpi->twopass.est_max_qcorrection_factor = 1.0;
+    int section_target_bandwidth =
+        (int)(cpi->twopass.bits_left / frames_left);
 
-      // Set a cq_level in constrained quality mode.
-      // Commenting this code out for now since it does not seem to be
-      // working well.
-      /*
-      if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
-        int est_cq = estimate_cq(cpi, &cpi->twopass.total_left_stats,
-           section_target_bandwidth);
+    // guess at maxq needed in 2nd pass
+    cpi->twopass.maxq_max_limit = cpi->rc.worst_quality;
+    cpi->twopass.maxq_min_limit = cpi->rc.best_quality;
 
-        if (est_cq > cpi->cq_target_quality)
-          cpi->cq_target_quality = est_cq;
-        else
-          cpi->cq_target_quality = cpi->oxcf.cq_level;
-      }
-      */
+    tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats,
+                           section_target_bandwidth);
 
-      // guess at maxq needed in 2nd pass
-      cpi->twopass.maxq_max_limit = cpi->rc.worst_quality;
-      cpi->twopass.maxq_min_limit = cpi->rc.best_quality;
+    cpi->rc.active_worst_quality = tmp_q;
+    cpi->rc.ni_av_qi = tmp_q;
+    cpi->rc.avg_q = vp9_convert_qindex_to_q(tmp_q);
 
-      tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats,
-                             section_target_bandwidth);
-
-      cpi->rc.active_worst_quality = tmp_q;
-      cpi->rc.ni_av_qi = tmp_q;
-      cpi->rc.avg_q = vp9_convert_qindex_to_q(tmp_q);
-
-      // Limit the maxq value returned subsequently.
-      // This increases the risk of overspend or underspend if the initial
-      // estimate for the clip is bad, but helps prevent excessive
-      // variation in Q, especially near the end of a clip
-      // where for example a small overspend may cause Q to crash
-      adjust_maxq_qrange(cpi);
-    }
-
-    // The last few frames of a clip almost always have to few or too many
-    // bits and for the sake of over exact rate control we dont want to make
-    // radical adjustments to the allowed quantizer range just to use up a
-    // few surplus bits or get beneath the target rate.
-    else if ((cpi->common.current_video_frame <
-              (((unsigned int)cpi->twopass.total_stats.count * 255) >> 8)) &&
-             ((cpi->common.current_video_frame + cpi->rc.baseline_gf_interval) <
-              (unsigned int)cpi->twopass.total_stats.count)) {
-      int section_target_bandwidth =
-          (int)(cpi->twopass.bits_left / frames_left);
-      if (frames_left < 1)
-        frames_left = 1;
-
-      tmp_q = estimate_max_q(
-          cpi,
-          &cpi->twopass.total_left_stats,
-          section_target_bandwidth);
-
-      // Make a damped adjustment to active max Q
-      cpi->rc.active_worst_quality =
-          adjust_active_maxq(cpi->rc.active_worst_quality, tmp_q);
-    }
+    // Limit the maxq value returned subsequently.
+    // This increases the risk of overspend or underspend if the initial
+    // estimate for the clip is bad, but helps prevent excessive
+    // variation in Q, especially near the end of a clip
+    // where for example a small overspend may cause Q to crash
+    adjust_maxq_qrange(cpi);
   }
   vp9_zero(this_frame);
   if (EOF == input_stats(cpi, &this_frame))
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index a383164..fee11fd 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1066,6 +1066,126 @@
 #undef CHECK_POINT
 #undef CHECK_BETTER
 
+// Exhaustive full-pel SAD search over a +/-64 window around ref_mv, clipped
+// to x->mv_{row,col}_{min,max}. ref_mv is clamped in place, *num00 is set to
+// 11 and best_mv receives the best full-pel vector found. Returns the
+// variance at that position plus its mv cost (INT_MAX if bestsad is INT_MAX).
+int vp9_full_range_search_c(MACROBLOCK *x, int_mv *ref_mv, int_mv *best_mv,
+                            int search_param, int sad_per_bit, int *num00,
+                            vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
+                            int *mvcost[2], int_mv *center_mv) {
+  const MACROBLOCKD* const xd = &x->e_mbd;
+  uint8_t *what = x->plane[0].src.buf;
+  int what_stride = x->plane[0].src.stride;
+  uint8_t *in_what;
+  int in_what_stride = xd->plane[0].pre[0].stride;
+  uint8_t *best_address;
+  uint8_t *check_here;
+  int_mv this_mv, fcenter_mv;
+  int bestsad = INT_MAX;
+  int thissad;
+  int ref_row, ref_col;
+
+  int *mvjsadcost = x->nmvjointsadcost;
+  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+
+  int tr, tc, i;
+  int best_tr = 0;
+  int best_tc = 0;
+  int range = 64;
+  int start_row, end_row, start_col, end_col;
+
+  // Scale center_mv down by 8 to match the full-pel units of the SAD cost.
+  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
+  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+
+  clamp_mv(&ref_mv->as_mv,
+           x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+  ref_row = ref_mv->as_mv.row;
+  ref_col = ref_mv->as_mv.col;
+  *num00 = 11;
+  best_mv->as_mv.row = ref_row;
+  best_mv->as_mv.col = ref_col;
+
+  // Work out the start point for the search
+  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
+                        ref_row * in_what_stride + ref_col);
+
+  // Check the starting position
+  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
+                + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+                                 mvjsadcost, mvsadcost, sad_per_bit);
+
+  start_row = MAX(-range, x->mv_row_min - ref_row);
+  start_col = MAX(-range, x->mv_col_min - ref_col);
+  end_row = MIN(range, x->mv_row_max - ref_row);
+  end_col = MIN(range, x->mv_col_max - ref_col);
+
+  for (tr = start_row; tr <= end_row; ++tr) {
+    for (tc = start_col; tc <= end_col; tc += 4) {
+      if ((tc + 3) <= end_col) {
+        unsigned int sad_array[4];
+        unsigned char const *addr_ref[4];
+        for (i = 0; i < 4; ++i)
+          addr_ref[i] = in_what + tr * in_what_stride + tc + i;
+
+        fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array);
+
+        for (i = 0; i < 4; ++i) {
+          if (sad_array[i] < bestsad) {
+            this_mv.as_mv.row = ref_row + tr;
+            this_mv.as_mv.col = ref_col + tc + i;
+            thissad = sad_array[i] +
+                      mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+                                      mvjsadcost, mvsadcost, sad_per_bit);
+            if (thissad < bestsad) {
+              bestsad = thissad;
+              best_tr = tr;
+              best_tc = tc + i;
+            }
+          }
+        }
+      } else {
+        // Fewer than four columns remain; end_col itself is still a candidate.
+        for (i = 0; i <= end_col - tc; ++i) {
+          check_here = in_what + tr * in_what_stride + tc + i;
+          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
+                                bestsad);
+
+          if (thissad < bestsad) {
+            this_mv.as_mv.row = ref_row + tr;
+            this_mv.as_mv.col = ref_col + tc + i;
+            thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+                                      mvjsadcost, mvsadcost, sad_per_bit);
+
+            if (thissad < bestsad) {
+              bestsad = thissad;
+              best_tr = tr;
+              best_tc = tc + i;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  best_mv->as_mv.row += best_tr;
+  best_mv->as_mv.col += best_tc;
+
+  this_mv.as_mv.row = best_mv->as_mv.row * 8;
+  this_mv.as_mv.col = best_mv->as_mv.col * 8;
+
+  if (bestsad == INT_MAX)
+    return INT_MAX;
+
+  // Measure variance at the winning position rather than the search start.
+  best_address = in_what + best_tr * in_what_stride + best_tc;
+  return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+                    (unsigned int *)(&thissad)) +
+                       mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+                                   mvjcost, mvcost, x->errorperbit);
+}
+
 int vp9_diamond_search_sad_c(MACROBLOCK *x,
                              int_mv *ref_mv, int_mv *best_mv,
                              int search_param, int sad_per_bit, int *num00,
@@ -1111,7 +1231,7 @@
 
   // Work out the start point for the search
   in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
-                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
+                        ref_row * in_what_stride + ref_col);
   best_address = in_what;
 
   // Check the starting position
@@ -1255,7 +1375,7 @@
 
   // Work out the start point for the search
   in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
-                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
+                        ref_row * in_what_stride + ref_col);
   best_address = in_what;
 
   // Check the starting position
@@ -1606,7 +1726,7 @@
     check_here = r * mv_stride + in_what + col_min;
     c = col_min;
 
-    while ((c + 2) < col_max) {
+    while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
       int i;
 
       fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 968cbfb..b10d9f8 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -143,8 +143,9 @@
   }
 }
 
-static void set_mvcost(VP9_COMP *cpi) {
+static void set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
   MACROBLOCK *const mb = &cpi->mb;
+  cpi->common.allow_high_precision_mv = allow_high_precision_mv;
   if (cpi->common.allow_high_precision_mv) {
     mb->mvcost = mb->nmvcost_hp;
     mb->mvsadcost = mb->nmvsadcost_hp;
@@ -159,6 +160,7 @@
 
   if (!init_done) {
     vp9_initialize_common();
+    vp9_coef_tree_initialize();
     vp9_tokenize_initialize();
     vp9_init_quant_tables();
     vp9_init_me_luts();
@@ -166,7 +168,6 @@
     // init_base_skip_probs();
     vp9_entropy_mv_init();
     vp9_entropy_mode_init();
-    vp9_coef_tree_initialize();
     init_done = 1;
   }
 }
@@ -234,7 +235,7 @@
 // Computes a q delta (in "q index" terms) to get from a starting q value
 // to a target value
 // target q value
-int vp9_compute_qdelta(VP9_COMP *cpi, double qstart, double qtarget) {
+int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget) {
   int i;
   int start_index = cpi->rc.worst_quality;
   int target_index = cpi->rc.worst_quality;
@@ -688,6 +689,7 @@
 
   switch (mode) {
     case 0:  // This is the best quality mode.
+      cpi->diamond_search_sad = vp9_full_range_search;
       break;
 
     case 1:
@@ -1115,8 +1117,8 @@
   vp9_change_config(ptr, oxcf);
 
   // Initialize active best and worst q and average q values.
-  cpi->rc.active_worst_quality         = cpi->oxcf.worst_allowed_q;
-  cpi->rc.active_best_quality          = cpi->oxcf.best_allowed_q;
+  cpi->rc.active_worst_quality      = cpi->oxcf.worst_allowed_q;
+
   cpi->rc.avg_frame_qindex          = cpi->oxcf.worst_allowed_q;
 
   // Initialise the starting buffer levels
@@ -1205,8 +1207,7 @@
   cm->reset_frame_context = 0;
 
   setup_features(cm);
-  cpi->common.allow_high_precision_mv = 0;  // Default mv precision
-  set_mvcost(cpi);
+  set_high_precision_mv(cpi, 0);
 
   {
     int i;
@@ -1253,12 +1254,8 @@
 
   // active values should only be modified if out of new range
   cpi->rc.active_worst_quality = clamp(cpi->rc.active_worst_quality,
-                                    cpi->oxcf.best_allowed_q,
-                                    cpi->oxcf.worst_allowed_q);
-
-  cpi->rc.active_best_quality = clamp(cpi->rc.active_best_quality,
-                                   cpi->oxcf.best_allowed_q,
-                                   cpi->oxcf.worst_allowed_q);
+                                       cpi->rc.best_quality,
+                                       cpi->rc.worst_quality);
 
   cpi->cq_target_quality = cpi->oxcf.cq_level;
 
@@ -1636,7 +1633,6 @@
   cpi->rc.rate_correction_factor         = 1.0;
   cpi->rc.key_frame_rate_correction_factor = 1.0;
   cpi->rc.gf_rate_correction_factor  = 1.0;
-  cpi->twopass.est_max_qcorrection_factor  = 1.0;
 
   cal_nmvjointsadcost(cpi->mb.nmvjointsadcost);
   cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX];
@@ -2155,7 +2151,7 @@
   VP9_COMP *cpi = (VP9_COMP *)(ptr);
   VP9_COMMON *cm = &cpi->common;
 
-  if (index < 0 || index >= NUM_REF_FRAMES)
+  if (index < 0 || index >= REF_FRAMES)
     return -1;
 
   *fb = &cm->yv12_fb[cm->ref_frame_map[index]];
@@ -2369,16 +2365,6 @@
   return i;
 }
 
-static void Pass1Encode(VP9_COMP *cpi, unsigned long *size, unsigned char *dest,
-                        unsigned int *frame_flags) {
-  (void) size;
-  (void) dest;
-  (void) frame_flags;
-
-  vp9_set_quantizer(cpi, find_fp_qindex());
-  vp9_first_pass(cpi);
-}
-
 #define WRITE_RECON_BUFFER 0
 #if WRITE_RECON_BUFFER
 void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
@@ -2470,16 +2456,9 @@
           cpi->rc.projected_frame_size <
           ((cpi->rc.this_frame_target * 7) >> 3)) {
         force_recode = 1;
-      } else if (q > cpi->oxcf.cq_level &&
-                 cpi->rc.projected_frame_size < cpi->rc.min_frame_bandwidth &&
-                 cpi->rc.active_best_quality > cpi->oxcf.cq_level) {
-        // Severe undershoot and between auto and user cq level
-        force_recode = 1;
-        cpi->rc.active_best_quality = cpi->oxcf.cq_level;
       }
     }
   }
-
   return force_recode;
 }
 
@@ -2572,8 +2551,8 @@
 static void scale_references(VP9_COMP *cpi) {
   VP9_COMMON *cm = &cpi->common;
   int i;
-  int refs[ALLOWED_REFS_PER_FRAME] = {cpi->lst_fb_idx, cpi->gld_fb_idx,
-                                      cpi->alt_fb_idx};
+  int refs[REFS_PER_FRAME] = {cpi->lst_fb_idx, cpi->gld_fb_idx,
+                              cpi->alt_fb_idx};
 
   for (i = 0; i < 3; i++) {
     YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[cm->ref_frame_map[refs[i]]];
@@ -2609,22 +2588,20 @@
   model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
   model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
   model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
-  for (n = THREE_TOKEN; n < DCT_EOB_TOKEN; ++n)
+  for (n = THREE_TOKEN; n < EOB_TOKEN; ++n)
     model_count[TWO_TOKEN] += full_count[n];
-  model_count[DCT_EOB_MODEL_TOKEN] = full_count[DCT_EOB_TOKEN];
+  model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN];
 }
 
-static void full_to_model_counts(
-    vp9_coeff_count_model *model_count, vp9_coeff_count *full_count) {
+static void full_to_model_counts(vp9_coeff_count_model *model_count,
+                                 vp9_coeff_count *full_count) {
   int i, j, k, l;
-  for (i = 0; i < BLOCK_TYPES; ++i)
+
+  for (i = 0; i < PLANE_TYPES; ++i)
     for (j = 0; j < REF_TYPES; ++j)
       for (k = 0; k < COEF_BANDS; ++k)
-        for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
-          if (l >= 3 && k == 0)
-            continue;
+        for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
           full_to_model_count(model_count[i][j][k][l], full_count[i][j][k][l]);
-        }
 }
 
 #if 0 && CONFIG_INTERNAL_STATS
@@ -2639,8 +2616,8 @@
 
   if (cpi->twopass.total_left_stats.coded_error != 0.0)
     fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d"
-        "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
-        "%6d %6d %5d %5d %5d %8.2f %10d %10.3f"
+        "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f"
+        "%6d %6d %5d %5d %5d %10d %10.3f"
         "%10.3f %8d %10d %10d %10d\n",
         cpi->common.current_video_frame, cpi->rc.this_frame_target,
         cpi->rc.projected_frame_size, 0,
@@ -2650,13 +2627,12 @@
         (int)cpi->rc.total_actual_bits, cm->base_qindex,
         vp9_convert_qindex_to_q(cm->base_qindex),
         (double)vp9_dc_quant(cm->base_qindex, 0) / 4.0,
-        vp9_convert_qindex_to_q(cpi->rc.active_best_quality),
         vp9_convert_qindex_to_q(cpi->rc.active_worst_quality), cpi->rc.avg_q,
         vp9_convert_qindex_to_q(cpi->rc.ni_av_qi),
         vp9_convert_qindex_to_q(cpi->cq_target_quality),
         cpi->refresh_last_frame, cpi->refresh_golden_frame,
         cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost,
-        cpi->twopass.est_max_qcorrection_factor, (int)cpi->twopass.bits_left,
+        (int)cpi->twopass.bits_left,
         cpi->twopass.total_left_stats.coded_error,
         (double)cpi->twopass.bits_left /
             (1 + cpi->twopass.total_left_stats.coded_error),
@@ -2699,6 +2675,7 @@
   int overshoot_seen = 0;
   int undershoot_seen = 0;
   int q_low = bottom_index, q_high = top_index;
+
   do {
     vp9_clear_system_state();  // __asm emms;
 
@@ -2816,11 +2793,13 @@
             // Update rate_correction_factor unless
             vp9_rc_update_rate_correction_factors(cpi, 0);
 
-            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target);
+            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+                                   bottom_index, top_index);
 
             while (*q < q_low && retries < 10) {
               vp9_rc_update_rate_correction_factors(cpi, 0);
-              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target);
+              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+                                     bottom_index, top_index);
               retries++;
             }
           }
@@ -2831,18 +2810,12 @@
           q_high = *q > q_low ? *q - 1 : q_low;
 
           if (overshoot_seen || loop_count > 1) {
-            // Update rate_correction_factor unless
-            // cpi->rc.active_worst_quality has changed.
             vp9_rc_update_rate_correction_factors(cpi, 1);
-
             *q = (q_high + q_low) / 2;
           } else {
-            // Update rate_correction_factor unless
-            // cpi->rc.active_worst_quality has changed.
             vp9_rc_update_rate_correction_factors(cpi, 0);
-
-            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target);
-
+            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+                                   bottom_index, top_index);
             // Special case reset for qlow for constrained quality.
             // This should only trigger where there is very substantial
             // undershoot on a frame and the auto cq level is above
@@ -2854,7 +2827,8 @@
 
             while (*q > q_high && retries < 10) {
               vp9_rc_update_rate_correction_factors(cpi, 0);
-              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target);
+              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+                                     bottom_index, top_index);
               retries++;
             }
           }
@@ -2893,8 +2867,8 @@
   int q;
   int frame_over_shoot_limit;
   int frame_under_shoot_limit;
-
   int top_index;
+  int top_index_prop;
   int bottom_index;
 
   SPEED_FEATURES *const sf = &cpi->sf;
@@ -3008,45 +2982,8 @@
 
   vp9_clear_system_state();
 
-  // Decide how big to make the frame.
-  vp9_rc_pick_frame_size_and_bounds(cpi,
-                                    &frame_under_shoot_limit,
-                                    &frame_over_shoot_limit);
-
-  q = vp9_rc_pick_q_and_adjust_q_bounds(cpi,
-                                        &bottom_index,
-                                        &top_index);
-
-#if CONFIG_MULTIPLE_ARF
-  // Force the quantizer determined by the coding order pattern.
-  if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) &&
-      cpi->oxcf.end_usage != USAGE_CONSTANT_QUALITY) {
-    double new_q;
-    double current_q = vp9_convert_qindex_to_q(cpi->rc.active_worst_quality);
-    int level = cpi->this_frame_weight;
-    assert(level >= 0);
-
-    // Set quantizer steps at 10% increments.
-    new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level)));
-    q = cpi->rc.active_worst_quality +
-        vp9_compute_qdelta(cpi, current_q, new_q);
-
-    bottom_index = q;
-    top_index    = q;
-
-    printf("frame:%d q:%d\n", cm->current_video_frame, q);
-  }
-#endif
-
   vp9_zero(cpi->rd_tx_select_threshes);
 
-  if (!frame_is_intra_only(cm)) {
-    cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
-    /* TODO: Decide this more intelligently */
-    cm->allow_high_precision_mv = q < HIGH_PRECISION_MV_QTHRESH;
-    set_mvcost(cpi);
-  }
-
 #if CONFIG_VP9_POSTPROC
   if (cpi->oxcf.noise_sensitivity > 0) {
     int l = 0;
@@ -3076,6 +3013,26 @@
   vp9_write_yuv_frame(cpi->Source);
 #endif
 
+  // Decide how big to make the frame.
+  vp9_rc_pick_frame_size_target(cpi);
+
+  // Decide frame size bounds
+  vp9_rc_compute_frame_size_bounds(cpi, cpi->rc.this_frame_target,
+                                   &frame_under_shoot_limit,
+                                   &frame_over_shoot_limit);
+
+  // Decide q and q bounds
+  q = vp9_rc_pick_q_and_adjust_q_bounds(cpi,
+                                        &bottom_index,
+                                        &top_index,
+                                        &top_index_prop);
+
+  if (!frame_is_intra_only(cm)) {
+    cm->mcomp_filter_type = DEFAULT_INTERP_FILTER;
+    /* TODO: Decide this more intelligently */
+    set_high_precision_mv(cpi, (q < HIGH_PRECISION_MV_QTHRESH));
+  }
+
   encode_with_recode_loop(cpi,
                           size,
                           dest,
@@ -3161,7 +3118,7 @@
    * needed in motion search besides loopfilter */
   cm->last_frame_type = cm->frame_type;
 
-  vp9_rc_postencode_update(cpi, *size, q);
+  vp9_rc_postencode_update(cpi, *size, top_index_prop);
 
 #if 0
   output_frame_level_debug_stats(cpi);
@@ -3282,6 +3239,21 @@
   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1;
 }
 
+static void Pass0Encode(VP9_COMP *cpi, unsigned long *size, unsigned char *dest,
+                        unsigned int *frame_flags) {
+  encode_frame_to_data_rate(cpi, size, dest, frame_flags);  // plain forward
+}
+
+static void Pass1Encode(VP9_COMP *cpi, unsigned long *size, unsigned char *dest,
+                        unsigned int *frame_flags) {
+  (void) size;         // The output parameters are not used by this function.
+  (void) dest;
+  (void) frame_flags;
+  // Set the quantizer from find_fp_qindex(), then run the first pass.
+  vp9_set_quantizer(cpi, find_fp_qindex());
+  vp9_first_pass(cpi);
+}
+
 static void Pass2Encode(VP9_COMP *cpi, unsigned long *size,
                         unsigned char *dest, unsigned int *frame_flags) {
   cpi->enable_encode_breakout = 1;
@@ -3293,37 +3265,15 @@
   // vp9_print_modes_and_motion_vectors(&cpi->common, "encode.stt");
 
   vp9_twopass_postencode_update(cpi, *size);
-
-  /*
-#ifdef DISABLE_RC_LONG_TERM_MEM
-  cpi->twopass.bits_left -=  cpi->rc.this_frame_target;
-#else
-  cpi->twopass.bits_left -= 8 * *size;
-#endif
-
-  if (!cpi->refresh_alt_ref_frame) {
-    double lower_bounds_min_rate = FRAME_OVERHEAD_BITS * cpi->oxcf.framerate;
-    double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
-                                        * cpi->oxcf.two_pass_vbrmin_section
-                                        / 100);
-
-    if (two_pass_min_rate < lower_bounds_min_rate)
-      two_pass_min_rate = lower_bounds_min_rate;
-
-    cpi->twopass.bits_left += (int64_t)(two_pass_min_rate
-                              / cpi->oxcf.framerate);
-  }
-  */
 }
 
 static void check_initial_width(VP9_COMP *cpi, YV12_BUFFER_CONFIG *sd) {
-  VP9_COMMON            *cm = &cpi->common;
+  VP9_COMMON *const cm = &cpi->common;
   if (!cpi->initial_width) {
-    // TODO(jkoleszar): Support 1/4 subsampling?
-    cm->subsampling_x = (sd != NULL) && sd->uv_width < sd->y_width;
-    cm->subsampling_y = (sd != NULL) && sd->uv_height < sd->y_height;
+    // TODO(agrange) Subsampling defaults to assuming sampled chroma.
+    cm->subsampling_x = sd != NULL ? (sd->uv_width < sd->y_width) : 1;
+    cm->subsampling_y = sd != NULL ? (sd->uv_height < sd->y_height) : 1;
     alloc_raw_frame_buffers(cpi);
-
     cpi->initial_width = cm->width;
     cpi->initial_height = cm->height;
   }
@@ -3386,8 +3336,7 @@
 
   cpi->source = NULL;
 
-  cpi->common.allow_high_precision_mv = ALTREF_HIGH_PRECISION_MV;
-  set_mvcost(cpi);
+  set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
 
   // Should we code an alternate reference frame.
   if (cpi->oxcf.play_alternate && cpi->source_alt_ref_pending) {
@@ -3609,7 +3558,7 @@
                            VP9BORDERINPIXELS);
 
   // Calculate scaling factors for each of the 3 available references
-  for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) {
+  for (i = 0; i < REFS_PER_FRAME; ++i) {
     vp9_setup_scale_factors(cm, i);
     if (vp9_is_scaled(&cm->active_ref_scale_comm[i]))
       vp9_extend_frame_borders(&cm->yv12_fb[cm->active_ref_idx[i]],
@@ -3627,7 +3576,8 @@
   } else if (cpi->pass == 2) {
     Pass2Encode(cpi, size, dest, frame_flags);
   } else {
-    encode_frame_to_data_rate(cpi, size, dest, frame_flags);
+    // One pass encode
+    Pass0Encode(cpi, size, dest, frame_flags);
   }
 
   if (cm->refresh_frame_context)
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index b8602e0..72e9196 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -338,7 +338,7 @@
   int worst_quality;
   int active_worst_quality;
   int best_quality;
-  int active_best_quality;
+  // int active_best_quality;
 } RATE_CONTROL;
 
 typedef struct VP9_COMP {
@@ -366,7 +366,7 @@
   struct lookahead_ctx    *lookahead;
   struct lookahead_entry  *source;
 #if CONFIG_MULTIPLE_ARF
-  struct lookahead_entry  *alt_ref_source[NUM_REF_FRAMES];
+  struct lookahead_entry  *alt_ref_source[REF_FRAMES];
 #else
   struct lookahead_entry  *alt_ref_source;
 #endif
@@ -394,7 +394,7 @@
   int use_svc;
 
 #if CONFIG_MULTIPLE_ARF
-  int alt_ref_fb_idx[NUM_REF_FRAMES - 3];
+  int alt_ref_fb_idx[REF_FRAMES - 3];
 #endif
   int refresh_last_frame;
   int refresh_golden_frame;
@@ -469,9 +469,9 @@
 
   nmv_context_counts NMVcount;
 
-  vp9_coeff_count coef_counts[TX_SIZES][BLOCK_TYPES];
-  vp9_coeff_probs_model frame_coef_probs[TX_SIZES][BLOCK_TYPES];
-  vp9_coeff_stats frame_branch_ct[TX_SIZES][BLOCK_TYPES];
+  vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
+  vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES];
+  vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES];
 
   int kf_zeromotion_pct;
   int gf_zeromotion_pct;
@@ -569,7 +569,6 @@
     int alt_extra_bits;
 
     int sr_update_lag;
-    double est_max_qcorrection_factor;
   } twopass;
 
   YV12_BUFFER_CONFIG alt_ref_buffer;
@@ -695,7 +694,7 @@
 
 void vp9_alloc_compressor_data(VP9_COMP *cpi);
 
-int vp9_compute_qdelta(VP9_COMP *cpi, double qstart, double qtarget);
+int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget);
 
 static int get_token_alloc(int mb_rows, int mb_cols) {
   return mb_rows * mb_cols * (48 * 16 + 4);
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 698130a..8c41724 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -137,45 +137,18 @@
   *eob_ptr = eob + 1;
 }
 
-struct plane_block_idx {
-  int plane;
-  int block;
-};
-
-// TODO(jkoleszar): returning a struct so it can be used in a const context,
-// expect to refactor this further later.
-static INLINE struct plane_block_idx plane_block_idx(int y_blocks,
-                                                     int b_idx) {
-  const int v_offset = y_blocks * 5 / 4;
-  struct plane_block_idx res;
-
-  if (b_idx < y_blocks) {
-    res.plane = 0;
-    res.block = b_idx;
-  } else if (b_idx < v_offset) {
-    res.plane = 1;
-    res.block = b_idx - y_blocks;
-  } else {
-    assert(b_idx < y_blocks * 3 / 2);
-    res.plane = 2;
-    res.block = b_idx - v_offset;
-  }
-  return res;
-}
-
-void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int y_blocks, int b_idx,
+void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
                                 const int16_t *scan, const int16_t *iscan) {
   MACROBLOCKD *const xd = &x->e_mbd;
-  const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
-  struct macroblock_plane* p = &x->plane[pb_idx.plane];
-  struct macroblockd_plane* pd = &xd->plane[pb_idx.plane];
+  struct macroblock_plane* p = &x->plane[plane];
+  struct macroblockd_plane* pd = &xd->plane[plane];
 
-  vp9_quantize_b(BLOCK_OFFSET(p->coeff, pb_idx.block),
+  vp9_quantize_b(BLOCK_OFFSET(p->coeff, block),
            16, x->skip_block,
            p->zbin, p->round, p->quant, p->quant_shift,
-           BLOCK_OFFSET(p->qcoeff, pb_idx.block),
-           BLOCK_OFFSET(pd->dqcoeff, pb_idx.block),
-           pd->dequant, p->zbin_extra, &pd->eobs[pb_idx.block], scan, iscan);
+           BLOCK_OFFSET(p->qcoeff, block),
+           BLOCK_OFFSET(pd->dqcoeff, block),
+           pd->dequant, p->zbin_extra, &p->eobs[block], scan, iscan);
 }
 
 static void invert_quant(int16_t *quant, int16_t *shift, int d) {
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index c078e1d..41cfa52 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -13,7 +13,7 @@
 
 #include "vp9/encoder/vp9_block.h"
 
-void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int y_blocks, int b_idx,
+void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
                                 const int16_t *scan, const int16_t *iscan);
 
 struct VP9_COMP;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index bf1fc4f..3fa8cea 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -45,6 +45,10 @@
 static int inter_minq[QINDEX_RANGE];
 static int afq_low_motion_minq[QINDEX_RANGE];
 static int afq_high_motion_minq[QINDEX_RANGE];
+static int gf_high = 2000;
+static int gf_low = 400;
+static int kf_high = 5000;
+static int kf_low = 400;
 
 // Functions to compute the active minq lookup table entries based on a
 // formulaic approach to facilitate easier adjustment of the Q tables.
@@ -84,7 +88,7 @@
     kf_high_motion_minq[i] = calculate_minq_index(maxq,
                                                   0.000002,
                                                   -0.0012,
-                                                  0.5,
+                                                  0.50,
                                                   0.0);
 
     gf_low_motion_minq[i] = calculate_minq_index(maxq,
@@ -97,11 +101,6 @@
                                                   -0.00125,
                                                   0.50,
                                                   0.0);
-    inter_minq[i] = calculate_minq_index(maxq,
-                                         0.00000271,
-                                         -0.00113,
-                                         0.75,
-                                         0.0);
     afq_low_motion_minq[i] = calculate_minq_index(maxq,
                                                   0.0000015,
                                                   -0.0009,
@@ -112,6 +111,11 @@
                                                    -0.00125,
                                                    0.55,
                                                    0.0);
+    inter_minq[i] = calculate_minq_index(maxq,
+                                         0.00000271,
+                                         -0.00113,
+                                         0.75,
+                                         0.0);
   }
 }
 
@@ -195,7 +199,7 @@
   if (cm->error_resilient_mode || cm->intra_only)
     vp9_setup_past_independence(cm);
 
-  assert(cm->frame_context_idx < NUM_FRAME_CONTEXTS);
+  assert(cm->frame_context_idx < FRAME_CONTEXTS);
   cm->fc = cm->frame_contexts[cm->frame_context_idx];
 }
 
@@ -285,7 +289,6 @@
   }
 }
 
-
 void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
   const int q = cpi->common.base_qindex;
   int correction_factor = 100;
@@ -333,7 +336,6 @@
       break;
   }
 
-  // if ( (correction_factor > 102) && (Q < cpi->rc.active_worst_quality) )
   if (correction_factor > 102) {
     // We are not already at the worst allowable quality
     correction_factor =
@@ -367,8 +369,9 @@
 }
 
 
-int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame) {
-  int q = cpi->rc.active_worst_quality;
+int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
+                      int active_best_quality, int active_worst_quality) {
+  int q = active_worst_quality;
 
   int i;
   int last_error = INT_MAX;
@@ -396,7 +399,7 @@
     target_bits_per_mb =
         (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs;
 
-  i = cpi->rc.active_best_quality;
+  i = active_best_quality;
 
   do {
     bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(cpi->common.frame_type, i,
@@ -412,7 +415,7 @@
     } else {
       last_error = bits_per_mb_at_this_q - target_bits_per_mb;
     }
-  } while (++i <= cpi->rc.active_worst_quality);
+  } while (++i <= active_worst_quality);
 
   return q;
 }
@@ -438,14 +441,17 @@
   return active_best_quality;
 }
 
-int vp9_rc_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
+int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
                                       int *bottom_index,
-                                      int *top_index) {
-  // Set an active best quality and if necessary active worst quality
-  int q = cpi->rc.active_worst_quality;
-  VP9_COMMON *const cm = &cpi->common;
+                                      int *top_index,
+                                      int *top_index_prop) {
+  const VP9_COMMON *const cm = &cpi->common;
+  int active_best_quality;
+  int active_worst_quality = cpi->rc.active_worst_quality;
+  int q;
 
   if (frame_is_intra_only(cm)) {
+    active_best_quality = cpi->rc.best_quality;
 #if !CONFIG_MULTIPLE_ARF
     // Handle the special case for key frames forced when we have75 reached
     // the maximum key frame interval. Here force the Q to a range
@@ -457,25 +463,19 @@
 
       delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q,
                                         (last_boosted_q * 0.75));
-
-      cpi->rc.active_best_quality = MAX(qindex + delta_qindex,
-                                        cpi->rc.best_quality);
-    } else if (cpi->pass == 0 && cpi->common.current_video_frame == 0) {
-      // If this is the first (key) frame in 1-pass, active best/worst is
-      // the user best/worst-allowed, and leave the top_index to active_worst.
-      cpi->rc.active_best_quality = cpi->oxcf.best_allowed_q;
-      cpi->rc.active_worst_quality = cpi->oxcf.worst_allowed_q;
-    } else {
-      int high = 5000;
-      int low = 400;
+      active_best_quality = MAX(qindex + delta_qindex,
+                                cpi->rc.best_quality);
+    } else if (!(cpi->pass == 0 && cpi->common.current_video_frame == 0)) {
+      // Any frame other than the first frame of a 1-pass encode.
       double q_adj_factor = 1.0;
       double q_val;
 
       // Baseline value derived from cpi->active_worst_quality and kf boost
-      cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.kf_boost,
-                                                       low, high,
-                                                       kf_low_motion_minq,
-                                                       kf_high_motion_minq);
+      active_best_quality = get_active_quality(active_worst_quality,
+                                               cpi->rc.kf_boost,
+                                               kf_low, kf_high,
+                                               kf_low_motion_minq,
+                                               kf_high_motion_minq);
 
       // Allow somewhat lower kf minq with small image formats.
       if ((cm->width * cm->height) <= (352 * 288)) {
@@ -487,124 +487,128 @@
 
       // Convert the adjustment factor to a qindex delta
       // on active_best_quality.
-      q_val = vp9_convert_qindex_to_q(cpi->rc.active_best_quality);
-      cpi->rc.active_best_quality +=
+      q_val = vp9_convert_qindex_to_q(active_best_quality);
+      active_best_quality +=
           vp9_compute_qdelta(cpi, q_val, (q_val * q_adj_factor));
     }
 #else
     double current_q;
     // Force the KF quantizer to be 30% of the active_worst_quality.
-    current_q = vp9_convert_qindex_to_q(cpi->rc.active_worst_quality);
-    cpi->rc.active_best_quality = cpi->rc.active_worst_quality
+    current_q = vp9_convert_qindex_to_q(active_worst_quality);
+    active_best_quality = active_worst_quality
         + vp9_compute_qdelta(cpi, current_q, current_q * 0.3);
 #endif
   } else if (!cpi->is_src_frame_alt_ref &&
              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-    int high = 2000;
-    int low = 400;
 
-    // Use the lower of cpi->rc.active_worst_quality and recent
+    // Use the lower of active_worst_quality and recent
     // average Q as basis for GF/ARF best Q limit unless last frame was
     // a key frame.
     if (cpi->frames_since_key > 1 &&
-        cpi->rc.avg_frame_qindex < cpi->rc.active_worst_quality) {
+        cpi->rc.avg_frame_qindex < active_worst_quality) {
       q = cpi->rc.avg_frame_qindex;
+    } else {
+      q = active_worst_quality;
     }
     // For constrained quality dont allow Q less than the cq level
     if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
       if (q < cpi->cq_target_quality)
         q = cpi->cq_target_quality;
       if (cpi->frames_since_key > 1) {
-        cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost,
-                                                         low, high,
-                                                         afq_low_motion_minq,
-                                                         afq_high_motion_minq);
+        active_best_quality = get_active_quality(q, cpi->rc.gfu_boost,
+                                                 gf_low, gf_high,
+                                                 afq_low_motion_minq,
+                                                 afq_high_motion_minq);
       } else {
-        cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost,
-                                                         low, high,
-                                                         gf_low_motion_minq,
-                                                         gf_high_motion_minq);
+        active_best_quality = get_active_quality(q, cpi->rc.gfu_boost,
+                                                 gf_low, gf_high,
+                                                 gf_low_motion_minq,
+                                                 gf_high_motion_minq);
       }
       // Constrained quality use slightly lower active best.
-      cpi->rc.active_best_quality = cpi->rc.active_best_quality * 15 / 16;
+      active_best_quality = active_best_quality * 15 / 16;
 
     } else if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
       if (!cpi->refresh_alt_ref_frame) {
-        cpi->rc.active_best_quality = cpi->cq_target_quality;
+        active_best_quality = cpi->cq_target_quality;
       } else {
         if (cpi->frames_since_key > 1) {
-          cpi->rc.active_best_quality = get_active_quality(
-              q, cpi->rc.gfu_boost, low, high,
+          active_best_quality = get_active_quality(
+              q, cpi->rc.gfu_boost, gf_low, gf_high,
               afq_low_motion_minq, afq_high_motion_minq);
         } else {
-          cpi->rc.active_best_quality = get_active_quality(
-              q, cpi->rc.gfu_boost, low, high,
+          active_best_quality = get_active_quality(
+              q, cpi->rc.gfu_boost, gf_low, gf_high,
               gf_low_motion_minq, gf_high_motion_minq);
         }
       }
     } else {
-        cpi->rc.active_best_quality = get_active_quality(
-            q, cpi->rc.gfu_boost, low, high,
-            gf_low_motion_minq, gf_high_motion_minq);
+      active_best_quality = get_active_quality(
+          q, cpi->rc.gfu_boost, gf_low, gf_high,
+          gf_low_motion_minq, gf_high_motion_minq);
     }
   } else {
     if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
-      cpi->rc.active_best_quality = cpi->cq_target_quality;
+      active_best_quality = cpi->cq_target_quality;
     } else {
-      cpi->rc.active_best_quality = inter_minq[q];
-      // 1-pass: for now, use the average Q for the active_best, if its lower
-      // than active_worst.
-      if (cpi->pass == 0 && (cpi->rc.avg_frame_qindex < q))
-        cpi->rc.active_best_quality = inter_minq[cpi->rc.avg_frame_qindex];
+      if (cpi->pass == 0 &&
+          cpi->rc.avg_frame_qindex < active_worst_quality)
+        // 1-pass: for now, use the average Q for the active_best, if it's lower
+        // than active_worst.
+        active_best_quality = inter_minq[cpi->rc.avg_frame_qindex];
+      else
+        active_best_quality = inter_minq[active_worst_quality];
 
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
       if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
-          (cpi->rc.active_best_quality < cpi->cq_target_quality)) {
+          (active_best_quality < cpi->cq_target_quality)) {
         // If we are strongly undershooting the target rate in the last
         // frames then use the user passed in cq value not the auto
         // cq value.
         if (cpi->rc.rolling_actual_bits < cpi->rc.min_frame_bandwidth)
-          cpi->rc.active_best_quality = cpi->oxcf.cq_level;
+          active_best_quality = cpi->oxcf.cq_level;
         else
-          cpi->rc.active_best_quality = cpi->cq_target_quality;
+          active_best_quality = cpi->cq_target_quality;
       }
     }
   }
 
   // Clip the active best and worst quality values to limits
-  if (cpi->rc.active_worst_quality > cpi->rc.worst_quality)
-    cpi->rc.active_worst_quality = cpi->rc.worst_quality;
+  if (active_worst_quality > cpi->rc.worst_quality)
+    active_worst_quality = cpi->rc.worst_quality;
 
-  if (cpi->rc.active_best_quality < cpi->rc.best_quality)
-    cpi->rc.active_best_quality = cpi->rc.best_quality;
+  if (active_best_quality < cpi->rc.best_quality)
+    active_best_quality = cpi->rc.best_quality;
 
-  if (cpi->rc.active_best_quality > cpi->rc.worst_quality)
-    cpi->rc.active_best_quality = cpi->rc.worst_quality;
+  if (active_best_quality > cpi->rc.worst_quality)
+    active_best_quality = cpi->rc.worst_quality;
 
-  if (cpi->rc.active_worst_quality < cpi->rc.active_best_quality)
-    cpi->rc.active_worst_quality = cpi->rc.active_best_quality;
+  if (active_worst_quality < active_best_quality)
+    active_worst_quality = active_best_quality;
 
-  *top_index = cpi->rc.active_worst_quality;
-  *bottom_index = cpi->rc.active_best_quality;
+  *top_index_prop = active_worst_quality;
+  *top_index = active_worst_quality;
+  *bottom_index = active_best_quality;
 
 #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
   // Limit Q range for the adaptive loop.
   if (cm->frame_type == KEY_FRAME && !cpi->this_key_frame_forced) {
     if (!(cpi->pass == 0 && cpi->common.current_video_frame == 0)) {
+      *top_index = active_worst_quality;
       *top_index =
-          (cpi->rc.active_worst_quality + cpi->rc.active_best_quality * 3) / 4;
+          (active_worst_quality + active_best_quality * 3) / 4;
     }
   } else if (!cpi->is_src_frame_alt_ref &&
              (cpi->oxcf.end_usage != USAGE_STREAM_FROM_SERVER) &&
              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
     *top_index =
-      (cpi->rc.active_worst_quality + cpi->rc.active_best_quality) / 2;
+      (active_worst_quality + active_best_quality) / 2;
   }
 #endif
 
   if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
-    q = cpi->rc.active_best_quality;
+    q = active_best_quality;
   // Special case code to try and match quality with forced key frames
   } else if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) {
     q = cpi->rc.last_boosted_qindex;
@@ -614,17 +618,35 @@
       // 1-pass: for now, use per-frame-bw for target size of frame, scaled
       // by |x| for key frame.
       int scale = (cm->frame_type == KEY_FRAME) ? 5 : 1;
-      q = vp9_rc_regulate_q(cpi, scale * cpi->rc.av_per_frame_bandwidth);
+      q = vp9_rc_regulate_q(cpi, scale * cpi->rc.av_per_frame_bandwidth,
+                            active_best_quality, active_worst_quality);
     } else {
-      q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target);
+      q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+                            active_best_quality, active_worst_quality);
     }
     if (q > *top_index)
       q = *top_index;
   }
+#if CONFIG_MULTIPLE_ARF
+  // Force the quantizer determined by the coding order pattern.
+  if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) &&
+      cpi->oxcf.end_usage != USAGE_CONSTANT_QUALITY) {
+    double new_q;
+    double current_q = vp9_convert_qindex_to_q(active_worst_quality);
+    int level = cpi->this_frame_weight;
+    assert(level >= 0);
+    new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level)));
+    q = active_worst_quality +
+        vp9_compute_qdelta(cpi, current_q, new_q);
+
+    *bottom_index = q;
+    *top_index    = q;
+    printf("frame:%d q:%d\n", cm->current_video_frame, q);
+  }
+#endif
   return q;
 }
 
-
 static int estimate_keyframe_frequency(VP9_COMP *cpi) {
   int i;
 
@@ -680,8 +702,7 @@
   cpi->rc.key_frame_count++;
 }
 
-
-static void compute_frame_size_bounds(const VP9_COMP *cpi,
+void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
                                       int this_frame_target,
                                       int *frame_under_shoot_limit,
                                       int *frame_over_shoot_limit) {
@@ -720,9 +741,7 @@
 }
 
 // return of 0 means drop frame
-int vp9_rc_pick_frame_size_and_bounds(VP9_COMP *cpi,
-                                      int *frame_under_shoot_limit,
-                                      int *frame_over_shoot_limit) {
+int vp9_rc_pick_frame_size_target(VP9_COMP *cpi) {
   VP9_COMMON *cm = &cpi->common;
 
   if (cm->frame_type == KEY_FRAME)
@@ -733,13 +752,11 @@
   // Target rate per SB64 (including partial SB64s.
   cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) /
                              (cpi->common.width * cpi->common.height);
-  compute_frame_size_bounds(cpi, cpi->rc.this_frame_target,
-                            frame_under_shoot_limit, frame_over_shoot_limit);
-
   return 1;
 }
 
-void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used, int q) {
+void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used,
+                              int worst_q) {
   VP9_COMMON *const cm = &cpi->common;
   // Update rate control heuristics
   cpi->rc.projected_frame_size = (bytes_used << 3);
@@ -750,6 +767,7 @@
             cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
 
   cpi->rc.last_q[cm->frame_type] = cm->base_qindex;
+  cpi->rc.active_worst_quality = worst_q;
 
   // Keep record of last boosted (KF/KF/ARF) Q value.
   // If the current frame is coded at a lower Q then we also update it.
@@ -777,11 +795,11 @@
   if (cm->frame_type != KEY_FRAME &&
       !cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
     cpi->rc.ni_frames++;
-    cpi->rc.tot_q += vp9_convert_qindex_to_q(q);
+    cpi->rc.tot_q += vp9_convert_qindex_to_q(cm->base_qindex);
     cpi->rc.avg_q = cpi->rc.tot_q / (double)cpi->rc.ni_frames;
 
     // Calculate the average Q for normal inter frames (not key or GFU frames).
-    cpi->rc.ni_tot_qi += q;
+    cpi->rc.ni_tot_qi += cm->base_qindex;
     cpi->rc.ni_av_qi = cpi->rc.ni_tot_qi / cpi->rc.ni_frames;
   }
 
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index f01d186..063ac8f 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -31,23 +31,29 @@
 void vp9_rc_init_minq_luts(void);
 
 // return of 0 means drop frame
-// Changes rc.this_frame_target and rc.sb64_rate_target
-int vp9_rc_pick_frame_size_and_bounds(VP9_COMP *cpi,
+// Changes only rc.this_frame_target and rc.sb64_target_rate
+int vp9_rc_pick_frame_size_target(VP9_COMP *cpi);
+
+void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
+                                      int this_frame_target,
                                       int *frame_under_shoot_limit,
                                       int *frame_over_shoot_limit);
+
 // Picks q and q bounds given the target for bits
-int vp9_rc_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
-                                      int * bottom_index,
-                                      int * top_index);
+int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
+                                      int *bottom_index,
+                                      int *top_index,
+                                      int *top_index_prop);
 
 // Estimates q to achieve a target bits per frame
-int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame);
+int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
+                      int active_best_quality, int active_worst_quality);
 
 // Post encode update of the rate control parameters based
 // on bytes used and q used for the frame
 void vp9_rc_postencode_update(VP9_COMP *cpi,
                               uint64_t bytes_used,
-                              int q_used);
+                              int worst_q);
 
 // estimates bits per mb for a given qindex and correction factor
 int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 84b7122..274ced1 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -122,7 +122,7 @@
 }
 static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                           int raster_block, int16_t *base) {
-  const int stride = 4 << b_width_log2(plane_bsize);
+  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   return base + raster_block_offset(plane_bsize, raster_block, stride);
 }
 
@@ -151,22 +151,22 @@
 }
 
 static void fill_token_costs(vp9_coeff_cost *c,
-                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
+                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
   int i, j, k, l;
   TX_SIZE t;
-  for (t = TX_4X4; t <= TX_32X32; t++)
-    for (i = 0; i < BLOCK_TYPES; i++)
-      for (j = 0; j < REF_TYPES; j++)
-        for (k = 0; k < COEF_BANDS; k++)
-          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+  for (t = TX_4X4; t <= TX_32X32; ++t)
+    for (i = 0; i < PLANE_TYPES; ++i)
+      for (j = 0; j < REF_TYPES; ++j)
+        for (k = 0; k < COEF_BANDS; ++k)
+          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
             vp9_prob probs[ENTROPY_NODES];
             vp9_model_to_full_probs(p[t][i][j][k][l], probs);
             vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
                             vp9_coef_tree);
             vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                  vp9_coef_tree);
-            assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
-                   c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
+            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
+                   c[t][i][j][k][1][l][EOB_TOKEN]);
           }
 }
 
@@ -283,7 +283,7 @@
 
   cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                               cm->frame_type != KEY_FRAME) ?
-                             0 : 1;
+                              0 : 1;
 
   set_block_thresholds(cpi);
 
@@ -525,10 +525,10 @@
   struct macroblockd_plane *pd = &xd->plane[plane];
   const PLANE_TYPE type = pd->plane_type;
   const int16_t *band_count = &band_counts[tx_size][1];
-  const int eob = pd->eobs[block];
+  const int eob = p->eobs[block];
   const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
   const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
-  unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
+  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
                    x->token_costs[tx_size][type][ref];
   const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
   uint8_t *p_tok = x->token_cache;
@@ -536,12 +536,12 @@
   int c, cost;
 
   // Check for consistency of tx_size with mode info
-  assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size
+  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                                       : get_uv_tx_size(mbmi) == tx_size);
 
   if (eob == 0) {
     // single eob token
-    cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
+    cost = token_costs[0][0][pt][EOB_TOKEN];
     c = 0;
   } else {
     int band_left = *band_count++;
@@ -573,7 +573,7 @@
     // eob token
     if (band_left) {
       pt = get_coef_context(nb, p_tok, c);
-      cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
+      cost += (*token_costs)[0][pt][EOB_TOKEN];
     }
   }
 
@@ -643,7 +643,7 @@
   // TODO(jingning): temporarily enabled only for luma component
   rd = MIN(rd1, rd2);
   if (plane == 0)
-    x->zcoeff_blk[tx_size][block] = !xd->plane[plane].eobs[block] ||
+    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                     (rd1 > rd2 && !xd->lossless);
 
   args->this_rate += args->rate;
@@ -739,7 +739,7 @@
     *distortion = rd_stack->this_dist;
     *rate       = rd_stack->this_rate;
     *sse        = rd_stack->this_sse;
-    *skippable  = vp9_is_skippable_in_plane(xd, bsize, plane);
+    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
   }
 }
 
@@ -772,7 +772,7 @@
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
-  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
+  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
   int64_t rd[TX_SIZES][2];
   int n, m;
   int s0, s1;
@@ -847,7 +847,7 @@
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
-  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
+  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
   int64_t rd[TX_SIZES][2];
   int n, m;
   int s0, s1;
@@ -1047,7 +1047,7 @@
                            src, src_stride,
                            dst, dst_stride);
 
-        tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
+        tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
         so = &vp9_scan_orders[TX_4X4][tx_type];
 
         if (tx_type != DCT_DCT)
@@ -1055,7 +1055,7 @@
         else
           x->fwd_txm4x4(src_diff, coeff, 8);
 
-        vp9_regular_quantize_b_4x4(x, 4, block, so->scan, so->iscan);
+        vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
 
         ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                              so->scan, so->neighbors);
@@ -1292,20 +1292,15 @@
                                        PICK_MODE_CONTEXT *ctx,
                                        int *rate, int *rate_tokenonly,
                                        int64_t *distortion, int *skippable,
-                                       BLOCK_SIZE bsize) {
+                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
   MB_PREDICTION_MODE mode;
   MB_PREDICTION_MODE mode_selected = DC_PRED;
   int64_t best_rd = INT64_MAX, this_rd;
   int this_rate_tokenonly, this_rate, s;
   int64_t this_distortion, this_sse;
 
-  // int mode_mask = (bsize <= BLOCK_8X8)
-  //                ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask;
-
-  for (mode = DC_PRED; mode <= TM_PRED; mode ++) {
-    // if (!(mode_mask & (1 << mode)))
-    if (!(cpi->sf.intra_uv_mode_mask[max_uv_txsize_lookup[bsize]]
-          & (1 << mode)))
+  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
+    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
       continue;
 
     x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;
@@ -1331,9 +1326,9 @@
         struct macroblockd_plane *const pd = x->e_mbd.plane;
         for (i = 1; i < MAX_MB_PLANE; ++i) {
           p[i].coeff    = ctx->coeff_pbuf[i][2];
-          p[i].qcoeff  = ctx->qcoeff_pbuf[i][2];
+          p[i].qcoeff   = ctx->qcoeff_pbuf[i][2];
           pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
-          pd[i].eobs    = ctx->eobs_pbuf[i][2];
+          p[i].eobs    = ctx->eobs_pbuf[i][2];
 
           ctx->coeff_pbuf[i][2]   = ctx->coeff_pbuf[i][0];
           ctx->qcoeff_pbuf[i][2]  = ctx->qcoeff_pbuf[i][0];
@@ -1343,14 +1338,13 @@
           ctx->coeff_pbuf[i][0]   = p[i].coeff;
           ctx->qcoeff_pbuf[i][0]  = p[i].qcoeff;
           ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
-          ctx->eobs_pbuf[i][0]    = pd[i].eobs;
+          ctx->eobs_pbuf[i][0]    = p[i].eobs;
         }
       }
     }
   }
 
   x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;
-
   return best_rd;
 }
 
@@ -1372,8 +1366,8 @@
 }
 
 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
-                                 BLOCK_SIZE bsize, int *rate_uv,
-                                 int *rate_uv_tokenonly,
+                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
+                                 int *rate_uv, int *rate_uv_tokenonly,
                                  int64_t *dist_uv, int *skip_uv,
                                  MB_PREDICTION_MODE *mode_uv) {
   MACROBLOCK *const x = &cpi->mb;
@@ -1388,7 +1382,7 @@
   } else {
     rd_pick_intra_sbuv_mode(cpi, x, ctx,
                             rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
-                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
+                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
   }
   *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
 }
@@ -1546,7 +1540,7 @@
       coeff = BLOCK_OFFSET(p->coeff, k);
       x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                     coeff, 8);
-      vp9_regular_quantize_b_4x4(x, 4, k, so->scan, so->iscan);
+      vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
       thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                         16, &ssz);
       thissse += ssz;
@@ -1635,6 +1629,7 @@
   MB_PREDICTION_MODE this_mode;
   MODE_INFO *mi = x->e_mbd.mi_8x8[0];
   MB_MODE_INFO *const mbmi = &mi->mbmi;
+  struct macroblock_plane *const p = &x->plane[0];
   struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
   const int label_count = 4;
   int64_t this_segment_rd = 0;
@@ -1963,11 +1958,11 @@
           bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
                                             bsi->rdstat[i][mode_idx].brate, 0);
           bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
-          bsi->rdstat[i][mode_idx].eobs = pd->eobs[i];
+          bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
           if (num_4x4_blocks_wide > 1)
-            bsi->rdstat[i + 1][mode_idx].eobs = pd->eobs[i + 1];
+            bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
           if (num_4x4_blocks_high > 1)
-            bsi->rdstat[i + 2][mode_idx].eobs = pd->eobs[i + 2];
+            bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
         }
 
         if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
@@ -2065,7 +2060,7 @@
     mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
     if (has_second_ref(mbmi))
       mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
-    xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
+    x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
     mi->bmi[i].as_mode = bsi->modes[i];
   }
 
@@ -2075,7 +2070,7 @@
   *returntotrate = bsi->r;
   *returndistortion = bsi->d;
   *returnyrate = bsi->segment_yrate;
-  *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0);
+  *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
   *psse = bsi->sse;
   mbmi->mode = bsi->modes[3];
 
@@ -2149,11 +2144,11 @@
     vpx_memset(ref_costs_comp,   0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
     *comp_mode_p = 128;
   } else {
-    vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd);
+    vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
     vp9_prob comp_inter_p = 128;
 
-    if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) {
-      comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd);
+    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
+      comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
       *comp_mode_p = comp_inter_p;
     } else {
       *comp_mode_p = 128;
@@ -2161,12 +2156,12 @@
 
     ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
 
-    if (cm->comp_pred_mode != COMPOUND_REFERENCE) {
+    if (cm->reference_mode != COMPOUND_REFERENCE) {
       vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
       vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
 
-      if (cm->comp_pred_mode == REFERENCE_MODE_SELECT)
+      if (cm->reference_mode == REFERENCE_MODE_SELECT)
         base_cost += vp9_cost_bit(comp_inter_p, 0);
 
       ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
@@ -2181,11 +2176,11 @@
       ref_costs_single[GOLDEN_FRAME] = 512;
       ref_costs_single[ALTREF_FRAME] = 512;
     }
-    if (cm->comp_pred_mode != SINGLE_REFERENCE) {
+    if (cm->reference_mode != SINGLE_REFERENCE) {
       vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
 
-      if (cm->comp_pred_mode == REFERENCE_MODE_SELECT)
+      if (cm->reference_mode == REFERENCE_MODE_SELECT)
         base_cost += vp9_cost_bit(comp_inter_p, 1);
 
       ref_costs_comp[LAST_FRAME]   = base_cost + vp9_cost_bit(ref_comp_p, 0);
@@ -2327,7 +2322,6 @@
   int_mv mvp_full;
   int ref = mbmi->ref_frame[0];
   int_mv ref_mv = mbmi->ref_mvs[ref][0];
-  const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
 
   int tmp_col_min = x->mv_col_min;
   int tmp_col_max = x->mv_col_max;
@@ -2401,24 +2395,24 @@
     bestsme = vp9_hex_search(x, &mvp_full.as_mv,
                              step_param,
                              sadpb, 1,
-                             &cpi->fn_ptr[block_size], 1,
+                             &cpi->fn_ptr[bsize], 1,
                              &ref_mv.as_mv, &tmp_mv->as_mv);
   } else if (cpi->sf.search_method == SQUARE) {
     bestsme = vp9_square_search(x, &mvp_full.as_mv,
                                 step_param,
                                 sadpb, 1,
-                                &cpi->fn_ptr[block_size], 1,
+                                &cpi->fn_ptr[bsize], 1,
                                 &ref_mv.as_mv, &tmp_mv->as_mv);
   } else if (cpi->sf.search_method == BIGDIA) {
     bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
                                 step_param,
                                 sadpb, 1,
-                                &cpi->fn_ptr[block_size], 1,
+                                &cpi->fn_ptr[bsize], 1,
                                 &ref_mv.as_mv, &tmp_mv->as_mv);
   } else {
     bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                      sadpb, further_steps, 1,
-                                     &cpi->fn_ptr[block_size],
+                                     &cpi->fn_ptr[bsize],
                                      &ref_mv, tmp_mv);
   }
 
@@ -2433,7 +2427,7 @@
     cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
                                  cm->allow_high_precision_mv,
                                  x->errorperbit,
-                                 &cpi->fn_ptr[block_size],
+                                 &cpi->fn_ptr[bsize],
                                  0, cpi->sf.subpel_iters_per_step,
                                  x->nmvjointcost, x->mvcost,
                                  &dis, &sse);
@@ -2457,13 +2451,13 @@
                                 int mi_row, int mi_col,
                                 int_mv single_newmv[MAX_REF_FRAMES],
                                 int *rate_mv) {
-  int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
+  const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+  const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
   const int refs[2] = { mbmi->ref_frame[0],
                         mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
   int_mv ref_mv[2];
-  const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
   int ite, ref;
   // Prediction buffer from second frame.
   uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
@@ -2537,7 +2531,7 @@
     // Small-range full-pixel motion search
     bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
                                        search_range,
-                                       &cpi->fn_ptr[block_size],
+                                       &cpi->fn_ptr[bsize],
                                        x->nmvjointcost, x->mvcost,
                                        &ref_mv[id], second_pred,
                                        pw, ph);
@@ -2556,7 +2550,7 @@
           &ref_mv[id].as_mv,
           cpi->common.allow_high_precision_mv,
           x->errorperbit,
-          &cpi->fn_ptr[block_size],
+          &cpi->fn_ptr[bsize],
           0, cpi->sf.subpel_iters_per_step,
           x->nmvjointcost, x->mvcost,
           &dis, &sse, second_pred,
@@ -2738,13 +2732,10 @@
   *rate2 += cost_mv_ref(cpi, this_mode,
                         mbmi->mode_context[mbmi->ref_frame[0]]);
 
-  if (!(*mode_excluded)) {
-    if (is_comp_pred) {
-      *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_REFERENCE);
-    } else {
-      *mode_excluded = (cpi->common.comp_pred_mode == COMPOUND_REFERENCE);
-    }
-  }
+  if (!(*mode_excluded))
+    *mode_excluded = is_comp_pred
+        ? cpi->common.reference_mode == SINGLE_REFERENCE
+        : cpi->common.reference_mode == COMPOUND_REFERENCE;
 
   pred_exists = 0;
   // Are all MVs integer pel for Y and UV
@@ -2943,7 +2934,7 @@
               x->skip = 1;
 
               // The cost of skip bit needs to be added.
-              *rate2 += vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
+              *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
 
               // Scaling factor for SSE from spatial domain to frequency domain
               // is 16. Adjust distortion accordingly.
@@ -3010,7 +3001,7 @@
     p[i].coeff    = ctx->coeff_pbuf[i][1];
     p[i].qcoeff  = ctx->qcoeff_pbuf[i][1];
     pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
-    pd[i].eobs    = ctx->eobs_pbuf[i][1];
+    p[i].eobs    = ctx->eobs_pbuf[i][1];
 
     ctx->coeff_pbuf[i][1]   = ctx->coeff_pbuf[i][0];
     ctx->qcoeff_pbuf[i][1]  = ctx->qcoeff_pbuf[i][0];
@@ -3020,7 +3011,7 @@
     ctx->coeff_pbuf[i][0]   = p[i].coeff;
     ctx->qcoeff_pbuf[i][0]  = p[i].qcoeff;
     ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
-    ctx->eobs_pbuf[i][0]    = pd[i].eobs;
+    ctx->eobs_pbuf[i][0]    = p[i].eobs;
   }
 }
 
@@ -3033,9 +3024,11 @@
   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
   int y_skip = 0, uv_skip = 0;
   int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
+  TX_SIZE max_uv_tx_size;
   x->skip_encode = 0;
   ctx->skip = 0;
   xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
+
   if (bsize >= BLOCK_8X8) {
     if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                                &dist_y, &y_skip, bsize, tx_cache,
@@ -3043,8 +3036,9 @@
       *returnrate = INT_MAX;
       return;
     }
+    max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
-                            &dist_uv, &uv_skip, bsize);
+                            &dist_uv, &uv_skip, bsize, max_uv_tx_size);
   } else {
     y_skip = 0;
     if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
@@ -3052,19 +3046,19 @@
       *returnrate = INT_MAX;
       return;
     }
+    max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
-                            &dist_uv, &uv_skip, BLOCK_8X8);
+                            &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
   }
 
   if (y_skip && uv_skip) {
     *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
-                  vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
+                  vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
     *returndist = dist_y + dist_uv;
     vp9_zero(ctx->tx_rd_diff);
   } else {
     int i;
-    *returnrate = rate_y + rate_uv +
-        vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0);
+    *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
     *returndist = dist_y + dist_uv;
     if (cpi->sf.tx_size_search_method == USE_FULL_RD)
       for (i = 0; i < TX_MODES; i++) {
@@ -3317,17 +3311,14 @@
     if (comp_pred) {
       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
         continue;
-      set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
 
-      mode_excluded = mode_excluded
-                         ? mode_excluded
-                         : cm->comp_pred_mode == SINGLE_REFERENCE;
+      set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
+      mode_excluded = mode_excluded ? mode_excluded
+                                    : cm->reference_mode == SINGLE_REFERENCE;
     } else {
-      if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
-        mode_excluded =
-            mode_excluded ?
-                mode_excluded : cm->comp_pred_mode == COMPOUND_REFERENCE;
-      }
+      if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME)
+        mode_excluded = mode_excluded ?
+            mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
     }
 
     // Select prediction reference frames.
@@ -3416,12 +3407,11 @@
       if (rate_y == INT_MAX)
         continue;
 
-      uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
+      uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
       if (rate_uv_intra[uv_tx] == INT_MAX) {
-        choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[uv_tx],
-                             &rate_uv_tokenonly[uv_tx],
-                             &dist_uv[uv_tx], &skip_uv[uv_tx],
-                             &mode_uv[uv_tx]);
+        choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
+                             &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
+                             &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
       }
 
       rate_uv = rate_uv_tokenonly[uv_tx];
@@ -3449,9 +3439,8 @@
         continue;
     }
 
-    if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) {
+    if (cm->reference_mode == REFERENCE_MODE_SELECT)
       rate2 += compmode_cost;
-    }
 
     // Estimate the reference frame signaling cost and add it
     // to the rolling cost variable.
@@ -3480,9 +3469,7 @@
           int prob_skip_cost;
 
           // Cost the skip mb case
-          vp9_prob skip_prob =
-            vp9_get_pred_prob_mbskip(cm, xd);
-
+          vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
           if (skip_prob) {
             prob_skip_cost = vp9_cost_bit(skip_prob, 1);
             rate2 += prob_skip_cost;
@@ -3492,14 +3479,10 @@
         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
           // Add in the cost of the no skip flag.
-          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
-                                            0);
-          rate2 += prob_skip_cost;
+          rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
         } else {
           // FIXME(rbultje) make this work for splitmv also
-          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
-                                            1);
-          rate2 += prob_skip_cost;
+          rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
           distortion2 = total_sse;
           assert(total_sse >= 0);
           rate2 -= (rate_y + rate_uv);
@@ -3509,9 +3492,7 @@
         }
       } else if (mb_skip_allowed) {
         // Add in the cost of the no skip flag.
-        int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
-                                          0);
-        rate2 += prob_skip_cost;
+        rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
       }
 
       // Calculate the final RD estimate for this mode.
@@ -3596,7 +3577,7 @@
     if (!disable_skip && ref_frame != INTRA_FRAME) {
       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
 
-      if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) {
+      if (cm->reference_mode == REFERENCE_MODE_SELECT) {
         single_rate = rate2 - compmode_cost;
         hybrid_rate = rate2;
       } else {
@@ -3675,12 +3656,15 @@
   if (cpi->sf.use_uv_intra_rd_estimate) {
     // Do Intra UV best rd mode selection if best mode choice above was intra.
     if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
-      TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
+      TX_SIZE uv_tx_size;
+      *mbmi = best_mbmode;
+      uv_tx_size = get_uv_tx_size(mbmi);
       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
                               &rate_uv_tokenonly[uv_tx_size],
                               &dist_uv[uv_tx_size],
                               &skip_uv[uv_tx_size],
-                              bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
+                              bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
+                              uv_tx_size);
     }
   }
 
@@ -3986,14 +3970,12 @@
         continue;
       set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
 
-      mode_excluded = mode_excluded
-                         ? mode_excluded
-                         : cm->comp_pred_mode == SINGLE_REFERENCE;
+      mode_excluded = mode_excluded ? mode_excluded
+                                    : cm->reference_mode == SINGLE_REFERENCE;
     } else {
       if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
-        mode_excluded =
-            mode_excluded ?
-                mode_excluded : cm->comp_pred_mode == COMPOUND_REFERENCE;
+        mode_excluded = mode_excluded ?
+            mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
       }
     }
 
@@ -4045,7 +4027,8 @@
       distortion2 += distortion_y;
 
       if (rate_uv_intra[TX_4X4] == INT_MAX) {
-        choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[TX_4X4],
+        choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
+                             &rate_uv_intra[TX_4X4],
                              &rate_uv_tokenonly[TX_4X4],
                              &dist_uv[TX_4X4], &skip_uv[TX_4X4],
                              &mode_uv[TX_4X4]);
@@ -4136,7 +4119,7 @@
               tmp_best_mbmode = *mbmi;
               for (i = 0; i < 4; i++) {
                 tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
-                x->zcoeff_blk[TX_4X4][i] = !xd->plane[0].eobs[i];
+                x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
               }
               pred_exists = 1;
               if (switchable_filter_index == 0 &&
@@ -4197,12 +4180,11 @@
       if (cpi->common.mcomp_filter_type == SWITCHABLE)
         rate2 += get_switchable_rate(x);
 
-      if (!mode_excluded) {
-        if (comp_pred)
-          mode_excluded = cpi->common.comp_pred_mode == SINGLE_REFERENCE;
-        else
-          mode_excluded = cpi->common.comp_pred_mode == COMPOUND_REFERENCE;
-      }
+      if (!mode_excluded)
+         mode_excluded = comp_pred
+             ? cpi->common.reference_mode == SINGLE_REFERENCE
+             : cpi->common.reference_mode == COMPOUND_REFERENCE;
+
       compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
 
       tmp_best_rdu = best_rd -
@@ -4229,9 +4211,8 @@
       }
     }
 
-    if (cpi->common.comp_pred_mode == REFERENCE_MODE_SELECT) {
+    if (cpi->common.reference_mode == REFERENCE_MODE_SELECT)
       rate2 += compmode_cost;
-    }
 
     // Estimate the reference frame signaling cost and add it
     // to the rolling cost variable.
@@ -4254,14 +4235,10 @@
         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
           // Add in the cost of the no skip flag.
-          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
-                                            0);
-          rate2 += prob_skip_cost;
+          rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
         } else {
           // FIXME(rbultje) make this work for splitmv also
-          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
-                                            1);
-          rate2 += prob_skip_cost;
+          rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
           distortion2 = total_sse;
           assert(total_sse >= 0);
           rate2 -= (rate_y + rate_uv);
@@ -4271,9 +4248,7 @@
         }
       } else if (mb_skip_allowed) {
         // Add in the cost of the no skip flag.
-        int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
-                                          0);
-        rate2 += prob_skip_cost;
+        rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
       }
 
       // Calculate the final RD estimate for this mode.
@@ -4347,7 +4322,7 @@
     if (!disable_skip && ref_frame != INTRA_FRAME) {
       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
 
-      if (cpi->common.comp_pred_mode == REFERENCE_MODE_SELECT) {
+      if (cpi->common.reference_mode == REFERENCE_MODE_SELECT) {
         single_rate = rate2 - compmode_cost;
         hybrid_rate = rate2;
       } else {
@@ -4423,12 +4398,14 @@
   if (cpi->sf.use_uv_intra_rd_estimate) {
     // Do Intra UV best rd mode selection if best mode choice above was intra.
     if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) {
-      TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
+      TX_SIZE uv_tx_size;
+      *mbmi = best_mbmode;
+      uv_tx_size = get_uv_tx_size(mbmi);
       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
                               &rate_uv_tokenonly[uv_tx_size],
                               &dist_uv[uv_tx_size],
                               &skip_uv[uv_tx_size],
-                              BLOCK_8X8);
+                              BLOCK_8X8, uv_tx_size);
     }
   }
 
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 389ec15..970a27a 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -27,30 +27,30 @@
 const int *vp9_dct_value_cost_ptr;
 
 // Array indices are identical to previously-existing CONTEXT_NODE indices
-const vp9_tree_index vp9_coef_tree[TREE_SIZE(MAX_ENTROPY_TOKENS)] = {
-  -DCT_EOB_TOKEN, 2,                          /* 0 = EOB */
-  -ZERO_TOKEN, 4,                             /* 1 = ZERO */
-  -ONE_TOKEN, 6,                              /* 2 = ONE */
-  8, 12,                                      /* 3 = LOW_VAL */
-  -TWO_TOKEN, 10,                            /* 4 = TWO */
-  -THREE_TOKEN, -FOUR_TOKEN,                /* 5 = THREE */
-  14, 16,                                   /* 6 = HIGH_LOW */
-  -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2,   /* 7 = CAT_ONE */
-  18, 20,                                   /* 8 = CAT_THREEFOUR */
-  -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4,   /* 9 = CAT_THREE */
-  -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6    /* 10 = CAT_FIVE */
+const vp9_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
+  -EOB_TOKEN, 2,                       // 0  = EOB
+  -ZERO_TOKEN, 4,                      // 1  = ZERO
+  -ONE_TOKEN, 6,                       // 2  = ONE
+  8, 12,                               // 3  = LOW_VAL
+  -TWO_TOKEN, 10,                      // 4  = TWO
+  -THREE_TOKEN, -FOUR_TOKEN,           // 5  = THREE
+  14, 16,                              // 6  = HIGH_LOW
+  -CATEGORY1_TOKEN, -CATEGORY2_TOKEN,  // 7  = CAT_ONE
+  18, 20,                              // 8  = CAT_THREEFOUR
+  -CATEGORY3_TOKEN, -CATEGORY4_TOKEN,  // 9  = CAT_THREE
+  -CATEGORY5_TOKEN, -CATEGORY6_TOKEN   // 10 = CAT_FIVE
 };
 
 // Unconstrained Node Tree
-const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(MAX_ENTROPY_TOKENS)] = {
-  2, 6,                                     /* 0 = LOW_VAL */
-  -TWO_TOKEN, 4,                            /* 1 = TWO */
-  -THREE_TOKEN, -FOUR_TOKEN,                /* 2 = THREE */
-  8, 10,                                    /* 3 = HIGH_LOW */
-  -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2,   /* 4 = CAT_ONE */
-  12, 14,                                   /* 5 = CAT_THREEFOUR */
-  -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4,   /* 6 = CAT_THREE */
-  -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6    /* 7 = CAT_FIVE */
+const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
+  2, 6,                                // 0 = LOW_VAL
+  -TWO_TOKEN, 4,                       // 1 = TWO
+  -THREE_TOKEN, -FOUR_TOKEN,           // 2 = THREE
+  8, 10,                               // 3 = HIGH_LOW
+  -CATEGORY1_TOKEN, -CATEGORY2_TOKEN,  // 4 = CAT_ONE
+  12, 14,                              // 5 = CAT_THREEFOUR
+  -CATEGORY3_TOKEN, -CATEGORY4_TOKEN,  // 6 = CAT_THREE
+  -CATEGORY5_TOKEN, -CATEGORY6_TOKEN   // 7 = CAT_FIVE
 };
 
 static const vp9_prob Pcat1[] = { 159};
@@ -84,22 +84,22 @@
   init_bit_tree(cat6, 14);
 }
 
-const vp9_extra_bit vp9_extra_bits[MAX_ENTROPY_TOKENS] = {
+const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS] = {
   {0, 0, 0, 0},           // ZERO_TOKEN
   {0, 0, 0, 1},           // ONE_TOKEN
   {0, 0, 0, 2},           // TWO_TOKEN
   {0, 0, 0, 3},           // THREE_TOKEN
   {0, 0, 0, 4},           // FOUR_TOKEN
-  {cat1, Pcat1, 1, 5},    // DCT_VAL_CATEGORY1
-  {cat2, Pcat2, 2, 7},    // DCT_VAL_CATEGORY2
-  {cat3, Pcat3, 3, 11},   // DCT_VAL_CATEGORY3
-  {cat4, Pcat4, 4, 19},   // DCT_VAL_CATEGORY4
-  {cat5, Pcat5, 5, 35},   // DCT_VAL_CATEGORY5
-  {cat6, Pcat6, 14, 67},  // DCT_VAL_CATEGORY6
-  {0, 0, 0, 0}            // DCT_EOB_TOKEN
+  {cat1, Pcat1, 1, 5},    // CATEGORY1_TOKEN
+  {cat2, Pcat2, 2, 7},    // CATEGORY2_TOKEN
+  {cat3, Pcat3, 3, 11},   // CATEGORY3_TOKEN
+  {cat4, Pcat4, 4, 19},   // CATEGORY4_TOKEN
+  {cat5, Pcat5, 5, 35},   // CATEGORY5_TOKEN
+  {cat6, Pcat6, 14, 67},  // CATEGORY6_TOKEN
+  {0, 0, 0, 0}            // EOB_TOKEN
 };
 
-struct vp9_token vp9_coef_encodings[MAX_ENTROPY_TOKENS];
+struct vp9_token vp9_coef_encodings[ENTROPY_TOKENS];
 
 void vp9_coef_tree_initialize() {
   init_bit_trees();
@@ -168,10 +168,11 @@
                                   TX_SIZE tx_size, void *arg) {
   struct tokenize_b_args* const args = arg;
   MACROBLOCKD *const xd = args->xd;
+  struct macroblock_plane *p = &args->cpi->mb.plane[plane];
   struct macroblockd_plane *pd = &xd->plane[plane];
   int aoff, loff;
   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff);
-  set_contexts(xd, pd, plane_bsize, tx_size, pd->eobs[block] > 0, aoff, loff);
+  set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0, aoff, loff);
 }
 
 static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -181,16 +182,15 @@
   MACROBLOCKD *xd = args->xd;
   TOKENEXTRA **tp = args->tp;
   uint8_t *token_cache = args->token_cache;
+  struct macroblock_plane *p = &cpi->mb.plane[plane];
   struct macroblockd_plane *pd = &xd->plane[plane];
   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
   int pt; /* near block/prev token context index */
   int c = 0, rc = 0;
   TOKENEXTRA *t = *tp;        /* store tokens starting here */
-  const int eob = pd->eobs[block];
+  const int eob = p->eobs[block];
   const PLANE_TYPE type = pd->plane_type;
-  struct macroblock_plane *p = &cpi->mb.plane[plane];
   const int16_t *qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
-
   const int segment_id = mbmi->segment_id;
   const int16_t *scan, *nb;
   const scan_order *so;
@@ -226,7 +226,7 @@
       t->extra = vp9_dct_value_tokens_ptr[v].extra;
       token    = vp9_dct_value_tokens_ptr[v].token;
     } else {
-      token = DCT_EOB_TOKEN;
+      token = EOB_TOKEN;
     }
 
     t->token = token;
@@ -249,7 +249,7 @@
 }
 
 struct is_skippable_args {
-  MACROBLOCKD *xd;
+  MACROBLOCK *x;
   int *skippable;
 };
 
@@ -257,21 +257,21 @@
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                          void *argv) {
   struct is_skippable_args *args = argv;
-  args->skippable[0] &= (!args->xd->plane[plane].eobs[block]);
+  args->skippable[0] &= (!args->x->plane[plane].eobs[block]);
 }
 
-int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
+static int sb_is_skippable(MACROBLOCK *x, BLOCK_SIZE bsize) {
   int result = 1;
-  struct is_skippable_args args = {xd, &result};
-  foreach_transformed_block(xd, bsize, is_skippable, &args);
+  struct is_skippable_args args = {x, &result};  // Callback ANDs into result.
+  foreach_transformed_block(&x->e_mbd, bsize, is_skippable, &args);
   return result;
 }
 
-int vp9_is_skippable_in_plane(MACROBLOCKD *xd, BLOCK_SIZE bsize,
-                              int plane) {
+int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
   int result = 1;
-  struct is_skippable_args args = {xd, &result};
-  foreach_transformed_block_in_plane(xd, bsize, plane, is_skippable, &args);
+  struct is_skippable_args args = {x, &result};  // Callback ANDs into result.
+  foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane, is_skippable,
+                                     &args);
   return result;
 }
 
@@ -281,15 +281,15 @@
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
   TOKENEXTRA *t_backup = *t;
-  const int mb_skip_context = vp9_get_pred_context_mbskip(xd);
+  const int ctx = vp9_get_skip_context(xd);
   const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id,
                                               SEG_LVL_SKIP);
   struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size, cpi->mb.token_cache};
 
-  mbmi->skip_coeff = vp9_sb_is_skippable(xd, bsize);
+  mbmi->skip_coeff = sb_is_skippable(&cpi->mb, bsize);
   if (mbmi->skip_coeff) {
     if (!dry_run)
-      cm->counts.mbskip[mb_skip_context][1] += skip_inc;
+      cm->counts.mbskip[ctx][1] += skip_inc;
     reset_skip_context(xd, bsize);
     if (dry_run)
       *t = t_backup;
@@ -297,7 +297,7 @@
   }
 
   if (!dry_run) {
-    cm->counts.mbskip[mb_skip_context][0] += skip_inc;
+    cm->counts.mbskip[ctx][0] += skip_inc;
     foreach_transformed_block(xd, bsize, tokenize_b, &arg);
   } else {
     foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index 2e3bf52..67e6c9d 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -12,10 +12,14 @@
 #define VP9_ENCODER_VP9_TOKENIZE_H_
 
 #include "vp9/common/vp9_entropy.h"
+
 #include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_treewriter.h"
 
 void vp9_tokenize_initialize();
 
+#define EOSB_TOKEN 127     // Not signalled, encoder only
+
 typedef struct {
   int16_t token;
   int16_t extra;
@@ -32,9 +36,8 @@
 extern const vp9_tree_index vp9_coef_con_tree[];
 extern struct vp9_token vp9_coef_encodings[];
 
-int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE bsize);
-int vp9_is_skippable_in_plane(MACROBLOCKD *xd, BLOCK_SIZE bsize,
-                              int plane);
+int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
+
 struct VP9_COMP;
 
 void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
diff --git a/vp9/encoder/vp9_treewriter.c b/vp9/encoder/vp9_treewriter.c
index e4aed53..35e5a8f 100644
--- a/vp9/encoder/vp9_treewriter.c
+++ b/vp9/encoder/vp9_treewriter.c
@@ -36,3 +36,50 @@
   costs[-tree[0]] = vp9_cost_bit(probs[0], 0);
   cost(costs, tree, probs, 2, 0);
 }
+
+static void tree2tok(struct vp9_token *tokens, const vp9_tree_index *tree,
+                     int i, int v, int l) {
+  v += v;  // Double the value inherited from the parent level.
+  ++l;     // One level deeper, so the recorded length grows by one.
+  // Visit tree[i] with v even, then tree[i + 1] with v odd.
+  do {
+    const vp9_tree_index j = tree[i++];
+    if (j <= 0) {  // Leaf: store the value and length in tokens[-j].
+      tokens[-j].value = v;
+      tokens[-j].len = l;
+    } else {  // Otherwise j indexes the next pair of entries; recurse.
+      tree2tok(tokens, tree, j, v, l);
+    }
+  } while (++v & 1);  // Two passes: ends when v becomes even again.
+}
+
+void vp9_tokens_from_tree(struct vp9_token *tokens,
+                          const vp9_tree_index *tree) {
+  tree2tok(tokens, tree, 0, 0, 0);  // Start at index 0, value 0, length 0.
+}
+
+static unsigned int convert_distribution(unsigned int i, vp9_tree tree,
+                                         unsigned int branch_ct[][2],
+                                         const unsigned int num_events[]) {
+  unsigned int left, right;
+  // tree[i]: a value <= 0 is a leaf counted from num_events, else recurse.
+  if (tree[i] <= 0)
+    left = num_events[-tree[i]];
+  else
+    left = convert_distribution(tree[i], tree, branch_ct, num_events);
+  // tree[i + 1] is handled the same way.
+  if (tree[i + 1] <= 0)
+    right = num_events[-tree[i + 1]];
+  else
+    right = convert_distribution(tree[i + 1], tree, branch_ct, num_events);
+  // Record both counts in row i / 2 and return their total to the caller.
+  branch_ct[i >> 1][0] = left;
+  branch_ct[i >> 1][1] = right;
+  return left + right;
+}
+
+void vp9_tree_probs_from_distribution(vp9_tree tree,
+                                      unsigned int branch_ct[/* n-1 */][2],
+                                      const unsigned int num_events[/* n */]) {
+  convert_distribution(0, tree, branch_ct, num_events);  // Start at index 0.
+}
diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h
index 3245960..a2f9df1 100644
--- a/vp9/encoder/vp9_treewriter.h
+++ b/vp9/encoder/vp9_treewriter.h
@@ -44,6 +44,14 @@
   } while (len);
 }
 
+struct vp9_token {
+  int value;  // Value for this token; filled in by vp9_tokens_from_tree().
+  int len;    // Length paired with value; filled in by vp9_tokens_from_tree().
+};
+
+
+void vp9_tokens_from_tree(struct vp9_token*, const vp9_tree_index *);
+
 static INLINE void write_token(vp9_writer *w, vp9_tree tree,
                                const vp9_prob *probs,
                                const struct vp9_token *token) {
@@ -67,4 +75,8 @@
 void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree);
 void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree);
 
+void vp9_tree_probs_from_distribution(vp9_tree tree,
+                                      unsigned int branch_ct[ /* n - 1 */ ][2],
+                                      const unsigned int num_events[ /* n */ ]);
+
 #endif  // VP9_ENCODER_VP9_TREEWRITER_H_
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index eefbd1a..01c55a4 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -17,7 +17,6 @@
 VP9_COMMON_SRCS-yes += common/vp9_convolve.c
 VP9_COMMON_SRCS-yes += common/vp9_convolve.h
 VP9_COMMON_SRCS-yes += common/vp9_debugmodes.c
-VP9_COMMON_SRCS-yes += common/vp9_default_coef_probs.h
 VP9_COMMON_SRCS-yes += common/vp9_entropy.c
 VP9_COMMON_SRCS-yes += common/vp9_entropymode.c
 VP9_COMMON_SRCS-yes += common/vp9_entropymv.c
@@ -62,7 +61,6 @@
 VP9_COMMON_SRCS-yes += common/vp9_reconinter.c
 VP9_COMMON_SRCS-yes += common/vp9_reconintra.c
 VP9_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/vp9_textblit.c
-VP9_COMMON_SRCS-yes += common/vp9_treecoder.c
 VP9_COMMON_SRCS-yes += common/vp9_common_data.c
 VP9_COMMON_SRCS-yes += common/vp9_common_data.h
 VP9_COMMON_SRCS-yes += common/vp9_scan.c
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 9a23ebd..5d53a41 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -198,6 +198,10 @@
   RANGE_CHECK(vp8_cfg, arnr_type,       1, 3);
   RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
 
+  // TODO(yaowu): remove this when ssim tuning is implemented for vp9
+  if (vp8_cfg->tuning == VP8_TUNE_SSIM)
+      ERROR("Option --tune=ssim is not currently supported in VP9.");
+
   if (cfg->g_pass == VPX_RC_LAST_PASS) {
     size_t           packet_sz = sizeof(FIRSTPASS_STATS);
     int              n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz);
diff --git a/vp9_spatial_scalable_encoder.c b/vp9_spatial_scalable_encoder.c
index 9aaec82..a727f50 100644
--- a/vp9_spatial_scalable_encoder.c
+++ b/vp9_spatial_scalable_encoder.c
@@ -53,14 +53,11 @@
     ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
 static const arg_def_t quantizers_arg =
     ARG_DEF("q", "quantizers", 1, "quantizers (lowest to highest layer)");
-static const arg_def_t dummy_frame_arg =
-    ARG_DEF("z", "dummy-frame", 1, "make first frame blank and full size");
 
 static const arg_def_t *svc_args[] = {
   &encoding_mode_arg, &frames_arg,        &width_arg,       &height_arg,
   &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &layers_arg,
-  &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,  &dummy_frame_arg,
-  NULL
+  &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,  NULL
 };
 
 static const SVC_ENCODING_MODE default_encoding_mode =
@@ -74,7 +71,6 @@
 static const uint32_t default_bitrate = 1000;
 static const uint32_t default_spatial_layers = 5;
 static const uint32_t default_kf_dist = 100;
-static const int default_use_dummy_frame = 1;
 
 typedef struct {
   char *output_filename;
@@ -116,8 +112,6 @@
   svc_ctx->log_level = SVC_LOG_DEBUG;
   svc_ctx->spatial_layers = default_spatial_layers;
   svc_ctx->encoding_mode = default_encoding_mode;
-  // when using a dummy frame, that frame is only encoded to be full size
-  svc_ctx->first_frame_full_size = default_use_dummy_frame;
 
   // start with default encoder configuration
   res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
@@ -150,8 +144,6 @@
       enc_cfg->g_w = arg_parse_uint(&arg);
     } else if (arg_match(&arg, &height_arg, argi)) {
       enc_cfg->g_h = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &height_arg, argi)) {
-      enc_cfg->g_h = arg_parse_uint(&arg);
     } else if (arg_match(&arg, &timebase_arg, argi)) {
       enc_cfg->g_timebase = arg_parse_rational(&arg);
     } else if (arg_match(&arg, &bitrate_arg, argi)) {
@@ -167,8 +159,6 @@
       vpx_svc_set_scale_factors(svc_ctx, arg.val);
     } else if (arg_match(&arg, &quantizers_arg, argi)) {
       vpx_svc_set_quantizers(svc_ctx, arg.val);
-    } else if (arg_match(&arg, &dummy_frame_arg, argi)) {
-      svc_ctx->first_frame_full_size = arg_parse_int(&arg);
     } else {
       ++argj;
     }
@@ -195,13 +185,12 @@
       "mode: %d, layers: %d\n"
       "width %d, height: %d,\n"
       "num: %d, den: %d, bitrate: %d,\n"
-      "gop size: %d, use_dummy_frame: %d\n",
+      "gop size: %d\n",
       vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
       app_input->frames_to_skip, svc_ctx->encoding_mode,
       svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
       enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
-      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist,
-      svc_ctx->first_frame_full_size);
+      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
 }
 
 int main(int argc, const char **argv) {
@@ -246,12 +235,9 @@
   }
 
   // Encode frames
-  while (frame_cnt <= app_input.frames_to_code) {
-    if (frame_cnt == 0 && svc_ctx.first_frame_full_size) {
-      create_dummy_frame(&raw);
-    } else {
-      if (!read_yuv_frame(&app_input.input_ctx, &raw)) break;
-    }
+  while (frame_cnt < app_input.frames_to_code) {
+    if (read_yuv_frame(&app_input.input_ctx, &raw)) break;
+
     res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration,
                          VPX_DL_REALTIME);
     printf("%s", vpx_svc_get_message(&svc_ctx));
@@ -269,7 +255,7 @@
     pts += frame_duration;
   }
 
-  printf("Processed %d frames\n", frame_cnt - svc_ctx.first_frame_full_size);
+  printf("Processed %d frames\n", frame_cnt);
 
   fclose(app_input.input_ctx.file);
   if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index 57d21dc..810e881 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -584,21 +584,13 @@
   return flags;
 }
 
-/**
- * Helper to check if the current frame is the first, full resolution dummy.
- */
-static int vpx_svc_dummy_frame(SvcContext *svc_ctx) {
-  SvcInternal *const si = get_svc_internal(svc_ctx);
-  return svc_ctx->first_frame_full_size == 1 && si->encode_frame_count == 0;
-}
-
 static void calculate_enc_frame_flags(SvcContext *svc_ctx) {
   vpx_enc_frame_flags_t flags = VPX_EFLAG_FORCE_KF;
   SvcInternal *const si = get_svc_internal(svc_ctx);
   const int is_keyframe = (si->frame_within_gop == 0);
 
   // keyframe layer zero is identical for all modes
-  if ((is_keyframe && si->layer == 0) || vpx_svc_dummy_frame(svc_ctx)) {
+  if (is_keyframe && si->layer == 0) {
     si->enc_frame_flags = VPX_EFLAG_FORCE_KF;
     return;
   }
@@ -783,10 +775,9 @@
   memset(&superframe, 0, sizeof(superframe));
   svc_log_reset(svc_ctx);
 
-  si->layers = vpx_svc_dummy_frame(svc_ctx) ? 1 : svc_ctx->spatial_layers;
+  si->layers = svc_ctx->spatial_layers;
   if (si->frame_within_gop >= si->kf_dist ||
-      si->encode_frame_count == 0 ||
-      (si->encode_frame_count == 1 && svc_ctx->first_frame_full_size == 1)) {
+      si->encode_frame_count == 0) {
     si->frame_within_gop = 0;
   }
   si->is_keyframe = (si->frame_within_gop == 0);
@@ -805,12 +796,8 @@
     }
     calculate_enc_frame_flags(svc_ctx);
 
-    if (vpx_svc_dummy_frame(svc_ctx)) {
-      // do not set svc parameters, use normal encode
-      svc_log(svc_ctx, SVC_LOG_DEBUG, "encoding full size first frame\n");
-    } else {
-      set_svc_parameters(svc_ctx, codec_ctx);
-    }
+    set_svc_parameters(svc_ctx, codec_ctx);
+
     res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration,
                            si->enc_frame_flags, deadline);
     if (res != VPX_CODEC_OK) {
@@ -822,12 +809,10 @@
       switch (cx_pkt->kind) {
         case VPX_CODEC_CX_FRAME_PKT: {
           const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz);
-          if (!vpx_svc_dummy_frame(svc_ctx)) {
-            si->bytes_in_layer[si->layer] += frame_pkt_size;
-            svc_log(svc_ctx, SVC_LOG_DEBUG,
-                    "SVC frame: %d, layer: %d, size: %u\n",
-                    si->encode_frame_count, si->layer, frame_pkt_size);
-          }
+          si->bytes_in_layer[si->layer] += frame_pkt_size;
+          svc_log(svc_ctx, SVC_LOG_DEBUG,
+                  "SVC frame: %d, layer: %d, size: %u\n",
+                  si->encode_frame_count, si->layer, frame_pkt_size);
           layer_data =
               ld_create(cx_pkt->data.frame.buf, (size_t)frame_pkt_size);
           if (layer_data == NULL) {
@@ -842,15 +827,13 @@
           break;
         }
         case VPX_CODEC_PSNR_PKT: {
-          if (!vpx_svc_dummy_frame(svc_ctx)) {
-            svc_log(svc_ctx, SVC_LOG_DEBUG,
-                    "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): "
-                    "%2.3f  %2.3f  %2.3f  %2.3f \n",
-                    si->encode_frame_count, si->layer,
-                    cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1],
-                    cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]);
-            si->psnr_in_layer[si->layer] += cx_pkt->data.psnr.psnr[0];
-          }
+          svc_log(svc_ctx, SVC_LOG_DEBUG,
+                  "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): "
+                  "%2.3f  %2.3f  %2.3f  %2.3f \n",
+                  si->encode_frame_count, si->layer,
+                  cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1],
+                  cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]);
+          si->psnr_in_layer[si->layer] += cx_pkt->data.psnr.psnr[0];
           break;
         }
         default: {
@@ -860,11 +843,10 @@
     }
   }
   // add superframe index to layer data list
-  if (!vpx_svc_dummy_frame(svc_ctx)) {
-    sf_create_index(&superframe);
-    layer_data = ld_create(superframe.buffer, superframe.index_size);
-    ld_list_add(&cx_layer_list, layer_data);
-  }
+  sf_create_index(&superframe);
+  layer_data = ld_create(superframe.buffer, superframe.index_size);
+  ld_list_add(&cx_layer_list, layer_data);
+
   // get accumulated size of layer data
   si->frame_size = ld_list_get_buffer_size(cx_layer_list);
   if (si->frame_size == 0) return VPX_CODEC_ERROR;
@@ -940,7 +922,6 @@
   svc_log_reset(svc_ctx);
 
   encode_frame_count = si->encode_frame_count;
-  if (svc_ctx->first_frame_full_size) encode_frame_count--;
   if (si->encode_frame_count <= 0) return vpx_svc_get_message(svc_ctx);
 
   svc_log(svc_ctx, SVC_LOG_INFO, "\n");
diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
index 1f575e0..a99e48f 100644
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -172,7 +172,7 @@
   if (!ctx || !cb)
     res = VPX_CODEC_INVALID_PARAM;
   else if (!ctx->iface || !ctx->priv
-           || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME))
+           || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_SLICE))
     res = VPX_CODEC_ERROR;
   else {
     ctx->priv->dec.put_slice_cb.u.put_slice = cb;
diff --git a/vpx/svc_context.h b/vpx/svc_context.h
index f4933f8..8204f9c 100644
--- a/vpx/svc_context.h
+++ b/vpx/svc_context.h
@@ -39,7 +39,6 @@
 typedef struct {
   // public interface to svc_command options
   int spatial_layers;               // number of layers
-  int first_frame_full_size;        // set to one to force first frame full size
   SVC_ENCODING_MODE encoding_mode;  // svc encoding strategy
   SVC_LOG_LEVEL log_level;  // amount of information to display
   int log_print;  // when set, printf log messages instead of returning the
diff --git a/vpx_ports/vpx_once.h b/vpx_ports/vpx_once.h
index 16a735c..6052c4d 100644
--- a/vpx_ports/vpx_once.h
+++ b/vpx_ports/vpx_once.h
@@ -7,6 +7,10 @@
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
+
+#ifndef VPX_ONCE_H
+#define VPX_ONCE_H
+
 #include "vpx_config.h"
 
 #if CONFIG_MULTITHREAD && defined(_WIN32)
@@ -95,3 +99,5 @@
     }
 }
 #endif
+
+#endif
diff --git a/vpxdec.c b/vpxdec.c
index 622a461..1b9bfd3 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -697,7 +697,7 @@
 
 
   if (arg_skip)
-    fprintf(stderr, "Skiping first %d frames.\n", arg_skip);
+    fprintf(stderr, "Skipping first %d frames.\n", arg_skip);
   while (arg_skip) {
     if (read_frame(&input, &buf, &bytes_in_buffer, &buffer_size))
       break;
diff --git a/warnings.c b/warnings.c
index 96400db..f76d706 100644
--- a/warnings.c
+++ b/warnings.c
@@ -81,7 +81,8 @@
 
 static void check_quantizer(int min_q, int max_q,
                             struct WarningList *warning_list) {
-  if (min_q == max_q || abs(max_q - min_q) < 8)
+  const int lossless = min_q == 0 && max_q == 0;
+  if (!lossless && (min_q == max_q || abs(max_q - min_q) < 8))
     add_warning(quantizer_warning_string, warning_list);
 }