Merge "Make decode modules independent of tile index"
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index edc194d..18c12a8 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -31,15 +31,15 @@
 }
 void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
                  int stride, int /*tx_type*/) {
-  vp9_idct4x4_16_add_c(out, dst, stride >> 1);
+  vp9_idct4x4_16_add_c(out, dst, stride);
 }
 void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
             int stride, int tx_type) {
-  vp9_short_fht4x4_c(in, out, stride >> 1, tx_type);
+  vp9_short_fht4x4_c(in, out, stride, tx_type);
 }
 void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
                 int stride, int tx_type) {
-  vp9_iht4x4_16_add_c(out, dst, stride >> 1, tx_type);
+  vp9_iht4x4_16_add_c(out, dst, stride, tx_type);
 }
 
 class FwdTrans4x4Test : public ::testing::TestWithParam<int> {
@@ -78,7 +78,7 @@
   ACMRandom rnd(ACMRandom::DeterministicSeed());
   DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
   DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16);
-  const int pitch = 8;
+  const int pitch = 4;
   int count_sign_block[16][2];
   const int count_test_block = 1000000;
 
@@ -152,7 +152,7 @@
     for (int j = 0; j < 16; ++j)
       test_input_block[j] = src[j] - dst[j];
 
-    const int pitch = 8;
+    const int pitch = 4;
     RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
 
     for (int j = 0; j < 16; ++j) {
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 728db6d..ffd7d23 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -223,7 +223,7 @@
     fwd_txfm_ = GET_PARAM(0);
     inv_txfm_ = GET_PARAM(1);
     tx_type_  = GET_PARAM(2);
-    pitch_    = 16;
+    pitch_    = 8;
     fwd_txfm_ref = fdct8x8_ref;
   }
 
@@ -234,7 +234,7 @@
     fwd_txfm_(in, out, stride);
   }
   void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride >> 1);
+    inv_txfm_(out, dst, stride);
   }
 
   fdct_t fwd_txfm_;
diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc
index 41d22dd..4fec46a 100644
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -18,7 +18,7 @@
 
 namespace {
 
-class VP9WorkerThreadTest : public ::testing::Test {
+class VP9WorkerThreadTest : public ::testing::TestWithParam<bool> {
  protected:
   virtual ~VP9WorkerThreadTest() {}
   virtual void SetUp() {
@@ -38,7 +38,7 @@
   return *reinterpret_cast<int*>(return_value);
 }
 
-TEST_F(VP9WorkerThreadTest, HookSuccess) {
+TEST_P(VP9WorkerThreadTest, HookSuccess) {
   EXPECT_TRUE(vp9_worker_sync(&worker_));  // should be a no-op.
 
   for (int i = 0; i < 2; ++i) {
@@ -50,7 +50,12 @@
     worker_.data1 = &hook_data;
     worker_.data2 = &return_value;
 
-    vp9_worker_launch(&worker_);
+    const bool synchronous = GetParam();
+    if (synchronous) {
+      vp9_worker_execute(&worker_);
+    } else {
+      vp9_worker_launch(&worker_);
+    }
     EXPECT_TRUE(vp9_worker_sync(&worker_));
     EXPECT_FALSE(worker_.had_error);
     EXPECT_EQ(5, hook_data);
@@ -59,7 +64,7 @@
   }
 }
 
-TEST_F(VP9WorkerThreadTest, HookFailure) {
+TEST_P(VP9WorkerThreadTest, HookFailure) {
   EXPECT_TRUE(vp9_worker_reset(&worker_));
 
   int hook_data = 0;
@@ -68,7 +73,12 @@
   worker_.data1 = &hook_data;
   worker_.data2 = &return_value;
 
-  vp9_worker_launch(&worker_);
+  const bool synchronous = GetParam();
+  if (synchronous) {
+    vp9_worker_execute(&worker_);
+  } else {
+    vp9_worker_launch(&worker_);
+  }
   EXPECT_FALSE(vp9_worker_sync(&worker_));
   EXPECT_TRUE(worker_.had_error);
 
@@ -106,4 +116,6 @@
   EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", md5.Get());
 }
 
+INSTANTIATE_TEST_CASE_P(Synchronous, VP9WorkerThreadTest, ::testing::Bool());
+
 }  // namespace
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index a0f646e..1a03269 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -53,7 +53,7 @@
 typedef enum {
   KEY_FRAME = 0,
   INTER_FRAME = 1,
-  NUM_FRAME_TYPES,
+  FRAME_TYPES,
 } FRAME_TYPE;
 
 typedef enum {
@@ -205,11 +205,6 @@
 
   int up_available;
   int left_available;
-  int right_available;
-
-  // partition contexts
-  PARTITION_CONTEXT *above_seg_context;
-  PARTITION_CONTEXT *left_seg_context;
 
   /* Distance of MB away from frame edges */
   int mb_to_left_edge;
@@ -233,44 +228,7 @@
   int q_index;
 } MACROBLOCKD;
 
-static INLINE void update_partition_context(MACROBLOCKD *xd, BLOCK_SIZE sb_type,
-                                            BLOCK_SIZE sb_size) {
-  const int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
-  const int bwl = b_width_log2(sb_type);
-  const int bhl = b_height_log2(sb_type);
-  const int boffset = b_width_log2(BLOCK_64X64) - bsl;
-  const char pcval0 = ~(0xe << boffset);
-  const char pcval1 = ~(0xf << boffset);
-  const char pcvalue[2] = {pcval0, pcval1};
 
-  assert(MAX(bwl, bhl) <= bsl);
-
-  // update the partition context at the end notes. set partition bits
-  // of block sizes larger than the current one to be one, and partition
-  // bits of smaller block sizes to be zero.
-  vpx_memset(xd->above_seg_context, pcvalue[bwl == bsl], bs);
-  vpx_memset(xd->left_seg_context, pcvalue[bhl == bsl], bs);
-}
-
-static INLINE int partition_plane_context(MACROBLOCKD *xd, BLOCK_SIZE sb_type) {
-  int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
-  int above = 0, left = 0, i;
-  int boffset = mi_width_log2(BLOCK_64X64) - bsl;
-
-  assert(mi_width_log2(sb_type) == mi_height_log2(sb_type));
-  assert(bsl >= 0);
-  assert(boffset >= 0);
-
-  for (i = 0; i < bs; i++)
-    above |= (xd->above_seg_context[i] & (1 << boffset));
-  for (i = 0; i < bs; i++)
-    left |= (xd->left_seg_context[i] & (1 << boffset));
-
-  above = (above > 0);
-  left  = (left > 0);
-
-  return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
-}
 
 static BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, PARTITION_TYPE partition) {
   const BLOCK_SIZE subsize = subsize_lookup[partition][bsize];
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 56e6444..3347b35 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -14,203 +14,197 @@
 #include "vp9/common/vp9_onyxc_int.h"
 #include "vp9/common/vp9_seg_common.h"
 
-const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES]
-                                  [INTRA_MODES - 1] = {
-  { 144,  11,  54, 157, 195, 130,  46,  58, 108 } /* y = dc */,
-  { 118,  15, 123, 148, 131, 101,  44,  93, 131 } /* y = v */,
-  { 113,  12,  23, 188, 226, 142,  26,  32, 125 } /* y = h */,
-  { 120,  11,  50, 123, 163, 135,  64,  77, 103 } /* y = d45 */,
-  { 113,   9,  36, 155, 111, 157,  32,  44, 161 } /* y = d135 */,
-  { 116,   9,  55, 176,  76,  96,  37,  61, 149 } /* y = d117 */,
-  { 115,   9,  28, 141, 161, 167,  21,  25, 193 } /* y = d153 */,
-  { 120,  12,  32, 145, 195, 142,  32,  38,  86 } /* y = d207 */,
-  { 116,  12,  64, 120, 140, 125,  49, 115, 121 } /* y = d63 */,
-  { 102,  19,  66, 162, 182, 122,  35,  59, 128 } /* y = tm */
-};
-
-static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS]
-                                        [INTRA_MODES - 1] = {
-  {  65,  32,  18, 144, 162, 194,  41,  51,  98 } /* block_size < 8x8 */,
-  { 132,  68,  18, 165, 217, 196,  45,  40,  78 } /* block_size < 16x16 */,
-  { 173,  80,  19, 176, 240, 193,  64,  35,  46 } /* block_size < 32x32 */,
-  { 221, 135,  38, 194, 248, 121,  96,  85,  29 } /* block_size >= 32x32 */
-};
-
-static const vp9_prob default_if_uv_probs[INTRA_MODES]
-                                         [INTRA_MODES - 1] = {
-  { 120,   7,  76, 176, 208, 126,  28,  54, 103 } /* y = dc */,
-  {  48,  12, 154, 155, 139,  90,  34, 117, 119 } /* y = v */,
-  {  67,   6,  25, 204, 243, 158,  13,  21,  96 } /* y = h */,
-  {  97,   5,  44, 131, 176, 139,  48,  68,  97 } /* y = d45 */,
-  {  83,   5,  42, 156, 111, 152,  26,  49, 152 } /* y = d135 */,
-  {  80,   5,  58, 178,  74,  83,  33,  62, 145 } /* y = d117 */,
-  {  86,   5,  32, 154, 192, 168,  14,  22, 163 } /* y = d153 */,
-  {  85,   5,  32, 156, 216, 148,  19,  29,  73 } /* y = d207 */,
-  {  77,   7,  64, 116, 132, 122,  37, 126, 120 } /* y = d63 */,
-  { 101,  21, 107, 181, 192, 103,  19,  67, 125 } /* y = tm */
-};
-
-static const vp9_prob default_partition_probs[NUM_FRAME_TYPES]
-                                             [NUM_PARTITION_CONTEXTS]
-                                             [PARTITION_TYPES - 1] = {
-  { /* frame_type = keyframe */
-    /* 8x8 -> 4x4 */
-    { 158,  97,  94 } /* a/l both not split */,
-    {  93,  24,  99 } /* a split, l not split */,
-    {  85, 119,  44 } /* l split, a not split */,
-    {  62,  59,  67 } /* a/l both split */,
-    /* 16x16 -> 8x8 */
-    { 149,  53,  53 } /* a/l both not split */,
-    {  94,  20,  48 } /* a split, l not split */,
-    {  83,  53,  24 } /* l split, a not split */,
-    {  52,  18,  18 } /* a/l both split */,
-    /* 32x32 -> 16x16 */
-    { 150,  40,  39 } /* a/l both not split */,
-    {  78,  12,  26 } /* a split, l not split */,
-    {  67,  33,  11 } /* l split, a not split */,
-    {  24,   7,   5 } /* a/l both split */,
-    /* 64x64 -> 32x32 */
-    { 174,  35,  49 } /* a/l both not split */,
-    {  68,  11,  27 } /* a split, l not split */,
-    {  57,  15,   9 } /* l split, a not split */,
-    {  12,   3,   3 } /* a/l both split */
-  }, { /* frame_type = interframe */
-    /* 8x8 -> 4x4 */
-    { 199, 122, 141 } /* a/l both not split */,
-    { 147,  63, 159 } /* a split, l not split */,
-    { 148, 133, 118 } /* l split, a not split */,
-    { 121, 104, 114 } /* a/l both split */,
-    /* 16x16 -> 8x8 */
-    { 174,  73,  87 } /* a/l both not split */,
-    {  92,  41,  83 } /* a split, l not split */,
-    {  82,  99,  50 } /* l split, a not split */,
-    {  53,  39,  39 } /* a/l both split */,
-    /* 32x32 -> 16x16 */
-    { 177,  58,  59 } /* a/l both not split */,
-    {  68,  26,  63 } /* a split, l not split */,
-    {  52,  79,  25 } /* l split, a not split */,
-    {  17,  14,  12 } /* a/l both split */,
-    /* 64x64 -> 32x32 */
-    { 222,  34,  30 } /* a/l both not split */,
-    {  72,  16,  44 } /* a split, l not split */,
-    {  58,  32,  12 } /* l split, a not split */,
-    {  10,   7,   6 } /* a/l both split */
+const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = {
+  {  // above = dc
+    { 137,  30,  42, 148, 151, 207,  70,  52,  91 },  // left = dc
+    {  92,  45, 102, 136, 116, 180,  74,  90, 100 },  // left = v
+    {  73,  32,  19, 187, 222, 215,  46,  34, 100 },  // left = h
+    {  91,  30,  32, 116, 121, 186,  93,  86,  94 },  // left = d45
+    {  72,  35,  36, 149,  68, 206,  68,  63, 105 },  // left = d135
+    {  73,  31,  28, 138,  57, 124,  55, 122, 151 },  // left = d117
+    {  67,  23,  21, 140, 126, 197,  40,  37, 171 },  // left = d153
+    {  86,  27,  28, 128, 154, 212,  45,  43,  53 },  // left = d207
+    {  74,  32,  27, 107,  86, 160,  63, 134, 102 },  // left = d63
+    {  59,  67,  44, 140, 161, 202,  78,  67, 119 }   // left = tm
+  }, {  // above = v
+    {  63,  36, 126, 146, 123, 158,  60,  90,  96 },  // left = dc
+    {  43,  46, 168, 134, 107, 128,  69, 142,  92 },  // left = v
+    {  44,  29,  68, 159, 201, 177,  50,  57,  77 },  // left = h
+    {  58,  38,  76, 114,  97, 172,  78, 133,  92 },  // left = d45
+    {  46,  41,  76, 140,  63, 184,  69, 112,  57 },  // left = d135
+    {  38,  32,  85, 140,  46, 112,  54, 151, 133 },  // left = d117
+    {  39,  27,  61, 131, 110, 175,  44,  75, 136 },  // left = d153
+    {  52,  30,  74, 113, 130, 175,  51,  64,  58 },  // left = d207
+    {  47,  35,  80, 100,  74, 143,  64, 163,  74 },  // left = d63
+    {  36,  61, 116, 114, 128, 162,  80, 125,  82 }   // left = tm
+  }, {  // above = h
+    {  82,  26,  26, 171, 208, 204,  44,  32, 105 },  // left = dc
+    {  55,  44,  68, 166, 179, 192,  57,  57, 108 },  // left = v
+    {  42,  26,  11, 199, 241, 228,  23,  15,  85 },  // left = h
+    {  68,  42,  19, 131, 160, 199,  55,  52,  83 },  // left = d45
+    {  58,  50,  25, 139, 115, 232,  39,  52, 118 },  // left = d135
+    {  50,  35,  33, 153, 104, 162,  64,  59, 131 },  // left = d117
+    {  44,  24,  16, 150, 177, 202,  33,  19, 156 },  // left = d153
+    {  55,  27,  12, 153, 203, 218,  26,  27,  49 },  // left = d207
+    {  53,  49,  21, 110, 116, 168,  59,  80,  76 },  // left = d63
+    {  38,  72,  19, 168, 203, 212,  50,  50, 107 }   // left = tm
+  }, {  // above = d45
+    { 103,  26,  36, 129, 132, 201,  83,  80,  93 },  // left = dc
+    {  59,  38,  83, 112, 103, 162,  98, 136,  90 },  // left = v
+    {  62,  30,  23, 158, 200, 207,  59,  57,  50 },  // left = h
+    {  67,  30,  29,  84,  86, 191, 102,  91,  59 },  // left = d45
+    {  60,  32,  33, 112,  71, 220,  64,  89, 104 },  // left = d135
+    {  53,  26,  34, 130,  56, 149,  84, 120, 103 },  // left = d117
+    {  53,  21,  23, 133, 109, 210,  56,  77, 172 },  // left = d153
+    {  77,  19,  29, 112, 142, 228,  55,  66,  36 },  // left = d207
+    {  61,  29,  29,  93,  97, 165,  83, 175, 162 },  // left = d63
+    {  47,  47,  43, 114, 137, 181, 100,  99,  95 }   // left = tm
+  }, {  // above = d135
+    {  69,  23,  29, 128,  83, 199,  46,  44, 101 },  // left = dc
+    {  53,  40,  55, 139,  69, 183,  61,  80, 110 },  // left = v
+    {  40,  29,  19, 161, 180, 207,  43,  24,  91 },  // left = h
+    {  60,  34,  19, 105,  61, 198,  53,  64,  89 },  // left = d45
+    {  52,  31,  22, 158,  40, 209,  58,  62,  89 },  // left = d135
+    {  44,  31,  29, 147,  46, 158,  56, 102, 198 },  // left = d117
+    {  35,  19,  12, 135,  87, 209,  41,  45, 167 },  // left = d153
+    {  55,  25,  21, 118,  95, 215,  38,  39,  66 },  // left = d207
+    {  51,  38,  25, 113,  58, 164,  70,  93,  97 },  // left = d63
+    {  47,  54,  34, 146, 108, 203,  72, 103, 151 }   // left = tm
+  }, {  // above = d117
+    {  64,  19,  37, 156,  66, 138,  49,  95, 133 },  // left = dc
+    {  46,  27,  80, 150,  55, 124,  55, 121, 135 },  // left = v
+    {  36,  23,  27, 165, 149, 166,  54,  64, 118 },  // left = h
+    {  53,  21,  36, 131,  63, 163,  60, 109,  81 },  // left = d45
+    {  40,  26,  35, 154,  40, 185,  51,  97, 123 },  // left = d135
+    {  35,  19,  34, 179,  19,  97,  48, 129, 124 },  // left = d117
+    {  36,  20,  26, 136,  62, 164,  33,  77, 154 },  // left = d153
+    {  45,  18,  32, 130,  90, 157,  40,  79,  91 },  // left = d207
+    {  45,  26,  28, 129,  45, 129,  49, 147, 123 },  // left = d63
+    {  38,  44,  51, 136,  74, 162,  57,  97, 121 }   // left = tm
+  }, {  // above = d153
+    {  75,  17,  22, 136, 138, 185,  32,  34, 166 },  // left = dc
+    {  56,  39,  58, 133, 117, 173,  48,  53, 187 },  // left = v
+    {  35,  21,  12, 161, 212, 207,  20,  23, 145 },  // left = h
+    {  56,  29,  19, 117, 109, 181,  55,  68, 112 },  // left = d45
+    {  47,  29,  17, 153,  64, 220,  59,  51, 114 },  // left = d135
+    {  46,  16,  24, 136,  76, 147,  41,  64, 172 },  // left = d117
+    {  34,  17,  11, 108, 152, 187,  13,  15, 209 },  // left = d153
+    {  51,  24,  14, 115, 133, 209,  32,  26, 104 },  // left = d207
+    {  55,  30,  18, 122,  79, 179,  44,  88, 116 },  // left = d63
+    {  37,  49,  25, 129, 168, 164,  41,  54, 148 }   // left = tm
+  }, {  // above = d207
+    {  82,  22,  32, 127, 143, 213,  39,  41,  70 },  // left = dc
+    {  62,  44,  61, 123, 105, 189,  48,  57,  64 },  // left = v
+    {  47,  25,  17, 175, 222, 220,  24,  30,  86 },  // left = h
+    {  68,  36,  17, 106, 102, 206,  59,  74,  74 },  // left = d45
+    {  57,  39,  23, 151,  68, 216,  55,  63,  58 },  // left = d135
+    {  49,  30,  35, 141,  70, 168,  82,  40, 115 },  // left = d117
+    {  51,  25,  15, 136, 129, 202,  38,  35, 139 },  // left = d153
+    {  68,  26,  16, 111, 141, 215,  29,  28,  28 },  // left = d207
+    {  59,  39,  19, 114,  75, 180,  77, 104,  42 },  // left = d63
+    {  40,  61,  26, 126, 152, 206,  61,  59,  93 }   // left = tm
+  }, {  // above = d63
+    {  78,  23,  39, 111, 117, 170,  74, 124,  94 },  // left = dc
+    {  48,  34,  86, 101,  92, 146,  78, 179, 134 },  // left = v
+    {  47,  22,  24, 138, 187, 178,  68,  69,  59 },  // left = h
+    {  56,  25,  33, 105, 112, 187,  95, 177, 129 },  // left = d45
+    {  48,  31,  27, 114,  63, 183,  82, 116,  56 },  // left = d135
+    {  43,  28,  37, 121,  63, 123,  61, 192, 169 },  // left = d117
+    {  42,  17,  24, 109,  97, 177,  56,  76, 122 },  // left = d153
+    {  58,  18,  28, 105, 139, 182,  70,  92,  63 },  // left = d207
+    {  46,  23,  32,  74,  86, 150,  67, 183,  88 },  // left = d63
+    {  36,  38,  48,  92, 122, 165,  88, 137,  91 }   // left = tm
+  }, {  // above = tm
+    {  65,  70,  60, 155, 159, 199,  61,  60,  81 },  // left = dc
+    {  44,  78, 115, 132, 119, 173,  71, 112,  93 },  // left = v
+    {  39,  38,  21, 184, 227, 206,  42,  32,  64 },  // left = h
+    {  58,  47,  36, 124, 137, 193,  80,  82,  78 },  // left = d45
+    {  49,  50,  35, 144,  95, 205,  63,  78,  59 },  // left = d135
+    {  41,  53,  52, 148,  71, 142,  65, 128,  51 },  // left = d117
+    {  40,  36,  28, 143, 143, 202,  40,  55, 137 },  // left = d153
+    {  52,  34,  29, 129, 183, 227,  42,  35,  43 },  // left = d207
+    {  42,  44,  44, 104, 105, 164,  64, 130,  80 },  // left = d63
+    {  43,  81,  53, 140, 169, 204,  68,  84,  72 }   // left = tm
   }
 };
 
-const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES]
-                                 [INTRA_MODES]
-                                 [INTRA_MODES - 1] = {
-  { /* above = dc */
-    { 137,  30,  42, 148, 151, 207,  70,  52,  91 } /* left = dc */,
-    {  92,  45, 102, 136, 116, 180,  74,  90, 100 } /* left = v */,
-    {  73,  32,  19, 187, 222, 215,  46,  34, 100 } /* left = h */,
-    {  91,  30,  32, 116, 121, 186,  93,  86,  94 } /* left = d45 */,
-    {  72,  35,  36, 149,  68, 206,  68,  63, 105 } /* left = d135 */,
-    {  73,  31,  28, 138,  57, 124,  55, 122, 151 } /* left = d117 */,
-    {  67,  23,  21, 140, 126, 197,  40,  37, 171 } /* left = d153 */,
-    {  86,  27,  28, 128, 154, 212,  45,  43,  53 } /* left = d207 */,
-    {  74,  32,  27, 107,  86, 160,  63, 134, 102 } /* left = d63 */,
-    {  59,  67,  44, 140, 161, 202,  78,  67, 119 } /* left = tm */
-  }, { /* above = v */
-    {  63,  36, 126, 146, 123, 158,  60,  90,  96 } /* left = dc */,
-    {  43,  46, 168, 134, 107, 128,  69, 142,  92 } /* left = v */,
-    {  44,  29,  68, 159, 201, 177,  50,  57,  77 } /* left = h */,
-    {  58,  38,  76, 114,  97, 172,  78, 133,  92 } /* left = d45 */,
-    {  46,  41,  76, 140,  63, 184,  69, 112,  57 } /* left = d135 */,
-    {  38,  32,  85, 140,  46, 112,  54, 151, 133 } /* left = d117 */,
-    {  39,  27,  61, 131, 110, 175,  44,  75, 136 } /* left = d153 */,
-    {  52,  30,  74, 113, 130, 175,  51,  64,  58 } /* left = d207 */,
-    {  47,  35,  80, 100,  74, 143,  64, 163,  74 } /* left = d63 */,
-    {  36,  61, 116, 114, 128, 162,  80, 125,  82 } /* left = tm */
-  }, { /* above = h */
-    {  82,  26,  26, 171, 208, 204,  44,  32, 105 } /* left = dc */,
-    {  55,  44,  68, 166, 179, 192,  57,  57, 108 } /* left = v */,
-    {  42,  26,  11, 199, 241, 228,  23,  15,  85 } /* left = h */,
-    {  68,  42,  19, 131, 160, 199,  55,  52,  83 } /* left = d45 */,
-    {  58,  50,  25, 139, 115, 232,  39,  52, 118 } /* left = d135 */,
-    {  50,  35,  33, 153, 104, 162,  64,  59, 131 } /* left = d117 */,
-    {  44,  24,  16, 150, 177, 202,  33,  19, 156 } /* left = d153 */,
-    {  55,  27,  12, 153, 203, 218,  26,  27,  49 } /* left = d207 */,
-    {  53,  49,  21, 110, 116, 168,  59,  80,  76 } /* left = d63 */,
-    {  38,  72,  19, 168, 203, 212,  50,  50, 107 } /* left = tm */
-  }, { /* above = d45 */
-    { 103,  26,  36, 129, 132, 201,  83,  80,  93 } /* left = dc */,
-    {  59,  38,  83, 112, 103, 162,  98, 136,  90 } /* left = v */,
-    {  62,  30,  23, 158, 200, 207,  59,  57,  50 } /* left = h */,
-    {  67,  30,  29,  84,  86, 191, 102,  91,  59 } /* left = d45 */,
-    {  60,  32,  33, 112,  71, 220,  64,  89, 104 } /* left = d135 */,
-    {  53,  26,  34, 130,  56, 149,  84, 120, 103 } /* left = d117 */,
-    {  53,  21,  23, 133, 109, 210,  56,  77, 172 } /* left = d153 */,
-    {  77,  19,  29, 112, 142, 228,  55,  66,  36 } /* left = d207 */,
-    {  61,  29,  29,  93,  97, 165,  83, 175, 162 } /* left = d63 */,
-    {  47,  47,  43, 114, 137, 181, 100,  99,  95 } /* left = tm */
-  }, { /* above = d135 */
-    {  69,  23,  29, 128,  83, 199,  46,  44, 101 } /* left = dc */,
-    {  53,  40,  55, 139,  69, 183,  61,  80, 110 } /* left = v */,
-    {  40,  29,  19, 161, 180, 207,  43,  24,  91 } /* left = h */,
-    {  60,  34,  19, 105,  61, 198,  53,  64,  89 } /* left = d45 */,
-    {  52,  31,  22, 158,  40, 209,  58,  62,  89 } /* left = d135 */,
-    {  44,  31,  29, 147,  46, 158,  56, 102, 198 } /* left = d117 */,
-    {  35,  19,  12, 135,  87, 209,  41,  45, 167 } /* left = d153 */,
-    {  55,  25,  21, 118,  95, 215,  38,  39,  66 } /* left = d207 */,
-    {  51,  38,  25, 113,  58, 164,  70,  93,  97 } /* left = d63 */,
-    {  47,  54,  34, 146, 108, 203,  72, 103, 151 } /* left = tm */
-  }, { /* above = d117 */
-    {  64,  19,  37, 156,  66, 138,  49,  95, 133 } /* left = dc */,
-    {  46,  27,  80, 150,  55, 124,  55, 121, 135 } /* left = v */,
-    {  36,  23,  27, 165, 149, 166,  54,  64, 118 } /* left = h */,
-    {  53,  21,  36, 131,  63, 163,  60, 109,  81 } /* left = d45 */,
-    {  40,  26,  35, 154,  40, 185,  51,  97, 123 } /* left = d135 */,
-    {  35,  19,  34, 179,  19,  97,  48, 129, 124 } /* left = d117 */,
-    {  36,  20,  26, 136,  62, 164,  33,  77, 154 } /* left = d153 */,
-    {  45,  18,  32, 130,  90, 157,  40,  79,  91 } /* left = d207 */,
-    {  45,  26,  28, 129,  45, 129,  49, 147, 123 } /* left = d63 */,
-    {  38,  44,  51, 136,  74, 162,  57,  97, 121 } /* left = tm */
-  }, { /* above = d153 */
-    {  75,  17,  22, 136, 138, 185,  32,  34, 166 } /* left = dc */,
-    {  56,  39,  58, 133, 117, 173,  48,  53, 187 } /* left = v */,
-    {  35,  21,  12, 161, 212, 207,  20,  23, 145 } /* left = h */,
-    {  56,  29,  19, 117, 109, 181,  55,  68, 112 } /* left = d45 */,
-    {  47,  29,  17, 153,  64, 220,  59,  51, 114 } /* left = d135 */,
-    {  46,  16,  24, 136,  76, 147,  41,  64, 172 } /* left = d117 */,
-    {  34,  17,  11, 108, 152, 187,  13,  15, 209 } /* left = d153 */,
-    {  51,  24,  14, 115, 133, 209,  32,  26, 104 } /* left = d207 */,
-    {  55,  30,  18, 122,  79, 179,  44,  88, 116 } /* left = d63 */,
-    {  37,  49,  25, 129, 168, 164,  41,  54, 148 } /* left = tm */
-  }, { /* above = d207 */
-    {  82,  22,  32, 127, 143, 213,  39,  41,  70 } /* left = dc */,
-    {  62,  44,  61, 123, 105, 189,  48,  57,  64 } /* left = v */,
-    {  47,  25,  17, 175, 222, 220,  24,  30,  86 } /* left = h */,
-    {  68,  36,  17, 106, 102, 206,  59,  74,  74 } /* left = d45 */,
-    {  57,  39,  23, 151,  68, 216,  55,  63,  58 } /* left = d135 */,
-    {  49,  30,  35, 141,  70, 168,  82,  40, 115 } /* left = d117 */,
-    {  51,  25,  15, 136, 129, 202,  38,  35, 139 } /* left = d153 */,
-    {  68,  26,  16, 111, 141, 215,  29,  28,  28 } /* left = d207 */,
-    {  59,  39,  19, 114,  75, 180,  77, 104,  42 } /* left = d63 */,
-    {  40,  61,  26, 126, 152, 206,  61,  59,  93 } /* left = tm */
-  }, { /* above = d63 */
-    {  78,  23,  39, 111, 117, 170,  74, 124,  94 } /* left = dc */,
-    {  48,  34,  86, 101,  92, 146,  78, 179, 134 } /* left = v */,
-    {  47,  22,  24, 138, 187, 178,  68,  69,  59 } /* left = h */,
-    {  56,  25,  33, 105, 112, 187,  95, 177, 129 } /* left = d45 */,
-    {  48,  31,  27, 114,  63, 183,  82, 116,  56 } /* left = d135 */,
-    {  43,  28,  37, 121,  63, 123,  61, 192, 169 } /* left = d117 */,
-    {  42,  17,  24, 109,  97, 177,  56,  76, 122 } /* left = d153 */,
-    {  58,  18,  28, 105, 139, 182,  70,  92,  63 } /* left = d207 */,
-    {  46,  23,  32,  74,  86, 150,  67, 183,  88 } /* left = d63 */,
-    {  36,  38,  48,  92, 122, 165,  88, 137,  91 } /* left = tm */
-  }, { /* above = tm */
-    {  65,  70,  60, 155, 159, 199,  61,  60,  81 } /* left = dc */,
-    {  44,  78, 115, 132, 119, 173,  71, 112,  93 } /* left = v */,
-    {  39,  38,  21, 184, 227, 206,  42,  32,  64 } /* left = h */,
-    {  58,  47,  36, 124, 137, 193,  80,  82,  78 } /* left = d45 */,
-    {  49,  50,  35, 144,  95, 205,  63,  78,  59 } /* left = d135 */,
-    {  41,  53,  52, 148,  71, 142,  65, 128,  51 } /* left = d117 */,
-    {  40,  36,  28, 143, 143, 202,  40,  55, 137 } /* left = d153 */,
-    {  52,  34,  29, 129, 183, 227,  42,  35,  43 } /* left = d207 */,
-    {  42,  44,  44, 104, 105, 164,  64, 130,  80 } /* left = d63 */,
-    {  43,  81,  53, 140, 169, 204,  68,  84,  72 } /* left = tm */
+const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = {
+  { 144,  11,  54, 157, 195, 130,  46,  58, 108 },  // y = dc
+  { 118,  15, 123, 148, 131, 101,  44,  93, 131 },  // y = v
+  { 113,  12,  23, 188, 226, 142,  26,  32, 125 },  // y = h
+  { 120,  11,  50, 123, 163, 135,  64,  77, 103 },  // y = d45
+  { 113,   9,  36, 155, 111, 157,  32,  44, 161 },  // y = d135
+  { 116,   9,  55, 176,  76,  96,  37,  61, 149 },  // y = d117
+  { 115,   9,  28, 141, 161, 167,  21,  25, 193 },  // y = d153
+  { 120,  12,  32, 145, 195, 142,  32,  38,  86 },  // y = d207
+  { 116,  12,  64, 120, 140, 125,  49, 115, 121 },  // y = d63
+  { 102,  19,  66, 162, 182, 122,  35,  59, 128 }   // y = tm
+};
+
+static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = {
+  {  65,  32,  18, 144, 162, 194,  41,  51,  98 },  // block_size < 8x8
+  { 132,  68,  18, 165, 217, 196,  45,  40,  78 },  // block_size < 16x16
+  { 173,  80,  19, 176, 240, 193,  64,  35,  46 },  // block_size < 32x32
+  { 221, 135,  38, 194, 248, 121,  96,  85,  29 }   // block_size >= 32x32
+};
+
+static const vp9_prob default_if_uv_probs[INTRA_MODES][INTRA_MODES - 1] = {
+  { 120,   7,  76, 176, 208, 126,  28,  54, 103 },  // y = dc
+  {  48,  12, 154, 155, 139,  90,  34, 117, 119 },  // y = v
+  {  67,   6,  25, 204, 243, 158,  13,  21,  96 },  // y = h
+  {  97,   5,  44, 131, 176, 139,  48,  68,  97 },  // y = d45
+  {  83,   5,  42, 156, 111, 152,  26,  49, 152 },  // y = d135
+  {  80,   5,  58, 178,  74,  83,  33,  62, 145 },  // y = d117
+  {  86,   5,  32, 154, 192, 168,  14,  22, 163 },  // y = d153
+  {  85,   5,  32, 156, 216, 148,  19,  29,  73 },  // y = d207
+  {  77,   7,  64, 116, 132, 122,  37, 126, 120 },  // y = d63
+  { 101,  21, 107, 181, 192, 103,  19,  67, 125 }   // y = tm
+};
+
+static const vp9_prob default_partition_probs[FRAME_TYPES][PARTITION_CONTEXTS]
+                                             [PARTITION_TYPES - 1] = {
+  {  // frame_type = keyframe
+    // 8x8 -> 4x4
+    { 158,  97,  94 },  // a/l both not split
+    {  93,  24,  99 },  // a split, l not split
+    {  85, 119,  44 },  // l split, a not split
+    {  62,  59,  67 },  // a/l both split
+    // 16x16 -> 8x8
+    { 149,  53,  53 },  // a/l both not split
+    {  94,  20,  48 },  // a split, l not split
+    {  83,  53,  24 },  // l split, a not split
+    {  52,  18,  18 },  // a/l both split
+    // 32x32 -> 16x16
+    { 150,  40,  39 },  // a/l both not split
+    {  78,  12,  26 },  // a split, l not split
+    {  67,  33,  11 },  // l split, a not split
+    {  24,   7,   5 },  // a/l both split
+    // 64x64 -> 32x32
+    { 174,  35,  49 },  // a/l both not split
+    {  68,  11,  27 },  // a split, l not split
+    {  57,  15,   9 },  // l split, a not split
+    {  12,   3,   3 },  // a/l both split
+  }, {  // frame_type = interframe
+    // 8x8 -> 4x4
+    { 199, 122, 141 },  // a/l both not split
+    { 147,  63, 159 },  // a split, l not split
+    { 148, 133, 118 },  // l split, a not split
+    { 121, 104, 114 },  // a/l both split
+    // 16x16 -> 8x8
+    { 174,  73,  87 },  // a/l both not split
+    {  92,  41,  83 },  // a split, l not split
+    {  82,  99,  50 },  // l split, a not split
+    {  53,  39,  39 },  // a/l both split
+    // 32x32 -> 16x16
+    { 177,  58,  59 },  // a/l both not split
+    {  68,  26,  63 },  // a split, l not split
+    {  52,  79,  25 },  // l split, a not split
+    {  17,  14,  12 },  // a/l both split
+    // 64x64 -> 32x32
+    { 222,  34,  30 },  // a/l both not split
+    {  72,  16,  44 },  // a split, l not split
+    {  58,  32,  12 },  // l split, a not split
+    {  10,   7,   6 },  // a/l both split
   }
 };
 
@@ -415,7 +409,7 @@
                       counts->uv_mode[i], pre_fc->uv_mode_prob[i],
                       fc->uv_mode_prob[i], 0);
 
-  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
+  for (i = 0; i < PARTITION_CONTEXTS; i++)
     update_mode_probs(PARTITION_TYPES, vp9_partition_tree,
                       counts->partition[i],
                       pre_fc->partition_prob[INTER_FRAME][i],
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 1bf0742..768ff2c 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -50,7 +50,7 @@
 } PARTITION_TYPE;
 
 #define PARTITION_PLOFFSET   4  // number of probability models per block size
-#define NUM_PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
+#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
 
 typedef enum {
   TX_4X4 = 0,                      // 4x4 dct transform
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 5fc180b..f2244e5 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -40,8 +40,7 @@
 typedef struct frame_contexts {
   vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
   vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
-  vp9_prob partition_prob[NUM_FRAME_TYPES][NUM_PARTITION_CONTEXTS]
-                         [PARTITION_TYPES - 1];
+  vp9_prob partition_prob[FRAME_TYPES][PARTITION_CONTEXTS][PARTITION_TYPES - 1];
   vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
   vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1]
                                  [SWITCHABLE_FILTERS - 1];
@@ -58,7 +57,7 @@
 typedef struct {
   unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
   unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
-  unsigned int partition[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
+  unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES];
   vp9_coeff_count_model coef[TX_SIZES][BLOCK_TYPES];
   unsigned int eob_branch[TX_SIZES][BLOCK_TYPES][REF_TYPES]
                          [COEF_BANDS][PREV_COEF_CONTEXTS];
@@ -253,12 +252,6 @@
   }
 }
 
-static INLINE void set_partition_seg_context(VP9_COMMON *cm, MACROBLOCKD *xd,
-                                             int mi_row, int mi_col) {
-  xd->above_seg_context = cm->above_seg_context + mi_col;
-  xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
-}
-
 // return the node index in the prob tree for binary coding
 static int check_bsize_coverage(int bs, int mi_rows, int mi_cols,
                                 int mi_row, int mi_col) {
@@ -288,7 +281,6 @@
   // Are edges available for intra prediction?
   xd->up_available    = (mi_row != 0);
   xd->left_available  = (mi_col > cm->cur_tile_mi_col_start);
-  xd->right_available = (mi_col + bw < cm->cur_tile_mi_col_end);
 }
 
 static void set_prev_mi(VP9_COMMON *cm) {
@@ -307,4 +299,53 @@
   return cm->frame_type == KEY_FRAME || cm->intra_only;
 }
 
+static INLINE void update_partition_context(VP9_COMMON *cm,
+                                            int mi_row, int mi_col,
+                                            BLOCK_SIZE sb_type,
+                                            BLOCK_SIZE sb_size) {
+  PARTITION_CONTEXT *above_ctx = cm->above_seg_context + mi_col;
+  PARTITION_CONTEXT *left_ctx = cm->left_seg_context + (mi_row & MI_MASK);
+
+  const int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
+  const int bwl = b_width_log2(sb_type);
+  const int bhl = b_height_log2(sb_type);
+  const int boffset = b_width_log2(BLOCK_64X64) - bsl;
+  const char pcval0 = ~(0xe << boffset);
+  const char pcval1 = ~(0xf << boffset);
+  const char pcvalue[2] = {pcval0, pcval1};
+
+  assert(MAX(bwl, bhl) <= bsl);
+
+  // update the partition context at the end nodes. set partition bits
+  // of block sizes larger than the current one to be one, and partition
+  // bits of smaller block sizes to be zero.
+  vpx_memset(above_ctx, pcvalue[bwl == bsl], bs);
+  vpx_memset(left_ctx, pcvalue[bhl == bsl], bs);
+}
+
+static INLINE int partition_plane_context(const VP9_COMMON *cm,
+                                          int mi_row, int mi_col,
+                                          BLOCK_SIZE sb_type) {
+  const PARTITION_CONTEXT *above_ctx = cm->above_seg_context + mi_col;
+  const PARTITION_CONTEXT *left_ctx = cm->left_seg_context + (mi_row & MI_MASK);
+
+  int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
+  int above = 0, left = 0, i;
+  int boffset = mi_width_log2(BLOCK_64X64) - bsl;
+
+  assert(mi_width_log2(sb_type) == mi_height_log2(sb_type));
+  assert(bsl >= 0);
+  assert(boffset >= 0);
+
+  for (i = 0; i < bs; i++)
+    above |= (above_ctx[i] & (1 << boffset));
+  for (i = 0; i < bs; i++)
+    left |= (left_ctx[i] & (1 << boffset));
+
+  above = (above > 0);
+  left  = (left > 0);
+
+  return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
+}
+
 #endif  // VP9_COMMON_VP9_ONYXC_INT_H_
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 6fa9e22..df92b58 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -695,10 +695,10 @@
 prototype void vp9_short_fht16x16 "int16_t *InputData, int16_t *OutputData, int pitch, int tx_type"
 specialize vp9_short_fht16x16 sse2
 
-prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int pitch"
+prototype void vp9_short_fdct8x8 "int16_t *InputData, int16_t *OutputData, int stride"
 specialize vp9_short_fdct8x8 sse2
 
-prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
+prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int stride"
 specialize vp9_short_fdct4x4 sse2
 
 prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int stride"
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 1674e67..33793ee 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -23,7 +23,6 @@
 #include "vp9/decoder/vp9_decodemv.h"
 #include "vp9/decoder/vp9_decodframe.h"
 #include "vp9/decoder/vp9_onyxd_int.h"
-#include "vp9/decoder/vp9_dsubexp.h"
 #include "vp9/decoder/vp9_treereader.h"
 
 static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) {
@@ -260,51 +259,6 @@
   mv->col = ref->col + diff.col;
 }
 
-static void update_mv(vp9_reader *r, vp9_prob *p) {
-  if (vp9_read(r, NMV_UPDATE_PROB))
-    *p = (vp9_read_literal(r, 7) << 1) | 1;
-}
-
-static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) {
-  int i, j, k;
-
-  for (j = 0; j < MV_JOINTS - 1; ++j)
-    update_mv(r, &mvc->joints[j]);
-
-  for (i = 0; i < 2; ++i) {
-    nmv_component *const comp = &mvc->comps[i];
-
-    update_mv(r, &comp->sign);
-
-    for (j = 0; j < MV_CLASSES - 1; ++j)
-      update_mv(r, &comp->classes[j]);
-
-    for (j = 0; j < CLASS0_SIZE - 1; ++j)
-      update_mv(r, &comp->class0[j]);
-
-    for (j = 0; j < MV_OFFSET_BITS; ++j)
-      update_mv(r, &comp->bits[j]);
-  }
-
-  for (i = 0; i < 2; ++i) {
-    nmv_component *const comp = &mvc->comps[i];
-
-    for (j = 0; j < CLASS0_SIZE; ++j)
-      for (k = 0; k < 3; ++k)
-        update_mv(r, &comp->class0_fp[j][k]);
-
-    for (j = 0; j < 3; ++j)
-      update_mv(r, &comp->fp[j]);
-  }
-
-  if (allow_hp) {
-    for (i = 0; i < 2; ++i) {
-      update_mv(r, &mvc->comps[i].class0_hp);
-      update_mv(r, &mvc->comps[i].hp);
-    }
-  }
-}
-
 // Read the referncence frame
 static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                             vp9_reader *r,
@@ -356,26 +310,6 @@
   }
 }
 
-static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
-  int i, j;
-  for (j = 0; j < SWITCHABLE_FILTERS + 1; ++j)
-    for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
-      vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]);
-}
-
-static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
-  int i, j;
-  for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
-    for (j = 0; j < INTER_MODES - 1; ++j)
-      vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
-}
-
-static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) {
-  COMPPREDMODE_TYPE mode = vp9_read_bit(r);
-  if (mode)
-    mode += vp9_read_bit(r);
-  return mode;
-}
 
 static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type(
     VP9_COMMON *const cm, MACROBLOCKD *const xd, vp9_reader *r) {
@@ -608,61 +542,6 @@
     read_intra_block_mode_info(cm, mi, r);
 }
 
-static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
-  int i;
-
-  cm->comp_pred_mode = cm->allow_comp_inter_inter ? read_comp_pred_mode(r)
-                                                  : SINGLE_PREDICTION_ONLY;
-
-  if (cm->comp_pred_mode == HYBRID_PREDICTION)
-    for (i = 0; i < COMP_INTER_CONTEXTS; i++)
-      vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
-
-  if (cm->comp_pred_mode != COMP_PREDICTION_ONLY)
-    for (i = 0; i < REF_CONTEXTS; i++) {
-      vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]);
-      vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]);
-    }
-
-  if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
-    for (i = 0; i < REF_CONTEXTS; i++)
-      vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
-}
-
-void vp9_prepare_read_mode_info(VP9_COMMON *cm, vp9_reader *r) {
-  int k;
-
-  // TODO(jkoleszar): does this clear more than MBSKIP_CONTEXTS? Maybe remove.
-  // vpx_memset(cm->fc.mbskip_probs, 0, sizeof(cm->fc.mbskip_probs));
-  for (k = 0; k < MBSKIP_CONTEXTS; ++k)
-    vp9_diff_update_prob(r, &cm->fc.mbskip_probs[k]);
-
-  if (!frame_is_intra_only(cm)) {
-    nmv_context *const nmvc = &cm->fc.nmvc;
-    int i, j;
-
-    read_inter_mode_probs(&cm->fc, r);
-
-    if (cm->mcomp_filter_type == SWITCHABLE)
-      read_switchable_interp_probs(&cm->fc, r);
-
-    for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
-      vp9_diff_update_prob(r, &cm->fc.intra_inter_prob[i]);
-
-    read_comp_pred(cm, r);
-
-    for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
-      for (i = 0; i < INTRA_MODES - 1; ++i)
-        vp9_diff_update_prob(r, &cm->fc.y_mode_prob[j][i]);
-
-    for (j = 0; j < NUM_PARTITION_CONTEXTS; ++j)
-      for (i = 0; i < PARTITION_TYPES - 1; ++i)
-        vp9_diff_update_prob(r, &cm->fc.partition_prob[INTER_FRAME][j][i]);
-
-    read_mv_probs(r, nmvc, cm->allow_high_precision_mv);
-  }
-}
-
 void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd,
                         int mi_row, int mi_col, vp9_reader *r) {
   MODE_INFO *const mi = xd->mi_8x8[0];
diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h
index 981e8fe..cec99f2 100644
--- a/vp9/decoder/vp9_decodemv.h
+++ b/vp9/decoder/vp9_decodemv.h
@@ -14,8 +14,6 @@
 #include "vp9/decoder/vp9_onyxd_int.h"
 #include "vp9/decoder/vp9_dboolhuff.h"
 
-void vp9_prepare_read_mode_info(VP9_COMMON* cm, vp9_reader *r);
-
 void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd,
                         int mi_row, int mi_col, vp9_reader *r);
 
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index b8b63fe..3ee8ba4 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -74,6 +74,93 @@
       vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]);
 }
 
+static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
+  int i, j;
+  for (j = 0; j < SWITCHABLE_FILTERS + 1; ++j)
+    for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
+      vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]);
+}
+
+static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
+  int i, j;
+  for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+    for (j = 0; j < INTER_MODES - 1; ++j)
+      vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
+}
+
+static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) {
+  COMPPREDMODE_TYPE mode = vp9_read_bit(r);
+  if (mode)
+    mode += vp9_read_bit(r);
+  return mode;
+}
+
+static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) {
+  int i;
+
+  cm->comp_pred_mode = cm->allow_comp_inter_inter ? read_comp_pred_mode(r)
+                                                  : SINGLE_PREDICTION_ONLY;
+
+  if (cm->comp_pred_mode == HYBRID_PREDICTION)
+    for (i = 0; i < COMP_INTER_CONTEXTS; i++)
+      vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]);
+
+  if (cm->comp_pred_mode != COMP_PREDICTION_ONLY)
+    for (i = 0; i < REF_CONTEXTS; i++) {
+      vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]);
+      vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]);
+    }
+
+  if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY)
+    for (i = 0; i < REF_CONTEXTS; i++)
+      vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
+}
+
+static void update_mv(vp9_reader *r, vp9_prob *p) {
+  if (vp9_read(r, NMV_UPDATE_PROB))
+    *p = (vp9_read_literal(r, 7) << 1) | 1;
+}
+
+static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) {
+  int i, j, k;
+
+  for (j = 0; j < MV_JOINTS - 1; ++j)
+    update_mv(r, &mvc->joints[j]);
+
+  for (i = 0; i < 2; ++i) {
+    nmv_component *const comp = &mvc->comps[i];
+
+    update_mv(r, &comp->sign);
+
+    for (j = 0; j < MV_CLASSES - 1; ++j)
+      update_mv(r, &comp->classes[j]);
+
+    for (j = 0; j < CLASS0_SIZE - 1; ++j)
+      update_mv(r, &comp->class0[j]);
+
+    for (j = 0; j < MV_OFFSET_BITS; ++j)
+      update_mv(r, &comp->bits[j]);
+  }
+
+  for (i = 0; i < 2; ++i) {
+    nmv_component *const comp = &mvc->comps[i];
+
+    for (j = 0; j < CLASS0_SIZE; ++j)
+      for (k = 0; k < 3; ++k)
+        update_mv(r, &comp->class0_fp[j][k]);
+
+    for (j = 0; j < 3; ++j)
+      update_mv(r, &comp->fp[j]);
+  }
+
+  if (allow_hp) {
+    for (i = 0; i < 2; ++i) {
+      update_mv(r, &mvc->comps[i].class0_hp);
+      update_mv(r, &mvc->comps[i].hp);
+    }
+  }
+}
+
 static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) {
   int i;
   xd->plane[0].dequant = cm->y_dequant[q_index];
@@ -195,7 +282,6 @@
   xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL;
 
   set_skip_context(cm, xd, mi_row, mi_col);
-  set_partition_seg_context(cm, xd, mi_row, mi_col);
 
   // Distance of Mb to the various image edges. These are specified to 8th pel
   // as they are always compared to values that are in 1/8th pel units
@@ -271,7 +357,6 @@
 static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col,
                             vp9_reader* r, BLOCK_SIZE bsize, int index) {
   VP9_COMMON *const cm = &pbi->common;
-  MACROBLOCKD *const xd = &pbi->mb;
   const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
   PARTITION_TYPE partition = PARTITION_NONE;
   BLOCK_SIZE subsize;
@@ -286,8 +371,7 @@
     int pl;
     const int idx = check_bsize_coverage(hbs, cm->mi_rows, cm->mi_cols,
                                          mi_row, mi_col);
-    set_partition_seg_context(cm, xd, mi_row, mi_col);
-    pl = partition_plane_context(xd, bsize);
+    pl = partition_plane_context(cm, mi_row, mi_col, bsize);
 
     if (idx == 0)
       partition = treed_read(r, vp9_partition_tree,
@@ -332,10 +416,8 @@
 
   // update partition context
   if (bsize >= BLOCK_8X8 &&
-      (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) {
-    set_partition_seg_context(cm, xd, mi_row, mi_col);
-    update_partition_context(xd, subsize, bsize);
-  }
+      (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
+    update_partition_context(cm, mi_row, mi_col, subsize, bsize);
 }
 
 static void setup_token_decoder(const uint8_t *data,
@@ -588,14 +670,12 @@
   xd->mi_stream = pbi->mi_streams[tile_col];
 
   if (pbi->do_loopfilter_inline) {
-    if (num_threads > 1) {
-      LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
-      lf_data->frame_buffer = fb;
-      lf_data->cm = cm;
-      lf_data->xd = pbi->mb;
-      lf_data->stop = 0;
-      lf_data->y_only = 0;
-    }
+    LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+    lf_data->frame_buffer = fb;
+    lf_data->cm = cm;
+    lf_data->xd = pbi->mb;
+    lf_data->stop = 0;
+    lf_data->y_only = 0;
     vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
   }
 
@@ -609,39 +689,33 @@
       decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_64X64, 0);
 
     if (pbi->do_loopfilter_inline) {
-      // delay the loopfilter by 1 macroblock row.
       const int lf_start = mi_row - MI_BLOCK_SIZE;
+      LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+
+      // delay the loopfilter by 1 macroblock row.
       if (lf_start < 0) continue;
 
+      // decoding has completed: finish up the loop filter in this thread.
+      if (mi_row + MI_BLOCK_SIZE >= cm->cur_tile_mi_row_end) continue;
+
+      vp9_worker_sync(&pbi->lf_worker);
+      lf_data->start = lf_start;
+      lf_data->stop = mi_row;
       if (num_threads > 1) {
-        LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
-
-        // decoding has completed: finish up the loop filter in this thread.
-        if (mi_row + MI_BLOCK_SIZE >= cm->cur_tile_mi_row_end) continue;
-
-        vp9_worker_sync(&pbi->lf_worker);
-        lf_data->start = lf_start;
-        lf_data->stop = mi_row;
-        pbi->lf_worker.hook = vp9_loop_filter_worker;
         vp9_worker_launch(&pbi->lf_worker);
       } else {
-        vp9_loop_filter_rows(fb, cm, &pbi->mb, lf_start, mi_row, 0);
+        vp9_worker_execute(&pbi->lf_worker);
       }
     }
   }
 
   if (pbi->do_loopfilter_inline) {
-    int lf_start;
-    if (num_threads > 1) {
-      LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
+    LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
 
-      vp9_worker_sync(&pbi->lf_worker);
-      lf_start = lf_data->stop;
-    } else {
-      lf_start = mi_row - MI_BLOCK_SIZE;
-    }
-    vp9_loop_filter_rows(fb, cm, &pbi->mb,
-                         lf_start, cm->mi_rows, 0);
+    vp9_worker_sync(&pbi->lf_worker);
+    lf_data->start = lf_data->stop;
+    lf_data->stop = cm->mi_rows;
+    vp9_worker_execute(&pbi->lf_worker);
   }
 }
 
@@ -913,7 +987,9 @@
                                   size_t partition_size) {
   VP9_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &pbi->mb;
+  FRAME_CONTEXT *const fc = &cm->fc;
   vp9_reader r;
+  int k;
 
   if (vp9_reader_init(&r, data, partition_size))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
@@ -921,10 +997,36 @@
 
   cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r);
   if (cm->tx_mode == TX_MODE_SELECT)
-    read_tx_probs(&cm->fc.tx_probs, &r);
-  read_coef_probs(&cm->fc, cm->tx_mode, &r);
+    read_tx_probs(&fc->tx_probs, &r);
+  read_coef_probs(fc, cm->tx_mode, &r);
 
-  vp9_prepare_read_mode_info(cm, &r);
+  for (k = 0; k < MBSKIP_CONTEXTS; ++k)
+    vp9_diff_update_prob(&r, &fc->mbskip_probs[k]);
+
+  if (!frame_is_intra_only(cm)) {
+    nmv_context *const nmvc = &fc->nmvc;
+    int i, j;
+
+    read_inter_mode_probs(fc, &r);
+
+    if (cm->mcomp_filter_type == SWITCHABLE)
+      read_switchable_interp_probs(fc, &r);
+
+    for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
+      vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]);
+
+    read_comp_pred(cm, &r);
+
+    for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
+      for (i = 0; i < INTRA_MODES - 1; ++i)
+        vp9_diff_update_prob(&r, &fc->y_mode_prob[j][i]);
+
+    for (j = 0; j < PARTITION_CONTEXTS; ++j)
+      for (i = 0; i < PARTITION_TYPES - 1; ++i)
+        vp9_diff_update_prob(&r, &fc->partition_prob[INTER_FRAME][j][i]);
+
+    read_mv_probs(&r, nmvc, cm->allow_high_precision_mv);
+  }
 
   return vp9_reader_has_error(&r);
 }
@@ -941,6 +1043,44 @@
   }
 }
 
+#ifdef NDEBUG
+#define debug_check_frame_counts(cm) (void)0
+#else  // !NDEBUG
+// Counts should only be incremented when frame_parallel_decoding_mode and
+// error_resilient_mode are disabled.
+static void debug_check_frame_counts(const VP9_COMMON *const cm) {
+  FRAME_COUNTS zero_counts;
+  vp9_zero(zero_counts);
+  assert(cm->frame_parallel_decoding_mode || cm->error_resilient_mode);
+  assert(!memcmp(cm->counts.y_mode, zero_counts.y_mode,
+                 sizeof(cm->counts.y_mode)));
+  assert(!memcmp(cm->counts.uv_mode, zero_counts.uv_mode,
+                 sizeof(cm->counts.uv_mode)));
+  assert(!memcmp(cm->counts.partition, zero_counts.partition,
+                 sizeof(cm->counts.partition)));
+  assert(!memcmp(cm->counts.coef, zero_counts.coef,
+                 sizeof(cm->counts.coef)));
+  assert(!memcmp(cm->counts.eob_branch, zero_counts.eob_branch,
+                 sizeof(cm->counts.eob_branch)));
+  assert(!memcmp(cm->counts.switchable_interp, zero_counts.switchable_interp,
+                 sizeof(cm->counts.switchable_interp)));
+  assert(!memcmp(cm->counts.inter_mode, zero_counts.inter_mode,
+                 sizeof(cm->counts.inter_mode)));
+  assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter,
+                 sizeof(cm->counts.intra_inter)));
+  assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter,
+                 sizeof(cm->counts.comp_inter)));
+  assert(!memcmp(cm->counts.single_ref, zero_counts.single_ref,
+                 sizeof(cm->counts.single_ref)));
+  assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref,
+                 sizeof(cm->counts.comp_ref)));
+  assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
+  assert(!memcmp(cm->counts.mbskip, zero_counts.mbskip,
+                 sizeof(cm->counts.mbskip)));
+  assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
+}
+#endif  // NDEBUG
+
 int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
   int i;
   VP9_COMMON *const cm = &pbi->common;
@@ -1031,6 +1171,8 @@
       vp9_adapt_mode_probs(cm);
       vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
     }
+  } else {
+    debug_check_frame_counts(cm);
   }
 
   if (cm->refresh_frame_context)
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 2f5b136..0d0f0df 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -61,13 +61,16 @@
   254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0
 };
 
-#define INCREMENT_COUNT(token)               \
-  do {                                       \
-    coef_counts[type][ref][band][pt]         \
-               [token >= TWO_TOKEN ?     \
-                (token == DCT_EOB_TOKEN ? DCT_EOB_MODEL_TOKEN : TWO_TOKEN) : \
-                token]++;     \
-    token_cache[scan[c]] = vp9_pt_energy_class[token]; \
+#define INCREMENT_COUNT(token)                           \
+  do {                                                   \
+    if (!cm->frame_parallel_decoding_mode) {             \
+      ++coef_counts[type][ref][band][pt]                 \
+                   [token >= TWO_TOKEN ?                 \
+                    (token == DCT_EOB_TOKEN ?            \
+                     DCT_EOB_MODEL_TOKEN : TWO_TOKEN) :  \
+                    token];                              \
+    }                                                    \
+    token_cache[scan[c]] = vp9_pt_energy_class[token];   \
   } while (0)
 
 #define WRITE_COEF_CONTINUE(val, token)                  \
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index c1fbee3..243dbef 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -141,14 +141,13 @@
   cm->error.setjmp = 0;
   pbi->decoded_key_frame = 0;
 
-  if (pbi->oxcf.max_threads > 1) {
-    vp9_worker_init(&pbi->lf_worker);
-    pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData));
-    pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
-    if (pbi->lf_worker.data1 == NULL || !vp9_worker_reset(&pbi->lf_worker)) {
-      vp9_remove_decompressor(pbi);
-      return NULL;
-    }
+  vp9_worker_init(&pbi->lf_worker);
+  pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData));
+  pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
+  if (pbi->lf_worker.data1 == NULL ||
+      (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker))) {
+    vp9_remove_decompressor(pbi);
+    return NULL;
   }
 
   return pbi;
diff --git a/vp9/decoder/vp9_thread.c b/vp9/decoder/vp9_thread.c
index 5442ddf..d953e72 100644
--- a/vp9/decoder/vp9_thread.c
+++ b/vp9/decoder/vp9_thread.c
@@ -145,9 +145,7 @@
       pthread_cond_wait(&worker->condition_, &worker->mutex_);
     }
     if (worker->status_ == WORK) {
-      if (worker->hook) {
-        worker->had_error |= !worker->hook(worker->data1, worker->data2);
-      }
+      vp9_worker_execute(worker);
       worker->status_ = OK;
     } else if (worker->status_ == NOT_OK) {   // finish the worker
       done = 1;
@@ -178,7 +176,7 @@
   pthread_mutex_unlock(&worker->mutex_);
 }
 
-#endif
+#endif  // CONFIG_MULTITHREAD
 
 //------------------------------------------------------------------------------
 
@@ -218,12 +216,17 @@
   return ok;
 }
 
+void vp9_worker_execute(VP9Worker* const worker) {
+  if (worker->hook != NULL) {
+    worker->had_error |= !worker->hook(worker->data1, worker->data2);
+  }
+}
+
 void vp9_worker_launch(VP9Worker* const worker) {
 #if CONFIG_MULTITHREAD
   change_state(worker, WORK);
 #else
-  if (worker->hook)
-    worker->had_error |= !worker->hook(worker->data1, worker->data2);
+  vp9_worker_execute(worker);
 #endif
 }
 
diff --git a/vp9/decoder/vp9_thread.h b/vp9/decoder/vp9_thread.h
index e5e6f60..a624f3c 100644
--- a/vp9/decoder/vp9_thread.h
+++ b/vp9/decoder/vp9_thread.h
@@ -80,6 +80,11 @@
 // hook/data1/data2 can be changed at any time before calling this function,
 // but not be changed afterward until the next call to vp9_worker_sync().
 void vp9_worker_launch(VP9Worker* const worker);
+// This function is similar to vp9_worker_launch() except that it calls the
+// hook directly instead of using a thread. Convenient to bypass the thread
+// mechanism while still using the VP9Worker structs. vp9_worker_sync() must
+// still be called afterward (for error reporting).
+void vp9_worker_execute(VP9Worker* const worker);
 // Kill the thread and terminate the object. To use the object again, one
 // must call vp9_worker_reset() again.
 void vp9_worker_end(VP9Worker* const worker);
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 4b2d7c5..8378a78 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -598,7 +598,6 @@
                            int mi_row, int mi_col, BLOCK_SIZE bsize,
                            int index) {
   VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *xd = &cpi->mb.e_mbd;
   const int mis = cm->mode_info_stride;
   int bsl = b_width_log2(bsize);
   int bs = (1 << bsl) / 4;  // mode_info step for subsize
@@ -619,8 +618,7 @@
     int pl;
     const int idx = check_bsize_coverage(bs, cm->mi_rows, cm->mi_cols,
                                          mi_row, mi_col);
-    set_partition_seg_context(cm, xd, mi_row, mi_col);
-    pl = partition_plane_context(xd, bsize);
+    pl = partition_plane_context(cm, mi_row, mi_col, bsize);
     // encode the partition information
     if (idx == 0)
       write_token(bc, vp9_partition_tree,
@@ -662,10 +660,8 @@
 
   // update partition context
   if (bsize >= BLOCK_8X8 &&
-      (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) {
-    set_partition_seg_context(cm, xd, mi_row, mi_col);
-    update_partition_context(xd, subsize, bsize);
-  }
+      (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
+    update_partition_context(cm, mi_row, mi_col, subsize, bsize);
 }
 
 static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
@@ -1460,7 +1456,7 @@
 
     update_mbintra_mode_probs(cpi, &header_bc);
 
-    for (i = 0; i < NUM_PARTITION_CONTEXTS; ++i) {
+    for (i = 0; i < PARTITION_CONTEXTS; ++i) {
       vp9_prob pnew[PARTITION_TYPES - 1];
       unsigned int bct[PARTITION_TYPES - 1][2];
       update_mode(&header_bc, PARTITION_TYPES,
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index e40a609..12dad03 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -115,7 +115,7 @@
   int **mvsadcost;
 
   int mbmode_cost[MB_MODE_COUNT];
-  unsigned inter_mode_cost[INTER_MODE_CONTEXTS][MB_MODE_COUNT - NEARESTMV];
+  unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
   int intra_uv_mode_cost[2][MB_MODE_COUNT];
   int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
   int switchable_interp_costs[SWITCHABLE_FILTERS + 1]
@@ -166,7 +166,7 @@
   PICK_MODE_CONTEXT sb32x64_context[2];
   PICK_MODE_CONTEXT sb64x32_context[2];
   PICK_MODE_CONTEXT sb64_context;
-  int partition_cost[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
+  int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
 
   BLOCK_SIZE b_partitioning[4][4][4];
   BLOCK_SIZE mb_partitioning[4][4];
@@ -174,8 +174,6 @@
   BLOCK_SIZE sb64_partitioning;
 
   void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
-  void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
-                         int y_blocks);
 };
 
 // TODO(jingning): the variables used here are little complicated. need further
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 23c652d..550cdee 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -36,14 +36,13 @@
   output[3] = dct_const_round_shift(temp2);
 }
 
-void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
   // similar. In the first one, we transform the columns and transpose
   // the results. In the second one, we transform the rows. To achieve that,
   // as the first pass results are transposed, we tranpose the columns (that
   // is the transposed rows) and transpose the results (so that it goes back
   // in normal/row positions).
-  const int stride = pitch >> 1;
   int pass;
   // We need an intermediate buffer between passes.
   int16_t intermediate[4 * 4];
@@ -230,8 +229,7 @@
   output[7] = dct_const_round_shift(t3);
 }
 
-void vp9_short_fdct8x8_c(int16_t *input, int16_t *final_output, int pitch) {
-  const int stride = pitch >> 1;
+void vp9_short_fdct8x8_c(int16_t *input, int16_t *final_output, int stride) {
   int i, j;
   int16_t intermediate[64];
 
@@ -587,18 +585,17 @@
 
 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
    pixel. */
-void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int stride) {
   int i;
   int a1, b1, c1, d1, e1;
   int16_t *ip = input;
   int16_t *op = output;
-  int pitch_short = pitch >> 1;
 
   for (i = 0; i < 4; i++) {
-    a1 = ip[0 * pitch_short];
-    b1 = ip[1 * pitch_short];
-    c1 = ip[2 * pitch_short];
-    d1 = ip[3 * pitch_short];
+    a1 = ip[0 * stride];
+    b1 = ip[1 * stride];
+    c1 = ip[2 * stride];
+    d1 = ip[3 * stride];
 
     a1 += b1;
     d1 = d1 - c1;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 0515db2..98284a6 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -500,7 +500,6 @@
   const struct segmentation *const seg = &cm->seg;
 
   set_skip_context(cm, xd, mi_row, mi_col);
-  set_partition_seg_context(cm, xd, mi_row, mi_col);
 
   // Activity map pointer
   x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
@@ -819,8 +818,7 @@
 
   c1 = BLOCK_4X4;
   if (bsize >= BLOCK_8X8) {
-    set_partition_seg_context(cm, xd, mi_row, mi_col);
-    pl = partition_plane_context(xd, bsize);
+    pl = partition_plane_context(cm, mi_row, mi_col, bsize);
     c1 = *(get_sb_partitioning(x, bsize));
   }
   partition = partition_lookup[bsl][c1];
@@ -862,10 +860,8 @@
       break;
   }
 
-  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) {
-    set_partition_seg_context(cm, xd, mi_row, mi_col);
-    update_partition_context(xd, c1, bsize);
-  }
+  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
+    update_partition_context(cm, mi_row, mi_col, c1, bsize);
 }
 
 // Check to see if the given partition size is allowed for a specified number
@@ -1056,8 +1052,7 @@
       pick_sb_modes(cpi, mi_row, mi_col, &none_rate, &none_dist, bsize,
                     get_block_context(x, bsize), INT64_MAX);
 
-      set_partition_seg_context(cm, xd, mi_row, mi_col);
-      pl = partition_plane_context(xd, bsize);
+      pl = partition_plane_context(cm, mi_row, mi_col, bsize);
       none_rate += x->partition_cost[pl][PARTITION_NONE];
 
       restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -1147,8 +1142,8 @@
     default:
       assert(0);
   }
-  set_partition_seg_context(cm, xd, mi_row, mi_col);
-  pl = partition_plane_context(xd, bsize);
+
+  pl = partition_plane_context(cm, mi_row, mi_col, bsize);
   if (last_part_rate < INT_MAX)
     last_part_rate += x->partition_cost[pl][partition];
 
@@ -1198,12 +1193,10 @@
 
       split_rate += rt;
       split_dist += dt;
-      set_partition_seg_context(cm, xd, mi_row + y_idx, mi_col + x_idx);
-      pl = partition_plane_context(xd, bsize);
+      pl = partition_plane_context(cm, mi_row + y_idx, mi_col + x_idx, bsize);
       split_rate += x->partition_cost[pl][PARTITION_NONE];
     }
-    set_partition_seg_context(cm, xd, mi_row, mi_col);
-    pl = partition_plane_context(xd, bsize);
+    pl = partition_plane_context(cm, mi_row, mi_col, bsize);
     if (split_rate < INT_MAX) {
       split_rate += x->partition_cost[pl][PARTITION_SPLIT];
 
@@ -1532,8 +1525,7 @@
                   get_block_context(x, bsize), best_rd);
     if (this_rate != INT_MAX) {
       if (bsize >= BLOCK_8X8) {
-        set_partition_seg_context(cm, xd, mi_row, mi_col);
-        pl = partition_plane_context(xd, bsize);
+        pl = partition_plane_context(cm, mi_row, mi_col, bsize);
         this_rate += x->partition_cost[pl][PARTITION_NONE];
       }
       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
@@ -1593,8 +1585,7 @@
       }
     }
     if (sum_rd < best_rd && i == 4) {
-      set_partition_seg_context(cm, xd, mi_row, mi_col);
-      pl = partition_plane_context(xd, bsize);
+      pl = partition_plane_context(cm, mi_row, mi_col, bsize);
       sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
@@ -1650,8 +1641,7 @@
       }
     }
     if (sum_rd < best_rd) {
-      set_partition_seg_context(cm, xd, mi_row, mi_col);
-      pl = partition_plane_context(xd, bsize);
+      pl = partition_plane_context(cm, mi_row, mi_col, bsize);
       sum_rate += x->partition_cost[pl][PARTITION_HORZ];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
@@ -1693,8 +1683,7 @@
       }
     }
     if (sum_rd < best_rd) {
-      set_partition_seg_context(cm, xd, mi_row, mi_col);
-      pl = partition_plane_context(xd, bsize);
+      pl = partition_plane_context(cm, mi_row, mi_col, bsize);
       sum_rate += x->partition_cost[pl][PARTITION_VERT];
       sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
       if (sum_rd < best_rd) {
@@ -1726,7 +1715,6 @@
 static void rd_pick_reference_frame(VP9_COMP *cpi, int mi_row, int mi_col) {
   VP9_COMMON * const cm = &cpi->common;
   MACROBLOCK * const x = &cpi->mb;
-  MACROBLOCKD * const xd = &x->e_mbd;
   int bsl = b_width_log2(BLOCK_64X64), bs = 1 << bsl;
   int ms = bs / 2;
   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
@@ -1746,8 +1734,7 @@
     cpi->set_ref_frame_mask = 1;
     pick_sb_modes(cpi, mi_row, mi_col, &r, &d, BLOCK_64X64,
                   get_block_context(x, BLOCK_64X64), INT64_MAX);
-    set_partition_seg_context(cm, xd, mi_row, mi_col);
-    pl = partition_plane_context(xd, BLOCK_64X64);
+    pl = partition_plane_context(cm, mi_row, mi_col, BLOCK_64X64);
     r += x->partition_cost[pl][PARTITION_NONE];
 
     *(get_sb_partitioning(x, BLOCK_64X64)) = BLOCK_64X64;
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 13d8aa8..3358fbb 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -391,7 +391,7 @@
       xoff = 8 * (block & twmask);
       yoff = 8 * (block >> twl);
       src_diff = p->src_diff + 4 * bw * yoff + xoff;
-      vp9_short_fdct8x8(src_diff, coeff, bw * 8);
+      vp9_short_fdct8x8(src_diff, coeff, bw * 4);
       vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                      p->quant, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob, scan, iscan);
@@ -402,7 +402,7 @@
       xoff = 4 * (block & twmask);
       yoff = 4 * (block >> twl);
       src_diff = p->src_diff + 4 * bw * yoff + xoff;
-      x->fwd_txm4x4(src_diff, coeff, bw * 8);
+      x->fwd_txm4x4(src_diff, coeff, bw * 4);
       vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                      p->quant, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob, scan, iscan);
@@ -584,7 +584,7 @@
       if (tx_type != DCT_DCT)
         vp9_short_fht8x8(src_diff, coeff, bw * 4, tx_type);
       else
-        vp9_short_fdct8x8(src_diff, coeff, bw * 8);
+        vp9_short_fdct8x8(src_diff, coeff, bw * 4);
       vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                      p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob, scan, iscan);
@@ -612,7 +612,7 @@
       if (tx_type != DCT_DCT)
         vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
       else
-        x->fwd_txm4x4(src_diff, coeff, bw * 8);
+        x->fwd_txm4x4(src_diff, coeff, bw * 4);
       vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                      p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob, scan, iscan);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index f6b2a28..05b1662 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -959,13 +959,11 @@
     sf->optimize_coefficients = 0;
   }
 
-  cpi->mb.fwd_txm4x4    = vp9_short_fdct4x4;
+  cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
   if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {
-    cpi->mb.fwd_txm4x4    = vp9_short_walsh4x4;
+    cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
   }
 
-  cpi->mb.quantize_b_4x4      = vp9_regular_quantize_b_4x4;
-
   if (cpi->sf.subpel_search_method == SUBPEL_ITERATIVE) {
     cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_iterative;
     cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_iterative;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 7187884..b1dfcbb 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -475,7 +475,7 @@
 
   int y_mode_count[4][INTRA_MODES];
   int y_uv_mode_count[INTRA_MODES][INTRA_MODES];
-  unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
+  unsigned int partition_count[PARTITION_CONTEXTS][PARTITION_TYPES];
 
   nmv_context_counts NMVcount;
 
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 3191c49..459aa33 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -13,21 +13,8 @@
 
 #include "vp9/encoder/vp9_block.h"
 
-#define prototype_quantize_block(sym) \
-  void (sym)(MACROBLOCK *mb, int b_idx)
-
-#define prototype_quantize_block_pair(sym) \
-  void (sym)(MACROBLOCK *mb, int b_idx1, int b_idx2)
-
-#define prototype_quantize_mb(sym) \
-  void (sym)(MACROBLOCK *x)
-
-void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2,
-                                     int y_blocks);
 void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
                                 int y_blocks);
-void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
-                                int y_blocks);
 struct VP9_COMP;
 
 void vp9_set_quantizer(struct VP9_COMP *cpi, int q);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 4e4dbac..f166b10 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -250,7 +250,7 @@
 
   fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);
 
-  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
+  for (i = 0; i < PARTITION_CONTEXTS; i++)
     vp9_cost_tokens(cpi->mb.partition_cost[i],
                     cm->fc.partition_prob[cm->frame_type][i],
                     vp9_partition_tree);
@@ -1085,16 +1085,13 @@
                            dst, dst_stride);
 
         tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
-        if (tx_type != DCT_DCT) {
+        if (tx_type != DCT_DCT)
           vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
-          x->quantize_b_4x4(x, block, tx_type, 16);
-        } else {
-          x->fwd_txm4x4(src_diff, coeff, 16);
-          x->quantize_b_4x4(x, block, tx_type, 16);
-        }
+        else
+          x->fwd_txm4x4(src_diff, coeff, 8);
+        vp9_regular_quantize_b_4x4(x, block, tx_type, 16);
 
-        get_scan_nb_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block),
-                        &scan, &nb);
+        get_scan_nb_4x4(tx_type, &scan, &nb);
         ratey += cost_coeffs(x, 0, block,
                              tempa + idx, templ + idy, TX_4X4, scan, nb);
         distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
@@ -1563,8 +1560,8 @@
       k += (idy * 2 + idx);
       coeff = BLOCK_OFFSET(p->coeff, k);
       x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
-                    coeff, 16);
-      x->quantize_b_4x4(x, k, DCT_DCT, 16);
+                    coeff, 8);
+      vp9_regular_quantize_b_4x4(x, k, DCT_DCT, 16);
       thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                         16, &ssz);
       thissse += ssz;
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index 457883f..fa60e80 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -12,14 +12,13 @@
 #include "vp9/common/vp9_idct.h"  // for cospi constants
 #include "vpx_ports/mem.h"
 
-void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
   // similar. In the first one, we transform the columns and transpose
   // the results. In the second one, we transform the rows. To achieve that,
   // as the first pass results are transposed, we tranpose the columns (that
   // is the transposed rows) and transpose the results (so that it goes back
   // in normal/row positions).
-  const int stride = pitch >> 1;
   int pass;
   // Constants
   //    When we use them, in one case, they are all the same. In all others
@@ -271,8 +270,7 @@
   write_buffer_4x4(output, in);
 }
 
-void vp9_short_fdct8x8_sse2(int16_t *input, int16_t *output, int pitch) {
-  const int stride = pitch >> 1;
+void vp9_short_fdct8x8_sse2(int16_t *input, int16_t *output, int stride) {
   int pass;
   // Constants
   //    When we use them, in one case, they are all the same. In all others