Merge "Implenment on demand border extension. In place extend the border now. Next commit will totally remove the border."
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 57c7eb3..4451479 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -563,3 +563,7 @@
 1a9c2914ba932a38f0a143efc1ad0e318e78888b  vp90-2-tos_426x178_tile_1x1_181kbps.webm
 a3d2b09f24debad4747a1b3066f572be4273bced  vp90-2-tos_640x266_tile_1x2_336kbps.webm
 c64b03b5c090e6888cb39685c31f00a6b79fa45c  vp90-2-tos_854x356_tile_1x2_656kbps.webm
+0e7cd4135b231c9cea8d76c19f9e84b6fd77acec  vp90-2-08-tile_1x8_frame_parallel.webm
+c9b6850af28579b031791066457f4cb40df6e1c7  vp90-2-08-tile_1x8_frame_parallel.webm.md5
+e448b6e83490bca0f8d58b4f4b1126a17baf4b0c  vp90-2-08-tile_1x8.webm
+5e524165f0397e6141d914f4f0a66267d7658376  vp90-2-08-tile_1x8.webm.md5
diff --git a/test/test.mk b/test/test.mk
index 7c632fd..98e5c7b 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -654,6 +654,10 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x4_frame_parallel.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x4.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8_frame_parallel.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8_frame_parallel.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm
diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc
index 5ce3be2..ee610fa 100644
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -165,6 +165,7 @@
   "vp90-2-07-frame_parallel.webm",
   "vp90-2-08-tile_1x2_frame_parallel.webm", "vp90-2-08-tile_1x2.webm",
   "vp90-2-08-tile_1x4_frame_parallel.webm", "vp90-2-08-tile_1x4.webm",
+  "vp90-2-08-tile_1x8_frame_parallel.webm", "vp90-2-08-tile_1x8.webm",
   "vp90-2-08-tile-4x4.webm", "vp90-2-08-tile-4x1.webm",
   "vp90-2-09-subpixel-00.ivf",
   "vp90-2-02-size-lf-1920x1080.webm",
diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc
index a8ce6e4..a78cdea 100644
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -141,10 +141,12 @@
       "68ede6abd66bae0a2edf2eb9232241b6" },
     { "vp90-2-08-tile_1x4_frame_parallel.webm",
       "368ebc6ebf3a5e478d85b2c3149b2848" },
+    { "vp90-2-08-tile_1x8_frame_parallel.webm",
+      "17e439da2388aff3a0f69cb22579c6c1" },
   };
 
   for (int i = 0; i < static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) {
-    for (int t = 2; t <= 4; ++t) {
+    for (int t = 2; t <= 8; ++t) {
       EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str())
           << "threads = " << t;
     }
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index f974f8c..93f96c8 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -38,8 +38,9 @@
 #define REF_CONTEXTS 5
 
 typedef enum {
-  PLANE_TYPE_Y_WITH_DC,
-  PLANE_TYPE_UV,
+  PLANE_TYPE_Y  = 0,
+  PLANE_TYPE_UV = 1,
+  PLANE_TYPES
 } PLANE_TYPE;
 
 typedef char ENTROPY_CONTEXT;
@@ -268,45 +269,53 @@
   const MODE_INFO *const mi = xd->mi_8x8[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
 
-  if (plane_type != PLANE_TYPE_Y_WITH_DC ||
-      xd->lossless ||
-      is_inter_block(mbmi))
+  if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mbmi))
     return DCT_DCT;
 
-  return mode2txfm_map[mbmi->sb_type < BLOCK_8X8 ?
-                       mi->bmi[ib].as_mode : mbmi->mode];
+  return mode2txfm_map[mbmi->sb_type < BLOCK_8X8 ? mi->bmi[ib].as_mode
+                                                 : mbmi->mode];
 }
 
 static INLINE TX_TYPE get_tx_type_8x8(PLANE_TYPE plane_type,
                                       const MACROBLOCKD *xd) {
-  return plane_type == PLANE_TYPE_Y_WITH_DC ?
-             mode2txfm_map[xd->mi_8x8[0]->mbmi.mode] : DCT_DCT;
+  return plane_type == PLANE_TYPE_Y ? mode2txfm_map[xd->mi_8x8[0]->mbmi.mode]
+                                    : DCT_DCT;
 }
 
 static INLINE TX_TYPE get_tx_type_16x16(PLANE_TYPE plane_type,
                                         const MACROBLOCKD *xd) {
-  return plane_type == PLANE_TYPE_Y_WITH_DC ?
-             mode2txfm_map[xd->mi_8x8[0]->mbmi.mode] : DCT_DCT;
+  return plane_type == PLANE_TYPE_Y ? mode2txfm_map[xd->mi_8x8[0]->mbmi.mode]
+                                    : DCT_DCT;
 }
 
 static void setup_block_dptrs(MACROBLOCKD *xd, int ss_x, int ss_y) {
   int i;
 
   for (i = 0; i < MAX_MB_PLANE; i++) {
-    xd->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y_WITH_DC;
+    xd->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y;
     xd->plane[i].subsampling_x = i ? ss_x : 0;
     xd->plane[i].subsampling_y = i ? ss_y : 0;
   }
 #if CONFIG_ALPHA
   // TODO(jkoleszar): Using the Y w/h for now
+  xd->plane[3].plane_type = PLANE_TYPE_Y;
   xd->plane[3].subsampling_x = 0;
   xd->plane[3].subsampling_y = 0;
 #endif
 }
 
+static TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize) {
+  if (bsize < BLOCK_8X8) {
+    return TX_4X4;
+  } else {
+    // TODO(dkovalev): Assuming YUV420 (ss_x == 1, ss_y == 1)
+    const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][1][1];
+    return MIN(y_tx_size, max_txsize_lookup[plane_bsize]);
+  }
+}
 
-static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
-  return MIN(mbmi->tx_size, max_uv_txsize_lookup[mbmi->sb_type]);
+static TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
+  return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type);
 }
 
 static BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
diff --git a/vp9/common/vp9_common_data.c b/vp9/common/vp9_common_data.c
index 388f38d..886c0af 100644
--- a/vp9/common/vp9_common_data.c
+++ b/vp9/common/vp9_common_data.c
@@ -108,12 +108,6 @@
   TX_16X16, TX_16X16, TX_16X16,
   TX_32X32, TX_32X32, TX_32X32, TX_32X32
 };
-const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZES] = {
-  TX_4X4,   TX_4X4,   TX_4X4,
-  TX_4X4,   TX_4X4,   TX_4X4,
-  TX_8X8,   TX_8X8,   TX_8X8,
-  TX_16X16, TX_16X16, TX_16X16, TX_32X32
-};
 
 const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
   TX_4X4,  // ONLY_4X4
diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h
index c1f6405..b6fc70a 100644
--- a/vp9/common/vp9_common_data.h
+++ b/vp9/common/vp9_common_data.h
@@ -26,7 +26,6 @@
 extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZES];
 extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES];
 extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
-extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZES];
 extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
 extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
 
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index d89f70d..adab33f 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -391,7 +391,7 @@
   {255, 246, 247, 255, 239, 255, 253, 255},
 };
 
-static const vp9_coeff_probs_model default_coef_probs_4x4[BLOCK_TYPES] = {
+static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = {
   {  // Y plane
     {  // Intra
       {  // Band 0
@@ -475,7 +475,7 @@
   }
 };
 
-static const vp9_coeff_probs_model default_coef_probs_8x8[BLOCK_TYPES] = {
+static const vp9_coeff_probs_model default_coef_probs_8x8[PLANE_TYPES] = {
   {  // Y plane
     {  // Intra
       {  // Band 0
@@ -559,7 +559,7 @@
   }
 };
 
-static const vp9_coeff_probs_model default_coef_probs_16x16[BLOCK_TYPES] = {
+static const vp9_coeff_probs_model default_coef_probs_16x16[PLANE_TYPES] = {
   {  // Y plane
     {  // Intra
       {  // Band 0
@@ -643,7 +643,7 @@
   }
 };
 
-static const vp9_coeff_probs_model default_coef_probs_32x32[BLOCK_TYPES] = {
+static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = {
   {  // Y plane
     {  // Intra
       {  // Band 0
@@ -763,7 +763,7 @@
       cm->counts.eob_branch[tx_size];
   int i, j, k, l, m;
 
-  for (i = 0; i < BLOCK_TYPES; ++i)
+  for (i = 0; i < PLANE_TYPES; ++i)
     for (j = 0; j < REF_TYPES; ++j)
       for (k = 0; k < COEF_BANDS; ++k)
         for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index 4560bf8..721917f 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -59,8 +59,6 @@
 
 /* Coefficients are predicted via a 3-dimensional probability table. */
 
-/* Outside dimension.  0 = Y with DC, 1 = UV */
-#define BLOCK_TYPES 2
 #define REF_TYPES 2  // intra=0, inter=1
 
 /* Middle dimension reflects the coefficient position within the transform. */
@@ -179,7 +177,7 @@
   const MODE_INFO *const mi = xd->mi_8x8[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
 
-  if (is_inter_block(mbmi) || type != PLANE_TYPE_Y_WITH_DC || xd->lossless) {
+  if (is_inter_block(mbmi) || type != PLANE_TYPE_Y || xd->lossless) {
     return &vp9_default_scan_orders[tx_size];
   } else {
     const MB_PREDICTION_MODE mode =
diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c
index ef8de20..f2e910f 100644
--- a/vp9/common/vp9_loopfilter_filters.c
+++ b/vp9/common/vp9_loopfilter_filters.c
@@ -121,32 +121,15 @@
   }
 }
 
-void vp9_loop_filter_horizontal_edge_16_c(uint8_t *s, int p /* pitch */,
+void vp9_loop_filter_horizontal_edge_16_c(uint8_t *s, int p,
                                           const uint8_t *blimit0,
                                           const uint8_t *limit0,
                                           const uint8_t *thresh0,
                                           const uint8_t *blimit1,
                                           const uint8_t *limit1,
                                           const uint8_t *thresh1) {
-  int i, j;
-  const uint8_t *blimit = blimit0;
-  const uint8_t *limit = limit0;
-  const uint8_t *thresh = thresh0;
-
-  for (i = 0; i < 2; ++i) {
-    for (j = 0; j < 8; ++j) {
-      const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
-      const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
-      const int8_t mask = filter_mask(*limit, *blimit,
-                                      p3, p2, p1, p0, q0, q1, q2, q3);
-      const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
-      filter4(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
-      ++s;
-    }
-    blimit = blimit1;
-    limit = limit1;
-    thresh = thresh1;
-  }
+  vp9_loop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1);
+  vp9_loop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1);
 }
 
 void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch,
@@ -176,25 +159,9 @@
                                         const uint8_t *blimit1,
                                         const uint8_t *limit1,
                                         const uint8_t *thresh1) {
-  int i, j;
-  const uint8_t *blimit = blimit0;
-  const uint8_t *limit = limit0;
-  const uint8_t *thresh = thresh0;
-
-  for (i = 0; i < 2; ++i) {
-    for (j = 0; j < 8; ++j) {
-      const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
-      const uint8_t q0 = s[0],  q1 = s[1],  q2 = s[2],  q3 = s[3];
-      const int8_t mask = filter_mask(*limit, *blimit,
-                                      p3, p2, p1, p0, q0, q1, q2, q3);
-      const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
-      filter4(mask, hev, s - 2, s - 1, s, s + 1);
-      s += pitch;
-    }
-    blimit = blimit1;
-    limit = limit1;
-    thresh = thresh1;
-  }
+  vp9_loop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1);
+  vp9_loop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1,
+                                  thresh1, 1);
 }
 
 static INLINE void filter8(int8_t mask, uint8_t hev, uint8_t flat,
@@ -241,35 +208,15 @@
   }
 }
 
-void vp9_mbloop_filter_horizontal_edge_16_c(uint8_t *s, int p /* pitch */,
+void vp9_mbloop_filter_horizontal_edge_16_c(uint8_t *s, int p,
                                             const uint8_t *blimit0,
                                             const uint8_t *limit0,
                                             const uint8_t *thresh0,
                                             const uint8_t *blimit1,
                                             const uint8_t *limit1,
                                             const uint8_t *thresh1) {
-  int i, j;
-  const uint8_t *blimit = blimit0;
-  const uint8_t *limit = limit0;
-  const uint8_t *thresh = thresh0;
-
-  for (i = 0; i < 2; ++i) {
-    for (j = 0; j < 8; ++j) {
-      const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
-      const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
-
-      const int8_t mask = filter_mask(*limit, *blimit,
-                                      p3, p2, p1, p0, q0, q1, q2, q3);
-      const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
-      const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
-      filter8(mask, hev, flat, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
-                               s,         s + 1 * p, s + 2 * p, s + 3 * p);
-      ++s;
-    }
-    blimit = blimit1;
-    limit = limit1;
-    thresh = thresh1;
-  }
+  vp9_mbloop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1);
+  vp9_mbloop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1);
 }
 
 void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch,
@@ -299,27 +246,9 @@
                                           const uint8_t *blimit1,
                                           const uint8_t *limit1,
                                           const uint8_t *thresh1) {
-  int i, j;
-  const uint8_t *blimit = blimit0;
-  const uint8_t *limit = limit0;
-  const uint8_t *thresh = thresh0;
-
-  for (i = 0; i < 2; ++i) {
-    for (j = 0; j < 8; ++j) {
-      const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
-      const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
-      const int8_t mask = filter_mask(*limit, *blimit,
-                                      p3, p2, p1, p0, q0, q1, q2, q3);
-      const int8_t hev = hev_mask(thresh[0], p1, p0, q0, q1);
-      const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
-      filter8(mask, hev, flat, s - 4, s - 3, s - 2, s - 1,
-                               s,     s + 1, s + 2, s + 3);
-      s += pitch;
-    }
-    blimit = blimit1;
-    limit = limit1;
-    thresh = thresh1;
-  }
+  vp9_mbloop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1);
+  vp9_mbloop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1,
+                                    thresh1, 1);
 }
 
 static INLINE void filter16(int8_t mask, uint8_t hev,
@@ -402,13 +331,14 @@
   }
 }
 
-void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int p,
-                                  const uint8_t *blimit,
-                                  const uint8_t *limit,
-                                  const uint8_t *thresh) {
+static void mb_lpf_vertical_edge_w(uint8_t *s, int p,
+                                   const uint8_t *blimit,
+                                   const uint8_t *limit,
+                                   const uint8_t *thresh,
+                                   int count) {
   int i;
 
-  for (i = 0; i < 8; ++i) {
+  for (i = 0; i < count; ++i) {
     const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
     const uint8_t q0 = s[0], q1 = s[1],  q2 = s[2], q3 = s[3];
     const int8_t mask = filter_mask(*limit, *blimit,
@@ -425,25 +355,16 @@
   }
 }
 
+void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int p,
+                                  const uint8_t *blimit,
+                                  const uint8_t *limit,
+                                  const uint8_t *thresh) {
+  mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
+}
+
 void vp9_mb_lpf_vertical_edge_w_16_c(uint8_t *s, int p,
                                      const uint8_t *blimit,
                                      const uint8_t *limit,
                                      const uint8_t *thresh) {
-  int i;
-
-  for (i = 0; i < 16; ++i) {
-    const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
-    const uint8_t q0 = s[0], q1 = s[1],  q2 = s[2], q3 = s[3];
-    const int8_t mask = filter_mask(*limit, *blimit,
-                                    p3, p2, p1, p0, q0, q1, q2, q3);
-    const int8_t hev = hev_mask(*thresh, p1, p0, q0, q1);
-    const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
-    const int8_t flat2 = flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0,
-                                    q0, s[4], s[5], s[6], s[7]);
-
-    filter16(mask, hev, flat, flat2,
-             s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1,
-             s,     s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7);
-    s += p;
-  }
+  mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
 }
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 8163a03..975a1d5 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -47,7 +47,7 @@
   vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
   vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
   vp9_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1];
-  vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
+  vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES];
   vp9_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
                                  [SWITCHABLE_FILTERS - 1];
   vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
@@ -64,8 +64,8 @@
   unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
   unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
   unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES];
-  vp9_coeff_count_model coef[TX_SIZES][BLOCK_TYPES];
-  unsigned int eob_branch[TX_SIZES][BLOCK_TYPES][REF_TYPES]
+  vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES];
+  unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES]
                          [COEF_BANDS][COEFF_CONTEXTS];
   unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS]
                                 [SWITCHABLE_FILTERS];
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index 793117d..24cdd5f 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -83,16 +83,14 @@
   else
     return 0;
 }
-// Returns a context number for the given MB prediction signal
-unsigned char vp9_get_pred_context_comp_inter_inter(const VP9_COMMON *cm,
-                                                    const MACROBLOCKD *xd) {
-  int pred_context;
-  const MODE_INFO *const above_mi = get_above_mi(xd);
-  const MODE_INFO *const left_mi = get_left_mi(xd);
-  const MB_MODE_INFO *const above_mbmi = get_above_mbmi(above_mi);
-  const MB_MODE_INFO *const left_mbmi = get_left_mbmi(left_mi);
-  const int above_in_image = above_mi != NULL;
-  const int left_in_image = left_mi != NULL;
+
+int vp9_get_reference_mode_context(const VP9_COMMON *cm,
+                                   const MACROBLOCKD *xd) {
+  int ctx;
+  const MB_MODE_INFO *const above_mbmi = get_above_mbmi(get_above_mi(xd));
+  const MB_MODE_INFO *const left_mbmi = get_left_mbmi(get_left_mi(xd));
+  const int above_in_image = above_mbmi != NULL;
+  const int left_in_image = left_mbmi != NULL;
   // Note:
   // The mode info data structure has a one element border above and to the
   // left of the entries correpsonding to real macroblocks.
@@ -100,32 +98,32 @@
   if (above_in_image && left_in_image) {  // both edges available
     if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi))
       // neither edge uses comp pred (0/1)
-      pred_context = (above_mbmi->ref_frame[0] == cm->comp_fixed_ref) ^
-                     (left_mbmi->ref_frame[0] == cm->comp_fixed_ref);
+      ctx = (above_mbmi->ref_frame[0] == cm->comp_fixed_ref) ^
+            (left_mbmi->ref_frame[0] == cm->comp_fixed_ref);
     else if (!has_second_ref(above_mbmi))
       // one of two edges uses comp pred (2/3)
-      pred_context = 2 + (above_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
-                          !is_inter_block(above_mbmi));
+      ctx = 2 + (above_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
+                 !is_inter_block(above_mbmi));
     else if (!has_second_ref(left_mbmi))
       // one of two edges uses comp pred (2/3)
-      pred_context = 2 + (left_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
-                          !is_inter_block(left_mbmi));
+      ctx = 2 + (left_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
+                 !is_inter_block(left_mbmi));
     else  // both edges use comp pred (4)
-      pred_context = 4;
+      ctx = 4;
   } else if (above_in_image || left_in_image) {  // one edge available
     const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
 
     if (!has_second_ref(edge_mbmi))
       // edge does not use comp pred (0/1)
-      pred_context = edge_mbmi->ref_frame[0] == cm->comp_fixed_ref;
+      ctx = edge_mbmi->ref_frame[0] == cm->comp_fixed_ref;
     else
       // edge uses comp pred (3)
-      pred_context = 3;
+      ctx = 3;
   } else {  // no edges available (1)
-    pred_context = 1;
+    ctx = 1;
   }
-  assert(pred_context >= 0 && pred_context < COMP_INTER_CONTEXTS);
-  return pred_context;
+  assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS);
+  return ctx;
 }
 
 // Returns a context number for the given MB prediction signal
@@ -366,7 +364,7 @@
 // The mode info data structure has a one element border above and to the
 // left of the entries corresponding to real blocks.
 // The prediction flags in these dummy entries are initialized to 0.
-unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd) {
+int vp9_get_tx_size_context(const MACROBLOCKD *xd) {
   const int max_tx_size = max_txsize_lookup[xd->mi_8x8[0]->mbmi.sb_type];
   const MB_MODE_INFO *const above_mbmi = get_above_mbmi(get_above_mi(xd));
   const MB_MODE_INFO *const left_mbmi = get_left_mbmi(get_left_mi(xd));
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index bd0b8d6..a238489 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -68,15 +68,11 @@
   return cm->fc.intra_inter_prob[vp9_get_intra_inter_context(xd)];
 }
 
-unsigned char vp9_get_pred_context_comp_inter_inter(const VP9_COMMON *cm,
-                                                    const MACROBLOCKD *xd);
+int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd);
 
-
-static INLINE
-vp9_prob vp9_get_pred_prob_comp_inter_inter(const VP9_COMMON *cm,
-                                            const MACROBLOCKD *xd) {
-  const int pred_context = vp9_get_pred_context_comp_inter_inter(cm, xd);
-  return cm->fc.comp_inter_prob[pred_context];
+static INLINE vp9_prob vp9_get_reference_mode_prob(const VP9_COMMON *cm,
+                                                   const MACROBLOCKD *xd) {
+  return cm->fc.comp_inter_prob[vp9_get_reference_mode_context(cm, xd)];
 }
 
 unsigned char vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
@@ -104,7 +100,7 @@
   return cm->fc.single_ref_prob[pred_context][1];
 }
 
-unsigned char vp9_get_pred_context_tx_size(const MACROBLOCKD *xd);
+int vp9_get_tx_size_context(const MACROBLOCKD *xd);
 
 static const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
                                     const struct tx_probs *tx_probs) {
@@ -123,8 +119,7 @@
 
 static const vp9_prob *get_tx_probs2(TX_SIZE max_tx_size, const MACROBLOCKD *xd,
                                      const struct tx_probs *tx_probs) {
-  const int ctx = vp9_get_pred_context_tx_size(xd);
-  return get_tx_probs(max_tx_size, ctx, tx_probs);
+  return get_tx_probs(max_tx_size, vp9_get_tx_size_context(xd), tx_probs);
 }
 
 static unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 627ea31..19d5fc3 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -749,6 +749,9 @@
 specialize vp9_diamond_search_sad sse3
 vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4
 
+prototype int vp9_full_range_search "struct macroblock *x, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"
+specialize vp9_full_range_search
+
 prototype void vp9_temporal_filter_apply "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"
 specialize vp9_temporal_filter_apply sse2
 
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 1d5aa06..1608aa6 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -539,7 +539,7 @@
   int i, j, k, l, m;
 
   if (vp9_read_bit(r))
-    for (i = 0; i < BLOCK_TYPES; ++i)
+    for (i = 0; i < PLANE_TYPES; ++i)
       for (j = 0; j < REF_TYPES; ++j)
         for (k = 0; k < COEF_BANDS; ++k)
           for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 7858e16..996fd12 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -63,7 +63,7 @@
 
 static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
                                      TX_SIZE max_tx_size, vp9_reader *r) {
-  const int ctx = vp9_get_pred_context_tx_size(xd);
+  const int ctx = vp9_get_tx_size_context(xd);
   const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc.tx_probs);
   TX_SIZE tx_size = vp9_read(r, tx_probs[0]);
   if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
@@ -258,10 +258,9 @@
   mv->col = ref->col + diff.col;
 }
 
-static REFERENCE_MODE read_reference_mode(VP9_COMMON *cm,
-                                             const MACROBLOCKD *xd,
-                                             vp9_reader *r) {
-  const int ctx = vp9_get_pred_context_comp_inter_inter(cm, xd);
+static REFERENCE_MODE read_reference_mode(VP9_COMMON *cm, const MACROBLOCKD *xd,
+                                          vp9_reader *r) {
+  const int ctx = vp9_get_reference_mode_context(cm, xd);
   const int mode = vp9_read(r, cm->fc.comp_inter_prob[ctx]);
   if (!cm->frame_parallel_decoding_mode)
     ++cm->counts.comp_inter[ctx][mode];
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 7ee775b..e8ab868 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -44,7 +44,7 @@
 int intra_mode_stats[INTRA_MODES]
                     [INTRA_MODES]
                     [INTRA_MODES];
-vp9_coeff_stats tree_update_hist[TX_SIZES][BLOCK_TYPES];
+vp9_coeff_stats tree_update_hist[TX_SIZES][PLANE_TYPES];
 
 extern unsigned int active_section;
 #endif
@@ -233,10 +233,10 @@
     // (if not specified at the frame/segment level)
     if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) {
       vp9_write(bc, mi->ref_frame[1] > INTRA_FRAME,
-                vp9_get_pred_prob_comp_inter_inter(cm, xd));
+                vp9_get_reference_mode_prob(cm, xd));
     } else {
       assert((mi->ref_frame[1] <= INTRA_FRAME) ==
-                 (cm->comp_pred_mode == SINGLE_REFERENCE));
+             (cm->comp_pred_mode == SINGLE_REFERENCE));
     }
 
     if (mi->ref_frame[1] > INTRA_FRAME) {
@@ -557,7 +557,7 @@
   vp9_coeff_stats *coef_branch_ct = cpi->frame_branch_ct[tx_size];
   int i, j, k, l, m;
 
-  for (i = 0; i < BLOCK_TYPES; ++i) {
+  for (i = 0; i < PLANE_TYPES; ++i) {
     for (j = 0; j < REF_TYPES; ++j) {
       for (k = 0; k < COEF_BANDS; ++k) {
         for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
@@ -600,7 +600,7 @@
       /* dry run to see if there is any udpate at all needed */
       int savings = 0;
       int update[2] = {0, 0};
-      for (i = 0; i < BLOCK_TYPES; ++i) {
+      for (i = 0; i < PLANE_TYPES; ++i) {
         for (j = 0; j < REF_TYPES; ++j) {
           for (k = 0; k < COEF_BANDS; ++k) {
             for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
@@ -636,7 +636,7 @@
         return;
       }
       vp9_write_bit(bc, 1);
-      for (i = 0; i < BLOCK_TYPES; ++i) {
+      for (i = 0; i < PLANE_TYPES; ++i) {
         for (j = 0; j < REF_TYPES; ++j) {
           for (k = 0; k < COEF_BANDS; ++k) {
             for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
@@ -685,7 +685,7 @@
                                              : COEF_BANDS;
       int updates = 0;
       int noupdates_before_first = 0;
-      for (i = 0; i < BLOCK_TYPES; ++i) {
+      for (i = 0; i < PLANE_TYPES; ++i) {
         for (j = 0; j < REF_TYPES; ++j) {
           for (k = 0; k < COEF_BANDS; ++k) {
             for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index f9a0226..0088338 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -86,7 +86,7 @@
 
 /* The [2] dimension is for whether we skip the EOB node (i.e. if previous
  * coefficient in this block was zero) or not. */
-typedef unsigned int vp9_coeff_cost[BLOCK_TYPES][REF_TYPES][COEF_BANDS][2]
+typedef unsigned int vp9_coeff_cost[PLANE_TYPES][REF_TYPES][COEF_BANDS][2]
                                    [COEFF_CONTEXTS][ENTROPY_TOKENS];
 
 typedef struct macroblock MACROBLOCK;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 2fa9dcc..342b11a 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -759,7 +759,7 @@
     // the reference frame counts used to work out probabilities.
     if (is_inter_block(mbmi) && !seg_ref_active) {
       if (cm->comp_pred_mode == REFERENCE_MODE_SELECT)
-        cpi->comp_inter_count[vp9_get_pred_context_comp_inter_inter(cm, xd)]
+        cpi->comp_inter_count[vp9_get_reference_mode_context(cm, xd)]
                              [has_second_ref(mbmi)]++;
 
       if (has_second_ref(mbmi)) {
@@ -1640,7 +1640,8 @@
       }
       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
       if (sum_rd < best_rd) {
-        int64_t stop_thresh = 2048;
+        int64_t stop_thresh = 4096;
+        int64_t stop_thresh_rd;
 
         best_rate = this_rate;
         best_dist = this_dist;
@@ -1652,9 +1653,10 @@
         stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
             b_height_log2_lookup[bsize]);
 
+        stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh);
         // If obtained distortion is very small, choose current partition
         // and stop splitting.
-        if (this_dist < stop_thresh) {
+        if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) {
           do_split = 0;
           do_rect = 0;
         }
@@ -2587,9 +2589,8 @@
         !(is_inter_block(mbmi) &&
             (mbmi->skip_coeff ||
              vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)))) {
-      const uint8_t context = vp9_get_pred_context_tx_size(xd);
-      ++get_tx_counts(max_txsize_lookup[bsize],
-                      context, &cm->counts.tx)[mbmi->tx_size];
+      ++get_tx_counts(max_txsize_lookup[bsize], vp9_get_tx_size_context(xd),
+                      &cm->counts.tx)[mbmi->tx_size];
     } else {
       int x, y;
       TX_SIZE tx_size;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 3f194b2..5f42d0e 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -1008,10 +1008,8 @@
   int target_norm_bits_per_mb;
 
   double section_err = fpstats->coded_error / fpstats->count;
-  double sr_correction;
   double err_per_mb = section_err / num_mbs;
   double err_correction_factor;
-  double speed_correction = 1.0;
 
   if (section_target_bandwitdh <= 0)
     return cpi->twopass.maxq_max_limit;          // Highest value allowed
@@ -1020,24 +1018,6 @@
                               ? (512 * section_target_bandwitdh) / num_mbs
                               : 512 * (section_target_bandwitdh / num_mbs);
 
-  // Look at the drop in prediction quality between the last frame
-  // and the GF buffer (which contained an older frame).
-  if (fpstats->sr_coded_error > fpstats->coded_error) {
-    double sr_err_diff = (fpstats->sr_coded_error - fpstats->coded_error) /
-                             (fpstats->count * cpi->common.MBs);
-    sr_correction = fclamp(pow(sr_err_diff / 32.0, 0.25), 0.75, 1.25);
-  } else {
-    sr_correction = 0.75;
-  }
-
-  // Corrections for higher compression speed settings
-  // (reduced compression expected)
-  // FIXME(jimbankoski): Once we settle on vp9 speed features we need to
-  // change this code.
-  if (cpi->compressor_speed == 1)
-    speed_correction = cpi->oxcf.cpu_used <= 5 ?
-                          1.04 + (/*cpi->oxcf.cpu_used*/0 * 0.04) :
-                          1.25;
 
   // Try and pick a max Q that will be high enough to encode the
   // content at the given rate.
@@ -1045,8 +1025,7 @@
     int bits_per_mb_at_this_q;
 
     err_correction_factor = calc_correction_factor(err_per_mb,
-                                                   ERR_DIVISOR, 0.4, 0.90, q) *
-                                sr_correction * speed_correction;
+                                                   ERR_DIVISOR, 0.4, 0.90, q);
 
     bits_per_mb_at_this_q = vp9_rc_bits_per_mb(INTER_FRAME, q,
                                                err_correction_factor);
@@ -1060,14 +1039,6 @@
       q < cpi->cq_target_quality)
     q = cpi->cq_target_quality;
 
-  // Adjust maxq_min_limit and maxq_max_limit limits based on
-  // average q observed in clip for non kf/gf/arf frames
-  // Give average a chance to settle though.
-  // PGW TODO.. This code is broken for the extended Q range
-  if (cpi->rc.ni_frames > ((int)cpi->twopass.total_stats.count >> 8) &&
-      cpi->rc.ni_frames > 25)
-    adjust_maxq_qrange(cpi);
-
   return q;
 }
 
@@ -1083,9 +1054,6 @@
   double section_err = (fpstats->coded_error / fpstats->count);
   double err_per_mb = section_err / num_mbs;
   double err_correction_factor;
-  double sr_err_diff;
-  double sr_correction;
-  double speed_correction = 1.0;
   double clip_iiratio;
   double clip_iifactor;
 
@@ -1094,31 +1062,6 @@
                             : 512 * (section_target_bandwitdh / num_mbs);
 
 
-  // Corrections for higher compression speed settings
-  // (reduced compression expected)
-  if (cpi->compressor_speed == 1) {
-    if (cpi->oxcf.cpu_used <= 5)
-      speed_correction = 1.04 + (/*cpi->oxcf.cpu_used*/ 0 * 0.04);
-    else
-      speed_correction = 1.25;
-  }
-
-  // Look at the drop in prediction quality between the last frame
-  // and the GF buffer (which contained an older frame).
-  if (fpstats->sr_coded_error > fpstats->coded_error) {
-    sr_err_diff =
-      (fpstats->sr_coded_error - fpstats->coded_error) /
-      (fpstats->count * cpi->common.MBs);
-    sr_correction = (sr_err_diff / 32.0);
-    sr_correction = pow(sr_correction, 0.25);
-    if (sr_correction < 0.75)
-      sr_correction = 0.75;
-    else if (sr_correction > 1.25)
-      sr_correction = 1.25;
-  } else {
-    sr_correction = 0.75;
-  }
-
   // II ratio correction factor for clip as a whole
   clip_iiratio = cpi->twopass.total_stats.intra_error /
                  DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error);
@@ -1132,8 +1075,7 @@
 
     // Error per MB based correction factor
     err_correction_factor =
-      calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, q) *
-      sr_correction * speed_correction * clip_iifactor;
+      calc_correction_factor(err_per_mb, 100.0, 0.4, 0.90, q) * clip_iifactor;
 
     bits_per_mb_at_this_q =
       vp9_rc_bits_per_mb(INTER_FRAME, q, err_correction_factor);
@@ -2146,53 +2088,28 @@
 
   if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
     cpi->rc.active_worst_quality = cpi->oxcf.cq_level;
-  } else {
+  } else if (cpi->common.current_video_frame == 0) {
     // Special case code for first frame.
-    if (cpi->common.current_video_frame == 0) {
-      int section_target_bandwidth =
-          (int)(cpi->twopass.bits_left / frames_left);
+    int section_target_bandwidth =
+        (int)(cpi->twopass.bits_left / frames_left);
 
-      // guess at maxq needed in 2nd pass
-      cpi->twopass.maxq_max_limit = cpi->rc.worst_quality;
-      cpi->twopass.maxq_min_limit = cpi->rc.best_quality;
+    // guess at maxq needed in 2nd pass
+    cpi->twopass.maxq_max_limit = cpi->rc.worst_quality;
+    cpi->twopass.maxq_min_limit = cpi->rc.best_quality;
 
-      tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats,
-                             section_target_bandwidth);
+    tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats,
+                           section_target_bandwidth);
 
-      cpi->rc.active_worst_quality = tmp_q;
-      cpi->rc.ni_av_qi = tmp_q;
-      cpi->rc.avg_q = vp9_convert_qindex_to_q(tmp_q);
+    cpi->rc.active_worst_quality = tmp_q;
+    cpi->rc.ni_av_qi = tmp_q;
+    cpi->rc.avg_q = vp9_convert_qindex_to_q(tmp_q);
 
-      // Limit the maxq value returned subsequently.
-      // This increases the risk of overspend or underspend if the initial
-      // estimate for the clip is bad, but helps prevent excessive
-      // variation in Q, especially near the end of a clip
-      // where for example a small overspend may cause Q to crash
-      adjust_maxq_qrange(cpi);
-    }
-
-    // The last few frames of a clip almost always have to few or too many
-    // bits and for the sake of over exact rate control we dont want to make
-    // radical adjustments to the allowed quantizer range just to use up a
-    // few surplus bits or get beneath the target rate.
-    else if ((cpi->common.current_video_frame <
-              (((unsigned int)cpi->twopass.total_stats.count * 255) >> 8)) &&
-             ((cpi->common.current_video_frame + cpi->rc.baseline_gf_interval) <
-              (unsigned int)cpi->twopass.total_stats.count)) {
-      int section_target_bandwidth =
-          (int)(cpi->twopass.bits_left / frames_left);
-      if (frames_left < 1)
-        frames_left = 1;
-
-      tmp_q = estimate_max_q(
-          cpi,
-          &cpi->twopass.total_left_stats,
-          section_target_bandwidth);
-
-      // Make a damped adjustment to active max Q
-      cpi->rc.active_worst_quality =
-          adjust_active_maxq(cpi->rc.active_worst_quality, tmp_q);
-    }
+    // Limit the maxq value returned subsequently.
+    // This increases the risk of overspend or underspend if the initial
+    // estimate for the clip is bad, but helps prevent excessive
+    // variation in Q, especially near the end of a clip
+    // where for example a small overspend may cause Q to crash
+    adjust_maxq_qrange(cpi);
   }
   vp9_zero(this_frame);
   if (EOF == input_stats(cpi, &this_frame))
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index a383164..efdb612 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1066,6 +1066,126 @@
 #undef CHECK_POINT
 #undef CHECK_BETTER
 
+int vp9_full_range_search_c(MACROBLOCK *x, int_mv *ref_mv, int_mv *best_mv,
+                            int search_param, int sad_per_bit, int *num00,
+                            vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
+                            int *mvcost[2], int_mv *center_mv) {
+  const MACROBLOCKD* const xd = &x->e_mbd;
+  uint8_t *what = x->plane[0].src.buf;
+  int what_stride = x->plane[0].src.stride;
+  uint8_t *in_what;
+  int in_what_stride = xd->plane[0].pre[0].stride;
+  uint8_t *best_address;
+
+  int_mv this_mv;
+
+  int bestsad = INT_MAX;
+  int ref_row, ref_col;
+
+  uint8_t *check_here;
+  int thissad;
+  int_mv fcenter_mv;
+
+  int *mvjsadcost = x->nmvjointsadcost;
+  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+
+  int tr, tc;
+  int best_tr = 0;
+  int best_tc = 0;
+  int range = 64;
+
+  int start_col, end_col;
+  int start_row, end_row;
+  int i;
+
+  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
+  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+
+  clamp_mv(&ref_mv->as_mv,
+           x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+  ref_row = ref_mv->as_mv.row;
+  ref_col = ref_mv->as_mv.col;
+  *num00 = 11;
+  best_mv->as_mv.row = ref_row;
+  best_mv->as_mv.col = ref_col;
+
+  // Work out the start point for the search
+  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
+                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
+  best_address = in_what;
+
+  // Check the starting position
+  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
+                + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+                                 mvjsadcost, mvsadcost, sad_per_bit);
+
+  start_row = MAX(-range, x->mv_row_min - ref_row);
+  start_col = MAX(-range, x->mv_col_min - ref_col);
+  end_row = MIN(range, x->mv_row_max - ref_row);
+  end_col = MIN(range, x->mv_col_max - ref_col);
+
+  for (tr = start_row; tr <= end_row; ++tr) {
+    for (tc = start_col; tc <= end_col; tc += 4) {
+      if ((tc + 3) <= end_col) {
+        unsigned int sad_array[4];
+        unsigned char const *addr_ref[4];
+        for (i = 0; i < 4; ++i)
+          addr_ref[i] = in_what + tr * in_what_stride + tc + i;
+
+        fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array);
+
+        for (i = 0; i < 4; ++i) {
+          if (sad_array[i] < bestsad) {
+            this_mv.as_mv.row = ref_row + tr;
+            this_mv.as_mv.col = ref_col + tc + i;
+            thissad = sad_array[i] +
+                      mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+                                      mvjsadcost, mvsadcost, sad_per_bit);
+            if (thissad < bestsad) {
+              bestsad = thissad;
+              best_tr = tr;
+              best_tc = tc + i;
+            }
+          }
+        }
+      } else {
+        for (i = 0; i < end_col - tc; ++i) {
+          check_here = in_what + tr * in_what_stride + tc + i;
+          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
+                                bestsad);
+
+          if (thissad < bestsad) {
+            this_mv.as_mv.row = ref_row + tr;
+            this_mv.as_mv.col = ref_col + tc + i;
+            thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+                                      mvjsadcost, mvsadcost, sad_per_bit);
+
+            if (thissad < bestsad) {
+              bestsad = thissad;
+              best_tr = tr;
+              best_tc = tc + i;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  best_mv->as_mv.row += best_tr;
+  best_mv->as_mv.col += best_tc;
+
+  this_mv.as_mv.row = best_mv->as_mv.row * 8;
+  this_mv.as_mv.col = best_mv->as_mv.col * 8;
+
+  if (bestsad == INT_MAX)
+    return INT_MAX;
+
+  return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+                    (unsigned int *)(&thissad)) +
+                       mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+                                   mvjcost, mvcost, x->errorperbit);
+}
+
 int vp9_diamond_search_sad_c(MACROBLOCK *x,
                              int_mv *ref_mv, int_mv *best_mv,
                              int search_param, int sad_per_bit, int *num00,
@@ -1111,7 +1231,7 @@
 
   // Work out the start point for the search
   in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
-                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
+                        ref_row * in_what_stride + ref_col);
   best_address = in_what;
 
   // Check the starting position
@@ -1255,7 +1375,7 @@
 
   // Work out the start point for the search
   in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
-                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
+                        ref_row * in_what_stride + ref_col);
   best_address = in_what;
 
   // Check the starting position
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 9d86dd0..b10d9f8 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -689,6 +689,7 @@
 
   switch (mode) {
     case 0:  // This is the best quality mode.
+      cpi->diamond_search_sad = vp9_full_range_search;
       break;
 
     case 1:
@@ -2596,7 +2597,7 @@
                                  vp9_coeff_count *full_count) {
   int i, j, k, l;
 
-  for (i = 0; i < BLOCK_TYPES; ++i)
+  for (i = 0; i < PLANE_TYPES; ++i)
     for (j = 0; j < REF_TYPES; ++j)
       for (k = 0; k < COEF_BANDS; ++k)
         for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index fe65688..72e9196 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -469,9 +469,9 @@
 
   nmv_context_counts NMVcount;
 
-  vp9_coeff_count coef_counts[TX_SIZES][BLOCK_TYPES];
-  vp9_coeff_probs_model frame_coef_probs[TX_SIZES][BLOCK_TYPES];
-  vp9_coeff_stats frame_branch_ct[TX_SIZES][BLOCK_TYPES];
+  vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
+  vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES];
+  vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES];
 
   int kf_zeromotion_pct;
   int gf_zeromotion_pct;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index d7aabf8..ecc6e3c 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -122,7 +122,7 @@
 }
 static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                           int raster_block, int16_t *base) {
-  const int stride = 4 << b_width_log2(plane_bsize);
+  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   return base + raster_block_offset(plane_bsize, raster_block, stride);
 }
 
@@ -151,11 +151,11 @@
 }
 
 static void fill_token_costs(vp9_coeff_cost *c,
-                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
+                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
   int i, j, k, l;
   TX_SIZE t;
   for (t = TX_4X4; t <= TX_32X32; ++t)
-    for (i = 0; i < BLOCK_TYPES; ++i)
+    for (i = 0; i < PLANE_TYPES; ++i)
       for (j = 0; j < REF_TYPES; ++j)
         for (k = 0; k < COEF_BANDS; ++k)
           for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
@@ -536,7 +536,7 @@
   int c, cost;
 
   // Check for consistency of tx_size with mode info
-  assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size
+  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                                       : get_uv_tx_size(mbmi) == tx_size);
 
   if (eob == 0) {
@@ -1047,7 +1047,7 @@
                            src, src_stride,
                            dst, dst_stride);
 
-        tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
+        tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
         so = &vp9_scan_orders[TX_4X4][tx_type];
 
         if (tx_type != DCT_DCT)
@@ -1292,7 +1292,7 @@
                                        PICK_MODE_CONTEXT *ctx,
                                        int *rate, int *rate_tokenonly,
                                        int64_t *distortion, int *skippable,
-                                       BLOCK_SIZE bsize) {
+                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
   MB_PREDICTION_MODE mode;
   MB_PREDICTION_MODE mode_selected = DC_PRED;
   int64_t best_rd = INT64_MAX, this_rd;
@@ -1300,8 +1300,7 @@
   int64_t this_distortion, this_sse;
 
   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
-    if (!(cpi->sf.intra_uv_mode_mask[max_uv_txsize_lookup[bsize]]
-          & (1 << mode)))
+    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
       continue;
 
     x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;
@@ -1367,8 +1366,8 @@
 }
 
 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
-                                 BLOCK_SIZE bsize, int *rate_uv,
-                                 int *rate_uv_tokenonly,
+                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
+                                 int *rate_uv, int *rate_uv_tokenonly,
                                  int64_t *dist_uv, int *skip_uv,
                                  MB_PREDICTION_MODE *mode_uv) {
   MACROBLOCK *const x = &cpi->mb;
@@ -1383,7 +1382,7 @@
   } else {
     rd_pick_intra_sbuv_mode(cpi, x, ctx,
                             rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
-                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
+                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
   }
   *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
 }
@@ -2149,7 +2148,7 @@
     vp9_prob comp_inter_p = 128;
 
     if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) {
-      comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd);
+      comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
       *comp_mode_p = comp_inter_p;
     } else {
       *comp_mode_p = 128;
@@ -2452,7 +2451,8 @@
                                 int mi_row, int mi_col,
                                 int_mv single_newmv[MAX_REF_FRAMES],
                                 int *rate_mv) {
-  int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
+  const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+  const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
   const int refs[2] = { mbmi->ref_frame[0],
@@ -3027,9 +3027,11 @@
   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
   int y_skip = 0, uv_skip = 0;
   int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
+  TX_SIZE max_uv_tx_size;
   x->skip_encode = 0;
   ctx->skip = 0;
   xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
+
   if (bsize >= BLOCK_8X8) {
     if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                                &dist_y, &y_skip, bsize, tx_cache,
@@ -3037,8 +3039,9 @@
       *returnrate = INT_MAX;
       return;
     }
+    max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
-                            &dist_uv, &uv_skip, bsize);
+                            &dist_uv, &uv_skip, bsize, max_uv_tx_size);
   } else {
     y_skip = 0;
     if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
@@ -3046,8 +3049,9 @@
       *returnrate = INT_MAX;
       return;
     }
+    max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
-                            &dist_uv, &uv_skip, BLOCK_8X8);
+                            &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
   }
 
   if (y_skip && uv_skip) {
@@ -3410,12 +3414,11 @@
       if (rate_y == INT_MAX)
         continue;
 
-      uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
+      uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
       if (rate_uv_intra[uv_tx] == INT_MAX) {
-        choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[uv_tx],
-                             &rate_uv_tokenonly[uv_tx],
-                             &dist_uv[uv_tx], &skip_uv[uv_tx],
-                             &mode_uv[uv_tx]);
+        choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
+                             &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
+                             &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
       }
 
       rate_uv = rate_uv_tokenonly[uv_tx];
@@ -3676,7 +3679,8 @@
                               &rate_uv_tokenonly[uv_tx_size],
                               &dist_uv[uv_tx_size],
                               &skip_uv[uv_tx_size],
-                              bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
+                              bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
+                              uv_tx_size);
     }
   }
 
@@ -4041,7 +4045,8 @@
       distortion2 += distortion_y;
 
       if (rate_uv_intra[TX_4X4] == INT_MAX) {
-        choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[TX_4X4],
+        choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
+                             &rate_uv_intra[TX_4X4],
                              &rate_uv_tokenonly[TX_4X4],
                              &dist_uv[TX_4X4], &skip_uv[TX_4X4],
                              &mode_uv[TX_4X4]);
@@ -4426,7 +4431,7 @@
                               &rate_uv_tokenonly[uv_tx_size],
                               &dist_uv[uv_tx_size],
                               &skip_uv[uv_tx_size],
-                              BLOCK_8X8);
+                              BLOCK_8X8, uv_tx_size);
     }
   }
 
diff --git a/warnings.c b/warnings.c
index 96400db..f76d706 100644
--- a/warnings.c
+++ b/warnings.c
@@ -81,7 +81,8 @@
 
 static void check_quantizer(int min_q, int max_q,
                             struct WarningList *warning_list) {
-  if (min_q == max_q || abs(max_q - min_q) < 8)
+  const int lossless = min_q == 0 && max_q == 0;
+  if (!lossless && (min_q == max_q || abs(max_q - min_q) < 8))
     add_warning(quantizer_warning_string, warning_list);
 }