Merge "Add a placeholder forward buffered ANS coder." into nextgenv2
diff --git a/configure b/configure
index ed1d048..eda83f1 100755
--- a/configure
+++ b/configure
@@ -282,6 +282,7 @@
     ans
     loop_restoration
     ext_partition
+    ext_partition_types
     ext_tile
     obmc
     entropy
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 016fc75..ffa3c64 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -167,9 +167,9 @@
   PREDICTION_MODE mode;
   TX_SIZE tx_size;
 #if CONFIG_VAR_TX
-  // TODO(jingning): This effectively assigned 64 entries for each 8x8 block.
+  // TODO(jingning): This effectively assigns an entry for each 8x8 block.
   // Apparently it takes much more space than needed.
-  TX_SIZE inter_tx_size[64];
+  TX_SIZE inter_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
 #endif
   int8_t skip;
   int8_t has_no_coeffs;
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c
index d9891bb..c4fdd2a 100644
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -1276,9 +1276,9 @@
 #if CONFIG_VAR_TX
       if (is_inter_block(mbmi) && !mbmi->skip)
         tx_size = (plane->plane_type == PLANE_TYPE_UV) ?
-            get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row * 8 + blk_col],
+            get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row][ blk_col],
                                 sb_type, ss_x, ss_y) :
-            mbmi->inter_tx_size[blk_row * 8 + blk_col];
+            mbmi->inter_tx_size[blk_row][blk_col];
 
       tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]);
       tx_size_c = VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & 0x07]);
diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h
index 83a3597..385a3e1 100644
--- a/vp10/common/pred_common.h
+++ b/vp10/common/pred_common.h
@@ -192,9 +192,9 @@
                              TX_SIZE max_tx_size, int ctx) {
   const struct macroblockd_plane *const pd = &xd->plane[0];
   const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
-  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
-               (blk_col >> (1 - pd->subsampling_x));
-  TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+  const int tx_row = blk_row >> (1 - pd->subsampling_y);
+  const int tx_col = blk_col >> (1 - pd->subsampling_x);
+  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
 
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index d4ac688..84b01e0 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -307,11 +307,11 @@
                                   TX_SIZE tx_size, int *eob_total) {
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
-  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
-               (blk_col >> (1 - pd->subsampling_x));
-  TX_SIZE plane_tx_size = plane ?
-      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, 0, 0) :
-      mbmi->inter_tx_size[tx_idx];
+  const int tx_row = blk_row >> (1 - pd->subsampling_y);
+  const int tx_col = blk_col >> (1 - pd->subsampling_x);
+  const TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
+      mbmi->inter_tx_size[tx_row][tx_col];
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
 
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index eb336be..f52fae4 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -155,7 +155,29 @@
   uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
   mbmi->ref_mv_idx = 0;
 
-  if (xd->ref_mv_count[ref_frame_type] > 2) {
+  if (xd->ref_mv_count[ref_frame_type] > 1 && mbmi->mode == NEWMV) {
+    uint8_t drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], 0);
+    vpx_prob drl_prob = cm->fc->drl_prob0[drl_ctx];
+
+    if (!vpx_read(r, drl_prob)) {
+      mbmi->ref_mv_idx = 0;
+      return;
+    }
+    mbmi->ref_mv_idx = 1;
+
+    if (xd->ref_mv_count[ref_frame_type] > 2) {
+      drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], 1);
+      drl_prob = cm->fc->drl_prob0[drl_ctx];
+      if (!vpx_read(r, drl_prob)) {
+        mbmi->ref_mv_idx = 1;
+        return;
+      }
+      mbmi->ref_mv_idx = 2;
+    }
+    return;
+  }
+
+  if (xd->ref_mv_count[ref_frame_type] > 2 && mbmi->mode == NEARMV) {
     uint8_t drl0_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], 1);
     vpx_prob drl0_prob = cm->fc->drl_prob0[drl0_ctx];
     if (vpx_read(r, drl0_prob)) {
@@ -213,12 +235,15 @@
                                TX_SIZE tx_size, int blk_row, int blk_col,
                                vpx_reader *r) {
   int is_split = 0;
-  const int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+  const int tx_row = blk_row >> 1;
+  const int tx_col = blk_col >> 1;
   int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
-  int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
-                                   xd->left_txfm_context + (blk_row >> 1),
+  int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
+                                   xd->left_txfm_context + tx_row,
                                    tx_size);
+  TX_SIZE (*const inter_tx_size)[MI_BLOCK_SIZE] =
+    (TX_SIZE (*)[MI_BLOCK_SIZE])&mbmi->inter_tx_size[tx_row][tx_col];
 
   if (xd->mb_to_bottom_edge < 0)
     max_blocks_high += xd->mb_to_bottom_edge >> 5;
@@ -239,10 +264,10 @@
       ++counts->txfm_partition[ctx][1];
 
     if (tx_size == TX_8X8) {
-      mbmi->inter_tx_size[tx_idx] = TX_4X4;
-      mbmi->tx_size = mbmi->inter_tx_size[tx_idx];
-      txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
-                            xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+      inter_tx_size[0][0] = TX_4X4;
+      mbmi->tx_size = TX_4X4;
+      txfm_partition_update(xd->above_txfm_context + tx_col,
+                            xd->left_txfm_context + tx_row, TX_4X4);
       return;
     }
 
@@ -256,15 +281,15 @@
     }
   } else {
     int idx, idy;
-    mbmi->inter_tx_size[tx_idx] = tx_size;
+    inter_tx_size[0][0] = tx_size;
     for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
       for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
-        mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
-    mbmi->tx_size = mbmi->inter_tx_size[tx_idx];
+        inter_tx_size[idy][idx] = tx_size;
+    mbmi->tx_size = tx_size;
     if (counts)
       ++counts->txfm_partition[ctx][0];
-    txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
-                          xd->left_txfm_context + (blk_row >> 1), tx_size);
+    txfm_partition_update(xd->above_txfm_context + tx_col,
+                          xd->left_txfm_context + tx_row, tx_size);
   }
 }
 #endif
@@ -1243,7 +1268,7 @@
 #endif  // CONFIG_REF_MV && CONFIG_EXT_INTER
                                    r, mode_ctx);
 #if CONFIG_REF_MV
-      if (mbmi->mode == NEARMV)
+      if (mbmi->mode == NEARMV || mbmi->mode == NEWMV)
         read_drl_idx(cm, xd, mbmi, r);
 #endif
     }
@@ -1436,6 +1461,22 @@
     mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
     mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
   } else {
+    int ref;
+    for (ref = 0; ref < 1 + is_compound && mbmi->mode == NEWMV; ++ref) {
+      int_mv ref_mv = nearestmv[ref];
+#if CONFIG_REF_MV
+      uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+      if (xd->ref_mv_count[ref_frame_type] > 1) {
+        ref_mv = (ref == 0) ?
+            xd->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx].this_mv :
+            xd->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx].comp_mv;
+        clamp_mv_ref(&ref_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+        lower_mv_precision(&ref_mv.as_mv, allow_hp);
+      }
+#endif
+      nearestmv[ref] = ref_mv;
+    }
+
     xd->corrupted |= !assign_mv(cm, xd, mbmi->mode,
 #if CONFIG_REF_MV
                                 0,
@@ -1565,7 +1606,7 @@
         int idx, idy;
         for (idy = 0; idy < height; ++idy)
           for (idx = 0; idx < width; ++idx)
-            mbmi->inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] = mbmi->tx_size;
+            mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size;
       }
 
       set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h);
@@ -1584,7 +1625,7 @@
     xd->mi[0]->mbmi.tx_size = xd->supertx_size;
     for (idy = 0; idy < height; ++idy)
       for (idx = 0; idx < width; ++idx)
-        xd->mi[0]->mbmi.inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] =
+        xd->mi[0]->mbmi.inter_tx_size[idy >> 1][idx >> 1] =
             xd->supertx_size;
   }
 #endif  // CONFIG_VAR_TX
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index f20c224..386a17e 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -193,7 +193,31 @@
                           const MB_MODE_INFO_EXT *mbmi_ext,
                           vpx_writer *w) {
   uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
-  if (mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
+
+  assert(mbmi->ref_mv_idx < 3);
+
+  if (mbmi_ext->ref_mv_count[ref_frame_type] > 1 && mbmi->mode == NEWMV) {
+    uint8_t drl_ctx =
+        vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 0);
+    vpx_prob drl_prob = cm->fc->drl_prob0[drl_ctx];
+
+    vpx_write(w, mbmi->ref_mv_idx != 0, drl_prob);
+    if (mbmi->ref_mv_idx == 0)
+      return;
+
+    if (mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
+      drl_ctx = vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
+      drl_prob = cm->fc->drl_prob0[drl_ctx];
+      vpx_write(w, mbmi->ref_mv_idx != 1, drl_prob);
+    }
+    if (mbmi->ref_mv_idx == 1)
+      return;
+
+    assert(mbmi->ref_mv_idx == 2);
+    return;
+  }
+
+  if (mbmi_ext->ref_mv_count[ref_frame_type] > 2 && mbmi->mode == NEARMV) {
     uint8_t drl0_ctx =
         vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
     vpx_prob drl0_prob = cm->fc->drl_prob0[drl0_ctx];
@@ -266,11 +290,12 @@
                                 const MB_MODE_INFO *mbmi,
                                 TX_SIZE tx_size, int blk_row, int blk_col,
                                 vpx_writer *w) {
-  const int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+  const int tx_row = blk_row >> 1;
+  const int tx_col = blk_col >> 1;
   int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
-  int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
-                                   xd->left_txfm_context + (blk_row >> 1),
+  int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
+                                   xd->left_txfm_context + tx_row,
                                    tx_size);
 
   if (xd->mb_to_bottom_edge < 0)
@@ -281,10 +306,10 @@
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
      return;
 
-  if (tx_size == mbmi->inter_tx_size[tx_idx]) {
+  if (tx_size == mbmi->inter_tx_size[tx_row][tx_col]) {
     vpx_write(w, 0, cm->fc->txfm_partition_prob[ctx]);
-    txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
-                          xd->left_txfm_context + (blk_row >> 1), tx_size);
+    txfm_partition_update(xd->above_txfm_context + tx_col,
+                          xd->left_txfm_context + tx_row, tx_size);
   } else {
     const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
     int bsl = b_width_log2_lookup[bsize];
@@ -292,8 +317,8 @@
     vpx_write(w, 1, cm->fc->txfm_partition_prob[ctx]);
 
     if (tx_size == TX_8X8) {
-      txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
-                            xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+      txfm_partition_update(xd->above_txfm_context + tx_col,
+                            xd->left_txfm_context + tx_row, TX_4X4);
       return;
     }
 
@@ -706,11 +731,11 @@
                            int blk_row, int blk_col, TX_SIZE tx_size) {
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
-  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
-               (blk_col >> (1 - pd->subsampling_x));
-  TX_SIZE plane_tx_size = plane ?
-      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, 0, 0) :
-      mbmi->inter_tx_size[tx_idx];
+  const int tx_row = blk_row >> (1 - pd->subsampling_y);
+  const int tx_col = blk_col >> (1 - pd->subsampling_x);
+  const TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
+      mbmi->inter_tx_size[tx_row][tx_col];
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
 
@@ -1088,7 +1113,7 @@
                          mode_ctx);
 
 #if CONFIG_REF_MV
-        if (mode == NEARMV)
+        if (mode == NEARMV || mode == NEWMV)
           write_drl_idx(cm, mbmi, mbmi_ext, w);
 #endif
       }
@@ -1175,13 +1200,15 @@
 #else
       if (mode == NEWMV) {
 #endif  // CONFIG_EXT_INTER
+        int_mv ref_mv;
         for (ref = 0; ref < 1 + is_compound; ++ref) {
 #if CONFIG_REF_MV
-              int nmv_ctx =
-                  vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[ref]],
-                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[ref]]);
-              const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+          int nmv_ctx =
+              vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[ref]],
+                           mbmi_ext->ref_mv_stack[mbmi->ref_frame[ref]]);
+          const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
 #endif
+          ref_mv = mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0];
 #if CONFIG_EXT_INTER
           if (mode == NEWFROMNEARMV)
             vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
@@ -1190,8 +1217,8 @@
           else
 #endif  // CONFIG_EXT_INTER
           vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
-                        &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc,
-                        allow_hp);
+                         &ref_mv.as_mv, nmvc,
+                         allow_hp);
         }
 #if CONFIG_EXT_INTER
       } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
@@ -1457,7 +1484,7 @@
       assert(*tok < tok_end);
       pack_palette_tokens(w, tok, m->mbmi.palette_mode_info.palette_size[plane],
                           rows * cols - 1);
-      assert(*tok < tok_end);
+      assert(*tok < tok_end + m->mbmi.skip);
     }
   }
 
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h
index ce650b1..295213f 100644
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -164,12 +164,12 @@
   int quant_fp;
 
   // skip forward transform and quantization
-  uint8_t skip_txfm[MAX_MB_PLANE << 2];
+  uint8_t skip_txfm[MAX_MB_PLANE][4];
   #define SKIP_TXFM_NONE 0
   #define SKIP_TXFM_AC_DC 1
   #define SKIP_TXFM_AC_ONLY 2
 
-  int64_t bsse[MAX_MB_PLANE << 2];
+  int64_t bsse[MAX_MB_PLANE][4];
 
   // Used to store sub partition's choices.
   MV pred_mv[MAX_REF_FRAMES];
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index ec00b62..2c47be9 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -1085,6 +1085,10 @@
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
   int max_plane;
 
+#if CONFIG_REF_MV
+  int8_t rf_type;
+#endif
+
 #if !CONFIG_SUPERTX
   assert(mi->mbmi.sb_type == bsize);
 #endif
@@ -1092,6 +1096,23 @@
   *mi_addr = *mi;
   *x->mbmi_ext = ctx->mbmi_ext;
 
+#if CONFIG_REF_MV
+  rf_type = vp10_ref_frame_type(mbmi->ref_frame);
+  if (x->mbmi_ext->ref_mv_count[rf_type] > 1 &&
+      mbmi->sb_type >= BLOCK_8X8 &&
+      mbmi->mode == NEWMV) {
+    for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+      int_mv this_mv = (i == 0) ?
+          x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].this_mv :
+          x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].comp_mv;
+      clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+      lower_mv_precision(&this_mv.as_mv, cm->allow_high_precision_mv);
+      x->mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0] = this_mv;
+      mbmi->pred_mv[i] = this_mv;
+    }
+  }
+#endif
+
   // If segmentation in use
   if (seg->enabled) {
     // For in frame complexity AQ copy the segment id from the segment map.
@@ -1231,11 +1252,32 @@
       cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
   int w, h;
 
+#if CONFIG_REF_MV
+  int8_t rf_type;
+#endif
+
   *mi_addr = *mi;
   *x->mbmi_ext = ctx->mbmi_ext;
   assert(is_inter_block(mbmi));
   assert(mbmi->tx_size == ctx->mic.mbmi.tx_size);
 
+#if CONFIG_REF_MV
+  rf_type = vp10_ref_frame_type(mbmi->ref_frame);
+  if (x->mbmi_ext->ref_mv_count[rf_type] > 1 &&
+      mbmi->sb_type >= BLOCK_8X8 &&
+      mbmi->mode == NEWMV) {
+    for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+      int_mv this_mv = (i == 0) ?
+          x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].this_mv :
+          x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].comp_mv;
+      clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+      lower_mv_precision(&this_mv.as_mv, cm->allow_high_precision_mv);
+      x->mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0] = this_mv;
+      mbmi->pred_mv[i] = this_mv;
+    }
+  }
+#endif
+
   // If segmentation in use
   if (seg->enabled && output_enabled) {
     // For in frame complexity AQ copy the segment id from the segment map.
@@ -1287,7 +1329,7 @@
     int idy, idx;
     for (idy = 0; idy < (1 << mtx) / 2; ++idy)
       for (idx = 0; idx < (1 << mtx) / 2; ++idx)
-        mbmi->inter_tx_size[(idy << 3) + idx] = mbmi->tx_size;
+        mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
   }
 #endif  // CONFIG_VAR_TX
 #if CONFIG_OBMC
@@ -1941,15 +1983,24 @@
   }
 }
 
-static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
-                            ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
-                            ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
-                            PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
+
+typedef struct {
+  ENTROPY_CONTEXT a[16 * MAX_MB_PLANE];
+  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE];
+  PARTITION_CONTEXT sa[8];
+  PARTITION_CONTEXT sl[8];
 #if CONFIG_VAR_TX
-                            TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8],
+  TXFM_CONTEXT *p_ta;
+  TXFM_CONTEXT *p_tl;
+  TXFM_CONTEXT ta[8];
+  TXFM_CONTEXT tl[8];
 #endif
-                            BLOCK_SIZE bsize) {
-  MACROBLOCKD *const xd = &x->e_mbd;
+} RD_SEARCH_MACROBLOCK_CONTEXT;
+
+static void restore_context(MACROBLOCK *x,
+                            const RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
+                            int mi_row, int mi_col, BLOCK_SIZE bsize) {
+  MACROBLOCKD *xd = &x->e_mbd;
   int p;
   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
@@ -1958,37 +2009,34 @@
   for (p = 0; p < MAX_MB_PLANE; p++) {
     memcpy(
         xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
-        a + num_4x4_blocks_wide * p,
+        ctx->a + num_4x4_blocks_wide * p,
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
         xd->plane[p].subsampling_x);
     memcpy(
         xd->left_context[p]
             + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
-        l + num_4x4_blocks_high * p,
+        ctx->l + num_4x4_blocks_high * p,
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
         xd->plane[p].subsampling_y);
   }
-  memcpy(xd->above_seg_context + mi_col, sa,
+  memcpy(xd->above_seg_context + mi_col, ctx->sa,
          sizeof(*xd->above_seg_context) * mi_width);
-  memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
+  memcpy(xd->left_seg_context + (mi_row & MI_MASK), ctx->sl,
          sizeof(xd->left_seg_context[0]) * mi_height);
 #if CONFIG_VAR_TX
-  memcpy(xd->above_txfm_context, ta,
+  xd->above_txfm_context = ctx->p_ta;
+  xd->left_txfm_context = ctx->p_tl;
+  memcpy(xd->above_txfm_context, ctx->ta,
          sizeof(*xd->above_txfm_context) * mi_width);
-  memcpy(xd->left_txfm_context, tl,
+  memcpy(xd->left_txfm_context, ctx->tl,
          sizeof(*xd->left_txfm_context) * mi_height);
 #endif
 }
 
-static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
-                         ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
-                         ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
-                         PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
-#if CONFIG_VAR_TX
-                         TXFM_CONTEXT ta[8], TXFM_CONTEXT tl[8],
-#endif
-                         BLOCK_SIZE bsize) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
+static void save_context(const MACROBLOCK *x,
+                         RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
+                         int mi_row, int mi_col, BLOCK_SIZE bsize) {
+  const MACROBLOCKD *xd = &x->e_mbd;
   int p;
   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
@@ -1998,26 +2046,28 @@
   // buffer the above/left context information of the block in search.
   for (p = 0; p < MAX_MB_PLANE; ++p) {
     memcpy(
-        a + num_4x4_blocks_wide * p,
+        ctx->a + num_4x4_blocks_wide * p,
         xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
         xd->plane[p].subsampling_x);
     memcpy(
-        l + num_4x4_blocks_high * p,
+        ctx->l + num_4x4_blocks_high * p,
         xd->left_context[p]
             + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
         xd->plane[p].subsampling_y);
   }
-  memcpy(sa, xd->above_seg_context + mi_col,
+  memcpy(ctx->sa, xd->above_seg_context + mi_col,
          sizeof(*xd->above_seg_context) * mi_width);
-  memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
+  memcpy(ctx->sl, xd->left_seg_context + (mi_row & MI_MASK),
          sizeof(xd->left_seg_context[0]) * mi_height);
 #if CONFIG_VAR_TX
-  memcpy(ta, xd->above_txfm_context,
+  memcpy(ctx->ta, xd->above_txfm_context,
          sizeof(*xd->above_txfm_context) * mi_width);
-  memcpy(tl, xd->left_txfm_context,
+  memcpy(ctx->tl, xd->left_txfm_context,
          sizeof(*xd->left_txfm_context) * mi_height);
+  ctx->p_ta = xd->above_txfm_context;
+  ctx->p_tl = xd->left_txfm_context;
 #endif
 }
 
@@ -2299,11 +2349,7 @@
   int i, pl;
   PARTITION_TYPE partition = PARTITION_NONE;
   BLOCK_SIZE subsize;
-  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
-  PARTITION_CONTEXT sl[8], sa[8];
-#if CONFIG_VAR_TX
-  TXFM_CONTEXT tl[8], ta[8];
-#endif
+  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
   RD_COST last_part_rdc, none_rdc, chosen_rdc;
   BLOCK_SIZE sub_subsize = BLOCK_4X4;
   int splits_below = 0;
@@ -2329,16 +2375,14 @@
   partition = partition_lookup[bsl][bs_type];
   subsize = get_subsize(bsize, partition);
 
+  pc_tree->partitioning = partition;
+
 #if CONFIG_VAR_TX
   xd->above_txfm_context = cm->above_txfm_context + mi_col;
   xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
 #endif
-  pc_tree->partitioning = partition;
-  save_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
-               ta, tl,
-#endif
-               bsize);
+
+  save_context(x, &x_ctx, mi_row, mi_col, bsize);
 
   if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) {
     set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
@@ -2384,11 +2428,8 @@
 #endif
       }
 
-      restore_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
-                      ta, tl,
-#endif
-                      bsize);
+      restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
       mi_8x8[0]->mbmi.sb_type = bs_type;
       pc_tree->partitioning = partition;
     }
@@ -2556,11 +2597,9 @@
 #if CONFIG_SUPERTX
     chosen_rate_nocoef = 0;
 #endif
-    restore_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
-                    ta, tl,
-#endif
-                    bsize);
+
+    restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
     pc_tree->partitioning = PARTITION_SPLIT;
 
     // Split partition.
@@ -2571,20 +2610,12 @@
 #if CONFIG_SUPERTX
       int rt_nocoef = 0;
 #endif
-      ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
-      PARTITION_CONTEXT sl[8], sa[8];
-#if CONFIG_VAR_TX
-      TXFM_CONTEXT tl[8], ta[8];
-#endif
+      RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
 
       if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
         continue;
 
-      save_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
-                   ta, tl,
-#endif
-                   bsize);
+      save_context(x, &x_ctx, mi_row, mi_col, bsize);
       pc_tree->split[i]->partitioning = PARTITION_NONE;
       rd_pick_sb_modes(cpi, tile_data, x,
                        mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
@@ -2593,11 +2624,7 @@
 #endif
                        split_subsize, &pc_tree->split[i]->none, INT64_MAX);
 
-      restore_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
-                      ta, tl,
-#endif
-                      bsize);
+      restore_context(x, &x_ctx, mi_row, mi_col, bsize);
 
       if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
         vp10_rd_cost_reset(&chosen_rdc);
@@ -2655,15 +2682,7 @@
 #endif
   }
 
-#if CONFIG_VAR_TX
-  xd->above_txfm_context = cm->above_txfm_context + mi_col;
-  xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
-#endif
-  restore_context(x, mi_row, mi_col, a, l, sa, sl,
-#if CONFIG_VAR_TX
-                  ta, tl,
-#endif
-                  bsize);
+  restore_context(x, &x_ctx, mi_row, mi_col, bsize);
 
   // We must have chosen a partitioning and encoding or we'll fail later on.
   // No other opportunities for success.
@@ -2939,11 +2958,7 @@
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
-  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
-  PARTITION_CONTEXT sl[8], sa[8];
-#if CONFIG_VAR_TX
-  TXFM_CONTEXT tl[8], ta[8];
-#endif
+  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
   TOKENEXTRA *tp_orig = *tp;
   PICK_MODE_CONTEXT *ctx = &pc_tree->none;
   int i;
@@ -3020,11 +3035,10 @@
 #if CONFIG_VAR_TX
   xd->above_txfm_context = cm->above_txfm_context + mi_col;
   xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
-  save_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
-  save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
 #endif
 
+  save_context(x, &x_ctx, mi_row, mi_col, bsize);
+
 #if CONFIG_FP_MB_STATS
   if (cpi->use_fp_mb_stats) {
     set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
@@ -3178,13 +3192,8 @@
 #endif
       }
     }
-#if CONFIG_VAR_TX
-    xd->above_txfm_context = cm->above_txfm_context + mi_col;
-    xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
-    restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
-    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif  // CONFIG_VAR_TX
+
+    restore_context(x, &x_ctx, mi_row, mi_col, bsize);
   }
 
   // store estimated motion vector
@@ -3232,14 +3241,9 @@
           TX_TYPE best_tx = DCT_DCT;
           tmp_rate = sum_rate_nocoef;
           tmp_dist = 0;
-#if CONFIG_VAR_TX
-          xd->above_txfm_context = cm->above_txfm_context + mi_col;
-          xd->left_txfm_context =
-              xd->left_txfm_context_buffer + (mi_row & MI_MASK);
-          restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
-          restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif  // CONFIG_VAR_TX
+
+          restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
           rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
                         &tmp_rate, &tmp_dist,
                         &best_tx,
@@ -3326,14 +3330,9 @@
 
           tmp_rate = sum_rate_nocoef;
           tmp_dist = 0;
-#if CONFIG_VAR_TX
-          xd->above_txfm_context = cm->above_txfm_context + mi_col;
-          xd->left_txfm_context =
-              xd->left_txfm_context_buffer + (mi_row & MI_MASK);
-          restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
-          restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif  // CONFIG_VAR_TX
+
+          restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
           rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
                         &tmp_rate, &tmp_dist,
                         &best_tx,
@@ -3380,13 +3379,8 @@
       if (cpi->sf.less_rectangular_check)
         do_rect &= !partition_none_allowed;
     }
-#if CONFIG_VAR_TX
-    xd->above_txfm_context = cm->above_txfm_context + mi_col;
-    xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
-    restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
-    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif
+
+    restore_context(x, &x_ctx, mi_row, mi_col, bsize);
   }  // if (do_split)
 
   // PARTITION_HORZ
@@ -3466,13 +3460,9 @@
         TX_TYPE best_tx = DCT_DCT;
         tmp_rate = sum_rate_nocoef;
         tmp_dist = 0;
-#if CONFIG_VAR_TX
-        xd->above_txfm_context = cm->above_txfm_context + mi_col;
-        xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
-        restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
-        restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif  // CONFIG_VAR_TX
+
+        restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
         rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
                       &tmp_rate, &tmp_dist,
                       &best_tx,
@@ -3511,13 +3501,8 @@
         pc_tree->partitioning = PARTITION_HORZ;
       }
     }
-#if CONFIG_VAR_TX
-    xd->above_txfm_context = cm->above_txfm_context + mi_col;
-    xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
-    restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
-    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif
+
+    restore_context(x, &x_ctx, mi_row, mi_col, bsize);
   }
   // PARTITION_VERT
   if (partition_vert_allowed &&
@@ -3594,13 +3579,9 @@
 
         tmp_rate = sum_rate_nocoef;
         tmp_dist = 0;
-#if CONFIG_VAR_TX
-        xd->above_txfm_context = cm->above_txfm_context + mi_col;
-        xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07);
-        restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
-        restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif  // CONFIG_VAR_TX
+
+        restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
         rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
                       &tmp_rate, &tmp_dist,
                       &best_tx,
@@ -3640,13 +3621,8 @@
         pc_tree->partitioning = PARTITION_VERT;
       }
     }
-#if CONFIG_VAR_TX
-    xd->above_txfm_context = cm->above_txfm_context + mi_col;
-    xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MI_MASK);
-    restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize);
-#else
-    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#endif
+
+    restore_context(x, &x_ctx, mi_row, mi_col, bsize);
   }
 
   // TODO(jbb): This code added so that we avoid static analysis
@@ -4218,13 +4194,14 @@
                               FRAME_COUNTS *counts,
                               TX_SIZE tx_size, int blk_row, int blk_col) {
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+  const int tx_row = blk_row >> 1;
+  const int tx_col = blk_col >> 1;
   int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
-  int ctx = txfm_partition_context(xd->above_txfm_context + (blk_col >> 1),
-                                   xd->left_txfm_context + (blk_row >> 1),
+  int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
+                                   xd->left_txfm_context + tx_row,
                                    tx_size);
-  TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
 
   if (xd->mb_to_bottom_edge < 0)
     max_blocks_high += xd->mb_to_bottom_edge >> 5;
@@ -4237,8 +4214,8 @@
   if (tx_size == plane_tx_size) {
     ++counts->txfm_partition[ctx][0];
     mbmi->tx_size = tx_size;
-    txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
-                          xd->left_txfm_context + (blk_row >> 1), tx_size);
+    txfm_partition_update(xd->above_txfm_context + tx_col,
+                          xd->left_txfm_context + tx_row, tx_size);
   } else {
     BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
     int bh = num_4x4_blocks_high_lookup[bsize];
@@ -4246,10 +4223,10 @@
     ++counts->txfm_partition[ctx][1];
 
     if (tx_size == TX_8X8) {
-      mbmi->inter_tx_size[tx_idx] = TX_4X4;
+      mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
       mbmi->tx_size = TX_4X4;
-      txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
-                            xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+      txfm_partition_update(xd->above_txfm_context + tx_col,
+                            xd->left_txfm_context + tx_row, TX_4X4);
       return;
     }
 
@@ -4285,10 +4262,11 @@
 static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size,
                              int blk_row, int blk_col) {
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  int tx_idx = (blk_row >> 1) * 8 + (blk_col >> 1);
+  const int tx_row = blk_row >> 1;
+  const int tx_col = blk_col >> 1;
   int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
-  TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx];
+  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
 
   if (xd->mb_to_bottom_edge < 0)
     max_blocks_high += xd->mb_to_bottom_edge >> 5;
@@ -4300,8 +4278,8 @@
 
   if (tx_size == plane_tx_size) {
     mbmi->tx_size = tx_size;
-    txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
-                          xd->left_txfm_context + (blk_row >> 1), tx_size);
+    txfm_partition_update(xd->above_txfm_context + tx_col,
+                          xd->left_txfm_context + tx_row, tx_size);
 
   } else {
     BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
@@ -4309,10 +4287,10 @@
     int i;
 
     if (tx_size == TX_8X8) {
-      mbmi->inter_tx_size[tx_idx] = TX_4X4;
+      mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
       mbmi->tx_size = TX_4X4;
-      txfm_partition_update(xd->above_txfm_context + (blk_col >> 1),
-                            xd->left_txfm_context + (blk_row >> 1), TX_4X4);
+      txfm_partition_update(xd->above_txfm_context + tx_col,
+                            xd->left_txfm_context + tx_row, TX_4X4);
       return;
     }
 
@@ -5271,7 +5249,7 @@
   // to reuse distortion values from the RD estimation, so we reset these
   // flags here before evaluating RD for supertx coding.
   for (plane = 0 ; plane < MAX_MB_PLANE ; plane++)
-    x->skip_txfm[plane << 2] = SKIP_TXFM_NONE;
+    x->skip_txfm[plane][0] = SKIP_TXFM_NONE;
 
   mbmi = &xd->mi[0]->mbmi;
   best_tx_nostx = mbmi->tx_type;
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 700088c..c42b7f1 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -449,7 +449,7 @@
 #endif
     if (x->quant_fp) {
       // Encoding process for rtc mode
-      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
+      if (x->skip_txfm[0][0] == SKIP_TXFM_AC_DC && plane == 0) {
         // skip forward transform
         p->eobs[block] = 0;
         *a = *l = 0;
@@ -460,12 +460,12 @@
       }
     } else {
       if (max_txsize_lookup[plane_bsize] == tx_size) {
-        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
-        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
+        int blk_index = (block >> (tx_size << 1));
+        if (x->skip_txfm[plane][blk_index] == SKIP_TXFM_NONE) {
           // full forward transform and quantization
           vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
                            tx_size, VP10_XFORM_QUANT_B);
-        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
+        } else if (x->skip_txfm[plane][blk_index] == SKIP_TXFM_AC_ONLY) {
           // fast path forward transform and quantization
           vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize,
                            tx_size, VP10_XFORM_QUANT_DC);
@@ -558,12 +558,11 @@
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
   const struct macroblockd_plane *const pd = &xd->plane[plane];
-  int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
-                (blk_col >> (1 - pd->subsampling_x));
-  TX_SIZE plane_tx_size = plane ?
-      get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize,
-                          0, 0) :
-      mbmi->inter_tx_size[blk_idx];
+  const int tx_row = blk_row >> (1 - pd->subsampling_y);
+  const int tx_col = blk_col >> (1 - pd->subsampling_x);
+  const TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
+      mbmi->inter_tx_size[tx_row][tx_col];
 
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 01b5abb..3d92591 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -603,17 +603,6 @@
          dct_vs_dst(p->src_diff, bw, bw, bh, &hcorr, &vcorr);
 }
 
-static int prune_three_for_sby(const VP10_COMP *cpi,
-                               BLOCK_SIZE bsize,
-                               MACROBLOCK *x,
-                               MACROBLOCKD *xd) {
-  (void) cpi;
-  (void) bsize;
-  (void) x;
-  (void) xd;
-  return 0;
-}
-
 #endif  // CONFIG_EXT_TX
 
 // Performance drop: 0.3%, Speed improvement: 5%
@@ -644,9 +633,6 @@
     case PRUNE_TWO :
       return prune_two_for_sby(cpi, bsize, x, xd);
       break;
-    case PRUNE_THREE :
-      return prune_three_for_sby(cpi, bsize, x, xd);
-      break;
   #endif
   }
   assert(0);
@@ -764,18 +750,18 @@
 
         var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                         dst, pd->dst.stride, &sse);
-        x->bsse[(i << 2) + block_idx] = sse;
+        x->bsse[i][block_idx] = sse;
         sum_sse += sse;
 
-        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
+        x->skip_txfm[i][block_idx] = SKIP_TXFM_NONE;
         if (!x->select_tx_size) {
           // Check if all ac coefficients can be quantized to zero.
           if (var < ac_thr || var == 0) {
-            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;
+            x->skip_txfm[i][block_idx] = SKIP_TXFM_AC_ONLY;
 
             // Check if dc coefficient can be quantized to zero.
             if (sse - var < dc_thr || sse == var) {
-              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;
+              x->skip_txfm[i][block_idx] = SKIP_TXFM_AC_DC;
 
               if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                 low_err_skip = 1;
@@ -1168,21 +1154,21 @@
       dist = (int64_t)tmp * 16;
     }
   } else if (max_txsize_lookup[plane_bsize] == tx_size) {
-    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
+    if (x->skip_txfm[plane][block >> (tx_size << 1)] ==
         SKIP_TXFM_NONE) {
       // full forward transform and quantization
       vp10_xform_quant(x, plane, block, blk_row, blk_col,
                        plane_bsize, tx_size, VP10_XFORM_QUANT_B);
       dist_block(args->cpi, x, plane, block, blk_row, blk_col,
                  tx_size, &dist, &sse);
-    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
+    } else if (x->skip_txfm[plane][block >> (tx_size << 1)] ==
                SKIP_TXFM_AC_ONLY) {
       // compute DC coefficient
       tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
       tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
       vp10_xform_quant(x, plane, block, blk_row, blk_col,
                           plane_bsize, tx_size, VP10_XFORM_QUANT_DC);
-      sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
+      sse  = x->bsse[plane][block >> (tx_size << 1)] << 4;
       dist = sse;
       if (x->plane[plane].eobs[block]) {
         const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
@@ -1200,7 +1186,7 @@
       // SKIP_TXFM_AC_DC
       // skip forward transform
       x->plane[plane].eobs[block] = 0;
-      sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
+      sse  = x->bsse[plane][block >> (tx_size << 1)] << 4;
       dist = sse;
     }
   } else {
@@ -2921,8 +2907,10 @@
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
-               (blk_col >> (1 - pd->subsampling_x));
+  const int tx_row = blk_row >> (1 - pd->subsampling_y);
+  const int tx_col = blk_col >> (1 - pd->subsampling_x);
+  TX_SIZE (*const inter_tx_size)[MI_BLOCK_SIZE] =
+    (TX_SIZE (*)[MI_BLOCK_SIZE])&mbmi->inter_tx_size[tx_row][tx_col];
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
   int64_t this_rd = INT64_MAX;
@@ -2985,7 +2973,7 @@
       x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN];
 
   if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
-    mbmi->inter_tx_size[tx_idx] = tx_size;
+    inter_tx_size[0][0] = tx_size;
     vp10_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                        plane_bsize, coeff_ctx, rate, dist, bsse, skip);
 
@@ -3048,11 +3036,10 @@
       pta[i] = ptl[i] = !(tmp_eob == 0);
     txfm_partition_update(tx_above + (blk_col >> 1),
                           tx_left + (blk_row >> 1), tx_size);
-    mbmi->inter_tx_size[tx_idx] = tx_size;
-
+    inter_tx_size[0][0] = tx_size;
     for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
       for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
-        mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
+        inter_tx_size[idy][idx] = tx_size;
     mbmi->tx_size = tx_size;
     if (this_rd == INT64_MAX)
       *is_cost_valid = 0;
@@ -3152,7 +3139,7 @@
   vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
   int s0 = vp10_cost_bit(skip_prob, 0);
   int s1 = vp10_cost_bit(skip_prob, 1);
-  TX_SIZE best_tx_size[64];
+  TX_SIZE best_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
   TX_SIZE best_tx = TX_SIZES;
   uint8_t best_blk_skip[256];
   const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
@@ -3267,14 +3254,14 @@
       memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
       for (idy = 0; idy < xd->n8_h; ++idy)
         for (idx = 0; idx < xd->n8_w; ++idx)
-          best_tx_size[idy * 8 + idx] = mbmi->inter_tx_size[idy * 8 + idx];
+          best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
     }
   }
 
   mbmi->tx_type = best_tx_type;
   for (idy = 0; idy < xd->n8_h; ++idy)
     for (idx = 0; idx < xd->n8_w; ++idx)
-      mbmi->inter_tx_size[idy * 8 + idx] = best_tx_size[idy * 8 + idx];
+      mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
   mbmi->tx_size = best_tx;
   memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
 }
@@ -3289,12 +3276,11 @@
   struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
   BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
-  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
-               (blk_col >> (1 - pd->subsampling_x));
-  TX_SIZE plane_tx_size = plane ?
-      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize,
-                          0, 0) :
-      mbmi->inter_tx_size[tx_idx];
+  const int tx_row = blk_row >> (1 - pd->subsampling_y);
+  const int tx_col = blk_col >> (1 - pd->subsampling_x);
+  const TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
+      mbmi->inter_tx_size[tx_row][tx_col];
 
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
@@ -6073,8 +6059,8 @@
   int orig_dst_stride[MAX_MB_PLANE];
   int rs = 0;
   INTERP_FILTER best_filter = SWITCHABLE;
-  uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
-  int64_t bsse[MAX_MB_PLANE << 2] = {0};
+  uint8_t skip_txfm[MAX_MB_PLANE][4] = {{0}};
+  int64_t bsse[MAX_MB_PLANE][4] = {{0}};
 
   int skip_txfm_sb = 0;
   int64_t skip_sse_sb = INT64_MAX;
@@ -6923,7 +6909,7 @@
                       bsize, ref_best_rd);
       for (idy = 0; idy < xd->n8_h; ++idy)
         for (idx = 0; idx < xd->n8_w; ++idx)
-          mbmi->inter_tx_size[idy * 8 + idx] = mbmi->tx_size;
+          mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
     }
 #else
     super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
@@ -7998,6 +7984,13 @@
         rate2 += intra_cost_penalty;
       distortion2 = distortion_y + distortion_uv;
     } else {
+#if CONFIG_REF_MV
+      int_mv backup_ref_mv[2];
+
+      backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
+      if (comp_pred)
+        backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
+#endif
 #if CONFIG_EXT_INTER
       if (second_ref_frame == INTRA_FRAME) {
         mbmi->interintra_mode = best_intra_mode;
@@ -8016,6 +8009,19 @@
 #if CONFIG_REF_MV
       mbmi->ref_mv_idx = 0;
       ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+
+      if (this_mode == NEWMV &&
+          mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
+        int ref;
+        for (ref = 0; ref < 1 + comp_pred; ++ref) {
+          int_mv this_mv = (ref == 0) ?
+              mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv :
+              mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
+          clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+          lower_mv_precision(&this_mv.as_mv, cm->allow_high_precision_mv);
+          mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
+        }
+      }
 #endif
       this_rd = handle_inter_mode(cpi, x, bsize,
                                   &rate2, &distortion2, &skippable,
@@ -8041,20 +8047,38 @@
 #if CONFIG_REF_MV
       // TODO(jingning): This needs some refactoring to improve code quality
       // and reduce redundant steps.
-      if (mbmi->mode == NEARMV &&
-          mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
+      if ((mbmi->mode == NEARMV &&
+           mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
+          (mbmi->mode == NEWMV &&
+           mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
         int_mv backup_mv = frame_mv[NEARMV][ref_frame];
-        int_mv cur_mv = mbmi_ext->ref_mv_stack[ref_frame][2].this_mv;
         MB_MODE_INFO backup_mbmi = *mbmi;
         int backup_skip = x->skip;
-
         int64_t tmp_ref_rd = this_rd;
         int ref_idx;
-        int ref_set = VPXMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 2);
 
-        uint8_t drl0_ctx =
-            vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
-        rate2 += cpi->drl_mode_cost0[drl0_ctx][0];
+        // TODO(jingning): This should be deprecated shortly.
+        int idx_offset = (mbmi->mode == NEARMV) ? 1 : 0;
+
+        int ref_set =
+            VPXMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
+
+        uint8_t drl0_ctx = 0;
+        uint8_t drl_ctx = 0;
+
+        // Back up frame_mv[NEWMV]; it is restored at the end of this block.
+        int_mv backup_fmv[2];
+        backup_fmv[0] = frame_mv[NEWMV][ref_frame];
+        if (comp_pred)
+          backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
+
+        if (mbmi->mode == NEARMV) {
+          drl0_ctx = vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
+          rate2 += cpi->drl_mode_cost0[drl0_ctx][0];
+        } else {
+          drl_ctx = vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 0);
+          rate2 += cpi->drl_mode_cost0[drl_ctx][0];
+        }
 
         if (this_rd < INT64_MAX) {
           if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
@@ -8080,8 +8104,24 @@
           int tmp_skip = 1;
           int64_t tmp_dist = 0, tmp_sse = 0;
           int dummy_disable_skip = 0;
+          int ref;
+          int_mv cur_mv;
 
-          cur_mv = mbmi_ext->ref_mv_stack[ref_frame][2 + ref_idx].this_mv;
+          mbmi->ref_mv_idx = 1 + ref_idx;
+
+          for (ref = 0; ref < 1 + comp_pred; ++ref) {
+            int_mv this_mv = (ref == 0) ?
+                mbmi_ext->ref_mv_stack[ref_frame_type]
+                                      [mbmi->ref_mv_idx].this_mv :
+                mbmi_ext->ref_mv_stack[ref_frame_type]
+                                      [mbmi->ref_mv_idx].comp_mv;
+            clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+            lower_mv_precision(&this_mv.as_mv, cm->allow_high_precision_mv);
+            mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
+          }
+
+          cur_mv = mbmi_ext->ref_mv_stack[ref_frame]
+                                 [mbmi->ref_mv_idx + idx_offset].this_mv;
           lower_mv_precision(&cur_mv.as_mv, cm->allow_high_precision_mv);
           clamp_mv2(&cur_mv.as_mv, xd);
 
@@ -8100,7 +8140,6 @@
 #else
             int_mv dummy_single_newmv[MAX_REF_FRAMES] = { { 0 } };
 #endif
-            mbmi->ref_mv_idx = 1 + ref_idx;
 
             frame_mv[NEARMV][ref_frame] = cur_mv;
             tmp_alt_rd = handle_inter_mode(cpi, x, bsize,
@@ -8125,12 +8164,23 @@
                                            &tmp_sse, best_rd);
           }
 
-          tmp_rate += cpi->drl_mode_cost0[drl0_ctx][1];
+          if (this_mode == NEARMV) {
+            tmp_rate += cpi->drl_mode_cost0[drl0_ctx][1];
+            if (mbmi_ext->ref_mv_count[ref_frame_type] > 3) {
+              uint8_t drl1_ctx =
+                  vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 2);
+              tmp_rate += cpi->drl_mode_cost1[drl1_ctx][ref_idx];
+            }
+          }
 
-          if (mbmi_ext->ref_mv_count[ref_frame_type] > 3) {
-            uint8_t drl1_ctx =
-                vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 2);
-            tmp_rate += cpi->drl_mode_cost1[drl1_ctx][ref_idx];
+          if (this_mode == NEWMV) {
+            tmp_rate += cpi->drl_mode_cost0[drl_ctx][1];
+
+            if (mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
+              uint8_t this_drl_ctx =
+                  vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
+              tmp_rate += cpi->drl_mode_cost0[this_drl_ctx][ref_idx];
+            }
           }
 
           if (tmp_alt_rd < INT64_MAX) {
@@ -8175,12 +8225,18 @@
         }
 
         frame_mv[NEARMV][ref_frame] = backup_mv;
+        frame_mv[NEWMV][ref_frame] = backup_fmv[0];
+        if (comp_pred)
+          frame_mv[NEWMV][second_ref_frame] = backup_fmv[1];
 #if CONFIG_VAR_TX
         for (i = 0; i < MAX_MB_PLANE; ++i)
           memcpy(x->blk_skip[i], x->blk_skip_drl[i],
                  sizeof(uint8_t) * ctx->num_4x4_blk);
 #endif
       }
+      mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
+      if (comp_pred)
+        mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];
 #endif  // CONFIG_REF_MV
 
       if (this_rd == INT64_MAX)
@@ -8286,7 +8342,7 @@
 
         rd_cost->rate = rate2;
 #if CONFIG_SUPERTX
-        if (x->skip && rate_y == INT_MAX)
+        if (x->skip)
           *returnrate_nocoef = rate2;
         else
           *returnrate_nocoef = rate2 - rate_y - rate_uv;
@@ -8489,17 +8545,21 @@
 #if CONFIG_REF_MV
     const uint8_t rf_type = vp10_ref_frame_type(best_mbmode.ref_frame);
     if (!comp_pred_mode) {
-      if (best_mbmode.ref_mv_idx > 0 && refs[1] == NONE) {
-        int idx = best_mbmode.ref_mv_idx + 1;
-        int_mv cur_mv = mbmi_ext->ref_mv_stack[refs[0]][idx].this_mv;
+      int i;
+      int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2) ?
+          VPXMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2) : INT_MAX;
+
+      for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
+        int_mv cur_mv = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
         lower_mv_precision(&cur_mv.as_mv, cm->allow_high_precision_mv);
-        frame_mv[NEARMV][refs[0]] = cur_mv;
+        if (cur_mv.as_int == best_mbmode.mv[0].as_int) {
+          best_mbmode.mode = NEARMV;
+          best_mbmode.ref_mv_idx = i;
+        }
       }
 
       if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
         best_mbmode.mode = NEARESTMV;
-      else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
-        best_mbmode.mode = NEARMV;
       else if (best_mbmode.mv[0].as_int == 0)
         best_mbmode.mode = ZEROMV;
     } else {
@@ -8507,21 +8567,37 @@
       const int allow_hp = cm->allow_high_precision_mv;
       int_mv nearestmv[2] = { frame_mv[NEARESTMV][refs[0]],
                               frame_mv[NEARESTMV][refs[1]] };
-
       int_mv nearmv[2] = { frame_mv[NEARMV][refs[0]],
                            frame_mv[NEARMV][refs[1]] };
 
+#if CONFIG_EXT_INTER
+      if (mbmi_ext->ref_mv_count[rf_type] > 1) {
+         nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
+         nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
+       }
+#else
+      int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2) ?
+          VPXMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2) : INT_MAX;
+
+      for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
+        nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
+        nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
+        lower_mv_precision(&nearmv[0].as_mv, allow_hp);
+        lower_mv_precision(&nearmv[1].as_mv, allow_hp);
+
+        if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
+            nearmv[1].as_int == best_mbmode.mv[1].as_int) {
+          best_mbmode.mode = NEARMV;
+          best_mbmode.ref_mv_idx = i;
+        }
+      }
+#endif
+
       if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
         nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
         nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
       }
 
-      if (mbmi_ext->ref_mv_count[rf_type] > 1) {
-        int ref_mv_idx = best_mbmode.ref_mv_idx + 1;
-        nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][ref_mv_idx].this_mv;
-        nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][ref_mv_idx].comp_mv;
-      }
-
       for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
         lower_mv_precision(&nearestmv[i].as_mv, allow_hp);
         lower_mv_precision(&nearmv[i].as_mv, allow_hp);
@@ -8541,9 +8617,6 @@
         best_mbmode.mode = ZERO_ZEROMV;
 #else
         best_mbmode.mode = NEARESTMV;
-      else if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
-          nearmv[1].as_int == best_mbmode.mv[1].as_int)
-        best_mbmode.mode = NEARMV;
       else if (best_mbmode.mv[0].as_int == 0 && best_mbmode.mv[1].as_int == 0)
         best_mbmode.mode = ZEROMV;
 #endif  // CONFIG_EXT_INTER
@@ -9087,7 +9160,7 @@
     }
 
 #if CONFIG_VAR_TX
-    mbmi->inter_tx_size[0] = mbmi->tx_size;
+    mbmi->inter_tx_size[0][0] = mbmi->tx_size;
 #endif
 
     if (ref_frame == INTRA_FRAME) {
diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c
index 169ae2c..c50b949 100644
--- a/vp10/encoder/speed_features.c
+++ b/vp10/encoder/speed_features.c
@@ -199,9 +199,6 @@
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
     sf->adaptive_interp_filter_search = 1;
-#if CONFIG_EXT_TX
-    sf->tx_type_search = PRUNE_THREE;
-#endif
   }
 
   if (speed >= 4) {
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h
index 02ee204..ea4df6e 100644
--- a/vp10/encoder/speed_features.h
+++ b/vp10/encoder/speed_features.h
@@ -188,8 +188,6 @@
 #if CONFIG_EXT_TX
   // eliminates two tx types in each direction
   PRUNE_TWO = 2,
-  // eliminates three tx types in each direction
-  PRUNE_THREE = 3,
 #endif
 } TX_TYPE_SEARCH;
 
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c
index c71c985..7aaef5b 100644
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -565,11 +565,11 @@
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
-  int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
-                (blk_col >> (1 - pd->subsampling_x));
-  TX_SIZE plane_tx_size = plane ?
-      get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize, 0, 0) :
-      mbmi->inter_tx_size[blk_idx];
+  const int tx_row = blk_row >> (1 - pd->subsampling_y);
+  const int tx_col = blk_col >> (1 - pd->subsampling_x);
+  const TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0) :
+      mbmi->inter_tx_size[tx_row][tx_col];
 
   int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
   int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];