Merge "Fix aom_fdct8x8_ssse3 in high bit depth mode" into nextgenv2

diff --git a/aom_dsp/prob.c b/aom_dsp/prob.c
index d3556cb..b85fa7e 100644
--- a/aom_dsp/prob.c
+++ b/aom_dsp/prob.c

@@ -11,7 +11,7 @@
 
 #include "./aom_config.h"
 
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
 #include <string.h>
 #endif
 
@@ -57,7 +57,7 @@
   tree_merge_probs_impl(0, tree, pre_probs, counts, probs);
 }
 
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
 typedef struct tree_node tree_node;
 
 struct tree_node {
@@ -86,7 +86,7 @@
   int i;
   uint32_t pa;
   uint32_t pb;
-  for (i = 0; i < OD_MINI(a.len, b.len) && a.probs[i] == b.probs[i]; i++) {
+  for (i = 0; i < AOMMIN(a.len, b.len) && a.probs[i] == b.probs[i]; i++) {
   }
   pa = tree_node_prob(a, i);
   pb = tree_node_prob(b, i);

diff --git a/aom_dsp/prob.h b/aom_dsp/prob.h
index bf9abbf..fcd1a74 100644
--- a/aom_dsp/prob.h
+++ b/aom_dsp/prob.h

@@ -96,7 +96,7 @@
 void aom_tree_merge_probs(const aom_tree_index *tree, const aom_prob *pre_probs,
                           const unsigned int *counts, aom_prob *probs);
 
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
 int tree_to_cdf(const aom_tree_index *tree, const aom_prob *probs,
                 aom_tree_index root, aom_cdf_prob *cdf, aom_tree_index *ind,
                 int *pth, int *len);

diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 65f7440..43843cd 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h

@@ -443,14 +443,6 @@
 }
 #endif  // CONFIG_SUPERTX
 
-static INLINE int get_tx1d_width(TX_SIZE tx_size) {
-  return num_4x4_blocks_wide_txsize_lookup[tx_size] << 2;
-}
-
-static INLINE int get_tx1d_height(TX_SIZE tx_size) {
-  return num_4x4_blocks_high_txsize_lookup[tx_size] << 2;
-}
-
 static INLINE int get_tx2d_size(TX_SIZE tx_size) {
   return num_4x4_blocks_txsize_lookup[tx_size] << 4;
 }

diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 52dc8f1..f23ac96 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c

@@ -911,11 +911,13 @@
   // this (or similar) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
   int scores[PALETTE_MAX_SIZE + 10];
   const int weights[4] = { 3, 2, 3, 2 };
-  int color_ctx = 0;
+  int color_ctx_hash;
+  int color_ctx;
   int color_neighbors[4];
   int inverse_color_order[PALETTE_MAX_SIZE];
   assert(n <= PALETTE_MAX_SIZE);
 
+  // Get color indices of neighbors.
   color_neighbors[0] = (c - 1 >= 0) ? color_map[r * cols + c - 1] : -1;
   color_neighbors[1] =
       (c - 1 >= 0 && r - 1 >= 0) ? color_map[(r - 1) * cols + c - 1] : -1;
@@ -960,15 +962,19 @@
     }
   }
 
-  for (i = 0; i < 4; ++i) color_ctx = color_ctx * 11 + scores[i];
+  // Get hash value of context.
+  color_ctx_hash = 0;
+  for (i = 0; i < 4; ++i) color_ctx_hash = color_ctx_hash * 11 + scores[i];
 
-  for (i = 0; i < PALETTE_COLOR_CONTEXTS; ++i)
-    if (color_ctx == palette_color_context_lookup[i]) {
+  // Lookup context from hash.
+  color_ctx = 0;  // Default.
+  for (i = 0; i < PALETTE_COLOR_CONTEXTS; ++i) {
+    if (color_ctx_hash == palette_color_context_lookup[i]) {
       color_ctx = i;
       break;
     }
+  }
 
-  if (color_ctx >= PALETTE_COLOR_CONTEXTS) color_ctx = 0;
   if (color_idx != NULL) {
     *color_idx = inverse_color_order[color_map[r * cols + c]];
   }

diff --git a/av1/common/entropymv.c b/av1/common/entropymv.c
index a80165e..1ed4dbb 100644
--- a/av1/common/entropymv.c
+++ b/av1/common/entropymv.c

@@ -43,21 +43,21 @@
 
 static const nmv_context default_nmv_context = {
   { 32, 64, 96 },  // joints
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
   { 0, 0, 0, 0 },  // joint_cdf is computed from joints in av1_init_mv_probs()
 #endif
   { {
         // Vertical component
         128,                                                   // sign
         { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 },  // class
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
         { 0 },  // class_cdf is computed from class in av1_init_mv_probs()
 #endif
         { 216 },                                               // class0
         { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 },  // bits
         { { 128, 128, 64 }, { 96, 112, 64 } },                 // class0_fp
         { 64, 96, 64 },                                        // fp
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
         { { 0 }, { 0 } },  // class0_fp_cdf is computed in av1_init_mv_probs()
         { 0 },             // fp_cdf is computed from fp in av1_init_mv_probs()
 #endif
@@ -68,14 +68,14 @@
         // Horizontal component
         128,                                                   // sign
         { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 },  // class
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
         { 0 },  // class_cdf is computed from class in av1_init_mv_probs()
 #endif
         { 208 },                                               // class0
         { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 },  // bits
         { { 128, 128, 64 }, { 96, 112, 64 } },                 // class0_fp
         { 64, 96, 64 },                                        // fp
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
         { { 0 }, { 0 } },  // class0_fp_cdf is computed in av1_init_mv_probs()
         { 0 },             // fp_cdf is computed from fp in av1_init_mv_probs()
 #endif
@@ -149,13 +149,6 @@
   return c;
 }
 
-// TODO(jingning): This idle function is intentionally left as is for
-// experimental purpose.
-int av1_use_mv_hp(const MV *ref) {
-  (void)ref;
-  return 1;
-}
-
 static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr,
                              int usehp) {
   int s, z, c, o, d, e, f;
@@ -279,7 +272,7 @@
   for (i = 0; i < NMV_CONTEXTS; ++i) cm->fc->nmvc[i] = default_nmv_context;
 #else
   cm->fc->nmvc = default_nmv_context;
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
   {
     int i, j;
     av1_tree_to_cdf(av1_mv_joint_tree, cm->fc->nmvc.joints,

diff --git a/av1/common/entropymv.h b/av1/common/entropymv.h
index f308ef3..c215d23 100644
--- a/av1/common/entropymv.h
+++ b/av1/common/entropymv.h

@@ -27,7 +27,6 @@
 void av1_init_mv_probs(struct AV1Common *cm);
 
 void av1_adapt_mv_probs(struct AV1Common *cm, int usehp);
-int av1_use_mv_hp(const MV *ref);
 
 #define MV_UPDATE_PROB 252
 
@@ -85,14 +84,14 @@
 typedef struct {
   aom_prob sign;
   aom_prob classes[MV_CLASSES - 1];
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
   aom_cdf_prob class_cdf[MV_CLASSES];
 #endif
   aom_prob class0[CLASS0_SIZE - 1];
   aom_prob bits[MV_OFFSET_BITS];
   aom_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1];
   aom_prob fp[MV_FP_SIZE - 1];
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
   aom_cdf_prob class0_fp_cdf[CLASS0_SIZE][MV_FP_SIZE];
   aom_cdf_prob fp_cdf[MV_FP_SIZE];
 #endif
@@ -102,7 +101,7 @@
 
 typedef struct {
   aom_prob joints[MV_JOINTS - 1];
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
   aom_cdf_prob joint_cdf[MV_JOINTS];
 #endif
   nmv_component comps[2];

diff --git a/av1/common/mvref_common.h b/av1/common/mvref_common.h
index 25ebbfd..55688a9 100644
--- a/av1/common/mvref_common.h
+++ b/av1/common/mvref_common.h

@@ -341,8 +341,7 @@
 }
 
 static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
-  const int use_hp = allow_hp && av1_use_mv_hp(mv);
-  if (!use_hp) {
+  if (!allow_hp) {
     if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1);
     if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1);
   }

diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index 6c4ae2a..66b6bfd 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c

@@ -1889,6 +1889,7 @@
                                                   int dst_stride,
                                                   PREDICTION_MODE mode,
                                                   BLOCK_SIZE bsize, int plane) {
+  struct macroblockd_plane *const pd = &xd->plane[plane];
   BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
   const int bwl = b_width_log2_lookup[plane_bsize];
   const int bhl = b_height_log2_lookup[plane_bsize];
@@ -1897,14 +1898,14 @@
   TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
 
   if (bwl == bhl) {
-    av1_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride,
-                            dst, dst_stride, 0, 0, plane);
+    av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, ref,
+                            ref_stride, dst, dst_stride, 0, 0, plane);
 
   } else if (bwl < bhl) {
     uint8_t *src_2 = ref + pxbw * ref_stride;
     uint8_t *dst_2 = dst + pxbw * dst_stride;
-    av1_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride,
-                            dst, dst_stride, 0, 0, plane);
+    av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, ref,
+                            ref_stride, dst, dst_stride, 0, 0, plane);
 #if CONFIG_AOM_HIGHBITDEPTH
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
       uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
@@ -1916,14 +1917,14 @@
     {
       memcpy(src_2 - ref_stride, dst_2 - dst_stride, sizeof(*src_2) * pxbw);
     }
-    av1_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, src_2, ref_stride,
-                            dst_2, dst_stride, 0, 1 << bwl, plane);
+    av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, src_2,
+                            ref_stride, dst_2, dst_stride, 0, 1 << bwl, plane);
   } else {  // bwl > bhl
     int i;
     uint8_t *src_2 = ref + pxbh;
     uint8_t *dst_2 = dst + pxbh;
-    av1_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride,
-                            dst, dst_stride, 0, 0, plane);
+    av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, ref,
+                            ref_stride, dst, dst_stride, 0, 0, plane);
 #if CONFIG_AOM_HIGHBITDEPTH
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
       uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
@@ -1936,8 +1937,8 @@
       for (i = 0; i < pxbh; ++i)
         src_2[i * ref_stride - 1] = dst_2[i * dst_stride - 1];
     }
-    av1_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, src_2, ref_stride,
-                            dst_2, dst_stride, 1 << bhl, 0, plane);
+    av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, src_2,
+                            ref_stride, dst_2, dst_stride, 1 << bhl, 0, plane);
   }
 }
 

diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 15f9991..7d5a10f 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c

@@ -209,7 +209,7 @@
   int i, j;
 
   update_mv_probs(ctx->joints, MV_JOINTS - 1, r);
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
   av1_tree_to_cdf(av1_mv_joint_tree, ctx->joints, ctx->joint_cdf);
 #endif
 
@@ -219,7 +219,7 @@
     update_mv_probs(comp_ctx->classes, MV_CLASSES - 1, r);
     update_mv_probs(comp_ctx->class0, CLASS0_SIZE - 1, r);
     update_mv_probs(comp_ctx->bits, MV_OFFSET_BITS, r);
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
     av1_tree_to_cdf(av1_mv_class_tree, comp_ctx->classes, comp_ctx->class_cdf);
 #endif
   }
@@ -228,13 +228,13 @@
     nmv_component *const comp_ctx = &ctx->comps[i];
     for (j = 0; j < CLASS0_SIZE; ++j) {
       update_mv_probs(comp_ctx->class0_fp[j], MV_FP_SIZE - 1, r);
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
       av1_tree_to_cdf(av1_mv_fp_tree, comp_ctx->class0_fp[j],
                       comp_ctx->class0_fp_cdf[j]);
 #endif
     }
     update_mv_probs(comp_ctx->fp, MV_FP_SIZE - 1, r);
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
     av1_tree_to_cdf(av1_mv_fp_tree, comp_ctx->fp, comp_ctx->fp_cdf);
 #endif
   }
@@ -325,13 +325,17 @@
   const TX_SIZE plane_tx_size =
       plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
             : mbmi->inter_tx_size[tx_row][tx_col];
-  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
-  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+  // Scale to match transform block unit.
+  int max_blocks_high = block_size_high[plane_bsize];
+  int max_blocks_wide = block_size_wide[plane_bsize];
 
   if (xd->mb_to_bottom_edge < 0)
-    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+    max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
   if (xd->mb_to_right_edge < 0)
-    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+    max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
+
+  max_blocks_high >>= tx_size_wide_log2[0];
+  max_blocks_wide >>= tx_size_wide_log2[0];
 
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
 
@@ -350,15 +354,14 @@
         pd->dst.stride, max_scan_line, eob);
     *eob_total += eob;
   } else {
-    int bsl = b_width_log2_lookup[bsize];
+    int bsl = block_size_wide[bsize] >> (tx_size_wide_log2[0] + 1);
     int i;
 
     assert(bsl > 0);
-    --bsl;
 
     for (i = 0; i < 4; ++i) {
-      const int offsetr = blk_row + ((i >> 1) << bsl);
-      const int offsetc = blk_col + ((i & 0x01) << bsl);
+      const int offsetr = blk_row + (i >> 1) * bsl;
+      const int offsetc = blk_col + (i & 0x01) * bsl;
 
       if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
 
@@ -1281,8 +1284,8 @@
 
       for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
         const struct macroblockd_plane *const pd = &xd->plane[plane];
-        const int num_4x4_w = pd->n4_w;
-        const int num_4x4_h = pd->n4_h;
+        int block_width = pd->width;
+        int block_height = pd->height;
         int row, col;
 #if CONFIG_VAR_TX
         // TODO(jingning): This can be simplified for decoder performance.
@@ -1297,23 +1300,26 @@
               plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
           const int stepr = tx_size_high_unit[tx_size];
           const int stepc = tx_size_wide_unit[tx_size];
-          const int max_blocks_wide =
-              num_4x4_w +
+          int max_blocks_wide =
+              block_width +
               (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >>
-                                                   (5 + pd->subsampling_x));
-          const int max_blocks_high =
-              num_4x4_h +
+                                                   (3 + pd->subsampling_x));
+          int max_blocks_high =
+              block_height +
               (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >>
-                                                    (5 + pd->subsampling_y));
-
+                                                    (3 + pd->subsampling_y));
+          max_blocks_wide >>= tx_size_wide_log2[0];
+          max_blocks_high >>= tx_size_wide_log2[0];
           for (row = 0; row < max_blocks_high; row += stepr)
             for (col = 0; col < max_blocks_wide; col += stepc)
               eobtotal += reconstruct_inter_block(cm, xd, r, mbmi->segment_id,
                                                   plane, row, col, tx_size);
         } else {
 #endif
-          for (row = 0; row < num_4x4_h; row += bh_var_tx)
-            for (col = 0; col < num_4x4_w; col += bw_var_tx)
+          block_width >>= tx_size_wide_log2[0];
+          block_height >>= tx_size_wide_log2[0];
+          for (row = 0; row < block_height; row += bh_var_tx)
+            for (col = 0; col < block_width; col += bw_var_tx)
               decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize, row,
                                     col, max_tx_size, &eobtotal);
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -1324,15 +1330,16 @@
             plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
         const int stepr = tx_size_high_unit[tx_size];
         const int stepc = tx_size_wide_unit[tx_size];
-        const int max_blocks_wide =
-            num_4x4_w + (xd->mb_to_right_edge >= 0
-                             ? 0
-                             : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
-        const int max_blocks_high =
-            num_4x4_h +
+        int max_blocks_wide =
+            block_width +
+            (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >>
+                                                 (3 + pd->subsampling_x));
+        int max_blocks_high =
+            block_height +
             (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >>
-                                                  (5 + pd->subsampling_y));
-
+                                                  (3 + pd->subsampling_y));
+        max_blocks_wide >>= tx_size_wide_log2[0];
+        max_blocks_high >>= tx_size_wide_log2[0];
         for (row = 0; row < max_blocks_high; row += stepr)
           for (col = 0; col < max_blocks_wide; col += stepc)
             eobtotal += reconstruct_inter_block(cm, xd, r, mbmi->segment_id,

diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 31183c0..9aa182d 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c

@@ -281,24 +281,28 @@
   int is_split = 0;
   const int tx_row = blk_row >> 1;
   const int tx_col = blk_col >> 1;
-  int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
-  int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+  int max_blocks_high = block_size_high[mbmi->sb_type];
+  int max_blocks_wide = block_size_wide[mbmi->sb_type];
   int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
                                    xd->left_txfm_context + tx_row, tx_size);
   TX_SIZE(*const inter_tx_size)
   [MAX_MIB_SIZE] =
       (TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
 
-  if (xd->mb_to_bottom_edge < 0) max_blocks_high += xd->mb_to_bottom_edge >> 5;
-  if (xd->mb_to_right_edge < 0) max_blocks_wide += xd->mb_to_right_edge >> 5;
+  if (xd->mb_to_bottom_edge < 0) max_blocks_high += xd->mb_to_bottom_edge >> 3;
+  if (xd->mb_to_right_edge < 0) max_blocks_wide += xd->mb_to_right_edge >> 3;
+
+  // Scale to transform block unit.
+  max_blocks_high >>= tx_size_wide_log2[0];
+  max_blocks_wide >>= tx_size_wide_log2[0];
 
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
 
   if (depth == MAX_VARTX_DEPTH) {
     int idx, idy;
     inter_tx_size[0][0] = tx_size;
-    for (idy = 0; idy < num_4x4_blocks_high_txsize_lookup[tx_size] / 2; ++idy)
-      for (idx = 0; idx < num_4x4_blocks_wide_txsize_lookup[tx_size] / 2; ++idx)
+    for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
+      for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
         inter_tx_size[idy][idx] = tx_size;
     mbmi->tx_size = tx_size;
     if (counts) ++counts->txfm_partition[ctx][0];
@@ -311,7 +315,8 @@
 
   if (is_split) {
     BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
-    int bsl = b_width_log2_lookup[bsize];
+    // Half the block size in transform block unit.
+    int bsl = block_size_wide[bsize] >> (tx_size_wide_log2[0] + 1);
     int i;
 
     if (counts) ++counts->txfm_partition[ctx][1];
@@ -325,18 +330,17 @@
     }
 
     assert(bsl > 0);
-    --bsl;
     for (i = 0; i < 4; ++i) {
-      int offsetr = blk_row + ((i >> 1) << bsl);
-      int offsetc = blk_col + ((i & 0x01) << bsl);
+      int offsetr = blk_row + ((i >> 1) * bsl);
+      int offsetc = blk_col + ((i & 0x01) * bsl);
       read_tx_size_vartx(cm, xd, mbmi, counts, tx_size - 1, depth + 1, offsetr,
                          offsetc, r);
     }
   } else {
     int idx, idy;
     inter_tx_size[0][0] = tx_size;
-    for (idy = 0; idy < num_4x4_blocks_high_txsize_lookup[tx_size] / 2; ++idy)
-      for (idx = 0; idx < num_4x4_blocks_wide_txsize_lookup[tx_size] / 2; ++idx)
+    for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
+      for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
         inter_tx_size[idy][idx] = tx_size;
     mbmi->tx_size = tx_size;
     if (counts) ++counts->txfm_partition[ctx][0];
@@ -797,7 +801,7 @@
   int mag, d, fr, hp;
   const int sign = aom_read(r, mvcomp->sign, ACCT_STR);
   const int mv_class =
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
       aom_read_symbol(r, mvcomp->class_cdf, MV_CLASSES, ACCT_STR);
 #else
       aom_read_tree(r, av1_mv_class_tree, mvcomp->classes, ACCT_STR);
@@ -818,7 +822,7 @@
   }
 
 // Fractional part
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
   fr = aom_read_symbol(r, class0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
                        MV_FP_SIZE, ACCT_STR);
 #else
@@ -839,22 +843,21 @@
                            const nmv_context *ctx, nmv_context_counts *counts,
                            int allow_hp) {
   MV_JOINT_TYPE joint_type;
-  const int use_hp = allow_hp && av1_use_mv_hp(ref);
   MV diff = { 0, 0 };
   joint_type =
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
       (MV_JOINT_TYPE)aom_read_symbol(r, ctx->joint_cdf, MV_JOINTS, ACCT_STR);
 #else
       (MV_JOINT_TYPE)aom_read_tree(r, av1_mv_joint_tree, ctx->joints, ACCT_STR);
 #endif
 
   if (mv_joint_vertical(joint_type))
-    diff.row = read_mv_component(r, &ctx->comps[0], use_hp);
+    diff.row = read_mv_component(r, &ctx->comps[0], allow_hp);
 
   if (mv_joint_horizontal(joint_type))
-    diff.col = read_mv_component(r, &ctx->comps[1], use_hp);
+    diff.col = read_mv_component(r, &ctx->comps[1], allow_hp);
 
-  av1_inc_mv(&diff, counts, use_hp);
+  av1_inc_mv(&diff, counts, allow_hp);
 
   mv->row = ref->row + diff.row;
   mv->col = ref->col + diff.col;
@@ -1801,9 +1804,9 @@
         inter_block) {
       const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
       const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
-      const int bs = num_4x4_blocks_wide_lookup[txb_size];
-      const int width = num_4x4_blocks_wide_lookup[bsize];
-      const int height = num_4x4_blocks_high_lookup[bsize];
+      const int bs = block_size_wide[txb_size] >> tx_size_wide_log2[0];
+      const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
+      const int height = block_size_high[bsize] >> tx_size_wide_log2[0];
       int idx, idy;
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
       int is_rect_tx_allowed = inter_block && is_rect_tx_allowed_bsize(bsize) &&

diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 6778847..21725d7 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c

@@ -4886,17 +4886,18 @@
       int count16x16_16x16p = 0, count16x16_lp = 0;
       int count32x32 = 0;
       for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
-        count4x4 += counts->tx_size[0][i][TX_4X4];
-        count4x4 += counts->tx_size[1][i][TX_4X4];
-        count4x4 += counts->tx_size[2][i][TX_4X4];
+        // counts->tx_size[max_depth][context_idx][this_depth_level]
+        count4x4 += counts->tx_size[0][i][0];
+        count4x4 += counts->tx_size[1][i][0];
+        count4x4 += counts->tx_size[2][i][0];
 
-        count8x8_lp += counts->tx_size[1][i][TX_8X8];
-        count8x8_lp += counts->tx_size[2][i][TX_8X8];
-        count8x8_8x8p += counts->tx_size[0][i][TX_8X8];
+        count8x8_lp += counts->tx_size[1][i][1];
+        count8x8_lp += counts->tx_size[2][i][1];
+        count8x8_8x8p += counts->tx_size[0][i][1];
 
-        count16x16_16x16p += counts->tx_size[1][i][TX_16X16];
-        count16x16_lp += counts->tx_size[2][i][TX_16X16];
-        count32x32 += counts->tx_size[2][i][TX_32X32];
+        count16x16_16x16p += counts->tx_size[1][i][2];
+        count16x16_lp += counts->tx_size[2][i][2];
+        count32x32 += counts->tx_size[2][i][3];
       }
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
       count4x4 += counts->tx_size_implied[0][TX_4X4];

diff --git a/av1/encoder/encodemv.c b/av1/encoder/encodemv.c
index 53dac12..9b6e0d9 100644
--- a/av1/encoder/encodemv.c
+++ b/av1/encoder/encodemv.c

@@ -46,7 +46,7 @@
   aom_write(w, sign, mvcomp->sign);
 
 // Class
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
   aom_write_symbol(w, mv_class, mvcomp->class_cdf, MV_CLASSES);
 #else
   av1_write_token(w, av1_mv_class_tree, mvcomp->classes,
@@ -63,7 +63,7 @@
   }
 
 // Fractional bits
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
   aom_write_symbol(
       w, fr, mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
       MV_FP_SIZE);
@@ -204,7 +204,12 @@
   nmv_context *const mvc = &cm->fc->nmvc;
   nmv_context_counts *const counts = nmv_counts;
 
+#if !CONFIG_EC_ADAPT || !(CONFIG_DAALA_EC || CONFIG_RANS)
   write_mv_update(av1_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS, w);
+#if CONFIG_DAALA_EC || CONFIG_RANS
+  av1_tree_to_cdf(av1_mv_joint_tree, cm->fc->nmvc.joints,
+                  cm->fc->nmvc.joint_cdf);
+#endif
 
   for (i = 0; i < 2; ++i) {
     nmv_component *comp = &mvc->comps[i];
@@ -213,7 +218,7 @@
     update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB);
     write_mv_update(av1_mv_class_tree, comp->classes, comp_counts->classes,
                     MV_CLASSES, w);
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
     av1_tree_to_cdf(av1_mv_class_tree, comp->classes, comp->class_cdf);
 #endif
     write_mv_update(av1_mv_class0_tree, comp->class0, comp_counts->class0,
@@ -226,17 +231,18 @@
     for (j = 0; j < CLASS0_SIZE; ++j) {
       write_mv_update(av1_mv_fp_tree, mvc->comps[i].class0_fp[j],
                       counts->comps[i].class0_fp[j], MV_FP_SIZE, w);
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
       av1_tree_to_cdf(av1_mv_fp_tree, mvc->comps[i].class0_fp[j],
                       mvc->comps[i].class0_fp_cdf[j]);
 #endif
     }
     write_mv_update(av1_mv_fp_tree, mvc->comps[i].fp, counts->comps[i].fp,
                     MV_FP_SIZE, w);
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
     av1_tree_to_cdf(av1_mv_fp_tree, mvc->comps[i].fp, mvc->comps[i].fp_cdf);
 #endif
   }
+#endif  // !CONFIG_EC_ADAPT || !(CONFIG_DAALA_EC || CONFIG_RANS)
 
   if (usehp) {
     for (i = 0; i < 2; ++i) {
@@ -255,11 +261,10 @@
                    const nmv_context *mvctx, int usehp) {
   const MV diff = { mv->row - ref->row, mv->col - ref->col };
   const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
-  usehp = usehp && av1_use_mv_hp(ref);
 #if CONFIG_REF_MV
   (void)is_compound;
 #endif
-#if CONFIG_DAALA_EC
+#if CONFIG_DAALA_EC || CONFIG_RANS
   aom_write_symbol(w, j, mvctx->joint_cdf, MV_JOINTS);
 #else
   av1_write_token(w, av1_mv_joint_tree, mvctx->joints, &mv_joint_encodings[j]);
@@ -312,7 +317,7 @@
       nmv_context_counts *counts = &nmv_counts[nmv_ctx];
       (void)pred_mvs;
 #endif
-      av1_inc_mv(&diff, counts, av1_use_mv_hp(ref));
+      av1_inc_mv(&diff, counts, 1);
     }
   } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
     const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv;
@@ -325,7 +330,7 @@
                     mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx);
     nmv_context_counts *counts = &nmv_counts[nmv_ctx];
 #endif
-    av1_inc_mv(&diff, counts, av1_use_mv_hp(ref));
+    av1_inc_mv(&diff, counts, 1);
   } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
     const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
     const MV diff = { mvs[0].as_mv.row - ref->row,
@@ -337,7 +342,7 @@
                     mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
     nmv_context_counts *counts = &nmv_counts[nmv_ctx];
 #endif
-    av1_inc_mv(&diff, counts, av1_use_mv_hp(ref));
+    av1_inc_mv(&diff, counts, 1);
   }
 }
 
@@ -366,7 +371,7 @@
                       mbmi_ext->ref_mv_stack[rf_type], i, mbmi->ref_mv_idx);
       nmv_context_counts *counts = &nmv_counts[nmv_ctx];
 #endif
-      av1_inc_mv(&diff, counts, av1_use_mv_hp(ref));
+      av1_inc_mv(&diff, counts, 1);
     }
   } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
     const MV *ref = &mi->bmi[block].ref_mv[1].as_mv;
@@ -379,7 +384,7 @@
                     mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx);
     nmv_context_counts *counts = &nmv_counts[nmv_ctx];
 #endif
-    av1_inc_mv(&diff, counts, av1_use_mv_hp(ref));
+    av1_inc_mv(&diff, counts, 1);
   } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
     const MV *ref = &mi->bmi[block].ref_mv[0].as_mv;
     const MV diff = { mvs[0].as_mv.row - ref->row,
@@ -391,7 +396,7 @@
                     mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
     nmv_context_counts *counts = &nmv_counts[nmv_ctx];
 #endif
-    av1_inc_mv(&diff, counts, av1_use_mv_hp(ref));
+    av1_inc_mv(&diff, counts, 1);
   }
 }
 #else
@@ -419,7 +424,7 @@
 #endif
     const MV diff = { mvs[i].as_mv.row - ref->row,
                       mvs[i].as_mv.col - ref->col };
-    av1_inc_mv(&diff, counts, av1_use_mv_hp(ref));
+    av1_inc_mv(&diff, counts, 1);
   }
 }
 #endif  // CONFIG_EXT_INTER

diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 3fbceab..4b54a2c 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c

@@ -424,7 +424,7 @@
   tr = br;
   tc = bc;
 
-  if (allow_hp && av1_use_mv_hp(ref_mv) && forced_stop == 0) {
+  if (allow_hp && forced_stop == 0) {
     hstep >>= 1;
     FIRST_LEVEL_CHECKS;
     if (eighthiters > 1) {
@@ -484,7 +484,7 @@
     }
   }
 
-  if (allow_hp && av1_use_mv_hp(ref_mv) && forced_stop == 0) {
+  if (allow_hp && forced_stop == 0) {
     tr = br;
     tc = bc;
     hstep >>= 1;
@@ -572,7 +572,7 @@
     tc = bc;
   }
 
-  if (allow_hp && av1_use_mv_hp(ref_mv) && forced_stop == 0) {
+  if (allow_hp && forced_stop == 0) {
     hstep >>= 1;
     FIRST_LEVEL_CHECKS;
     if (eighthiters > 1) {
@@ -687,7 +687,7 @@
   unsigned int cost_array[5];
   int kr, kc;
 
-  if (!(allow_hp && av1_use_mv_hp(ref_mv)))
+  if (!allow_hp)
     if (round == 3) round = 2;
 
   bestmv->row *= 8;
@@ -2446,7 +2446,7 @@
     tc = bc;
   }
 
-  if (allow_hp && av1_use_mv_hp(ref_mv) && forced_stop == 0) {
+  if (allow_hp && forced_stop == 0) {
     hstep >>= 1;
     FIRST_LEVEL_CHECKS;
     if (eighthiters > 1) {
@@ -2581,7 +2581,7 @@
   y_stride = pd->pre[is_second].stride;
   offset = bestmv->row * y_stride + bestmv->col;
 
-  if (!(allow_hp && av1_use_mv_hp(ref_mv)))
+  if (!allow_hp)
     if (round == 3) round = 2;
 
   bestmv->row *= 8;
@@ -3083,7 +3083,7 @@
   y_stride = pd->pre[is_second].stride;
   offset = bestmv->row * y_stride + bestmv->col;
 
-  if (!(allow_hp && av1_use_mv_hp(ref_mv)))
+  if (!allow_hp)
     if (round == 3) round = 2;
 
   bestmv->row *= 8;

diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 890cfa1..d4a089a 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c

@@ -3054,9 +3054,9 @@
   TX_SIZE(*const inter_tx_size)
   [MAX_MIB_SIZE] =
       (TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
-  const int bw = num_4x4_blocks_wide_lookup[plane_bsize];
-  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
-  int max_blocks_wide = bw;
+  int max_blocks_high = block_size_high[plane_bsize];
+  int max_blocks_wide = block_size_wide[plane_bsize];
+  const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
   int64_t this_rd = INT64_MAX;
   ENTROPY_CONTEXT *pta = ta + blk_col;
   ENTROPY_CONTEXT *ptl = tl + blk_row;
@@ -3103,9 +3103,12 @@
   coeff_ctx = combine_entropy_contexts(stxa, stxl);
 
   if (xd->mb_to_bottom_edge < 0)
-    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+    max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
   if (xd->mb_to_right_edge < 0)
-    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+    max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
+
+  max_blocks_high >>= tx_size_wide_log2[0];
+  max_blocks_wide >>= tx_size_wide_log2[0];
 
   *rate = 0;
   *dist = 0;
@@ -3144,8 +3147,10 @@
 
   if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
     BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
-    int bsl = b_height_log2_lookup[bsize];
-    int sub_step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+    int bsl = block_size_wide[bsize] >> (tx_size_wide_log2[0] + 1);
+    // TODO(jingning): Refactor this transform block size transition.
+    TX_SIZE sub_txs = tx_size - 1;
+    int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
     int this_rate;
     int64_t this_dist;
     int64_t this_bsse;
@@ -3156,15 +3161,13 @@
 #if CONFIG_EXT_TX
     assert(tx_size < TX_SIZES);
 #endif  // CONFIG_EXT_TX
-    --bsl;
     for (i = 0; i < 4 && this_cost_valid; ++i) {
-      int offsetr = (i >> 1) << bsl;
-      int offsetc = (i & 0x01) << bsl;
+      int offsetr = (i >> 1) * bsl;
+      int offsetc = (i & 0x01) * bsl;
       select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
-                      block + i * sub_step, tx_size - 1, depth + 1, plane_bsize,
-                      ta, tl, tx_above, tx_left, &this_rate, &this_dist,
-                      &this_bsse, &this_skip, ref_best_rd - tmp_rd,
-                      &this_cost_valid);
+                      block + i * sub_step, sub_txs, depth + 1, plane_bsize, ta,
+                      tl, tx_above, tx_left, &this_rate, &this_dist, &this_bsse,
+                      &this_skip, ref_best_rd - tmp_rd, &this_cost_valid);
       sum_rate += this_rate;
       sum_dist += this_dist;
       sum_bsse += this_bsse;
@@ -3177,15 +3180,13 @@
 
   if (this_rd < sum_rd) {
     int idx, idy;
-    for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i)
-      pta[i] = !(tmp_eob == 0);
-    for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i)
-      ptl[i] = !(tmp_eob == 0);
+    for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) pta[i] = !(tmp_eob == 0);
+    for (i = 0; i < tx_size_high_unit[tx_size]; ++i) ptl[i] = !(tmp_eob == 0);
     txfm_partition_update(tx_above + (blk_col >> 1), tx_left + (blk_row >> 1),
                           tx_size);
     inter_tx_size[0][0] = tx_size;
-    for (idy = 0; idy < num_4x4_blocks_high_txsize_lookup[tx_size] / 2; ++idy)
-      for (idx = 0; idx < num_4x4_blocks_wide_txsize_lookup[tx_size] / 2; ++idx)
+    for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
+      for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
         inter_tx_size[idy][idx] = tx_size;
     mbmi->tx_size = tx_size;
     if (this_rd == INT64_MAX) *is_cost_valid = 0;
@@ -3479,17 +3480,20 @@
   const int tx_row = blk_row >> (1 - pd->subsampling_y);
   const int tx_col = blk_col >> (1 - pd->subsampling_x);
   TX_SIZE plane_tx_size;
-  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
-  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+  int max_blocks_high = block_size_high[plane_bsize];
+  int max_blocks_wide = block_size_wide[plane_bsize];
 
 #if CONFIG_EXT_TX
   assert(tx_size < TX_SIZES);
 #endif  // CONFIG_EXT_TX
 
   if (xd->mb_to_bottom_edge < 0)
-    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+    max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
   if (xd->mb_to_right_edge < 0)
-    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+    max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
+
+  max_blocks_high >>= tx_size_wide_log2[0];
+  max_blocks_wide >>= tx_size_wide_log2[0];
 
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
 
@@ -3521,24 +3525,24 @@
     av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                       plane_bsize, coeff_ctx, rate, dist, bsse, skip);
 
-    for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i)
+    for (i = 0; i < tx_size_wide_unit[tx_size]; ++i)
       ta[i] = !(p->eobs[block] == 0);
-    for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i)
+    for (i = 0; i < tx_size_high_unit[tx_size]; ++i)
       tl[i] = !(p->eobs[block] == 0);
   } else {
-    int bsl = b_width_log2_lookup[bsize];
-    int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+    const int bsl = block_size_wide[bsize] >> (1 + tx_size_wide_log2[0]);
+    const TX_SIZE sub_txs = tx_size - 1;
+    int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
     int i;
 
     assert(bsl > 0);
-    --bsl;
 
     for (i = 0; i < 4; ++i) {
-      int offsetr = (i >> 1) << bsl;
-      int offsetc = (i & 0x01) << bsl;
+      int offsetr = (i >> 1) * bsl;
+      int offsetc = (i & 0x01) * bsl;
       tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
-                  block + i * step, tx_size - 1, plane_bsize, above_ctx,
-                  left_ctx, rate, dist, bsse, skip);
+                  block + i * step, sub_txs, plane_bsize, above_ctx, left_ctx,
+                  rate, dist, bsse, skip);
     }
   }
 }
@@ -4220,8 +4224,7 @@
 #endif  // CONFIG_EXT_INTER
       this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
 #if CONFIG_EXT_INTER
-      if (!cpi->common.allow_high_precision_mv ||
-          !av1_use_mv_hp(&best_ref_mv[0]->as_mv))
+      if (!cpi->common.allow_high_precision_mv)
         lower_mv_precision(&this_mv[0].as_mv, 0);
 #endif  // CONFIG_EXT_INTER
 
@@ -4280,11 +4283,9 @@
         this_mv[0].as_int = compound_seg_newmvs[0].as_int;
         this_mv[1].as_int = compound_seg_newmvs[1].as_int;
       }
-      if (!cpi->common.allow_high_precision_mv ||
-          !av1_use_mv_hp(&best_ref_mv[0]->as_mv))
+      if (!cpi->common.allow_high_precision_mv)
         lower_mv_precision(&this_mv[0].as_mv, 0);
-      if (!cpi->common.allow_high_precision_mv ||
-          !av1_use_mv_hp(&best_ref_mv[1]->as_mv))
+      if (!cpi->common.allow_high_precision_mv)
         lower_mv_precision(&this_mv[1].as_mv, 0);
       thismvcost += av1_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
@@ -4294,8 +4295,7 @@
     case NEW_NEARMV:
     case NEW_NEARESTMV:
       this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
-      if (!cpi->common.allow_high_precision_mv ||
-          !av1_use_mv_hp(&best_ref_mv[0]->as_mv))
+      if (!cpi->common.allow_high_precision_mv)
         lower_mv_precision(&this_mv[0].as_mv, 0);
       thismvcost += av1_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
@@ -4305,8 +4305,7 @@
     case NEAREST_NEWMV:
       this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
       this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
-      if (!cpi->common.allow_high_precision_mv ||
-          !av1_use_mv_hp(&best_ref_mv[1]->as_mv))
+      if (!cpi->common.allow_high_precision_mv)
         lower_mv_precision(&this_mv[1].as_mv, 0);
       thismvcost += av1_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
@@ -4376,8 +4375,8 @@
   struct macroblock_plane *const p = &x->plane[0];
   MODE_INFO *const mi = xd->mi[0];
   const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
-  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
-  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
+  const int width = block_size_wide[plane_bsize];
+  const int height = block_size_high[plane_bsize];
   int idx, idy;
   const uint8_t *const src =
       &p->src.buf[av1_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
@@ -4389,8 +4388,8 @@
 
   TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, tx_size);
   const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
-  const int num_4x4_w = num_4x4_blocks_wide_txsize_lookup[tx_size];
-  const int num_4x4_h = num_4x4_blocks_high_txsize_lookup[tx_size];
+  const int num_4x4_w = tx_size_wide_unit[tx_size];
+  const int num_4x4_h = tx_size_high_unit[tx_size];
 
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
   assert(IMPLIES(xd->lossless[mi->mbmi.segment_id], tx_size == TX_4X4));
@@ -4430,11 +4429,8 @@
         block = k;
       else
         block = (i ? 2 : 0);
-#if CONFIG_VAR_TX
-      coeff_ctx = get_entropy_context(tx_size, ta + (k & 1), tl + (k >> 1));
-#else
+
       coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)), *(tl + (k >> 1)));
-#endif
 #if CONFIG_NEW_QUANT
       av1_xform_quant_fp_nuq(cm, x, 0, block, idy + (i >> 1), idx + (i & 0x01),
                              BLOCK_8X8, tx_size, coeff_ctx);
@@ -5147,8 +5143,7 @@
         if (!has_second_rf &&
 #if CONFIG_EXT_INTER
             have_newmv_in_inter_mode(this_mode) &&
-            (seg_mvs[index][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV ||
-             av1_use_mv_hp(&bsi->ref_mv[0]->as_mv) == 0)
+            (seg_mvs[index][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV)
 #else
             this_mode == NEWMV &&
             (seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV ||
@@ -7044,7 +7039,7 @@
           int tmp_skip_sb = 0;
           int64_t tmp_skip_sse = INT64_MAX;
           int tmp_rs;
-          int tmp_rd;
+          int64_t tmp_rd;
 #if CONFIG_DUAL_FILTER
           mbmi->interp_filter[0] = filter_sets[i][0];
           mbmi->interp_filter[1] = filter_sets[i][1];

diff --git a/test/scan_test.cc b/test/scan_test.cc
new file mode 100644
index 0000000..43df09f
--- /dev/null
+++ b/test/scan_test.cc

@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "av1/common/scan.h"
+#include "test/acm_random.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+// Verifies the packing done by av1_augment_prob(): each entry's original value
+// must be recoverable with ">> 16", and its low 10 bits must hold the bitwise
+// complement of the entry's raster index.
+TEST(scan_test, av1_augment_prob) {
+  int tx1d_size = 4;
+  uint32_t prob[16] = { 8, 8, 7, 7, 8, 8, 4, 2, 3, 3, 2, 2, 2, 2, 2, 2 };
+  uint32_t ref_prob[16] = { 8, 8, 7, 7, 8, 8, 4, 2, 3, 3, 2, 2, 2, 2, 2, 2 };
+  av1_augment_prob(prob, tx1d_size, tx1d_size);
+  for (int r = 0; r < tx1d_size; ++r) {
+    for (int c = 0; c < tx1d_size; ++c) {
+      const uint32_t idx = r * tx1d_size + c;
+      EXPECT_EQ(ref_prob[idx], prob[idx] >> 16);
+    }
+  }
+
+  // Kept unsigned so both EXPECT_EQ operands share a type and the comparison
+  // below does not mix signed and unsigned values.
+  const uint32_t mask = (1u << 10) - 1;
+  for (int r = 0; r < tx1d_size; ++r) {
+    for (int c = 0; c < tx1d_size; ++c) {
+      const uint32_t idx = r * tx1d_size + c;
+      EXPECT_EQ(idx, mask ^ (prob[idx] & mask));
+    }
+  }
+}
+
+// Compares av1_update_sort_order() output for a 4x4 block with a reference
+// table; along that table the corresponding prob[] values never increase.
+TEST(scan_test, av1_update_sort_order) {
+  TX_SIZE tx_size = TX_4X4;
+  uint32_t prob[16] = { 8, 8, 7, 7, 8, 8, 4, 2, 3, 3, 2, 2, 2, 2, 2, 2 };
+  int16_t ref_sort_order[16] = { 0, 1,  4, 5,  2,  3,  6,  8,
+                                 9, 12, 7, 10, 13, 11, 14, 15 };
+  int16_t sort_order[16];
+  av1_update_sort_order(tx_size, prob, sort_order);
+  for (int i = 0; i < 16; ++i) EXPECT_EQ(ref_sort_order[i], sort_order[i]);
+}
+
+// Derives a sort order from prob[], feeds it to av1_update_scan_order(), and
+// checks that iscan matches the reference table and that scan is its inverse.
+TEST(scan_test, av1_update_scan_order) {
+  TX_SIZE tx_size = TX_4X4;
+  uint32_t prob[16] = { 4, 5, 7, 4, 5, 6, 8, 2, 3, 3, 2, 2, 2, 2, 2, 2 };
+  int16_t sort_order[16];
+  int16_t scan[16];
+  int16_t iscan[16];
+  int16_t ref_iscan[16] = {
+    0, 1, 2, 6, 3, 4, 5, 10, 7, 8, 11, 13, 9, 12, 14, 15
+  };
+
+  av1_update_sort_order(tx_size, prob, sort_order);
+  av1_update_scan_order(tx_size, sort_order, scan, iscan);
+
+  for (int i = 0; i < 16; ++i) EXPECT_EQ(ref_iscan[i], iscan[i]);
+
+  // scan[] must undo iscan[]: scan[iscan[i]] == i for every i.
+  for (int i = 0; i < 16; ++i) EXPECT_EQ(i, scan[ref_iscan[i]]);
+}
+
+// Checks av1_update_neighbors() on a 4x4 raster scan. The output holds two
+// neighbor indices per scan position plus one trailing pair. In the reference
+// table interior positions get (left, above), positions on the first row or
+// column repeat their only available neighbor, and position 0 and the
+// trailing pair are (0, 0).
+TEST(scan_test, av1_update_neighbors) {
+  TX_SIZE tx_size = TX_4X4;
+  // Identity permutation: coefficients are scanned in raster order.
+  int16_t scan[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+  int16_t nb[(16 + 1) * 2];
+  int16_t ref_nb[(16 + 1) * 2] = { 0, 0, 0,  0, 1,  1,  2,  2,  0, 0, 4,  1,
+                                   5, 2, 6,  3, 4,  4,  8,  5,  9, 6, 10, 7,
+                                   8, 8, 12, 9, 13, 10, 14, 11, 0, 0 };
+
+  // A raster scan is its own inverse, so the array serves as scan and iscan.
+  av1_update_neighbors(tx_size, scan, scan, nb);
+  for (int i = 0; i < (16 + 1) * 2; ++i) {
+    EXPECT_EQ(ref_nb[i], nb[i]);
+  }
+}
+
+}  // namespace