Merge "Cleanup psnr.h" into nextgenv2
diff --git a/test/vp10_convolve_test.cc b/test/vp10_convolve_test.cc
index eea7068..af34ffc 100644
--- a/test/vp10_convolve_test.cc
+++ b/test/vp10_convolve_test.cc
@@ -60,38 +60,42 @@
   int dst_stride = 1;
   int x_step_q4 = 16;
   int y_step_q4 = 16;
-  int subpel_x_q4 = 3;
-  int subpel_y_q4 = 2;
   int avg = 0;
-
   int w = 1;
   int h = 1;
 
+  int subpel_x_q4;
+  int subpel_y_q4;
+
   for (int i = 0; i < filter_size * filter_size; i++) {
     src[i] = rnd.Rand16() % (1 << 8);
   }
 
-  vp10_convolve(src + src_stride * filter_center + filter_center, src_stride,
-                dst, dst_stride, w, h, filter_params, subpel_x_q4, x_step_q4,
-                subpel_y_q4, y_step_q4, avg);
+  for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
+    for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
+      vp10_convolve(src + src_stride * filter_center + filter_center,
+                    src_stride, dst, dst_stride, w, h, filter_params,
+                    subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
 
-  const int16_t* x_filter =
-      vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
-  const int16_t* y_filter =
-      vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+      const int16_t* x_filter =
+          vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+      const int16_t* y_filter =
+          vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
 
-  int temp[12];
-  int dst_ref = 0;
-  for (int r = 0; r < filter_size; r++) {
-    temp[r] = 0;
-    for (int c = 0; c < filter_size; c++) {
-      temp[r] += x_filter[c] * src[r * filter_size + c];
+      int temp[12];
+      int dst_ref = 0;
+      for (int r = 0; r < filter_size; r++) {
+        temp[r] = 0;
+        for (int c = 0; c < filter_size; c++) {
+          temp[r] += x_filter[c] * src[r * filter_size + c];
+        }
+        temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
+        dst_ref += temp[r] * y_filter[r];
+      }
+      dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
+      EXPECT_EQ(dst[0], dst_ref);
     }
-    temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
-    dst_ref += temp[r] * y_filter[r];
   }
-  dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
-  EXPECT_EQ(dst[0], dst_ref);
 }
 
 TEST(VP10ConvolveTest, vp10_convolve_avg) {
@@ -110,13 +114,14 @@
   int dst_stride = 1;
   int x_step_q4 = 16;
   int y_step_q4 = 16;
-  int subpel_x_q4 = 3;
-  int subpel_y_q4 = 2;
   int avg = 0;
 
   int w = 1;
   int h = 1;
 
+  int subpel_x_q4;
+  int subpel_y_q4;
+
   for (int i = 0; i < filter_size * filter_size; i++) {
     src0[i] = rnd.Rand16() % (1 << 8);
     src1[i] = rnd.Rand16() % (1 << 8);
@@ -124,23 +129,29 @@
 
   int offset = filter_size * filter_center + filter_center;
 
-  avg = 0;
-  vp10_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
-                filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
-                avg);
-  avg = 0;
-  vp10_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
-                filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
-                avg);
+  for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
+    for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
+      avg = 0;
+      vp10_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
+                    filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                    y_step_q4, avg);
+      avg = 0;
+      vp10_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
+                    filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                    y_step_q4, avg);
 
-  avg = 0;
-  vp10_convolve(src0 + offset, src_stride, dst, dst_stride, w, h, filter_params,
-                subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
-  avg = 1;
-  vp10_convolve(src1 + offset, src_stride, dst, dst_stride, w, h, filter_params,
-                subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
+      avg = 0;
+      vp10_convolve(src0 + offset, src_stride, dst, dst_stride, w, h,
+                    filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                    y_step_q4, avg);
+      avg = 1;
+      vp10_convolve(src1 + offset, src_stride, dst, dst_stride, w, h,
+                    filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                    y_step_q4, avg);
 
-  EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+      EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+    }
+  }
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -157,40 +168,45 @@
   int dst_stride = 1;
   int x_step_q4 = 16;
   int y_step_q4 = 16;
-  int subpel_x_q4 = 8;
-  int subpel_y_q4 = 6;
   int avg = 0;
   int bd = 10;
-
   int w = 1;
   int h = 1;
 
+  int subpel_x_q4;
+  int subpel_y_q4;
+
   for (int i = 0; i < filter_size * filter_size; i++) {
     src[i] = rnd.Rand16() % (1 << bd);
   }
 
-  vp10_highbd_convolve(
-      CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
-      src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
-      subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
+  for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
+    for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
+      vp10_highbd_convolve(
+          CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
+          src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
+          subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
 
-  const int16_t* x_filter =
-      vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
-  const int16_t* y_filter =
-      vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+      const int16_t* x_filter =
+          vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+      const int16_t* y_filter =
+          vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
 
-  int temp[12];
-  int dst_ref = 0;
-  for (int r = 0; r < filter_size; r++) {
-    temp[r] = 0;
-    for (int c = 0; c < filter_size; c++) {
-      temp[r] += x_filter[c] * src[r * filter_size + c];
+      int temp[12];
+      int dst_ref = 0;
+      for (int r = 0; r < filter_size; r++) {
+        temp[r] = 0;
+        for (int c = 0; c < filter_size; c++) {
+          temp[r] += x_filter[c] * src[r * filter_size + c];
+        }
+        temp[r] =
+            clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
+        dst_ref += temp[r] * y_filter[r];
+      }
+      dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
+      EXPECT_EQ(dst[0], dst_ref);
     }
-    temp[r] = clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
-    dst_ref += temp[r] * y_filter[r];
   }
-  dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
-  EXPECT_EQ(dst[0], dst_ref);
 }
 
 TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) {
@@ -209,42 +225,49 @@
   int dst_stride = 1;
   int x_step_q4 = 16;
   int y_step_q4 = 16;
-  int subpel_x_q4 = 3;
-  int subpel_y_q4 = 2;
   int avg = 0;
   int bd = 10;
 
   int w = 1;
   int h = 1;
 
+  int subpel_x_q4;
+  int subpel_y_q4;
+
   for (int i = 0; i < filter_size * filter_size; i++) {
     src0[i] = rnd.Rand16() % (1 << bd);
     src1[i] = rnd.Rand16() % (1 << bd);
   }
 
-  int offset = filter_size * filter_center + filter_center;
+  for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
+    for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
+      int offset = filter_size * filter_center + filter_center;
 
-  avg = 0;
-  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
-                       CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
-                       filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
-                       y_step_q4, avg, bd);
-  avg = 0;
-  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
-                       CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
-                       filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
-                       y_step_q4, avg, bd);
+      avg = 0;
+      vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
+                           CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
+                           filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                           y_step_q4, avg, bd);
+      avg = 0;
+      vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
+                           CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
+                           filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                           y_step_q4, avg, bd);
 
-  avg = 0;
-  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
-                       CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
-                       subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
-  avg = 1;
-  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
-                       CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params,
-                       subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
+      avg = 0;
+      vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
+                           CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
+                           filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                           y_step_q4, avg, bd);
+      avg = 1;
+      vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
+                           CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
+                           filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                           y_step_q4, avg, bd);
 
-  EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+      EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+    }
+  }
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index cf78cbb..4d7f921 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -79,6 +79,9 @@
 typedef struct {
   PREDICTION_MODE as_mode;
   int_mv as_mv[2];  // first, second inter predictor motion vectors
+#if CONFIG_REF_MV
+  int_mv pred_mv[2];
+#endif
 #if CONFIG_EXT_INTER
   int_mv ref_mv[2];
 #endif  // CONFIG_EXT_INTER
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 735e10e..abdb7db 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -809,7 +809,17 @@
   { 36, 243, 48},
   { 149, 160, 128},
 };
-#else
+#elif CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 5
+static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
+                                                    [SWITCHABLE_FILTERS - 1] = {
+  { 235, 192, 128, 128},
+  { 36, 243, 208, 128},
+  { 34, 16, 128, 128},
+  { 36, 243, 48, 128},
+  { 34, 16, 128, 128},
+  { 149, 160, 128, 128},
+};
+#else  // CONFIG_EXT_INTERP
 static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
                                                     [SWITCHABLE_FILTERS - 1] = {
   { 235, 162, },
@@ -817,7 +827,7 @@
   { 34, 3, },
   { 149, 144, },
 };
-#endif
+#endif  // CONFIG_EXT_INTERP
 
 #if CONFIG_EXT_TX
 const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER]
@@ -1335,6 +1345,14 @@
   4, -EIGHTTAP_SHARP,
   -EIGHTTAP_SMOOTH, -EIGHTTAP_SMOOTH2,
 };
+#elif CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 5
+const vpx_tree_index vp10_switchable_interp_tree
+[TREE_SIZE(SWITCHABLE_FILTERS)] = {
+  -EIGHTTAP, 2,
+  4, 6,
+  -EIGHTTAP_SMOOTH, -EIGHTTAP_SMOOTH2,
+  -EIGHTTAP_SHARP, -EIGHTTAP_SHARP2,
+};
 #else
 const vpx_tree_index vp10_switchable_interp_tree
 [TREE_SIZE(SWITCHABLE_FILTERS)] = {
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index d581a08..05918ee 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -93,7 +93,11 @@
   vpx_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
 #endif
   vpx_prob skip_probs[SKIP_CONTEXTS];
+#if CONFIG_REF_MV
+  nmv_context nmvc[NMV_CONTEXTS];
+#else
   nmv_context nmvc;
+#endif
   int initialized;
 #if CONFIG_EXT_TX
   vpx_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1];
@@ -150,7 +154,11 @@
   unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
 #endif
   unsigned int skip[SKIP_CONTEXTS][2];
+#if CONFIG_REF_MV
+  nmv_context_counts mv[NMV_CONTEXTS];
+#else
   nmv_context_counts mv;
+#endif
 #if CONFIG_EXT_TX
   unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
   unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
diff --git a/vp10/common/entropymv.c b/vp10/common/entropymv.c
index a9946ee..5be9797 100644
--- a/vp10/common/entropymv.c
+++ b/vp10/common/entropymv.c
@@ -185,7 +185,45 @@
 
 void vp10_adapt_mv_probs(VP10_COMMON *cm, int allow_hp) {
   int i, j;
+#if CONFIG_REF_MV
+  int idx;
+  for (idx = 0; idx < NMV_CONTEXTS; ++idx) {
+    nmv_context *fc = &cm->fc->nmvc[idx];
+    const nmv_context *pre_fc =
+        &cm->frame_contexts[cm->frame_context_idx].nmvc[idx];
+    const nmv_context_counts *counts = &cm->counts.mv[idx];
 
+    vpx_tree_merge_probs(vp10_mv_joint_tree, pre_fc->joints, counts->joints,
+                         fc->joints);
+
+    for (i = 0; i < 2; ++i) {
+      nmv_component *comp = &fc->comps[i];
+      const nmv_component *pre_comp = &pre_fc->comps[i];
+      const nmv_component_counts *c = &counts->comps[i];
+
+      comp->sign = mode_mv_merge_probs(pre_comp->sign, c->sign);
+      vpx_tree_merge_probs(vp10_mv_class_tree, pre_comp->classes, c->classes,
+                           comp->classes);
+      vpx_tree_merge_probs(vp10_mv_class0_tree, pre_comp->class0, c->class0,
+                           comp->class0);
+
+      for (j = 0; j < MV_OFFSET_BITS; ++j)
+        comp->bits[j] = mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]);
+
+      for (j = 0; j < CLASS0_SIZE; ++j)
+        vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->class0_fp[j],
+                             c->class0_fp[j], comp->class0_fp[j]);
+
+      vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
+
+      if (allow_hp) {
+        comp->class0_hp = mode_mv_merge_probs(pre_comp->class0_hp,
+                                              c->class0_hp);
+        comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp);
+      }
+    }
+  }
+#else
   nmv_context *fc = &cm->fc->nmvc;
   const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc;
   const nmv_context_counts *counts = &cm->counts.mv;
@@ -218,8 +256,15 @@
       comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp);
     }
   }
+#endif
 }
 
 void vp10_init_mv_probs(VP10_COMMON *cm) {
+#if CONFIG_REF_MV
+  int i;
+  for (i = 0; i < NMV_CONTEXTS; ++i)
+    cm->fc->nmvc[i] = default_nmv_context;
+#else
   cm->fc->nmvc = default_nmv_context;
+#endif
 }
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index af6ef36..e1f3168 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -227,6 +227,8 @@
 #define SKIP_CONTEXTS 3
 
 #if CONFIG_REF_MV
+#define NMV_CONTEXTS 2
+
 #define NEWMV_MODE_CONTEXTS  7
 #define ZEROMV_MODE_CONTEXTS 2
 #define REFMV_MODE_CONTEXTS  9
diff --git a/vp10/common/filter.c b/vp10/common/filter.c
index a5987f1..c001f7f 100644
--- a/vp10/common/filter.c
+++ b/vp10/common/filter.c
@@ -12,8 +12,6 @@
 
 #include "vp10/common/filter.h"
 
-#define USE_12_SHARP_FILTER 0
-
 DECLARE_ALIGNED(256, static const InterpKernel,
                 bilinear_filters[SUBPEL_SHIFTS]) = {
   { 0, 0, 0, 128,   0, 0, 0, 0 },
@@ -75,29 +73,6 @@
 #endif  // CONFIG_EXT_INTERP
 };
 
-#if USE_12_SHARP_FILTER
-DECLARE_ALIGNED(16, static const int16_t,
-                sub_pel_filters_12sharp[16][12]) = {
-  // intfilt 0.8
-  {0,   0,   0,   0,   0, 128,   0,   0,   0,   0,   0, 0},
-  {0,   1,  -1,   3,  -7, 127,   8,  -4,   2,  -1,   0, 0},
-  {0,   1,  -3,   5, -12, 124,  18,  -8,   4,  -2,   1, 0},
-  {-1,   2,  -4,   8, -17, 120,  28, -11,   6,  -3,   1, -1},
-  {-1,   2,  -4,  10, -21, 114,  38, -15,   8,  -4,   2, -1},
-  {-1,   3,  -5,  11, -23, 107,  49, -18,   9,  -5,   2, -1},
-  {-1,   3,  -6,  12, -25,  99,  60, -21,  11,  -6,   3, -1},
-  {-1,   3,  -6,  12, -25,  90,  70, -23,  12,  -6,   3, -1},
-  {-1,   3,  -6,  12, -24,  80,  80, -24,  12,  -6,   3, -1},
-  {-1,   3,  -6,  12, -23,  70,  90, -25,  12,  -6,   3, -1},
-  {-1,   3,  -6,  11, -21,  60,  99, -25,  12,  -6,   3, -1},
-  {-1,   2,  -5,   9, -18,  49, 107, -23,  11,  -5,   3, -1},
-  {-1,   2,  -4,   8, -15,  38, 114, -21,  10,  -4,   2, -1},
-  {-1,   1,  -3,   6, -11,  28, 120, -17,   8,  -4,   2, -1},
-  {0,   1,  -2,   4,  -8,  18, 124, -12,   5,  -3,   1, 0},
-  {0,   0,  -1,   2,  -4,   8, 127,  -7,   3,  -1,   1, 0},
-};
-#endif  // USE_12_SHARP_FILTER
-
 DECLARE_ALIGNED(256, static const InterpKernel,
                 sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
 #if CONFIG_EXT_INTERP
@@ -139,7 +114,7 @@
 #endif  // CONFIG_EXT_INTERP
 };
 
-#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+#if CONFIG_EXT_INTERP && (SWITCHABLE_FILTERS == 4 || SWITCHABLE_FILTERS == 5)
 
 DECLARE_ALIGNED(256, static const InterpKernel,
                 sub_pel_filters_8smooth2[SUBPEL_SHIFTS]) = {
@@ -183,6 +158,27 @@
   {0,   2, -11,  31,  95,  19, -10, 2},
 };
 
+DECLARE_ALIGNED(16, static const int16_t,
+                sub_pel_filters_12sharp[16][12]) = {
+  // intfilt 0.8
+  {0,   0,   0,   0,   0, 128,   0,   0,   0,   0,   0, 0},
+  {0,   1,  -1,   3,  -7, 127,   8,  -4,   2,  -1,   0, 0},
+  {0,   1,  -3,   5, -12, 124,  18,  -8,   4,  -2,   1, 0},
+  {-1,   2,  -4,   8, -17, 120,  28, -11,   6,  -3,   1, -1},
+  {-1,   2,  -4,  10, -21, 114,  38, -15,   8,  -4,   2, -1},
+  {-1,   3,  -5,  11, -23, 107,  49, -18,   9,  -5,   2, -1},
+  {-1,   3,  -6,  12, -25,  99,  60, -21,  11,  -6,   3, -1},
+  {-1,   3,  -6,  12, -25,  90,  70, -23,  12,  -6,   3, -1},
+  {-1,   3,  -6,  12, -24,  80,  80, -24,  12,  -6,   3, -1},
+  {-1,   3,  -6,  12, -23,  70,  90, -25,  12,  -6,   3, -1},
+  {-1,   3,  -6,  11, -21,  60,  99, -25,  12,  -6,   3, -1},
+  {-1,   2,  -5,   9, -18,  49, 107, -23,  11,  -5,   3, -1},
+  {-1,   2,  -4,   8, -15,  38, 114, -21,  10,  -4,   2, -1},
+  {-1,   1,  -3,   6, -11,  28, 120, -17,   8,  -4,   2, -1},
+  {0,   1,  -2,   4,  -8,  18, 124, -12,   5,  -3,   1, 0},
+  {0,   0,  -1,   2,  -4,   8, 127,  -7,   3,  -1,   1, 0},
+};
+
 #else
 
 DECLARE_ALIGNED(256, static const InterpKernel,
@@ -214,6 +210,9 @@
   sub_pel_filters_8sharp,
 #if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
   sub_pel_filters_8smooth2,
+#elif CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 5
+  sub_pel_filters_8smooth2,
+  (const InterpKernel*)sub_pel_filters_12sharp,
 #endif
   bilinear_filters
 };
@@ -231,13 +230,12 @@
 vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
   {(const int16_t*)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS},
   {(const int16_t*)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS},
-#if USE_12_SHARP_FILTER
-  {(const int16_t*)sub_pel_filters_12sharp, 12, SUBPEL_SHIFTS},
-#else  // USE_12_SHARP_FILTER
   {(const int16_t*)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS},
-#endif  // USE_12_SHARP_FILTER
 #if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
   {(const int16_t*)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS},
+#elif CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 5
+  {(const int16_t*)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS},
+  {(const int16_t*)sub_pel_filters_12sharp, 12, SUBPEL_SHIFTS},
 #endif
   {(const int16_t*)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS}
 };
diff --git a/vp10/common/filter.h b/vp10/common/filter.h
index afebee0..fea8330 100644
--- a/vp10/common/filter.h
+++ b/vp10/common/filter.h
@@ -27,8 +27,19 @@
 
 #if CONFIG_EXT_INTERP
 #define SUPPORT_NONINTERPOLATING_FILTERS 0  /* turn it on for experimentation */
+#define SWITCHABLE_FILTERS  5 /* Number of switchable filters */
+
+#if SWITCHABLE_FILTERS == 4
+
 #define EIGHTTAP_SMOOTH2    3
-#define SWITCHABLE_FILTERS  4 /* Number of switchable filters */
+
+#elif SWITCHABLE_FILTERS == 5
+
+#define EIGHTTAP_SMOOTH2    3
+#define EIGHTTAP_SHARP2     4
+
+#endif  // SWITCHABLE_FILTERS
+
 #else
 #define SWITCHABLE_FILTERS  3 /* Number of switchable filters */
 #endif  // CONFIG_EXT_INTERP
diff --git a/vp10/common/mv.h b/vp10/common/mv.h
index 904d372..4523705 100644
--- a/vp10/common/mv.h
+++ b/vp10/common/mv.h
@@ -38,6 +38,7 @@
 typedef struct candidate_mv {
   int_mv this_mv;
   int_mv comp_mv;
+  int_mv pred_mv;
   int weight;
 } CANDIDATE_MV;
 #endif
diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c
index 1b7fb7d..5a2def0 100644
--- a/vp10/common/mvref_common.c
+++ b/vp10/common/mvref_common.c
@@ -38,6 +38,8 @@
         // Add a new item to the list.
         if (index == *refmv_count) {
           ref_mv_stack[index].this_mv = this_refmv;
+          ref_mv_stack[index].pred_mv =
+              get_sub_block_pred_mv(candidate_mi, ref, col, block);
           ref_mv_stack[index].weight = 2 * weight;
           ++(*refmv_count);
 
@@ -63,6 +65,8 @@
           // Add a new item to the list.
           if (index == *refmv_count) {
             ref_mv_stack[index].this_mv = this_refmv;
+            ref_mv_stack[index].pred_mv =
+                get_sub_block_pred_mv(candidate_mi, ref, col, alt_block);
             ref_mv_stack[index].weight = weight;
             ++(*refmv_count);
 
diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h
index b02c0dd..b3a8beb 100644
--- a/vp10/common/mvref_common.h
+++ b/vp10/common/mvref_common.h
@@ -150,6 +150,16 @@
           : candidate->mbmi.mv[which_mv];
 }
 
+#if CONFIG_REF_MV
+static INLINE int_mv get_sub_block_pred_mv(const MODE_INFO *candidate,
+                                           int which_mv,
+                                           int search_col, int block_idx) {
+  return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
+          ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
+              .pred_mv[which_mv]  // sub-8x8 candidate: per-sub-block pred_mv
+          : candidate->mbmi.pred_mv[which_mv];  // else block-level pred_mv
+}
+#endif  // CONFIG_REF_MV
 
 // Performs mv sign inversion if indicated by the reference frame combination.
 static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
@@ -218,6 +228,22 @@
 }
 
 #if CONFIG_REF_MV
+// Returns 1 when the stack is non-empty, entry 0 outweighs REF_CAT_LEVEL and
+// its this_mv row/col each differ from pred_mv by less than 8; otherwise 0.
+static INLINE int vp10_nmv_ctx(const uint8_t ref_mv_count,
+                               const CANDIDATE_MV *ref_mv_stack) {
+#if CONFIG_EXT_INTER
+  return 0;  // Always context 0 when CONFIG_EXT_INTER is enabled.
+#endif
+  if (ref_mv_count > 0 && ref_mv_stack[0].weight > REF_CAT_LEVEL &&
+      abs(ref_mv_stack[0].this_mv.as_mv.row -
+          ref_mv_stack[0].pred_mv.as_mv.row) < 8 &&
+      abs(ref_mv_stack[0].this_mv.as_mv.col -
+          ref_mv_stack[0].pred_mv.as_mv.col) < 8)
+    return 1;
+  return 0;
+}
+
 static INLINE int8_t vp10_ref_frame_type(const MV_REFERENCE_FRAME *const rf) {
   if (rf[1] > INTRA_FRAME)
     return rf[0] + ALTREF_FRAME;
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index 6e959ed..7f04a09 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -447,6 +447,39 @@
     for (j = 0; j < 2; j++)
       cm->counts.skip[i][j] += counts->skip[i][j];
 
+#if CONFIG_REF_MV
+  for (m = 0; m < NMV_CONTEXTS; ++m) {
+    for (i = 0; i < MV_JOINTS; i++)
+      cm->counts.mv[m].joints[i] += counts->mv[m].joints[i];
+
+    for (k = 0; k < 2; k++) {
+      nmv_component_counts *comps = &cm->counts.mv[m].comps[k];
+      nmv_component_counts *comps_t = &counts->mv[m].comps[k];
+
+      for (i = 0; i < 2; i++) {
+        comps->sign[i] += comps_t->sign[i];
+        comps->class0_hp[i] += comps_t->class0_hp[i];
+        comps->hp[i] += comps_t->hp[i];
+      }
+
+      for (i = 0; i < MV_CLASSES; i++)
+        comps->classes[i] += comps_t->classes[i];
+
+      for (i = 0; i < CLASS0_SIZE; i++) {
+        comps->class0[i] += comps_t->class0[i];
+        for (j = 0; j < MV_FP_SIZE; j++)
+          comps->class0_fp[i][j] += comps_t->class0_fp[i][j];
+      }
+
+      for (i = 0; i < MV_OFFSET_BITS; i++)
+        for (j = 0; j < 2; j++)
+          comps->bits[i][j] += comps_t->bits[i][j];
+
+      for (i = 0; i < MV_FP_SIZE; i++)
+        comps->fp[i] += comps_t->fp[i];
+    }
+  }
+#else
   for (i = 0; i < MV_JOINTS; i++)
     cm->counts.mv.joints[i] += counts->mv.joints[i];
 
@@ -476,6 +509,7 @@
     for (i = 0; i < MV_FP_SIZE; i++)
       comps->fp[i] += comps_t->fp[i];
   }
+#endif
 
 #if CONFIG_EXT_TX
   for (i = 0; i < EXT_TX_SIZES; i++) {
diff --git a/vp10/common/vp10_convolve.c b/vp10/common/vp10_convolve.c
index e8c0c92..33a8542 100644
--- a/vp10/common/vp10_convolve.c
+++ b/vp10/common/vp10_convolve.c
@@ -1,4 +1,5 @@
 #include <assert.h>
+#include <string.h>
 
 #include "vp10/common/filter.h"
 #include "vpx_dsp/vpx_dsp_common.h"
@@ -69,22 +70,35 @@
   }
 }
 
+// Copies a w x h block of pixels from src to dst, one row at a time. With
+// avg != 0 each dst pixel is instead replaced by
+// clip_pixel(ROUND_POWER_OF_TWO(dst + src, 1)).
+static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
+                          int dst_stride, int w, int h, int avg) {
+  int row;
+  for (row = 0; row < h; ++row) {
+    if (!avg) {
+      memcpy(dst, src, w);
+    } else {
+      int col;
+      for (col = 0; col < w; ++col) {
+        const int sum = dst[col] + src[col];
+        dst[col] = clip_pixel(ROUND_POWER_OF_TWO(sum, 1));
+      }
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
 void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
                    int dst_stride, int w, int h,
                    const InterpFilterParams filter_params,
                    const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
                    int y_step_q4, int avg) {
   int filter_size = filter_params.tap;
-
-  // temp's size is set to (maximum possible intermediate_height) *
-  // MAX_BLOCK_WIDTH
-  uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
-                MAX_FILTER_TAP) *
-               MAX_BLOCK_WIDTH];
-  int temp_stride = MAX_BLOCK_WIDTH;
-
-  int intermediate_height =
-      (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
+  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
 
   assert(w <= MAX_BLOCK_WIDTH);
   assert(h <= MAX_BLOCK_HEIGHT);
@@ -92,11 +106,31 @@
   assert(x_step_q4 <= MAX_STEP);
   assert(filter_params.tap <= MAX_FILTER_TAP);
 
-  convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
-                 temp_stride, w, intermediate_height, filter_params,
-                 subpel_x_q4, x_step_q4, 0);
-  convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
-                dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, avg);
+  if (ignore_horiz && ignore_vert) {
+    convolve_copy(src, src_stride, dst, dst_stride, w, h, avg);
+  } else if (ignore_vert) {
+    convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
+                   subpel_x_q4, x_step_q4, avg);
+  } else if (ignore_horiz) {
+    convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
+                  subpel_y_q4, y_step_q4, avg);
+  } else {
+    // temp's size is set to (maximum possible intermediate_height) *
+    // MAX_BLOCK_WIDTH
+    uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+                  MAX_FILTER_TAP) *
+                 MAX_BLOCK_WIDTH];
+    int temp_stride = MAX_BLOCK_WIDTH;
+
+    int intermediate_height =
+        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+
+    convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
+                   temp_stride, w, intermediate_height, filter_params,
+                   subpel_x_q4, x_step_q4, 0);
+    convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
+                  dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, avg);
+  }
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -164,23 +198,39 @@
   }
 }
 
+// Copies a w x h block of 16-bit pixels from src to dst, one row at a time.
+// With avg != 0 each dst pixel is instead replaced by
+// clip_pixel_highbd(ROUND_POWER_OF_TWO(dst + src, 1), bd).
+static void highbd_convolve_copy(const uint16_t *src, int src_stride,
+                                 uint16_t *dst, int dst_stride, int w, int h,
+                                 int avg, int bd) {
+  int row;
+  for (row = 0; row < h; ++row) {
+    if (!avg) {
+      memcpy(dst, src, w * sizeof(*src));
+    } else {
+      int col;
+      for (col = 0; col < w; ++col) {
+        const int sum = dst[col] + src[col];
+        dst[col] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, 1), bd);
+      }
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
 void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
                           int dst_stride, int w, int h,
                           const InterpFilterParams filter_params,
                           const int subpel_x_q4, int x_step_q4,
                           const int subpel_y_q4, int y_step_q4, int avg,
                           int bd) {
+  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
   int filter_size = filter_params.tap;
-
-  // temp's size is set to (maximum possible intermediate_height) *
-  // MAX_BLOCK_WIDTH
-  uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
-                 MAX_FILTER_TAP) *
-                MAX_BLOCK_WIDTH];
-  int temp_stride = MAX_BLOCK_WIDTH;
-
-  int intermediate_height =
-      (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
+  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
 
   assert(w <= MAX_BLOCK_WIDTH);
   assert(h <= MAX_BLOCK_HEIGHT);
@@ -188,12 +238,31 @@
   assert(x_step_q4 <= MAX_STEP);
   assert(filter_params.tap <= MAX_FILTER_TAP);
 
-  highbd_convolve_horiz(
-      CONVERT_TO_SHORTPTR(src8 - src_stride * (filter_size / 2 - 1)),
-      src_stride, temp, temp_stride, w, intermediate_height, filter_params,
-      subpel_x_q4, x_step_q4, 0, bd);
-  highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
-                       CONVERT_TO_SHORTPTR(dst8), dst_stride, w, h,
-                       filter_params, subpel_y_q4, y_step_q4, avg, bd);
+  if (ignore_horiz && ignore_vert) {
+    highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, avg, bd);
+  } else if (ignore_vert) {
+    highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
+                          subpel_x_q4, x_step_q4, avg, bd);
+  } else if (ignore_horiz) {
+    highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
+                         subpel_y_q4, y_step_q4, avg, bd);
+  } else {
+    // temp's size is set to (maximum possible intermediate_height) *
+    // MAX_BLOCK_WIDTH
+    uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+                   MAX_FILTER_TAP) *
+                  MAX_BLOCK_WIDTH];
+    int temp_stride = MAX_BLOCK_WIDTH;
+
+    int intermediate_height =
+        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+
+    highbd_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
+                          temp, temp_stride, w, intermediate_height,
+                          filter_params, subpel_x_q4, x_step_q4, 0, bd);
+    highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
+                         temp_stride, dst, dst_stride, w, h, filter_params,
+                         subpel_y_q4, y_step_q4, avg, bd);
+  }
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index a003d7a..9d9fae0 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -653,6 +653,7 @@
   struct macroblockd_plane *const pd = &xd->plane[plane];
   uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
   MV32 scaled_mv;
+  MV mv_q4;
   int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height,
       buf_stride, subpel_x, subpel_y;
   uint8_t *ref_frame, *buf_ptr;
@@ -672,10 +673,10 @@
                          : ref_frame_buf->buf.v_buffer;
   }
 
+  mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh,
+                                    pd->subsampling_x,
+                                    pd->subsampling_y);
   if (is_scaled) {
-    const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh,
-                                               pd->subsampling_x,
-                                               pd->subsampling_y);
     // Co-ordinate of containing block to pixel precision.
     int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
     int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
@@ -699,11 +700,6 @@
     xs = sf->x_step_q4;
     ys = sf->y_step_q4;
   } else {
-#if CONFIG_OBMC
-    const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh,
-                                               pd->subsampling_x,
-                                               pd->subsampling_y);
-#endif  // CONFIG_OBMC
     // Co-ordinate of containing block to pixel precision.
     x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
     y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
@@ -712,13 +708,8 @@
     x0_16 = x0 << SUBPEL_BITS;
     y0_16 = y0 << SUBPEL_BITS;
 
-#if CONFIG_OBMC
     scaled_mv.row = mv_q4.row;
     scaled_mv.col = mv_q4.col;
-#else
-    scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y));
-    scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x));
-#endif  // CONFIG_OBMC
     xs = ys = 16;
   }
   subpel_x = scaled_mv.col & SUBPEL_MASK;
@@ -3566,7 +3557,9 @@
         for (i = 0; i < INTRA_MODES - 1; ++i)
           vp10_diff_update_prob(&r, &cm->kf_y_prob[k][j][i]);
   } else {
+#if !CONFIG_REF_MV
     nmv_context *const nmvc = &fc->nmvc;
+#endif
 
     read_inter_mode_probs(fc, &r);
 
@@ -3593,7 +3586,12 @@
       for (i = 0; i < INTRA_MODES - 1; ++i)
         vp10_diff_update_prob(&r, &fc->y_mode_prob[j][i]);
 
+#if CONFIG_REF_MV
+    for (i = 0; i < NMV_CONTEXTS; ++i)
+      read_mv_probs(&fc->nmvc[i], cm->allow_high_precision_mv, &r);
+#else
     read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
+#endif
     read_ext_tx_probs(fc, &r);
 #if CONFIG_SUPERTX
     if (!xd->lossless[0])
@@ -3647,7 +3645,14 @@
                  sizeof(cm->counts.comp_ref)));
   assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
   assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
+#if CONFIG_REF_MV
+  assert(!memcmp(&cm->counts.mv[0], &zero_counts.mv[0],
+                 sizeof(cm->counts.mv[0])));
+  assert(!memcmp(&cm->counts.mv[1], &zero_counts.mv[1],
+                 sizeof(cm->counts.mv[0])));
+#else
   assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
+#endif
   assert(!memcmp(cm->counts.inter_ext_tx, zero_counts.inter_ext_tx,
                  sizeof(cm->counts.inter_ext_tx)));
   assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx,
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index 401298f..7a8b47f 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -891,11 +891,20 @@
 
 static INLINE int assign_mv(VP10_COMMON *cm, MACROBLOCKD *xd,
                             PREDICTION_MODE mode,
+#if CONFIG_REF_MV
+                            int block,
+#endif
                             int_mv mv[2], int_mv ref_mv[2],
                             int_mv nearest_mv[2], int_mv near_mv[2],
                             int is_compound, int allow_hp, vpx_reader *r) {
   int i;
   int ret = 1;
+#if CONFIG_REF_MV
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  BLOCK_SIZE bsize = mbmi->sb_type;
+  int_mv *pred_mv = (bsize >= BLOCK_8X8) ?
+      mbmi->pred_mv : xd->mi[0]->bmi[block].pred_mv;
+#endif
 
   switch (mode) {
 #if CONFIG_EXT_INTER
@@ -903,11 +912,26 @@
 #endif  // CONFIG_EXT_INTER
     case NEWMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if !CONFIG_REF_MV
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+#endif
       for (i = 0; i < 1 + is_compound; ++i) {
+#if CONFIG_REF_MV
+        int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[i]],
+                                   xd->ref_mv_stack[mbmi->ref_frame[i]]);
+        nmv_context_counts *const mv_counts =
+            counts ? &counts->mv[nmv_ctx] : NULL;
+        read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc[nmv_ctx],
+                mv_counts, allow_hp);
+#else
         read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts,
                 allow_hp);
+#endif
         ret = ret && is_mv_valid(&mv[i].as_mv);
+
+#if CONFIG_REF_MV
+        pred_mv[i].as_int = ref_mv[i].as_int;
+#endif
       }
       break;
     }
@@ -915,28 +939,58 @@
       mv[0].as_int = nearest_mv[0].as_int;
       if (is_compound)
         mv[1].as_int = nearest_mv[1].as_int;
+
+#if CONFIG_REF_MV
+      pred_mv[0].as_int = nearest_mv[0].as_int;
+      if (is_compound)
+        pred_mv[1].as_int = nearest_mv[1].as_int;
+#endif
       break;
     }
     case NEARMV: {
       mv[0].as_int = near_mv[0].as_int;
       if (is_compound)
         mv[1].as_int = near_mv[1].as_int;
+
+#if CONFIG_REF_MV
+      pred_mv[0].as_int = near_mv[0].as_int;
+      if (is_compound)
+        pred_mv[1].as_int = near_mv[1].as_int;
+#endif
       break;
     }
     case ZEROMV: {
       mv[0].as_int = 0;
       if (is_compound)
         mv[1].as_int = 0;
+
+#if CONFIG_REF_MV
+      pred_mv[0].as_int = 0;
+      if (is_compound)
+        pred_mv[1].as_int = 0;
+#endif
       break;
     }
 #if CONFIG_EXT_INTER
     case NEW_NEWMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if !CONFIG_REF_MV
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+#endif
       assert(is_compound);
       for (i = 0; i < 2; ++i) {
+#if CONFIG_REF_MV
+        int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[i]],
+                                   xd->ref_mv_stack[mbmi->ref_frame[i]]);
+        nmv_context_counts *const mv_counts =
+            counts ? &counts->mv[nmv_ctx] : NULL;
+        read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv,
+                &cm->fc->nmvc[nmv_ctx], mv_counts,
+                allow_hp);
+#else
         read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts,
                 allow_hp);
+#endif
         ret = ret && is_mv_valid(&mv[i].as_mv);
       }
       break;
@@ -961,40 +1015,83 @@
     }
     case NEW_NEARESTMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+      int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[0]],
+                                 xd->ref_mv_stack[mbmi->ref_frame[0]]);
+      nmv_context_counts *const mv_counts =
+          counts ? &counts->mv[nmv_ctx] : NULL;
+      read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv,
+              &cm->fc->nmvc[nmv_ctx], mv_counts,
+              allow_hp);
+#else
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
-      assert(is_compound);
       read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, &cm->fc->nmvc, mv_counts,
               allow_hp);
+#endif
+      assert(is_compound);
       ret = ret && is_mv_valid(&mv[0].as_mv);
       mv[1].as_int = nearest_mv[1].as_int;
       break;
     }
     case NEAREST_NEWMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+      int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[1]],
+                                 xd->ref_mv_stack[mbmi->ref_frame[1]]);
+      nmv_context_counts *const mv_counts =
+          counts ? &counts->mv[nmv_ctx] : NULL;
+      mv[0].as_int = nearest_mv[0].as_int;
+      read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv,
+              &cm->fc->nmvc[nmv_ctx], mv_counts,
+              allow_hp);
+#else
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
-      assert(is_compound);
       mv[0].as_int = nearest_mv[0].as_int;
       read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, &cm->fc->nmvc, mv_counts,
               allow_hp);
+#endif
+      assert(is_compound);
       ret = ret && is_mv_valid(&mv[1].as_mv);
       break;
     }
     case NEAR_NEWMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+      int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[1]],
+                                 xd->ref_mv_stack[mbmi->ref_frame[1]]);
+      nmv_context_counts *const mv_counts =
+          counts ? &counts->mv[nmv_ctx] : NULL;
+      mv[0].as_int = near_mv[0].as_int;
+      read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv,
+              &cm->fc->nmvc[nmv_ctx], mv_counts,
+              allow_hp);
+#else
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
-      assert(is_compound);
       mv[0].as_int = near_mv[0].as_int;
       read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, &cm->fc->nmvc, mv_counts,
               allow_hp);
+#endif
+      assert(is_compound);
+
       ret = ret && is_mv_valid(&mv[1].as_mv);
       break;
     }
     case NEW_NEARMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+      int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[0]],
+                                 xd->ref_mv_stack[mbmi->ref_frame[0]]);
+      nmv_context_counts *const mv_counts =
+          counts ? &counts->mv[nmv_ctx] : NULL;
+      read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv,
+              &cm->fc->nmvc[nmv_ctx], mv_counts,
+              allow_hp);
+#else
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
-      assert(is_compound);
       read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, &cm->fc->nmvc, mv_counts,
               allow_hp);
+#endif
+      assert(is_compound);
       ret = ret && is_mv_valid(&mv[0].as_mv);
       mv[1].as_int = near_mv[1].as_int;
       break;
@@ -1284,7 +1381,11 @@
 #endif  // CONFIG_EXT_INTER
         }
 
-        if (!assign_mv(cm, xd, b_mode, block,
+        if (!assign_mv(cm, xd, b_mode,
+#if CONFIG_REF_MV
+                       j,
+#endif
+                       block,
 #if CONFIG_EXT_INTER
                        ref_mv[mv_idx],
 #else
@@ -1307,12 +1408,20 @@
       }
     }
 
+#if CONFIG_REF_MV
+    mbmi->pred_mv[0].as_int = mi->bmi[3].pred_mv[0].as_int;
+    mbmi->pred_mv[1].as_int = mi->bmi[3].pred_mv[1].as_int;
+#endif
     mi->mbmi.mode = b_mode;
 
     mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
     mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
   } else {
-    xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv,
+    xd->corrupted |= !assign_mv(cm, xd, mbmi->mode,
+#if CONFIG_REF_MV
+                                0,
+#endif
+                                mbmi->mv,
 #if CONFIG_EXT_INTER
                                 mbmi->mode == NEWFROMNEARMV ?
                                               nearmv : nearestmv,
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 930f73f..7134672 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -42,6 +42,9 @@
 #if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
 static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
   {{0, 1}, {4, 3}, {3, 2}, {5, 3}};
+#elif CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 5
+static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
+  {{0, 1}, {4, 3}, {6, 3}, {5, 3}, {7, 3}};
 #else
 static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
   {{0, 1}, {2, 2}, {3, 2}};
@@ -882,7 +885,9 @@
 #endif
                                 vpx_writer *w) {
   VP10_COMMON *const cm = &cpi->common;
+#if !CONFIG_REF_MV
   const nmv_context *nmvc = &cm->fc->nmvc;
+#endif
   const MACROBLOCK *x = &cpi->td.mb;
   const MACROBLOCKD *xd = &x->e_mbd;
   const struct segmentation *const seg = &cm->seg;
@@ -1070,20 +1075,39 @@
 #else
           if (b_mode == NEWMV) {
 #endif  // CONFIG_EXT_INTER
-            for (ref = 0; ref < 1 + is_compound; ++ref)
+            for (ref = 0; ref < 1 + is_compound; ++ref) {
+#if CONFIG_REF_MV
+              int nmv_ctx =
+                  vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[ref]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[ref]]);
+              const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
               vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv,
 #if CONFIG_EXT_INTER
                              &mi->bmi[j].ref_mv[ref].as_mv,
 #else
                              &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv,
 #endif  // CONFIG_EXT_INTER
-                            nmvc, allow_hp);
+                             nmvc, allow_hp);
+            }
           }
 #if CONFIG_EXT_INTER
           else if (b_mode == NEAREST_NEWMV || b_mode == NEAR_NEWMV) {
+#if CONFIG_REF_MV
+            int nmv_ctx =
+                vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+                             mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+            const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
             vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[1].as_mv,
                            &mi->bmi[j].ref_mv[1].as_mv, nmvc, allow_hp);
           } else if (b_mode == NEW_NEARESTMV || b_mode == NEW_NEARMV) {
+#if CONFIG_REF_MV
+            int nmv_ctx =
+                vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+                             mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+            const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
             vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[0].as_mv,
                            &mi->bmi[j].ref_mv[0].as_mv, nmvc, allow_hp);
           }
@@ -1096,9 +1120,14 @@
 #else
       if (mode == NEWMV) {
 #endif  // CONFIG_EXT_INTER
-        for (ref = 0; ref < 1 + is_compound; ++ref)
+        for (ref = 0; ref < 1 + is_compound; ++ref) {
+#if CONFIG_REF_MV
+              int nmv_ctx =
+                  vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[ref]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[ref]]);
+              const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
 #if CONFIG_EXT_INTER
-        {
           if (mode == NEWFROMNEARMV)
             vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
                            &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][1].as_mv,
@@ -1108,13 +1137,25 @@
           vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
                         &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc,
                         allow_hp);
-#if CONFIG_EXT_INTER
         }
+#if CONFIG_EXT_INTER
       } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
+#if CONFIG_REF_MV
+            int nmv_ctx =
+                vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+                             mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+            const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
         vp10_encode_mv(cpi, w, &mbmi->mv[1].as_mv,
                        &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv, nmvc,
                        allow_hp);
       } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
+#if CONFIG_REF_MV
+            int nmv_ctx =
+                vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+                             mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+            const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
         vp10_encode_mv(cpi, w, &mbmi->mv[0].as_mv,
                        &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv, nmvc,
                        allow_hp);
@@ -2449,7 +2490,11 @@
                        counts->y_mode[i], INTRA_MODES, &header_bc);
 
     vp10_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc,
-                        &counts->mv);
+#if CONFIG_REF_MV
+                         counts->mv);
+#else
+                         &counts->mv);
+#endif
     update_ext_tx_probs(cm, &header_bc);
 #if CONFIG_SUPERTX
     if (!xd->lossless[0])
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h
index 3c49d14..0c3e48f 100644
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -107,12 +107,21 @@
   unsigned int pred_sse[MAX_REF_FRAMES];
   int pred_mv_sad[MAX_REF_FRAMES];
 
+#if CONFIG_REF_MV
+  int *nmvjointcost;
+  int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
+  int *nmvcost[NMV_CONTEXTS][2];
+  int *nmvcost_hp[NMV_CONTEXTS][2];
+  int **mv_cost_stack[NMV_CONTEXTS];
+  int *nmvjointsadcost;
+#else
   int nmvjointcost[MV_JOINTS];
   int *nmvcost[2];
   int *nmvcost_hp[2];
-  int **mvcost;
-
   int nmvjointsadcost[MV_JOINTS];
+#endif
+
+  int **mvcost;
   int *nmvsadcost[2];
   int *nmvsadcost_hp[2];
   int **mvsadcost;
diff --git a/vp10/encoder/encodemv.c b/vp10/encoder/encodemv.c
index 4124c4a..61429aa 100644
--- a/vp10/encoder/encodemv.c
+++ b/vp10/encoder/encodemv.c
@@ -157,9 +157,49 @@
 }
 
 void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vpx_writer *w,
-                         nmv_context_counts *const counts) {
+                          nmv_context_counts *const nmv_counts) {
   int i, j;
+#if CONFIG_REF_MV
+  int nmv_ctx = 0;
+  for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
+    nmv_context *const mvc = &cm->fc->nmvc[nmv_ctx];
+    nmv_context_counts *const counts = &nmv_counts[nmv_ctx];
+    write_mv_update(vp10_mv_joint_tree, mvc->joints, counts->joints,
+                    MV_JOINTS, w);
+
+    for (i = 0; i < 2; ++i) {
+      nmv_component *comp = &mvc->comps[i];
+      nmv_component_counts *comp_counts = &counts->comps[i];
+
+      update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB);
+      write_mv_update(vp10_mv_class_tree, comp->classes, comp_counts->classes,
+                      MV_CLASSES, w);
+      write_mv_update(vp10_mv_class0_tree, comp->class0, comp_counts->class0,
+                      CLASS0_SIZE, w);
+      for (j = 0; j < MV_OFFSET_BITS; ++j)
+        update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB);
+    }
+
+    for (i = 0; i < 2; ++i) {
+      for (j = 0; j < CLASS0_SIZE; ++j)
+        write_mv_update(vp10_mv_fp_tree, mvc->comps[i].class0_fp[j],
+                        counts->comps[i].class0_fp[j], MV_FP_SIZE, w);
+
+      write_mv_update(vp10_mv_fp_tree, mvc->comps[i].fp, counts->comps[i].fp,
+                      MV_FP_SIZE, w);
+    }
+
+    if (usehp) {
+      for (i = 0; i < 2; ++i) {
+        update_mv(w, counts->comps[i].class0_hp, &mvc->comps[i].class0_hp,
+                  MV_UPDATE_PROB);
+        update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB);
+      }
+    }
+  }
+#else
   nmv_context *const mvc = &cm->fc->nmvc;
+  nmv_context_counts *const counts = nmv_counts;
 
   write_mv_update(vp10_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS, w);
 
@@ -192,6 +232,7 @@
       update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB);
     }
   }
+#endif
 }
 
 void vp10_encode_mv(VP10_COMP* cpi, vpx_writer* w,
@@ -227,27 +268,45 @@
 #if CONFIG_EXT_INTER
 static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
                     const int_mv mvs[2],
-                    nmv_context_counts *counts) {
+                    nmv_context_counts *nmv_counts) {
   int i;
   PREDICTION_MODE mode = mbmi->mode;
   int mv_idx = (mode == NEWFROMNEARMV);
+#if !CONFIG_REF_MV
+  nmv_context_counts *counts = nmv_counts;
+#endif
 
   if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
     for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
       const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][mv_idx].as_mv;
       const MV diff = {mvs[i].as_mv.row - ref->row,
                        mvs[i].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
       vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
     }
   } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
     const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv;
     const MV diff = {mvs[1].as_mv.row - ref->row,
                      mvs[1].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
     vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
   } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
     const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
     const MV diff = {mvs[0].as_mv.row - ref->row,
                      mvs[0].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
     vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
   }
 }
@@ -255,36 +314,67 @@
 static void inc_mvs_sub8x8(const MODE_INFO *mi,
                            int block,
                            const int_mv mvs[2],
-                           nmv_context_counts *counts) {
+#if CONFIG_REF_MV
+                           const MB_MODE_INFO_EXT *mbmi_ext,
+#endif
+                           nmv_context_counts *nmv_counts) {
   int i;
   PREDICTION_MODE mode = mi->bmi[block].as_mode;
+#if CONFIG_REF_MV
+  const MB_MODE_INFO *mbmi = &mi->mbmi;
+#else
+  nmv_context_counts *counts = nmv_counts;
+#endif
 
   if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
     for (i = 0; i < 1 + has_second_ref(&mi->mbmi); ++i) {
       const MV *ref = &mi->bmi[block].ref_mv[i].as_mv;
       const MV diff = {mvs[i].as_mv.row - ref->row,
                        mvs[i].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
       vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
     }
   } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
     const MV *ref = &mi->bmi[block].ref_mv[1].as_mv;
     const MV diff = {mvs[1].as_mv.row - ref->row,
                      mvs[1].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
     vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
   } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
     const MV *ref = &mi->bmi[block].ref_mv[0].as_mv;
     const MV diff = {mvs[0].as_mv.row - ref->row,
                      mvs[0].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
     vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
   }
 }
 #else
 static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
                     const int_mv mvs[2],
-                    nmv_context_counts *counts) {
+                    nmv_context_counts *nmv_counts) {
   int i;
+#if !CONFIG_REF_MV
+  nmv_context_counts *counts = nmv_counts;
+#endif
 
   for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
     const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
     const MV diff = {mvs[i].as_mv.row - ref->row,
                      mvs[i].as_mv.col - ref->col};
@@ -310,10 +400,21 @@
 
 #if CONFIG_EXT_INTER
         if (have_newmv_in_inter_mode(mi->bmi[i].as_mode))
-          inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv, &td->counts->mv);
+          inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv,
+#if CONFIG_REF_MV
+                         mbmi_ext,
+                         td->counts->mv);
+#else
+                         &td->counts->mv);
+#endif
 #else
         if (mi->bmi[i].as_mode == NEWMV)
-          inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv, &td->counts->mv);
+          inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv,
+#if CONFIG_REF_MV
+                  td->counts->mv);
+#else
+                  &td->counts->mv);
+#endif
 #endif  // CONFIG_EXT_INTER
       }
     }
@@ -323,7 +424,12 @@
 #else
     if (mbmi->mode == NEWMV)
 #endif  // CONFIG_EXT_INTER
-      inc_mvs(mbmi, mbmi_ext, mbmi->mv, &td->counts->mv);
+      inc_mvs(mbmi, mbmi_ext, mbmi->mv,
+#if CONFIG_REF_MV
+              td->counts->mv);
+#else
+              &td->counts->mv);
+#endif
   }
 }
 
diff --git a/vp10/encoder/encodemv.h b/vp10/encoder/encodemv.h
index 006f6d7..c753d34 100644
--- a/vp10/encoder/encodemv.h
+++ b/vp10/encoder/encodemv.h
@@ -21,7 +21,7 @@
 void vp10_entropy_mv_init(void);
 
 void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vpx_writer *w,
-                         nmv_context_counts *const counts);
+                          nmv_context_counts *const counts);
 
 void vp10_encode_mv(VP10_COMP *cpi, vpx_writer* w, const MV* mv, const MV* ref,
                    const nmv_context* mvctx, int usehp);
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 5cc6dca..3f1dcf8 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -222,6 +222,22 @@
 void vp10_set_high_precision_mv(VP10_COMP *cpi, int allow_high_precision_mv) {
   MACROBLOCK *const mb = &cpi->td.mb;
   cpi->common.allow_high_precision_mv = allow_high_precision_mv;
+
+#if CONFIG_REF_MV
+  if (cpi->common.allow_high_precision_mv) {
+    int i;
+    for (i = 0; i < NMV_CONTEXTS; ++i) {
+      mb->mv_cost_stack[i] = mb->nmvcost_hp[i];
+      mb->mvsadcost = mb->nmvsadcost_hp;
+    }
+  } else {
+    int i;
+    for (i = 0; i < NMV_CONTEXTS; ++i) {
+      mb->mv_cost_stack[i] = mb->nmvcost[i];
+      mb->mvsadcost = mb->nmvsadcost;
+    }
+  }
+#else
   if (cpi->common.allow_high_precision_mv) {
     mb->mvcost = mb->nmvcost_hp;
     mb->mvsadcost = mb->nmvsadcost_hp;
@@ -229,6 +245,7 @@
     mb->mvcost = mb->nmvcost;
     mb->mvsadcost = mb->nmvsadcost;
   }
+#endif
 }
 
 static void setup_frame(VP10_COMP *cpi) {
@@ -338,6 +355,9 @@
 
 static void dealloc_compressor_data(VP10_COMP *cpi) {
   VP10_COMMON *const cm = &cpi->common;
+#if CONFIG_REF_MV
+  int i;
+#endif
 
   vpx_free(cpi->mbmi_ext_base);
   cpi->mbmi_ext_base = NULL;
@@ -351,6 +371,19 @@
   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
   cpi->coding_context.last_frame_seg_map_copy = NULL;
 
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    vpx_free(cpi->nmv_costs[i][0]);
+    vpx_free(cpi->nmv_costs[i][1]);
+    vpx_free(cpi->nmv_costs_hp[i][0]);
+    vpx_free(cpi->nmv_costs_hp[i][1]);
+    cpi->nmv_costs[i][0] = NULL;
+    cpi->nmv_costs[i][1] = NULL;
+    cpi->nmv_costs_hp[i][0] = NULL;
+    cpi->nmv_costs_hp[i][1] = NULL;
+  }
+#endif
+
   vpx_free(cpi->nmvcosts[0]);
   vpx_free(cpi->nmvcosts[1]);
   cpi->nmvcosts[0] = NULL;
@@ -412,12 +445,29 @@
 static void save_coding_context(VP10_COMP *cpi) {
   CODING_CONTEXT *const cc = &cpi->coding_context;
   VP10_COMMON *cm = &cpi->common;
+#if CONFIG_REF_MV
+  int i;
+#endif
 
   // Stores a snapshot of key state variables which can subsequently be
   // restored with a call to vp10_restore_coding_context. These functions are
   // intended for use in a re-code loop in vp10_compress_frame where the
   // quantizer value is adjusted between loop iterations.
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    vp10_copy(cc->nmv_vec_cost[i], cpi->td.mb.nmv_vec_cost[i]);
+    memcpy(cc->nmv_costs[i][0], cpi->nmv_costs[i][0],
+           MV_VALS * sizeof(*cpi->nmv_costs[i][0]));
+    memcpy(cc->nmv_costs[i][1], cpi->nmv_costs[i][1],
+           MV_VALS * sizeof(*cpi->nmv_costs[i][1]));
+    memcpy(cc->nmv_costs_hp[i][0], cpi->nmv_costs_hp[i][0],
+           MV_VALS * sizeof(*cpi->nmv_costs_hp[i][0]));
+    memcpy(cc->nmv_costs_hp[i][1], cpi->nmv_costs_hp[i][1],
+           MV_VALS * sizeof(*cpi->nmv_costs_hp[i][1]));
+  }
+#else
   vp10_copy(cc->nmvjointcost,  cpi->td.mb.nmvjointcost);
+#endif
 
   memcpy(cc->nmvcosts[0], cpi->nmvcosts[0],
          MV_VALS * sizeof(*cpi->nmvcosts[0]));
@@ -440,10 +490,27 @@
 static void restore_coding_context(VP10_COMP *cpi) {
   CODING_CONTEXT *const cc = &cpi->coding_context;
   VP10_COMMON *cm = &cpi->common;
+#if CONFIG_REF_MV
+  int i;
+#endif
 
   // Restore key state variables to the snapshot state stored in the
   // previous call to vp10_save_coding_context.
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    vp10_copy(cpi->td.mb.nmv_vec_cost[i], cc->nmv_vec_cost[i]);
+    memcpy(cpi->nmv_costs[i][0], cc->nmv_costs[i][0],
+           MV_VALS * sizeof(*cc->nmv_costs[i][0]));
+    memcpy(cpi->nmv_costs[i][1], cc->nmv_costs[i][1],
+           MV_VALS * sizeof(*cc->nmv_costs[i][1]));
+    memcpy(cpi->nmv_costs_hp[i][0], cc->nmv_costs_hp[i][0],
+           MV_VALS * sizeof(*cc->nmv_costs_hp[i][0]));
+    memcpy(cpi->nmv_costs_hp[i][1], cc->nmv_costs_hp[i][1],
+           MV_VALS * sizeof(*cc->nmv_costs_hp[i][1]));
+  }
+#else
   vp10_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
+#endif
 
   memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0]));
   memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1]));
@@ -1560,12 +1627,14 @@
 #endif
 #define log2f(x) (log (x) / (float) M_LOG2_E)
 
+#if !CONFIG_REF_MV
 static void cal_nmvjointsadcost(int *mvjointsadcost) {
   mvjointsadcost[0] = 600;
   mvjointsadcost[1] = 300;
   mvjointsadcost[2] = 300;
   mvjointsadcost[3] = 300;
 }
+#endif
 
 static void cal_nmvsadcosts(int *mvsadcost[2]) {
   int i = 1;
@@ -1640,6 +1709,19 @@
 
   realloc_segmentation_maps(cpi);
 
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs[i][0],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs[i][0])));
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs[i][1],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs[i][1])));
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs_hp[i][0],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs_hp[i][0])));
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs_hp[i][1],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs_hp[i][1])));
+  }
+#endif
+
   CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
   CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
@@ -1714,15 +1796,24 @@
 
   cpi->first_time_stamp_ever = INT64_MAX;
 
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    cpi->td.mb.nmvcost[i][0] = &cpi->nmv_costs[i][0][MV_MAX];
+    cpi->td.mb.nmvcost[i][1] = &cpi->nmv_costs[i][1][MV_MAX];
+    cpi->td.mb.nmvcost_hp[i][0] = &cpi->nmv_costs_hp[i][0][MV_MAX];
+    cpi->td.mb.nmvcost_hp[i][1] = &cpi->nmv_costs_hp[i][1][MV_MAX];
+  }
+#else
   cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
   cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
   cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
+  cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
+  cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
+#endif
   cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
   cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
   cal_nmvsadcosts(cpi->td.mb.nmvsadcost);
 
-  cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
-  cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
   cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
   cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
   cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index 59c7682..292494c 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -55,6 +55,12 @@
   int nmvcosts[2][MV_VALS];
   int nmvcosts_hp[2][MV_VALS];
 
+#if CONFIG_REF_MV
+  int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
+  int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
+  int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS];
+#endif
+
   unsigned char *last_frame_seg_map_copy;
 
   // 0 = Intra, Last, GF, ARF
@@ -352,6 +358,11 @@
 
   CODING_CONTEXT coding_context;
 
+#if CONFIG_REF_MV
+  int *nmv_costs[NMV_CONTEXTS][2];
+  int *nmv_costs_hp[NMV_CONTEXTS][2];
+#endif
+
   int *nmvcosts[2];
   int *nmvcosts_hp[2];
   int *nmvsadcosts[2];
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index dd19e02..6e3b06a 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -97,12 +97,22 @@
 
 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
                           int sad_per_bit) {
+#if CONFIG_REF_MV
+  const MV diff = { (mv->row - ref->row) << 3,
+                    (mv->col - ref->col) << 3 };
+  return ROUND_POWER_OF_TWO(
+      (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->mvsadcost) *
+          sad_per_bit,
+      VP9_PROB_COST_SHIFT);
+#else
   const MV diff = { mv->row - ref->row,
                     mv->col - ref->col };
+
   return ROUND_POWER_OF_TWO(
       (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) *
           sad_per_bit,
       VP9_PROB_COST_SHIFT);
+#endif
 }
 
 void vp10_init_dsmotion_compensation(search_site_config *cfg, int stride) {
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index bf73064..299b761 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -331,6 +331,18 @@
   }
 }
 
+#if CONFIG_REF_MV
+// Points x's MV rate and SAD cost tables at the entries for the nmv context
+// that vp10_nmv_ctx() returns for ref_frame's ref_mv_count and ref_mv_stack.
+void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame) {
+  MB_MODE_INFO_EXT *const ext = x->mbmi_ext;
+  const int ctx = vp10_nmv_ctx(ext->ref_mv_count[ref_frame],
+                               ext->ref_mv_stack[ref_frame]);
+  x->mvsadcost = x->mvcost = x->mv_cost_stack[ctx];
+  x->nmvjointsadcost = x->nmvjointcost = x->nmv_vec_cost[ctx];
+}
+#endif  // CONFIG_REF_MV
+
 void vp10_initialize_rd_consts(VP10_COMP *cpi) {
   VP10_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->td.mb;
@@ -361,10 +373,26 @@
   fill_mode_costs(cpi);
 
   if (!frame_is_intra_only(cm)) {
+#if CONFIG_REF_MV
+    int nmv_ctx;
+    for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {  // each nmv context
+      vp10_build_nmv_cost_table(x->nmv_vec_cost[nmv_ctx],
+                                cm->allow_high_precision_mv ?
+                                  x->nmvcost_hp[nmv_ctx] : x->nmvcost[nmv_ctx],
+                                &cm->fc->nmvc[nmv_ctx],
+                                cm->allow_high_precision_mv);
+    }
+    x->mvcost = x->mv_cost_stack[0];  // context 0 until vp10_set_mvcost()
+    x->nmvjointcost = x->nmv_vec_cost[0];
+    x->mvsadcost = x->mvcost;  // SAD cost pointers alias the rate tables
+    x->nmvjointsadcost = x->nmvjointcost;
+#else
     vp10_build_nmv_cost_table(x->nmvjointcost,
                              cm->allow_high_precision_mv ? x->nmvcost_hp
                                                          : x->nmvcost,
                              &cm->fc->nmvc, cm->allow_high_precision_mv);
+#endif  // CONFIG_REF_MV
+
 #if CONFIG_REF_MV
     for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
       cpi->newmv_mode_cost[i][0] = vp10_cost_bit(cm->fc->newmv_prob[i], 0);
diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h
index 5d6f8e6..fdbe431 100644
--- a/vp10/encoder/rd.h
+++ b/vp10/encoder/rd.h
@@ -292,6 +292,10 @@
 
 void vp10_init_me_luts(void);
 
+#if CONFIG_REF_MV
+void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame);
+#endif  // CONFIG_REF_MV
+
 void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                               const struct macroblockd_plane *pd,
                               ENTROPY_CONTEXT t_above[16],
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index acff554..403dd19 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -3554,6 +3554,20 @@
 
   mic->bmi[i].as_mode = mode;
 
+#if CONFIG_REF_MV
+  if (mode == NEWMV) {  // NEWMV: predictor is the first ref_mvs entry
+    mic->bmi[i].pred_mv[0].as_int =
+        mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_int;
+    if (is_compound)
+      mic->bmi[i].pred_mv[1].as_int =
+          mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_int;
+  } else {  // other modes: predictor is the mv held in this_mv
+    mic->bmi[i].pred_mv[0].as_int = this_mv[0].as_int;
+    if (is_compound)
+      mic->bmi[i].pred_mv[1].as_int = this_mv[1].as_int;
+  }
+#endif  // CONFIG_REF_MV
+
   for (idy = 0; idy < num_4x4_blocks_high; ++idy)
     for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
       memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));
@@ -3997,6 +4011,10 @@
     tmp_mv.col >>= 3;
     tmp_mv.row >>= 3;
 
+#if CONFIG_REF_MV
+    vp10_set_mvcost(x, refs[id]);
+#endif
+
     // Small-range full-pixel motion search.
     bestsme = vp10_refining_search_8p_c(x, &tmp_mv, sadpb,
                                        search_range,
@@ -4294,6 +4312,9 @@
 
           vp10_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
 
+#if CONFIG_REF_MV
+          vp10_set_mvcost(x, mbmi->ref_frame[0]);
+#endif
           bestsme = vp10_full_pixel_search(
               cpi, x, bsize, &mvp_full, step_param, sadpb,
               cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
@@ -4830,6 +4851,10 @@
   pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
   pred_mv[2] = x->pred_mv[ref];
 
+#if CONFIG_REF_MV
+  vp10_set_mvcost(x, ref);
+#endif
+
   if (scaled_ref_frame) {
     int i;
     // Swap out the reference frame for a version that's been scaled to
@@ -7203,6 +7228,15 @@
   *mbmi = best_mbmode;
   x->skip |= best_skip2;
 
+#if CONFIG_REF_MV
+  for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+    if (mbmi->mode != NEWMV)
+      mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;  // reuse the chosen mv
+    else
+      mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+  }
+#endif  // CONFIG_REF_MV
+
   for (i = 0; i < REFERENCE_MODES; ++i) {
     if (best_pred_rd[i] == INT64_MAX)
       best_pred_diff[i] = INT_MIN;
@@ -8129,6 +8163,10 @@
 
     mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
     mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
+#if CONFIG_REF_MV
+    mbmi->pred_mv[0].as_int = xd->mi[0]->bmi[3].pred_mv[0].as_int;
+    mbmi->pred_mv[1].as_int = xd->mi[0]->bmi[3].pred_mv[1].as_int;
+#endif
   }
 
   for (i = 0; i < REFERENCE_MODES; ++i) {
diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c
index 035b66a..afe555d 100644
--- a/vp10/encoder/temporal_filter.c
+++ b/vp10/encoder/temporal_filter.c
@@ -293,6 +293,13 @@
   step_param = mv_sf->reduce_first_step_size;
   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
 
+#if CONFIG_REF_MV
+  x->mvcost = x->mv_cost_stack[0];  // always use the context-0 tables here
+  x->nmvjointcost = x->nmv_vec_cost[0];
+  x->mvsadcost = x->mvcost;  // SAD cost pointers alias the rate tables
+  x->nmvjointsadcost = x->nmvjointcost;
+#endif  // CONFIG_REF_MV
+
   // Ignore mv costing by sending NULL pointer instead of cost arrays
   vp10_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
                  cond_cost_list(cpi, cost_list),