Merge "Extend HBDMetricTest" into nextgenv2
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index cf78cbb..4d7f921 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -79,6 +79,9 @@
 typedef struct {
   PREDICTION_MODE as_mode;
   int_mv as_mv[2];  // first, second inter predictor motion vectors
+#if CONFIG_REF_MV
+  int_mv pred_mv[2];
+#endif
 #if CONFIG_EXT_INTER
   int_mv ref_mv[2];
 #endif  // CONFIG_EXT_INTER
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index d581a08..05918ee 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -93,7 +93,11 @@
   vpx_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
 #endif
   vpx_prob skip_probs[SKIP_CONTEXTS];
+#if CONFIG_REF_MV
+  nmv_context nmvc[NMV_CONTEXTS];
+#else
   nmv_context nmvc;
+#endif
   int initialized;
 #if CONFIG_EXT_TX
   vpx_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1];
@@ -150,7 +154,11 @@
   unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
 #endif
   unsigned int skip[SKIP_CONTEXTS][2];
+#if CONFIG_REF_MV
+  nmv_context_counts mv[NMV_CONTEXTS];
+#else
   nmv_context_counts mv;
+#endif
 #if CONFIG_EXT_TX
   unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
   unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
diff --git a/vp10/common/entropymv.c b/vp10/common/entropymv.c
index a9946ee..5be9797 100644
--- a/vp10/common/entropymv.c
+++ b/vp10/common/entropymv.c
@@ -185,7 +185,45 @@
 
 void vp10_adapt_mv_probs(VP10_COMMON *cm, int allow_hp) {
   int i, j;
+#if CONFIG_REF_MV
+  int idx;
+  for (idx = 0; idx < NMV_CONTEXTS; ++idx) {
+    nmv_context *fc = &cm->fc->nmvc[idx];
+    const nmv_context *pre_fc =
+        &cm->frame_contexts[cm->frame_context_idx].nmvc[idx];
+    const nmv_context_counts *counts = &cm->counts.mv[idx];
 
+    vpx_tree_merge_probs(vp10_mv_joint_tree, pre_fc->joints, counts->joints,
+                         fc->joints);
+
+    for (i = 0; i < 2; ++i) {
+      nmv_component *comp = &fc->comps[i];
+      const nmv_component *pre_comp = &pre_fc->comps[i];
+      const nmv_component_counts *c = &counts->comps[i];
+
+      comp->sign = mode_mv_merge_probs(pre_comp->sign, c->sign);
+      vpx_tree_merge_probs(vp10_mv_class_tree, pre_comp->classes, c->classes,
+                           comp->classes);
+      vpx_tree_merge_probs(vp10_mv_class0_tree, pre_comp->class0, c->class0,
+                           comp->class0);
+
+      for (j = 0; j < MV_OFFSET_BITS; ++j)
+        comp->bits[j] = mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]);
+
+      for (j = 0; j < CLASS0_SIZE; ++j)
+        vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->class0_fp[j],
+                             c->class0_fp[j], comp->class0_fp[j]);
+
+      vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
+
+      if (allow_hp) {
+        comp->class0_hp = mode_mv_merge_probs(pre_comp->class0_hp,
+                                              c->class0_hp);
+        comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp);
+      }
+    }
+  }
+#else
   nmv_context *fc = &cm->fc->nmvc;
   const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc;
   const nmv_context_counts *counts = &cm->counts.mv;
@@ -218,8 +256,15 @@
       comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp);
     }
   }
+#endif
 }
 
 void vp10_init_mv_probs(VP10_COMMON *cm) {
+#if CONFIG_REF_MV
+  int i;
+  for (i = 0; i < NMV_CONTEXTS; ++i)  // one default table per NMV context
+    cm->fc->nmvc[i] = default_nmv_context;
+#else  // !CONFIG_REF_MV: a single MV context
   cm->fc->nmvc = default_nmv_context;
+#endif  // CONFIG_REF_MV
 }
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index af6ef36..e1f3168 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -227,6 +227,8 @@
 #define SKIP_CONTEXTS 3
 
 #if CONFIG_REF_MV
+#define NMV_CONTEXTS 2
+
 #define NEWMV_MODE_CONTEXTS  7
 #define ZEROMV_MODE_CONTEXTS 2
 #define REFMV_MODE_CONTEXTS  9
diff --git a/vp10/common/mv.h b/vp10/common/mv.h
index 904d372..4523705 100644
--- a/vp10/common/mv.h
+++ b/vp10/common/mv.h
@@ -38,6 +38,7 @@
 typedef struct candidate_mv {
   int_mv this_mv;
   int_mv comp_mv;
+  int_mv pred_mv;  // copied from the candidate block's pred_mv
   int weight;
 } CANDIDATE_MV;
 #endif
diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c
index 1b7fb7d..5a2def0 100644
--- a/vp10/common/mvref_common.c
+++ b/vp10/common/mvref_common.c
@@ -38,6 +38,8 @@
         // Add a new item to the list.
         if (index == *refmv_count) {
           ref_mv_stack[index].this_mv = this_refmv;
+          ref_mv_stack[index].pred_mv =
+              get_sub_block_pred_mv(candidate_mi, ref, col, block);
           ref_mv_stack[index].weight = 2 * weight;
           ++(*refmv_count);
 
@@ -63,6 +65,8 @@
           // Add a new item to the list.
           if (index == *refmv_count) {
             ref_mv_stack[index].this_mv = this_refmv;
+            ref_mv_stack[index].pred_mv =
+                get_sub_block_pred_mv(candidate_mi, ref, col, alt_block);
             ref_mv_stack[index].weight = weight;
             ++(*refmv_count);
 
diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h
index b02c0dd..b3a8beb 100644
--- a/vp10/common/mvref_common.h
+++ b/vp10/common/mvref_common.h
@@ -150,6 +150,16 @@
           : candidate->mbmi.mv[which_mv];
 }
 
+#if CONFIG_REF_MV
+// pred_mv[which_mv] from the sub-block (sub-8x8, block_idx >= 0) or the mbmi.
+static INLINE int_mv get_sub_block_pred_mv(
+    const MODE_INFO *candidate, int which_mv, int search_col, int block_idx) {
+  if (block_idx < 0 || candidate->mbmi.sb_type >= BLOCK_8X8)
+    return candidate->mbmi.pred_mv[which_mv];
+  return candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
+      .pred_mv[which_mv];
+}
+#endif
 
 // Performs mv sign inversion if indicated by the reference frame combination.
 static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
@@ -218,6 +228,22 @@
 }
 
 #if CONFIG_REF_MV
+// NMV context (always 0 under CONFIG_EXT_INTER): 1 when ref_mv_stack[0] has
+// weight > REF_CAT_LEVEL and |this_mv - pred_mv| < 8 on both axes, else 0.
+static INLINE int vp10_nmv_ctx(const uint8_t ref_mv_count,
+                               const CANDIDATE_MV *ref_mv_stack) {
+#if CONFIG_EXT_INTER
+  return 0;
+#endif
+  // Test the count first so slot 0 of an empty stack is never read.
+  if (ref_mv_count > 0 && ref_mv_stack[0].weight > REF_CAT_LEVEL) {
+    const MV *const mv = &ref_mv_stack[0].this_mv.as_mv;
+    const MV *const pred = &ref_mv_stack[0].pred_mv.as_mv;
+    return abs(mv->row - pred->row) < 8 && abs(mv->col - pred->col) < 8;
+  }
+  return 0;
+}
+
 static INLINE int8_t vp10_ref_frame_type(const MV_REFERENCE_FRAME *const rf) {
   if (rf[1] > INTRA_FRAME)
     return rf[0] + ALTREF_FRAME;
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index 6e959ed..7f04a09 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -447,6 +447,39 @@
     for (j = 0; j < 2; j++)
       cm->counts.skip[i][j] += counts->skip[i][j];
 
+#if CONFIG_REF_MV
+  for (m = 0; m < NMV_CONTEXTS; ++m) {
+    for (i = 0; i < MV_JOINTS; i++)
+      cm->counts.mv[m].joints[i] += counts->mv[m].joints[i];
+
+    for (k = 0; k < 2; k++) {
+      nmv_component_counts *comps = &cm->counts.mv[m].comps[k];
+      nmv_component_counts *comps_t = &counts->mv[m].comps[k];
+
+      for (i = 0; i < 2; i++) {
+        comps->sign[i] += comps_t->sign[i];
+        comps->class0_hp[i] += comps_t->class0_hp[i];
+        comps->hp[i] += comps_t->hp[i];
+      }
+
+      for (i = 0; i < MV_CLASSES; i++)
+        comps->classes[i] += comps_t->classes[i];
+
+      for (i = 0; i < CLASS0_SIZE; i++) {
+        comps->class0[i] += comps_t->class0[i];
+        for (j = 0; j < MV_FP_SIZE; j++)
+          comps->class0_fp[i][j] += comps_t->class0_fp[i][j];
+      }
+
+      for (i = 0; i < MV_OFFSET_BITS; i++)
+        for (j = 0; j < 2; j++)
+          comps->bits[i][j] += comps_t->bits[i][j];
+
+      for (i = 0; i < MV_FP_SIZE; i++)
+        comps->fp[i] += comps_t->fp[i];
+    }
+  }
+#else
   for (i = 0; i < MV_JOINTS; i++)
     cm->counts.mv.joints[i] += counts->mv.joints[i];
 
@@ -476,6 +509,7 @@
     for (i = 0; i < MV_FP_SIZE; i++)
       comps->fp[i] += comps_t->fp[i];
   }
+#endif
 
 #if CONFIG_EXT_TX
   for (i = 0; i < EXT_TX_SIZES; i++) {
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index a003d7a..a976356 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -3566,7 +3566,9 @@
         for (i = 0; i < INTRA_MODES - 1; ++i)
           vp10_diff_update_prob(&r, &cm->kf_y_prob[k][j][i]);
   } else {
+#if !CONFIG_REF_MV
     nmv_context *const nmvc = &fc->nmvc;
+#endif
 
     read_inter_mode_probs(fc, &r);
 
@@ -3593,7 +3595,12 @@
       for (i = 0; i < INTRA_MODES - 1; ++i)
         vp10_diff_update_prob(&r, &fc->y_mode_prob[j][i]);
 
+#if CONFIG_REF_MV
+    for (i = 0; i < NMV_CONTEXTS; ++i)
+      read_mv_probs(&fc->nmvc[i], cm->allow_high_precision_mv, &r);
+#else
     read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
+#endif
     read_ext_tx_probs(fc, &r);
 #if CONFIG_SUPERTX
     if (!xd->lossless[0])
@@ -3647,7 +3654,14 @@
                  sizeof(cm->counts.comp_ref)));
   assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
   assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
+#if CONFIG_REF_MV
+  assert(!memcmp(&cm->counts.mv[0], &zero_counts.mv[0],
+                 sizeof(cm->counts.mv[0])));
+  assert(!memcmp(&cm->counts.mv[1], &zero_counts.mv[1],
+                 sizeof(cm->counts.mv[0])));
+#else
   assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
+#endif
   assert(!memcmp(cm->counts.inter_ext_tx, zero_counts.inter_ext_tx,
                  sizeof(cm->counts.inter_ext_tx)));
   assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx,
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index 401298f..7a8b47f 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -891,11 +891,20 @@
 
 static INLINE int assign_mv(VP10_COMMON *cm, MACROBLOCKD *xd,
                             PREDICTION_MODE mode,
+#if CONFIG_REF_MV
+                            int block,
+#endif
                             int_mv mv[2], int_mv ref_mv[2],
                             int_mv nearest_mv[2], int_mv near_mv[2],
                             int is_compound, int allow_hp, vpx_reader *r) {
   int i;
   int ret = 1;
+#if CONFIG_REF_MV
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  BLOCK_SIZE bsize = mbmi->sb_type;
+  int_mv *pred_mv = (bsize >= BLOCK_8X8) ?
+      mbmi->pred_mv : xd->mi[0]->bmi[block].pred_mv;
+#endif
 
   switch (mode) {
 #if CONFIG_EXT_INTER
@@ -903,11 +912,26 @@
 #endif  // CONFIG_EXT_INTER
     case NEWMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if !CONFIG_REF_MV
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+#endif
       for (i = 0; i < 1 + is_compound; ++i) {
+#if CONFIG_REF_MV
+        int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[i]],
+                                   xd->ref_mv_stack[mbmi->ref_frame[i]]);
+        nmv_context_counts *const mv_counts =
+            counts ? &counts->mv[nmv_ctx] : NULL;
+        read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc[nmv_ctx],
+                mv_counts, allow_hp);
+#else
         read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts,
                 allow_hp);
+#endif
         ret = ret && is_mv_valid(&mv[i].as_mv);
+
+#if CONFIG_REF_MV
+        pred_mv[i].as_int = ref_mv[i].as_int;
+#endif
       }
       break;
     }
@@ -915,28 +939,58 @@
       mv[0].as_int = nearest_mv[0].as_int;
       if (is_compound)
         mv[1].as_int = nearest_mv[1].as_int;
+
+#if CONFIG_REF_MV
+      pred_mv[0].as_int = nearest_mv[0].as_int;
+      if (is_compound)
+        pred_mv[1].as_int = nearest_mv[1].as_int;
+#endif
       break;
     }
     case NEARMV: {
       mv[0].as_int = near_mv[0].as_int;
       if (is_compound)
         mv[1].as_int = near_mv[1].as_int;
+
+#if CONFIG_REF_MV
+      pred_mv[0].as_int = near_mv[0].as_int;
+      if (is_compound)
+        pred_mv[1].as_int = near_mv[1].as_int;
+#endif
       break;
     }
     case ZEROMV: {
       mv[0].as_int = 0;
       if (is_compound)
         mv[1].as_int = 0;
+
+#if CONFIG_REF_MV
+      pred_mv[0].as_int = 0;
+      if (is_compound)
+        pred_mv[1].as_int = 0;
+#endif
       break;
     }
 #if CONFIG_EXT_INTER
     case NEW_NEWMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if !CONFIG_REF_MV
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+#endif
       assert(is_compound);
       for (i = 0; i < 2; ++i) {
+#if CONFIG_REF_MV
+        int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[i]],
+                                   xd->ref_mv_stack[mbmi->ref_frame[i]]);
+        nmv_context_counts *const mv_counts =
+            counts ? &counts->mv[nmv_ctx] : NULL;
+        read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv,
+                &cm->fc->nmvc[nmv_ctx], mv_counts,
+                allow_hp);
+#else
         read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts,
                 allow_hp);
+#endif
         ret = ret && is_mv_valid(&mv[i].as_mv);
       }
       break;
@@ -961,40 +1015,83 @@
     }
     case NEW_NEARESTMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+      int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[0]],
+                                 xd->ref_mv_stack[mbmi->ref_frame[0]]);
+      nmv_context_counts *const mv_counts =
+          counts ? &counts->mv[nmv_ctx] : NULL;
+      read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv,
+              &cm->fc->nmvc[nmv_ctx], mv_counts,
+              allow_hp);
+#else
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
-      assert(is_compound);
       read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, &cm->fc->nmvc, mv_counts,
               allow_hp);
+#endif
+      assert(is_compound);
       ret = ret && is_mv_valid(&mv[0].as_mv);
       mv[1].as_int = nearest_mv[1].as_int;
       break;
     }
     case NEAREST_NEWMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+      int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[1]],
+                                 xd->ref_mv_stack[mbmi->ref_frame[1]]);
+      nmv_context_counts *const mv_counts =
+          counts ? &counts->mv[nmv_ctx] : NULL;
+      mv[0].as_int = nearest_mv[0].as_int;
+      read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv,
+              &cm->fc->nmvc[nmv_ctx], mv_counts,
+              allow_hp);
+#else
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
-      assert(is_compound);
       mv[0].as_int = nearest_mv[0].as_int;
       read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, &cm->fc->nmvc, mv_counts,
               allow_hp);
+#endif
+      assert(is_compound);
       ret = ret && is_mv_valid(&mv[1].as_mv);
       break;
     }
     case NEAR_NEWMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+      int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[1]],
+                                 xd->ref_mv_stack[mbmi->ref_frame[1]]);
+      nmv_context_counts *const mv_counts =
+          counts ? &counts->mv[nmv_ctx] : NULL;
+      mv[0].as_int = near_mv[0].as_int;
+      read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv,
+              &cm->fc->nmvc[nmv_ctx], mv_counts,
+              allow_hp);
+#else
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
-      assert(is_compound);
       mv[0].as_int = near_mv[0].as_int;
       read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, &cm->fc->nmvc, mv_counts,
               allow_hp);
+#endif
+      assert(is_compound);
+
       ret = ret && is_mv_valid(&mv[1].as_mv);
       break;
     }
     case NEW_NEARMV: {
       FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+      int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[0]],
+                                 xd->ref_mv_stack[mbmi->ref_frame[0]]);
+      nmv_context_counts *const mv_counts =
+          counts ? &counts->mv[nmv_ctx] : NULL;
+      read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv,
+              &cm->fc->nmvc[nmv_ctx], mv_counts,
+              allow_hp);
+#else
       nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
-      assert(is_compound);
       read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, &cm->fc->nmvc, mv_counts,
               allow_hp);
+#endif
+      assert(is_compound);
       ret = ret && is_mv_valid(&mv[0].as_mv);
       mv[1].as_int = near_mv[1].as_int;
       break;
@@ -1284,7 +1381,11 @@
 #endif  // CONFIG_EXT_INTER
         }
 
-        if (!assign_mv(cm, xd, b_mode, block,
+        if (!assign_mv(cm, xd, b_mode,
+#if CONFIG_REF_MV
+                       j,
+#endif
+                       block,
 #if CONFIG_EXT_INTER
                        ref_mv[mv_idx],
 #else
@@ -1307,12 +1408,20 @@
       }
     }
 
+#if CONFIG_REF_MV
+    mbmi->pred_mv[0].as_int = mi->bmi[3].pred_mv[0].as_int;
+    mbmi->pred_mv[1].as_int = mi->bmi[3].pred_mv[1].as_int;
+#endif
     mi->mbmi.mode = b_mode;
 
     mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
     mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
   } else {
-    xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv,
+    xd->corrupted |= !assign_mv(cm, xd, mbmi->mode,
+#if CONFIG_REF_MV
+                                0,
+#endif
+                                mbmi->mv,
 #if CONFIG_EXT_INTER
                                 mbmi->mode == NEWFROMNEARMV ?
                                               nearmv : nearestmv,
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 930f73f..f06b960 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -882,7 +882,9 @@
 #endif
                                 vpx_writer *w) {
   VP10_COMMON *const cm = &cpi->common;
+#if !CONFIG_REF_MV
   const nmv_context *nmvc = &cm->fc->nmvc;
+#endif
   const MACROBLOCK *x = &cpi->td.mb;
   const MACROBLOCKD *xd = &x->e_mbd;
   const struct segmentation *const seg = &cm->seg;
@@ -1070,20 +1072,39 @@
 #else
           if (b_mode == NEWMV) {
 #endif  // CONFIG_EXT_INTER
-            for (ref = 0; ref < 1 + is_compound; ++ref)
+            for (ref = 0; ref < 1 + is_compound; ++ref) {
+#if CONFIG_REF_MV
+              int nmv_ctx =
+                  vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[ref]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[ref]]);
+              const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
               vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv,
 #if CONFIG_EXT_INTER
                              &mi->bmi[j].ref_mv[ref].as_mv,
 #else
                              &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv,
 #endif  // CONFIG_EXT_INTER
-                            nmvc, allow_hp);
+                             nmvc, allow_hp);
+            }
           }
 #if CONFIG_EXT_INTER
           else if (b_mode == NEAREST_NEWMV || b_mode == NEAR_NEWMV) {
+#if CONFIG_REF_MV
+            int nmv_ctx =
+                vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+                             mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+            const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
             vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[1].as_mv,
                            &mi->bmi[j].ref_mv[1].as_mv, nmvc, allow_hp);
           } else if (b_mode == NEW_NEARESTMV || b_mode == NEW_NEARMV) {
+#if CONFIG_REF_MV
+            int nmv_ctx =
+                vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+                             mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+            const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
             vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[0].as_mv,
                            &mi->bmi[j].ref_mv[0].as_mv, nmvc, allow_hp);
           }
@@ -1096,9 +1117,14 @@
 #else
       if (mode == NEWMV) {
 #endif  // CONFIG_EXT_INTER
-        for (ref = 0; ref < 1 + is_compound; ++ref)
+        for (ref = 0; ref < 1 + is_compound; ++ref) {
+#if CONFIG_REF_MV
+              int nmv_ctx =
+                  vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[ref]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[ref]]);
+              const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
 #if CONFIG_EXT_INTER
-        {
           if (mode == NEWFROMNEARMV)
             vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
                            &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][1].as_mv,
@@ -1108,13 +1134,25 @@
           vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
                         &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc,
                         allow_hp);
-#if CONFIG_EXT_INTER
         }
+#if CONFIG_EXT_INTER
       } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
+#if CONFIG_REF_MV
+            int nmv_ctx =
+                vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+                             mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+            const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
         vp10_encode_mv(cpi, w, &mbmi->mv[1].as_mv,
                        &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv, nmvc,
                        allow_hp);
       } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
+#if CONFIG_REF_MV
+            int nmv_ctx =
+                vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+                             mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+            const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
         vp10_encode_mv(cpi, w, &mbmi->mv[0].as_mv,
                        &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv, nmvc,
                        allow_hp);
@@ -2449,7 +2487,11 @@
                        counts->y_mode[i], INTRA_MODES, &header_bc);
 
     vp10_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc,
-                        &counts->mv);
+#if CONFIG_REF_MV
+                         counts->mv);
+#else
+                         &counts->mv);
+#endif
     update_ext_tx_probs(cm, &header_bc);
 #if CONFIG_SUPERTX
     if (!xd->lossless[0])
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h
index 3c49d14..0c3e48f 100644
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -107,12 +107,21 @@
   unsigned int pred_sse[MAX_REF_FRAMES];
   int pred_mv_sad[MAX_REF_FRAMES];
 
+#if CONFIG_REF_MV
+  int *nmvjointcost;
+  int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
+  int *nmvcost[NMV_CONTEXTS][2];
+  int *nmvcost_hp[NMV_CONTEXTS][2];
+  int **mv_cost_stack[NMV_CONTEXTS];
+  int *nmvjointsadcost;
+#else
   int nmvjointcost[MV_JOINTS];
   int *nmvcost[2];
   int *nmvcost_hp[2];
-  int **mvcost;
-
   int nmvjointsadcost[MV_JOINTS];
+#endif
+
+  int **mvcost;
   int *nmvsadcost[2];
   int *nmvsadcost_hp[2];
   int **mvsadcost;
diff --git a/vp10/encoder/encodemv.c b/vp10/encoder/encodemv.c
index 4124c4a..61429aa 100644
--- a/vp10/encoder/encodemv.c
+++ b/vp10/encoder/encodemv.c
@@ -157,9 +157,49 @@
 }
 
 void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vpx_writer *w,
-                         nmv_context_counts *const counts) {
+                          nmv_context_counts *const nmv_counts) {
   int i, j;
+#if CONFIG_REF_MV
+  int nmv_ctx = 0;
+  for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
+    nmv_context *const mvc = &cm->fc->nmvc[nmv_ctx];
+    nmv_context_counts *const counts = &nmv_counts[nmv_ctx];
+    write_mv_update(vp10_mv_joint_tree, mvc->joints, counts->joints,
+                    MV_JOINTS, w);
+
+    for (i = 0; i < 2; ++i) {
+      nmv_component *comp = &mvc->comps[i];
+      nmv_component_counts *comp_counts = &counts->comps[i];
+
+      update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB);
+      write_mv_update(vp10_mv_class_tree, comp->classes, comp_counts->classes,
+                      MV_CLASSES, w);
+      write_mv_update(vp10_mv_class0_tree, comp->class0, comp_counts->class0,
+                      CLASS0_SIZE, w);
+      for (j = 0; j < MV_OFFSET_BITS; ++j)
+        update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB);
+    }
+
+    for (i = 0; i < 2; ++i) {
+      for (j = 0; j < CLASS0_SIZE; ++j)
+        write_mv_update(vp10_mv_fp_tree, mvc->comps[i].class0_fp[j],
+                        counts->comps[i].class0_fp[j], MV_FP_SIZE, w);
+
+      write_mv_update(vp10_mv_fp_tree, mvc->comps[i].fp, counts->comps[i].fp,
+                      MV_FP_SIZE, w);
+    }
+
+    if (usehp) {
+      for (i = 0; i < 2; ++i) {
+        update_mv(w, counts->comps[i].class0_hp, &mvc->comps[i].class0_hp,
+                  MV_UPDATE_PROB);
+        update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB);
+      }
+    }
+  }
+#else
   nmv_context *const mvc = &cm->fc->nmvc;
+  nmv_context_counts *const counts = nmv_counts;
 
   write_mv_update(vp10_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS, w);
 
@@ -192,6 +232,7 @@
       update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB);
     }
   }
+#endif
 }
 
 void vp10_encode_mv(VP10_COMP* cpi, vpx_writer* w,
@@ -227,27 +268,45 @@
 #if CONFIG_EXT_INTER
 static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
                     const int_mv mvs[2],
-                    nmv_context_counts *counts) {
+                    nmv_context_counts *nmv_counts) {
   int i;
   PREDICTION_MODE mode = mbmi->mode;
   int mv_idx = (mode == NEWFROMNEARMV);
+#if !CONFIG_REF_MV
+  nmv_context_counts *counts = nmv_counts;
+#endif
 
   if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
     for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
       const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][mv_idx].as_mv;
       const MV diff = {mvs[i].as_mv.row - ref->row,
                        mvs[i].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
       vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
     }
   } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
     const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv;
     const MV diff = {mvs[1].as_mv.row - ref->row,
                      mvs[1].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
     vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
   } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
     const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
     const MV diff = {mvs[0].as_mv.row - ref->row,
                      mvs[0].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
     vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
   }
 }
@@ -255,36 +314,67 @@
 static void inc_mvs_sub8x8(const MODE_INFO *mi,
                            int block,
                            const int_mv mvs[2],
-                           nmv_context_counts *counts) {
+#if CONFIG_REF_MV
+                           const MB_MODE_INFO_EXT *mbmi_ext,
+#endif
+                           nmv_context_counts *nmv_counts) {
   int i;
   PREDICTION_MODE mode = mi->bmi[block].as_mode;
+#if CONFIG_REF_MV
+  const MB_MODE_INFO *mbmi = &mi->mbmi;
+#else
+  nmv_context_counts *counts = nmv_counts;
+#endif
 
   if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
     for (i = 0; i < 1 + has_second_ref(&mi->mbmi); ++i) {
       const MV *ref = &mi->bmi[block].ref_mv[i].as_mv;
       const MV diff = {mvs[i].as_mv.row - ref->row,
                        mvs[i].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
       vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
     }
   } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
     const MV *ref = &mi->bmi[block].ref_mv[1].as_mv;
     const MV diff = {mvs[1].as_mv.row - ref->row,
                      mvs[1].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
     vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
   } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
     const MV *ref = &mi->bmi[block].ref_mv[0].as_mv;
     const MV diff = {mvs[0].as_mv.row - ref->row,
                      mvs[0].as_mv.col - ref->col};
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
     vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
   }
 }
 #else
 static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
                     const int_mv mvs[2],
-                    nmv_context_counts *counts) {
+                    nmv_context_counts *nmv_counts) {
   int i;
+#if !CONFIG_REF_MV
+  nmv_context_counts *counts = nmv_counts;
+#endif
 
   for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+#if CONFIG_REF_MV
+    int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
+                               mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
+    nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
     const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
     const MV diff = {mvs[i].as_mv.row - ref->row,
                      mvs[i].as_mv.col - ref->col};
@@ -310,10 +400,21 @@
 
 #if CONFIG_EXT_INTER
         if (have_newmv_in_inter_mode(mi->bmi[i].as_mode))
-          inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv, &td->counts->mv);
+          inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv,
+#if CONFIG_REF_MV
+                         mbmi_ext,
+                         td->counts->mv);
+#else
+                         &td->counts->mv);
+#endif
 #else
         if (mi->bmi[i].as_mode == NEWMV)
-          inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv, &td->counts->mv);
+          inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv,
+#if CONFIG_REF_MV
+                  td->counts->mv);
+#else
+                  &td->counts->mv);
+#endif
 #endif  // CONFIG_EXT_INTER
       }
     }
@@ -323,7 +424,12 @@
 #else
     if (mbmi->mode == NEWMV)
 #endif  // CONFIG_EXT_INTER
-      inc_mvs(mbmi, mbmi_ext, mbmi->mv, &td->counts->mv);
+      inc_mvs(mbmi, mbmi_ext, mbmi->mv,
+#if CONFIG_REF_MV
+              td->counts->mv);
+#else
+              &td->counts->mv);
+#endif
   }
 }
 
diff --git a/vp10/encoder/encodemv.h b/vp10/encoder/encodemv.h
index 006f6d7..c753d34 100644
--- a/vp10/encoder/encodemv.h
+++ b/vp10/encoder/encodemv.h
@@ -21,7 +21,7 @@
 void vp10_entropy_mv_init(void);
 
 void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vpx_writer *w,
-                         nmv_context_counts *const counts);
+                          nmv_context_counts *const counts);
 
 void vp10_encode_mv(VP10_COMP *cpi, vpx_writer* w, const MV* mv, const MV* ref,
                    const nmv_context* mvctx, int usehp);
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 55ec9c1..f629970 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -222,6 +222,22 @@
 void vp10_set_high_precision_mv(VP10_COMP *cpi, int allow_high_precision_mv) {
   MACROBLOCK *const mb = &cpi->td.mb;
   cpi->common.allow_high_precision_mv = allow_high_precision_mv;
+
+#if CONFIG_REF_MV
+  if (cpi->common.allow_high_precision_mv) {
+    int i;
+    for (i = 0; i < NMV_CONTEXTS; ++i) {
+      mb->mv_cost_stack[i] = mb->nmvcost_hp[i];
+      mb->mvsadcost = mb->nmvsadcost_hp;
+    }
+  } else {
+    int i;
+    for (i = 0; i < NMV_CONTEXTS; ++i) {
+      mb->mv_cost_stack[i] = mb->nmvcost[i];
+      mb->mvsadcost = mb->nmvsadcost;
+    }
+  }
+#else
   if (cpi->common.allow_high_precision_mv) {
     mb->mvcost = mb->nmvcost_hp;
     mb->mvsadcost = mb->nmvsadcost_hp;
@@ -229,6 +245,7 @@
     mb->mvcost = mb->nmvcost;
     mb->mvsadcost = mb->nmvsadcost;
   }
+#endif
 }
 
 static void setup_frame(VP10_COMP *cpi) {
@@ -338,6 +355,9 @@
 
 static void dealloc_compressor_data(VP10_COMP *cpi) {
   VP10_COMMON *const cm = &cpi->common;
+#if CONFIG_REF_MV
+  int i;
+#endif
 
   vpx_free(cpi->mbmi_ext_base);
   cpi->mbmi_ext_base = NULL;
@@ -351,6 +371,19 @@
   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
   cpi->coding_context.last_frame_seg_map_copy = NULL;
 
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    vpx_free(cpi->nmv_costs[i][0]);
+    vpx_free(cpi->nmv_costs[i][1]);
+    vpx_free(cpi->nmv_costs_hp[i][0]);
+    vpx_free(cpi->nmv_costs_hp[i][1]);
+    cpi->nmv_costs[i][0] = NULL;
+    cpi->nmv_costs[i][1] = NULL;
+    cpi->nmv_costs_hp[i][0] = NULL;
+    cpi->nmv_costs_hp[i][1] = NULL;
+  }
+#endif
+
   vpx_free(cpi->nmvcosts[0]);
   vpx_free(cpi->nmvcosts[1]);
   cpi->nmvcosts[0] = NULL;
@@ -412,12 +445,29 @@
 static void save_coding_context(VP10_COMP *cpi) {
   CODING_CONTEXT *const cc = &cpi->coding_context;
   VP10_COMMON *cm = &cpi->common;
+#if CONFIG_REF_MV
+  int i;
+#endif
 
   // Stores a snapshot of key state variables which can subsequently be
   // restored with a call to vp10_restore_coding_context. These functions are
   // intended for use in a re-code loop in vp10_compress_frame where the
   // quantizer value is adjusted between loop iterations.
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    vp10_copy(cc->nmv_vec_cost[i], cpi->td.mb.nmv_vec_cost[i]);
+    memcpy(cc->nmv_costs[i][0], cpi->nmv_costs[i][0],
+           MV_VALS * sizeof(*cpi->nmv_costs[i][0]));
+    memcpy(cc->nmv_costs[i][1], cpi->nmv_costs[i][1],
+           MV_VALS * sizeof(*cpi->nmv_costs[i][1]));
+    memcpy(cc->nmv_costs_hp[i][0], cpi->nmv_costs_hp[i][0],
+           MV_VALS * sizeof(*cpi->nmv_costs_hp[i][0]));
+    memcpy(cc->nmv_costs_hp[i][1], cpi->nmv_costs_hp[i][1],
+           MV_VALS * sizeof(*cpi->nmv_costs_hp[i][1]));
+  }
+#else
   vp10_copy(cc->nmvjointcost,  cpi->td.mb.nmvjointcost);
+#endif
 
   memcpy(cc->nmvcosts[0], cpi->nmvcosts[0],
          MV_VALS * sizeof(*cpi->nmvcosts[0]));
@@ -440,10 +490,27 @@
 static void restore_coding_context(VP10_COMP *cpi) {
   CODING_CONTEXT *const cc = &cpi->coding_context;
   VP10_COMMON *cm = &cpi->common;
+#if CONFIG_REF_MV
+  int i;
+#endif
 
   // Restore key state variables to the snapshot state stored in the
   // previous call to vp10_save_coding_context.
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    vp10_copy(cpi->td.mb.nmv_vec_cost[i], cc->nmv_vec_cost[i]);
+    memcpy(cpi->nmv_costs[i][0], cc->nmv_costs[i][0],
+           MV_VALS * sizeof(*cc->nmv_costs[i][0]));
+    memcpy(cpi->nmv_costs[i][1], cc->nmv_costs[i][1],
+           MV_VALS * sizeof(*cc->nmv_costs[i][1]));
+    memcpy(cpi->nmv_costs_hp[i][0], cc->nmv_costs_hp[i][0],
+           MV_VALS * sizeof(*cc->nmv_costs_hp[i][0]));
+    memcpy(cpi->nmv_costs_hp[i][1], cc->nmv_costs_hp[i][1],
+           MV_VALS * sizeof(*cc->nmv_costs_hp[i][1]));
+  }
+#else
   vp10_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
+#endif
 
   memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0]));
   memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1]));
@@ -1560,12 +1627,14 @@
 #endif
 #define log2f(x) (log (x) / (float) M_LOG2_E)
 
+#if !CONFIG_REF_MV
 static void cal_nmvjointsadcost(int *mvjointsadcost) {
   mvjointsadcost[0] = 600;
   mvjointsadcost[1] = 300;
   mvjointsadcost[2] = 300;
   mvjointsadcost[3] = 300;
 }
+#endif
 
 static void cal_nmvsadcosts(int *mvsadcost[2]) {
   int i = 1;
@@ -1640,6 +1709,19 @@
 
   realloc_segmentation_maps(cpi);
 
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs[i][0],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs[i][0])));
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs[i][1],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs[i][1])));
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs_hp[i][0],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs_hp[i][0])));
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs_hp[i][1],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs_hp[i][1])));
+  }
+#endif
+
   CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
   CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
@@ -1714,15 +1796,24 @@
 
   cpi->first_time_stamp_ever = INT64_MAX;
 
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    cpi->td.mb.nmvcost[i][0] = &cpi->nmv_costs[i][0][MV_MAX];
+    cpi->td.mb.nmvcost[i][1] = &cpi->nmv_costs[i][1][MV_MAX];
+    cpi->td.mb.nmvcost_hp[i][0] = &cpi->nmv_costs_hp[i][0][MV_MAX];
+    cpi->td.mb.nmvcost_hp[i][1] = &cpi->nmv_costs_hp[i][1][MV_MAX];
+  }
+#else
   cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
   cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
   cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
+  cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
+  cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
+#endif
   cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
   cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
   cal_nmvsadcosts(cpi->td.mb.nmvsadcost);
 
-  cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
-  cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
   cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
   cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
   cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index 59c7682..292494c 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -55,6 +55,12 @@
   int nmvcosts[2][MV_VALS];
   int nmvcosts_hp[2][MV_VALS];
 
+#if CONFIG_REF_MV
+  int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
+  int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
+  int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS];
+#endif
+
   unsigned char *last_frame_seg_map_copy;
 
   // 0 = Intra, Last, GF, ARF
@@ -352,6 +358,11 @@
 
   CODING_CONTEXT coding_context;
 
+#if CONFIG_REF_MV
+  int *nmv_costs[NMV_CONTEXTS][2];
+  int *nmv_costs_hp[NMV_CONTEXTS][2];
+#endif
+
   int *nmvcosts[2];
   int *nmvcosts_hp[2];
   int *nmvsadcosts[2];
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index dd19e02..6e3b06a 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -97,12 +97,22 @@
 
 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
                           int sad_per_bit) {
+#if CONFIG_REF_MV
+  const MV diff = { (mv->row - ref->row) * 8,  // scale by 8 without "<< 3":
+                    (mv->col - ref->col) * 8 };  // shifting a negative is UB
+  return ROUND_POWER_OF_TWO(
+      (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->mvsadcost) *
+          sad_per_bit,
+      VP9_PROB_COST_SHIFT);
+#else
   const MV diff = { mv->row - ref->row,
                     mv->col - ref->col };
+
   return ROUND_POWER_OF_TWO(
       (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) *
           sad_per_bit,
       VP9_PROB_COST_SHIFT);
+#endif
 }
 
 void vp10_init_dsmotion_compensation(search_site_config *cfg, int stride) {
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index bf73064..299b761 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -331,6 +331,18 @@
   }
 }
 
+#if CONFIG_REF_MV
+// Point x's MV and MV-SAD cost tables at ref_frame's NMV context entry.
+void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame) {
+  MB_MODE_INFO_EXT *const ext = x->mbmi_ext;
+  const int ctx = vp10_nmv_ctx(ext->ref_mv_count[ref_frame],
+                               ext->ref_mv_stack[ref_frame]);
+  // The SAD-cost pointers share the tables selected for this context.
+  x->nmvjointsadcost = x->nmvjointcost = x->nmv_vec_cost[ctx];
+  x->mvsadcost = x->mvcost = x->mv_cost_stack[ctx];
+}
+#endif
+
 void vp10_initialize_rd_consts(VP10_COMP *cpi) {
   VP10_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->td.mb;
@@ -361,10 +373,26 @@
   fill_mode_costs(cpi);
 
   if (!frame_is_intra_only(cm)) {
+#if CONFIG_REF_MV
+    int nmv_ctx;
+    for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
+      vp10_build_nmv_cost_table(x->nmv_vec_cost[nmv_ctx],
+                                cm->allow_high_precision_mv ?
+                                  x->nmvcost_hp[nmv_ctx] : x->nmvcost[nmv_ctx],
+                                &cm->fc->nmvc[nmv_ctx],
+                                cm->allow_high_precision_mv);
+    }
+    x->mvcost = x->mv_cost_stack[0];
+    x->nmvjointcost = x->nmv_vec_cost[0];
+    x->mvsadcost = x->mvcost;
+    x->nmvjointsadcost = x->nmvjointcost;
+#else
     vp10_build_nmv_cost_table(x->nmvjointcost,
                              cm->allow_high_precision_mv ? x->nmvcost_hp
                                                          : x->nmvcost,
                              &cm->fc->nmvc, cm->allow_high_precision_mv);
+#endif
+
 #if CONFIG_REF_MV
     for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
       cpi->newmv_mode_cost[i][0] = vp10_cost_bit(cm->fc->newmv_prob[i], 0);
diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h
index 5d6f8e6..fdbe431 100644
--- a/vp10/encoder/rd.h
+++ b/vp10/encoder/rd.h
@@ -292,6 +292,10 @@
 
 void vp10_init_me_luts(void);
 
+#if CONFIG_REF_MV
+void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame);
+#endif
+
 void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                               const struct macroblockd_plane *pd,
                               ENTROPY_CONTEXT t_above[16],
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index acff554..403dd19 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -3554,6 +3554,20 @@
 
   mic->bmi[i].as_mode = mode;
 
+#if CONFIG_REF_MV
+  if (mode == NEWMV) {
+    mic->bmi[i].pred_mv[0].as_int =
+        mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_int;
+    if (is_compound)
+      mic->bmi[i].pred_mv[1].as_int =
+          mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_int;
+  } else {
+    mic->bmi[i].pred_mv[0].as_int = this_mv[0].as_int;
+    if (is_compound)
+      mic->bmi[i].pred_mv[1].as_int = this_mv[1].as_int;
+  }
+#endif
+
   for (idy = 0; idy < num_4x4_blocks_high; ++idy)
     for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
       memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));
@@ -3997,6 +4011,10 @@
     tmp_mv.col >>= 3;
     tmp_mv.row >>= 3;
 
+#if CONFIG_REF_MV
+    vp10_set_mvcost(x, refs[id]);
+#endif
+
     // Small-range full-pixel motion search.
     bestsme = vp10_refining_search_8p_c(x, &tmp_mv, sadpb,
                                        search_range,
@@ -4294,6 +4312,9 @@
 
           vp10_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
 
+#if CONFIG_REF_MV
+          vp10_set_mvcost(x, mbmi->ref_frame[0]);
+#endif
           bestsme = vp10_full_pixel_search(
               cpi, x, bsize, &mvp_full, step_param, sadpb,
               cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
@@ -4830,6 +4851,10 @@
   pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
   pred_mv[2] = x->pred_mv[ref];
 
+#if CONFIG_REF_MV
+  vp10_set_mvcost(x, ref);
+#endif
+
   if (scaled_ref_frame) {
     int i;
     // Swap out the reference frame for a version that's been scaled to
@@ -7203,6 +7228,15 @@
   *mbmi = best_mbmode;
   x->skip |= best_skip2;
 
+#if CONFIG_REF_MV
+  for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+    if (mbmi->mode != NEWMV)
+      mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;
+    else
+      mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+  }
+#endif
+
   for (i = 0; i < REFERENCE_MODES; ++i) {
     if (best_pred_rd[i] == INT64_MAX)
       best_pred_diff[i] = INT_MIN;
@@ -8129,6 +8163,10 @@
 
     mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
     mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
+#if CONFIG_REF_MV
+    mbmi->pred_mv[0].as_int = xd->mi[0]->bmi[3].pred_mv[0].as_int;
+    mbmi->pred_mv[1].as_int = xd->mi[0]->bmi[3].pred_mv[1].as_int;
+#endif
   }
 
   for (i = 0; i < REFERENCE_MODES; ++i) {
diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c
index 035b66a..afe555d 100644
--- a/vp10/encoder/temporal_filter.c
+++ b/vp10/encoder/temporal_filter.c
@@ -293,6 +293,13 @@
   step_param = mv_sf->reduce_first_step_size;
   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
 
+#if CONFIG_REF_MV
+  x->mvcost = x->mv_cost_stack[0];
+  x->nmvjointcost = x->nmv_vec_cost[0];
+  x->mvsadcost = x->mvcost;
+  x->nmvjointsadcost = x->nmvjointcost;
+#endif
+
   // Ignore mv costing by sending NULL pointer instead of cost arrays
   vp10_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
                  cond_cost_list(cpi, cost_list),