New motion vector entropy coding
Adds a new experiment with redesigned/refactored motion vector entropy
coding. The patch also takes a first step towards separating the
integer and fractional pel components of an MV. However, the fractional
pel encoding still depends on the integer pel part, so the two are not
yet fully independent. Further experiments are in progress to see how
far they can be decoupled without affecting performance.
All components have been implemented: entropy coding/decoding, costing
for the MV search, and forward and backward updates to the probability
tables.
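For reference, a minimal sketch of how a nonzero MV component is
decomposed in the new scheme (this mirrors encode_nmv_component() and
encode_nmv_component_fp() in the patch; variable names are illustrative
only):

  /* v: MV component in 1/8-pel units, v != 0 */
  int s = v < 0;                   /* sign */
  int z = (s ? -v : v) - 1;        /* magnitude - 1 */
  int o;
  int c = vp8_get_mv_class(z, &o); /* magnitude class, offset in class */
  int d = o >> 3;                  /* integer pel offset */
  int f = (o >> 1) & 3;            /* fractional pel (1/2, 1/4) bits */
  int e = o & 1;                   /* high precision (1/8 pel) bit */

The sign, class and integer offset are coded first; the fractional and
high-precision parts are coded in a second pass. The fractional pel
model is still conditioned on the magnitude class (class0_fp[d] vs.
fp), which is the residual dependence noted above.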
Results so far:
derf: +0.19%
std-hd: +0.28%
yt: +0.80%
hd: +1.15%
Patch: Simplifies the fractional pel models:
derf: +0.284%
std-hd: +0.289%
yt: +0.849%
hd: +1.254%
Patch: Some changes in the models, rebased.
derf: +0.330%
std-hd: +0.306%
yt: +0.816%
hd: +1.225%
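
For reference, the backward update applied per branch probability
(adapt_prob() in entropymv.c) is a count-saturated linear filter:

  count  = ct[0] + ct[1];
  count  = count > MV_COUNT_SAT ? MV_COUNT_SAT : count;  /* SAT = 16 */
  factor = MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT;  /* <= 160 */
  prob   = (prep * (256 - factor) + newp * factor + 128) >> 8;

with the result then clamped to [1, 255], so a fully saturated count
moves a probability at most 160/256 of the way from its previous value
towards the newly measured one per frame.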
Change-Id: I646b3c48f3587f4cc909639b78c3798da6402678
diff --git a/configure b/configure
index 2180ae3..168da07 100755
--- a/configure
+++ b/configure
@@ -228,6 +228,7 @@
newbestrefmv
new_mvref
hybridtransform16x16
+ newmventropy
"
CONFIG_LIST="
external_build
diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c
index 1664b28..9d7d566 100644
--- a/vp8/common/entropymv.c
+++ b/vp8/common/entropymv.c
@@ -12,6 +12,443 @@
#include "onyxc_int.h"
#include "entropymv.h"
+//#define MV_COUNT_TESTING
+
+#if CONFIG_NEWMVENTROPY
+
+#define MV_COUNT_SAT 16
+#define MV_MAX_UPDATE_FACTOR 160
+
+/* Smooth or bias the mv-counts before prob computation */
+/* #define SMOOTH_MV_COUNTS */
+
+const vp8_tree_index vp8_mv_joint_tree[2 * MV_JOINTS - 2] = {
+ -MV_JOINT_ZERO, 2,
+ -MV_JOINT_HNZVZ, 4,
+ -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ
+};
+struct vp8_token_struct vp8_mv_joint_encodings[MV_JOINTS];
+
+const vp8_tree_index vp8_mv_class_tree[2 * MV_CLASSES - 2] = {
+ -MV_CLASS_0, 2,
+ -MV_CLASS_1, 4,
+ 6, 8,
+ -MV_CLASS_2, -MV_CLASS_3,
+ 10, 12,
+ -MV_CLASS_4, -MV_CLASS_5,
+ -MV_CLASS_6, -MV_CLASS_7,
+};
+struct vp8_token_struct vp8_mv_class_encodings[MV_CLASSES];
+
+const vp8_tree_index vp8_mv_class0_tree [2 * CLASS0_SIZE - 2] = {
+ -0, -1,
+};
+struct vp8_token_struct vp8_mv_class0_encodings[CLASS0_SIZE];
+
+const vp8_tree_index vp8_mv_fp_tree [2 * 4 - 2] = {
+ -0, 2,
+ -1, 4,
+ -2, -3
+};
+struct vp8_token_struct vp8_mv_fp_encodings[4];
+
+const nmv_context vp8_default_nmv_context = {
+ {32, 64, 96},
+ {
+ { /* vert component */
+ 128, /* sign */
+ {224, 144, 192, 168, 192, 176, 192}, /* class */
+ {216}, /* class0 */
+ {136, 140, 148, 160, 176, 192, 224}, /* bits */
+ {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
+ {64, 96, 64}, /* fp */
+ 160, /* class0_hp bit */
+ 128, /* hp */
+ },
+ { /* hor component */
+ 128, /* sign */
+ {216, 128, 176, 160, 176, 176, 192}, /* class */
+ {208}, /* class0 */
+ {136, 140, 148, 160, 176, 192, 224}, /* bits */
+ {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */
+ {64, 96, 64}, /* fp */
+ 160, /* class0_hp bit */
+ 128, /* hp */
+ }
+ },
+};
+
+MV_JOINT_TYPE vp8_get_mv_joint(MV mv) {
+ if (mv.row == 0 && mv.col == 0) return MV_JOINT_ZERO;
+ else if (mv.row == 0 && mv.col != 0) return MV_JOINT_HNZVZ;
+ else if (mv.row != 0 && mv.col == 0) return MV_JOINT_HZVNZ;
+ else return MV_JOINT_HNZVNZ;
+}
+
+#define mv_class_base(c) ((c) ? (CLASS0_SIZE << ((c) + 2)) : 0)
+
+MV_CLASS_TYPE vp8_get_mv_class(int z, int *offset) {
+ MV_CLASS_TYPE c;
+ if (z < CLASS0_SIZE * 8) c = MV_CLASS_0;
+ else if (z < CLASS0_SIZE * 16) c = MV_CLASS_1;
+ else if (z < CLASS0_SIZE * 32) c = MV_CLASS_2;
+ else if (z < CLASS0_SIZE * 64) c = MV_CLASS_3;
+ else if (z < CLASS0_SIZE * 128) c = MV_CLASS_4;
+ else if (z < CLASS0_SIZE * 256) c = MV_CLASS_5;
+ else if (z < CLASS0_SIZE * 512) c = MV_CLASS_6;
+ else if (z < CLASS0_SIZE * 1024) c = MV_CLASS_7;
+ else assert(0);
+ if (offset)
+ *offset = z - mv_class_base(c);
+ return c;
+}
+
+int vp8_get_mv_mag(MV_CLASS_TYPE c, int offset) {
+ return mv_class_base(c) + offset;
+}
+
+static void increment_nmv_component_count(int v,
+ nmv_component_counts *mvcomp,
+ int incr,
+ int usehp) {
+ assert (v != 0); /* should not be zero */
+ mvcomp->mvcount[MV_MAX + v] += incr;
+}
+
+static void increment_nmv_component(int v,
+ nmv_component_counts *mvcomp,
+ int incr,
+ int usehp) {
+ int s, z, c, o, d, e, f;
+ assert (v != 0); /* should not be zero */
+ s = v < 0;
+ mvcomp->sign[s] += incr;
+ z = (s ? -v : v) - 1; /* magnitude - 1 */
+
+ c = vp8_get_mv_class(z, &o);
+ mvcomp->classes[c] += incr;
+
+ d = (o >> 3); /* int mv data */
+ f = (o >> 1) & 3; /* fractional pel mv data */
+ e = (o & 1); /* high precision mv data */
+ if (c == MV_CLASS_0) {
+ mvcomp->class0[d] += incr;
+ } else {
+ int i, b;
+ b = c + CLASS0_BITS - 1; /* number of bits */
+ for (i = 0; i < b; ++i)
+ mvcomp->bits[i][((d >> i) & 1)] += incr;
+ }
+
+ /* Code the fractional pel bits */
+ if (c == MV_CLASS_0) {
+ mvcomp->class0_fp[d][f] += incr;
+ } else {
+ mvcomp->fp[f] += incr;
+ }
+
+ /* Code the high precision bit */
+ if (usehp) {
+ if (c == MV_CLASS_0) {
+ mvcomp->class0_hp[e] += incr;
+ } else {
+ mvcomp->hp[e] += incr;
+ }
+ } else { /* assume the extra bit is 1 */
+ if (c == MV_CLASS_0) {
+ mvcomp->class0_hp[1] += incr;
+ } else {
+ mvcomp->hp[1] += incr;
+ }
+ }
+}
+
+#ifdef SMOOTH_MV_COUNTS
+static void smooth_counts(nmv_component_counts *mvcomp) {
+ static const int flen = 3; // (filter_length + 1) / 2
+ static const int fval[] = {8, 3, 1};
+ static const int fvalbits = 4;
+ int i;
+ unsigned int smvcount[MV_VALS];
+ vpx_memcpy(smvcount, mvcomp->mvcount, sizeof(smvcount));
+ smvcount[MV_MAX] = (smvcount[MV_MAX - 1] + smvcount[MV_MAX + 1]) >> 1;
+ for (i = flen - 1; i <= MV_VALS - flen; ++i) {
+ int j, s = smvcount[i] * fval[0];
+ for (j = 1; j < flen; ++j)
+ s += (smvcount[i - j] + smvcount[i + j]) * fval[j];
+ mvcomp->mvcount[i] = (s + (1 << (fvalbits - 1))) >> fvalbits;
+ }
+}
+#endif
+
+static void counts_to_context(nmv_component_counts *mvcomp, int usehp) {
+ int v;
+ vpx_memset(mvcomp->sign, 0, sizeof(nmv_component_counts) - sizeof(mvcomp->mvcount));
+ for (v = 1; v <= MV_MAX; v++) {
+ increment_nmv_component(-v, mvcomp, mvcomp->mvcount[MV_MAX - v], usehp);
+ increment_nmv_component( v, mvcomp, mvcomp->mvcount[MV_MAX + v], usehp);
+ }
+}
+
+void vp8_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx,
+ int usehp) {
+ MV_JOINT_TYPE j = vp8_get_mv_joint(*mv);
+ mvctx->joints[j]++;
+ if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
+ increment_nmv_component_count(mv->row, &mvctx->comps[0], 1, usehp);
+ }
+ if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {
+ increment_nmv_component_count(mv->col, &mvctx->comps[1], 1, usehp);
+ }
+}
+
+static void adapt_prob(vp8_prob *dest, vp8_prob prep, vp8_prob newp,
+ unsigned int ct[2]) {
+ int factor;
+ int prob;
+ int count = ct[0] + ct[1];
+ if (count) {
+ count = count > MV_COUNT_SAT ? MV_COUNT_SAT : count;
+ factor = (MV_MAX_UPDATE_FACTOR * count / MV_COUNT_SAT);
+ prob = ((int)prep * (256 - factor) + (int)(newp) * factor + 128) >> 8;
+ prob += !prob;
+ prob = (prob > 255 ? 255 : prob);
+ *dest = prob;
+ }
+}
+
+void vp8_counts_to_nmv_context(
+ nmv_context_counts *NMVcount,
+ nmv_context *prob,
+ int usehp,
+ unsigned int (*branch_ct_joint)[2],
+ unsigned int (*branch_ct_sign)[2],
+ unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2],
+ unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2],
+ unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2],
+ unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2],
+ unsigned int (*branch_ct_fp)[4 - 1][2],
+ unsigned int (*branch_ct_class0_hp)[2],
+ unsigned int (*branch_ct_hp)[2]) {
+ int i, j, k;
+ counts_to_context(&NMVcount->comps[0], usehp);
+ counts_to_context(&NMVcount->comps[1], usehp);
+ vp8_tree_probs_from_distribution(MV_JOINTS,
+ vp8_mv_joint_encodings,
+ vp8_mv_joint_tree,
+ prob->joints,
+ branch_ct_joint,
+ NMVcount->joints,
+ 256, 1);
+ for (i = 0; i < 2; ++i) {
+ prob->comps[i].sign =
+ vp8_bin_prob_from_distribution(NMVcount->comps[i].sign);
+ branch_ct_sign[i][0] = NMVcount->comps[i].sign[0];
+ branch_ct_sign[i][1] = NMVcount->comps[i].sign[1];
+ vp8_tree_probs_from_distribution(MV_CLASSES,
+ vp8_mv_class_encodings,
+ vp8_mv_class_tree,
+ prob->comps[i].classes,
+ branch_ct_classes[i],
+ NMVcount->comps[i].classes,
+ 256, 1);
+ vp8_tree_probs_from_distribution(CLASS0_SIZE,
+ vp8_mv_class0_encodings,
+ vp8_mv_class0_tree,
+ prob->comps[i].class0,
+ branch_ct_class0[i],
+ NMVcount->comps[i].class0,
+ 256, 1);
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ prob->comps[i].bits[j] = vp8_bin_prob_from_distribution(
+ NMVcount->comps[i].bits[j]);
+ branch_ct_bits[i][j][0] = NMVcount->comps[i].bits[j][0];
+ branch_ct_bits[i][j][1] = NMVcount->comps[i].bits[j][1];
+ }
+ }
+ for (i = 0; i < 2; ++i) {
+ for (k = 0; k < CLASS0_SIZE; ++k) {
+ vp8_tree_probs_from_distribution(4,
+ vp8_mv_fp_encodings,
+ vp8_mv_fp_tree,
+ prob->comps[i].class0_fp[k],
+ branch_ct_class0_fp[i][k],
+ NMVcount->comps[i].class0_fp[k],
+ 256, 1);
+ }
+ vp8_tree_probs_from_distribution(4,
+ vp8_mv_fp_encodings,
+ vp8_mv_fp_tree,
+ prob->comps[i].fp,
+ branch_ct_fp[i],
+ NMVcount->comps[i].fp,
+ 256, 1);
+ }
+ if (usehp) {
+ for (i = 0; i < 2; ++i) {
+ prob->comps[i].class0_hp = vp8_bin_prob_from_distribution(
+ NMVcount->comps[i].class0_hp);
+ branch_ct_class0_hp[i][0] = NMVcount->comps[i].class0_hp[0];
+ branch_ct_class0_hp[i][1] = NMVcount->comps[i].class0_hp[1];
+
+ prob->comps[i].hp =
+ vp8_bin_prob_from_distribution(NMVcount->comps[i].hp);
+ branch_ct_hp[i][0] = NMVcount->comps[i].hp[0];
+ branch_ct_hp[i][1] = NMVcount->comps[i].hp[1];
+ }
+ }
+}
+
+void vp8_adapt_nmv_probs(VP8_COMMON *cm, int usehp) {
+ int i, j, k;
+ nmv_context prob;
+ unsigned int branch_ct_joint[MV_JOINTS - 1][2];
+ unsigned int branch_ct_sign[2][2];
+ unsigned int branch_ct_classes[2][MV_CLASSES - 1][2];
+ unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2];
+ unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2];
+ unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2];
+ unsigned int branch_ct_fp[2][4 - 1][2];
+ unsigned int branch_ct_class0_hp[2][2];
+ unsigned int branch_ct_hp[2][2];
+#ifdef MV_COUNT_TESTING
+ printf("joints count: ");
+ for (j = 0; j < MV_JOINTS; ++j) printf("%d ", cm->fc.NMVcount.joints[j]);
+ printf("\n"); fflush(stdout);
+ printf("signs count:\n");
+ for (i = 0; i < 2; ++i)
+ printf("%d/%d ", cm->fc.NMVcount.comps[i].sign[0], cm->fc.NMVcount.comps[i].sign[1]);
+ printf("\n"); fflush(stdout);
+ printf("classes count:\n");
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < MV_CLASSES; ++j)
+ printf("%d ", cm->fc.NMVcount.comps[i].classes[j]);
+ printf("\n"); fflush(stdout);
+ }
+ printf("class0 count:\n");
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j)
+ printf("%d ", cm->fc.NMVcount.comps[i].class0[j]);
+ printf("\n"); fflush(stdout);
+ }
+ printf("bits count:\n");
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < MV_OFFSET_BITS; ++j)
+ printf("%d/%d ", cm->fc.NMVcount.comps[i].bits[j][0],
+ cm->fc.NMVcount.comps[i].bits[j][1]);
+ printf("\n"); fflush(stdout);
+ }
+ printf("class0_fp count:\n");
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ printf("{");
+ for (k = 0; k < 4; ++k)
+ printf("%d ", cm->fc.NMVcount.comps[i].class0_fp[j][k]);
+ printf("}, ");
+ }
+ printf("\n"); fflush(stdout);
+ }
+ printf("fp count:\n");
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < 4; ++j)
+ printf("%d ", cm->fc.NMVcount.comps[i].fp[j]);
+ printf("\n"); fflush(stdout);
+ }
+ if (usehp) {
+ printf("class0_hp count:\n");
+ for (i = 0; i < 2; ++i)
+ printf("%d/%d ", cm->fc.NMVcount.comps[i].class0_hp[0],
+ cm->fc.NMVcount.comps[i].class0_hp[1]);
+ printf("\n"); fflush(stdout);
+ printf("hp count:\n");
+ for (i = 0; i < 2; ++i)
+ printf("%d/%d ", cm->fc.NMVcount.comps[i].hp[0],
+ cm->fc.NMVcount.comps[i].hp[1]);
+ printf("\n"); fflush(stdout);
+ }
+#endif
+#ifdef SMOOTH_MV_COUNTS
+ smooth_counts(&cm->fc.NMVcount.comps[0]);
+ smooth_counts(&cm->fc.NMVcount.comps[1]);
+#endif
+ vp8_counts_to_nmv_context(&cm->fc.NMVcount,
+ &prob,
+ usehp,
+ branch_ct_joint,
+ branch_ct_sign,
+ branch_ct_classes,
+ branch_ct_class0,
+ branch_ct_bits,
+ branch_ct_class0_fp,
+ branch_ct_fp,
+ branch_ct_class0_hp,
+ branch_ct_hp);
+
+ for (j = 0; j < MV_JOINTS - 1; ++j) {
+ adapt_prob(&cm->fc.nmvc.joints[j],
+ cm->fc.pre_nmvc.joints[j],
+ prob.joints[j],
+ branch_ct_joint[j]);
+ }
+ for (i = 0; i < 2; ++i) {
+ adapt_prob(&cm->fc.nmvc.comps[i].sign,
+ cm->fc.pre_nmvc.comps[i].sign,
+ prob.comps[i].sign,
+ branch_ct_sign[i]);
+ for (j = 0; j < MV_CLASSES - 1; ++j) {
+ adapt_prob(&cm->fc.nmvc.comps[i].classes[j],
+ cm->fc.pre_nmvc.comps[i].classes[j],
+ prob.comps[i].classes[j],
+ branch_ct_classes[i][j]);
+ }
+ for (j = 0; j < CLASS0_SIZE - 1; ++j) {
+ adapt_prob(&cm->fc.nmvc.comps[i].class0[j],
+ cm->fc.pre_nmvc.comps[i].class0[j],
+ prob.comps[i].class0[j],
+ branch_ct_class0[i][j]);
+ }
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ adapt_prob(&cm->fc.nmvc.comps[i].bits[j],
+ cm->fc.pre_nmvc.comps[i].bits[j],
+ prob.comps[i].bits[j],
+ branch_ct_bits[i][j]);
+ }
+ }
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ for (k = 0; k < 3; ++k) {
+ adapt_prob(&cm->fc.nmvc.comps[i].class0_fp[j][k],
+ cm->fc.pre_nmvc.comps[i].class0_fp[j][k],
+ prob.comps[i].class0_fp[j][k],
+ branch_ct_class0_fp[i][j][k]);
+ }
+ }
+ for (j = 0; j < 3; ++j) {
+ adapt_prob(&cm->fc.nmvc.comps[i].fp[j],
+ cm->fc.pre_nmvc.comps[i].fp[j],
+ prob.comps[i].fp[j],
+ branch_ct_fp[i][j]);
+ }
+ }
+ if (usehp) {
+ for (i = 0; i < 2; ++i) {
+ adapt_prob(&cm->fc.nmvc.comps[i].class0_hp,
+ cm->fc.pre_nmvc.comps[i].class0_hp,
+ prob.comps[i].class0_hp,
+ branch_ct_class0_hp[i]);
+ adapt_prob(&cm->fc.nmvc.comps[i].hp,
+ cm->fc.pre_nmvc.comps[i].hp,
+ prob.comps[i].hp,
+ branch_ct_hp[i]);
+ }
+ }
+}
+
+#else /* CONFIG_NEWMVENTROPY */
+
+#define MV_COUNT_SAT 16
+#define MV_MAX_UPDATE_FACTOR 128
+
const MV_CONTEXT_HP vp8_mv_update_probs_hp[2] = {
{{
237,
@@ -266,14 +703,6 @@
}
}
-void vp8_entropy_mv_init() {
- vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree);
- vp8_tokens_from_tree(vp8_small_mvencodings_hp, vp8_small_mvtree_hp);
-}
-
-// #define MV_COUNT_TESTING
-#define MV_COUNT_SAT 16
-#define MV_MAX_UPDATE_FACTOR 128
void vp8_adapt_mv_probs(VP8_COMMON *cm) {
int i, t, count, factor;
#ifdef MV_COUNT_TESTING
@@ -400,3 +829,28 @@
}
}
}
+
+#endif /* CONFIG_NEWMVENTROPY */
+
+void vp8_entropy_mv_init() {
+#if CONFIG_NEWMVENTROPY
+ vp8_tokens_from_tree(vp8_mv_joint_encodings, vp8_mv_joint_tree);
+ vp8_tokens_from_tree(vp8_mv_class_encodings, vp8_mv_class_tree);
+ vp8_tokens_from_tree(vp8_mv_class0_encodings, vp8_mv_class0_tree);
+ vp8_tokens_from_tree(vp8_mv_fp_encodings, vp8_mv_fp_tree);
+#else
+ vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree);
+ vp8_tokens_from_tree(vp8_small_mvencodings_hp, vp8_small_mvtree_hp);
+#endif
+}
+
+void vp8_init_mv_probs(VP8_COMMON *cm) {
+#if CONFIG_NEWMVENTROPY
+ vpx_memcpy(&cm->fc.nmvc, &vp8_default_nmv_context, sizeof(nmv_context));
+#else
+ vpx_memcpy(cm->fc.mvc,
+ vp8_default_mv_context, sizeof(vp8_default_mv_context));
+ vpx_memcpy(cm->fc.mvc_hp,
+ vp8_default_mv_context_hp, sizeof(vp8_default_mv_context_hp));
+#endif
+}
diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h
index 535d9b8..20af7e4 100644
--- a/vp8/common/entropymv.h
+++ b/vp8/common/entropymv.h
@@ -16,6 +16,121 @@
#include "vpx_config.h"
#include "blockd.h"
+struct VP8Common;
+
+void vp8_entropy_mv_init();
+void vp8_init_mv_probs(struct VP8Common *cm);
+void vp8_adapt_mv_probs(struct VP8Common *cm);
+#if CONFIG_NEWMVENTROPY
+void vp8_adapt_nmv_probs(struct VP8Common *cm, int usehp);
+#endif
+
+#if CONFIG_NEWMVENTROPY
+
+#define VP8_NMV_UPDATE_PROB 255
+//#define MV_GROUP_UPDATE
+
+#define LOW_PRECISION_MV_UPDATE /* Use 7 bit forward update */
+
+/* Symbols for coding which components are zero jointly */
+#define MV_JOINTS 4
+typedef enum {
+ MV_JOINT_ZERO = 0, /* Zero vector */
+ MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */
+ MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */
+ MV_JOINT_HNZVNZ = 3, /* Both components nonzero */
+} MV_JOINT_TYPE;
+
+extern const vp8_tree_index vp8_mv_joint_tree[2 * MV_JOINTS - 2];
+extern struct vp8_token_struct vp8_mv_joint_encodings [MV_JOINTS];
+
+/* Symbols for coding magnitude class of nonzero components */
+#define MV_CLASSES 8
+typedef enum {
+ MV_CLASS_0 = 0, /* (0, 2] integer pel */
+ MV_CLASS_1 = 1, /* (2, 4] integer pel */
+ MV_CLASS_2 = 2, /* (4, 8] integer pel */
+ MV_CLASS_3 = 3, /* (8, 16] integer pel */
+ MV_CLASS_4 = 4, /* (16, 32] integer pel */
+ MV_CLASS_5 = 5, /* (32, 64] integer pel */
+ MV_CLASS_6 = 6, /* (64, 128] integer pel */
+ MV_CLASS_7 = 7, /* (128, 256] integer pel */
+} MV_CLASS_TYPE;
+
+extern const vp8_tree_index vp8_mv_class_tree[2 * MV_CLASSES - 2];
+extern struct vp8_token_struct vp8_mv_class_encodings [MV_CLASSES];
+
+#define CLASS0_BITS 1 /* bits at integer precision for class 0 */
+#define CLASS0_SIZE (1 << CLASS0_BITS)
+#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2)
+
+#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2)
+#define MV_MAX ((1 << MV_MAX_BITS) - 1)
+#define MV_VALS ((MV_MAX << 1) + 1)
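+/* With MV_CLASSES = 8 and CLASS0_BITS = 1: MV_MAX_BITS = 11, so
+   MV_MAX = 2047 (in 1/8-pel units) and MV_VALS = 4095. */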
+
+extern const vp8_tree_index vp8_mv_class0_tree[2 * CLASS0_SIZE - 2];
+extern struct vp8_token_struct vp8_mv_class0_encodings[CLASS0_SIZE];
+
+extern const vp8_tree_index vp8_mv_fp_tree[2 * 4 - 2];
+extern struct vp8_token_struct vp8_mv_fp_encodings[4];
+
+typedef struct {
+ vp8_prob sign;
+ vp8_prob classes[MV_CLASSES - 1];
+ vp8_prob class0[CLASS0_SIZE - 1];
+ vp8_prob bits[MV_OFFSET_BITS];
+ vp8_prob class0_fp[CLASS0_SIZE][4 - 1];
+ vp8_prob fp[4 - 1];
+ vp8_prob class0_hp;
+ vp8_prob hp;
+} nmv_component;
+
+typedef struct {
+ vp8_prob joints[MV_JOINTS - 1];
+ nmv_component comps[2];
+} nmv_context;
+
+MV_JOINT_TYPE vp8_get_mv_joint(MV mv);
+MV_CLASS_TYPE vp8_get_mv_class(int z, int *offset);
+int vp8_get_mv_mag(MV_CLASS_TYPE c, int offset);
+
+
+typedef struct {
+ unsigned int mvcount[MV_VALS];
+ unsigned int sign[2];
+ unsigned int classes[MV_CLASSES];
+ unsigned int class0[CLASS0_SIZE];
+ unsigned int bits[MV_OFFSET_BITS][2];
+ unsigned int class0_fp[CLASS0_SIZE][4];
+ unsigned int fp[4];
+ unsigned int class0_hp[2];
+ unsigned int hp[2];
+} nmv_component_counts;
+
+typedef struct {
+ unsigned int joints[MV_JOINTS];
+ nmv_component_counts comps[2];
+} nmv_context_counts;
+
+void vp8_increment_nmv(const MV *mv, const MV *ref, nmv_context_counts *mvctx,
+ int usehp);
+extern const nmv_context vp8_default_nmv_context;
+void vp8_counts_to_nmv_context(
+ nmv_context_counts *NMVcount,
+ nmv_context *prob,
+ int usehp,
+ unsigned int (*branch_ct_joint)[2],
+ unsigned int (*branch_ct_sign)[2],
+ unsigned int (*branch_ct_classes)[MV_CLASSES - 1][2],
+ unsigned int (*branch_ct_class0)[CLASS0_SIZE - 1][2],
+ unsigned int (*branch_ct_bits)[MV_OFFSET_BITS][2],
+ unsigned int (*branch_ct_class0_fp)[CLASS0_SIZE][4 - 1][2],
+ unsigned int (*branch_ct_fp)[4 - 1][2],
+ unsigned int (*branch_ct_class0_hp)[2],
+ unsigned int (*branch_ct_hp)[2]);
+
+#else /* CONFIG_NEWMVENTROPY */
+
enum {
mv_max = 1023, /* max absolute value of a MV component */
MVvals = (2 * mv_max) + 1, /* # possible values "" */
@@ -73,8 +188,6 @@
extern const vp8_tree_index vp8_small_mvtree_hp[];
extern struct vp8_token_struct vp8_small_mvencodings_hp [16];
-void vp8_entropy_mv_init();
-struct VP8Common;
-void vp8_adapt_mv_probs(struct VP8Common *cm);
+#endif /* CONFIG_NEWMVENTROPY */
#endif
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index b76c4cc..4cdcbeb 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -55,10 +55,15 @@
#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
vp8_prob coef_probs_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
#endif
+#if CONFIG_NEWMVENTROPY
+ nmv_context nmvc;
+ nmv_context pre_nmvc;
+#else
MV_CONTEXT mvc[2];
MV_CONTEXT_HP mvc_hp[2];
MV_CONTEXT pre_mvc[2];
MV_CONTEXT_HP pre_mvc_hp[2];
+#endif
vp8_prob pre_bmode_prob [VP8_BINTRAMODES - 1];
vp8_prob pre_ymode_prob [VP8_YMODES - 1]; /* interframe intra mode probs */
vp8_prob pre_uv_mode_prob [VP8_YMODES][VP8_UV_MODES - 1];
@@ -88,8 +93,12 @@
unsigned int coef_counts_16x16 [BLOCK_TYPES_16X16] [COEF_BANDS]
[PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];
#endif
+#if CONFIG_NEWMVENTROPY
+ nmv_context_counts NMVcount;
+#else
unsigned int MVcount [2] [MVvals];
unsigned int MVcount_hp [2] [MVvals_hp];
+#endif
#if CONFIG_SWITCHABLE_INTERP
vp8_prob switchable_interp_prob[VP8_SWITCHABLE_FILTERS+1]
[VP8_SWITCHABLE_FILTERS-1];
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 70d3832..647b3ad 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -723,9 +723,9 @@
// Sub-pel filter
xd->subpixel_predict8x8(pTemp, len,
- _o16x16mv.as_mv.col & 15,
- _o16x16mv.as_mv.row & 15,
- pDst, dst_uvstride);
+ _o16x16mv.as_mv.col & 15,
+ _o16x16mv.as_mv.row & 15,
+ pDst, dst_uvstride);
} else {
filter_mb(pSrc, pre_stride, pDst, dst_uvstride, 8, 8);
}
diff --git a/vp8/common/treecoder.c b/vp8/common/treecoder.c
index def4caa..adf291b 100644
--- a/vp8/common/treecoder.c
+++ b/vp8/common/treecoder.c
@@ -124,3 +124,15 @@
probs[t] = vp8_prob_half;
} while (++t < tree_len);
}
+
+vp8_prob vp8_bin_prob_from_distribution(const unsigned int counts[2]) {
+ int tot_count = counts[0] + counts[1];
+ vp8_prob prob;
+ if (tot_count) {
+ prob = (counts[0] * 255 + (tot_count >> 1)) / tot_count;
+ prob += !prob;
+ } else {
+ prob = 128;
+ }
+ return prob;
+}
diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h
index c4d0aa6..b7fa17d 100644
--- a/vp8/common/treecoder.h
+++ b/vp8/common/treecoder.h
@@ -85,5 +85,6 @@
c_bool_coder_spec *s
);
+vp8_prob vp8_bin_prob_from_distribution(const unsigned int counts[2]);
#endif
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 069d073..e8b4a1f 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -172,6 +172,150 @@
}
+#if CONFIG_NEWMVENTROPY
+static int read_nmv_component(vp8_reader *r,
+ int rv,
+ const nmv_component *mvcomp) {
+ int v, s, z, c, o, d;
+ s = vp8_read(r, mvcomp->sign);
+ c = vp8_treed_read(r, vp8_mv_class_tree, mvcomp->classes);
+ if (c == MV_CLASS_0) {
+ d = vp8_treed_read(r, vp8_mv_class0_tree, mvcomp->class0);
+ } else {
+ int i, b;
+ d = 0;
+ b = c + CLASS0_BITS - 1; /* number of bits */
+ for (i = 0; i < b; ++i)
+ d |= (vp8_read(r, mvcomp->bits[i]) << i);
+ }
+ o = d << 3;
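+  /* The fractional and high-precision bits are filled in by a second
+     pass (read_nmv_component_fp()), keeping the integer pel read
+     self-contained. */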
+
+ z = vp8_get_mv_mag(c, o);
+ v = (s ? -(z + 1) : (z + 1));
+ return v;
+}
+
+static int read_nmv_component_fp(vp8_reader *r,
+ int v,
+ int rv,
+ const nmv_component *mvcomp,
+ int usehp) {
+ int s, z, c, o, d, e, f;
+ s = v < 0;
+ z = (s ? -v : v) - 1; /* magnitude - 1 */
+
+ c = vp8_get_mv_class(z, &o);
+ d = o >> 3;
+
+ if (c == MV_CLASS_0) {
+ f = vp8_treed_read(r, vp8_mv_fp_tree, mvcomp->class0_fp[d]);
+ } else {
+ f = vp8_treed_read(r, vp8_mv_fp_tree, mvcomp->fp);
+ }
+ o += (f << 1);
+
+ if (usehp) {
+ if (c == MV_CLASS_0) {
+ e = vp8_read(r, mvcomp->class0_hp);
+ } else {
+ e = vp8_read(r, mvcomp->hp);
+ }
+ o += e;
+ } else {
+ ++o; /* Note if hp is not used, the default value of the hp bit is 1 */
+ }
+ z = vp8_get_mv_mag(c, o);
+ v = (s ? -(z + 1) : (z + 1));
+ return v;
+}
+
+static void read_nmv(vp8_reader *r, MV *mv, const MV *ref,
+ const nmv_context *mvctx) {
+ MV_JOINT_TYPE j = vp8_treed_read(r, vp8_mv_joint_tree, mvctx->joints);
+  mv->row = mv->col = 0;
+ if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
+ mv->row = read_nmv_component(r, ref->row, &mvctx->comps[0]);
+ }
+ if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {
+ mv->col = read_nmv_component(r, ref->col, &mvctx->comps[1]);
+ }
+}
+
+static void read_nmv_fp(vp8_reader *r, MV *mv, const MV *ref,
+ const nmv_context *mvctx, int usehp) {
+ MV_JOINT_TYPE j = vp8_get_mv_joint(*mv);
+ if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
+ mv->row = read_nmv_component_fp(r, mv->row, ref->row, &mvctx->comps[0],
+ usehp);
+ }
+ if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {
+ mv->col = read_nmv_component_fp(r, mv->col, ref->col, &mvctx->comps[1],
+ usehp);
+ }
+}
+
+static void update_nmv(vp8_reader *bc, vp8_prob *const p,
+ const vp8_prob upd_p) {
+ if (vp8_read(bc, upd_p)) {
+#ifdef LOW_PRECISION_MV_UPDATE
+ *p = (vp8_read_literal(bc, 7) << 1) | 1;
+#else
+ *p = (vp8_read_literal(bc, 8));
+#endif
+ }
+}
+
+static void read_nmvprobs(vp8_reader *bc, nmv_context *mvctx,
+ int usehp) {
+ int i, j, k;
+#ifdef MV_GROUP_UPDATE
+ if (!vp8_read_bit(bc)) return;
+#endif
+ for (j = 0; j < MV_JOINTS - 1; ++j) {
+ update_nmv(bc, &mvctx->joints[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ for (i = 0; i < 2; ++i) {
+ update_nmv(bc, &mvctx->comps[i].sign,
+ VP8_NMV_UPDATE_PROB);
+ for (j = 0; j < MV_CLASSES - 1; ++j) {
+ update_nmv(bc, &mvctx->comps[i].classes[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < CLASS0_SIZE - 1; ++j) {
+ update_nmv(bc, &mvctx->comps[i].class0[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ update_nmv(bc, &mvctx->comps[i].bits[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ }
+
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ for (k = 0; k < 3; ++k)
+ update_nmv(bc, &mvctx->comps[i].class0_fp[j][k],
+ VP8_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < 3; ++j) {
+ update_nmv(bc, &mvctx->comps[i].fp[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ }
+
+ if (usehp) {
+ for (i = 0; i < 2; ++i) {
+ update_nmv(bc, &mvctx->comps[i].class0_hp,
+ VP8_NMV_UPDATE_PROB);
+ update_nmv(bc, &mvctx->comps[i].hp,
+ VP8_NMV_UPDATE_PROB);
+ }
+ }
+}
+
+#else
+
static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) {
const vp8_prob *const p = (const vp8_prob *) mvc;
int x = 0;
@@ -211,7 +355,6 @@
#endif
}
-
static void read_mvcontexts(vp8_reader *bc, MV_CONTEXT *mvc) {
int i = 0;
@@ -287,6 +430,8 @@
} while (++i < 2);
}
+#endif /* CONFIG_NEWMVENTROPY */
+
// Read the reference frame
static MV_REFERENCE_FRAME read_ref_frame(VP8D_COMP *pbi,
vp8_reader *const bc,
@@ -452,8 +597,12 @@
static void mb_mode_mv_init(VP8D_COMP *pbi) {
VP8_COMMON *const cm = & pbi->common;
vp8_reader *const bc = & pbi->bc;
+#if CONFIG_NEWMVENTROPY
+ nmv_context *const nmvc = &pbi->common.fc.nmvc;
+#else
MV_CONTEXT *const mvc = pbi->common.fc.mvc;
MV_CONTEXT_HP *const mvc_hp = pbi->common.fc.mvc_hp;
+#endif
MACROBLOCKD *const xd = & pbi->mb;
vpx_memset(cm->mbskip_pred_probs, 0, sizeof(cm->mbskip_pred_probs));
@@ -499,10 +648,14 @@
cm->fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
} while (++i < VP8_YMODES - 1);
}
+#if CONFIG_NEWMVENTROPY
+ read_nmvprobs(bc, nmvc, xd->allow_high_precision_mv);
+#else
if (xd->allow_high_precision_mv)
read_mvcontexts_hp(bc, mvc_hp);
else
read_mvcontexts(bc, mvc);
+#endif
}
}
@@ -585,12 +738,16 @@
int mb_row, int mb_col) {
VP8_COMMON *const cm = & pbi->common;
vp8_reader *const bc = & pbi->bc;
+#if CONFIG_NEWMVENTROPY
+ nmv_context *const nmvc = &pbi->common.fc.nmvc;
+#else
MV_CONTEXT *const mvc = pbi->common.fc.mvc;
MV_CONTEXT_HP *const mvc_hp = pbi->common.fc.mvc_hp;
+#endif
const int mis = pbi->common.mode_info_stride;
MACROBLOCKD *const xd = & pbi->mb;
- int_mv *const mv = & mbmi->mv[0];
+ int_mv *const mv = & mbmi->mv;
int mb_to_left_edge;
int mb_to_right_edge;
int mb_to_top_edge;
@@ -684,7 +841,6 @@
// Update stats on relative distance of chosen vector to the
// possible best reference vectors.
{
- int i;
MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame;
find_mv_refs(xd, mi, prev_mi,
@@ -833,8 +989,6 @@
int mv_contz;
int blockmode;
- second_leftmv.as_int = 0;
- second_abovemv.as_int = 0;
k = vp8_mbsplit_offset[s][j];
leftmv.as_int = left_block_mv(mi, k);
@@ -849,6 +1003,13 @@
switch (blockmode) {
case NEW4X4:
+#if CONFIG_NEWMVENTROPY
+ read_nmv(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc);
+ read_nmv_fp(bc, &blockmv.as_mv, &best_mv.as_mv, nmvc,
+ xd->allow_high_precision_mv);
+ vp8_increment_nmv(&blockmv.as_mv, &best_mv.as_mv,
+ &cm->fc.NMVcount, xd->allow_high_precision_mv);
+#else
if (xd->allow_high_precision_mv) {
read_mv_hp(bc, &blockmv.as_mv, (const MV_CONTEXT_HP *) mvc_hp);
cm->fc.MVcount_hp[0][mv_max_hp + (blockmv.as_mv.row)]++;
@@ -858,10 +1019,18 @@
cm->fc.MVcount[0][mv_max + (blockmv.as_mv.row >> 1)]++;
cm->fc.MVcount[1][mv_max + (blockmv.as_mv.col >> 1)]++;
}
+#endif /* CONFIG_NEWMVENTROPY */
blockmv.as_mv.row += best_mv.as_mv.row;
blockmv.as_mv.col += best_mv.as_mv.col;
if (mbmi->second_ref_frame) {
+#if CONFIG_NEWMVENTROPY
+ read_nmv(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc);
+ read_nmv_fp(bc, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
+ xd->allow_high_precision_mv);
+ vp8_increment_nmv(&secondmv.as_mv, &best_mv_second.as_mv,
+ &cm->fc.NMVcount, xd->allow_high_precision_mv);
+#else
if (xd->allow_high_precision_mv) {
read_mv_hp(bc, &secondmv.as_mv, (const MV_CONTEXT_HP *) mvc_hp);
cm->fc.MVcount_hp[0][mv_max_hp + (secondmv.as_mv.row)]++;
@@ -871,6 +1040,7 @@
cm->fc.MVcount[0][mv_max + (secondmv.as_mv.row >> 1)]++;
cm->fc.MVcount[1][mv_max + (secondmv.as_mv.col >> 1)]++;
}
+#endif /* CONFIG_NEWMVENTROPY */
secondmv.as_mv.row += best_mv_second.as_mv.row;
secondmv.as_mv.col += best_mv_second.as_mv.col;
}
@@ -975,6 +1145,13 @@
break;
case NEWMV:
+#if CONFIG_NEWMVENTROPY
+ read_nmv(bc, &mv->as_mv, &best_mv.as_mv, nmvc);
+ read_nmv_fp(bc, &mv->as_mv, &best_mv.as_mv, nmvc,
+ xd->allow_high_precision_mv);
+ vp8_increment_nmv(&mv->as_mv, &best_mv.as_mv, &cm->fc.NMVcount,
+ xd->allow_high_precision_mv);
+#else
if (xd->allow_high_precision_mv) {
read_mv_hp(bc, &mv->as_mv, (const MV_CONTEXT_HP *) mvc_hp);
cm->fc.MVcount_hp[0][mv_max_hp + (mv->as_mv.row)]++;
@@ -984,6 +1161,7 @@
cm->fc.MVcount[0][mv_max + (mv->as_mv.row >> 1)]++;
cm->fc.MVcount[1][mv_max + (mv->as_mv.col >> 1)]++;
}
+#endif /* CONFIG_NEWMVENTROPY */
mv->as_mv.row += best_mv.as_mv.row;
mv->as_mv.col += best_mv.as_mv.col;
@@ -998,6 +1176,13 @@
mb_to_top_edge,
mb_to_bottom_edge);
if (mbmi->second_ref_frame) {
+#if CONFIG_NEWMVENTROPY
+ read_nmv(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc);
+ read_nmv_fp(bc, &mbmi->mv[1].as_mv, &best_mv_second.as_mv, nmvc,
+ xd->allow_high_precision_mv);
+ vp8_increment_nmv(&mbmi->mv[1].as_mv, &best_mv_second.as_mv,
+ &cm->fc.NMVcount, xd->allow_high_precision_mv);
+#else
if (xd->allow_high_precision_mv) {
read_mv_hp(bc, &mbmi->mv[1].as_mv, (const MV_CONTEXT_HP *) mvc_hp);
cm->fc.MVcount_hp[0][mv_max_hp + (mbmi->mv[1].as_mv.row)]++;
@@ -1007,6 +1192,7 @@
cm->fc.MVcount[0][mv_max + (mbmi->mv[1].as_mv.row >> 1)]++;
cm->fc.MVcount[1][mv_max + (mbmi->mv[1].as_mv.col >> 1)]++;
}
+#endif /* CONFIG_NEWMVENTROPY */
mbmi->mv[1].as_mv.row += best_mv_second.as_mv.row;
mbmi->mv[1].as_mv.col += best_mv_second.as_mv.col;
mbmi->need_to_clamp_secondmv |=
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index d932683..5fb510b 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -923,9 +923,7 @@
if (pc->frame_type == KEY_FRAME) {
/* Various keyframe initializations */
- vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
- vpx_memcpy(pc->fc.mvc_hp, vp8_default_mv_context_hp,
- sizeof(vp8_default_mv_context_hp));
+ vp8_init_mv_probs(pc);
vp8_init_mbmode_probs(pc);
@@ -1464,8 +1462,12 @@
vp8_copy(pbi->common.fc.pre_i8x8_mode_prob, pbi->common.fc.i8x8_mode_prob);
vp8_copy(pbi->common.fc.pre_sub_mv_ref_prob, pbi->common.fc.sub_mv_ref_prob);
vp8_copy(pbi->common.fc.pre_mbsplit_prob, pbi->common.fc.mbsplit_prob);
+#if CONFIG_NEWMVENTROPY
+ pbi->common.fc.pre_nmvc = pbi->common.fc.nmvc;
+#else
vp8_copy(pbi->common.fc.pre_mvc, pbi->common.fc.mvc);
vp8_copy(pbi->common.fc.pre_mvc_hp, pbi->common.fc.mvc_hp);
+#endif
vp8_zero(pbi->common.fc.coef_counts);
vp8_zero(pbi->common.fc.coef_counts_8x8);
#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
@@ -1477,8 +1479,12 @@
vp8_zero(pbi->common.fc.i8x8_mode_counts);
vp8_zero(pbi->common.fc.sub_mv_ref_counts);
vp8_zero(pbi->common.fc.mbsplit_counts);
+#if CONFIG_NEWMVENTROPY
+ vp8_zero(pbi->common.fc.NMVcount);
+#else
vp8_zero(pbi->common.fc.MVcount);
vp8_zero(pbi->common.fc.MVcount_hp);
+#endif
vp8_zero(pbi->common.fc.mv_ref_ct);
vp8_zero(pbi->common.fc.mv_ref_ct_a);
#if COEFUPDATETYPE == 2
@@ -1544,7 +1550,11 @@
vp8_adapt_coef_probs(pc);
if (pc->frame_type != KEY_FRAME) {
vp8_adapt_mode_probs(pc);
+#if CONFIG_NEWMVENTROPY
+ vp8_adapt_nmv_probs(pc, xd->allow_high_precision_mv);
+#else
vp8_adapt_mv_probs(pc);
+#endif
vp8_update_mode_context(&pbi->common);
}
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 7d94bec..7120196 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -27,6 +27,7 @@
#include "vp8/common/seg_common.h"
#include "vp8/common/pred_common.h"
#include "vp8/common/entropy.h"
+#include "vp8/encoder/encodemv.h"
#if CONFIG_NEW_MVREF
#include "vp8/common/mvref_common.h"
@@ -36,6 +37,8 @@
unsigned __int64 Sectionbits[500];
#endif
+//int final_packing = 0;
+
#ifdef ENTROPY_STATS
int intra_mode_stats [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES];
unsigned int tree_update_hist [BLOCK_TYPES]
@@ -564,6 +567,19 @@
vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
}
+#if CONFIG_NEWMVENTROPY
+static void write_nmv (vp8_writer *w, const MV *mv, const int_mv *ref,
+ const nmv_context *nmvc, int usehp) {
+ MV e;
+ e.row = mv->row - ref->as_mv.row;
+ e.col = mv->col - ref->as_mv.col;
+
+ vp8_encode_nmv(w, &e, &ref->as_mv, nmvc);
+ vp8_encode_nmv_fp(w, &e, &ref->as_mv, nmvc, usehp);
+}
+
+#else
+
static void write_mv
(
vp8_writer *w, const MV *mv, const int_mv *ref, const MV_CONTEXT *mvc
@@ -585,6 +601,7 @@
vp8_encode_motion_vector_hp(w, &e, mvc);
}
+#endif /* CONFIG_NEWMVENTROPY */
// This function writes the current macro block's segnment id to the bitstream
// It should only be called if a segment map update is indicated.
@@ -737,8 +754,12 @@
int i;
VP8_COMMON *const pc = & cpi->common;
vp8_writer *const w = & cpi->bc;
+#if CONFIG_NEWMVENTROPY
+ const nmv_context *nmvc = &pc->fc.nmvc;
+#else
const MV_CONTEXT *mvc = pc->fc.mvc;
const MV_CONTEXT_HP *mvc_hp = pc->fc.mvc_hp;
+#endif
MACROBLOCKD *xd = &cpi->mb.e_mbd;
MODE_INFO *m;
MODE_INFO *prev_m;
@@ -754,6 +775,8 @@
int row_delta[4] = { 0, +1, 0, -1};
int col_delta[4] = { +1, -1, +1, +1};
+ //final_packing = !cpi->dummy_packing;
+
cpi->mb.partition_info = cpi->mb.pi;
// Update the probabilities used to encode reference frame data
@@ -814,10 +837,14 @@
update_mbintra_mode_probs(cpi);
+#if CONFIG_NEWMVENTROPY
+ vp8_write_nmvprobs(cpi, xd->allow_high_precision_mv);
+#else
if (xd->allow_high_precision_mv)
vp8_write_mvprobs_hp(cpi);
else
vp8_write_mvprobs(cpi);
+#endif
mb_row = 0;
for (row = 0; row < pc->mb_rows; row += 2) {
@@ -1055,10 +1082,17 @@
pick_best_mv_ref( mi->mv[0], mi->ref_mvs[rf], &best_mv);
#endif
- if (xd->allow_high_precision_mv)
+#if CONFIG_NEWMVENTROPY
+ write_nmv(w, &mi->mv[0].as_mv, &best_mv,
+ (const nmv_context*) nmvc,
+ xd->allow_high_precision_mv);
+#else
+ if (xd->allow_high_precision_mv) {
write_mv_hp(w, &mi->mv[0].as_mv, &best_mv, mvc_hp);
- else
+ } else {
write_mv(w, &mi->mv[0].as_mv, &best_mv, mvc);
+ }
+#endif
if (mi->second_ref_frame) {
#if 0 //CONFIG_NEW_MVREF
@@ -1071,10 +1105,17 @@
mi->ref_mvs[mi->second_ref_frame],
&best_second_mv);
#endif
- if (xd->allow_high_precision_mv)
+#if CONFIG_NEWMVENTROPY
+ write_nmv(w, &mi->mv[1].as_mv, &best_second_mv,
+ (const nmv_context*) nmvc,
+ xd->allow_high_precision_mv);
+#else
+ if (xd->allow_high_precision_mv) {
write_mv_hp(w, &mi->mv[1].as_mv, &best_second_mv, mvc_hp);
- else
+ } else {
write_mv(w, &mi->mv[1].as_mv, &best_second_mv, mvc);
+ }
+#endif
}
break;
case SPLITMV: {
@@ -1116,6 +1157,11 @@
#ifdef ENTROPY_STATS
active_section = 11;
#endif
+#if CONFIG_NEWMVENTROPY
+ write_nmv(w, &blockmv.as_mv, &best_mv,
+ (const nmv_context*) nmvc,
+ xd->allow_high_precision_mv);
+#else
if (xd->allow_high_precision_mv) {
write_mv_hp(w, &blockmv.as_mv, &best_mv,
(const MV_CONTEXT_HP *) mvc_hp);
@@ -1123,8 +1169,16 @@
write_mv(w, &blockmv.as_mv, &best_mv,
(const MV_CONTEXT *) mvc);
}
+#endif
if (mi->second_ref_frame) {
+#if CONFIG_NEWMVENTROPY
+ write_nmv(w,
+ &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
+ &best_second_mv,
+ (const nmv_context*) nmvc,
+ xd->allow_high_precision_mv);
+#else
if (xd->allow_high_precision_mv) {
write_mv_hp(w, &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
&best_second_mv, (const MV_CONTEXT_HP *) mvc_hp);
@@ -1132,6 +1186,7 @@
write_mv(w, &cpi->mb.partition_info->bmi[j].second_mv.as_mv,
&best_second_mv, (const MV_CONTEXT *) mvc);
}
+#endif
}
}
} while (++j < cpi->mb.partition_info->count);
@@ -2313,8 +2368,12 @@
vp8_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob);
vp8_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob);
vp8_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob);
+#if CONFIG_NEWMVENTROPY
+ cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc;
+#else
vp8_copy(cpi->common.fc.pre_mvc, cpi->common.fc.mvc);
vp8_copy(cpi->common.fc.pre_mvc_hp, cpi->common.fc.mvc_hp);
+#endif
vp8_zero(cpi->sub_mv_ref_count);
vp8_zero(cpi->mbsplit_count);
vp8_zero(cpi->common.fc.mv_ref_ct)
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 7798056..c0cd2e7 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -117,6 +117,19 @@
int *mb_norm_activity_ptr;
signed int act_zbin_adj;
+#if CONFIG_NEWMVENTROPY
+ int nmvjointcost[MV_JOINTS];
+ int nmvcosts[2][MV_VALS];
+ int *nmvcost[2];
+ int nmvcosts_hp[2][MV_VALS];
+ int *nmvcost_hp[2];
+
+ int nmvjointsadcost[MV_JOINTS];
+ int nmvsadcosts[2][MV_VALS];
+ int *nmvsadcost[2];
+ int nmvsadcosts_hp[2][MV_VALS];
+ int *nmvsadcost_hp[2];
+#else
int mvcosts[2][MVvals + 1];
int *mvcost[2];
int mvsadcosts[2][MVfpvals + 1];
@@ -125,6 +138,7 @@
int *mvcost_hp[2];
int mvsadcosts_hp[2][MVfpvals_hp + 1];
int *mvsadcost_hp[2];
+#endif /* CONFIG_NEWMVENTROPY */
int mbmode_cost[2][MB_MODE_COUNT];
int intra_uv_mode_cost[2][MB_MODE_COUNT];
@@ -150,8 +164,6 @@
signed char *gf_active_ptr;
unsigned char *active_ptr;
- MV_CONTEXT *mvc;
- MV_CONTEXT_HP *mvc_hp;
unsigned int token_costs[TX_SIZE_MAX][BLOCK_TYPES][COEF_BANDS]
[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 41b87f2..4670c31 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1280,9 +1280,6 @@
#endif
// vp8_zero(cpi->uv_mode_count)
- x->mvc = cm->fc.mvc;
- x->mvc_hp = cm->fc.mvc_hp;
-
vpx_memset(cm->above_context, 0,
sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
@@ -1357,8 +1354,12 @@
xd->mode_info_context = cm->mi;
xd->prev_mode_info_context = cm->prev_mi;
+#if CONFIG_NEWMVENTROPY
+ vp8_zero(cpi->NMVcount);
+#else
vp8_zero(cpi->MVcount);
vp8_zero(cpi->MVcount_hp);
+#endif
vp8_zero(cpi->coef_counts);
vp8_zero(cpi->coef_counts_8x8);
#if CONFIG_TX16X16 || CONFIG_HYBRIDTRANSFORM16X16
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index e5426f6..217cc00 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -20,6 +20,536 @@
extern unsigned int active_section;
#endif
+//extern int final_packing;
+
+#if CONFIG_NEWMVENTROPY
+
+#ifdef NMV_STATS
+nmv_context_counts tnmvcounts;
+#endif
+
+static void encode_nmv_component(vp8_writer *w,
+ int v,
+ int r,
+ const nmv_component *mvcomp) {
+ int s, z, c, o, d;
+ assert (v != 0); /* should not be zero */
+ s = v < 0;
+ vp8_write(w, s, mvcomp->sign);
+ z = (s ? -v : v) - 1; /* magnitude - 1 */
+
+ c = vp8_get_mv_class(z, &o);
+
+ vp8_write_token(w, vp8_mv_class_tree, mvcomp->classes,
+ vp8_mv_class_encodings + c);
+
+ d = (o >> 3); /* int mv data */
+
+ if (c == MV_CLASS_0) {
+ vp8_write_token(w, vp8_mv_class0_tree, mvcomp->class0,
+ vp8_mv_class0_encodings + d);
+ } else {
+ int i, b;
+ b = c + CLASS0_BITS - 1; /* number of bits */
+ for (i = 0; i < b; ++i)
+ vp8_write(w, ((d >> i) & 1), mvcomp->bits[i]);
+ }
+}
+
+static void encode_nmv_component_fp(vp8_writer *w,
+ int v,
+ int r,
+ const nmv_component *mvcomp,
+ int usehp) {
+ int s, z, c, o, d, f, e;
+ assert (v != 0); /* should not be zero */
+ s = v < 0;
+ z = (s ? -v : v) - 1; /* magnitude - 1 */
+
+ c = vp8_get_mv_class(z, &o);
+
+ d = (o >> 3); /* int mv data */
+ f = (o >> 1) & 3; /* fractional pel mv data */
+ e = (o & 1); /* high precision mv data */
+
+ /* Code the fractional pel bits */
+ if (c == MV_CLASS_0) {
+ vp8_write_token(w, vp8_mv_fp_tree, mvcomp->class0_fp[d],
+ vp8_mv_fp_encodings + f);
+ } else {
+ vp8_write_token(w, vp8_mv_fp_tree, mvcomp->fp,
+ vp8_mv_fp_encodings + f);
+ }
+ /* Code the high precision bit */
+ if (usehp) {
+ if (c == MV_CLASS_0) {
+ vp8_write(w, e, mvcomp->class0_hp);
+ } else {
+ vp8_write(w, e, mvcomp->hp);
+ }
+ }
+}
+
+static void build_nmv_component_cost_table(int *mvcost,
+ const nmv_component *mvcomp,
+ int usehp) {
+ int i, v;
+ int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
+ int bits_cost[MV_OFFSET_BITS][2];
+ int class0_fp_cost[CLASS0_SIZE][4], fp_cost[4];
+ int class0_hp_cost[2], hp_cost[2];
+
+ sign_cost[0] = vp8_cost_zero(mvcomp->sign);
+ sign_cost[1] = vp8_cost_one(mvcomp->sign);
+ vp8_cost_tokens(class_cost, mvcomp->classes, vp8_mv_class_tree);
+ vp8_cost_tokens(class0_cost, mvcomp->class0, vp8_mv_class0_tree);
+ for (i = 0; i < MV_OFFSET_BITS; ++i) {
+ bits_cost[i][0] = vp8_cost_zero(mvcomp->bits[i]);
+ bits_cost[i][1] = vp8_cost_one(mvcomp->bits[i]);
+ }
+
+ for (i = 0; i < CLASS0_SIZE; ++i)
+ vp8_cost_tokens(class0_fp_cost[i], mvcomp->class0_fp[i], vp8_mv_fp_tree);
+ vp8_cost_tokens(fp_cost, mvcomp->fp, vp8_mv_fp_tree);
+
+ if (usehp) {
+ class0_hp_cost[0] = vp8_cost_zero(mvcomp->class0_hp);
+ class0_hp_cost[1] = vp8_cost_one(mvcomp->class0_hp);
+ hp_cost[0] = vp8_cost_zero(mvcomp->hp);
+ hp_cost[1] = vp8_cost_one(mvcomp->hp);
+ }
+ mvcost[0] = 0;
+ for (v = 1; v <= MV_MAX; ++v) {
+ int z, c, o, d, e, f, cost = 0;
+ z = v - 1;
+ c = vp8_get_mv_class(z, &o);
+ cost += class_cost[c];
+ d = (o >> 3); /* int mv data */
+ f = (o >> 1) & 3; /* fractional pel mv data */
+ e = (o & 1); /* high precision mv data */
+ if (c == MV_CLASS_0) {
+ cost += class0_cost[d];
+ } else {
+ int i, b;
+ b = c + CLASS0_BITS - 1; /* number of bits */
+ for (i = 0; i < b; ++i)
+ cost += bits_cost[i][((d >> i) & 1)];
+ }
+ if (c == MV_CLASS_0) {
+ cost += class0_fp_cost[d][f];
+ } else {
+ cost += fp_cost[f];
+ }
+ if (usehp) {
+ if (c == MV_CLASS_0) {
+ cost += class0_hp_cost[e];
+ } else {
+ cost += hp_cost[e];
+ }
+ }
+ mvcost[v] = cost + sign_cost[0];
+ mvcost[-v] = cost + sign_cost[1];
+ }
+}
+
+static int update_nmv_savings(const unsigned int ct[2],
+ const vp8_prob cur_p,
+ const vp8_prob new_p,
+ const vp8_prob upd_p) {
+
+#ifdef LOW_PRECISION_MV_UPDATE
+ vp8_prob mod_p = new_p | 1;
+#else
+ vp8_prob mod_p = new_p;
+#endif
+ const int cur_b = vp8_cost_branch256(ct, cur_p);
+ const int mod_b = vp8_cost_branch256(ct, mod_p);
+ const int cost = 7 * 256 +
+#ifndef LOW_PRECISION_MV_UPDATE
+ 256 +
+#endif
+ (vp8_cost_one(upd_p) - vp8_cost_zero(upd_p));
+ if (cur_b - mod_b - cost > 0) {
+ return cur_b - mod_b - cost;
+ } else {
+ return -vp8_cost_zero(upd_p);
+ }
+}
+
+static int update_nmv(
+ vp8_writer *const w,
+ const unsigned int ct[2],
+ vp8_prob *const cur_p,
+ const vp8_prob new_p,
+ const vp8_prob upd_p) {
+
+#ifdef LOW_PRECISION_MV_UPDATE
+ vp8_prob mod_p = new_p | 1;
+#else
+ vp8_prob mod_p = new_p;
+#endif
+
+ const int cur_b = vp8_cost_branch256(ct, *cur_p);
+ const int mod_b = vp8_cost_branch256(ct, mod_p);
+ const int cost = 7 * 256 +
+#ifndef LOW_PRECISION_MV_UPDATE
+ 256 +
+#endif
+ (vp8_cost_one(upd_p) - vp8_cost_zero(upd_p));
+
+ if (cur_b - mod_b > cost) {
+ *cur_p = mod_p;
+ vp8_write(w, 1, upd_p);
+#ifdef LOW_PRECISION_MV_UPDATE
+ vp8_write_literal(w, mod_p >> 1, 7);
+#else
+ vp8_write_literal(w, mod_p, 8);
+#endif
+ return 1;
+ } else {
+ vp8_write(w, 0, upd_p);
+ return 0;
+ }
+}
+
+#ifdef NMV_STATS
+void init_nmvstats() {
+ vp8_zero(tnmvcounts);
+}
+
+void print_nmvstats() {
+ nmv_context prob;
+ unsigned int branch_ct_joint[MV_JOINTS - 1][2];
+ unsigned int branch_ct_sign[2][2];
+ unsigned int branch_ct_classes[2][MV_CLASSES - 1][2];
+ unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2];
+ unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2];
+ unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2];
+ unsigned int branch_ct_fp[2][4 - 1][2];
+ unsigned int branch_ct_class0_hp[2][2];
+ unsigned int branch_ct_hp[2][2];
+ int i, j, k;
+ vp8_counts_to_nmv_context(&tnmvcounts, &prob, 1,
+ branch_ct_joint, branch_ct_sign, branch_ct_classes,
+ branch_ct_class0, branch_ct_bits,
+ branch_ct_class0_fp, branch_ct_fp,
+ branch_ct_class0_hp, branch_ct_hp);
+
+ printf("\nCounts =\n { ");
+ for (j = 0; j < MV_JOINTS; ++j)
+ printf("%d, ", tnmvcounts.joints[j]);
+ printf("},\n");
+  for (i = 0; i < 2; ++i) {
+ printf(" {\n");
+ printf(" %d/%d,\n", tnmvcounts.comps[i].sign[0],
+ tnmvcounts.comps[i].sign[1]);
+ printf(" { ");
+ for (j = 0; j < MV_CLASSES; ++j)
+ printf("%d, ", tnmvcounts.comps[i].classes[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < CLASS0_SIZE; ++j)
+ printf("%d, ", tnmvcounts.comps[i].class0[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < MV_OFFSET_BITS; ++j)
+ printf("%d/%d, ", tnmvcounts.comps[i].bits[j][0],
+ tnmvcounts.comps[i].bits[j][1]);
+ printf("},\n");
+
+ printf(" {");
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ printf("{");
+ for (k = 0; k < 4; ++k)
+ printf("%d, ", tnmvcounts.comps[i].class0_fp[j][k]);
+ printf("}, ");
+ }
+ printf("},\n");
+
+ printf(" { ");
+ for (j = 0; j < 4; ++j)
+ printf("%d, ", tnmvcounts.comps[i].fp[j]);
+ printf("},\n");
+
+ printf(" %d/%d,\n",
+ tnmvcounts.comps[i].class0_hp[0],
+ tnmvcounts.comps[i].class0_hp[1]);
+ printf(" %d/%d,\n",
+ tnmvcounts.comps[i].hp[0],
+ tnmvcounts.comps[i].hp[1]);
+ printf(" },\n");
+ }
+
+ printf("\nProbs =\n { ");
+ for (j = 0; j < MV_JOINTS - 1; ++j)
+ printf("%d, ", prob.joints[j]);
+ printf("},\n");
+  for (i = 0; i < 2; ++i) {
+ printf(" {\n");
+ printf(" %d,\n", prob.comps[i].sign);
+ printf(" { ");
+ for (j = 0; j < MV_CLASSES - 1; ++j)
+ printf("%d, ", prob.comps[i].classes[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < CLASS0_SIZE - 1; ++j)
+ printf("%d, ", prob.comps[i].class0[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < MV_OFFSET_BITS; ++j)
+ printf("%d, ", prob.comps[i].bits[j]);
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ printf("{");
+ for (k = 0; k < 3; ++k)
+ printf("%d, ", prob.comps[i].class0_fp[j][k]);
+ printf("}, ");
+ }
+ printf("},\n");
+ printf(" { ");
+ for (j = 0; j < 3; ++j)
+ printf("%d, ", prob.comps[i].fp[j]);
+ printf("},\n");
+
+ printf(" %d,\n", prob.comps[i].class0_hp);
+ printf(" %d,\n", prob.comps[i].hp);
+ printf(" },\n");
+ }
+}
+
+static void add_nmvcount(nmv_context_counts *dst, nmv_context_counts *src) {
+ int i, j, k;
+ for (j = 0; j < MV_JOINTS; ++j) {
+ dst->joints[j] += src->joints[j];
+ }
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < MV_VALS; ++j) {
+ dst->comps[i].mvcount[j] += src->comps[i].mvcount[j];
+ }
+ dst->comps[i].sign[0] += src->comps[i].sign[0];
+ dst->comps[i].sign[1] += src->comps[i].sign[1];
+ for (j = 0; j < MV_CLASSES; ++j) {
+ dst->comps[i].classes[j] += src->comps[i].classes[j];
+ }
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ dst->comps[i].class0[j] += src->comps[i].class0[j];
+ }
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ dst->comps[i].bits[j][0] += src->comps[i].bits[j][0];
+ dst->comps[i].bits[j][1] += src->comps[i].bits[j][1];
+ }
+ }
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ for (k = 0; k < 4; ++k) {
+ dst->comps[i].class0_fp[j][k] += src->comps[i].class0_fp[j][k];
+ }
+ }
+ for (j = 0; j < 4; ++j) {
+ dst->comps[i].fp[j] += src->comps[i].fp[j];
+ }
+ dst->comps[i].class0_hp[0] += src->comps[i].class0_hp[0];
+ dst->comps[i].class0_hp[1] += src->comps[i].class0_hp[1];
+ dst->comps[i].hp[0] += src->comps[i].hp[0];
+ dst->comps[i].hp[1] += src->comps[i].hp[1];
+ }
+}
+#endif
+
+void vp8_write_nmvprobs(VP8_COMP * cpi, int usehp) {
+ vp8_writer *const w = & cpi->bc;
+ int i, j;
+ nmv_context prob;
+ unsigned int branch_ct_joint[MV_JOINTS - 1][2];
+ unsigned int branch_ct_sign[2][2];
+ unsigned int branch_ct_classes[2][MV_CLASSES - 1][2];
+ unsigned int branch_ct_class0[2][CLASS0_SIZE - 1][2];
+ unsigned int branch_ct_bits[2][MV_OFFSET_BITS][2];
+ unsigned int branch_ct_class0_fp[2][CLASS0_SIZE][4 - 1][2];
+ unsigned int branch_ct_fp[2][4 - 1][2];
+ unsigned int branch_ct_class0_hp[2][2];
+ unsigned int branch_ct_hp[2][2];
+ int savings = 0;
+
+#ifdef NMV_STATS
+ if (!cpi->dummy_packing)
+ add_nmvcount(&tnmvcounts, &cpi->NMVcount);
+#endif
+ vp8_counts_to_nmv_context(&cpi->NMVcount, &prob, usehp,
+ branch_ct_joint, branch_ct_sign, branch_ct_classes,
+ branch_ct_class0, branch_ct_bits,
+ branch_ct_class0_fp, branch_ct_fp,
+ branch_ct_class0_hp, branch_ct_hp);
+ /* write updates if they help */
+#ifdef MV_GROUP_UPDATE
+ for (j = 0; j < MV_JOINTS - 1; ++j) {
+ savings += update_nmv_savings(branch_ct_joint[j],
+ cpi->common.fc.nmvc.joints[j],
+ prob.joints[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ for (i = 0; i < 2; ++i) {
+ savings += update_nmv_savings(branch_ct_sign[i],
+ cpi->common.fc.nmvc.comps[i].sign,
+ prob.comps[i].sign,
+ VP8_NMV_UPDATE_PROB);
+ for (j = 0; j < MV_CLASSES - 1; ++j) {
+ savings += update_nmv_savings(branch_ct_classes[i][j],
+ cpi->common.fc.nmvc.comps[i].classes[j],
+ prob.comps[i].classes[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < CLASS0_SIZE - 1; ++j) {
+ savings += update_nmv_savings(branch_ct_class0[i][j],
+ cpi->common.fc.nmvc.comps[i].class0[j],
+ prob.comps[i].class0[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ savings += update_nmv_savings(branch_ct_bits[i][j],
+ cpi->common.fc.nmvc.comps[i].bits[j],
+ prob.comps[i].bits[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ }
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ int k;
+ for (k = 0; k < 3; ++k) {
+ savings += update_nmv_savings(branch_ct_class0_fp[i][j][k],
+ cpi->common.fc.nmvc.comps[i].class0_fp[j][k],
+ prob.comps[i].class0_fp[j][k],
+ VP8_NMV_UPDATE_PROB);
+ }
+ }
+ for (j = 0; j < 3; ++j) {
+ savings += update_nmv_savings(branch_ct_fp[i][j],
+ cpi->common.fc.nmvc.comps[i].fp[j],
+ prob.comps[i].fp[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ }
+ if (usehp) {
+ for (i = 0; i < 2; ++i) {
+ savings += update_nmv_savings(branch_ct_class0_hp[i],
+ cpi->common.fc.nmvc.comps[i].class0_hp,
+ prob.comps[i].class0_hp,
+ VP8_NMV_UPDATE_PROB);
+ savings += update_nmv_savings(branch_ct_hp[i],
+ cpi->common.fc.nmvc.comps[i].hp,
+ prob.comps[i].hp,
+ VP8_NMV_UPDATE_PROB);
+ }
+ }
+ if (savings <= 0) {
+ vp8_write_bit(w, 0);
+ return;
+ }
+ vp8_write_bit(w, 1);
+#endif
+
+ for (j = 0; j < MV_JOINTS - 1; ++j) {
+ update_nmv(w, branch_ct_joint[j],
+ &cpi->common.fc.nmvc.joints[j],
+ prob.joints[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ for (i = 0; i < 2; ++i) {
+ update_nmv(w, branch_ct_sign[i],
+ &cpi->common.fc.nmvc.comps[i].sign,
+ prob.comps[i].sign,
+ VP8_NMV_UPDATE_PROB);
+ for (j = 0; j < MV_CLASSES - 1; ++j) {
+ update_nmv(w, branch_ct_classes[i][j],
+ &cpi->common.fc.nmvc.comps[i].classes[j],
+ prob.comps[i].classes[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < CLASS0_SIZE - 1; ++j) {
+ update_nmv(w, branch_ct_class0[i][j],
+ &cpi->common.fc.nmvc.comps[i].class0[j],
+ prob.comps[i].class0[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ update_nmv(w, branch_ct_bits[i][j],
+ &cpi->common.fc.nmvc.comps[i].bits[j],
+ prob.comps[i].bits[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ }
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < CLASS0_SIZE; ++j) {
+ int k;
+ for (k = 0; k < 3; ++k) {
+ update_nmv(w, branch_ct_class0_fp[i][j][k],
+ &cpi->common.fc.nmvc.comps[i].class0_fp[j][k],
+ prob.comps[i].class0_fp[j][k],
+ VP8_NMV_UPDATE_PROB);
+ }
+ }
+ for (j = 0; j < 3; ++j) {
+ update_nmv(w, branch_ct_fp[i][j],
+ &cpi->common.fc.nmvc.comps[i].fp[j],
+ prob.comps[i].fp[j],
+ VP8_NMV_UPDATE_PROB);
+ }
+ }
+ if (usehp) {
+ for (i = 0; i < 2; ++i) {
+ update_nmv(w, branch_ct_class0_hp[i],
+ &cpi->common.fc.nmvc.comps[i].class0_hp,
+ prob.comps[i].class0_hp,
+ VP8_NMV_UPDATE_PROB);
+ update_nmv(w, branch_ct_hp[i],
+ &cpi->common.fc.nmvc.comps[i].hp,
+ prob.comps[i].hp,
+ VP8_NMV_UPDATE_PROB);
+ }
+ }
+}
+
+void vp8_encode_nmv(vp8_writer *w, const MV *mv, const MV *ref,
+ const nmv_context *mvctx) {
+ MV_JOINT_TYPE j = vp8_get_mv_joint(*mv);
+ vp8_write_token(w, vp8_mv_joint_tree, mvctx->joints,
+ vp8_mv_joint_encodings + j);
+ if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
+    encode_nmv_component(w, mv->row, ref->row, &mvctx->comps[0]);
+ }
+ if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {
+ encode_nmv_component(w, mv->col, ref->col, &mvctx->comps[1]);
+ }
+}
+
+void vp8_encode_nmv_fp(vp8_writer *w, const MV *mv, const MV *ref,
+ const nmv_context *mvctx, int usehp) {
+ MV_JOINT_TYPE j = vp8_get_mv_joint(*mv);
+ if (j == MV_JOINT_HZVNZ || j == MV_JOINT_HNZVNZ) {
+ encode_nmv_component_fp(w, mv->row, ref->row, &mvctx->comps[0], usehp);
+ }
+ if (j == MV_JOINT_HNZVZ || j == MV_JOINT_HNZVNZ) {
+ encode_nmv_component_fp(w, mv->col, ref->col, &mvctx->comps[1], usehp);
+ }
+}
+
+void vp8_build_nmv_cost_table(int *mvjoint,
+ int *mvcost[2],
+ const nmv_context *mvctx,
+ int usehp,
+ int mvc_flag_v,
+ int mvc_flag_h) {
+ vp8_clear_system_state();
+ vp8_cost_tokens(mvjoint, mvctx->joints, vp8_mv_joint_tree);
+ if (mvc_flag_v)
+ build_nmv_component_cost_table(mvcost[0], &mvctx->comps[0], usehp);
+ if (mvc_flag_h)
+ build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp);
+}
+
+#else /* CONFIG_NEWMVENTROPY */
+
static void encode_mvcomponent(
vp8_writer *const w,
const int v,
@@ -596,8 +1126,9 @@
vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp,
(const MV_CONTEXT_HP *)
cpi->common.fc.mvc_hp, flags);
-
#ifdef ENTROPY_STATS
active_section = 5;
#endif
}
+
+#endif /* CONFIG_NEWMVENTROPY */
diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h
index 7e33007..e675fe0 100644
--- a/vp8/encoder/encodemv.h
+++ b/vp8/encoder/encodemv.h
@@ -14,11 +14,31 @@
#include "onyx_int.h"
+#if CONFIG_NEWMVENTROPY
+void vp8_write_nmvprobs(VP8_COMP *, int usehp);
+void vp8_encode_nmv(vp8_writer *w, const MV *mv, const MV *ref,
+ const nmv_context *mvctx);
+void vp8_encode_nmv_fp(vp8_writer *w, const MV *mv, const MV *ref,
+ const nmv_context *mvctx, int usehp);
+void vp8_build_nmv_cost_table(int *mvjoint,
+ int *mvcost[2],
+ const nmv_context *mvctx,
+ int usehp,
+ int mvc_flag_v,
+ int mvc_flag_h);
+#else /* CONFIG_NEWMVENTROPY */
void vp8_write_mvprobs(VP8_COMP *);
-void vp8_encode_motion_vector(vp8_writer *, const MV *, const MV_CONTEXT *);
-void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]);
+void vp8_encode_motion_vector(vp8_writer *, const MV *,
+ const MV_CONTEXT *);
+void vp8_build_component_cost_table(int *mvcost[2],
+ const MV_CONTEXT *mvc,
+ int mvc_flag[2]);
void vp8_write_mvprobs_hp(VP8_COMP *);
-void vp8_encode_motion_vector_hp(vp8_writer *, const MV *, const MV_CONTEXT_HP *);
-void vp8_build_component_cost_table_hp(int *mvcost[2], const MV_CONTEXT_HP *mvc, int mvc_flag[2]);
+void vp8_encode_motion_vector_hp(vp8_writer *, const MV *,
+ const MV_CONTEXT_HP *);
+void vp8_build_component_cost_table_hp(int *mvcost[2],
+ const MV_CONTEXT_HP *mvc,
+ int mvc_flag[2]);
+#endif /* CONFIG_NEWMVENTROPY */
#endif
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index c1504f2..1e54371 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -28,6 +28,7 @@
#include "rdopt.h"
#include "ratectrl.h"
#include "vp8/common/quant_common.h"
+#include "vp8/common/entropymv.h"
#include "encodemv.h"
#define OUTPUT_FPF 0
@@ -38,8 +39,6 @@
#define IF_RTCD(x) NULL
#endif
-#define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost)
-
extern void vp8_build_block_offsets(MACROBLOCK *x);
extern void vp8_setup_block_ptrs(MACROBLOCK *x);
extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi);
@@ -492,11 +491,13 @@
// if ( 0 )
{
int flag[2] = {1, 1};
+ vp8_init_mv_probs(cm);
+#if CONFIG_NEWMVENTROPY
vp8_initialize_rd_consts(cpi, cm->base_qindex + cm->y1dc_delta_q);
- vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
+#else
vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
- vpx_memcpy(cm->fc.mvc_hp, vp8_default_mv_context_hp, sizeof(vp8_default_mv_context_hp));
vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cm->fc.mvc_hp, flag);
+#endif
}
// for each macroblock row in image
diff --git a/vp8/encoder/mbgraph.c b/vp8/encoder/mbgraph.c
index dde000a..d512ae4 100644
--- a/vp8/encoder/mbgraph.c
+++ b/vp8/encoder/mbgraph.c
@@ -31,12 +31,6 @@
vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
unsigned int best_err;
int step_param, further_steps;
- static int dummy_cost[2 * mv_max + 1];
- int *mvcost[2] = { &dummy_cost[mv_max + 1], &dummy_cost[mv_max + 1] };
- int *mvsadcost[2] = { &dummy_cost[mv_max + 1], &dummy_cost[mv_max + 1] };
- static int dummy_cost_hp[2 * mv_max_hp + 1];
- int *mvcost_hp[2] = { &dummy_cost_hp[mv_max_hp + 1], &dummy_cost_hp[mv_max_hp + 1] };
- int *mvsadcost_hp[2] = { &dummy_cost_hp[mv_max_hp + 1], &dummy_cost_hp[mv_max_hp + 1] };
int tmp_col_min = x->mv_col_min;
int tmp_col_max = x->mv_col_max;
@@ -65,8 +59,8 @@
step_param,
x->errorperbit,
&v_fn_ptr,
- xd->allow_high_precision_mv ? mvsadcost_hp : mvsadcost,
- xd->allow_high_precision_mv ? mvcost_hp : mvcost,
+ NULLMVCOST,
+ NULLMVCOST,
ref_mv);
// Try sub-pixel MC
@@ -78,7 +72,7 @@
x, b, d,
dst_mv, ref_mv,
x->errorperbit, &v_fn_ptr,
- xd->allow_high_precision_mv ? mvcost_hp : mvcost,
+ NULLMVCOST,
& distortion, &sse);
}
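
Note on the mbgraph changes above: the static dummy_cost arrays existed only to feed
zero costs into the search, and mv_err_cost/mvsad_err_cost (mcomp.c) already return 0
when given a null table, so passing NULLMVCOST is equivalent and drops the per-call
array setup. NULLMVCOST, defined in mcomp.h later in this patch, expands to the right
number of NULL arguments for each build; mirrored here for reference:

#if CONFIG_NEWMVENTROPY
#define NULLMVCOST NULL, NULL  /* joint-cost and component-cost tables */
#else
#define NULLMVCOST NULL        /* component-cost tables only */
#endif
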
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 79e10a5..76accd4 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -42,42 +42,59 @@
x->mv_row_max = row_max;
}
-int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
+int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS,
int Weight, int ishp) {
- // MV costing is based on the distribution of vectors in the previous frame
- // and as such will tend to over state the cost of vectors. In addition
- // coding a new vector can have a knock on effect on the cost of subsequent
- // vectors and the quality of prediction from NEAR and NEAREST for subsequent
- // blocks. The "Weight" parameter allows, to a limited extent, for some
- // account to be taken of these factors.
- return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> (ishp == 0)] +
- mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> (ishp == 0)])
+ MV v;
+ v.row = (mv->as_mv.row - ref->as_mv.row);
+ v.col = (mv->as_mv.col - ref->as_mv.col);
+#if CONFIG_NEWMVENTROPY
+ return ((mvjcost[vp8_get_mv_joint(v)] +
+ mvcost[0][v.row] + mvcost[1][v.col]) *
+ Weight) >> 7;
+#else
+ return ((mvcost[0][v.row >> (ishp == 0)] +
+ mvcost[1][v.col >> (ishp == 0)])
* Weight) >> 7;
+#endif
}
-static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
+static int mv_err_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS,
int error_per_bit, int ishp) {
- // Ignore costing if mvcost is NULL
- if (mvcost)
- return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> (ishp == 0)] +
- mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> (ishp == 0)])
- * error_per_bit + 128) >> 8;
+ if (mvcost) {
+ MV v;
+ v.row = (mv->as_mv.row - ref->as_mv.row);
+ v.col = (mv->as_mv.col - ref->as_mv.col);
+#if CONFIG_NEWMVENTROPY
+ return ((mvjcost[vp8_get_mv_joint(v)] +
+ mvcost[0][v.row] + mvcost[1][v.col]) *
+ error_per_bit + 128) >> 8;
+#else
+ return ((mvcost[0][v.row >> (ishp == 0)] +
+ mvcost[1][v.col >> (ishp == 0)]) * error_per_bit + 128) >> 8;
+#endif
+ }
return 0;
}
-
-static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
+static int mvsad_err_cost(int_mv *mv, int_mv *ref, DEC_MVSADCOSTS,
int error_per_bit) {
- // Calculate sad error cost on full pixel basis.
- // Ignore costing if mvcost is NULL
- if (mvsadcost)
- return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
- mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
+
+ if (mvsadcost) {
+ MV v;
+ v.row = (mv->as_mv.row - ref->as_mv.row);
+ v.col = (mv->as_mv.col - ref->as_mv.col);
+#if CONFIG_NEWMVENTROPY
+ return ((mvjsadcost[vp8_get_mv_joint(v)] +
+ mvsadcost[0][v.row] + mvsadcost[1][v.col]) *
+ error_per_bit + 128) >> 8;
+#else
+ return ((mvsadcost[0][v.row] + mvsadcost[1][v.col])
* error_per_bit + 128) >> 8;
+#endif
+ }
return 0;
}
-
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
int Len;
int search_site_count = 0;
@@ -185,7 +202,6 @@
x->ss[search_site_count].offset = Len * stride + Len;
search_site_count++;
-
// Contract.
Len /= 2;
}
@@ -204,18 +220,35 @@
* could reduce the area.
*/
-#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) // pointer to predictor base of a motionvector
-#define SP(x) (((x)&3)<<2) // convert motion vector component to offset for svf calc
-#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128 )>>8 : 0) // estimated cost of a motion vector (r,c)
-#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
-#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
-#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
+#if CONFIG_NEWMVENTROPY
+/* estimated cost of a motion vector (r,c) */
+#define MVC(r,c) \
+ (mvcost ? \
+ ((mvjcost[((r)!=rr)*2 + ((c)!=rc)] + \
+ mvcost[0][((r)-rr)] + mvcost[1][((c)-rc)]) * error_per_bit + 128 )>>8 : 0)
+#else
+#define MVC(r,c) \
+ (mvcost ? \
+ ((mvcost[0][((r)-rr)>>(xd->allow_high_precision_mv==0)] + \
+ mvcost[1][((c)-rc)>>(xd->allow_high_precision_mv==0)]) * \
+ error_per_bit + 128 )>>8 : 0)
+#endif /* CONFIG_NEWMVENTROPY */
-#define PREHP(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset))) // pointer to predictor base of a motionvector
-#define SPHP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
-#define DISTHP(r,c) vfp->svf( PREHP(r,c), y_stride, SPHP(c),SPHP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
-#define ERRHP(r,c) (MVC(r,c)+DISTHP(r,c)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = ((xd->allow_high_precision_mv)?DISTHP(r,c):DIST(r,c)); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
+#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
+
+#define IFMVCV(r,c,s,e) \
+ if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
+
+/* pointer to predictor base of a motionvector */
+#define PRE(r,c) (y + (((r)>>3) * y_stride + ((c)>>3) -(offset)))
+
+/* returns subpixel variance error function */
+#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
+
+/* checks if (r,c) has better score than previous best */
+#define CHECK_BETTER(v,r,c) \
+ IFMVCV(r,c,{thismse = (DIST(r,c)); if((v = MVC(r,c)+thismse) < besterr) \
+ { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)
#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))
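
For reference, the CONFIG_NEWMVENTROPY flavour of MVC(r,c) written out as a plain
function (a sketch only; mv_cost_sketch is not in the patch):

/* Joint-type cost plus per-component costs, scaled by error_per_bit
   with round-to-nearest (+128 before the >> 8). */
static int mv_cost_sketch(int r, int c, int rr, int rc,
                          const int *mvjcost, int *const mvcost[2],
                          int error_per_bit) {
  const int j = (r != rr) * 2 + (c != rc);  /* row differs -> +2, col -> +1 */
  const int bits = mvjcost[j] + mvcost[0][r - rr] + mvcost[1][c - rc];
  return (bits * error_per_bit + 128) >> 8;
}
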
@@ -224,7 +257,8 @@
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
const vp8_variance_fn_ptr_t *vfp,
- int *mvcost[2], int *distortion,
+ DEC_MVCOSTS,
+ int *distortion,
unsigned int *sse1) {
unsigned char *z = (*(b->base_src) + b->src);
MACROBLOCKD *xd = &x->e_mbd;
@@ -268,27 +302,22 @@
#endif
- if (xd->allow_high_precision_mv) {
- rr = ref_mv->as_mv.row;
- rc = ref_mv->as_mv.col;
- br = bestmv->as_mv.row << 3;
- bc = bestmv->as_mv.col << 3;
- hstep = 4;
- minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width_hp) - 1));
- maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width_hp) - 1));
- minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width_hp) - 1));
- maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width_hp) - 1));
- } else {
- rr = ref_mv->as_mv.row >> 1;
- rc = ref_mv->as_mv.col >> 1;
- br = bestmv->as_mv.row << 2;
- bc = bestmv->as_mv.col << 2;
- hstep = 2;
- minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
- maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
- minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
- maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
- }
+ rr = ref_mv->as_mv.row;
+ rc = ref_mv->as_mv.col;
+ br = bestmv->as_mv.row << 3;
+ bc = bestmv->as_mv.col << 3;
+ hstep = 4;
+#if CONFIG_NEWMVENTROPY
+ minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
+ maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
+ minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
+ maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));
+#else
+ minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << mvlong_width_hp) - 1));
+ maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << mvlong_width_hp) - 1));
+ minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << mvlong_width_hp) - 1));
+ maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << mvlong_width_hp) - 1));
+#endif
tr = br;
tc = bc;
@@ -303,8 +332,8 @@
// calculate central point error
besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
*distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit,
- xd->allow_high_precision_mv);
+ besterr += mv_err_cost(bestmv, ref_mv, MVCOSTS,
+ error_per_bit, xd->allow_high_precision_mv);
// TODO: Each subsequent iteration checks at least one point in
// common with the last iteration could be 2 ( if diag selected)
@@ -407,13 +436,8 @@
tc = bc;
}
}
- if (x->e_mbd.allow_high_precision_mv) {
- bestmv->as_mv.row = br;
- bestmv->as_mv.col = bc;
- } else {
- bestmv->as_mv.row = br << 1;
- bestmv->as_mv.col = bc << 1;
- }
+ bestmv->as_mv.row = br;
+ bestmv->as_mv.col = bc;
if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
(abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
@@ -423,25 +447,17 @@
}
#undef MVC
#undef PRE
-#undef SP
#undef DIST
#undef IFMVCV
-#undef ERR
#undef CHECK_BETTER
#undef MIN
#undef MAX
-#undef PREHP
-#undef DPHP
-#undef DISTHP
-#undef ERRHP
-
-#define SP(x) (((x)&7)<<1) // convert motion vector component to offset for svf calc
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
const vp8_variance_fn_ptr_t *vfp,
- int *mvcost[2], int *distortion,
+ DEC_MVCOSTS, int *distortion,
unsigned int *sse1) {
int bestmse = INT_MAX;
int_mv startmv;
@@ -478,14 +494,14 @@
// calculate central point error
bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
*distortion = bestmse;
- bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit,
+ bestmse += mv_err_cost(bestmv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
// go left then right and check error
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (left < bestmse) {
@@ -497,7 +513,7 @@
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
- right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (right < bestmse) {
@@ -511,7 +527,7 @@
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (up < bestmse) {
@@ -523,7 +539,7 @@
this_mv.as_mv.row += 8;
thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ down = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (down < bestmse) {
@@ -564,7 +580,7 @@
break;
}
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (diag < bestmse) {
@@ -606,7 +622,7 @@
b->src_stride, &sse);
}
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (left < bestmse) {
@@ -620,7 +636,7 @@
thismse = vfp->svf(y, y_stride,
SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
z, b->src_stride, &sse);
- right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (right < bestmse) {
@@ -644,7 +660,7 @@
z, b->src_stride, &sse);
}
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (up < bestmse) {
@@ -657,7 +673,7 @@
this_mv.as_mv.row += 4;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
z, b->src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ down = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (down < bestmse) {
@@ -736,7 +752,7 @@
break;
}
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (diag < bestmse) {
@@ -776,7 +792,7 @@
z, b->src_stride, &sse);
}
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (left < bestmse) {
@@ -787,8 +803,10 @@
}
this_mv.as_mv.col += 2;
- thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
- right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
+ thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+ z, b->src_stride, &sse);
+ right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
+ xd->allow_high_precision_mv);
if (right < bestmse) {
*bestmv = this_mv;
@@ -808,7 +826,8 @@
thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(7), z, b->src_stride, &sse);
}
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
+ up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
+ xd->allow_high_precision_mv);
if (up < bestmse) {
*bestmv = this_mv;
@@ -819,7 +838,8 @@
this_mv.as_mv.row += 2;
thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), z, b->src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
+ xd->allow_high_precision_mv);
if (down < bestmse) {
*bestmv = this_mv;
@@ -892,7 +912,8 @@
break;
}
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit, xd->allow_high_precision_mv);
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
+ xd->allow_high_precision_mv);
if (diag < bestmse) {
*bestmv = this_mv;
@@ -910,7 +931,8 @@
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
const vp8_variance_fn_ptr_t *vfp,
- int *mvcost[2], int *distortion,
+ DEC_MVCOSTS,
+ int *distortion,
unsigned int *sse1) {
int bestmse = INT_MAX;
int_mv startmv;
@@ -946,14 +968,14 @@
// calculate central point error
bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
*distortion = bestmse;
- bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit,
+ bestmse += mv_err_cost(bestmv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
// go left then right and check error
this_mv.as_mv.row = startmv.as_mv.row;
this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
- left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ left = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (left < bestmse) {
@@ -965,7 +987,7 @@
this_mv.as_mv.col += 8;
thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
- right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ right = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (right < bestmse) {
@@ -979,7 +1001,7 @@
this_mv.as_mv.col = startmv.as_mv.col;
this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
- up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ up = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (up < bestmse) {
@@ -991,7 +1013,7 @@
this_mv.as_mv.row += 8;
thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
- down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ down = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (down < bestmse) {
@@ -1029,7 +1051,7 @@
break;
}
- diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit,
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, MVCOSTS, error_per_bit,
xd->allow_high_precision_mv);
if (diag < bestmse) {
@@ -1063,7 +1085,7 @@
{\
if (thissad < bestsad)\
{\
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit);\
if (thissad < bestsad)\
{\
bestsad = thissad;\
@@ -1091,8 +1113,8 @@
int search_param,
int sad_per_bit,
const vp8_variance_fn_ptr_t *vfp,
- int *mvsadcost[2],
- int *mvcost[2],
+ DEC_MVSADCOSTS,
+ DEC_MVCOSTS,
int_mv *center_mv
) {
MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} };
@@ -1128,7 +1150,7 @@
this_mv.as_mv.col = bc;
bestsad = vfp->sdf(what, what_stride, this_offset,
in_what_stride, 0x7fffffff)
- + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
+ + mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit);
// hex search
// j=0
@@ -1240,7 +1262,7 @@
int vp8_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *ref_mv, int_mv *best_mv,
int search_param, int sad_per_bit, int *num00,
- vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
+ vp8_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS,
int_mv *center_mv) {
int i, j, step;
@@ -1264,14 +1286,19 @@
unsigned char *check_here;
int thissad;
MACROBLOCKD *xd = &x->e_mbd;
-
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
int_mv fcenter_mv;
+#if CONFIG_NEWMVENTROPY
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+#else
+ int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
if (xd->allow_high_precision_mv) {
mvsadcost[0] = x->mvsadcost_hp[0];
mvsadcost[1] = x->mvsadcost_hp[1];
}
+#endif
+
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1289,7 +1316,7 @@
// Check the starting position
bestsad = fn_ptr->sdf(what, what_stride, in_what,
in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
+ + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit);
// search_param determines the length of the initial step and hence the number of iterations:
// 0 = initial step of MAX_FIRST_STEP pel, 1 = MAX_FIRST_STEP/2 pel, 2 = MAX_FIRST_STEP/4 pel, etc.
@@ -1315,7 +1342,7 @@
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ MVSADCOSTS, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1345,7 +1372,7 @@
return
fn_ptr->vf(what, what_stride, best_address, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit,
+ mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit,
xd->allow_high_precision_mv);
}
@@ -1353,7 +1380,7 @@
int_mv *ref_mv, int_mv *best_mv, int search_param,
int sad_per_bit, int *num00,
vp8_variance_fn_ptr_t *fn_ptr,
- int *mvcost[2], int_mv *center_mv) {
+ DEC_MVCOSTS, int_mv *center_mv) {
int i, j, step;
unsigned char *what = (*(b->base_src) + b->src);
@@ -1378,14 +1405,19 @@
unsigned char *check_here;
unsigned int thissad;
MACROBLOCKD *xd = &x->e_mbd;
-
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
int_mv fcenter_mv;
+#if CONFIG_NEWMVENTROPY
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+#else
+ int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
if (xd->allow_high_precision_mv) {
mvsadcost[0] = x->mvsadcost_hp[0];
mvsadcost[1] = x->mvsadcost_hp[1];
}
+#endif
+
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1403,7 +1435,7 @@
// Check the starting position
bestsad = fn_ptr->sdf(what, what_stride,
in_what, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
+ + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit);
// search_param determines the length of the initial step and hence the number of iterations:
// 0 = initial step of MAX_FIRST_STEP pel, 1 = MAX_FIRST_STEP/2 pel, 2 = MAX_FIRST_STEP/4 pel, etc.
@@ -1438,7 +1470,7 @@
this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ MVSADCOSTS, sad_per_bit);
if (sad_array[t] < bestsad) {
bestsad = sad_array[t];
@@ -1462,7 +1494,7 @@
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ MVSADCOSTS, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1492,11 +1524,10 @@
return
fn_ptr->vf(what, what_stride, best_address, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit,
+ mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit,
xd->allow_high_precision_mv);
}
-#define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost)
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
point as the best match, we will do a final 1-away diamond
refining search */
@@ -1558,7 +1589,7 @@
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int sad_per_bit, int distance,
- vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
+ vp8_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS,
int_mv *center_mv) {
unsigned char *what = (*(b->base_src) + b->src);
int what_stride = b->src_stride;
@@ -1582,14 +1613,19 @@
int row_max = ref_row + distance;
int col_min = ref_col - distance;
int col_max = ref_col + distance;
-
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
int_mv fcenter_mv;
+#if CONFIG_NEWMVENTROPY
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+#else
+ int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
if (xd->allow_high_precision_mv) {
mvsadcost[0] = x->mvsadcost_hp[0];
mvsadcost[1] = x->mvsadcost_hp[1];
}
+#endif
+
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1603,7 +1639,7 @@
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
+ + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit);
// Apply further limits to prevent us from using vectors that stretch beyond the UMV border
if (col_min < x->mv_col_min)
@@ -1627,7 +1663,7 @@
this_mv.as_mv.col = c;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ MVSADCOSTS, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1647,7 +1683,7 @@
return
fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit,
+ mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit,
xd->allow_high_precision_mv);
else
return INT_MAX;
@@ -1655,7 +1691,7 @@
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int sad_per_bit, int distance,
- vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
+ vp8_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS,
int_mv *center_mv) {
unsigned char *what = (*(b->base_src) + b->src);
int what_stride = b->src_stride;
@@ -1681,14 +1717,19 @@
int col_max = ref_col + distance;
unsigned int sad_array[3];
-
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
int_mv fcenter_mv;
+#if CONFIG_NEWMVENTROPY
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+#else
+ int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
if (xd->allow_high_precision_mv) {
mvsadcost[0] = x->mvsadcost_hp[0];
mvsadcost[1] = x->mvsadcost_hp[1];
}
+#endif
+
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1702,7 +1743,7 @@
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride,
bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
+ + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit);
// Apply further limits to prevent us from using vectors that stretch beyond the UMV border
if (col_min < x->mv_col_min)
@@ -1733,7 +1774,7 @@
if (thissad < bestsad) {
this_mv.as_mv.col = c;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ MVSADCOSTS, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1754,7 +1795,7 @@
if (thissad < bestsad) {
this_mv.as_mv.col = c;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ MVSADCOSTS, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1777,7 +1818,7 @@
return
fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit,
+ mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit,
xd->allow_high_precision_mv);
else
return INT_MAX;
@@ -1785,7 +1826,8 @@
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int sad_per_bit, int distance,
- vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
+ vp8_variance_fn_ptr_t *fn_ptr,
+ DEC_MVCOSTS,
int_mv *center_mv) {
unsigned char *what = (*(b->base_src) + b->src);
int what_stride = b->src_stride;
@@ -1812,14 +1854,19 @@
DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
unsigned int sad_array[3];
-
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
int_mv fcenter_mv;
+#if CONFIG_NEWMVENTROPY
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+#else
+ int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
if (xd->allow_high_precision_mv) {
mvsadcost[0] = x->mvsadcost_hp[0];
mvsadcost[1] = x->mvsadcost_hp[1];
}
+#endif
+
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
@@ -1833,7 +1880,7 @@
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride,
bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
+ + mvsad_err_cost(best_mv, &fcenter_mv, MVSADCOSTS, sad_per_bit);
// Apply further limits to prevent us from using vectors that stretch beyond the UMV border
if (col_min < x->mv_col_min)
@@ -1864,7 +1911,7 @@
if (thissad < bestsad) {
this_mv.as_mv.col = c;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ MVSADCOSTS, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1890,7 +1937,7 @@
if (thissad < bestsad) {
this_mv.as_mv.col = c;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ MVSADCOSTS, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1911,7 +1958,7 @@
if (thissad < bestsad) {
this_mv.as_mv.col = c;
thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvsadcost, sad_per_bit);
+ MVSADCOSTS, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1933,7 +1980,7 @@
return
fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit,
+ mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit,
xd->allow_high_precision_mv);
else
return INT_MAX;
@@ -1941,7 +1988,7 @@
int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int error_per_bit, int search_range,
- vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
+ vp8_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS,
int_mv *center_mv) {
MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
int i, j;
@@ -1957,19 +2004,24 @@
int_mv this_mv;
unsigned int bestsad = INT_MAX;
MACROBLOCKD *xd = &x->e_mbd;
-
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
int_mv fcenter_mv;
+#if CONFIG_NEWMVENTROPY
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+#else
+ int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
if (xd->allow_high_precision_mv) {
mvsadcost[0] = x->mvsadcost_hp[0];
mvsadcost[1] = x->mvsadcost_hp[1];
}
+#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
+ mvsad_err_cost(ref_mv, &fcenter_mv, MVSADCOSTS, error_per_bit);
for (i = 0; i < search_range; i++) {
int best_site = -1;
@@ -1986,7 +2038,7 @@
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, error_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -2012,7 +2064,7 @@
return
fn_ptr->vf(what, what_stride, best_address, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit,
+ mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit,
xd->allow_high_precision_mv);
else
return INT_MAX;
@@ -2021,7 +2073,7 @@
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int_mv *ref_mv, int error_per_bit,
int search_range, vp8_variance_fn_ptr_t *fn_ptr,
- int *mvcost[2], int_mv *center_mv) {
+ DEC_MVCOSTS, int_mv *center_mv) {
MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
int i, j;
short this_row_offset, this_col_offset;
@@ -2036,19 +2088,24 @@
int_mv this_mv;
unsigned int bestsad = INT_MAX;
MACROBLOCKD *xd = &x->e_mbd;
-
- int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
int_mv fcenter_mv;
+#if CONFIG_NEWMVENTROPY
+ int *mvjsadcost = x->nmvjointsadcost;
+ int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+#else
+ int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
if (xd->allow_high_precision_mv) {
mvsadcost[0] = x->mvsadcost_hp[0];
mvsadcost[1] = x->mvsadcost_hp[1];
}
+#endif
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
+ mvsad_err_cost(ref_mv, &fcenter_mv, MVSADCOSTS, error_per_bit);
for (i = 0; i < search_range; i++) {
int best_site = -1;
@@ -2073,7 +2130,7 @@
if (sad_array[j] < bestsad) {
this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
- sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
+ sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, error_per_bit);
if (sad_array[j] < bestsad) {
bestsad = sad_array[j];
@@ -2094,7 +2151,7 @@
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv, MVSADCOSTS, error_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -2121,7 +2178,7 @@
return
fn_ptr->vf(what, what_stride, best_address, in_what_stride,
(unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit,
+ mv_err_cost(&this_mv, center_mv, MVCOSTS, x->errorperbit,
xd->allow_high_precision_mv);
else
return INT_MAX;
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index c27f3bf..afca580 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -15,6 +15,22 @@
#include "block.h"
#include "variance.h"
+#if CONFIG_NEWMVENTROPY
+#define MVCOSTS mvjcost, mvcost
+#define MVSADCOSTS mvjsadcost, mvsadcost
+#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
+#define DEC_MVSADCOSTS int *mvjsadcost, int *mvsadcost[2]
+#define NULLMVCOST NULL, NULL
+#define XMVCOST x->nmvjointcost, (x->e_mbd.allow_high_precision_mv?x->nmvcost_hp:x->nmvcost)
+#else
+#define MVCOSTS mvcost
+#define MVSADCOSTS mvsadcost
+#define DEC_MVCOSTS int *mvcost[2]
+#define DEC_MVSADCOSTS int *mvsadcost[2]
+#define NULLMVCOST NULL
+#define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost)
+#endif /* CONFIG_NEWMVENTROPY */
+
#ifdef ENTROPY_STATS
extern void init_mv_ref_counts();
extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]);
@@ -26,7 +42,7 @@
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units
extern void vp8_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv);
-extern int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
+extern int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, DEC_MVCOSTS,
int Weight, int ishp);
extern void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride);
extern void vp8_init3smotion_compensation(MACROBLOCK *x, int stride);
@@ -48,14 +64,14 @@
int search_param,
int error_per_bit,
const vp8_variance_fn_ptr_t *vf,
- int *mvsadcost[2],
- int *mvcost[2],
+ DEC_MVSADCOSTS,
+ DEC_MVCOSTS,
int_mv *center_mv
);
typedef int (fractional_mv_step_fp)
(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv,
- int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2],
+ int error_per_bit, const vp8_variance_fn_ptr_t *vfp, DEC_MVCOSTS,
int *distortion, unsigned int *sse);
extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively;
extern fractional_mv_step_fp vp8_find_best_sub_pixel_step;
@@ -72,7 +88,7 @@
int sad_per_bit, \
int distance, \
vp8_variance_fn_ptr_t *fn_ptr, \
- int *mvcost[2], \
+ DEC_MVSADCOSTS, \
int_mv *center_mv \
)
@@ -86,7 +102,7 @@
int sad_per_bit, \
int distance, \
vp8_variance_fn_ptr_t *fn_ptr, \
- int *mvcost[2], \
+ DEC_MVSADCOSTS, \
int_mv *center_mv \
)
@@ -102,7 +118,7 @@
int sad_per_bit, \
int *num00, \
vp8_variance_fn_ptr_t *fn_ptr, \
- int *mvcost[2], \
+ DEC_MVSADCOSTS, \
int_mv *center_mv \
)
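
The DEC_*/plain macro pairs above keep every search-function signature and call site
in one place when the experiment toggles between the (joint + component) and
component-only cost tables. A minimal compile-time demo of the forwarding, in the
new-entropy flavour (inner and outer are hypothetical):

#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
#define MVCOSTS     mvjcost, mvcost

static int inner(DEC_MVCOSTS) { return mvjcost[0] + mvcost[0][0]; }
static int outer(DEC_MVCOSTS) { return inner(MVCOSTS); }  /* forwards both */
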
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 68c28c3..fdb3fa1 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -149,6 +149,13 @@
extern int intra_mode_stats[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES];
#endif
+#if CONFIG_NEWMVENTROPY
+#ifdef NMV_STATS
+extern void init_nmvstats();
+extern void print_nmvstats();
+#endif
+#endif
+
#ifdef SPEEDSTATS
unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int tot_pm = 0;
@@ -1697,6 +1704,48 @@
#define M_LOG2_E 0.693147180559945309417
#define log2f(x) (log (x) / (float) M_LOG2_E)
+
+#if CONFIG_NEWMVENTROPY
+
+static void cal_nmvjointsadcost(int *mvjointsadcost) {
+ mvjointsadcost[0] = 600;
+ mvjointsadcost[1] = 300;
+ mvjointsadcost[2] = 300;
+ mvjointsadcost[3] = 300;
+}
+
+static void cal_nmvsadcosts(int *mvsadcost[2]) {
+ int i = 1;
+
+ mvsadcost[0][0] = 0;
+ mvsadcost[1][0] = 0;
+
+ do {
+ double z = 256 * (2 * (log2f(8 * i) + .6));
+ mvsadcost[0][i] = (int) z;
+ mvsadcost[1][i] = (int) z;
+ mvsadcost[0][-i] = (int) z;
+ mvsadcost[1][-i] = (int) z;
+ } while (++i <= MV_MAX);
+}
+
+static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
+ int i = 1;
+
+ mvsadcost[0][0] = 0;
+ mvsadcost[1][0] = 0;
+
+ do {
+ double z = 256 * (2 * (log2f(8 * i) + .6));
+ mvsadcost[0][i] = (int) z;
+ mvsadcost[1][i] = (int) z;
+ mvsadcost[0][-i] = (int) z;
+ mvsadcost[1][-i] = (int) z;
+ } while (++i <= MV_MAX);
+}
+
+#else
+
static void cal_mvsadcosts(int *mvsadcost[2]) {
int i = 1;
@@ -1727,6 +1776,8 @@
} while (++i <= mvfp_max_hp);
}
+#endif /* CONFIG_NEWMVENTROPY */
+
VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
int i;
volatile union {
@@ -1838,6 +1889,11 @@
vp8_zero(inter_uv_modes);
vp8_zero(inter_b_modes);
#endif
+#if CONFIG_NEWMVENTROPY
+#ifdef NMV_STATS
+ init_nmvstats();
+#endif
+#endif
/*Initialize the feed-forward activity masking.*/
cpi->activity_avg = 90 << 12;
@@ -1903,19 +1959,32 @@
cpi->gf_rate_correction_factor = 1.0;
cpi->twopass.est_max_qcorrection_factor = 1.0;
+#if CONFIG_NEWMVENTROPY
+ cal_nmvjointsadcost(cpi->mb.nmvjointsadcost);
+ cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX];
+ cpi->mb.nmvcost[1] = &cpi->mb.nmvcosts[1][MV_MAX];
+ cpi->mb.nmvsadcost[0] = &cpi->mb.nmvsadcosts[0][MV_MAX];
+ cpi->mb.nmvsadcost[1] = &cpi->mb.nmvsadcosts[1][MV_MAX];
+ cal_nmvsadcosts(cpi->mb.nmvsadcost);
+
+ cpi->mb.nmvcost_hp[0] = &cpi->mb.nmvcosts_hp[0][MV_MAX];
+ cpi->mb.nmvcost_hp[1] = &cpi->mb.nmvcosts_hp[1][MV_MAX];
+ cpi->mb.nmvsadcost_hp[0] = &cpi->mb.nmvsadcosts_hp[0][MV_MAX];
+ cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX];
+ cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp);
+#else
cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max + 1];
cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max + 1];
cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max + 1];
cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max + 1];
-
cal_mvsadcosts(cpi->mb.mvsadcost);
cpi->mb.mvcost_hp[0] = &cpi->mb.mvcosts_hp[0][mv_max_hp + 1];
cpi->mb.mvcost_hp[1] = &cpi->mb.mvcosts_hp[1][mv_max_hp + 1];
cpi->mb.mvsadcost_hp[0] = &cpi->mb.mvsadcosts_hp[0][mvfp_max_hp + 1];
cpi->mb.mvsadcost_hp[1] = &cpi->mb.mvsadcosts_hp[1][mvfp_max_hp + 1];
-
cal_mvsadcosts_hp(cpi->mb.mvsadcost_hp);
+#endif /* CONFIG_NEWMVENTROPY */
for (i = 0; i < KEY_FRAME_CONTEXT; i++) {
cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
@@ -2068,6 +2137,12 @@
print_mode_context();
}
#endif
+#if CONFIG_NEWMVENTROPY
+#ifdef NMV_STATS
+ if (cpi->pass != 1)
+ print_nmvstats();
+#endif
+#endif
#if CONFIG_INTERNAL_STATS
@@ -3697,9 +3772,14 @@
vp8_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count);
vp8_adapt_mode_probs(&cpi->common);
+#if CONFIG_NEWMVENTROPY
+ cpi->common.fc.NMVcount = cpi->NMVcount;
+ vp8_adapt_nmv_probs(&cpi->common, cpi->mb.e_mbd.allow_high_precision_mv);
+#else
vp8_copy(cpi->common.fc.MVcount, cpi->MVcount);
vp8_copy(cpi->common.fc.MVcount_hp, cpi->MVcount_hp);
vp8_adapt_mv_probs(&cpi->common);
+#endif /* CONFIG_NEWMVENTROPY */
vp8_update_mode_context(&cpi->common);
}
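
A standalone check of the SAD-cost curve used by cal_nmvsadcosts and
cal_nmvsadcosts_hp above (log2 from <math.h> stands in for the file's log2f shim;
the values only illustrate the curve's shape):

#include <math.h>
#include <stdio.h>

int main(void) {
  int i;
  for (i = 1; i <= 4; ++i) {
    /* same formula as the fill loops above */
    double z = 256 * (2 * (log2(8 * i) + .6));
    printf("i=%d cost=%d\n", i, (int) z);
  }
  return 0;
}
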
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 04c7ad4..0bdc07f 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -67,10 +67,17 @@
#endif
typedef struct {
+#if CONFIG_NEWMVENTROPY
+ nmv_context nmvc;
+ int nmvjointcost[MV_JOINTS];
+ int nmvcosts[2][MV_VALS];
+ int nmvcosts_hp[2][MV_VALS];
+#else
MV_CONTEXT mvc[2];
int mvcosts[2][MVvals + 1];
MV_CONTEXT_HP mvc_hp[2];
int mvcosts_hp[2][MVvals_hp + 1];
+#endif
#ifdef MODE_STATS
// Stats
@@ -549,8 +556,12 @@
// int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */
int y_uv_mode_count[VP8_YMODES][VP8_UV_MODES];
+#if CONFIG_NEWMVENTROPY
+ nmv_context_counts NMVcount;
+#else
unsigned int MVcount [2] [MVvals]; /* (row,col) MV cts this frame */
unsigned int MVcount_hp [2] [MVvals_hp]; /* (row,col) MV cts this frame */
+#endif
unsigned int coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */
// DECLARE_ALIGNED(16, int, coef_counts_backup [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]); //not used any more
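
Note on the new table sizes above: nmvcosts/nmvcosts_hp hold MV_VALS entries per
component, and vp8_create_compressor (onyx_if.c above) aliases them through pointers
offset by MV_MAX so a signed component delta indexes them directly. A sketch of that
scheme with a stand-in bound, assuming MV_VALS == 2 * MV_MAX + 1 (both constants live
in entropymv.h):

enum { kMvMax = 1023 };                  /* stand-in for MV_MAX */
static int nmvcosts[2][2 * kMvMax + 1];  /* stand-in for [2][MV_VALS] */
static int *nmvcost[2] = { &nmvcosts[0][kMvMax], &nmvcosts[1][kMvMax] };
/* nmvcost[c][-d] and nmvcost[c][+d] are in bounds for any d <= kMvMax. */
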
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 809279e..e985748 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -132,10 +132,17 @@
// intended for use in a re-code loop in vp8_compress_frame where the
// quantizer value is adjusted between loop iterations.
+#if CONFIG_NEWMVENTROPY
+ cc->nmvc = cm->fc.nmvc;
+ vp8_copy(cc->nmvjointcost, cpi->mb.nmvjointcost);
+ vp8_copy(cc->nmvcosts, cpi->mb.nmvcosts);
+ vp8_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp);
+#else
vp8_copy(cc->mvc, cm->fc.mvc);
vp8_copy(cc->mvcosts, cpi->mb.mvcosts);
vp8_copy(cc->mvc_hp, cm->fc.mvc_hp);
vp8_copy(cc->mvcosts_hp, cpi->mb.mvcosts_hp);
+#endif
vp8_copy(cc->mv_ref_ct, cm->fc.mv_ref_ct);
vp8_copy(cc->mode_context, cm->fc.mode_context);
@@ -188,10 +195,17 @@
// Restore key state variables to the snapshot state stored in the
// previous call to vp8_save_coding_context.
+#if CONFIG_NEWMVENTROPY
+ cm->fc.nmvc = cc->nmvc;
+ vp8_copy(cpi->mb.nmvjointcost, cc->nmvjointcost);
+ vp8_copy(cpi->mb.nmvcosts, cc->nmvcosts);
+ vp8_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp);
+#else
vp8_copy(cm->fc.mvc, cc->mvc);
vp8_copy(cpi->mb.mvcosts, cc->mvcosts);
vp8_copy(cm->fc.mvc_hp, cc->mvc_hp);
vp8_copy(cpi->mb.mvcosts_hp, cc->mvcosts_hp);
+#endif
vp8_copy(cm->fc.mv_ref_ct, cc->mv_ref_ct);
vp8_copy(cm->fc.mode_context, cc->mode_context);
@@ -244,17 +258,17 @@
vp8_kf_default_bmode_probs(cpi->common.kf_bmode_prob);
vp8_init_mbmode_probs(& cpi->common);
- vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
+ vp8_init_mv_probs(& cpi->common);
+#if CONFIG_NEWMVENTROPY == 0
+ /* building these tables here is not strictly required */
{
int flag[2] = {1, 1};
- vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag);
+ vp8_build_component_cost_table(
+ cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag);
+ vp8_build_component_cost_table_hp(
+ cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cpi->common.fc.mvc_hp, flag);
}
- vpx_memcpy(cpi->common.fc.mvc_hp, vp8_default_mv_context_hp, sizeof(vp8_default_mv_context_hp));
- {
- int flag[2] = {1, 1};
- vp8_build_component_cost_table_hp(cpi->mb.mvcost_hp, (const MV_CONTEXT_HP *) cpi->common.fc.mvc_hp, flag);
- }
-
+#endif
cpi->common.txfm_mode = ALLOW_8X8;
@@ -285,6 +299,7 @@
sizeof(default_vp8_mode_contexts));
}
+
void vp8_setup_inter_frame(VP8_COMP *cpi) {
cpi->common.txfm_mode = ALLOW_8X8;
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 391254b..a2b234e 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -37,6 +37,7 @@
#include "vpx_mem/vpx_mem.h"
#include "dct.h"
#include "vp8/common/systemdependent.h"
+#include "vp8/encoder/encodemv.h"
#include "vp8/common/seg_common.h"
#include "vp8/common/pred_common.h"
@@ -58,8 +59,6 @@
extern void vp8_ht_quantize_b(BLOCK *b, BLOCKD *d);
#endif
-#define XMVCOST (x->e_mbd.allow_high_precision_mv?x->mvcost_hp:x->mvcost)
-
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
#define INVALID_MV 0x80008000
@@ -376,6 +375,17 @@
cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
vp8_init_mode_costs(cpi);
+ if (cpi->common.frame_type != KEY_FRAME)
+ {
+#if CONFIG_NEWMVENTROPY
+ vp8_build_nmv_cost_table(
+ cpi->mb.nmvjointcost,
+ cpi->mb.e_mbd.allow_high_precision_mv ?
+ cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
+ &cpi->common.fc.nmvc,
+ cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
+#endif
+ }
}
void vp8_auto_select_speed(VP8_COMP *cpi) {
@@ -404,7 +414,9 @@
cpi->oxcf.cpu_used = -16;
*/
- if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress) {
+ if (cpi->avg_pick_mode_time < milliseconds_for_compress &&
+ (cpi->avg_encode_time - cpi->avg_pick_mode_time) <
+ milliseconds_for_compress) {
if (cpi->avg_pick_mode_time == 0) {
cpi->Speed = 4;
} else {
@@ -418,7 +430,8 @@
}
}
- if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) {
+ if (milliseconds_for_compress * 100 >
+ cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) {
cpi->Speed -= 1;
cpi->avg_pick_mode_time = 0;
cpi->avg_encode_time = 0;
@@ -1883,11 +1896,15 @@
x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
}
-static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label,
- B_PREDICTION_MODE this_mode,
- int_mv *this_mv, int_mv *this_second_mv,
- int_mv seg_mvs[MAX_REF_FRAMES - 1], int_mv *best_ref_mv,
- int_mv *second_best_ref_mv, int *mvcost[2]) {
+static int labels2mode(
+ MACROBLOCK *x,
+ int const *labelings, int which_label,
+ B_PREDICTION_MODE this_mode,
+ int_mv *this_mv, int_mv *this_second_mv,
+ int_mv seg_mvs[MAX_REF_FRAMES - 1],
+ int_mv *best_ref_mv,
+ int_mv *second_best_ref_mv,
+ DEC_MVCOSTS) {
MACROBLOCKD *const xd = & x->e_mbd;
MODE_INFO *const mic = xd->mode_info_context;
MB_MODE_INFO * mbmi = &mic->mbmi;
@@ -1922,11 +1939,11 @@
seg_mvs[mbmi->second_ref_frame - 1].as_int;
}
- thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost,
+ thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, MVCOSTS,
102, xd->allow_high_precision_mv);
if (mbmi->second_ref_frame) {
thismvcost += vp8_mv_bit_cost(this_second_mv, second_best_ref_mv,
- mvcost, 102,
+ MVCOSTS, 102,
xd->allow_high_precision_mv);
}
break;
@@ -2656,8 +2673,10 @@
void rd_update_mvcount(VP8_COMP *cpi, MACROBLOCK *x,
int_mv *best_ref_mv, int_mv *second_best_ref_mv) {
-
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
+#if CONFIG_NEWMVENTROPY
+ MV mv;
+#endif
if (mbmi->mode == SPLITMV) {
int i;
@@ -2665,6 +2684,21 @@
for (i = 0; i < x->partition_info->count; i++) {
if (x->partition_info->bmi[i].mode == NEW4X4) {
if (x->e_mbd.allow_high_precision_mv) {
+#if CONFIG_NEWMVENTROPY
+ mv.row = (x->partition_info->bmi[i].mv.as_mv.row
+ - best_ref_mv->as_mv.row);
+ mv.col = (x->partition_info->bmi[i].mv.as_mv.col
+ - best_ref_mv->as_mv.col);
+ vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1);
+ if (x->e_mbd.mode_info_context->mbmi.second_ref_frame) {
+ mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row
+ - second_best_ref_mv->as_mv.row);
+ mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col
+ - second_best_ref_mv->as_mv.col);
+ vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv,
+ &cpi->NMVcount, 1);
+ }
+#else
cpi->MVcount_hp[0][mv_max_hp + (x->partition_info->bmi[i].mv.as_mv.row
- best_ref_mv->as_mv.row)]++;
cpi->MVcount_hp[1][mv_max_hp + (x->partition_info->bmi[i].mv.as_mv.col
@@ -2675,8 +2709,23 @@
cpi->MVcount_hp[1][mv_max_hp + (x->partition_info->bmi[i].second_mv.as_mv.col
- second_best_ref_mv->as_mv.col)]++;
}
- } else
- {
+#endif
+ } else {
+#if CONFIG_NEWMVENTROPY
+ mv.row = (x->partition_info->bmi[i].mv.as_mv.row
+ - best_ref_mv->as_mv.row);
+ mv.col = (x->partition_info->bmi[i].mv.as_mv.col
+ - best_ref_mv->as_mv.col);
+ vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0);
+ if (x->e_mbd.mode_info_context->mbmi.second_ref_frame) {
+ mv.row = (x->partition_info->bmi[i].second_mv.as_mv.row
+ - second_best_ref_mv->as_mv.row);
+ mv.col = (x->partition_info->bmi[i].second_mv.as_mv.col
+ - second_best_ref_mv->as_mv.col);
+ vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv,
+ &cpi->NMVcount, 0);
+ }
+#else
cpi->MVcount[0][mv_max + ((x->partition_info->bmi[i].mv.as_mv.row
- best_ref_mv->as_mv.row) >> 1)]++;
cpi->MVcount[1][mv_max + ((x->partition_info->bmi[i].mv.as_mv.col
@@ -2687,11 +2736,22 @@
cpi->MVcount[1][mv_max + ((x->partition_info->bmi[i].second_mv.as_mv.col
- second_best_ref_mv->as_mv.col) >> 1)]++;
}
+#endif
}
}
}
} else if (mbmi->mode == NEWMV) {
if (x->e_mbd.allow_high_precision_mv) {
+#if CONFIG_NEWMVENTROPY
+ mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row);
+ mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col);
+ vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 1);
+ if (mbmi->second_ref_frame) {
+ mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row);
+ mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col);
+ vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 1);
+ }
+#else
cpi->MVcount_hp[0][mv_max_hp + (mbmi->mv[0].as_mv.row
- best_ref_mv->as_mv.row)]++;
cpi->MVcount_hp[1][mv_max_hp + (mbmi->mv[0].as_mv.col
@@ -2702,8 +2762,18 @@
cpi->MVcount_hp[1][mv_max_hp + (mbmi->mv[1].as_mv.col
- second_best_ref_mv->as_mv.col)]++;
}
- } else
- {
+#endif
+ } else {
+#if CONFIG_NEWMVENTROPY
+ mv.row = (mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row);
+ mv.col = (mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col);
+ vp8_increment_nmv(&mv, &best_ref_mv->as_mv, &cpi->NMVcount, 0);
+ if (mbmi->second_ref_frame) {
+ mv.row = (mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row);
+ mv.col = (mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col);
+ vp8_increment_nmv(&mv, &second_best_ref_mv->as_mv, &cpi->NMVcount, 0);
+ }
+#else
cpi->MVcount[0][mv_max + ((mbmi->mv[0].as_mv.row
- best_ref_mv->as_mv.row) >> 1)]++;
cpi->MVcount[1][mv_max + ((mbmi->mv[0].as_mv.col
@@ -2714,6 +2784,7 @@
cpi->MVcount[1][mv_max + ((mbmi->mv[1].as_mv.col
- second_best_ref_mv->as_mv.col) >> 1)]++;
}
+#endif
}
}
}
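
The four CONFIG_NEWMVENTROPY counting sites in rd_update_mvcount above share one
shape: form the row/col delta against the reference, then bump the counts. A
hypothetical helper (not in the patch; it assumes the tree's MV/int_mv types and the
vp8_increment_nmv entry point used above) makes that explicit:

static void count_one_nmv(nmv_context_counts *counts, const int_mv *mv,
                          const int_mv *ref, int usehp) {
  MV diff;
  diff.row = mv->as_mv.row - ref->as_mv.row;
  diff.col = mv->as_mv.col - ref->as_mv.col;
  vp8_increment_nmv(&diff, &ref->as_mv, counts, usehp);
}

With this, each branch reduces to one or two calls, and the usehp flag is the only
thing the hp and non-hp paths change.
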
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 035eebb..d57613b 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -187,7 +187,8 @@
// Ignore mv costing by sending NULL pointer instead of cost arrays
bestsme = vp8_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.as_mv.first,
step_param, sadpb, &cpi->fn_ptr[BLOCK_16X16],
- NULL, NULL, &best_ref_mv1);
+ NULLMVCOST, NULLMVCOST,
+ &best_ref_mv1);
#if ALT_REF_SUBPEL_ENABLED
// Try sub-pixel MC?
@@ -200,7 +201,8 @@
&best_ref_mv1,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
- NULL, &distortion, &sse);
+ NULLMVCOST,
+ &distortion, &sse);
}
#endif