Merge "Call vp10_fwd_txfm_4x4 in encode_inter_mb_segment" into nextgenv2
diff --git a/test/vp10_fht4x4_test.cc b/test/vp10_fht4x4_test.cc
index bee1a0c..a5f64fc 100644
--- a/test/vp10_fht4x4_test.cc
+++ b/test/vp10_fht4x4_test.cc
@@ -34,6 +34,19 @@
   vp10_fht4x4_c(in, out, stride, tx_type);
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef void (*IhighbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                              int tx_type, int bd);  // inverse txfm signature
+// Params: fwd txfm, inv txfm, tx_type, bit depth, number of coefficients.
+typedef std::tr1::tuple<FhtFunc, IhighbdHtFunc, int, vpx_bit_depth_t, int>
+HighbdHt4x4Param;
+// Reference forward transform; simply calls vp10_highbd_fht4x4_c().
+void highbe_fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+                       int tx_type) {  // NOTE(review): "highbe" typo?
+  vp10_highbd_fht4x4_c(in, out, stride, tx_type);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 class VP10Trans4x4HT
     : public libvpx_test::TransformTestBase,
       public ::testing::TestWithParam<Ht4x4Param> {
@@ -69,6 +82,43 @@
   RunCoeffCheck();
 }
 
+#if CONFIG_VP9_HIGHBITDEPTH
+class VP10HighbdTrans4x4HT  // gtest fixture for high-bitdepth fht4x4
+    : public libvpx_test::TransformTestBase,
+      public ::testing::TestWithParam<HighbdHt4x4Param> {
+ public:
+  virtual ~VP10HighbdTrans4x4HT() {}
+  // Unpacks the HighbdHt4x4Param tuple; pitch and reference txfm are fixed.
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_  = GET_PARAM(2);
+    pitch_    = 4;
+    fwd_txfm_ref = highbe_fht4x4_ref;
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;  // largest bit_depth_-bit value
+    num_coeffs_ = GET_PARAM(4);
+  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);
+  }
+  // The inverse transform additionally receives the configured bit depth.
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_, bit_depth_);
+  }
+
+  FhtFunc fwd_txfm_;
+  IhighbdHtFunc inv_txfm_;
+};
+// Runs the inherited RunCoeffCheck() on each instantiated parameter tuple.
+TEST_P(VP10HighbdTrans4x4HT, HighbdCoeffCheck) {
+  RunCoeffCheck();
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 using std::tr1::make_tuple;
 
 #if HAVE_SSE2
@@ -106,4 +156,89 @@
 #endif  // !CONFIG_EXT_TX
 #endif  // HAVE_SSE2
 
+#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, VP10HighbdTrans4x4HT,
+    ::testing::Values(
+#if !CONFIG_EXT_TX  // tx_type 0-3, at 10-bit then 12-bit depth
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
+                 VPX_BITS_12, 16)));
+#else  // tx_type 0-8 and 10-15 (9 is not listed), at 10-bit then 12-bit
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 4,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 5,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 6,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 7,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 8,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 10,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 11,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 12,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 13,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 14,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 15,
+                 VPX_BITS_10, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 4,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 5,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 6,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 7,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 8,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 10,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 11,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 12,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 13,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 14,
+                 VPX_BITS_12, 16),
+      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 15,
+                 VPX_BITS_12, 16)));
+#endif  // !CONFIG_EXT_TX
+#endif  // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+
 }  // namespace
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index de91431..92a0885 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -102,7 +102,7 @@
   PREDICTION_MODE as_mode;
   int_mv as_mv[2];  // first, second inter predictor motion vectors
 #if CONFIG_REF_MV
-  int_mv pred_mv[2];
+  int_mv pred_mv_s8[2];
 #endif
 #if CONFIG_EXT_INTER
   int_mv ref_mv[2];
diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c
index 2a8bc78..30d7790 100644
--- a/vp10/common/mvref_common.c
+++ b/vp10/common/mvref_common.c
@@ -811,11 +811,12 @@
     ref_mv_stack[above_count] = tmp_mv;
   }
 
-  for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *ref_mv_count); ++idx) {
-    mv_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
-    clamp_mv_ref(&mv_list[idx].as_mv,
+  for (idx = 0; idx < *ref_mv_count; ++idx)
+    clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv,
                  xd->n8_w << 3, xd->n8_h << 3, xd);
-  }
+
+  for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *ref_mv_count); ++idx)
+    mv_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
 #endif
 
   near_mv->as_int = 0;
diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h
index 76530e9..3a68b4d 100644
--- a/vp10/common/mvref_common.h
+++ b/vp10/common/mvref_common.h
@@ -156,7 +156,7 @@
                                            int search_col, int block_idx) {
   return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
           ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
-              .pred_mv[which_mv]
+              .pred_mv_s8[which_mv]
           : candidate->mbmi.pred_mv[which_mv];
 }
 #endif
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index 5b2fa1f..a76d5d8 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -933,7 +933,7 @@
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   BLOCK_SIZE bsize = mbmi->sb_type;
   int_mv *pred_mv = (bsize >= BLOCK_8X8) ?
-      mbmi->pred_mv : xd->mi[0]->bmi[block].pred_mv;
+      mbmi->pred_mv : xd->mi[0]->bmi[block].pred_mv_s8;
 #endif
 
   switch (mode) {
@@ -1366,6 +1366,7 @@
       for (idx = 0; idx < 2; idx += num_4x4_w) {
         int_mv block[2];
         const int j = idy * 2 + idx;
+        int_mv ref_mv_s8[2];
 #if CONFIG_REF_MV
 #if CONFIG_EXT_INTER
         if (!is_compound)
@@ -1423,6 +1424,13 @@
 #endif  // CONFIG_EXT_INTER
         }
 
+        for (ref = 0; ref < 2; ++ref) {
+          ref_mv_s8[ref] = nearestmv[ref];
+        }
+#if CONFIG_EXT_INTER
+        (void)ref_mv_s8;
+#endif
+
         if (!assign_mv(cm, xd, b_mode,
 #if CONFIG_REF_MV
                        j,
@@ -1431,7 +1439,7 @@
 #if CONFIG_EXT_INTER
                        ref_mv[mv_idx],
 #else
-                       nearestmv,
+                       ref_mv_s8,
 #endif  // CONFIG_EXT_INTER
                        nearest_sub8x8, near_sub8x8,
                        is_compound, allow_hp, r)) {
@@ -1451,8 +1459,8 @@
     }
 
 #if CONFIG_REF_MV
-    mbmi->pred_mv[0].as_int = mi->bmi[3].pred_mv[0].as_int;
-    mbmi->pred_mv[1].as_int = mi->bmi[3].pred_mv[1].as_int;
+    mbmi->pred_mv[0].as_int = mi->bmi[3].pred_mv_s8[0].as_int;
+    mbmi->pred_mv[1].as_int = mi->bmi[3].pred_mv_s8[1].as_int;
 #endif
     mi->mbmi.mode = b_mode;
 
@@ -1460,19 +1468,19 @@
     mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
   } else {
     int ref;
+    int_mv ref_mv[2] = { nearestmv[0], nearestmv[1] };
     for (ref = 0; ref < 1 + is_compound && mbmi->mode == NEWMV; ++ref) {
-      int_mv ref_mv = nearestmv[ref];
 #if CONFIG_REF_MV
       uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
       if (xd->ref_mv_count[ref_frame_type] > 1) {
-        ref_mv = (ref == 0) ?
+        ref_mv[ref] = (ref == 0) ?
             xd->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx].this_mv :
             xd->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx].comp_mv;
-        clamp_mv_ref(&ref_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
-        lower_mv_precision(&ref_mv.as_mv, allow_hp);
+        clamp_mv_ref(&ref_mv[ref].as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+        lower_mv_precision(&ref_mv[ref].as_mv, allow_hp);
       }
 #endif
-      nearestmv[ref] = ref_mv;
+      nearestmv[ref] = ref_mv[ref];
     }
 
     xd->corrupted |= !assign_mv(cm, xd, mbmi->mode,
@@ -1484,7 +1492,7 @@
                                 mbmi->mode == NEWFROMNEARMV ?
                                               nearmv : nearestmv,
 #else
-                                nearestmv,
+                                ref_mv,
 #endif  // CONFIG_EXT_INTER
                                 nearestmv, nearmv, is_compound, allow_hp, r);
   }
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index a4a7d9f..c0767dc 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -1186,7 +1186,11 @@
 #if CONFIG_EXT_INTER
                              &mi->bmi[j].ref_mv[ref].as_mv,
 #else
+#if CONFIG_REF_MV
+                             &mi->bmi[j].pred_mv_s8[ref].as_mv,
+#else
                              &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv,
+#endif  // CONFIG_REF_MV
 #endif  // CONFIG_EXT_INTER
                              nmvc, allow_hp);
             }
diff --git a/vp10/encoder/encodemv.c b/vp10/encoder/encodemv.c
index 7941363..a5bfd1a 100644
--- a/vp10/encoder/encodemv.c
+++ b/vp10/encoder/encodemv.c
@@ -268,6 +268,9 @@
 #if CONFIG_EXT_INTER
 static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
                     const int_mv mvs[2],
+#if CONFIG_REF_MV
+                    const int_mv pred_mvs[2],
+#endif
                     nmv_context_counts *nmv_counts) {
   int i;
   PREDICTION_MODE mode = mbmi->mode;
@@ -285,6 +288,7 @@
       int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
                                  mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
       nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+      (void)pred_mvs;
 #endif
       vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
     }
@@ -363,6 +367,9 @@
 #else
 static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
                     const int_mv mvs[2],
+#if CONFIG_REF_MV
+                    const int_mv pred_mvs[2],
+#endif
                     nmv_context_counts *nmv_counts) {
   int i;
 #if !CONFIG_REF_MV
@@ -374,8 +381,10 @@
     int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
                                mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
     nmv_context_counts *counts = &nmv_counts[nmv_ctx];
-#endif
+    const MV *ref = &pred_mvs[i].as_mv;
+#else
     const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
+#endif
     const MV diff = {mvs[i].as_mv.row - ref->row,
                      mvs[i].as_mv.col - ref->col};
     vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
@@ -411,6 +420,7 @@
         if (mi->bmi[i].as_mode == NEWMV)
           inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv,
 #if CONFIG_REF_MV
+                  mi->bmi[i].pred_mv_s8,
                   td->counts->mv);
 #else
                   &td->counts->mv);
@@ -426,6 +436,7 @@
 #endif  // CONFIG_EXT_INTER
       inc_mvs(mbmi, mbmi_ext, mbmi->mv,
 #if CONFIG_REF_MV
+              mbmi->pred_mv,
               td->counts->mv);
 #else
               &td->counts->mv);
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index d736751..edf944e 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -4207,15 +4207,13 @@
 
 #if CONFIG_REF_MV
   if (mode == NEWMV) {
-    mic->bmi[i].pred_mv[0].as_int =
-        mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_int;
+    mic->bmi[i].pred_mv_s8[0].as_int = best_ref_mv[0]->as_int;
     if (is_compound)
-      mic->bmi[i].pred_mv[1].as_int =
-          mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_int;
+      mic->bmi[i].pred_mv_s8[1].as_int = best_ref_mv[1]->as_int;
   } else {
-    mic->bmi[i].pred_mv[0].as_int = this_mv[0].as_int;
+    mic->bmi[i].pred_mv_s8[0].as_int = this_mv[0].as_int;
     if (is_compound)
-      mic->bmi[i].pred_mv[1].as_int = this_mv[1].as_int;
+      mic->bmi[i].pred_mv_s8[1].as_int = this_mv[1].as_int;
   }
 #endif
 
@@ -4356,6 +4354,9 @@
   int64_t bsse;
   int64_t brdcost;
   int_mv mvs[2];
+#if CONFIG_REF_MV
+  int_mv pred_mv[2];
+#endif
 #if CONFIG_EXT_INTER
   int_mv ref_mv[2];
 #endif  // CONFIG_EXT_INTER
@@ -4854,6 +4855,12 @@
 #endif  // CONFIG_EXT_INTER
                                       &frame_mv[NEARESTMV][frame],
                                       &frame_mv[NEARMV][frame]);
+
+#if CONFIG_REF_MV
+        if (ref_mv_count[ref] > 0)
+          bsi->ref_mv[ref] = &ref_mv_stack[ref][0].this_mv;
+#endif
+
 #if CONFIG_EXT_INTER
         mv_ref_list[0].as_int = frame_mv[NEARESTMV][frame].as_int;
         mv_ref_list[1].as_int = frame_mv[NEARMV][frame].as_int;
@@ -5139,6 +5146,16 @@
           if (num_4x4_blocks_high > 1)
             bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
                 mode_mv[this_mode][ref].as_int;
+#if CONFIG_REF_MV
+          bsi->rdstat[i][mode_idx].pred_mv[ref].as_int =
+              mi->bmi[i].pred_mv_s8[ref].as_int;
+          if (num_4x4_blocks_wide > 1)
+            bsi->rdstat[i + 1][mode_idx].pred_mv[ref].as_int =
+                mi->bmi[i].pred_mv_s8[ref].as_int;
+          if (num_4x4_blocks_high > 1)
+            bsi->rdstat[i + 2][mode_idx].pred_mv[ref].as_int =
+                mi->bmi[i].pred_mv_s8[ref].as_int;
+#endif
 #if CONFIG_EXT_INTER
           bsi->rdstat[i][mode_idx].ref_mv[ref].as_int =
             bsi->ref_mv[ref]->as_int;
@@ -5313,6 +5330,11 @@
     mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
     if (has_second_ref(mbmi))
       mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
+#if CONFIG_REF_MV
+    mi->bmi[i].pred_mv_s8[0] = bsi->rdstat[i][mode_idx].pred_mv[0];
+    if (has_second_ref(mbmi))
+      mi->bmi[i].pred_mv_s8[1] = bsi->rdstat[i][mode_idx].pred_mv[1];
+#endif
 #if CONFIG_EXT_INTER
     mi->bmi[i].ref_mv[0].as_int = bsi->rdstat[i][mode_idx].ref_mv[0].as_int;
     if (has_second_rf)
@@ -9642,8 +9664,8 @@
     mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
     mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
 #if CONFIG_REF_MV
-    mbmi->pred_mv[0].as_int = xd->mi[0]->bmi[3].pred_mv[0].as_int;
-    mbmi->pred_mv[1].as_int = xd->mi[0]->bmi[3].pred_mv[1].as_int;
+    mbmi->pred_mv[0].as_int = xd->mi[0]->bmi[3].pred_mv_s8[0].as_int;
+    mbmi->pred_mv[1].as_int = xd->mi[0]->bmi[3].pred_mv_s8[1].as_int;
 #endif
   }
 
diff --git a/vp10/encoder/x86/highbd_fwd_txfm_sse4.c b/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
index 3dc0e56..5fa4fc8 100644
--- a/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
@@ -16,9 +16,6 @@
 #include "vpx_dsp/txfm_common.h"
 #include "vpx_ports/mem.h"
 
-
-#if CONFIG_VP9_HIGHBITDEPTH
-
 static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in,
                                    int stride, int flipud, int fliplr) {
   const __m128i k__nonzero_bias_a = _mm_setr_epi32(0, 1, 1, 1);
@@ -193,13 +190,12 @@
     case FLIPADST_ADST:
       vp10_highbd_fht4x4_c(input, output, stride, tx_type);
       break;
-    case DST_DST:
-    case DCT_DST:
-    case DST_DCT:
-    case DST_ADST:
-    case ADST_DST:
-    case DST_FLIPADST:
-    case FLIPADST_DST:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
       vp10_highbd_fht4x4_c(input, output, stride, tx_type);
       break;
 #endif  // CONFIG_EXT_TX
@@ -207,5 +203,3 @@
       assert(0);
   }
 }
-
-#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk
index 8b2d0d7..810005c 100644
--- a/vp10/vp10cx.mk
+++ b/vp10/vp10cx.mk
@@ -112,7 +112,9 @@
 
 VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.c
 VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 VP10_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c
+endif
 
 ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
 VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoiser_sse2.c