Merge "Rework motion vector precision limit" into nextgenv2
diff --git a/test/vp10_ans_test.cc b/test/vp10_ans_test.cc
index 9c93dd8..20aedba 100644
--- a/test/vp10_ans_test.cc
+++ b/test/vp10_ans_test.cc
@@ -148,23 +148,25 @@
   return okay;
 }
 
+// TODO(aconverse): replace this with a more representative distribution from
+// the codec.
 const rans_sym rans_sym_tab[] = {
-    {16, 0}, {100, 16}, {70, 116}, {70, 186},
+    {16 * 4, 0 * 4}, {100 * 4, 16 * 4}, {70 * 4, 116 * 4}, {70 * 4, 186 * 4},
 };
 const int kDistinctSyms = sizeof(rans_sym_tab) / sizeof(rans_sym_tab[0]);
 
 std::vector<int> ans_encode_build_vals(const rans_sym *tab, int iters) {
   std::vector<int> p_to_sym;
   int i = 0;
-  while (p_to_sym.size() < 256) {
+  while (p_to_sym.size() < rans_precision) {
     p_to_sym.insert(p_to_sym.end(), tab[i].prob, i);
     ++i;
   }
-  assert(p_to_sym.size() == 256);
+  assert(p_to_sym.size() == rans_precision);
   std::vector<int> ret;
   libvpx_test::ACMRandom gen(18543637);
   for (int i = 0; i < iters; ++i) {
-    int sym = p_to_sym[gen.Rand8()];
+    int sym = p_to_sym[gen.Rand8() * 4];
     ret.push_back(sym);
   }
   return ret;
@@ -173,7 +175,7 @@
 void rans_build_dec_tab(const struct rans_sym sym_tab[],
                         rans_dec_lut dec_tab) {
   dec_tab[0] = 0;
-  for (int i = 1; dec_tab[i - 1] < ans_p8_precision; ++i) {
+  for (int i = 1; dec_tab[i - 1] < rans_precision; ++i) {
     dec_tab[i] = dec_tab[i - 1] + sym_tab[i - 1].prob;
   }
 }
@@ -229,10 +231,10 @@
  *        -sym2  -sym3
  */
 void tab2tree(const rans_sym *tab, int tab_size, vpx_prob *treep) {
-  const unsigned basep = 256;
+  const unsigned basep = rans_precision;
   unsigned pleft = basep;
   for (int i = 0; i < tab_size - 1; ++i) {
-    unsigned prob = (tab[i].prob * basep + (basep / 2)) / pleft;
+    unsigned prob = (tab[i].prob * basep + basep * 2) / (pleft * 4);
     assert(prob > 0 && prob < 256);
     treep[i] = prob;
     pleft -= tab[i].prob;
diff --git a/test/vp10_dct_test.cc b/test/vp10_dct_test.cc
index b2c301a..8cf034f 100644
--- a/test/vp10_dct_test.cc
+++ b/test/vp10_dct_test.cc
@@ -26,7 +26,6 @@
 
 namespace {
 void reference_dct_1d(const double *in, double *out, int size) {
-  const double PI = 3.141592653589793238462643383279502884;
   const double kInvSqrt2 = 0.707106781186547524400844362104;
   for (int k = 0; k < size; ++k) {
     out[k] = 0;
diff --git a/test/vp10_fht4x4_test.cc b/test/vp10_fht4x4_test.cc
index 63d9ec7..9daf063 100644
--- a/test/vp10_fht4x4_test.cc
+++ b/test/vp10_fht4x4_test.cc
@@ -25,9 +25,9 @@
 namespace {
 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                         int tx_type);
-
+using std::tr1::tuple;
 using libvpx_test::FhtFunc;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t, int> Ht4x4Param;
+typedef tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t, int> Ht4x4Param;
 
 void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
                 int tx_type) {
@@ -37,13 +37,14 @@
 #if CONFIG_VP9_HIGHBITDEPTH
 typedef void (*IhighbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                               int tx_type, int bd);
+typedef void (*HBDFhtFunc)(const int16_t *input, int32_t *output, int stride,
+                        int tx_type, int bd);
+// Target optimized function, tx_type, bit depth
+typedef tuple<HBDFhtFunc, int, int> HighbdHt4x4Param;
 
-typedef std::tr1::tuple<FhtFunc, IhighbdHtFunc, int, vpx_bit_depth_t, int>
-HighbdHt4x4Param;
-
-void highbe_fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
-                       int tx_type) {
-  vp10_highbd_fht4x4_c(in, out, stride, tx_type);
+void highbe_fht4x4_ref(const int16_t *in, int32_t *out, int stride,
+                       int tx_type, int bd) {
+  vp10_fwd_txfm2d_4x4_c(in, out, stride, tx_type, bd);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
@@ -83,98 +84,76 @@
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-class VP10HighbdTrans4x4HT
-    : public libvpx_test::TransformTestBase,
-      public ::testing::TestWithParam<HighbdHt4x4Param> {
+class VP10HighbdTrans4x4HT : public ::testing::TestWithParam<HighbdHt4x4Param> {
  public:
   virtual ~VP10HighbdTrans4x4HT() {}
 
   virtual void SetUp() {
     fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
-    pitch_    = 4;
-    fwd_txfm_ref = highbe_fht4x4_ref;
-    bit_depth_ = GET_PARAM(3);
+    fwd_txfm_ref_ = highbe_fht4x4_ref;
+    tx_type_  = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
     mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
+    num_coeffs_ = 16;
+
+    input_ = reinterpret_cast<int16_t *>
+       (vpx_memalign(16, sizeof(int16_t) * num_coeffs_));
+    output_ = reinterpret_cast<int32_t *>
+        (vpx_memalign(16, sizeof(int32_t) * num_coeffs_));
+    output_ref_ = reinterpret_cast<int32_t *>
+        (vpx_memalign(16, sizeof(int32_t) * num_coeffs_));
   }
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+  virtual void TearDown() {
+    vpx_free(input_);
+    vpx_free(output_);
+    vpx_free(output_ref_);
+    libvpx_test::ClearSystemState();
+  }
 
  protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, tx_type_);
-  }
+  void RunBitexactCheck();
 
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, tx_type_, bit_depth_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhighbdHtFunc inv_txfm_;
+ private:
+  HBDFhtFunc fwd_txfm_;      // Transform under test, from GET_PARAM(0).
+  HBDFhtFunc fwd_txfm_ref_;  // Reference transform (highbe_fht4x4_ref).
+  int tx_type_;              // From GET_PARAM(1).
+  int bit_depth_;            // From GET_PARAM(2).
+  int mask_;                 // (1 << bit_depth_) - 1.
+  int num_coeffs_;           // Element count of each buffer below (16).
+  int16_t *input_;           // vpx_memalign'd in SetUp, freed in TearDown.
+  int32_t *output_;          // vpx_memalign'd in SetUp, freed in TearDown.
+  int32_t *output_ref_;      // vpx_memalign'd in SetUp, freed in TearDown.
 };
 
+void VP10HighbdTrans4x4HT::RunBitexactCheck() {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  // Compare the optimized transform against the C reference on random blocks.
+  const int stride = 4;
+  const int num_tests = 200000;
+  const int num_coeffs = num_coeffs_;  // Buffers are allocated with this size.
+
+  for (int i = 0; i < num_tests; ++i) {
+    for (int j = 0; j < num_coeffs; ++j) {
+      input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+    }
+
+    fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_);
+    fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_);
+
+    for (int j = 0; j < num_coeffs; ++j) {
+      ASSERT_EQ(output_[j], output_ref_[j])
+          << "Not bit-exact result at index: " << j
+          << " at test block: " << i;
+    }
+  }
+}
+
 TEST_P(VP10HighbdTrans4x4HT, HighbdCoeffCheck) {
-  RunCoeffCheck();
+  RunBitexactCheck();
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-#define SPEED_TEST (0)
-#if SPEED_TEST
-#if CONFIG_EXT_TX
-TEST(VP10Trans4x4HTSpeedTest, C_version) {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 200000;
-    int bit_depth = 8;
-    int mask = (1 << bit_depth) - 1;
-    const int num_coeffs = 16;
-    int16_t *input = new int16_t[num_coeffs];
-    tran_low_t *output = new tran_low_t[num_coeffs];
-    const int stride = 4;
-    int tx_type;
-
-    for (int j = 0; j < num_coeffs; ++j) {
-      input[j] = (rnd.Rand8() & mask) - (rnd.Rand8() & mask);
-    }
-    for (int i = 0; i < count_test_block; ++i) {
-      for (tx_type = V_DCT; tx_type <= H_FLIPADST; ++tx_type) {
-        vp10_fht4x4_c(input, output, stride, tx_type);
-      }
-    }
-
-    delete[] input;
-    delete[] output;
-}
-#endif  // CONFIG_EXT_TX
-
-#if HAVE_SSE2 && CONFIG_EXT_TX
-TEST(VP10Trans4x4HTSpeedTest, SSE2_version) {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 200000;
-    int bit_depth = 8;
-    int mask = (1 << bit_depth) - 1;
-    const int num_coeffs = 16;
-    int16_t *input = new int16_t[num_coeffs];
-    tran_low_t *output = reinterpret_cast<tran_low_t *>(
-        vpx_memalign(16, num_coeffs * sizeof(tran_low_t)));
-    const int stride = 4;
-    int tx_type;
-
-    for (int j = 0; j < num_coeffs; ++j) {
-      input[j] = (rnd.Rand8() & mask) - (rnd.Rand8() & mask);
-    }
-    for (int i = 0; i < count_test_block; ++i) {
-      for (tx_type = V_DCT; tx_type <= H_FLIPADST; ++tx_type) {
-        vp10_fht4x4_sse2(input, output, stride, tx_type);
-      }
-    }
-
-    delete[] input;
-    vpx_free(output);
-}
-#endif  // HAVE_SSE2 && CONFIG_EXT_TX
-#endif  // SPEED_TEST
-
 using std::tr1::make_tuple;
 
 #if HAVE_SSE2
@@ -229,83 +208,23 @@
     SSE4_1, VP10HighbdTrans4x4HT,
     ::testing::Values(
 #if !CONFIG_EXT_TX
-      // make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
-      //            VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
-                 VPX_BITS_10, 16),
-      // make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
-      //            VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
-                 VPX_BITS_12, 16)));
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 0, 10),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 0, 12),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 1, 10),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 1, 12),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 2, 10),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 2, 12),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 3, 10),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 3, 12)));
 #else
-      // make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
-      //            VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 4,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 5,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 6,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 7,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 8,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 10,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 11,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 12,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 13,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 14,
-                 VPX_BITS_10, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 15,
-                 VPX_BITS_10, 16),
-      // make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 0,
-      //            VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 1,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 2,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 3,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 4,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 5,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 6,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 7,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 8,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 10,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 11,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 12,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 13,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 14,
-                 VPX_BITS_12, 16),
-      make_tuple(&vp10_highbd_fht4x4_sse4_1, &vp10_highbd_iht4x4_16_add_c, 15,
-                 VPX_BITS_12, 16)));
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 0, 10),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 0, 12),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 1, 10),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 1, 12),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 2, 10),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 2, 12),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 3, 10),
+         make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 3, 12)));
 #endif  // !CONFIG_EXT_TX
 #endif  // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
 
diff --git a/test/vp10_inv_txfm_test.cc b/test/vp10_inv_txfm_test.cc
index 6c0a3d2..e37b906 100644
--- a/test/vp10_inv_txfm_test.cc
+++ b/test/vp10_inv_txfm_test.cc
@@ -28,7 +28,6 @@
 using libvpx_test::ACMRandom;
 
 namespace {
-const double PI = 3.141592653589793238462643383279502884;
 const double kInvSqrt2 = 0.707106781186547524400844362104;
 
 void reference_idct_1d(const double *in, double *out, int size) {
diff --git a/vp10/common/ans.h b/vp10/common/ans.h
index 32babee..24d7c09 100644
--- a/vp10/common/ans.h
+++ b/vp10/common/ans.h
@@ -58,7 +58,12 @@
 typedef uint8_t AnsP8;
 #define ans_p8_precision 256u
 #define ans_p8_shift 8
-#define l_base (ans_p8_precision * 4)  // l_base % precision must be 0
+typedef uint16_t AnsP10;
+#define ans_p10_precision 1024u
+
+#define rans_precision ans_p10_precision
+
+#define l_base (ans_p10_precision * 4)  // l_base % precision must be 0
 #define io_base 256
 // Range I = { l_base, l_base + 1, ..., l_base * io_base - 1 }
 
@@ -75,14 +80,17 @@
   assert(ans->state < l_base * io_base);
   state = ans->state - l_base;
   if (state < (1 << 6)) {
-    ans->buf[ans->buf_offset] = (0 << 6) + state;
+    ans->buf[ans->buf_offset] = (0x00 << 6) + state;
     return ans->buf_offset + 1;
   } else if (state < (1 << 14)) {
-    mem_put_le16(ans->buf + ans->buf_offset, (1 << 14) + state);
+    mem_put_le16(ans->buf + ans->buf_offset, (0x01 << 14) + state);
     return ans->buf_offset + 2;
-  } else {
-    mem_put_le24(ans->buf + ans->buf_offset, (1 << 23) + state);
+  } else if (state < (1 << 22)) {
+    mem_put_le24(ans->buf + ans->buf_offset, (0x02 << 22) + state);
     return ans->buf_offset + 3;
+  } else {
+    assert(0 && "State is too large to be serialized");
+    return ans->buf_offset;
   }
 }
 
@@ -189,7 +197,7 @@
 static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
   AnsP8 p = ans_p8_precision - p0;
   const unsigned l_s = val ? p : p0;
-  if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+  while (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
     ans->buf[ans->buf_offset++] = ans->state % io_base;
     ans->state /= io_base;
   }
@@ -205,7 +213,7 @@
   // unsigned int xp1;
   unsigned xp, sp;
   unsigned state = ans->state;
-  if (state < l_base && ans->buf_offset > 0) {
+  while (state < l_base && ans->buf_offset > 0) {
     state = state * io_base + ans->buf[--ans->buf_offset];
   }
   sp = state * p;
@@ -223,7 +231,7 @@
 static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
   int s;
   unsigned state = ans->state;
-  if (state < l_base && ans->buf_offset > 0) {
+  while (state < l_base && ans->buf_offset > 0) {
     state = state * io_base + ans->buf[--ans->buf_offset];
   }
   s = (int)(state & 1);
@@ -256,31 +264,31 @@
 }
 
 struct rans_sym {
-  AnsP8 prob;
-  AnsP8 cum_prob;  // not-inclusive
+  AnsP10 prob;
+  AnsP10 cum_prob;  // not-inclusive
 };
 
 struct rans_dec_sym {
   uint8_t val;
-  AnsP8 prob;
-  AnsP8 cum_prob;  // not-inclusive
+  AnsP10 prob;
+  AnsP10 cum_prob;  // not-inclusive
 };
 
 // This is now just a boring cdf. It starts with an explicit zero.
 // TODO(aconverse): Remove starting zero.
 typedef uint16_t rans_dec_lut[16];
 
-static INLINE void rans_build_cdf_from_pdf(const AnsP8 token_probs[],
+static INLINE void rans_build_cdf_from_pdf(const AnsP10 token_probs[],
                                            rans_dec_lut cdf_tab) {
   int i;
   cdf_tab[0] = 0;
-  for (i = 1; cdf_tab[i - 1] < ans_p8_precision; ++i) {
+  for (i = 1; cdf_tab[i - 1] < rans_precision; ++i) {
     cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1];
   }
-  assert(cdf_tab[i - 1] == ans_p8_precision);
+  assert(cdf_tab[i - 1] == rans_precision);
 }
 
-static INLINE int ans_find_largest(const AnsP8 *const pdf_tab,
+static INLINE int ans_find_largest(const AnsP10 *const pdf_tab,
                                    int num_syms) {
   int largest_idx = -1;
   int largest_p = -1;
@@ -295,22 +303,22 @@
   return largest_idx;
 }
 
-static INLINE void rans_merge_prob_pdf(AnsP8 *const out_pdf,
-                                       const AnsP8 node_prob,
-                                       const AnsP8 *const src_pdf,
-                                       int in_syms) {
+static INLINE void rans_merge_prob8_pdf(AnsP10 *const out_pdf,
+                                        const AnsP8 node_prob,
+                                        const AnsP10 *const src_pdf,
+                                        int in_syms) {
   int i;
-  int adjustment = ans_p8_precision;
+  int adjustment = rans_precision;
   const int round_fact = ans_p8_precision >> 1;
   const AnsP8 p1 = ans_p8_precision - node_prob;
   const int out_syms = in_syms + 1;
   assert(src_pdf != out_pdf);
 
-  out_pdf[0] = node_prob;
-  adjustment -= node_prob;
+  out_pdf[0] = node_prob << (10 - 8);
+  adjustment -= out_pdf[0];
   for (i = 0; i < in_syms; ++i) {
     int p = (p1 * src_pdf[i] + round_fact) >> ans_p8_shift;
-    p = VPXMIN(p, (int)ans_p8_precision - in_syms);
+    p = VPXMIN(p, (int)rans_precision - in_syms);
     p = VPXMAX(p, 1);
     out_pdf[i + 1] = p;
     adjustment -= p;
@@ -332,20 +340,20 @@
 
 // rANS with normalization
 // sym->prob takes the place of l_s from the paper
-// ans_p8_precision is m
+// rans_precision (ans_p10_precision) is m
 static INLINE void rans_write(struct AnsCoder *ans,
                               const struct rans_sym *const sym) {
-  const AnsP8 p = sym->prob;
-  if (ans->state >= l_base / ans_p8_precision * io_base * p) {
+  const AnsP10 p = sym->prob;
+  while (ans->state >= l_base / rans_precision * io_base * p) {  // renormalize
     ans->buf[ans->buf_offset++] = ans->state % io_base;
     ans->state /= io_base;
   }
   ans->state =
-      (ans->state / p) * ans_p8_precision + ans->state % p + sym->cum_prob;
+      (ans->state / p) * rans_precision + ans->state % p + sym->cum_prob;
 }
 
 static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf,
-                             AnsP8 rem) {
+                             AnsP10 rem) {
   int i = 0;
   // TODO(skal): if critical, could be a binary search.
   // Or, better, an O(1) alias-table.
@@ -353,8 +361,8 @@
     ++i;
   }
   out->val = i - 1;
-  out->prob = (AnsP8)(cdf[i] - cdf[i - 1]);
-  out->cum_prob = (AnsP8)cdf[i - 1];
+  out->prob = (AnsP10)(cdf[i] - cdf[i - 1]);
+  out->cum_prob = (AnsP10)cdf[i - 1];
 }
 
 static INLINE int rans_read(struct AnsDecoder *ans,
@@ -362,11 +370,11 @@
   unsigned rem;
   unsigned quo;
   struct rans_dec_sym sym;
-  if (ans->state < l_base && ans->buf_offset > 0) {
+  while (ans->state < l_base && ans->buf_offset > 0) {
     ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
   }
-  quo = ans->state / ans_p8_precision;
-  rem = ans->state % ans_p8_precision;
+  quo = ans->state / rans_precision;
+  rem = ans->state % rans_precision;
   fetch_sym(&sym, tab, rem);
   ans->state = quo * sym.prob + rem - sym.cum_prob;
   return sym.val;
diff --git a/vp10/common/blockd.c b/vp10/common/blockd.c
index 5f45077..7339493 100644
--- a/vp10/common/blockd.c
+++ b/vp10/common/blockd.c
@@ -9,6 +9,9 @@
  */
 
 #include <math.h>
+
+#include "vpx_ports/system_state.h"
+
 #include "vp10/common/blockd.h"
 
 PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi,
@@ -137,21 +140,90 @@
 }
 
 #if CONFIG_EXT_INTRA
-#define PI 3.14159265
+// {dx, dy} per angle, with t = tan(a), a = angle, 180 - angle, angle - 180:
+// 0 < angle < 90: {-(int)(256 / t), 1}; 90 < angle < 180: {(int)(256 / t),
+// (int)(256 * t)}; 180 < angle < 270: {1, -(int)(256 * t)}; others {1, 1}.
+const int16_t dr_intra_derivative[270][2] = {
+    {     1,     1 }, { -14666,    1 }, { -7330,     1 }, { -4884,     1 },
+    { -3660,     1 }, { -2926,     1 }, { -2435,     1 }, { -2084,     1 },
+    { -1821,     1 }, { -1616,     1 }, { -1451,     1 }, { -1317,     1 },
+    { -1204,     1 }, { -1108,     1 }, { -1026,     1 }, {  -955,     1 },
+    {  -892,     1 }, {  -837,     1 }, {  -787,     1 }, {  -743,     1 },
+    {  -703,     1 }, {  -666,     1 }, {  -633,     1 }, {  -603,     1 },
+    {  -574,     1 }, {  -548,     1 }, {  -524,     1 }, {  -502,     1 },
+    {  -481,     1 }, {  -461,     1 }, {  -443,     1 }, {  -426,     1 },
+    {  -409,     1 }, {  -394,     1 }, {  -379,     1 }, {  -365,     1 },
+    {  -352,     1 }, {  -339,     1 }, {  -327,     1 }, {  -316,     1 },
+    {  -305,     1 }, {  -294,     1 }, {  -284,     1 }, {  -274,     1 },
+    {  -265,     1 }, {  -256,     1 }, {  -247,     1 }, {  -238,     1 },
+    {  -230,     1 }, {  -222,     1 }, {  -214,     1 }, {  -207,     1 },
+    {  -200,     1 }, {  -192,     1 }, {  -185,     1 }, {  -179,     1 },
+    {  -172,     1 }, {  -166,     1 }, {  -159,     1 }, {  -153,     1 },
+    {  -147,     1 }, {  -141,     1 }, {  -136,     1 }, {  -130,     1 },
+    {  -124,     1 }, {  -119,     1 }, {  -113,     1 }, {  -108,     1 },
+    {  -103,     1 }, {   -98,     1 }, {   -93,     1 }, {   -88,     1 },
+    {   -83,     1 }, {   -78,     1 }, {   -73,     1 }, {   -68,     1 },
+    {   -63,     1 }, {   -59,     1 }, {   -54,     1 }, {   -49,     1 },
+    {   -45,     1 }, {   -40,     1 }, {   -35,     1 }, {   -31,     1 },
+    {   -26,     1 }, {   -22,     1 }, {   -17,     1 }, {   -13,     1 },
+    {    -8,     1 }, {    -4,     1 }, {     1,     1 }, {     4, 14666 },
+    {     8,  7330 }, {    13,  4884 }, {    17,  3660 }, {    22,  2926 },
+    {    26,  2435 }, {    31,  2084 }, {    35,  1821 }, {    40,  1616 },
+    {    45,  1451 }, {    49,  1317 }, {    54,  1204 }, {    59,  1108 },
+    {    63,  1026 }, {    68,   955 }, {    73,   892 }, {    78,   837 },
+    {    83,   787 }, {    88,   743 }, {    93,   703 }, {    98,   666 },
+    {   103,   633 }, {   108,   603 }, {   113,   574 }, {   119,   548 },
+    {   124,   524 }, {   130,   502 }, {   136,   481 }, {   141,   461 },
+    {   147,   443 }, {   153,   426 }, {   159,   409 }, {   166,   394 },
+    {   172,   379 }, {   179,   365 }, {   185,   352 }, {   192,   339 },
+    {   200,   327 }, {   207,   316 }, {   214,   305 }, {   222,   294 },
+    {   230,   284 }, {   238,   274 }, {   247,   265 }, {   255,   256 },
+    {   265,   247 }, {   274,   238 }, {   284,   230 }, {   294,   222 },
+    {   305,   214 }, {   316,   207 }, {   327,   200 }, {   339,   192 },
+    {   352,   185 }, {   365,   179 }, {   379,   172 }, {   394,   166 },
+    {   409,   159 }, {   426,   153 }, {   443,   147 }, {   461,   141 },
+    {   481,   136 }, {   502,   130 }, {   524,   124 }, {   548,   119 },
+    {   574,   113 }, {   603,   108 }, {   633,   103 }, {   666,    98 },
+    {   703,    93 }, {   743,    88 }, {   787,    83 }, {   837,    78 },
+    {   892,    73 }, {   955,    68 }, {  1026,    63 }, {  1108,    59 },
+    {  1204,    54 }, {  1317,    49 }, {  1451,    45 }, {  1616,    40 },
+    {  1821,    35 }, {  2084,    31 }, {  2435,    26 }, {  2926,    22 },
+    {  3660,    17 }, {  4884,    13 }, {  7330,     8 }, { 14666,     4 },
+    {     1,     1 }, {     1,    -4 }, {     1,    -8 }, {     1,   -13 },
+    {     1,   -17 }, {     1,   -22 }, {     1,   -26 }, {     1,   -31 },
+    {     1,   -35 }, {     1,   -40 }, {     1,   -45 }, {     1,   -49 },
+    {     1,   -54 }, {     1,   -59 }, {     1,   -63 }, {     1,   -68 },
+    {     1,   -73 }, {     1,   -78 }, {     1,   -83 }, {     1,   -88 },
+    {     1,   -93 }, {     1,   -98 }, {     1,  -103 }, {     1,  -108 },
+    {     1,  -113 }, {     1,  -119 }, {     1,  -124 }, {     1,  -130 },
+    {     1,  -136 }, {     1,  -141 }, {     1,  -147 }, {     1,  -153 },
+    {     1,  -159 }, {     1,  -166 }, {     1,  -172 }, {     1,  -179 },
+    {     1,  -185 }, {     1,  -192 }, {     1,  -200 }, {     1,  -207 },
+    {     1,  -214 }, {     1,  -222 }, {     1,  -230 }, {     1,  -238 },
+    {     1,  -247 }, {     1,  -255 }, {     1,  -265 }, {     1,  -274 },
+    {     1,  -284 }, {     1,  -294 }, {     1,  -305 }, {     1,  -316 },
+    {     1,  -327 }, {     1,  -339 }, {     1,  -352 }, {     1,  -365 },
+    {     1,  -379 }, {     1,  -394 }, {     1,  -409 }, {     1,  -426 },
+    {     1,  -443 }, {     1,  -461 }, {     1,  -481 }, {     1,  -502 },
+    {     1,  -524 }, {     1,  -548 }, {     1,  -574 }, {     1,  -603 },
+    {     1,  -633 }, {     1,  -666 }, {     1,  -703 }, {     1,  -743 },
+    {     1,  -787 }, {     1,  -837 }, {     1,  -892 }, {     1,  -955 },
+    {     1, -1026 }, {     1, -1108 }, {     1, -1204 }, {     1, -1317 },
+    {     1, -1451 }, {     1, -1616 }, {     1, -1821 }, {     1, -2084 },
+    {     1, -2435 }, {     1, -2926 }, {     1, -3660 }, {     1, -4884 },
+    {     1, -7330 }, {     1, -14666 },
+};
+
 // Returns whether filter selection is needed for a given
 // intra prediction angle.
 int pick_intra_filter(int angle) {
+  assert(angle > 0 && angle < 270);
   if (angle % 45 == 0)
     return 0;
   if (angle > 90 && angle < 180) {
     return 1;
   } else {
-    double t = tan(angle * PI / 180.0);
-    double n;
-    if (angle < 90)
-      t = 1 / t;
-    n = floor(t);
-    return (t - n) * 1024 > 1;
+    return ((-(dr_intra_derivative[angle][angle > 180])) & 0xFF) > 0;
   }
 }
 #endif  // CONFIG_EXT_INTRA
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index fbcba09..b6eeee5 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -483,6 +483,8 @@
 #define ANGLE_SKIP_THRESH 0.10
 #define FILTER_FAST_SEARCH 1
 
+extern const int16_t dr_intra_derivative[270][2];
+
 static uint8_t mode_to_angle_map[INTRA_MODES] = {
     0, 90, 180, 45, 135, 111, 157, 203, 67, 0,
 };
diff --git a/vp10/common/common.h b/vp10/common/common.h
index fb5634a..80801aa 100644
--- a/vp10/common/common.h
+++ b/vp10/common/common.h
@@ -25,6 +25,8 @@
 extern "C" {
 #endif
 
+#define PI 3.141592653589793238462643383279502884
+
 // Only need this for fixed-size arrays, for structs just assign.
 #define vp10_copy(dest, src) {            \
     assert(sizeof(dest) == sizeof(src)); \
diff --git a/vp10/common/entropy.c b/vp10/common/entropy.c
index d17b854..20594df 100644
--- a/vp10/common/entropy.c
+++ b/vp10/common/entropy.c
@@ -417,263 +417,263 @@
 // beta = 8
 // Values for tokens ONE_TOKEN through CATEGORY6_TOKEN included here.
 // ZERO_TOKEN and EOB_TOKEN are coded as flags outside this coder.
-const vpx_prob vp10_pareto8_token_probs[COEFF_PROB_MODELS]
-                                       [ENTROPY_TOKENS - 2] = {
-  {1, 1, 1, 1, 2, 4, 8, 14, 26, 198},
-  {2, 2, 2, 2, 4, 7, 14, 26, 42, 155},
-  {3, 3, 3, 3, 6, 11, 20, 34, 51, 122},
-  {4, 4, 4, 4, 7, 14, 25, 41, 56, 97},
-  {5, 5, 5, 5, 9, 17, 30, 46, 58, 76},
-  {6, 6, 6, 5, 11, 20, 34, 50, 57, 61},
-  {7, 7, 7, 6, 12, 22, 37, 53, 56, 49},
-  {8, 8, 7, 7, 14, 25, 40, 54, 53, 40},
-  {9, 9, 8, 8, 15, 27, 43, 55, 50, 32},
-  {10, 10, 9, 9, 16, 29, 45, 55, 47, 26},
-  {11, 10, 10, 10, 18, 31, 47, 55, 43, 21},
-  {12, 11, 11, 10, 19, 32, 48, 55, 40, 18},
-  {13, 12, 12, 11, 20, 34, 49, 54, 37, 14},
-  {14, 13, 12, 12, 21, 35, 50, 53, 34, 12},
-  {15, 14, 13, 12, 22, 37, 51, 51, 31, 10},
-  {16, 15, 14, 13, 23, 38, 51, 50, 28, 8},
-  {17, 16, 15, 13, 24, 39, 51, 48, 26, 7},
-  {18, 17, 15, 14, 25, 40, 52, 46, 23, 6},
-  {19, 17, 16, 15, 26, 41, 51, 45, 21, 5},
-  {20, 18, 17, 15, 27, 42, 51, 43, 19, 4},
-  {21, 19, 17, 16, 28, 42, 51, 41, 18, 3},
-  {22, 20, 18, 16, 28, 43, 51, 39, 16, 3},
-  {23, 21, 19, 17, 29, 43, 50, 37, 14, 3},
-  {24, 22, 19, 17, 30, 44, 49, 36, 13, 2},
-  {25, 22, 20, 18, 30, 44, 49, 34, 12, 2},
-  {26, 23, 20, 18, 31, 44, 48, 33, 11, 2},
-  {27, 24, 21, 19, 31, 45, 47, 31, 10, 1},
-  {28, 25, 22, 19, 32, 45, 46, 29, 9, 1},
-  {29, 25, 22, 20, 32, 45, 46, 28, 8, 1},
-  {30, 26, 23, 20, 33, 45, 45, 26, 7, 1},
-  {31, 27, 23, 20, 33, 45, 44, 25, 7, 1},
-  {32, 27, 24, 21, 33, 45, 43, 24, 6, 1},
-  {33, 28, 24, 21, 34, 44, 42, 23, 6, 1},
-  {34, 29, 25, 21, 34, 44, 41, 22, 5, 1},
-  {35, 30, 25, 22, 34, 44, 40, 20, 5, 1},
-  {36, 30, 26, 22, 35, 44, 39, 19, 4, 1},
-  {37, 31, 26, 22, 35, 44, 38, 18, 4, 1},
-  {38, 32, 27, 22, 35, 43, 37, 17, 4, 1},
-  {39, 33, 27, 23, 35, 43, 36, 16, 3, 1},
-  {40, 33, 27, 23, 35, 43, 35, 16, 3, 1},
-  {41, 34, 28, 23, 35, 42, 34, 15, 3, 1},
-  {42, 35, 28, 23, 36, 42, 33, 14, 2, 1},
-  {43, 35, 29, 24, 35, 42, 32, 13, 2, 1},
-  {44, 36, 29, 24, 36, 41, 31, 12, 2, 1},
-  {45, 36, 29, 24, 36, 41, 30, 12, 2, 1},
-  {46, 37, 30, 24, 35, 40, 30, 11, 2, 1},
-  {47, 37, 30, 24, 36, 40, 29, 10, 2, 1},
-  {48, 38, 30, 24, 36, 40, 28, 10, 1, 1},
-  {49, 39, 31, 24, 36, 39, 27, 9, 1, 1},
-  {50, 39, 31, 25, 35, 39, 26, 9, 1, 1},
-  {51, 40, 31, 25, 36, 38, 25, 8, 1, 1},
-  {52, 40, 31, 25, 35, 38, 25, 8, 1, 1},
-  {53, 41, 32, 25, 35, 37, 24, 7, 1, 1},
-  {54, 41, 32, 25, 35, 37, 23, 7, 1, 1},
-  {55, 42, 32, 25, 35, 36, 22, 7, 1, 1},
-  {56, 42, 33, 25, 35, 35, 22, 6, 1, 1},
-  {57, 43, 33, 25, 34, 35, 21, 6, 1, 1},
-  {58, 43, 33, 25, 35, 34, 20, 6, 1, 1},
-  {59, 44, 33, 25, 34, 34, 20, 5, 1, 1},
-  {60, 45, 33, 25, 34, 33, 19, 5, 1, 1},
-  {61, 45, 33, 25, 34, 33, 18, 5, 1, 1},
-  {62, 45, 34, 25, 34, 32, 18, 4, 1, 1},
-  {63, 46, 34, 25, 33, 32, 17, 4, 1, 1},
-  {64, 46, 34, 25, 33, 31, 17, 4, 1, 1},
-  {65, 47, 34, 25, 33, 30, 16, 4, 1, 1},
-  {66, 47, 34, 25, 33, 30, 15, 4, 1, 1},
-  {67, 48, 34, 25, 33, 29, 15, 3, 1, 1},
-  {68, 48, 35, 25, 32, 29, 14, 3, 1, 1},
-  {69, 48, 35, 25, 32, 28, 14, 3, 1, 1},
-  {70, 49, 35, 25, 32, 27, 13, 3, 1, 1},
-  {71, 49, 35, 25, 31, 27, 13, 3, 1, 1},
-  {72, 49, 35, 25, 31, 27, 12, 3, 1, 1},
-  {73, 50, 35, 25, 31, 26, 12, 2, 1, 1},
-  {74, 50, 35, 25, 31, 25, 12, 2, 1, 1},
-  {75, 51, 35, 25, 30, 25, 11, 2, 1, 1},
-  {76, 51, 35, 25, 30, 24, 11, 2, 1, 1},
-  {77, 51, 35, 25, 30, 24, 10, 2, 1, 1},
-  {78, 52, 35, 24, 29, 24, 10, 2, 1, 1},
-  {79, 52, 35, 24, 29, 23, 10, 2, 1, 1},
-  {80, 52, 35, 24, 29, 23, 9, 2, 1, 1},
-  {81, 53, 35, 24, 28, 22, 9, 2, 1, 1},
-  {82, 53, 35, 24, 28, 22, 9, 1, 1, 1},
-  {83, 54, 35, 24, 28, 21, 8, 1, 1, 1},
-  {84, 54, 35, 24, 27, 21, 8, 1, 1, 1},
-  {85, 54, 35, 24, 27, 20, 8, 1, 1, 1},
-  {86, 54, 35, 24, 27, 20, 7, 1, 1, 1},
-  {87, 55, 35, 23, 27, 19, 7, 1, 1, 1},
-  {88, 55, 35, 23, 26, 19, 7, 1, 1, 1},
-  {89, 55, 35, 23, 26, 18, 7, 1, 1, 1},
-  {90, 55, 35, 23, 26, 18, 6, 1, 1, 1},
-  {91, 56, 35, 23, 25, 17, 6, 1, 1, 1},
-  {92, 56, 35, 22, 25, 17, 6, 1, 1, 1},
-  {93, 56, 35, 22, 24, 17, 6, 1, 1, 1},
-  {94, 57, 35, 22, 24, 16, 5, 1, 1, 1},
-  {95, 56, 35, 22, 24, 16, 5, 1, 1, 1},
-  {96, 57, 35, 22, 23, 15, 5, 1, 1, 1},
-  {97, 56, 35, 22, 23, 15, 5, 1, 1, 1},
-  {98, 57, 34, 21, 23, 15, 5, 1, 1, 1},
-  {99, 57, 35, 21, 23, 14, 4, 1, 1, 1},
-  {100, 58, 34, 21, 22, 14, 4, 1, 1, 1},
-  {101, 57, 34, 21, 22, 14, 4, 1, 1, 1},
-  {102, 58, 34, 21, 21, 13, 4, 1, 1, 1},
-  {103, 57, 34, 21, 21, 13, 4, 1, 1, 1},
-  {104, 57, 34, 20, 21, 13, 4, 1, 1, 1},
-  {105, 58, 34, 20, 20, 12, 4, 1, 1, 1},
-  {106, 58, 34, 20, 20, 12, 3, 1, 1, 1},
-  {107, 58, 33, 20, 20, 12, 3, 1, 1, 1},
-  {108, 59, 33, 20, 19, 11, 3, 1, 1, 1},
-  {109, 59, 33, 19, 19, 11, 3, 1, 1, 1},
-  {110, 58, 33, 19, 19, 11, 3, 1, 1, 1},
-  {111, 59, 33, 19, 18, 10, 3, 1, 1, 1},
-  {112, 58, 33, 19, 18, 10, 3, 1, 1, 1},
-  {113, 58, 32, 19, 18, 10, 3, 1, 1, 1},
-  {114, 59, 32, 18, 18, 10, 2, 1, 1, 1},
-  {115, 60, 32, 18, 17, 9, 2, 1, 1, 1},
-  {116, 59, 32, 18, 17, 9, 2, 1, 1, 1},
-  {117, 59, 32, 18, 16, 9, 2, 1, 1, 1},
-  {118, 59, 31, 18, 16, 9, 2, 1, 1, 1},
-  {119, 59, 32, 17, 16, 8, 2, 1, 1, 1},
-  {120, 59, 31, 17, 16, 8, 2, 1, 1, 1},
-  {121, 59, 31, 17, 15, 8, 2, 1, 1, 1},
-  {122, 59, 30, 17, 15, 8, 2, 1, 1, 1},
-  {123, 59, 30, 17, 15, 7, 2, 1, 1, 1},
-  {124, 59, 30, 16, 15, 7, 2, 1, 1, 1},
-  {125, 59, 30, 16, 14, 7, 2, 1, 1, 1},
-  {126, 59, 30, 16, 14, 7, 1, 1, 1, 1},
-  {127, 59, 30, 16, 14, 6, 1, 1, 1, 1},
-  {128, 59, 30, 16, 13, 6, 1, 1, 1, 1},
-  {129, 59, 30, 15, 13, 6, 1, 1, 1, 1},
-  {130, 59, 29, 15, 13, 6, 1, 1, 1, 1},
-  {131, 59, 29, 15, 12, 6, 1, 1, 1, 1},
-  {132, 59, 28, 15, 12, 6, 1, 1, 1, 1},
-  {133, 59, 28, 15, 12, 5, 1, 1, 1, 1},
-  {134, 59, 28, 14, 12, 5, 1, 1, 1, 1},
-  {135, 59, 28, 14, 11, 5, 1, 1, 1, 1},
-  {136, 58, 28, 14, 11, 5, 1, 1, 1, 1},
-  {137, 58, 27, 14, 11, 5, 1, 1, 1, 1},
-  {138, 58, 27, 13, 11, 5, 1, 1, 1, 1},
-  {139, 58, 27, 13, 11, 4, 1, 1, 1, 1},
-  {140, 58, 27, 13, 10, 4, 1, 1, 1, 1},
-  {141, 58, 26, 13, 10, 4, 1, 1, 1, 1},
-  {142, 57, 26, 13, 10, 4, 1, 1, 1, 1},
-  {143, 57, 26, 12, 10, 4, 1, 1, 1, 1},
-  {144, 57, 26, 12, 9, 4, 1, 1, 1, 1},
-  {145, 57, 25, 12, 9, 4, 1, 1, 1, 1},
-  {146, 57, 25, 12, 9, 3, 1, 1, 1, 1},
-  {147, 57, 25, 11, 9, 3, 1, 1, 1, 1},
-  {148, 57, 25, 11, 8, 3, 1, 1, 1, 1},
-  {149, 57, 24, 11, 8, 3, 1, 1, 1, 1},
-  {150, 56, 24, 11, 8, 3, 1, 1, 1, 1},
-  {151, 56, 23, 11, 8, 3, 1, 1, 1, 1},
-  {152, 56, 23, 10, 8, 3, 1, 1, 1, 1},
-  {153, 56, 23, 10, 7, 3, 1, 1, 1, 1},
-  {154, 55, 23, 10, 7, 3, 1, 1, 1, 1},
-  {155, 55, 22, 10, 7, 3, 1, 1, 1, 1},
-  {156, 55, 22, 10, 7, 2, 1, 1, 1, 1},
-  {157, 54, 22, 10, 7, 2, 1, 1, 1, 1},
-  {158, 54, 22, 9, 7, 2, 1, 1, 1, 1},
-  {159, 55, 21, 9, 6, 2, 1, 1, 1, 1},
-  {160, 54, 21, 9, 6, 2, 1, 1, 1, 1},
-  {161, 53, 21, 9, 6, 2, 1, 1, 1, 1},
-  {162, 53, 20, 9, 6, 2, 1, 1, 1, 1},
-  {163, 53, 20, 8, 6, 2, 1, 1, 1, 1},
-  {164, 53, 20, 8, 5, 2, 1, 1, 1, 1},
-  {165, 52, 20, 8, 5, 2, 1, 1, 1, 1},
-  {166, 52, 19, 8, 5, 2, 1, 1, 1, 1},
-  {167, 51, 19, 8, 5, 2, 1, 1, 1, 1},
-  {168, 51, 19, 7, 5, 2, 1, 1, 1, 1},
-  {169, 51, 19, 7, 5, 1, 1, 1, 1, 1},
-  {170, 51, 18, 7, 5, 1, 1, 1, 1, 1},
-  {171, 51, 18, 7, 4, 1, 1, 1, 1, 1},
-  {172, 50, 18, 7, 4, 1, 1, 1, 1, 1},
-  {173, 50, 17, 7, 4, 1, 1, 1, 1, 1},
-  {174, 49, 17, 7, 4, 1, 1, 1, 1, 1},
-  {175, 49, 17, 6, 4, 1, 1, 1, 1, 1},
-  {176, 49, 16, 6, 4, 1, 1, 1, 1, 1},
-  {177, 48, 16, 6, 4, 1, 1, 1, 1, 1},
-  {178, 47, 16, 6, 4, 1, 1, 1, 1, 1},
-  {179, 47, 16, 6, 3, 1, 1, 1, 1, 1},
-  {180, 47, 15, 6, 3, 1, 1, 1, 1, 1},
-  {181, 47, 15, 5, 3, 1, 1, 1, 1, 1},
-  {182, 46, 15, 5, 3, 1, 1, 1, 1, 1},
-  {183, 46, 14, 5, 3, 1, 1, 1, 1, 1},
-  {184, 45, 14, 5, 3, 1, 1, 1, 1, 1},
-  {185, 44, 14, 5, 3, 1, 1, 1, 1, 1},
-  {186, 44, 13, 5, 3, 1, 1, 1, 1, 1},
-  {187, 43, 13, 5, 3, 1, 1, 1, 1, 1},
-  {188, 44, 13, 4, 2, 1, 1, 1, 1, 1},
-  {189, 43, 13, 4, 2, 1, 1, 1, 1, 1},
-  {190, 43, 12, 4, 2, 1, 1, 1, 1, 1},
-  {191, 42, 12, 4, 2, 1, 1, 1, 1, 1},
-  {192, 41, 12, 4, 2, 1, 1, 1, 1, 1},
-  {193, 41, 11, 4, 2, 1, 1, 1, 1, 1},
-  {194, 40, 11, 4, 2, 1, 1, 1, 1, 1},
-  {195, 39, 11, 4, 2, 1, 1, 1, 1, 1},
-  {196, 39, 11, 3, 2, 1, 1, 1, 1, 1},
-  {197, 39, 10, 3, 2, 1, 1, 1, 1, 1},
-  {198, 38, 10, 3, 2, 1, 1, 1, 1, 1},
-  {199, 37, 10, 3, 2, 1, 1, 1, 1, 1},
-  {200, 37, 10, 3, 1, 1, 1, 1, 1, 1},
-  {201, 37, 9, 3, 1, 1, 1, 1, 1, 1},
-  {202, 36, 9, 3, 1, 1, 1, 1, 1, 1},
-  {203, 35, 9, 3, 1, 1, 1, 1, 1, 1},
-  {204, 35, 8, 3, 1, 1, 1, 1, 1, 1},
-  {205, 35, 8, 2, 1, 1, 1, 1, 1, 1},
-  {206, 34, 8, 2, 1, 1, 1, 1, 1, 1},
-  {207, 33, 8, 2, 1, 1, 1, 1, 1, 1},
-  {208, 32, 8, 2, 1, 1, 1, 1, 1, 1},
-  {209, 32, 7, 2, 1, 1, 1, 1, 1, 1},
-  {210, 31, 7, 2, 1, 1, 1, 1, 1, 1},
-  {211, 30, 7, 2, 1, 1, 1, 1, 1, 1},
-  {212, 30, 6, 2, 1, 1, 1, 1, 1, 1},
-  {213, 29, 6, 2, 1, 1, 1, 1, 1, 1},
-  {214, 28, 6, 2, 1, 1, 1, 1, 1, 1},
-  {215, 27, 6, 2, 1, 1, 1, 1, 1, 1},
-  {216, 27, 6, 1, 1, 1, 1, 1, 1, 1},
-  {217, 27, 5, 1, 1, 1, 1, 1, 1, 1},
-  {218, 26, 5, 1, 1, 1, 1, 1, 1, 1},
-  {219, 25, 5, 1, 1, 1, 1, 1, 1, 1},
-  {220, 24, 5, 1, 1, 1, 1, 1, 1, 1},
-  {221, 24, 4, 1, 1, 1, 1, 1, 1, 1},
-  {222, 23, 4, 1, 1, 1, 1, 1, 1, 1},
-  {223, 22, 4, 1, 1, 1, 1, 1, 1, 1},
-  {224, 21, 4, 1, 1, 1, 1, 1, 1, 1},
-  {225, 20, 4, 1, 1, 1, 1, 1, 1, 1},
-  {226, 20, 3, 1, 1, 1, 1, 1, 1, 1},
-  {227, 19, 3, 1, 1, 1, 1, 1, 1, 1},
-  {228, 18, 3, 1, 1, 1, 1, 1, 1, 1},
-  {229, 17, 3, 1, 1, 1, 1, 1, 1, 1},
-  {230, 16, 3, 1, 1, 1, 1, 1, 1, 1},
-  {231, 16, 2, 1, 1, 1, 1, 1, 1, 1},
-  {232, 15, 2, 1, 1, 1, 1, 1, 1, 1},
-  {233, 14, 2, 1, 1, 1, 1, 1, 1, 1},
-  {234, 13, 2, 1, 1, 1, 1, 1, 1, 1},
-  {235, 12, 2, 1, 1, 1, 1, 1, 1, 1},
-  {236, 11, 2, 1, 1, 1, 1, 1, 1, 1},
-  {237, 11, 1, 1, 1, 1, 1, 1, 1, 1},
-  {238, 10, 1, 1, 1, 1, 1, 1, 1, 1},
-  {239, 9, 1, 1, 1, 1, 1, 1, 1, 1},
-  {240, 8, 1, 1, 1, 1, 1, 1, 1, 1},
-  {241, 7, 1, 1, 1, 1, 1, 1, 1, 1},
-  {242, 6, 1, 1, 1, 1, 1, 1, 1, 1},
-  {243, 5, 1, 1, 1, 1, 1, 1, 1, 1},
-  {244, 4, 1, 1, 1, 1, 1, 1, 1, 1},
-  {245, 3, 1, 1, 1, 1, 1, 1, 1, 1},
-  {246, 2, 1, 1, 1, 1, 1, 1, 1, 1},
-  {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-  {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+const AnsP10 vp10_pareto8_token_probs[COEFF_PROB_MODELS]
+                                     [ENTROPY_TOKENS - 2] = {
+{ 4, 4, 4, 4, 8, 15, 30, 57, 103, 795 },
+{ 8, 8, 8, 8, 15, 30, 57, 103, 168, 619 },
+{ 12, 12, 12, 12, 23, 43, 80, 138, 205, 487 },
+{ 16, 16, 15, 15, 30, 56, 101, 165, 225, 385 },
+{ 20, 20, 19, 19, 36, 68, 119, 186, 231, 306 },
+{ 24, 23, 23, 22, 43, 79, 135, 201, 230, 244 },
+{ 28, 27, 26, 26, 49, 89, 149, 211, 223, 196 },
+{ 32, 31, 30, 29, 55, 98, 160, 218, 212, 159 },
+{ 36, 35, 33, 32, 60, 107, 171, 221, 200, 129 },
+{ 40, 38, 37, 35, 66, 115, 179, 222, 187, 105 },
+{ 44, 42, 40, 38, 71, 122, 186, 221, 174, 86 },
+{ 48, 45, 43, 41, 76, 129, 192, 219, 160, 71 },
+{ 52, 49, 46, 44, 80, 136, 196, 215, 148, 58 },
+{ 56, 53, 49, 46, 85, 142, 200, 210, 135, 48 },
+{ 60, 56, 52, 49, 89, 147, 203, 204, 124, 40 },
+{ 64, 60, 55, 52, 93, 151, 205, 198, 113, 33 },
+{ 68, 63, 58, 54, 97, 156, 205, 192, 103, 28 },
+{ 72, 66, 61, 57, 100, 160, 206, 185, 94, 23 },
+{ 76, 70, 64, 59, 104, 163, 205, 178, 85, 20 },
+{ 80, 73, 67, 61, 107, 166, 205, 171, 77, 17 },
+{ 84, 76, 69, 63, 110, 169, 204, 164, 71, 14 },
+{ 88, 80, 72, 65, 113, 171, 202, 157, 64, 12 },
+{ 92, 83, 75, 67, 116, 173, 200, 150, 58, 10 },
+{ 96, 86, 77, 69, 118, 175, 198, 143, 53, 9 },
+{ 100, 89, 80, 71, 121, 176, 195, 137, 48, 7 },
+{ 104, 92, 82, 73, 123, 178, 192, 130, 44, 6 },
+{ 108, 96, 84, 75, 125, 178, 189, 124, 40, 5 },
+{ 112, 98, 87, 76, 127, 179, 186, 118, 36, 5 },
+{ 116, 101, 89, 78, 129, 179, 183, 112, 33, 4 },
+{ 120, 104, 91, 80, 131, 180, 179, 106, 30, 3 },
+{ 124, 107, 93, 81, 132, 180, 176, 101, 27, 3 },
+{ 128, 110, 95, 82, 134, 179, 172, 96, 25, 3 },
+{ 132, 113, 97, 84, 135, 179, 168, 91, 23, 2 },
+{ 136, 116, 99, 85, 136, 179, 164, 86, 21, 2 },
+{ 140, 119, 101, 86, 137, 178, 160, 82, 19, 2 },
+{ 144, 122, 103, 88, 138, 177, 157, 77, 17, 1 },
+{ 148, 124, 105, 89, 139, 176, 153, 73, 16, 1 },
+{ 152, 127, 107, 90, 140, 175, 149, 69, 14, 1 },
+{ 156, 130, 108, 91, 141, 173, 145, 66, 13, 1 },
+{ 160, 133, 110, 92, 141, 172, 141, 62, 12, 1 },
+{ 164, 135, 111, 93, 142, 171, 137, 59, 11, 1 },
+{ 168, 138, 113, 94, 142, 169, 133, 56, 10, 1 },
+{ 172, 140, 115, 94, 142, 168, 130, 53, 9, 1 },
+{ 176, 143, 116, 95, 143, 166, 126, 50, 8, 1 },
+{ 180, 145, 118, 96, 143, 164, 122, 47, 8, 1 },
+{ 184, 147, 119, 96, 143, 163, 119, 45, 7, 1 },
+{ 188, 150, 120, 97, 143, 161, 116, 42, 6, 1 },
+{ 192, 152, 121, 98, 143, 159, 112, 40, 6, 1 },
+{ 196, 155, 123, 98, 142, 157, 109, 38, 5, 1 },
+{ 200, 157, 124, 99, 142, 155, 105, 36, 5, 1 },
+{ 204, 159, 125, 99, 142, 153, 102, 34, 5, 1 },
+{ 208, 161, 126, 100, 142, 151, 99, 32, 4, 1 },
+{ 212, 164, 127, 100, 141, 149, 96, 30, 4, 1 },
+{ 216, 166, 129, 100, 141, 147, 93, 28, 3, 1 },
+{ 220, 168, 130, 101, 140, 144, 90, 27, 3, 1 },
+{ 224, 170, 131, 101, 140, 142, 87, 25, 3, 1 },
+{ 228, 172, 132, 101, 139, 140, 84, 24, 3, 1 },
+{ 232, 174, 132, 101, 139, 138, 81, 23, 3, 1 },
+{ 236, 176, 133, 101, 138, 136, 79, 22, 2, 1 },
+{ 240, 178, 134, 102, 137, 134, 76, 20, 2, 1 },
+{ 244, 180, 135, 102, 136, 131, 74, 19, 2, 1 },
+{ 248, 182, 135, 102, 136, 129, 71, 18, 2, 1 },
+{ 252, 184, 136, 101, 135, 127, 69, 17, 2, 1 },
+{ 256, 186, 137, 102, 134, 124, 66, 16, 2, 1 },
+{ 260, 188, 138, 102, 133, 122, 64, 15, 1, 1 },
+{ 264, 190, 138, 101, 132, 120, 62, 15, 1, 1 },
+{ 268, 191, 139, 101, 131, 118, 60, 14, 1, 1 },
+{ 272, 193, 139, 101, 130, 116, 58, 13, 1, 1 },
+{ 276, 195, 139, 101, 129, 114, 56, 12, 1, 1 },
+{ 280, 196, 140, 101, 128, 111, 54, 12, 1, 1 },
+{ 284, 198, 140, 101, 127, 109, 52, 11, 1, 1 },
+{ 288, 200, 141, 100, 126, 107, 50, 10, 1, 1 },
+{ 292, 201, 141, 100, 125, 105, 48, 10, 1, 1 },
+{ 296, 203, 141, 100, 123, 103, 47, 9, 1, 1 },
+{ 300, 204, 142, 99, 122, 101, 45, 9, 1, 1 },
+{ 304, 206, 142, 99, 121, 99, 43, 8, 1, 1 },
+{ 308, 207, 142, 99, 119, 97, 42, 8, 1, 1 },
+{ 312, 209, 142, 99, 118, 95, 40, 7, 1, 1 },
+{ 316, 210, 142, 98, 117, 93, 39, 7, 1, 1 },
+{ 320, 211, 142, 98, 116, 91, 37, 7, 1, 1 },
+{ 324, 213, 142, 97, 115, 89, 36, 6, 1, 1 },
+{ 328, 214, 142, 97, 113, 87, 35, 6, 1, 1 },
+{ 332, 215, 143, 96, 112, 85, 33, 6, 1, 1 },
+{ 336, 216, 143, 96, 111, 83, 32, 5, 1, 1 },
+{ 340, 218, 143, 95, 109, 81, 31, 5, 1, 1 },
+{ 344, 219, 142, 95, 108, 79, 30, 5, 1, 1 },
+{ 348, 220, 142, 94, 107, 78, 29, 4, 1, 1 },
+{ 352, 221, 142, 94, 105, 76, 28, 4, 1, 1 },
+{ 356, 222, 142, 93, 104, 74, 27, 4, 1, 1 },
+{ 360, 223, 142, 92, 103, 72, 26, 4, 1, 1 },
+{ 364, 224, 142, 92, 101, 70, 25, 4, 1, 1 },
+{ 368, 225, 142, 91, 100, 69, 24, 3, 1, 1 },
+{ 372, 226, 141, 91, 99, 67, 23, 3, 1, 1 },
+{ 376, 227, 141, 90, 97, 66, 22, 3, 1, 1 },
+{ 380, 228, 141, 89, 96, 64, 21, 3, 1, 1 },
+{ 384, 229, 140, 89, 95, 62, 20, 3, 1, 1 },
+{ 388, 229, 140, 88, 93, 61, 20, 3, 1, 1 },
+{ 392, 230, 140, 87, 92, 60, 19, 2, 1, 1 },
+{ 396, 231, 140, 86, 91, 58, 18, 2, 1, 1 },
+{ 400, 232, 139, 86, 89, 57, 17, 2, 1, 1 },
+{ 404, 232, 139, 85, 88, 55, 17, 2, 1, 1 },
+{ 408, 233, 138, 84, 87, 54, 16, 2, 1, 1 },
+{ 412, 234, 138, 84, 85, 52, 15, 2, 1, 1 },
+{ 416, 234, 137, 83, 84, 51, 15, 2, 1, 1 },
+{ 420, 235, 137, 82, 82, 50, 14, 2, 1, 1 },
+{ 424, 236, 136, 81, 81, 48, 14, 2, 1, 1 },
+{ 428, 236, 136, 81, 80, 47, 13, 1, 1, 1 },
+{ 432, 236, 135, 80, 79, 46, 13, 1, 1, 1 },
+{ 436, 237, 135, 79, 77, 45, 12, 1, 1, 1 },
+{ 440, 238, 134, 78, 76, 43, 12, 1, 1, 1 },
+{ 444, 238, 134, 77, 75, 42, 11, 1, 1, 1 },
+{ 448, 238, 133, 77, 73, 41, 11, 1, 1, 1 },
+{ 452, 239, 132, 76, 72, 40, 10, 1, 1, 1 },
+{ 456, 239, 131, 75, 71, 39, 10, 1, 1, 1 },
+{ 460, 239, 131, 74, 70, 38, 9, 1, 1, 1 },
+{ 464, 240, 130, 73, 68, 37, 9, 1, 1, 1 },
+{ 468, 240, 129, 72, 67, 36, 9, 1, 1, 1 },
+{ 472, 240, 128, 72, 66, 35, 8, 1, 1, 1 },
+{ 476, 240, 127, 71, 65, 34, 8, 1, 1, 1 },
+{ 480, 240, 127, 70, 63, 33, 8, 1, 1, 1 },
+{ 484, 241, 126, 69, 62, 32, 7, 1, 1, 1 },
+{ 488, 241, 125, 68, 61, 31, 7, 1, 1, 1 },
+{ 492, 241, 124, 67, 60, 30, 7, 1, 1, 1 },
+{ 496, 241, 124, 66, 59, 29, 6, 1, 1, 1 },
+{ 500, 240, 123, 66, 58, 28, 6, 1, 1, 1 },
+{ 504, 240, 122, 65, 57, 27, 6, 1, 1, 1 },
+{ 508, 240, 121, 64, 55, 27, 6, 1, 1, 1 },
+{ 512, 241, 120, 63, 54, 26, 5, 1, 1, 1 },
+{ 516, 241, 119, 62, 53, 25, 5, 1, 1, 1 },
+{ 520, 240, 118, 62, 52, 24, 5, 1, 1, 1 },
+{ 524, 240, 117, 60, 51, 24, 5, 1, 1, 1 },
+{ 528, 239, 116, 60, 50, 23, 5, 1, 1, 1 },
+{ 532, 239, 116, 59, 49, 22, 4, 1, 1, 1 },
+{ 536, 239, 115, 58, 48, 21, 4, 1, 1, 1 },
+{ 540, 239, 113, 57, 47, 21, 4, 1, 1, 1 },
+{ 544, 238, 113, 56, 46, 20, 4, 1, 1, 1 },
+{ 548, 238, 112, 55, 45, 19, 4, 1, 1, 1 },
+{ 552, 238, 110, 55, 44, 19, 3, 1, 1, 1 },
+{ 556, 237, 110, 54, 43, 18, 3, 1, 1, 1 },
+{ 560, 237, 108, 53, 42, 18, 3, 1, 1, 1 },
+{ 564, 236, 108, 52, 41, 17, 3, 1, 1, 1 },
+{ 568, 236, 106, 51, 40, 17, 3, 1, 1, 1 },
+{ 572, 235, 105, 51, 39, 16, 3, 1, 1, 1 },
+{ 576, 235, 104, 50, 38, 15, 3, 1, 1, 1 },
+{ 580, 234, 103, 49, 37, 15, 3, 1, 1, 1 },
+{ 584, 234, 102, 48, 37, 14, 2, 1, 1, 1 },
+{ 588, 233, 101, 47, 36, 14, 2, 1, 1, 1 },
+{ 592, 233, 100, 46, 35, 13, 2, 1, 1, 1 },
+{ 596, 231, 99, 46, 34, 13, 2, 1, 1, 1 },
+{ 600, 230, 98, 45, 33, 13, 2, 1, 1, 1 },
+{ 604, 230, 97, 44, 32, 12, 2, 1, 1, 1 },
+{ 608, 229, 96, 43, 31, 12, 2, 1, 1, 1 },
+{ 612, 228, 95, 42, 31, 11, 2, 1, 1, 1 },
+{ 616, 227, 93, 42, 30, 11, 2, 1, 1, 1 },
+{ 620, 227, 92, 41, 29, 10, 2, 1, 1, 1 },
+{ 624, 226, 92, 40, 28, 10, 1, 1, 1, 1 },
+{ 628, 225, 90, 39, 28, 10, 1, 1, 1, 1 },
+{ 632, 224, 89, 39, 27, 9, 1, 1, 1, 1 },
+{ 636, 223, 88, 38, 26, 9, 1, 1, 1, 1 },
+{ 640, 222, 87, 37, 25, 9, 1, 1, 1, 1 },
+{ 644, 221, 86, 36, 25, 8, 1, 1, 1, 1 },
+{ 648, 220, 84, 36, 24, 8, 1, 1, 1, 1 },
+{ 652, 219, 83, 35, 23, 8, 1, 1, 1, 1 },
+{ 656, 218, 82, 34, 23, 7, 1, 1, 1, 1 },
+{ 660, 217, 81, 33, 22, 7, 1, 1, 1, 1 },
+{ 664, 215, 80, 33, 21, 7, 1, 1, 1, 1 },
+{ 668, 214, 78, 32, 21, 7, 1, 1, 1, 1 },
+{ 672, 213, 78, 31, 20, 6, 1, 1, 1, 1 },
+{ 676, 211, 76, 31, 20, 6, 1, 1, 1, 1 },
+{ 680, 210, 75, 30, 19, 6, 1, 1, 1, 1 },
+{ 684, 209, 74, 29, 18, 6, 1, 1, 1, 1 },
+{ 688, 208, 73, 28, 18, 5, 1, 1, 1, 1 },
+{ 692, 206, 72, 28, 17, 5, 1, 1, 1, 1 },
+{ 696, 205, 70, 27, 17, 5, 1, 1, 1, 1 },
+{ 700, 203, 69, 27, 16, 5, 1, 1, 1, 1 },
+{ 704, 201, 68, 26, 16, 5, 1, 1, 1, 1 },
+{ 708, 201, 67, 25, 15, 4, 1, 1, 1, 1 },
+{ 712, 198, 66, 25, 15, 4, 1, 1, 1, 1 },
+{ 716, 197, 65, 24, 14, 4, 1, 1, 1, 1 },
+{ 720, 196, 63, 23, 14, 4, 1, 1, 1, 1 },
+{ 724, 194, 62, 23, 13, 4, 1, 1, 1, 1 },
+{ 728, 193, 61, 22, 13, 3, 1, 1, 1, 1 },
+{ 732, 191, 60, 22, 12, 3, 1, 1, 1, 1 },
+{ 736, 189, 59, 21, 12, 3, 1, 1, 1, 1 },
+{ 740, 188, 58, 20, 11, 3, 1, 1, 1, 1 },
+{ 744, 186, 56, 20, 11, 3, 1, 1, 1, 1 },
+{ 748, 184, 55, 19, 11, 3, 1, 1, 1, 1 },
+{ 752, 182, 54, 19, 10, 3, 1, 1, 1, 1 },
+{ 756, 181, 53, 18, 10, 2, 1, 1, 1, 1 },
+{ 760, 179, 52, 18, 9, 2, 1, 1, 1, 1 },
+{ 764, 177, 51, 17, 9, 2, 1, 1, 1, 1 },
+{ 768, 174, 50, 17, 9, 2, 1, 1, 1, 1 },
+{ 772, 173, 49, 16, 8, 2, 1, 1, 1, 1 },
+{ 776, 171, 47, 16, 8, 2, 1, 1, 1, 1 },
+{ 780, 169, 46, 15, 8, 2, 1, 1, 1, 1 },
+{ 784, 167, 45, 15, 7, 2, 1, 1, 1, 1 },
+{ 788, 165, 44, 14, 7, 2, 1, 1, 1, 1 },
+{ 792, 162, 43, 14, 7, 2, 1, 1, 1, 1 },
+{ 796, 161, 42, 13, 7, 1, 1, 1, 1, 1 },
+{ 800, 159, 41, 13, 6, 1, 1, 1, 1, 1 },
+{ 804, 157, 40, 12, 6, 1, 1, 1, 1, 1 },
+{ 808, 154, 39, 12, 6, 1, 1, 1, 1, 1 },
+{ 812, 153, 38, 11, 5, 1, 1, 1, 1, 1 },
+{ 816, 150, 37, 11, 5, 1, 1, 1, 1, 1 },
+{ 820, 148, 36, 10, 5, 1, 1, 1, 1, 1 },
+{ 824, 145, 35, 10, 5, 1, 1, 1, 1, 1 },
+{ 828, 143, 34, 10, 4, 1, 1, 1, 1, 1 },
+{ 832, 141, 33, 9, 4, 1, 1, 1, 1, 1 },
+{ 836, 138, 32, 9, 4, 1, 1, 1, 1, 1 },
+{ 840, 136, 30, 9, 4, 1, 1, 1, 1, 1 },
+{ 844, 133, 30, 8, 4, 1, 1, 1, 1, 1 },
+{ 848, 131, 29, 8, 3, 1, 1, 1, 1, 1 },
+{ 852, 129, 28, 7, 3, 1, 1, 1, 1, 1 },
+{ 856, 126, 27, 7, 3, 1, 1, 1, 1, 1 },
+{ 860, 123, 26, 7, 3, 1, 1, 1, 1, 1 },
+{ 864, 121, 25, 6, 3, 1, 1, 1, 1, 1 },
+{ 868, 118, 24, 6, 3, 1, 1, 1, 1, 1 },
+{ 872, 116, 23, 6, 2, 1, 1, 1, 1, 1 },
+{ 876, 113, 22, 6, 2, 1, 1, 1, 1, 1 },
+{ 880, 111, 21, 5, 2, 1, 1, 1, 1, 1 },
+{ 884, 108, 20, 5, 2, 1, 1, 1, 1, 1 },
+{ 888, 105, 19, 5, 2, 1, 1, 1, 1, 1 },
+{ 892, 102, 19, 4, 2, 1, 1, 1, 1, 1 },
+{ 896, 99, 18, 4, 2, 1, 1, 1, 1, 1 },
+{ 900, 97, 17, 4, 1, 1, 1, 1, 1, 1 },
+{ 904, 94, 16, 4, 1, 1, 1, 1, 1, 1 },
+{ 908, 92, 15, 3, 1, 1, 1, 1, 1, 1 },
+{ 912, 89, 14, 3, 1, 1, 1, 1, 1, 1 },
+{ 916, 85, 14, 3, 1, 1, 1, 1, 1, 1 },
+{ 920, 82, 13, 3, 1, 1, 1, 1, 1, 1 },
+{ 924, 79, 12, 3, 1, 1, 1, 1, 1, 1 },
+{ 928, 77, 11, 2, 1, 1, 1, 1, 1, 1 },
+{ 932, 73, 11, 2, 1, 1, 1, 1, 1, 1 },
+{ 936, 70, 10, 2, 1, 1, 1, 1, 1, 1 },
+{ 940, 67, 9, 2, 1, 1, 1, 1, 1, 1 },
+{ 944, 64, 8, 2, 1, 1, 1, 1, 1, 1 },
+{ 948, 60, 8, 2, 1, 1, 1, 1, 1, 1 },
+{ 952, 58, 7, 1, 1, 1, 1, 1, 1, 1 },
+{ 956, 54, 7, 1, 1, 1, 1, 1, 1, 1 },
+{ 960, 51, 6, 1, 1, 1, 1, 1, 1, 1 },
+{ 964, 48, 5, 1, 1, 1, 1, 1, 1, 1 },
+{ 968, 44, 5, 1, 1, 1, 1, 1, 1, 1 },
+{ 972, 41, 4, 1, 1, 1, 1, 1, 1, 1 },
+{ 976, 37, 4, 1, 1, 1, 1, 1, 1, 1 },
+{ 980, 34, 3, 1, 1, 1, 1, 1, 1, 1 },
+{ 984, 30, 3, 1, 1, 1, 1, 1, 1, 1 },
+{ 988, 27, 2, 1, 1, 1, 1, 1, 1, 1 },
+{ 992, 23, 2, 1, 1, 1, 1, 1, 1, 1 },
+{ 996, 19, 2, 1, 1, 1, 1, 1, 1, 1 },
+{ 1000, 16, 1, 1, 1, 1, 1, 1, 1, 1 },
+{ 1004, 12, 1, 1, 1, 1, 1, 1, 1, 1 },
+{ 1008, 8, 1, 1, 1, 1, 1, 1, 1, 1 },
+{ 1012, 4, 1, 1, 1, 1, 1, 1, 1, 1 },
+{ 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+{ 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
 };
 #endif  // CONFIG_ANS
 
@@ -2800,11 +2800,13 @@
 
 #if CONFIG_ANS
 void vp10_build_token_cdfs(const vpx_prob *pdf_model, rans_dec_lut cdf) {
-  AnsP8 pdf_tab[ENTROPY_TOKENS - 1];
+  AnsP10 pdf_tab[ENTROPY_TOKENS - 1];
   assert(pdf_model[2] != 0);
-  rans_merge_prob_pdf(pdf_tab, pdf_model[1],
-                      vp10_pareto8_token_probs[pdf_model[2] - 1],
-                      ENTROPY_TOKENS - 2);
+  // TODO(aconverse): Investigate making the precision of the zero and EOB tree
+  // nodes 10-bits.
+  rans_merge_prob8_pdf(pdf_tab, pdf_model[1],
+                       vp10_pareto8_token_probs[pdf_model[2] - 1],
+                       ENTROPY_TOKENS - 2);
   rans_build_cdf_from_pdf(pdf_tab, cdf);
 }
 
diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h
index 1e47883..34da525 100644
--- a/vp10/common/entropy.h
+++ b/vp10/common/entropy.h
@@ -176,7 +176,7 @@
 extern const vpx_tree_index vp10_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)];
 extern const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
 #if CONFIG_ANS
-extern const vpx_prob
+extern const AnsP10
     vp10_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2];
 
 typedef rans_dec_lut coeff_cdf_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS];
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index bafd0d6..ef046e9 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -12,6 +12,7 @@
 
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
+#include "vpx_ports/system_state.h"
 
 #if CONFIG_VP9_HIGHBITDEPTH
 #include "vpx_dsp/vpx_dsp_common.h"
@@ -389,7 +390,6 @@
 }
 
 #if CONFIG_EXT_INTRA
-#define PI 3.14159265
 #define FILTER_INTRA_PREC_BITS 10
 #define FILTER_INTRA_ROUND_VAL 511
 
@@ -672,24 +672,16 @@
 static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                          const uint8_t *above, const uint8_t *left, int angle,
                          INTRA_FILTER filter_type) {
-  double t = 0;
-  int dx, dy;
-  int bs = 4 << tx_size;
+  const int dx = (int)dr_intra_derivative[angle][0];
+  const int dy = (int)dr_intra_derivative[angle][1];
+  const int bs = 4 << tx_size;
+  assert(angle > 0 && angle < 270);
 
-  if (angle != 90 && angle != 180)
-    t = tan(angle * PI / 180.0);
   if (angle > 0 && angle < 90) {
-    dx = -((int)(256 / t));
-    dy = 1;
     dr_prediction_z1(dst, stride, bs, above, left, dx, dy, filter_type);
   } else if (angle > 90 && angle < 180) {
-    t = -t;
-    dx = (int)(256 / t);
-    dy = (int)(256 * t);
     dr_prediction_z2(dst, stride, bs, above, left, dx, dy, filter_type);
   } else if (angle > 180 && angle < 270) {
-    dx = 1;
-    dy = -((int)(256 * t));
     dr_prediction_z3(dst, stride, bs, above, left, dx, dy, filter_type);
   } else if (angle == 90) {
     pred[V_PRED][tx_size](dst, stride, above, left);
@@ -1008,23 +1000,15 @@
 static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
                                 const uint16_t *above, const uint16_t *left,
                                 int angle, int bd, INTRA_FILTER filter) {
-  double t = 0;
-  int dx, dy;
+  const int dx = (int)dr_intra_derivative[angle][0];
+  const int dy = (int)dr_intra_derivative[angle][1];
+  assert(angle > 0 && angle < 270);
 
-  if (angle != 90 && angle != 180)
-    t = tan(angle * PI / 180.0);
   if (angle > 0 && angle < 90) {
-    dx = -((int)(256 / t));
-    dy = 1;
     highbd_dr_prediction_z1(dst, stride, bs, above, left, dx, dy, bd, filter);
   } else if (angle > 90 && angle < 180) {
-    t = -t;
-    dx = (int)(256 / t);
-    dy = (int)(256 * t);
     highbd_dr_prediction_z2(dst, stride, bs, above, left, dx, dy, bd, filter);
   } else if (angle > 180 && angle < 270) {
-    dx = 1;
-    dy = -((int)(256 * t));
     highbd_dr_prediction_z3(dst, stride, bs, above, left, dx, dy, bd, filter);
   } else if (angle == 90) {
     highbd_v_predictor(dst, stride, bs, above, left, bd);
diff --git a/vp10/common/vp10_fwd_txfm2d.c b/vp10/common/vp10_fwd_txfm2d.c
index 00f8834..ccb820f 100644
--- a/vp10/common/vp10_fwd_txfm2d.c
+++ b/vp10/common/vp10_fwd_txfm2d.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp10/common/x86/vp10_fwd_txfm2d_sse4.c b/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
index d884571..1b11087 100644
--- a/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
+++ b/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
@@ -87,15 +87,6 @@
   transpose_32(txfm_size, buf_128, out_128);
 }
 
-void vp10_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output,
-                                const int stride, int tx_type,
-                                const int bd) {
-  int32_t txfm_buf[16];
-  const TXFM_2D_CFG* cfg = vp10_get_txfm_4x4_cfg(tx_type);
-  (void)bd;
-  fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
-}
-
 void vp10_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output,
                                 const int stride, int tx_type,
                                 const int bd) {
diff --git a/vp10/encoder/buf_ans.h b/vp10/encoder/buf_ans.h
index c2d315a..11055d9 100644
--- a/vp10/encoder/buf_ans.h
+++ b/vp10/encoder/buf_ans.h
@@ -29,8 +29,8 @@
 struct buffered_ans_symbol {
   uint8_t method;    // one of ANS_METHOD_UABS or ANS_METHOD_RANS
   // TODO(aconverse): Should be possible to write this interms of start for ABS
-  AnsP8 val_start;  // Boolean value for ABS, start in symbol cycle for Rans
-  AnsP8 prob;  // Probability of this symbol
+  AnsP10 val_start;  // Boolean value for ABS, start in symbol cycle for Rans
+  AnsP10 prob;  // Probability of this symbol
 };
 
 struct BufAnsCoder {
diff --git a/vp10/encoder/cost.c b/vp10/encoder/cost.c
index 6318ad3..234e0b6 100644
--- a/vp10/encoder/cost.c
+++ b/vp10/encoder/cost.c
@@ -41,6 +41,97 @@
     48,   45,   42,   38,   35,   32,   29,   26,   23,   20,   18,   15,
     12,   9,    6,    3};
 
+#if CONFIG_ANS
+// round(-log2(i/1024.) * (1 << VP9_PROB_COST_SHIFT))
+static const uint16_t vp10_prob_cost10[1024] = {
+    5120, 5120, 4608, 4308, 4096, 3931, 3796, 3683, 3584, 3497, 3419, 3349,
+    3284, 3225, 3171, 3120, 3072, 3027, 2985, 2945, 2907, 2871, 2837, 2804,
+    2772, 2742, 2713, 2685, 2659, 2633, 2608, 2583, 2560, 2537, 2515, 2494,
+    2473, 2453, 2433, 2414, 2395, 2377, 2359, 2342, 2325, 2308, 2292, 2276,
+    2260, 2245, 2230, 2216, 2201, 2187, 2173, 2160, 2147, 2134, 2121, 2108,
+    2096, 2083, 2071, 2060, 2048, 2037, 2025, 2014, 2003, 1992, 1982, 1971,
+    1961, 1951, 1941, 1931, 1921, 1911, 1902, 1892, 1883, 1874, 1865, 1856,
+    1847, 1838, 1830, 1821, 1813, 1804, 1796, 1788, 1780, 1772, 1764, 1756,
+    1748, 1741, 1733, 1726, 1718, 1711, 1704, 1697, 1689, 1682, 1675, 1668,
+    1661, 1655, 1648, 1641, 1635, 1628, 1622, 1615, 1609, 1602, 1596, 1590,
+    1584, 1578, 1571, 1565, 1559, 1554, 1548, 1542, 1536, 1530, 1525, 1519,
+    1513, 1508, 1502, 1497, 1491, 1486, 1480, 1475, 1470, 1465, 1459, 1454,
+    1449, 1444, 1439, 1434, 1429, 1424, 1419, 1414, 1409, 1404, 1399, 1395,
+    1390, 1385, 1380, 1376, 1371, 1367, 1362, 1357, 1353, 1348, 1344, 1340,
+    1335, 1331, 1326, 1322, 1318, 1313, 1309, 1305, 1301, 1297, 1292, 1288,
+    1284, 1280, 1276, 1272, 1268, 1264, 1260, 1256, 1252, 1248, 1244, 1240,
+    1236, 1233, 1229, 1225, 1221, 1218, 1214, 1210, 1206, 1203, 1199, 1195,
+    1192, 1188, 1185, 1181, 1177, 1174, 1170, 1167, 1163, 1160, 1156, 1153,
+    1149, 1146, 1143, 1139, 1136, 1133, 1129, 1126, 1123, 1119, 1116, 1113,
+    1110, 1106, 1103, 1100, 1097, 1094, 1090, 1087, 1084, 1081, 1078, 1075,
+    1072, 1069, 1066, 1062, 1059, 1056, 1053, 1050, 1047, 1044, 1042, 1039,
+    1036, 1033, 1030, 1027, 1024, 1021, 1018, 1015, 1013, 1010, 1007, 1004,
+    1001, 998,  996,  993,  990,  987,  985,  982,  979,  977,  974,  971,
+    968,  966,  963,  960,  958,  955,  953,  950,  947,  945,  942,  940,
+    937,  934,  932,  929,  927,  924,  922,  919,  917,  914,  912,  909,
+    907,  904,  902,  899,  897,  895,  892,  890,  887,  885,  883,  880,
+    878,  876,  873,  871,  868,  866,  864,  861,  859,  857,  855,  852,
+    850,  848,  845,  843,  841,  839,  836,  834,  832,  830,  828,  825,
+    823,  821,  819,  817,  814,  812,  810,  808,  806,  804,  801,  799,
+    797,  795,  793,  791,  789,  787,  785,  783,  780,  778,  776,  774,
+    772,  770,  768,  766,  764,  762,  760,  758,  756,  754,  752,  750,
+    748,  746,  744,  742,  740,  738,  736,  734,  732,  730,  728,  726,
+    724,  723,  721,  719,  717,  715,  713,  711,  709,  707,  706,  704,
+    702,  700,  698,  696,  694,  693,  691,  689,  687,  685,  683,  682,
+    680,  678,  676,  674,  673,  671,  669,  667,  665,  664,  662,  660,
+    658,  657,  655,  653,  651,  650,  648,  646,  644,  643,  641,  639,
+    637,  636,  634,  632,  631,  629,  627,  626,  624,  622,  621,  619,
+    617,  616,  614,  612,  611,  609,  607,  606,  604,  602,  601,  599,
+    598,  596,  594,  593,  591,  590,  588,  586,  585,  583,  582,  580,
+    578,  577,  575,  574,  572,  571,  569,  567,  566,  564,  563,  561,
+    560,  558,  557,  555,  554,  552,  550,  549,  547,  546,  544,  543,
+    541,  540,  538,  537,  535,  534,  532,  531,  530,  528,  527,  525,
+    524,  522,  521,  519,  518,  516,  515,  513,  512,  511,  509,  508,
+    506,  505,  503,  502,  501,  499,  498,  496,  495,  493,  492,  491,
+    489,  488,  486,  485,  484,  482,  481,  480,  478,  477,  475,  474,
+    473,  471,  470,  469,  467,  466,  465,  463,  462,  460,  459,  458,
+    456,  455,  454,  452,  451,  450,  448,  447,  446,  444,  443,  442,
+    441,  439,  438,  437,  435,  434,  433,  431,  430,  429,  428,  426,
+    425,  424,  422,  421,  420,  419,  417,  416,  415,  414,  412,  411,
+    410,  409,  407,  406,  405,  404,  402,  401,  400,  399,  397,  396,
+    395,  394,  392,  391,  390,  389,  387,  386,  385,  384,  383,  381,
+    380,  379,  378,  377,  375,  374,  373,  372,  371,  369,  368,  367,
+    366,  365,  364,  362,  361,  360,  359,  358,  356,  355,  354,  353,
+    352,  351,  349,  348,  347,  346,  345,  344,  343,  341,  340,  339,
+    338,  337,  336,  335,  333,  332,  331,  330,  329,  328,  327,  326,
+    324,  323,  322,  321,  320,  319,  318,  317,  316,  314,  313,  312,
+    311,  310,  309,  308,  307,  306,  305,  303,  302,  301,  300,  299,
+    298,  297,  296,  295,  294,  293,  292,  291,  289,  288,  287,  286,
+    285,  284,  283,  282,  281,  280,  279,  278,  277,  276,  275,  274,
+    273,  272,  271,  269,  268,  267,  266,  265,  264,  263,  262,  261,
+    260,  259,  258,  257,  256,  255,  254,  253,  252,  251,  250,  249,
+    248,  247,  246,  245,  244,  243,  242,  241,  240,  239,  238,  237,
+    236,  235,  234,  233,  232,  231,  230,  229,  228,  227,  226,  225,
+    224,  223,  222,  221,  220,  219,  218,  217,  216,  215,  214,  213,
+    212,  212,  211,  210,  209,  208,  207,  206,  205,  204,  203,  202,
+    201,  200,  199,  198,  197,  196,  195,  194,  194,  193,  192,  191,
+    190,  189,  188,  187,  186,  185,  184,  183,  182,  181,  181,  180,
+    179,  178,  177,  176,  175,  174,  173,  172,  171,  170,  170,  169,
+    168,  167,  166,  165,  164,  163,  162,  161,  161,  160,  159,  158,
+    157,  156,  155,  154,  153,  152,  152,  151,  150,  149,  148,  147,
+    146,  145,  145,  144,  143,  142,  141,  140,  139,  138,  138,  137,
+    136,  135,  134,  133,  132,  132,  131,  130,  129,  128,  127,  126,
+    125,  125,  124,  123,  122,  121,  120,  120,  119,  118,  117,  116,
+    115,  114,  114,  113,  112,  111,  110,  109,  109,  108,  107,  106,
+    105,  104,  104,  103,  102,  101,  100,  99,   99,   98,   97,   96,
+    95,   95,   94,   93,   92,   91,   90,   90,   89,   88,   87,   86,
+    86,   85,   84,   83,   82,   82,   81,   80,   79,   78,   78,   77,
+    76,   75,   74,   74,   73,   72,   71,   70,   70,   69,   68,   67,
+    66,   66,   65,   64,   63,   62,   62,   61,   60,   59,   59,   58,
+    57,   56,   55,   55,   54,   53,   52,   52,   51,   50,   49,   48,
+    48,   47,   46,   45,   45,   44,   43,   42,   42,   41,   40,   39,
+    38,   38,   37,   36,   35,   35,   34,   33,   32,   32,   31,   30,
+    29,   29,   28,   27,   26,   26,   25,   24,   23,   23,   22,   21,
+    20,   20,   19,   18,   18,   17,   16,   15,   15,   14,   13,   12,
+    12,   11,   10,   9,    9,    8,    7,    7,    6,    5,    4,    4,
+    3,    2,    1,    1};
+#endif  // CONFIG_ANS
+
 static void cost(int *costs, vpx_tree tree, const vpx_prob *probs,
                  int i, int c) {
   const vpx_prob prob = probs[i / 2];
@@ -68,7 +159,7 @@
     c_tree = vp10_cost_bit(tree_probs[0], 1);
   for (i = ZERO_TOKEN; i <= CATEGORY6_TOKEN; ++i) {
     const int p = (*token_cdf)[i + 1] - (*token_cdf)[i];
-    costs[i] = c_tree + vp10_cost_bit(p, 0);
+    costs[i] = c_tree + vp10_prob_cost10[p];
   }
 }
 #endif  // CONFIG_ANS
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 6ffa790..de26654 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1407,6 +1407,7 @@
   int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
   TX_SIZE best_tx = max_tx_size;
+  uint8_t zcoeff_blk[TX_SIZES][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
   const int tx_select = cm->tx_mode == TX_MODE_SELECT;
   const int is_inter = is_inter_block(mbmi);
 #if CONFIG_EXT_TX
@@ -1477,10 +1478,17 @@
       *rate       = r;
       *skip       = s;
       *psse       = sse;
+      memcpy(zcoeff_blk[mbmi->tx_size], x->zcoeff_blk[mbmi->tx_size],
+             sizeof(zcoeff_blk[mbmi->tx_size][0]) *
+             MAX_MIB_SIZE * MAX_MIB_SIZE * 4);
     }
   }
   mbmi->tx_size = best_tx;
 
+  memcpy(x->zcoeff_blk[mbmi->tx_size], zcoeff_blk[mbmi->tx_size],
+         sizeof(zcoeff_blk[mbmi->tx_size][0]) *
+         MAX_MIB_SIZE * MAX_MIB_SIZE * 4);
+
   return best_rd;
 }
 
@@ -1639,6 +1647,7 @@
                                    BLOCK_SIZE bs) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  uint8_t zcoeff_blk[TX_SIZES][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
   int r, s;
   int64_t d, sse;
   int64_t rd = INT64_MAX;
@@ -1671,6 +1680,9 @@
       *psse       = sse;
       best_tx_type = tx_type;
       best_tx = mbmi->tx_size;
+      memcpy(zcoeff_blk[mbmi->tx_size], x->zcoeff_blk[mbmi->tx_size],
+             sizeof(zcoeff_blk[mbmi->tx_size][0]) *
+             MAX_MIB_SIZE * MAX_MIB_SIZE * 4);
     }
   }
 
@@ -1681,6 +1693,10 @@
   if (mbmi->tx_size >= TX_32X32)
     assert(mbmi->tx_type == DCT_DCT);
 #endif
+
+  memcpy(x->zcoeff_blk[mbmi->tx_size], zcoeff_blk[mbmi->tx_size],
+         sizeof(zcoeff_blk[mbmi->tx_size][0]) *
+         MAX_MIB_SIZE * MAX_MIB_SIZE * 4);
 }
 
 static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
@@ -2517,16 +2533,17 @@
 
 static INLINE int get_angle_index(double angle) {
   const double step = 22.5, base = 45;
-  return (int)round((angle - base) / step);
+  return (int)lround((angle - base) / step);
 }
 
 static void angle_estimation(const uint8_t *src, int src_stride,
                              int rows, int cols, double *hist) {
   int r, c, i, index;
-  const double pi = 3.1415;
   double angle, dx, dy;
-  double temp, divisor = 0;
+  double temp, divisor;
 
+  vpx_clear_system_state();
+  divisor = 0;
   for (i = 0; i < DIRECTIONAL_MODES; ++i)
     hist[i] = 0;
 
@@ -2539,7 +2556,7 @@
       if (dy == 0)
         angle = 90;
       else
-        angle = (atan((double)dx / (double)dy)) * 180 / pi;
+        angle = (atan((double)dx / (double)dy)) * 180 / PI;
       assert(angle >= -90 && angle <= 90);
       index = get_angle_index(angle + 180);
       if (index < DIRECTIONAL_MODES) {
@@ -2567,11 +2584,12 @@
 static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
                                     int rows, int cols, double *hist) {
   int r, c, i, index;
-  const double pi = 3.1415;
   double angle, dx, dy;
-  double temp, divisor = 0;
+  double temp, divisor;
   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
 
+  vpx_clear_system_state();
+  divisor = 0;
   for (i = 0; i < DIRECTIONAL_MODES; ++i)
     hist[i] = 0;
 
@@ -2584,7 +2602,7 @@
       if (dy == 0)
         angle = 90;
       else
-        angle = (atan((double)dx / (double)dy)) * 180 / pi;
+        angle = (atan((double)dx / (double)dy)) * 180 / PI;
       assert(angle >= -90 && angle <= 90);
       index = get_angle_index(angle + 180);
       if (index < DIRECTIONAL_MODES) {
diff --git a/vp10/encoder/x86/highbd_fwd_txfm_sse4.c b/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
index 16323b3..3cda783 100644
--- a/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
@@ -111,52 +111,136 @@
   in[3] = _mm_unpackhi_epi64(v1, v3);
 }
 
-static INLINE void write_buffer_4x4(tran_low_t *output, __m128i *res) {
+static INLINE void write_buffer_4x4(__m128i *res, tran_low_t *output) {
   _mm_store_si128((__m128i *)(output + 0 * 4), res[0]);
   _mm_store_si128((__m128i *)(output + 1 * 4), res[1]);
   _mm_store_si128((__m128i *)(output + 2 * 4), res[2]);
   _mm_store_si128((__m128i *)(output + 3 * 4), res[3]);
 }
 
+// Note:
+//  The 4x4 transform is now implemented by vp10_fwd_txfm2d_4x4_sse4_1() below.
+//  This stub is kept only because vp10_highbd_fht4x4_c() is not removed yet.
 void vp10_highbd_fht4x4_sse4_1(const int16_t *input, tran_low_t *output,
                                int stride, int tx_type) {
+  (void)input;
+  (void)output;
+  (void)stride;
+  (void)tx_type;
+  assert(0);
+}
+
+static void fadst4x4_sse4_1(__m128i *in, int bit) {
+  const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+  const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+  const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+  const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+  const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));  // half of (1 << bit)
+  const __m128i kZero = _mm_setzero_si128();
+  __m128i s0, s1, s2, s3;
+  __m128i u0, u1, u2, u3;
+  __m128i v0, v1, v2, v3;
+
+  // stage 0, stage 1: no arithmetic; in[] feeds stage 2 directly
+  // stage 2: s0..s3 = (a * c0 +/- b * c1 + rnding) >> bit for the pairs
+  // (in[3], in[0]) with cospi8/cospi56 and (in[1], in[2]) with cospi40/cospi24
+  u0 = _mm_mullo_epi32(in[3], cospi8);
+  u1 = _mm_mullo_epi32(in[0], cospi56);
+  u2 = _mm_add_epi32(u0, u1);
+  s0 = _mm_add_epi32(u2, rnding);
+  s0 = _mm_srai_epi32(s0, bit);
+
+  v0 = _mm_mullo_epi32(in[3], cospi56);
+  v1 = _mm_mullo_epi32(in[0], cospi8);
+  v2 = _mm_sub_epi32(v0, v1);
+  s1 = _mm_add_epi32(v2, rnding);
+  s1 = _mm_srai_epi32(s1, bit);
+
+  u0 = _mm_mullo_epi32(in[1], cospi40);
+  u1 = _mm_mullo_epi32(in[2], cospi24);
+  u2 = _mm_add_epi32(u0, u1);
+  s2 = _mm_add_epi32(u2, rnding);
+  s2 = _mm_srai_epi32(s2, bit);
+
+  v0 = _mm_mullo_epi32(in[1], cospi24);
+  v1 = _mm_mullo_epi32(in[2], cospi40);
+  v2 = _mm_sub_epi32(v0, v1);
+  s3 = _mm_add_epi32(v2, rnding);
+  s3 = _mm_srai_epi32(s3, bit);
+
+  // stage 3: add/subtract butterflies on (s0, s2) and (s1, s3)
+  u0 = _mm_add_epi32(s0, s2);
+  u2 = _mm_sub_epi32(s0, s2);
+  u1 = _mm_add_epi32(s1, s3);
+  u3 = _mm_sub_epi32(s1, s3);
+
+  // stage 4: u2, u3 = (u2 * cospi32 +/- u3 * cospi32 + rnding) >> bit
+  v0 = _mm_mullo_epi32(u2, cospi32);
+  v1 = _mm_mullo_epi32(u3, cospi32);
+  v2 = _mm_add_epi32(v0, v1);
+  s2 = _mm_add_epi32(v2, rnding);
+  u2 = _mm_srai_epi32(s2, bit);
+
+  v2 = _mm_sub_epi32(v0, v1);
+  s3 = _mm_add_epi32(v2, rnding);
+  u3 = _mm_srai_epi32(s3, bit);
+
+  // u0, u1, u2, u3: flip the sign of u2 and u1 (0 - x)
+  u2 = _mm_sub_epi32(kZero, u2);
+  u1 = _mm_sub_epi32(kZero, u1);
+
+  // output order: u0, u2, u3, u1 (u2 and u1 already negated above)
+  // Transpose the 4x4 block of 32-bit lanes and write it back into in[]
+  v0 = _mm_unpacklo_epi32(u0, u2);
+  v1 = _mm_unpackhi_epi32(u0, u2);
+  v2 = _mm_unpacklo_epi32(u3, u1);
+  v3 = _mm_unpackhi_epi32(u3, u1);
+
+  in[0] = _mm_unpacklo_epi64(v0, v2);
+  in[1] = _mm_unpackhi_epi64(v0, v2);
+  in[2] = _mm_unpacklo_epi64(v1, v3);
+  in[3] = _mm_unpackhi_epi64(v1, v3);
+}
+
+void vp10_fwd_txfm2d_4x4_sse4_1(const int16_t *input, tran_low_t *coeff,
+                                int input_stride, int tx_type,
+                                const int bd) {
   __m128i in[4];
-  const TXFM_2D_CFG *cfg;
-  int bit;
+  const TXFM_2D_CFG *cfg = NULL;
 
   switch (tx_type) {
     case DCT_DCT:
       cfg = &fwd_txfm_2d_cfg_dct_dct_4;
-      load_buffer_4x4(input, in, stride, 0, 0, cfg->shift[0]);
-      bit = cfg->cos_bit_col[2];
-      fdct4x4_sse4_1(in, bit);
-      bit = cfg->cos_bit_row[2];
-      fdct4x4_sse4_1(in, bit);
-      write_buffer_4x4(output, in);
+      load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
+      fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      write_buffer_4x4(in, coeff);
       break;
     case ADST_DCT:
+      cfg = &fwd_txfm_2d_cfg_adst_dct_4;
+      load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
+      fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      write_buffer_4x4(in, coeff);
+      break;
     case DCT_ADST:
+      cfg = &fwd_txfm_2d_cfg_dct_adst_4;
+      load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
+      fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      write_buffer_4x4(in, coeff);
+      break;
     case ADST_ADST:
-      vp10_highbd_fht4x4_c(input, output, stride, tx_type);
+      cfg = &fwd_txfm_2d_cfg_adst_adst_4;
+      load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
+      fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+      fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+      write_buffer_4x4(in, coeff);
       break;
-#if CONFIG_EXT_TX
-    case FLIPADST_DCT:
-    case DCT_FLIPADST:
-    case FLIPADST_FLIPADST:
-    case ADST_FLIPADST:
-    case FLIPADST_ADST:
-      vp10_highbd_fht4x4_c(input, output, stride, tx_type);
-      break;
-    case V_DCT:
-    case H_DCT:
-    case V_ADST:
-    case H_ADST:
-    case V_FLIPADST:
-    case H_FLIPADST:
-      vp10_highbd_fht4x4_c(input, output, stride, tx_type);
-      break;
-#endif  // CONFIG_EXT_TX
     default:
       assert(0);
   }
+  (void)bd;
 }