Merge "more ref_mv changes from aom/master" into nextgenv2
diff --git a/aom_dsp/ans.c b/aom_dsp/ans.c
new file mode 100644
index 0000000..18f6d48
--- /dev/null
+++ b/aom_dsp/ans.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include "./aom_config.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/ans.h"
+#include "aom_dsp/prob.h"
+
+// Builds cdf_tab (leading 0, must end exactly at RANS_PRECISION) from a pdf.
+void aom_rans_build_cdf_from_pdf(const AnsP10 token_probs[], rans_lut cdf_tab) {
+  int n = 0;
+  cdf_tab[0] = 0;
+  for (; cdf_tab[n] < RANS_PRECISION; ++n)
+    cdf_tab[n + 1] = cdf_tab[n] + token_probs[n];
+  assert(cdf_tab[n] == RANS_PRECISION);
+}
+
+// Index of the first maximal entry of pdf_tab, or -1 when num_syms <= 0.
+static int find_largest(const AnsP10 *const pdf_tab, int num_syms) {
+  int best = -1;
+  int best_prob = -1;
+  int sym;
+  for (sym = 0; sym < num_syms; ++sym) {
+    const int prob = pdf_tab[sym];
+    if (prob <= best_prob) continue;  // ties keep the earlier index
+    best_prob = prob;
+    best = sym;
+  }
+  return best;
+}
+
+// Fills out_pdf with node_prob (widened to 10 bits) followed by src_pdf scaled
+// by (256 - node_prob) / 256, then fixes the total up to RANS_PRECISION.
+void aom_rans_merge_prob8_pdf(AnsP10 *const out_pdf, const AnsP8 node_prob,
+                              const AnsP10 *const src_pdf, int in_syms) {
+  const int out_syms = in_syms + 1;
+  const int half = ANS_P8_PRECISION >> 1;  // rounding term for the >> below
+  const int max_p = (int)RANS_PRECISION - in_syms;
+  const AnsP8 p1 = ANS_P8_PRECISION - node_prob;
+  int remaining = RANS_PRECISION;  // probability mass not yet assigned
+  int sym;
+  assert(src_pdf != out_pdf);
+
+  // Entry 0 carries node_prob shifted up from 8-bit to 10-bit precision.
+  out_pdf[0] = node_prob << (RANS_PROB_BITS - ANS_P8_SHIFT);
+  remaining -= out_pdf[0];
+  for (sym = 0; sym < in_syms; ++sym) {
+    // Scale by p1 / 256 with rounding, then clamp into [1, max_p].
+    const int scaled = (p1 * src_pdf[sym] + half) >> ANS_P8_SHIFT;
+    const int p = AOMMAX(AOMMIN(scaled, max_p), 1);
+    out_pdf[sym + 1] = p;
+    remaining -= p;
+  }
+
+  // Give any shortfall to the largest entry in one step; take any excess
+  // back one unit at a time from whichever entry is currently largest.
+  if (remaining > 0) out_pdf[find_largest(out_pdf, out_syms)] += remaining;
+  for (; remaining < 0; ++remaining) {
+    sym = find_largest(out_pdf, out_syms);
+    --out_pdf[sym];
+    assert(out_pdf[sym] > 0);
+  }
+}
diff --git a/aom_dsp/ans.h b/aom_dsp/ans.h
index c526e27..15fe729 100644
--- a/aom_dsp/ans.h
+++ b/aom_dsp/ans.h
@@ -1,413 +1,51 @@
 /*
- *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
  *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
 #ifndef AOM_DSP_ANS_H_
 #define AOM_DSP_ANS_H_
-// An implementation of Asymmetric Numeral Systems
+// Constants, types and utilities for Asymmetric Numeral Systems
 // http://arxiv.org/abs/1311.2540v2
 
 #include <assert.h>
 #include "./aom_config.h"
 #include "aom/aom_integer.h"
 #include "aom_dsp/prob.h"
-#include "aom_ports/mem_ops.h"
-
-#define ANS_DIVIDE_BY_MULTIPLY 1
-#if ANS_DIVIDE_BY_MULTIPLY
-#include "aom_dsp/divide.h"
-#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
-  do {                                                     \
-    quotient = fastdiv(dividend, divisor);                 \
-    remainder = dividend - quotient * divisor;             \
-  } while (0)
-#define ANS_DIV(dividend, divisor) fastdiv(dividend, divisor)
-#else
-#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
-  do {                                                     \
-    quotient = dividend / divisor;                         \
-    remainder = dividend % divisor;                        \
-  } while (0)
-#define ANS_DIV(dividend, divisor) ((dividend) / (divisor))
-#endif
 
 #ifdef __cplusplus
 extern "C" {
 #endif  // __cplusplus
 
-struct AnsCoder {
-  uint8_t *buf;
-  int buf_offset;
-  uint32_t state;
-};
-
-struct AnsDecoder {
-  const uint8_t *buf;
-  int buf_offset;
-  uint32_t state;
-};
-
 typedef uint8_t AnsP8;
-#define ans_p8_precision 256u
-#define ans_p8_shift 8
+#define ANS_P8_PRECISION 256u
+#define ANS_P8_SHIFT 8
 typedef uint16_t AnsP10;
-#define ans_p10_precision 1024u
+#define ANS_P10_PRECISION 1024u
+#define RANS_PROB_BITS 10
 
-#define rans_precision ans_p10_precision
+#define RANS_PRECISION ANS_P10_PRECISION
 
-#define l_base (ans_p10_precision * 4)  // l_base % precision must be 0
-#define io_base 256
-// Range I = { l_base, l_base + 1, ..., l_base * io_base - 1 }
-
-static INLINE void ans_write_init(struct AnsCoder *const ans,
-                                  uint8_t *const buf) {
-  ans->buf = buf;
-  ans->buf_offset = 0;
-  ans->state = l_base;
-}
-
-static INLINE int ans_write_end(struct AnsCoder *const ans) {
-  uint32_t state;
-  assert(ans->state >= l_base);
-  assert(ans->state < l_base * io_base);
-  state = ans->state - l_base;
-  if (state < (1 << 6)) {
-    ans->buf[ans->buf_offset] = (0x00 << 6) + state;
-    return ans->buf_offset + 1;
-  } else if (state < (1 << 14)) {
-    mem_put_le16(ans->buf + ans->buf_offset, (0x01 << 14) + state);
-    return ans->buf_offset + 2;
-  } else if (state < (1 << 22)) {
-    mem_put_le24(ans->buf + ans->buf_offset, (0x02 << 22) + state);
-    return ans->buf_offset + 3;
-  } else {
-    assert(0 && "State is too large to be serialized");
-    return ans->buf_offset;
-  }
-}
-
-// rABS with descending spread
-// p or p0 takes the place of l_s from the paper
-// ans_p8_precision is m
-static INLINE void rabs_desc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
-  const AnsP8 p = ans_p8_precision - p0;
-  const unsigned l_s = val ? p : p0;
-  unsigned quot, rem;
-  if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
-    ans->buf[ans->buf_offset++] = ans->state % io_base;
-    ans->state /= io_base;
-  }
-  ANS_DIVREM(quot, rem, ans->state, l_s);
-  ans->state = quot * ans_p8_precision + rem + (val ? 0 : p);
-}
-
-#define ANS_IMPL1 0
-#define UNPREDICTABLE(x) x
-static INLINE int rabs_desc_read(struct AnsDecoder *ans, AnsP8 p0) {
-  int val;
-#if ANS_IMPL1
-  unsigned l_s;
-#else
-  unsigned quot, rem, x, xn;
-#endif
-  const AnsP8 p = ans_p8_precision - p0;
-  if (ans->state < l_base) {
-    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
-  }
-#if ANS_IMPL1
-  val = ans->state % ans_p8_precision < p;
-  l_s = val ? p : p0;
-  ans->state = (ans->state / ans_p8_precision) * l_s +
-               ans->state % ans_p8_precision - (!val * p);
-#else
-  x = ans->state;
-  quot = x / ans_p8_precision;
-  rem = x % ans_p8_precision;
-  xn = quot * p;
-  val = rem < p;
-  if (UNPREDICTABLE(val)) {
-    ans->state = xn + rem;
-  } else {
-    // ans->state = quot * p0 + rem - p;
-    ans->state = x - xn - p;
-  }
-#endif
-  return val;
-}
-
-// rABS with ascending spread
-// p or p0 takes the place of l_s from the paper
-// ans_p8_precision is m
-static INLINE void rabs_asc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
-  const AnsP8 p = ans_p8_precision - p0;
-  const unsigned l_s = val ? p : p0;
-  unsigned quot, rem;
-  if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
-    ans->buf[ans->buf_offset++] = ans->state % io_base;
-    ans->state /= io_base;
-  }
-  ANS_DIVREM(quot, rem, ans->state, l_s);
-  ans->state = quot * ans_p8_precision + rem + (val ? p0 : 0);
-}
-
-static INLINE int rabs_asc_read(struct AnsDecoder *ans, AnsP8 p0) {
-  int val;
-#if ANS_IMPL1
-  unsigned l_s;
-#else
-  unsigned quot, rem, x, xn;
-#endif
-  const AnsP8 p = ans_p8_precision - p0;
-  if (ans->state < l_base) {
-    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
-  }
-#if ANS_IMPL1
-  val = ans->state % ans_p8_precision < p;
-  l_s = val ? p : p0;
-  ans->state = (ans->state / ans_p8_precision) * l_s +
-               ans->state % ans_p8_precision - (!val * p);
-#else
-  x = ans->state;
-  quot = x / ans_p8_precision;
-  rem = x % ans_p8_precision;
-  xn = quot * p;
-  val = rem >= p0;
-  if (UNPREDICTABLE(val)) {
-    ans->state = xn + rem - p0;
-  } else {
-    // ans->state = quot * p0 + rem - p0;
-    ans->state = x - xn;
-  }
-#endif
-  return val;
-}
-
-#define rabs_read rabs_desc_read
-#define rabs_write rabs_desc_write
-
-// uABS with normalization
-static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
-  AnsP8 p = ans_p8_precision - p0;
-  const unsigned l_s = val ? p : p0;
-  while (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
-    ans->buf[ans->buf_offset++] = ans->state % io_base;
-    ans->state /= io_base;
-  }
-  if (!val)
-    ans->state = ANS_DIV(ans->state * ans_p8_precision, p0);
-  else
-    ans->state = ANS_DIV((ans->state + 1) * ans_p8_precision + p - 1, p) - 1;
-}
-
-static INLINE int uabs_read(struct AnsDecoder *ans, AnsP8 p0) {
-  AnsP8 p = ans_p8_precision - p0;
-  int s;
-  // unsigned int xp1;
-  unsigned xp, sp;
-  unsigned state = ans->state;
-  while (state < l_base && ans->buf_offset > 0) {
-    state = state * io_base + ans->buf[--ans->buf_offset];
-  }
-  sp = state * p;
-  // xp1 = (sp + p) / ans_p8_precision;
-  xp = sp / ans_p8_precision;
-  // s = xp1 - xp;
-  s = (sp & 0xFF) >= p0;
-  if (UNPREDICTABLE(s))
-    ans->state = xp;
-  else
-    ans->state = state - xp;
-  return s;
-}
-
-static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
-  int s;
-  unsigned state = ans->state;
-  while (state < l_base && ans->buf_offset > 0) {
-    state = state * io_base + ans->buf[--ans->buf_offset];
-  }
-  s = (int)(state & 1);
-  ans->state = state >> 1;
-  return s;
-}
-
-static INLINE int uabs_read_literal(struct AnsDecoder *ans, int bits) {
-  int literal = 0, bit;
-  assert(bits < 31);
-
-  // TODO(aconverse): Investigate ways to read/write literals faster,
-  // e.g. 8-bit chunks.
-  for (bit = bits - 1; bit >= 0; bit--) literal |= uabs_read_bit(ans) << bit;
-
-  return literal;
-}
-
-// TODO(aconverse): Replace trees with tokensets.
-static INLINE int uabs_read_tree(struct AnsDecoder *ans,
-                                 const aom_tree_index *tree,
-                                 const AnsP8 *probs) {
-  aom_tree_index i = 0;
-
-  while ((i = tree[i + uabs_read(ans, probs[i >> 1])]) > 0) continue;
-
-  return -i;
-}
-
-struct rans_sym {
-  AnsP10 prob;
-  AnsP10 cum_prob;  // not-inclusive
-};
-
-struct rans_dec_sym {
-  uint8_t val;
-  AnsP10 prob;
-  AnsP10 cum_prob;  // not-inclusive
-};
+#define L_BASE (ANS_P10_PRECISION * 4)  // L_BASE % precision must be 0
+#define IO_BASE 256
+// Range I = { L_BASE, L_BASE + 1, ..., L_BASE * IO_BASE - 1 }
 
 // This is now just a boring cdf. It starts with an explicit zero.
 // TODO(aconverse): Remove starting zero.
-typedef uint16_t rans_dec_lut[16];
+typedef uint16_t rans_lut[16];
+// TODO(aconverse): Update callers and remove this shim
+#define rans_dec_lut rans_lut
 
-static INLINE void rans_build_cdf_from_pdf(const AnsP10 token_probs[],
-                                           rans_dec_lut cdf_tab) {
-  int i;
-  cdf_tab[0] = 0;
-  for (i = 1; cdf_tab[i - 1] < rans_precision; ++i) {
-    cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1];
-  }
-  assert(cdf_tab[i - 1] == rans_precision);
-}
+void aom_rans_build_cdf_from_pdf(const AnsP10 token_probs[], rans_lut cdf_tab);
 
-static INLINE int ans_find_largest(const AnsP10 *const pdf_tab, int num_syms) {
-  int largest_idx = -1;
-  int largest_p = -1;
-  int i;
-  for (i = 0; i < num_syms; ++i) {
-    int p = pdf_tab[i];
-    if (p > largest_p) {
-      largest_p = p;
-      largest_idx = i;
-    }
-  }
-  return largest_idx;
-}
-
-static INLINE void rans_merge_prob8_pdf(AnsP10 *const out_pdf,
-                                        const AnsP8 node_prob,
-                                        const AnsP10 *const src_pdf,
-                                        int in_syms) {
-  int i;
-  int adjustment = rans_precision;
-  const int round_fact = ans_p8_precision >> 1;
-  const AnsP8 p1 = ans_p8_precision - node_prob;
-  const int out_syms = in_syms + 1;
-  assert(src_pdf != out_pdf);
-
-  out_pdf[0] = node_prob << (10 - 8);
-  adjustment -= out_pdf[0];
-  for (i = 0; i < in_syms; ++i) {
-    int p = (p1 * src_pdf[i] + round_fact) >> ans_p8_shift;
-    p = AOMMIN(p, (int)rans_precision - in_syms);
-    p = AOMMAX(p, 1);
-    out_pdf[i + 1] = p;
-    adjustment -= p;
-  }
-
-  // Adjust probabilities so they sum to the total probability
-  if (adjustment > 0) {
-    i = ans_find_largest(out_pdf, out_syms);
-    out_pdf[i] += adjustment;
-  } else {
-    while (adjustment < 0) {
-      i = ans_find_largest(out_pdf, out_syms);
-      --out_pdf[i];
-      assert(out_pdf[i] > 0);
-      adjustment++;
-    }
-  }
-}
-
-// rANS with normalization
-// sym->prob takes the place of l_s from the paper
-// ans_p10_precision is m
-static INLINE void rans_write(struct AnsCoder *ans,
-                              const struct rans_sym *const sym) {
-  const AnsP10 p = sym->prob;
-  while (ans->state >= l_base / rans_precision * io_base * p) {
-    ans->buf[ans->buf_offset++] = ans->state % io_base;
-    ans->state /= io_base;
-  }
-  ans->state =
-      (ans->state / p) * rans_precision + ans->state % p + sym->cum_prob;
-}
-
-static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf,
-                             AnsP10 rem) {
-  int i = 0;
-  // TODO(skal): if critical, could be a binary search.
-  // Or, better, an O(1) alias-table.
-  while (rem >= cdf[i]) {
-    ++i;
-  }
-  out->val = i - 1;
-  out->prob = (AnsP10)(cdf[i] - cdf[i - 1]);
-  out->cum_prob = (AnsP10)cdf[i - 1];
-}
-
-static INLINE int rans_read(struct AnsDecoder *ans, const rans_dec_lut tab) {
-  unsigned rem;
-  unsigned quo;
-  struct rans_dec_sym sym;
-  while (ans->state < l_base && ans->buf_offset > 0) {
-    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
-  }
-  quo = ans->state / rans_precision;
-  rem = ans->state % rans_precision;
-  fetch_sym(&sym, tab, rem);
-  ans->state = quo * sym.prob + rem - sym.cum_prob;
-  return sym.val;
-}
-
-static INLINE int ans_read_init(struct AnsDecoder *const ans,
-                                const uint8_t *const buf, int offset) {
-  unsigned x;
-  if (offset < 1) return 1;
-  ans->buf = buf;
-  x = buf[offset - 1] >> 6;
-  if (x == 0) {
-    ans->buf_offset = offset - 1;
-    ans->state = buf[offset - 1] & 0x3F;
-  } else if (x == 1) {
-    if (offset < 2) return 1;
-    ans->buf_offset = offset - 2;
-    ans->state = mem_get_le16(buf + offset - 2) & 0x3FFF;
-  } else if (x == 2) {
-    if (offset < 3) return 1;
-    ans->buf_offset = offset - 3;
-    ans->state = mem_get_le24(buf + offset - 3) & 0x3FFFFF;
-  } else {
-    // x == 3 implies this byte is a superframe marker
-    return 1;
-  }
-  ans->state += l_base;
-  if (ans->state >= l_base * io_base) return 1;
-  return 0;
-}
-
-static INLINE int ans_read_end(struct AnsDecoder *const ans) {
-  return ans->state == l_base;
-}
-
-static INLINE int ans_reader_has_error(const struct AnsDecoder *const ans) {
-  return ans->state < l_base && ans->buf_offset == 0;
-}
-#undef ANS_DIVREM
+void aom_rans_merge_prob8_pdf(AnsP10 *const out_pdf, const AnsP8 node_prob,
+                              const AnsP10 *const src_pdf, int in_syms);
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/aom_dsp/ansreader.h b/aom_dsp/ansreader.h
new file mode 100644
index 0000000..11619b0
--- /dev/null
+++ b/aom_dsp/ansreader.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_DSP_ANSREADER_H_
+#define AOM_DSP_ANSREADER_H_
+// A uABS and rANS decoder implementation of Asymmetric Numeral Systems
+// http://arxiv.org/abs/1311.2540v2
+
+#include <assert.h>
+#include "./aom_config.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/prob.h"
+#include "aom_dsp/ans.h"
+#include "aom_ports/mem_ops.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+struct AnsDecoder {
+  const uint8_t *buf;
+  int buf_offset;
+  uint32_t state;
+};
+
+// Reads one boolean coded with 8-bit probability p0, first refilling the
+// state by consuming bytes backwards from buf while it is below L_BASE.
+static INLINE int uabs_read(struct AnsDecoder *ans, AnsP8 p0) {
+  const AnsP8 p = ANS_P8_PRECISION - p0;
+  unsigned x = ans->state;
+  unsigned scaled, quot;
+  int bit;
+  while (x < L_BASE && ans->buf_offset > 0) {
+    ans->buf_offset--;
+    x = x * IO_BASE + ans->buf[ans->buf_offset];
+  }
+  scaled = x * p;
+  quot = scaled / ANS_P8_PRECISION;
+  bit = (scaled & 0xFF) >= p0;
+  ans->state = bit ? quot : x - quot;
+  return bit;
+}
+
+// Reads one bit: returns the low bit of the refilled state and shifts it out.
+static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
+  unsigned x = ans->state;
+  while (x < L_BASE && ans->buf_offset > 0) {
+    ans->buf_offset--;
+    x = x * IO_BASE + ans->buf[ans->buf_offset];
+  }
+  ans->state = x >> 1;
+  return (int)(x & 1);
+}
+
+struct rans_dec_sym {
+  uint8_t val;
+  AnsP10 prob;
+  AnsP10 cum_prob;  // not-inclusive
+};
+
+// Finds the symbol whose cdf interval [cdf[i - 1], cdf[i]) contains rem and
+// stores its value, probability and cumulative probability in *out.
+static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_lut cdf,
+                             AnsP10 rem) {
+  int hi = 0;
+  // TODO(skal): if critical, could be a binary search.
+  // Or, better, an O(1) alias-table.
+  while (rem >= cdf[hi]) ++hi;
+  out->cum_prob = (AnsP10)cdf[hi - 1];
+  out->prob = (AnsP10)(cdf[hi] - cdf[hi - 1]);
+  out->val = hi - 1;
+}
+
+// Decodes one rANS symbol using the cdf in tab and returns its value.
+static INLINE int rans_read(struct AnsDecoder *ans, const rans_lut tab) {
+  struct rans_dec_sym sym;
+  uint32_t x = ans->state;
+  unsigned quo, rem;
+  while (x < L_BASE && ans->buf_offset > 0)
+    x = x * IO_BASE + ans->buf[--ans->buf_offset];
+  quo = x / RANS_PRECISION;
+  rem = x % RANS_PRECISION;
+  fetch_sym(&sym, tab, rem);
+  ans->state = quo * sym.prob + rem - sym.cum_prob;
+  return sym.val;
+}
+
+// Initializes ans from the last bytes of buf[0..offset); returns 0 if valid.
+static INLINE int ans_read_init(struct AnsDecoder *const ans,
+                                const uint8_t *const buf, int offset) {
+  if (offset < 1) return 1;
+  ans->buf = buf;
+  switch (buf[offset - 1] >> 6) {
+    case 0:
+      ans->buf_offset = offset - 1;
+      ans->state = buf[offset - 1] & 0x3F;
+      break;
+    case 1:
+      if (offset < 2) return 1;
+      ans->buf_offset = offset - 2;
+      ans->state = mem_get_le16(buf + offset - 2) & 0x3FFF;
+      break;
+    case 2:
+      if (offset < 3) return 1;
+      ans->buf_offset = offset - 3;
+      ans->state = mem_get_le24(buf + offset - 3) & 0x3FFFFF;
+      break;
+    default: return 1;  // 3 implies this byte is a superframe marker
+  }
+  ans->state += L_BASE;
+  return ans->state >= L_BASE * IO_BASE;
+}
+
+static INLINE int ans_read_end(struct AnsDecoder *const ans) {
+  return ans->state == L_BASE;
+}
+
+static INLINE int ans_reader_has_error(const struct AnsDecoder *const ans) {
+  return ans->state < L_BASE && ans->buf_offset == 0;
+}
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // AOM_DSP_ANSREADER_H_
diff --git a/aom_dsp/answriter.h b/aom_dsp/answriter.h
new file mode 100644
index 0000000..5a82d35
--- /dev/null
+++ b/aom_dsp/answriter.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_DSP_ANSWRITER_H_
+#define AOM_DSP_ANSWRITER_H_
+// A uABS and rANS encoder implementation of Asymmetric Numeral Systems
+// http://arxiv.org/abs/1311.2540v2
+
+#include <assert.h>
+#include "./aom_config.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/ans.h"
+#include "aom_dsp/prob.h"
+#include "aom_ports/mem_ops.h"
+
+#define ANS_DIV(dividend, divisor) ((dividend) / (divisor))
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+struct AnsCoder {
+  uint8_t *buf;
+  int buf_offset;
+  uint32_t state;
+};
+
+static INLINE void ans_write_init(struct AnsCoder *const ans,
+                                  uint8_t *const buf) {
+  ans->buf = buf;
+  ans->buf_offset = 0;
+  ans->state = L_BASE;
+}
+
+// Serializes the final state (minus L_BASE) in 1-3 bytes, tagging the width
+// in the top two bits of the value, and returns the resulting buffer length.
+static INLINE int ans_write_end(struct AnsCoder *const ans) {
+  const uint32_t rel = ans->state - L_BASE;
+  assert(ans->state >= L_BASE);
+  assert(ans->state < L_BASE * IO_BASE);
+  if (rel < (1 << 6)) {
+    ans->buf[ans->buf_offset] = (0x00 << 6) + rel;
+    return ans->buf_offset + 1;
+  } else if (rel < (1 << 14)) {
+    mem_put_le16(ans->buf + ans->buf_offset, (0x01 << 14) + rel);
+    return ans->buf_offset + 2;
+  } else if (rel < (1 << 22)) {
+    mem_put_le24(ans->buf + ans->buf_offset, (0x02 << 22) + rel);
+    return ans->buf_offset + 3;
+  }
+  assert(0 && "State is too large to be serialized");
+  return ans->buf_offset;
+}
+
+// uABS with normalization; p0 is the weight (out of 256) used when val == 0
+static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+  AnsP8 p = ANS_P8_PRECISION - p0;
+  const unsigned l_s = val ? p : p0;
+  while (ans->state >= L_BASE / ANS_P8_PRECISION * IO_BASE * l_s) {
+    ans->buf[ans->buf_offset++] = ans->state % IO_BASE;
+    ans->state /= IO_BASE;
+  }
+  if (!val)
+    ans->state = ANS_DIV(ans->state * ANS_P8_PRECISION, p0);
+  else
+    ans->state = ANS_DIV((ans->state + 1) * ANS_P8_PRECISION + p - 1, p) - 1;
+}
+
+struct rans_sym {
+  AnsP10 prob;
+  AnsP10 cum_prob;  // not-inclusive
+};
+
+// rANS with normalization
+// sym->prob takes the place of l_s from the paper
+// RANS_PRECISION (defined as ANS_P10_PRECISION) is m
+static INLINE void rans_write(struct AnsCoder *ans,
+                              const struct rans_sym *const sym) {
+  const AnsP10 p = sym->prob;
+  while (ans->state >= L_BASE / RANS_PRECISION * IO_BASE * p) {
+    ans->buf[ans->buf_offset++] = ans->state % IO_BASE;
+    ans->state /= IO_BASE;
+  }
+  ans->state =
+      (ans->state / p) * RANS_PRECISION + ans->state % p + sym->cum_prob;
+}
+
+#undef ANS_DIV
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // AOM_DSP_ANSWRITER_H_
diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk
index 25e7d8f..7d14384 100644
--- a/aom_dsp/aom_dsp.mk
+++ b/aom_dsp/aom_dsp.mk
@@ -20,19 +20,19 @@
 DSP_SRCS-yes += prob.h
 DSP_SRCS-yes += prob.c
 DSP_SRCS-$(CONFIG_ANS) += ans.h
+DSP_SRCS-$(CONFIG_ANS) += ans.c
 
 ifeq ($(CONFIG_ENCODERS),yes)
+DSP_SRCS-$(CONFIG_ANS) += answriter.h
 DSP_SRCS-yes += bitwriter.h
 DSP_SRCS-yes += dkboolwriter.h
 DSP_SRCS-yes += dkboolwriter.c
 DSP_SRCS-yes += bitwriter_buffer.c
 DSP_SRCS-yes += bitwriter_buffer.h
-DSP_SRCS-$(CONFIG_ANS) += buf_ans.h
-DSP_SRCS-$(CONFIG_ANS) += buf_ans.c
-DSP_SRCS-$(CONFIG_ANS) += divide.h
-DSP_SRCS-$(CONFIG_ANS) += divide.c
 DSP_SRCS-yes += psnr.c
 DSP_SRCS-yes += psnr.h
+DSP_SRCS-$(CONFIG_ANS) += buf_ans.h
+DSP_SRCS-$(CONFIG_ANS) += buf_ans.c
 DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.c
 DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.h
 DSP_SRCS-$(CONFIG_INTERNAL_STATS) += psnrhvs.c
@@ -40,6 +40,7 @@
 endif
 
 ifeq ($(CONFIG_DECODERS),yes)
+DSP_SRCS-$(CONFIG_ANS) += ansreader.h
 DSP_SRCS-yes += bitreader.h
 DSP_SRCS-yes += dkboolreader.h
 DSP_SRCS-yes += dkboolreader.c
diff --git a/aom_dsp/bitreader.h b/aom_dsp/bitreader.h
index 6092421..611949a 100644
--- a/aom_dsp/bitreader.h
+++ b/aom_dsp/bitreader.h
@@ -12,45 +12,91 @@
 #ifndef AOM_DSP_BITREADER_H_
 #define AOM_DSP_BITREADER_H_
 
+#include <assert.h>
+#include <limits.h>
+
 #include "./aom_config.h"
 #include "aom/aomdx.h"
 #include "aom/aom_integer.h"
+#if CONFIG_ANS
+#include "aom_dsp/ansreader.h"
+#else
 #include "aom_dsp/dkboolreader.h"
+#endif
 #include "aom_dsp/prob.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#if CONFIG_ANS
+typedef struct AnsDecoder aom_reader;
+#else
 typedef struct aom_dk_reader aom_reader;
+#endif
 
 static INLINE int aom_reader_init(aom_reader *r, const uint8_t *buffer,
                                   size_t size, aom_decrypt_cb decrypt_cb,
                                   void *decrypt_state) {
+#if CONFIG_ANS
+  (void)decrypt_cb;
+  (void)decrypt_state;
+  assert(size <= INT_MAX);  // ans_read_init() takes an int offset
+  return ans_read_init(r, buffer, (int)size);
+#else
   return aom_dk_reader_init(r, buffer, size, decrypt_cb, decrypt_state);
+#endif
 }
 
 static INLINE const uint8_t *aom_reader_find_end(aom_reader *r) {
+#if CONFIG_ANS
+  (void)r;
+  assert(0 && "Use the raw buffer size with ANS");
+  return NULL;
+#else
   return aom_dk_reader_find_end(r);
+#endif
 }
 
 static INLINE int aom_reader_has_error(aom_reader *r) {
+#if CONFIG_ANS
+  return ans_reader_has_error(r);
+#else
   return aom_dk_reader_has_error(r);
+#endif
 }
 
 static INLINE int aom_read(aom_reader *r, int prob) {
+#if CONFIG_ANS
+  return uabs_read(r, prob);
+#else
   return aom_dk_read(r, prob);
+#endif
 }
 
-static INLINE int aom_read_bit(aom_reader *r) { return aom_dk_read_bit(r); }
+static INLINE int aom_read_bit(aom_reader *r) {
+#if CONFIG_ANS
+  return uabs_read_bit(r);  // Non-trivial optimization at half probability
+#else
+  return aom_read(r, 128);  // aom_prob_half
+#endif
+}
 
 static INLINE int aom_read_literal(aom_reader *r, int bits) {
-  return aom_dk_read_literal(r, bits);
+  int literal = 0, bit;
+
+  for (bit = bits - 1; bit >= 0; bit--) literal |= aom_read_bit(r) << bit;
+
+  return literal;
 }
 
 static INLINE int aom_read_tree(aom_reader *r, const aom_tree_index *tree,
                                 const aom_prob *probs) {
-  return aom_dk_read_tree(r, tree, probs);
+  aom_tree_index i = 0;
+
+  while ((i = tree[i + aom_read(r, probs[i >> 1])]) > 0) continue;
+
+  return -i;
 }
 
 #ifdef __cplusplus
diff --git a/aom_dsp/buf_ans.c b/aom_dsp/buf_ans.c
index a62aaba..1386722 100644
--- a/aom_dsp/buf_ans.c
+++ b/aom_dsp/buf_ans.c
@@ -1,11 +1,12 @@
 /*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
  *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
 #include <string.h>
diff --git a/aom_dsp/buf_ans.h b/aom_dsp/buf_ans.h
index b3fdad9..58d6e61 100644
--- a/aom_dsp/buf_ans.h
+++ b/aom_dsp/buf_ans.h
@@ -1,23 +1,25 @@
 /*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
  *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
 #ifndef AOM_DSP_BUF_ANS_H_
 #define AOM_DSP_BUF_ANS_H_
 // Buffered forward ANS writer.
-// Symbols are written to the writer in forward (decode) order and serialzed
+// Symbols are written to the writer in forward (decode) order and serialized
 // backwards due to ANS's stack like behavior.
 
 #include <assert.h>
 #include "./aom_config.h"
 #include "aom/aom_integer.h"
 #include "aom_dsp/ans.h"
+#include "aom_dsp/answriter.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -26,11 +28,14 @@
 #define ANS_METHOD_UABS 0
 #define ANS_METHOD_RANS 1
 
+struct aom_internal_error_info;  // forward declaration; defines no global
+
 struct buffered_ans_symbol {
-  uint8_t method;  // one of ANS_METHOD_UABS or ANS_METHOD_RANS
-  // TODO(aconverse): Should be possible to write this interms of start for ABS
-  AnsP10 val_start;  // Boolean value for ABS, start in symbol cycle for Rans
-  AnsP10 prob;       // Probability of this symbol
+  unsigned int method : 1;  // one of ANS_METHOD_UABS or ANS_METHOD_RANS
+  // TODO(aconverse): Should be possible to write this in terms of start for ABS
+  unsigned int val_start : RANS_PROB_BITS;  // Boolean value for ABS,
+                                            // start in symbol cycle for Rans
+  unsigned int prob : RANS_PROB_BITS;       // Probability of this symbol
 };
 
 struct BufAnsCoder {
diff --git a/aom_dsp/divide.c b/aom_dsp/divide.c
deleted file mode 100644
index 3e58da5..0000000
--- a/aom_dsp/divide.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "aom_dsp/divide.h"
-
-/* Constants for divide by multiply for small divisors generated with:
-void init_fastdiv() {
-  int i;
-  for (i = 3; i < 256; ++i) {
-    const int s = 31 ^ __builtin_clz(2 * i + 1);
-    const unsigned long long base = (1ull << (sizeof(unsigned) * 8 + s)) - 1;
-    fastdiv_tab[i].mult = (base / i + 1) & 0xFFFFFFFF;
-    fastdiv_tab[i].shift = s;
-  }
-  for (i = 0; i < 8; ++i) {
-    fastdiv_tab[1 << i].mult = 0;
-    fastdiv_tab[1 << i].shift = i;
-  }
-}
-*/
-const struct fastdiv_elem av1_fastdiv_tab[256] = {
-  { 0, 0 },           { 0, 0 },           { 0, 1 },
-  { 1431655766, 2 },  { 0, 2 },           { 2576980378u, 3 },
-  { 1431655766, 3 },  { 613566757, 3 },   { 0, 3 },
-  { 3340530120u, 4 }, { 2576980378u, 4 }, { 1952257862, 4 },
-  { 1431655766, 4 },  { 991146300, 4 },   { 613566757, 4 },
-  { 286331154u, 4 },  { 0, 4 },           { 3789677026u, 5 },
-  { 3340530120u, 5 }, { 2938661835u, 5 }, { 2576980378u, 5 },
-  { 2249744775u, 5 }, { 1952257862, 5 },  { 1680639377, 5 },
-  { 1431655766, 5 },  { 1202590843, 5 },  { 991146300, 5 },
-  { 795364315, 5 },   { 613566757, 5 },   { 444306962, 5 },
-  { 286331154, 5 },   { 138547333, 5 },   { 0, 5 },
-  { 4034666248u, 6 }, { 3789677026u, 6 }, { 3558687189u, 6 },
-  { 3340530120u, 6 }, { 3134165325u, 6 }, { 2938661835u, 6 },
-  { 2753184165u, 6 }, { 2576980378u, 6 }, { 2409371898u, 6 },
-  { 2249744775u, 6 }, { 2097542168u, 6 }, { 1952257862, 6 },
-  { 1813430637, 6 },  { 1680639377, 6 },  { 1553498810, 6 },
-  { 1431655766, 6 },  { 1314785907, 6 },  { 1202590843, 6 },
-  { 1094795586, 6 },  { 991146300, 6 },   { 891408307, 6 },
-  { 795364315, 6 },   { 702812831, 6 },   { 613566757, 6 },
-  { 527452125, 6 },   { 444306962, 6 },   { 363980280, 6 },
-  { 286331154, 6 },   { 211227900, 6 },   { 138547333, 6 },
-  { 68174085, 6 },    { 0, 6 },           { 4162814457u, 7 },
-  { 4034666248u, 7 }, { 3910343360u, 7 }, { 3789677026u, 7 },
-  { 3672508268u, 7 }, { 3558687189u, 7 }, { 3448072337u, 7 },
-  { 3340530120u, 7 }, { 3235934265u, 7 }, { 3134165325u, 7 },
-  { 3035110223u, 7 }, { 2938661835u, 7 }, { 2844718599u, 7 },
-  { 2753184165u, 7 }, { 2663967058u, 7 }, { 2576980378u, 7 },
-  { 2492141518u, 7 }, { 2409371898u, 7 }, { 2328596727u, 7 },
-  { 2249744775u, 7 }, { 2172748162u, 7 }, { 2097542168, 7 },
-  { 2024065048, 7 },  { 1952257862, 7 },  { 1882064321, 7 },
-  { 1813430637, 7 },  { 1746305385, 7 },  { 1680639377, 7 },
-  { 1616385542, 7 },  { 1553498810, 7 },  { 1491936009, 7 },
-  { 1431655766, 7 },  { 1372618415, 7 },  { 1314785907, 7 },
-  { 1258121734, 7 },  { 1202590843, 7 },  { 1148159575, 7 },
-  { 1094795586, 7 },  { 1042467791, 7 },  { 991146300, 7 },
-  { 940802361, 7 },   { 891408307, 7 },   { 842937507, 7 },
-  { 795364315, 7 },   { 748664025, 7 },   { 702812831, 7 },
-  { 657787785, 7 },   { 613566757, 7 },   { 570128403, 7 },
-  { 527452125, 7 },   { 485518043, 7 },   { 444306962, 7 },
-  { 403800345, 7 },   { 363980280, 7 },   { 324829460, 7 },
-  { 286331154, 7 },   { 248469183, 7 },   { 211227900, 7 },
-  { 174592167, 7 },   { 138547333, 7 },   { 103079216, 7 },
-  { 68174085, 7 },    { 33818641, 7 },    { 0, 7 },
-  { 4228378656u, 8 }, { 4162814457u, 8 }, { 4098251237u, 8 },
-  { 4034666248u, 8 }, { 3972037425u, 8 }, { 3910343360u, 8 },
-  { 3849563281u, 8 }, { 3789677026u, 8 }, { 3730665024u, 8 },
-  { 3672508268u, 8 }, { 3615188300u, 8 }, { 3558687189u, 8 },
-  { 3502987511u, 8 }, { 3448072337u, 8 }, { 3393925206u, 8 },
-  { 3340530120u, 8 }, { 3287871517u, 8 }, { 3235934265u, 8 },
-  { 3184703642u, 8 }, { 3134165325u, 8 }, { 3084305374u, 8 },
-  { 3035110223u, 8 }, { 2986566663u, 8 }, { 2938661835u, 8 },
-  { 2891383213u, 8 }, { 2844718599u, 8 }, { 2798656110u, 8 },
-  { 2753184165u, 8 }, { 2708291480u, 8 }, { 2663967058u, 8 },
-  { 2620200175u, 8 }, { 2576980378u, 8 }, { 2534297473u, 8 },
-  { 2492141518u, 8 }, { 2450502814u, 8 }, { 2409371898u, 8 },
-  { 2368739540u, 8 }, { 2328596727u, 8 }, { 2288934667u, 8 },
-  { 2249744775u, 8 }, { 2211018668u, 8 }, { 2172748162u, 8 },
-  { 2134925265u, 8 }, { 2097542168, 8 },  { 2060591247, 8 },
-  { 2024065048, 8 },  { 1987956292, 8 },  { 1952257862, 8 },
-  { 1916962805, 8 },  { 1882064321, 8 },  { 1847555765, 8 },
-  { 1813430637, 8 },  { 1779682582, 8 },  { 1746305385, 8 },
-  { 1713292966, 8 },  { 1680639377, 8 },  { 1648338801, 8 },
-  { 1616385542, 8 },  { 1584774030, 8 },  { 1553498810, 8 },
-  { 1522554545, 8 },  { 1491936009, 8 },  { 1461638086, 8 },
-  { 1431655766, 8 },  { 1401984144, 8 },  { 1372618415, 8 },
-  { 1343553873, 8 },  { 1314785907, 8 },  { 1286310003, 8 },
-  { 1258121734, 8 },  { 1230216764, 8 },  { 1202590843, 8 },
-  { 1175239808, 8 },  { 1148159575, 8 },  { 1121346142, 8 },
-  { 1094795586, 8 },  { 1068504060, 8 },  { 1042467791, 8 },
-  { 1016683080, 8 },  { 991146300, 8 },   { 965853890, 8 },
-  { 940802361, 8 },   { 915988286, 8 },   { 891408307, 8 },
-  { 867059126, 8 },   { 842937507, 8 },   { 819040276, 8 },
-  { 795364315, 8 },   { 771906565, 8 },   { 748664025, 8 },
-  { 725633745, 8 },   { 702812831, 8 },   { 680198441, 8 },
-  { 657787785, 8 },   { 635578121, 8 },   { 613566757, 8 },
-  { 591751050, 8 },   { 570128403, 8 },   { 548696263, 8 },
-  { 527452125, 8 },   { 506393524, 8 },   { 485518043, 8 },
-  { 464823301, 8 },   { 444306962, 8 },   { 423966729, 8 },
-  { 403800345, 8 },   { 383805589, 8 },   { 363980280, 8 },
-  { 344322273, 8 },   { 324829460, 8 },   { 305499766, 8 },
-  { 286331154, 8 },   { 267321616, 8 },   { 248469183, 8 },
-  { 229771913, 8 },   { 211227900, 8 },   { 192835267, 8 },
-  { 174592167, 8 },   { 156496785, 8 },   { 138547333, 8 },
-  { 120742053, 8 },   { 103079216, 8 },   { 85557118, 8 },
-  { 68174085, 8 },    { 50928466, 8 },    { 33818641, 8 },
-  { 16843010, 8 },
-};
diff --git a/aom_dsp/divide.h b/aom_dsp/divide.h
deleted file mode 100644
index c92a58f..0000000
--- a/aom_dsp/divide.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef AOM_DSP_DIVIDE_H_
-#define AOM_DSP_DIVIDE_H_
-// An implemntation of the divide by multiply alogrithm
-// https://gmplib.org/~tege/divcnst-pldi94.pdf
-
-#include <limits.h>
-
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-struct fastdiv_elem {
-  unsigned mult;
-  unsigned shift;
-};
-
-extern const struct fastdiv_elem av1_fastdiv_tab[256];
-
-static INLINE unsigned fastdiv(unsigned x, int y) {
-  unsigned t =
-      ((uint64_t)x * av1_fastdiv_tab[y].mult) >> (sizeof(x) * CHAR_BIT);
-  return (t + x) >> av1_fastdiv_tab[y].shift;
-}
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
-#endif  // AOM_DSP_DIVIDE_H_
diff --git a/aom_dsp/dkboolreader.c b/aom_dsp/dkboolreader.c
index c26d90b..8ec7ffc 100644
--- a/aom_dsp/dkboolreader.c
+++ b/aom_dsp/dkboolreader.c
@@ -18,6 +18,10 @@
 #include "aom_mem/aom_mem.h"
 #include "aom_util/endian_inl.h"
 
+static INLINE int aom_dk_read_bit(struct aom_dk_reader *r) {
+  return aom_dk_read(r, 128);  // fixed probability 128, i.e. aom_prob_half
+}
+
 int aom_dk_reader_init(struct aom_dk_reader *r, const uint8_t *buffer,
                        size_t size, aom_decrypt_cb decrypt_cb,
                        void *decrypt_state) {
diff --git a/aom_dsp/dkboolreader.h b/aom_dsp/dkboolreader.h
index 531c5dc..fe68ecc 100644
--- a/aom_dsp/dkboolreader.h
+++ b/aom_dsp/dkboolreader.h
@@ -135,28 +135,6 @@
   return bit;
 }
 
-static INLINE int aom_dk_read_bit(struct aom_dk_reader *r) {
-  return aom_dk_read(r, 128);  // aom_prob_half
-}
-
-static INLINE int aom_dk_read_literal(struct aom_dk_reader *r, int bits) {
-  int literal = 0, bit;
-
-  for (bit = bits - 1; bit >= 0; bit--) literal |= aom_dk_read_bit(r) << bit;
-
-  return literal;
-}
-
-static INLINE int aom_dk_read_tree(struct aom_dk_reader *r,
-                                   const aom_tree_index *tree,
-                                   const aom_prob *probs) {
-  aom_tree_index i = 0;
-
-  while ((i = tree[i + aom_dk_read(r, probs[i >> 1])]) > 0) continue;
-
-  return -i;
-}
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/aom_dsp/dkboolwriter.c b/aom_dsp/dkboolwriter.c
index 259316c..238f37c 100644
--- a/aom_dsp/dkboolwriter.c
+++ b/aom_dsp/dkboolwriter.c
@@ -12,6 +12,10 @@
 
 #include "./dkboolwriter.h"
 
+static INLINE void aom_dk_write_bit(aom_dk_writer *w, int bit) {
+  aom_dk_write(w, bit, 128);  // fixed probability 128, i.e. aom_prob_half
+}
+
 void aom_dk_start_encode(aom_dk_writer *br, uint8_t *source) {
   br->lowvalue = 0;
   br->range = 255;
diff --git a/aom_dsp/dkboolwriter.h b/aom_dsp/dkboolwriter.h
index 8475238..8354368 100644
--- a/aom_dsp/dkboolwriter.h
+++ b/aom_dsp/dkboolwriter.h
@@ -97,16 +97,6 @@
   br->range = range;
 }
 
-static INLINE void aom_dk_write_bit(aom_dk_writer *w, int bit) {
-  aom_dk_write(w, bit, 128);  // aom_prob_half
-}
-
-static INLINE void aom_dk_write_literal(aom_dk_writer *w, int data, int bits) {
-  int bit;
-
-  for (bit = bits - 1; bit >= 0; bit--) aom_dk_write_bit(w, 1 & (data >> bit));
-}
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index 9976e7a..26ef9c5 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -82,7 +82,7 @@
 AV1_COMMON_SRCS-yes += common/av1_convolve.h
 AV1_COMMON_SRCS-$(CONFIG_LOOP_RESTORATION) += common/restoration.h
 AV1_COMMON_SRCS-$(CONFIG_LOOP_RESTORATION) += common/restoration.c
-ifeq (yes,$(filter yes,$(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION)))
+ifeq (yes,$(filter $(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION),yes))
 AV1_COMMON_SRCS-yes += common/warped_motion.h
 AV1_COMMON_SRCS-yes += common/warped_motion.c
 endif
diff --git a/av1/av1_cx.mk b/av1/av1_cx.mk
index ecbe2b3..2bb405c 100644
--- a/av1/av1_cx.mk
+++ b/av1/av1_cx.mk
@@ -19,7 +19,6 @@
 AV1_CX_SRCS-yes += av1_cx_iface.c
 
 AV1_CX_SRCS-yes += encoder/bitstream.c
-AV1_CX_SRCS-yes += encoder/bitwriter.h
 AV1_CX_SRCS-yes += encoder/context_tree.c
 AV1_CX_SRCS-yes += encoder/context_tree.h
 AV1_CX_SRCS-yes += encoder/variance_tree.c
diff --git a/av1/av1_dx.mk b/av1/av1_dx.mk
index 1ebf5fb..36eec30 100644
--- a/av1/av1_dx.mk
+++ b/av1/av1_dx.mk
@@ -30,6 +30,5 @@
 AV1_DX_SRCS-yes += decoder/decoder.h
 AV1_DX_SRCS-yes += decoder/dsubexp.c
 AV1_DX_SRCS-yes += decoder/dsubexp.h
-AV1_DX_SRCS-yes += decoder/bitreader.h
 
 AV1_DX_SRCS-yes := $(filter-out $(AV1_DX_SRCS_REMOVE-yes),$(AV1_DX_SRCS-yes))
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 94eb089..266f8fe 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -516,8 +516,10 @@
   return LUT[bsize];
 }
 
-static INLINE int is_rect_tx_allowed(const MB_MODE_INFO *mbmi) {
-  return is_inter_block(mbmi) && is_rect_tx_allowed_bsize(mbmi->sb_type);
+static INLINE int is_rect_tx_allowed(const MACROBLOCKD *xd,
+                                     const MB_MODE_INFO *mbmi) {
+  return is_inter_block(mbmi) && is_rect_tx_allowed_bsize(mbmi->sb_type) &&
+         !xd->lossless[mbmi->segment_id];  // and the segment is not lossless
 }
 
 static INLINE int is_rect_tx(TX_SIZE tx_size) { return tx_size >= TX_SIZES; }
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index 25ac3ed..c20a701 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -2806,10 +2806,10 @@
   assert(pdf_model[2] != 0);
   // TODO(aconverse): Investigate making the precision of the zero and EOB tree
   // nodes 10-bits.
-  rans_merge_prob8_pdf(pdf_tab, pdf_model[1],
-                       av1_pareto8_token_probs[pdf_model[2] - 1],
-                       ENTROPY_TOKENS - 2);
-  rans_build_cdf_from_pdf(pdf_tab, cdf);
+  aom_rans_merge_prob8_pdf(pdf_tab, pdf_model[1],
+                           av1_pareto8_token_probs[pdf_model[2] - 1],
+                           ENTROPY_TOKENS - 2);
+  aom_rans_build_cdf_from_pdf(pdf_tab, cdf);
 }
 
 void av1_coef_pareto_cdfs(FRAME_CONTEXT *fc) {
diff --git a/av1/common/quant_common.c b/av1/common/quant_common.c
index 4b51016..4f3d481 100644
--- a/av1/common/quant_common.c
+++ b/av1/common/quant_common.c
@@ -20,8 +20,8 @@
 static void make_qmatrices(qm_val_t *wmatrix[NUM_QM_LEVELS][2][2][TX_SIZES],
                            qm_val_t *iwmatrix[NUM_QM_LEVELS][2][2][TX_SIZES]);
 #endif
-#if CONFIG_NEW_QUANT
 
+#if CONFIG_NEW_QUANT
 // Bin widths expressed as a fraction over 128 of the quant stepsize,
 // for the quantization bins 0-4.
 // So a value x indicates the bin is actually factor x/128 of the
@@ -47,54 +47,54 @@
 
 static const qprofile_type nuq[QUANT_PROFILES][QUANT_RANGES][COEF_BANDS] = {
   { {
-        { { 64, 128, 128 }, 0 },  // dc, band 0
-        { { 64, 128, 128 }, 0 },  // band 1
-        { { 64, 128, 128 }, 0 },  // band 2
-        { { 64, 128, 128 }, 0 },  // band 3
-        { { 64, 128, 128 }, 0 },  // band 4
-        { { 64, 128, 128 }, 0 }   // band 5
+        { { 64, 128, 128 }, 8 },   // dc, band 0
+        { { 64, 128, 128 }, 10 },   // band 1
+        { { 64, 128, 128 }, 12 },  // band 2
+        { { 72, 128, 128 }, 14 },  // band 3
+        { { 76, 128, 128 }, 16 },  // band 4
+        { { 80, 128, 128 }, 18 }   // band 5
     },
     {
-        { { 64, 128, 128 }, 0 },  // dc, band 0
-        { { 64, 128, 128 }, 0 },  // band 1
-        { { 64, 128, 128 }, 0 },  // band 2
-        { { 64, 128, 128 }, 0 },  // band 3
-        { { 64, 128, 128 }, 0 },  // band 4
-        { { 64, 128, 128 }, 0 }   // band 5
+        { { 64, 128, 128 }, 4 },   // dc, band 0
+        { { 64, 128, 128 }, 6 },  // band 1
+        { { 64, 128, 128 }, 8 },  // band 2
+        { { 64, 128, 128 }, 10 },  // band 3
+        { { 72, 128, 128 }, 12 },  // band 4
+        { { 80, 128, 128 }, 14 }   // band 5
     } },
 #if QUANT_PROFILES > 1
   { {
-        { { 64, 128, 128 }, 0 },  // dc, band 0
-        { { 64, 128, 128 }, 0 },  // band 1
-        { { 64, 128, 128 }, 0 },  // band 2
-        { { 64, 128, 128 }, 0 },  // band 3
-        { { 64, 128, 128 }, 0 },  // band 4
-        { { 64, 128, 128 }, 0 }   // band 5
+        { { 64, 128, 128 }, 6 },   // dc, band 0
+        { { 64, 128, 128 }, 8 },   // band 1
+        { { 64, 128, 128 }, 10 },  // band 2
+        { { 64, 128, 128 }, 12 },  // band 3
+        { { 72, 128, 128 }, 14 },  // band 4
+        { { 80, 128, 128 }, 16 }   // band 5
     },
     {
-        { { 64, 128, 128 }, 0 },  // dc, band 0
-        { { 64, 128, 128 }, 0 },  // band 1
-        { { 64, 128, 128 }, 0 },  // band 2
-        { { 64, 128, 128 }, 0 },  // band 3
-        { { 64, 128, 128 }, 0 },  // band 4
-        { { 64, 128, 128 }, 0 }   // band 5
+        { { 64, 128, 128 }, 4 },   // dc, band 0
+        { { 64, 128, 128 }, 6 },   // band 1
+        { { 64, 128, 128 }, 8 },  // band 2
+        { { 64, 128, 128 }, 10 },  // band 3
+        { { 72, 128, 128 }, 12 },  // band 4
+        { { 80, 128, 128 }, 14 }   // band 5
     } },
 #if QUANT_PROFILES > 2
   { {
-        { { 64, 128, 128 }, 0 },  // dc, band 0
-        { { 64, 128, 128 }, 0 },  // band 1
-        { { 64, 128, 128 }, 0 },  // band 2
-        { { 64, 128, 128 }, 0 },  // band 3
-        { { 64, 128, 128 }, 0 },  // band 4
-        { { 64, 128, 128 }, 0 },  // band 5
+        { { 64, 128, 128 }, 6 },   // dc, band 0
+        { { 64, 128, 128 }, 8 },   // band 1
+        { { 64, 128, 128 }, 10 },  // band 2
+        { { 64, 128, 128 }, 12 },  // band 3
+        { { 72, 128, 128 }, 14 },  // band 4
+        { { 80, 128, 128 }, 16 }   // band 5
     },
     {
-        { { 64, 128, 128 }, 0 },  // dc, band 0
-        { { 64, 128, 128 }, 0 },  // band 1
-        { { 64, 128, 128 }, 0 },  // band 2
-        { { 64, 128, 128 }, 0 },  // band 3
-        { { 64, 128, 128 }, 0 },  // band 4
-        { { 64, 128, 128 }, 0 },  // band 5
+        { { 64, 128, 128 }, 4 },   // dc, band 0
+        { { 64, 128, 128 }, 6 },   // band 1
+        { { 64, 128, 128 }, 8 },  // band 2
+        { { 64, 128, 128 }, 10 },  // band 3
+        { { 72, 128, 128 }, 12 },  // band 4
+        { { 80, 128, 128 }, 14 }   // band 5
     } }
 #endif  // QUANT_PROFILES > 2
 #endif  // QUANT_PROFILES > 1
diff --git a/av1/common/quant_common.h b/av1/common/quant_common.h
index a2607bf..b4d615b 100644
--- a/av1/common/quant_common.h
+++ b/av1/common/quant_common.h
@@ -70,7 +70,10 @@
 tran_low_t av1_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq);
 tran_low_t av1_dequant_coeff_nuq(int v, int q, const tran_low_t *dq);
 
-static INLINE int get_dq_profile_from_ctx(int q_ctx) {
+static INLINE int get_dq_profile_from_ctx(int q_ctx, int is_inter,
+                                          PLANE_TYPE plane_type) {
+  if (plane_type == PLANE_TYPE_UV) return 0;  // UV planes: always profile 0
+  if (!is_inter) return QUANT_PROFILES - 1;   // non-inter: always the last one
   return AOMMIN(q_ctx, QUANT_PROFILES - 1);
 }
 #endif  // CONFIG_NEW_QUANT
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 167cb66..fc632c3 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -632,3 +632,587 @@
     default: assert(0 && "Invalid TransformationType");
   }
 }
+
+///////////////////////////////////////////////////////////////////////////////
+// svdcmp
+// Adapted from Numerical Recipes in C
+
+static const double TINY_NEAR_ZERO = 1.0E-12;
+
+static INLINE double sign(double a, double b) {
+  return (b >= 0) ? fabs(a) : -fabs(a);  // |a| when b >= 0, otherwise -|a|
+}
+
+static INLINE double pythag(double a, double b) {
+  // sqrt(a^2 + b^2), evaluated through the smaller/larger magnitude ratio.
+  const double mag_a = fabs(a);
+  const double mag_b = fabs(b);
+  double ratio;
+  if (mag_a > mag_b) {
+    ratio = mag_b / mag_a;
+    return mag_a * sqrt(1.0 + ratio * ratio);
+  }
+  ratio = mag_a / mag_b;
+  if (mag_b == 0) return 0;  // the ratio above is unusable in this case
+  return mag_b * sqrt(1.0 + ratio * ratio);
+}
+
+static void multiply_mat(const double *m1, const double *m2, double *res,
+                         const int m1_rows, const int inner_dim,
+                         const int m2_cols) {
+  // res (m1_rows x m2_cols) = m1 (m1_rows x inner_dim) * m2, all row-major.
+  int r, c, k;
+  for (r = 0; r < m1_rows; ++r) {
+    const double *const lhs_row = m1 + r * inner_dim;
+    for (c = 0; c < m2_cols; ++c) {
+      double acc = 0;
+      for (k = 0; k < inner_dim; ++k)
+        acc += lhs_row[k] * m2[k * m2_cols + c];
+      res[r * m2_cols + c] = acc;
+    }
+  }
+}
+
+static int svdcmp(double **u, int m, int n, double w[], double **v) {
+  const int max_its = 30;
+  int flag, i, its, j, jj, k, l, nm;
+  double anorm = 0.0, c, f, g = 0.0, h, s, scale = 0.0, x, y, z;
+  double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
+  if (!rv1) return 1;  // allocation failure is reported like non-convergence
+  for (i = 0; i < n; i++) {  // NR: Householder reduction to bidiagonal form
+    l = i + 1;
+    rv1[i] = scale * g;
+    g = s = scale = 0.0;
+    if (i < m) {
+      for (k = i; k < m; k++) scale += fabs(u[k][i]);
+      if (scale) {
+        for (k = i; k < m; k++) {
+          u[k][i] /= scale;
+          s += u[k][i] * u[k][i];
+        }
+        f = u[i][i];
+        g = -sign(sqrt(s), f);
+        h = f * g - s;
+        u[i][i] = f - g;
+        for (j = l; j < n; j++) {
+          for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
+          f = s / h;
+          for (k = i; k < m; k++) u[k][j] += f * u[k][i];
+        }
+        for (k = i; k < m; k++) u[k][i] *= scale;
+      }
+    }
+    w[i] = scale * g;
+    g = s = scale = 0.0;
+    if (i < m && i != n - 1) {
+      for (k = l; k < n; k++) scale += fabs(u[i][k]);
+      if (scale) {
+        for (k = l; k < n; k++) {
+          u[i][k] /= scale;
+          s += u[i][k] * u[i][k];
+        }
+        f = u[i][l];
+        g = -sign(sqrt(s), f);
+        h = f * g - s;
+        u[i][l] = f - g;
+        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
+        for (j = l; j < m; j++) {
+          for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
+          for (k = l; k < n; k++) u[j][k] += s * rv1[k];
+        }
+        for (k = l; k < n; k++) u[i][k] *= scale;
+      }
+    }
+    anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
+  }
+  for (i = n - 1; i >= 0; i--) {  // NR: accumulate right-hand transforms in v
+    if (i < n - 1) {
+      if (g) {
+        for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
+        for (j = l; j < n; j++) {
+          for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
+          for (k = l; k < n; k++) v[k][j] += s * v[k][i];
+        }
+      }
+      for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
+    }
+    v[i][i] = 1.0;
+    g = rv1[i];
+    l = i;
+  }
+  for (i = AOMMIN(m, n) - 1; i >= 0; i--) {  // NR: accumulate left-hand ones
+    l = i + 1;
+    g = w[i];
+    for (j = l; j < n; j++) u[i][j] = 0.0;
+    if (g) {
+      g = 1.0 / g;
+      for (j = l; j < n; j++) {
+        for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
+        f = (s / u[i][i]) * g;
+        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
+      }
+      for (j = i; j < m; j++) u[j][i] *= g;
+    } else {
+      for (j = i; j < m; j++) u[j][i] = 0.0;
+    }
+    ++u[i][i];
+  }
+  for (k = n - 1; k >= 0; k--) {  // NR: diagonalize, one singular value each
+    for (its = 0; its < max_its; its++) {
+      flag = 1;
+      for (l = k; l >= 0; l--) {
+        nm = l - 1;
+        if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
+          flag = 0;
+          break;
+        }
+        if ((double)(fabs(w[nm]) + anorm) == anorm) break;
+      }
+      if (flag) {
+        c = 0.0;
+        s = 1.0;
+        for (i = l; i <= k; i++) {
+          f = s * rv1[i];
+          rv1[i] = c * rv1[i];
+          if ((double)(fabs(f) + anorm) == anorm) break;
+          g = w[i];
+          h = pythag(f, g);
+          w[i] = h;
+          h = 1.0 / h;
+          c = g * h;
+          s = -f * h;
+          for (j = 0; j < m; j++) {
+            y = u[j][nm];
+            z = u[j][i];
+            u[j][nm] = y * c + z * s;
+            u[j][i] = z * c - y * s;
+          }
+        }
+      }
+      z = w[k];
+      if (l == k) {  // converged for w[k]
+        if (z < 0.0) {  // keep the singular value non-negative
+          w[k] = -z;
+          for (j = 0; j < n; j++) v[j][k] = -v[j][k];
+        }
+        break;
+      }
+      if (its == max_its - 1) {
+        aom_free(rv1);  // release the scratch row before reporting failure
+        return 1;
+      }
+      assert(k > 0);
+      x = w[l];
+      nm = k - 1;
+      y = w[nm];
+      g = rv1[nm];
+      h = rv1[k];
+      f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
+      g = pythag(f, 1.0);
+      f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
+      c = s = 1.0;
+      for (j = l; j <= nm; j++) {
+        i = j + 1;
+        g = rv1[i];
+        y = w[i];
+        h = s * g;
+        g = c * g;
+        z = pythag(f, h);
+        rv1[j] = z;
+        c = f / z;
+        s = h / z;
+        f = x * c + g * s;
+        g = g * c - x * s;
+        h = y * s;
+        y *= c;
+        for (jj = 0; jj < n; jj++) {
+          x = v[jj][j];
+          z = v[jj][i];
+          v[jj][j] = x * c + z * s;
+          v[jj][i] = z * c - x * s;
+        }
+        z = pythag(f, h);
+        w[j] = z;
+        if (z) {
+          z = 1.0 / z;
+          c = f * z;
+          s = h * z;
+        }
+        f = c * g + s * y;
+        x = c * y - s * g;
+        for (jj = 0; jj < m; jj++) {
+          y = u[jj][j];
+          z = u[jj][i];
+          u[jj][j] = y * c + z * s;
+          u[jj][i] = z * c - y * s;
+        }
+      }
+      rv1[l] = 0.0;
+      rv1[k] = f;
+      w[k] = x;
+    }
+  }
+  aom_free(rv1);
+  return 0;
+}
+
+static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
+  // Adapter between flat row-major buffers and svdcmp()'s row-pointer API.
+  // U (assumed to be allocated as MxN) receives a copy of matx, which svdcmp()
+  // then overwrites in place; W and V are handed to svdcmp() as its w and v.
+  // Returns 0 on success, 1 if allocation fails or svdcmp() reports failure.
+  double **const u_rows = (double **)aom_malloc((M) * sizeof(*u_rows));
+  double **const v_rows = (double **)aom_malloc((N) * sizeof(*v_rows));
+  int r, failed;
+
+  if (!(u_rows && v_rows)) {
+    if (u_rows) aom_free(u_rows);
+    if (v_rows) aom_free(v_rows);
+    return 1;
+  }
+
+  // Row r of each matrix starts N doubles after row r - 1.
+  for (r = 0; r < M; r++) {
+    u_rows[r] = &U[r * N];
+  }
+  for (r = 0; r < N; r++) {
+    v_rows[r] = &V[r * N];
+  }
+
+  /* copy from given matx into U, one row at a time */
+  for (r = 0; r < M; r++) {
+    memcpy(&(u_rows[r][0]), matx + N * r, N * sizeof(*matx));
+  }
+
+  /* HERE IT IS: do SVD */
+  failed = svdcmp(u_rows, M, N, W, v_rows) ? 1 : 0;
+
+  /* the row-pointer tables are only needed during the call above, */
+  /* so they are released on both the success and the failure path */
+  aom_free(u_rows);
+  aom_free(v_rows);
+
+  return failed;
+}
+
+int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
+  // Writes the N x M pseudo-inverse of the M x N row-major matrix matx to inv,
+  // computed from its SVD as V * diag(1 / W) * U^T.
+  // Returns 0 on success. Returns 1, leaving inv unwritten, when allocation or
+  // SVD() fails or any singular value is smaller than TINY_NEAR_ZERO.
+  int i, j, k;
+  double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
+  double *const W = (double *)aom_malloc(N * sizeof(*matx));
+  double *const V = (double *)aom_malloc(N * N * sizeof(*matx));
+  int failed = !(U && W && V) || SVD(U, W, V, matx, M, N);
+
+  for (i = 0; !failed && i < N; i++) {
+    if (fabs(W[i]) < TINY_NEAR_ZERO) {
+      failed = 1;
+    }
+  }
+
+  for (i = 0; !failed && i < N; i++) {
+    for (j = 0; j < M; j++) {
+      double ans = 0;
+      for (k = 0; k < N; k++) {
+        ans += V[k + N * i] * U[k + N * j] / W[k];
+      }
+      inv[j + M * i] = ans;
+    }
+  }
+
+  // Single exit so the scratch buffers are released on every path.
+  if (U) aom_free(U);
+  if (W) aom_free(W);
+  if (V) aom_free(V);
+  return failed;
+}
+
+static void normalize_homography(double *pts, int n, double *T) {
+  // Assume the points are 2d coordinates with scale = 1
+  // Works in place on the n (x, y) pairs in pts: subtracts their centroid and
+  // scales them so that the mean distance from the origin becomes sqrt(2).
+  // T receives the 3x3 row-major matrix describing that translate-and-scale.
+  double centroid[2] = { 0, 0 };
+  double mean_dist = 0;
+  double scale;
+  int i;
+  for (i = 0; i < n; ++i) {
+    centroid[0] += pts[2 * i];
+    centroid[1] += pts[2 * i + 1];
+  }
+  centroid[0] /= n;
+  centroid[1] /= n;
+  for (i = 0; i < n; ++i) {
+    double *const p = pts + 2 * i;
+    p[0] -= centroid[0];
+    p[1] -= centroid[1];
+    mean_dist += sqrt(p[0] * p[0] + p[1] * p[1]);
+  }
+  mean_dist /= n;
+  scale = sqrt(2) / mean_dist;
+  T[0] = T[4] = scale;
+  T[2] = -scale * centroid[0];
+  T[5] = -scale * centroid[1];
+  T[1] = T[3] = T[6] = T[7] = 0;
+  T[8] = 1;
+
+  for (i = 0; i < n; ++i) {
+    pts[2 * i] *= scale;
+    pts[2 * i + 1] *= scale;
+  }
+}
+
+static void invnormalize_mat(double *T, double *iT) {
+  // Inverse of a matrix shaped like normalize_homography()'s output:
+  //   [ s 0 tx ; 0 s ty ; 0 0 1 ]  ->  [ 1/s 0 -tx/s ; 0 1/s -ty/s ; 0 0 1 ]
+  // Only T[0], T[2] and T[5] are read.
+  const double inv_scale = 1.0 / T[0];
+  const double shift_x = -T[2] * inv_scale;
+  const double shift_y = -T[5] * inv_scale;
+
+  iT[0] = iT[4] = inv_scale;
+  iT[2] = shift_x;
+  iT[5] = shift_y;
+  iT[1] = iT[3] = iT[6] = iT[7] = 0;
+  iT[8] = 1;
+}
+
+static void denormalize_homography(double *params, double *T1, double *T2) {
+  // In place: params <- inverse(T2) * params * T1, all 3x3 row-major.
+  double inv_T2[9], scratch[9];
+  invnormalize_mat(T2, inv_T2);
+  multiply_mat(params, T1, scratch, 3, 3, 3);
+  multiply_mat(inv_T2, scratch, params, 3, 3, 3);
+}
+
+static void denormalize_affine(double *params, double *T1, double *T2) {
+  // Lifts the six parameters into a 3x3 matrix with bottom row (0, 0, 1), runs
+  // denormalize_homography() on it and writes six entries back. The write-back
+  // order (write_from) differs from the read order (read_from).
+  static const int read_from[6] = { 0, 1, 4, 2, 3, 5 };
+  static const int write_from[6] = { 5, 2, 1, 0, 3, 4 };
+  double h[MAX_PARAMDIM];
+  int i;
+
+  for (i = 0; i < 6; ++i) h[i] = params[read_from[i]];
+  h[6] = h[7] = 0;
+  h[8] = 1;
+
+  denormalize_homography(h, T1, T2);
+
+  // h[write_from[i]] becomes output parameter i.
+  for (i = 0; i < 6; ++i) params[i] = h[write_from[i]];
+}
+
+static void denormalize_rotzoom(double *params, double *T1, double *T2) {
+  // Expand to [ p0 p1 p2 ; -p1 p0 p3 ; 0 0 1 ], denormalize, write back four.
+  double h[MAX_PARAMDIM];
+  h[0] = h[4] = params[0];
+  h[1] = params[1];
+  h[3] = -params[1];
+  h[2] = params[2];
+  h[5] = params[3];
+  h[6] = h[7] = 0;
+  h[8] = 1;
+  denormalize_homography(h, T1, T2);
+  params[0] = h[5];
+  params[1] = h[2];
+  params[2] = h[1];
+  params[3] = h[0];
+}
+
+static void denormalize_translation(double *params, double *T1, double *T2) {
+  // Expand to [ 1 0 p0 ; 0 1 p1 ; 0 0 1 ], denormalize, then return entries
+  // 5 and 2 of the result in params[0] and params[1] respectively.
+  double h[MAX_PARAMDIM];
+  h[0] = h[4] = h[8] = 1;
+  h[1] = h[3] = h[6] = h[7] = 0;
+  h[2] = params[0];
+  h[5] = params[1];
+
+  denormalize_homography(h, T1, T2);
+
+  params[0] = h[5];
+  params[1] = h[2];
+}
+
+int find_translation(const int np, double *pts1, double *pts2, double *mat) {
+  // Estimates a translation from np point pairs: both point sets are
+  // normalized in place, the mean of (pts2 - pts1) is taken in normalized
+  // coordinates, and the result is denormalized into mat[0..1]. Returns 0.
+  double T1[9], T2[9];
+  double sumx = 0;
+  double sumy = 0;
+  int i;
+
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
+
+  for (i = 0; i < np; ++i) {
+    const double sx = pts1[2 * i], sy = pts1[2 * i + 1];
+    const double dx = pts2[2 * i], dy = pts2[2 * i + 1];
+    sumx += dx - sx;
+    sumy += dy - sy;
+  }
+
+  mat[0] = sumx / np;
+  mat[1] = sumy / np;
+  denormalize_translation(mat, T1, T2);
+
+  return 0;
+}
+
+int find_rotzoom(const int np, double *pts1, double *pts2, double *mat) {
+  // Least-squares 4-parameter fit from np point pairs; 0 = ok, 1 = failure.
+  const int np2 = np * 2;
+  double *a = (double *)aom_malloc(sizeof(*a) * np2 * 9);
+  double *b, *temp;
+  int i;
+  double sx, sy, dx, dy;
+  double T1[9], T2[9];
+
+  if (!a) return 1;  // allocation failed; nothing has been touched yet
+  b = a + np2 * 4;    // right-hand side (np2 values), stored after the matrix
+  temp = b + np2;     // receives the 4 x np2 pseudo-inverse of a
+  normalize_homography(pts1, np, T1);  // note: rewrites pts1 in place
+  normalize_homography(pts2, np, T2);  // note: rewrites pts2 in place
+  for (i = 0; i < np; ++i) {
+    dx = *(pts2++);
+    dy = *(pts2++);
+    sx = *(pts1++);
+    sy = *(pts1++);
+    a[i * 2 * 4 + 0] = sx;
+    a[i * 2 * 4 + 1] = sy;
+    a[i * 2 * 4 + 2] = 1;
+    a[i * 2 * 4 + 3] = 0;
+    a[(i * 2 + 1) * 4 + 0] = sy;
+    a[(i * 2 + 1) * 4 + 1] = -sx;
+    a[(i * 2 + 1) * 4 + 2] = 0;
+    a[(i * 2 + 1) * 4 + 3] = 1;
+    b[2 * i] = dx;
+    b[2 * i + 1] = dy;
+  }
+  if (pseudo_inverse(temp, a, np2, 4)) {
+    aom_free(a);
+    return 1;
+  }
+  multiply_mat(temp, b, mat, 4, np2, 1);
+  denormalize_rotzoom(mat, T1, T2);
+  aom_free(a);
+  return 0;
+}
+
+int find_affine(const int np, double *pts1, double *pts2, double *mat) {
+  // Least-squares 6-parameter fit from np point pairs; 0 = ok, 1 = failure.
+  const int np2 = np * 2;
+  double *a = (double *)aom_malloc(sizeof(*a) * np2 * 13);
+  double *b, *temp;
+  int i;
+  double sx, sy, dx, dy;
+  double T1[9], T2[9];
+
+  if (!a) return 1;  // allocation failed; nothing has been touched yet
+  b = a + np2 * 6;    // right-hand side (np2 values), stored after the matrix
+  temp = b + np2;     // receives the 6 x np2 pseudo-inverse of a
+  normalize_homography(pts1, np, T1);  // note: rewrites pts1 in place
+  normalize_homography(pts2, np, T2);  // note: rewrites pts2 in place
+  for (i = 0; i < np; ++i) {
+    dx = *(pts2++);
+    dy = *(pts2++);
+    sx = *(pts1++);
+    sy = *(pts1++);
+    a[i * 2 * 6 + 0] = sx;
+    a[i * 2 * 6 + 1] = sy;
+    a[i * 2 * 6 + 2] = 0;
+    a[i * 2 * 6 + 3] = 0;
+    a[i * 2 * 6 + 4] = 1;
+    a[i * 2 * 6 + 5] = 0;
+    a[(i * 2 + 1) * 6 + 0] = 0;
+    a[(i * 2 + 1) * 6 + 1] = 0;
+    a[(i * 2 + 1) * 6 + 2] = sx;
+    a[(i * 2 + 1) * 6 + 3] = sy;
+    a[(i * 2 + 1) * 6 + 4] = 0;
+    a[(i * 2 + 1) * 6 + 5] = 1;
+    b[2 * i] = dx;
+    b[2 * i + 1] = dy;
+  }
+  if (pseudo_inverse(temp, a, np2, 6)) {
+    aom_free(a);
+    return 1;
+  }
+  multiply_mat(temp, b, mat, 6, np2, 1);
+  denormalize_affine(mat, T1, T2);
+  aom_free(a);
+  return 0;
+}
+
+int find_homography(const int np, double *pts1, double *pts2, double *mat) {
+  // Implemented from Peter Kovesi's normalized implementation
+  // Fits a 3x3 matrix (9 values in mat) to np point pairs; pts1 and pts2 are
+  // normalized in place. Returns 0 on success and 1 on any failure.
+  const int np3 = np * 3;
+  double *a = (double *)aom_malloc(sizeof(*a) * np3 * 18);
+  double *U;
+  double S[9], V[9 * 9];
+  int i, mini, svd_failed;
+  double sx, sy, dx, dy, minS = 1e12;
+  double T1[9], T2[9];
+
+  if (!a) return 1;  // allocation failed; nothing has been touched yet
+  U = a + np3 * 9;    // second half of the buffer, np3 x 9
+  normalize_homography(pts1, np, T1);
+  normalize_homography(pts2, np, T2);
+
+  for (i = 0; i < np; ++i) {
+    dx = *(pts2++);
+    dy = *(pts2++);
+    sx = *(pts1++);
+    sy = *(pts1++);
+
+    a[i * 3 * 9 + 0] = a[i * 3 * 9 + 1] = a[i * 3 * 9 + 2] = 0;
+    a[i * 3 * 9 + 3] = -sx;
+    a[i * 3 * 9 + 4] = -sy;
+    a[i * 3 * 9 + 5] = -1;
+    a[i * 3 * 9 + 6] = dy * sx;
+    a[i * 3 * 9 + 7] = dy * sy;
+    a[i * 3 * 9 + 8] = dy;
+
+    a[(i * 3 + 1) * 9 + 0] = sx;
+    a[(i * 3 + 1) * 9 + 1] = sy;
+    a[(i * 3 + 1) * 9 + 2] = 1;
+    a[(i * 3 + 1) * 9 + 3] = a[(i * 3 + 1) * 9 + 4] = a[(i * 3 + 1) * 9 + 5] =
+        0;
+    a[(i * 3 + 1) * 9 + 6] = -dx * sx;
+    a[(i * 3 + 1) * 9 + 7] = -dx * sy;
+    a[(i * 3 + 1) * 9 + 8] = -dx;
+
+    a[(i * 3 + 2) * 9 + 0] = -dy * sx;
+    a[(i * 3 + 2) * 9 + 1] = -dy * sy;
+    a[(i * 3 + 2) * 9 + 2] = -dy;
+    a[(i * 3 + 2) * 9 + 3] = dx * sx;
+    a[(i * 3 + 2) * 9 + 4] = dx * sy;
+    a[(i * 3 + 2) * 9 + 5] = dx;
+    a[(i * 3 + 2) * 9 + 6] = a[(i * 3 + 2) * 9 + 7] = a[(i * 3 + 2) * 9 + 8] =
+        0;
+  }
+
+  svd_failed = SVD(U, S, V, a, np3, 9);
+  aom_free(a);  // a and U (which lives inside it) are not read past this point
+  if (svd_failed) return 1;
+
+  // Pick the column of V that belongs to the smallest singular value.
+  mini = -1;
+  for (i = 0; i < 9; ++i) {
+    if (S[i] < minS) {
+      minS = S[i];
+      mini = i;
+    }
+  }
+  if (mini < 0) return 1;  // nothing below the 1e12 sentinel: no valid column
+
+  for (i = 0; i < 9; i++) mat[i] = V[i * 9 + mini];
+  denormalize_homography(mat, T1, T2);
+  return (mat[8] == 0.0) ? 1 : 0;
+}
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index 53f06dd..da92599 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -22,6 +22,8 @@
 #include "aom_dsp/aom_dsp_common.h"
 #include "av1/common/mv.h"
 
+#define MAX_PARAMDIM 9
+
 typedef void (*ProjectPointsFunc)(int16_t *mat, int *points, int *proj,
                                   const int n, const int stride_points,
                                   const int stride_proj,
@@ -67,4 +69,9 @@
 // Integerize model into the WarpedMotionParams structure
 void av1_integerize_model(const double *model, TransformationType wmtype,
                           WarpedMotionParams *wm);
+
+int find_translation(const int np, double *pts1, double *pts2, double *mat);
+int find_rotzoom(const int np, double *pts1, double *pts2, double *mat);
+int find_affine(const int np, double *pts1, double *pts2, double *mat);
+int find_homography(const int np, double *pts1, double *pts2, double *mat);
 #endif  // AV1_COMMON_WARPED_MOTION_H_
diff --git a/av1/decoder/bitreader.h b/av1/decoder/bitreader.h
deleted file mode 100644
index 4d77664..0000000
--- a/av1/decoder/bitreader.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-/* The purpose of this header is to provide compile time pluggable bit reader
- * implementations with a common interface. */
-
-#ifndef AOM10_DECODER_BITREADER_H_
-#define AOM10_DECODER_BITREADER_H_
-
-#include "./aom_config.h"
-
-#if CONFIG_ANS
-#include "aom_dsp/ans.h"
-#include "aom/aomdx.h"  // for av1_decrypt_cb
-#define aom_reader struct AnsDecoder
-#define aom_reader_has_error ans_reader_has_error
-#define aom_read uabs_read
-#define aom_read_bit uabs_read_bit
-#define aom_read_literal uabs_read_literal
-#define aom_read_tree uabs_read_tree
-#else
-#include "aom_dsp/bitreader.h"
-#define aom_reader aom_reader
-#define aom_reader_has_error aom_reader_has_error
-#define aom_read aom_read
-#define aom_read_bit aom_read_bit
-#define aom_read_literal aom_read_literal
-#define aom_read_tree aom_read_tree
-#endif
-
-#endif  // AOM10_DECODER_BITREADER_H_
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index ceed8b3..f6efdf5 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -306,14 +306,14 @@
     if (allow_select && tx_mode == TX_MODE_SELECT) {
       const TX_SIZE coded_tx_size =
           read_selected_tx_size(cm, xd, inter_tx_size_cat_lookup[bsize], r);
-#if !CONFIG_RECT_TX
-      assert(coded_tx_size <= max_txsize_lookup[bsize]);
-#else
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
       if (coded_tx_size > max_txsize_lookup[bsize]) {
         assert(coded_tx_size == max_txsize_lookup[bsize] + 1);
         return max_txsize_rect_lookup[bsize];
       }
-#endif  // !CONFIG_RECT_TX
+#else
+      assert(coded_tx_size <= max_txsize_lookup[bsize]);
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
       return coded_tx_size;
     } else {
       return tx_size_from_tx_mode(bsize, cm->tx_mode, 1);
@@ -1661,7 +1661,8 @@
       int idx, idy;
       int tx_size_cat = inter_tx_size_cat_lookup[bsize];
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-      int is_rect_tx_allowed = inter_block && is_rect_tx_allowed_bsize(bsize);
+      int is_rect_tx_allowed = inter_block && is_rect_tx_allowed_bsize(bsize) &&
+                               !xd->lossless[mbmi->segment_id];
       int use_rect_tx = 0;
 
       if (is_rect_tx_allowed) {
diff --git a/av1/decoder/decodemv.h b/av1/decoder/decodemv.h
index aa17b5b..e916262 100644
--- a/av1/decoder/decodemv.h
+++ b/av1/decoder/decodemv.h
@@ -12,7 +12,7 @@
 #ifndef AV1_DECODER_DECODEMV_H_
 #define AV1_DECODER_DECODEMV_H_
 
-#include "av1/decoder/bitreader.h"
+#include "aom_dsp/bitreader.h"
 
 #include "av1/decoder/decoder.h"
 
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index 43fac67..3900b44 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -15,7 +15,7 @@
 #include "./aom_config.h"
 
 #include "aom/aom_codec.h"
-#include "av1/decoder/bitreader.h"
+#include "aom_dsp/bitreader.h"
 #include "aom_scale/yv12config.h"
 #include "aom_util/aom_thread.h"
 
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index fbcf8fe..4dfac3c 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -505,7 +505,8 @@
   const int ctx =
       get_entropy_context(tx_size, pd->above_context + x, pd->left_context + y);
 #if CONFIG_NEW_QUANT
-  int dq = get_dq_profile_from_ctx(ctx);
+  const int ref = is_inter_block(&xd->mi[0]->mbmi);
+  int dq = get_dq_profile_from_ctx(ctx, ref, pd->plane_type);
 #endif  //  CONFIG_NEW_QUANT
 
 #if !CONFIG_ANS
diff --git a/av1/decoder/dsubexp.h b/av1/decoder/dsubexp.h
index ed88f28..c0d372a 100644
--- a/av1/decoder/dsubexp.h
+++ b/av1/decoder/dsubexp.h
@@ -12,7 +12,7 @@
 #ifndef AV1_DECODER_DSUBEXP_H_
 #define AV1_DECODER_DSUBEXP_H_
 
-#include "av1/decoder/bitreader.h"
+#include "aom_dsp/bitreader.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 7ef24bb..5de0f48 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -406,7 +406,7 @@
     const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
 
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-    assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(mbmi)));
+    assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi)));
     assert(
         IMPLIES(is_rect_tx(tx_size), tx_size == max_txsize_rect_lookup[bsize]));
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -1132,7 +1132,7 @@
       int idx, idy;
 
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-      if (is_rect_tx_allowed(mbmi)) {
+      if (is_rect_tx_allowed(xd, mbmi)) {
         int tx_size_cat = inter_tx_size_cat_lookup[bsize];
 
         aom_write(w, is_rect_tx(mbmi->tx_size),
diff --git a/av1/encoder/bitwriter.h b/av1/encoder/bitwriter.h
deleted file mode 100644
index 21cc6a3..0000000
--- a/av1/encoder/bitwriter.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-/* The purpose of this header is to provide compile time pluggable bit writer
- * implementations with a common interface. */
-
-#ifndef AOM10_ENCODER_BITWRITER_H_
-#define AOM10_ENCODER_BITWRITER_H_
-
-#include "aom_dsp/bitwriter.h"
-
-#endif  // AOM10_ENCODER_BITWRITER_H_
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index ab896f4..12ef33d 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5142,14 +5142,14 @@
                                        : intra_tx_size_cat_lookup[bsize];
       const TX_SIZE coded_tx_size = txsize_sqr_up_map[mbmi->tx_size];
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-      assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(mbmi)));
+      assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi)));
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
 #if CONFIG_VAR_TX
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-      if (is_rect_tx_allowed(mbmi)) {
+      if (is_rect_tx_allowed(xd, mbmi)) {
         td->counts->rect_tx[tx_size_cat][is_rect_tx(mbmi->tx_size)]++;
       }
-      if (!is_rect_tx_allowed(mbmi) || !is_rect_tx(mbmi->tx_size)) {
+      if (!is_rect_tx_allowed(xd, mbmi) || !is_rect_tx(mbmi->tx_size)) {
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
         if (is_inter)
           tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts);
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 18fcb37..cf37140 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -96,7 +96,7 @@
 #endif
   const int shift = get_tx_scale(xd, tx_type, tx_size);
 #if CONFIG_NEW_QUANT
-  int dq = get_dq_profile_from_ctx(ctx);
+  int dq = get_dq_profile_from_ctx(ctx, ref, type);
   const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
 #else
   const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
@@ -517,12 +517,12 @@
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
-  const scan_order *const scan_order =
-      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+  const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
+  const scan_order *const scan_order = get_scan(tx_size, tx_type, is_inter);
   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  int dq = get_dq_profile_from_ctx(ctx);
+  int dq = get_dq_profile_from_ctx(ctx, is_inter, plane_type);
   uint16_t *const eob = &p->eobs[block];
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   const int16_t *src_diff;
@@ -584,11 +584,11 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   const struct macroblock_plane *const p = &x->plane[plane];
   const struct macroblockd_plane *const pd = &xd->plane[plane];
-  int dq = get_dq_profile_from_ctx(ctx);
+  const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
-  const scan_order *const scan_order =
-      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+  const scan_order *const scan_order = get_scan(tx_size, tx_type, is_inter);
+  int dq = get_dq_profile_from_ctx(ctx, is_inter, plane_type);
   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -660,7 +660,8 @@
   uint16_t *const eob = &p->eobs[block];
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   const int16_t *src_diff;
-  int dq = get_dq_profile_from_ctx(ctx);
+  const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
+  int dq = get_dq_profile_from_ctx(ctx, is_inter, plane_type);
 
   FWD_TXFM_PARAM fwd_txfm_param;
 
@@ -720,7 +721,8 @@
   uint16_t *const eob = &p->eobs[block];
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   const int16_t *src_diff;
-  int dq = get_dq_profile_from_ctx(ctx);
+  const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
+  int dq = get_dq_profile_from_ctx(ctx, is_inter, plane_type);
 
   FWD_TXFM_PARAM fwd_txfm_param;
 
diff --git a/av1/encoder/ransac.c b/av1/encoder/ransac.c
index 86177a0..0c8ad67 100644
--- a/av1/encoder/ransac.c
+++ b/av1/encoder/ransac.c
@@ -17,307 +17,11 @@
 
 #include "av1/encoder/ransac.h"
 
-#define MAX_PARAMDIM 9
 #define MAX_MINPTS 4
 
 #define MAX_DEGENERATE_ITER 10
 #define MINPTS_MULTIPLIER 5
 
-// svdcmp
-// Adopted from Numerical Recipes in C
-
-static const double TINY_NEAR_ZERO = 1.0E-12;
-
-static INLINE double sign(double a, double b) {
-  return ((b) >= 0 ? fabs(a) : -fabs(a));
-}
-
-static INLINE double pythag(double a, double b) {
-  double ct;
-  const double absa = fabs(a);
-  const double absb = fabs(b);
-
-  if (absa > absb) {
-    ct = absb / absa;
-    return absa * sqrt(1.0 + ct * ct);
-  } else {
-    ct = absa / absb;
-    return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct);
-  }
-}
-
-static void multiply_mat(const double *m1, const double *m2, double *res,
-                         const int m1_rows, const int inner_dim,
-                         const int m2_cols) {
-  double sum;
-
-  int row, col, inner;
-  for (row = 0; row < m1_rows; ++row) {
-    for (col = 0; col < m2_cols; ++col) {
-      sum = 0;
-      for (inner = 0; inner < inner_dim; ++inner)
-        sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
-      *(res++) = sum;
-    }
-  }
-}
-
-static int svdcmp(double **u, int m, int n, double w[], double **v) {
-  const int max_its = 30;
-  int flag, i, its, j, jj, k, l, nm;
-  double anorm, c, f, g, h, s, scale, x, y, z;
-  double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
-  g = scale = anorm = 0.0;
-  for (i = 0; i < n; i++) {
-    l = i + 1;
-    rv1[i] = scale * g;
-    g = s = scale = 0.0;
-    if (i < m) {
-      for (k = i; k < m; k++) scale += fabs(u[k][i]);
-      if (scale) {
-        for (k = i; k < m; k++) {
-          u[k][i] /= scale;
-          s += u[k][i] * u[k][i];
-        }
-        f = u[i][i];
-        g = -sign(sqrt(s), f);
-        h = f * g - s;
-        u[i][i] = f - g;
-        for (j = l; j < n; j++) {
-          for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
-          f = s / h;
-          for (k = i; k < m; k++) u[k][j] += f * u[k][i];
-        }
-        for (k = i; k < m; k++) u[k][i] *= scale;
-      }
-    }
-    w[i] = scale * g;
-    g = s = scale = 0.0;
-    if (i < m && i != n - 1) {
-      for (k = l; k < n; k++) scale += fabs(u[i][k]);
-      if (scale) {
-        for (k = l; k < n; k++) {
-          u[i][k] /= scale;
-          s += u[i][k] * u[i][k];
-        }
-        f = u[i][l];
-        g = -sign(sqrt(s), f);
-        h = f * g - s;
-        u[i][l] = f - g;
-        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
-        for (j = l; j < m; j++) {
-          for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
-          for (k = l; k < n; k++) u[j][k] += s * rv1[k];
-        }
-        for (k = l; k < n; k++) u[i][k] *= scale;
-      }
-    }
-    anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
-  }
-
-  for (i = n - 1; i >= 0; i--) {
-    if (i < n - 1) {
-      if (g) {
-        for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
-        for (j = l; j < n; j++) {
-          for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
-          for (k = l; k < n; k++) v[k][j] += s * v[k][i];
-        }
-      }
-      for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
-    }
-    v[i][i] = 1.0;
-    g = rv1[i];
-    l = i;
-  }
-  for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
-    l = i + 1;
-    g = w[i];
-    for (j = l; j < n; j++) u[i][j] = 0.0;
-    if (g) {
-      g = 1.0 / g;
-      for (j = l; j < n; j++) {
-        for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
-        f = (s / u[i][i]) * g;
-        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
-      }
-      for (j = i; j < m; j++) u[j][i] *= g;
-    } else {
-      for (j = i; j < m; j++) u[j][i] = 0.0;
-    }
-    ++u[i][i];
-  }
-  for (k = n - 1; k >= 0; k--) {
-    for (its = 0; its < max_its; its++) {
-      flag = 1;
-      for (l = k; l >= 0; l--) {
-        nm = l - 1;
-        if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
-          flag = 0;
-          break;
-        }
-        if ((double)(fabs(w[nm]) + anorm) == anorm) break;
-      }
-      if (flag) {
-        c = 0.0;
-        s = 1.0;
-        for (i = l; i <= k; i++) {
-          f = s * rv1[i];
-          rv1[i] = c * rv1[i];
-          if ((double)(fabs(f) + anorm) == anorm) break;
-          g = w[i];
-          h = pythag(f, g);
-          w[i] = h;
-          h = 1.0 / h;
-          c = g * h;
-          s = -f * h;
-          for (j = 0; j < m; j++) {
-            y = u[j][nm];
-            z = u[j][i];
-            u[j][nm] = y * c + z * s;
-            u[j][i] = z * c - y * s;
-          }
-        }
-      }
-      z = w[k];
-      if (l == k) {
-        if (z < 0.0) {
-          w[k] = -z;
-          for (j = 0; j < n; j++) v[j][k] = -v[j][k];
-        }
-        break;
-      }
-      if (its == max_its - 1) {
-        return 1;
-      }
-      assert(k > 0);
-      x = w[l];
-      nm = k - 1;
-      y = w[nm];
-      g = rv1[nm];
-      h = rv1[k];
-      f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
-      g = pythag(f, 1.0);
-      f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
-      c = s = 1.0;
-      for (j = l; j <= nm; j++) {
-        i = j + 1;
-        g = rv1[i];
-        y = w[i];
-        h = s * g;
-        g = c * g;
-        z = pythag(f, h);
-        rv1[j] = z;
-        c = f / z;
-        s = h / z;
-        f = x * c + g * s;
-        g = g * c - x * s;
-        h = y * s;
-        y *= c;
-        for (jj = 0; jj < n; jj++) {
-          x = v[jj][j];
-          z = v[jj][i];
-          v[jj][j] = x * c + z * s;
-          v[jj][i] = z * c - x * s;
-        }
-        z = pythag(f, h);
-        w[j] = z;
-        if (z) {
-          z = 1.0 / z;
-          c = f * z;
-          s = h * z;
-        }
-        f = c * g + s * y;
-        x = c * y - s * g;
-        for (jj = 0; jj < m; jj++) {
-          y = u[jj][j];
-          z = u[jj][i];
-          u[jj][j] = y * c + z * s;
-          u[jj][i] = z * c - y * s;
-        }
-      }
-      rv1[l] = 0.0;
-      rv1[k] = f;
-      w[k] = x;
-    }
-  }
-  aom_free(rv1);
-  return 0;
-}
-
-static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
-  // Assumes allocation for U is MxN
-  double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
-  double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
-  int problem, i;
-
-  problem = !(nrU && nrV);
-  if (!problem) {
-    for (i = 0; i < M; i++) {
-      nrU[i] = &U[i * N];
-    }
-    for (i = 0; i < N; i++) {
-      nrV[i] = &V[i * N];
-    }
-  } else {
-    if (nrU) aom_free(nrU);
-    if (nrV) aom_free(nrV);
-    return 1;
-  }
-
-  /* copy from given matx into nrU */
-  for (i = 0; i < M; i++) {
-    memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
-  }
-
-  /* HERE IT IS: do SVD */
-  if (svdcmp(nrU, M, N, W, nrV)) {
-    aom_free(nrU);
-    aom_free(nrV);
-    return 1;
-  }
-
-  /* aom_free Numerical Recipes arrays */
-  aom_free(nrU);
-  aom_free(nrV);
-
-  return 0;
-}
-
-int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
-  double ans;
-  int i, j, k;
-  double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
-  double *const W = (double *)aom_malloc(N * sizeof(*matx));
-  double *const V = (double *)aom_malloc(N * N * sizeof(*matx));
-
-  if (!(U && W && V)) {
-    return 1;
-  }
-  if (SVD(U, W, V, matx, M, N)) {
-    return 1;
-  }
-  for (i = 0; i < N; i++) {
-    if (fabs(W[i]) < TINY_NEAR_ZERO) {
-      return 1;
-    }
-  }
-
-  for (i = 0; i < N; i++) {
-    for (j = 0; j < M; j++) {
-      ans = 0;
-      for (k = 0; k < N; k++) {
-        ans += V[k + N * i] * U[k + N * j] / W[k];
-      }
-      inv[j + M * i] = ans;
-    }
-  }
-  aom_free(U);
-  aom_free(W);
-  aom_free(V);
-  return 0;
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 // ransac
 typedef int (*IsDegenerateFunc)(double *p);
@@ -325,6 +29,68 @@
 typedef void (*DenormalizeFunc)(double *params, double *T1, double *T2);
 typedef int (*FindTransformationFunc)(int points, double *points1,
                                       double *points2, double *params);
+typedef void (*ProjectPointsDoubleFunc)(double *mat, double *points,
+                                        double *proj, const int n,
+                                        const int stride_points,
+                                        const int stride_proj);
+
+// Translation model: maps each of the n input points (x, y) to
+// (x + mat[1], y + mat[0]). Input and output advance by their own strides.
+static void project_points_double_translation(double *mat, double *points,
+                                              double *proj, const int n,
+                                              const int stride_points,
+                                              const int stride_proj) {
+  int k;
+  for (k = 0; k < n; ++k, points += stride_points, proj += stride_proj) {
+    const double x = points[0], y = points[1];
+    proj[0] = x + mat[1];
+    proj[1] = y + mat[0];
+  }
+}
+
+// Rotation-zoom model: maps each of the n input points (x, y) to
+// (mat[3] * x + mat[2] * y + mat[1], -mat[2] * x + mat[3] * y + mat[0]).
+static void project_points_double_rotzoom(double *mat, double *points,
+                                          double *proj, const int n,
+                                          const int stride_points,
+                                          const int stride_proj) {
+  int k;
+  for (k = 0; k < n; ++k, points += stride_points, proj += stride_proj) {
+    const double x = points[0], y = points[1];
+    proj[0] = mat[3] * x + mat[2] * y + mat[1];
+    proj[1] = -mat[2] * x + mat[3] * y + mat[0];
+  }
+}
+
+// Affine model: maps each of the n input points (x, y) to
+// (mat[3] * x + mat[2] * y + mat[1], mat[4] * x + mat[5] * y + mat[0]).
+static void project_points_double_affine(double *mat, double *points,
+                                         double *proj, const int n,
+                                         const int stride_points,
+                                         const int stride_proj) {
+  int k;
+  for (k = 0; k < n; ++k, points += stride_points, proj += stride_proj) {
+    const double x = points[0], y = points[1];
+    proj[0] = mat[3] * x + mat[2] * y + mat[1];
+    proj[1] = mat[4] * x + mat[5] * y + mat[0];
+  }
+}
+
+// Homography model: projects n points with Z = 1 / (mat[7] * x + mat[6] * y
+// + 1). The y output is (mat[2] * x + mat[5] * y + mat[4]) * Z, so each of
+// the eight parameters mat[0..7] is used exactly once.
+static void project_points_double_homography(double *mat, double *points,
+                                             double *proj, const int n,
+                                             const int stride_points,
+                                             const int stride_proj) {
+  int i;
+  for (i = 0; i < n; ++i, points += stride_points, proj += stride_proj) {
+    const double x = points[0], y = points[1];
+    const double Z = 1. / (mat[7] * x + mat[6] * y + 1);
+    proj[0] = (mat[1] * x + mat[0] * y + mat[3]) * Z;
+    proj[1] = (mat[2] * x + mat[5] * y + mat[4]) * Z;
+  }
+}
 
 static int get_rand_indices(int npoints, int minpts, int *indices) {
   int i, j;
@@ -353,7 +119,7 @@
                   const int paramdim, IsDegenerateFunc is_degenerate,
                   NormalizeFunc normalize, DenormalizeFunc denormalize,
                   FindTransformationFunc find_transformation,
-                  ProjectPointsFunc projectpoints, TransformationType type) {
+                  ProjectPointsDoubleFunc projectpoints) {
   static const double INLIER_THRESHOLD_NORMALIZED = 0.1;
   static const double INLIER_THRESHOLD_UNNORMALIZED = 1.0;
   static const double PROBABILITY_REQUIRED = 0.9;
@@ -380,9 +146,8 @@
   double *inlier_set1;
   double *inlier_set2;
   double *corners1;
-  int *corners1_int;
   double *corners2;
-  int *image1_coord;
+  double *image1_coord;
   int *inlier_mask;
 
   double *cnp1, *cnp2;
@@ -406,13 +171,12 @@
   inlier_set1 = (double *)aom_malloc(sizeof(*inlier_set1) * npoints * 2);
   inlier_set2 = (double *)aom_malloc(sizeof(*inlier_set2) * npoints * 2);
   corners1 = (double *)aom_malloc(sizeof(*corners1) * npoints * 2);
-  corners1_int = (int *)aom_malloc(sizeof(*corners1_int) * npoints * 2);
   corners2 = (double *)aom_malloc(sizeof(*corners2) * npoints * 2);
-  image1_coord = (int *)aom_malloc(sizeof(*image1_coord) * npoints * 2);
+  image1_coord = (double *)aom_malloc(sizeof(*image1_coord) * npoints * 2);
   inlier_mask = (int *)aom_malloc(sizeof(*inlier_mask) * npoints);
 
   if (!(best_inlier_set1 && best_inlier_set2 && inlier_set1 && inlier_set2 &&
-        corners1 && corners1_int && corners2 && image1_coord && inlier_mask)) {
+        corners1 && corners2 && image1_coord && inlier_mask)) {
     ret_val = 1;
     goto finish_ransac;
   }
@@ -465,20 +229,11 @@
       continue;
     }
 
-    for (i = 0; i < npoints; ++i) {
-      corners1_int[2 * i] = (int)corners1[i * 2];
-      corners1_int[2 * i + 1] = (int)corners1[i * 2 + 1];
-    }
-
-    av1_integerize_model(params, type, &wm);
-    projectpoints((int16_t *)wm.wmmat, corners1_int, image1_coord, npoints, 2,
-                  2, 0, 0);
+    projectpoints(params, corners1, image1_coord, npoints, 2, 2);
 
     for (i = 0; i < npoints; ++i) {
-      double dx =
-          (image1_coord[i * 2] >> WARPEDPIXEL_PREC_BITS) - corners2[i * 2];
-      double dy = (image1_coord[i * 2 + 1] >> WARPEDPIXEL_PREC_BITS) -
-                  corners2[i * 2 + 1];
+      double dx = image1_coord[i * 2] - corners2[i * 2];
+      double dy = image1_coord[i * 2 + 1] - corners2[i * 2 + 1];
       double distance = sqrt(dx * dx + dy * dy);
 
       inlier_mask[i] = distance < inlier_threshold;
@@ -543,117 +298,6 @@
   return ret_val;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-
-static void normalize_homography(double *pts, int n, double *T) {
-  // Assume the points are 2d coordinates with scale = 1
-  double *p = pts;
-  double mean[2] = { 0, 0 };
-  double msqe = 0;
-  double scale;
-  int i;
-  for (i = 0; i < n; ++i, p += 2) {
-    mean[0] += p[0];
-    mean[1] += p[1];
-  }
-  mean[0] /= n;
-  mean[1] /= n;
-  for (p = pts, i = 0; i < n; ++i, p += 2) {
-    p[0] -= mean[0];
-    p[1] -= mean[1];
-    msqe += sqrt(p[0] * p[0] + p[1] * p[1]);
-  }
-  msqe /= n;
-  scale = sqrt(2) / msqe;
-  T[0] = scale;
-  T[1] = 0;
-  T[2] = -scale * mean[0];
-  T[3] = 0;
-  T[4] = scale;
-  T[5] = -scale * mean[1];
-  T[6] = 0;
-  T[7] = 0;
-  T[8] = 1;
-  for (p = pts, i = 0; i < n; ++i, p += 2) {
-    p[0] *= scale;
-    p[1] *= scale;
-  }
-}
-
-static void invnormalize_mat(double *T, double *iT) {
-  double is = 1.0 / T[0];
-  double m0 = -T[2] * is;
-  double m1 = -T[5] * is;
-  iT[0] = is;
-  iT[1] = 0;
-  iT[2] = m0;
-  iT[3] = 0;
-  iT[4] = is;
-  iT[5] = m1;
-  iT[6] = 0;
-  iT[7] = 0;
-  iT[8] = 1;
-}
-
-static void denormalize_homography(double *params, double *T1, double *T2) {
-  double iT2[9];
-  double params2[9];
-  invnormalize_mat(T2, iT2);
-  multiply_mat(params, T1, params2, 3, 3, 3);
-  multiply_mat(iT2, params2, params, 3, 3, 3);
-}
-
-static void denormalize_affine(double *params, double *T1, double *T2) {
-  double params_denorm[MAX_PARAMDIM];
-  params_denorm[0] = params[0];
-  params_denorm[1] = params[1];
-  params_denorm[2] = params[4];
-  params_denorm[3] = params[2];
-  params_denorm[4] = params[3];
-  params_denorm[5] = params[5];
-  params_denorm[6] = params_denorm[7] = 0;
-  params_denorm[8] = 1;
-  denormalize_homography(params_denorm, T1, T2);
-  params[0] = params_denorm[5];
-  params[1] = params_denorm[2];
-  params[2] = params_denorm[1];
-  params[3] = params_denorm[0];
-  params[4] = params_denorm[3];
-  params[5] = params_denorm[4];
-}
-
-static void denormalize_rotzoom(double *params, double *T1, double *T2) {
-  double params_denorm[MAX_PARAMDIM];
-  params_denorm[0] = params[0];
-  params_denorm[1] = params[1];
-  params_denorm[2] = params[2];
-  params_denorm[3] = -params[1];
-  params_denorm[4] = params[0];
-  params_denorm[5] = params[3];
-  params_denorm[6] = params_denorm[7] = 0;
-  params_denorm[8] = 1;
-  denormalize_homography(params_denorm, T1, T2);
-  params[0] = params_denorm[5];
-  params[1] = params_denorm[2];
-  params[2] = params_denorm[1];
-  params[3] = params_denorm[0];
-}
-
-static void denormalize_translation(double *params, double *T1, double *T2) {
-  double params_denorm[MAX_PARAMDIM];
-  params_denorm[0] = 1;
-  params_denorm[1] = 0;
-  params_denorm[2] = params[0];
-  params_denorm[3] = 0;
-  params_denorm[4] = 1;
-  params_denorm[5] = params[1];
-  params_denorm[6] = params_denorm[7] = 0;
-  params_denorm[8] = 1;
-  denormalize_homography(params_denorm, T1, T2);
-  params[0] = params_denorm[5];
-  params[1] = params_denorm[2];
-}
-
 static int is_collinear3(double *p1, double *p2, double *p3) {
   static const double collinear_eps = 1e-3;
   const double v =
@@ -674,185 +318,6 @@
          is_collinear3(p, p + 4, p + 6) || is_collinear3(p + 2, p + 4, p + 6);
 }
 
-int find_translation(const int np, double *pts1, double *pts2, double *mat) {
-  int i;
-  double sx, sy, dx, dy;
-  double sumx, sumy;
-
-  double T1[9], T2[9];
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  sumx = 0;
-  sumy = 0;
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    sumx += dx - sx;
-    sumy += dy - sy;
-  }
-  mat[0] = sumx / np;
-  mat[1] = sumy / np;
-  denormalize_translation(mat, T1, T2);
-  return 0;
-}
-
-int find_rotzoom(const int np, double *pts1, double *pts2, double *mat) {
-  const int np2 = np * 2;
-  double *a = (double *)aom_malloc(sizeof(*a) * np2 * 9);
-  double *b = a + np2 * 4;
-  double *temp = b + np2;
-  int i;
-  double sx, sy, dx, dy;
-
-  double T1[9], T2[9];
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    a[i * 2 * 4 + 0] = sx;
-    a[i * 2 * 4 + 1] = sy;
-    a[i * 2 * 4 + 2] = 1;
-    a[i * 2 * 4 + 3] = 0;
-    a[(i * 2 + 1) * 4 + 0] = sy;
-    a[(i * 2 + 1) * 4 + 1] = -sx;
-    a[(i * 2 + 1) * 4 + 2] = 0;
-    a[(i * 2 + 1) * 4 + 3] = 1;
-
-    b[2 * i] = dx;
-    b[2 * i + 1] = dy;
-  }
-  if (pseudo_inverse(temp, a, np2, 4)) {
-    aom_free(a);
-    return 1;
-  }
-  multiply_mat(temp, b, mat, 4, np2, 1);
-  denormalize_rotzoom(mat, T1, T2);
-  aom_free(a);
-  return 0;
-}
-
-int find_affine(const int np, double *pts1, double *pts2, double *mat) {
-  const int np2 = np * 2;
-  double *a = (double *)aom_malloc(sizeof(*a) * np2 * 13);
-  double *b = a + np2 * 6;
-  double *temp = b + np2;
-  int i;
-  double sx, sy, dx, dy;
-
-  double T1[9], T2[9];
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    a[i * 2 * 6 + 0] = sx;
-    a[i * 2 * 6 + 1] = sy;
-    a[i * 2 * 6 + 2] = 0;
-    a[i * 2 * 6 + 3] = 0;
-    a[i * 2 * 6 + 4] = 1;
-    a[i * 2 * 6 + 5] = 0;
-    a[(i * 2 + 1) * 6 + 0] = 0;
-    a[(i * 2 + 1) * 6 + 1] = 0;
-    a[(i * 2 + 1) * 6 + 2] = sx;
-    a[(i * 2 + 1) * 6 + 3] = sy;
-    a[(i * 2 + 1) * 6 + 4] = 0;
-    a[(i * 2 + 1) * 6 + 5] = 1;
-
-    b[2 * i] = dx;
-    b[2 * i + 1] = dy;
-  }
-  if (pseudo_inverse(temp, a, np2, 6)) {
-    aom_free(a);
-    return 1;
-  }
-  multiply_mat(temp, b, mat, 6, np2, 1);
-  denormalize_affine(mat, T1, T2);
-  aom_free(a);
-  return 0;
-}
-
-int find_homography(const int np, double *pts1, double *pts2, double *mat) {
-  // Implemented from Peter Kovesi's normalized implementation
-  const int np3 = np * 3;
-  double *a = (double *)aom_malloc(sizeof(*a) * np3 * 18);
-  double *U = a + np3 * 9;
-  double S[9], V[9 * 9];
-  int i, mini;
-  double sx, sy, dx, dy;
-  double T1[9], T2[9];
-
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    a[i * 3 * 9 + 0] = a[i * 3 * 9 + 1] = a[i * 3 * 9 + 2] = 0;
-    a[i * 3 * 9 + 3] = -sx;
-    a[i * 3 * 9 + 4] = -sy;
-    a[i * 3 * 9 + 5] = -1;
-    a[i * 3 * 9 + 6] = dy * sx;
-    a[i * 3 * 9 + 7] = dy * sy;
-    a[i * 3 * 9 + 8] = dy;
-
-    a[(i * 3 + 1) * 9 + 0] = sx;
-    a[(i * 3 + 1) * 9 + 1] = sy;
-    a[(i * 3 + 1) * 9 + 2] = 1;
-    a[(i * 3 + 1) * 9 + 3] = a[(i * 3 + 1) * 9 + 4] = a[(i * 3 + 1) * 9 + 5] =
-        0;
-    a[(i * 3 + 1) * 9 + 6] = -dx * sx;
-    a[(i * 3 + 1) * 9 + 7] = -dx * sy;
-    a[(i * 3 + 1) * 9 + 8] = -dx;
-
-    a[(i * 3 + 2) * 9 + 0] = -dy * sx;
-    a[(i * 3 + 2) * 9 + 1] = -dy * sy;
-    a[(i * 3 + 2) * 9 + 2] = -dy;
-    a[(i * 3 + 2) * 9 + 3] = dx * sx;
-    a[(i * 3 + 2) * 9 + 4] = dx * sy;
-    a[(i * 3 + 2) * 9 + 5] = dx;
-    a[(i * 3 + 2) * 9 + 6] = a[(i * 3 + 2) * 9 + 7] = a[(i * 3 + 2) * 9 + 8] =
-        0;
-  }
-
-  if (SVD(U, S, V, a, np3, 9)) {
-    aom_free(a);
-    return 1;
-  } else {
-    double minS = 1e12;
-    mini = -1;
-    for (i = 0; i < 9; ++i) {
-      if (S[i] < minS) {
-        minS = S[i];
-        mini = i;
-      }
-    }
-  }
-
-  for (i = 0; i < 9; i++) mat[i] = V[i * 9 + mini];
-  denormalize_homography(mat, T1, T2);
-  aom_free(a);
-  if (mat[8] == 0.0) {
-    return 1;
-  }
-  return 0;
-}
-
 int ransac_translation(double *matched_points, int npoints,
                        int *number_of_inliers, int *best_inlier_mask,
                        double *best_params) {
@@ -860,7 +325,7 @@
                 best_params, 3, 2, is_degenerate_translation,
                 NULL,  // normalize_homography,
                 NULL,  // denormalize_rotzoom,
-                find_translation, project_points_translation, TRANSLATION);
+                find_translation, project_points_double_translation);
 }
 
 int ransac_rotzoom(double *matched_points, int npoints, int *number_of_inliers,
@@ -869,7 +334,7 @@
                 best_params, 3, 4, is_degenerate_affine,
                 NULL,  // normalize_homography,
                 NULL,  // denormalize_rotzoom,
-                find_rotzoom, project_points_rotzoom, ROTZOOM);
+                find_rotzoom, project_points_double_rotzoom);
 }
 
 int ransac_affine(double *matched_points, int npoints, int *number_of_inliers,
@@ -878,7 +343,7 @@
                 best_params, 3, 6, is_degenerate_affine,
                 NULL,  // normalize_homography,
                 NULL,  // denormalize_affine,
-                find_affine, project_points_affine, AFFINE);
+                find_affine, project_points_double_affine);
 }
 
 int ransac_homography(double *matched_points, int npoints,
@@ -889,7 +354,7 @@
              best_params, 4, 8, is_degenerate_homography,
              NULL,  // normalize_homography,
              NULL,  // denormalize_homography,
-             find_homography, project_points_homography, HOMOGRAPHY);
+             find_homography, project_points_double_homography);
   if (!result) {
     // normalize so that H33 = 1
     int i;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 3d00687..44b56bd 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1385,14 +1385,14 @@
   const int is_inter = is_inter_block(mbmi);
 #if CONFIG_EXT_TX
 #if CONFIG_RECT_TX
-  int evaulate_rect_tx = 0;
+  int evaluate_rect_tx = 0;
 #endif  // CONFIG_RECT_TX
   int ext_tx_set;
 #endif  // CONFIG_EXT_TX
 
   if (tx_select) {
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-    evaulate_rect_tx = is_rect_tx_allowed(mbmi);
+    evaluate_rect_tx = is_rect_tx_allowed(xd, mbmi);
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
     start_tx = max_tx_size;
     end_tx = 0;
@@ -1400,8 +1400,8 @@
     const TX_SIZE chosen_tx_size =
         tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-    evaulate_rect_tx = is_rect_tx(chosen_tx_size);
-    assert(IMPLIES(evaulate_rect_tx, is_rect_tx_allowed(mbmi)));
+    evaluate_rect_tx = is_rect_tx(chosen_tx_size);
+    assert(IMPLIES(evaluate_rect_tx, is_rect_tx_allowed(xd, mbmi)));
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
     start_tx = chosen_tx_size;
     end_tx = chosen_tx_size;
@@ -1415,7 +1415,7 @@
   mbmi->tx_type = tx_type;
 
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-  if (evaulate_rect_tx) {
+  if (evaluate_rect_tx) {
     const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
     const int ext_tx_set = get_ext_tx_set(rect_tx_size, bs, 1);
     if (ext_tx_used_inter[ext_tx_set][tx_type]) {
@@ -3214,7 +3214,7 @@
   mbmi->tx_type = tx_type;
   inter_block_yrd(cpi, x, rate, dist, skippable, sse, bsize, ref_best_rd);
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
-  if (is_rect_tx_allowed(mbmi)) {
+  if (is_rect_tx_allowed(xd, mbmi)) {
     int rate_rect_tx, skippable_rect_tx = 0;
     int64_t dist_rect_tx, sse_rect_tx, rd, rd_rect_tx;
     int tx_size_cat = inter_tx_size_cat_lookup[bsize];
diff --git a/av1/encoder/subexp.c b/av1/encoder/subexp.c
index aa02e53..0ca5247 100644
--- a/av1/encoder/subexp.c
+++ b/av1/encoder/subexp.c
@@ -8,7 +8,7 @@
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
-#include "av1/encoder/bitwriter.h"
+#include "aom_dsp/bitwriter.h"
 
 #include "av1/common/common.h"
 #include "av1/common/entropy.h"
diff --git a/av1/encoder/treewriter.h b/av1/encoder/treewriter.h
index eb7f0a7..a7b38b9 100644
--- a/av1/encoder/treewriter.h
+++ b/av1/encoder/treewriter.h
@@ -17,7 +17,7 @@
 #define tree_writer aom_dk_writer
 #define tree_bit_write aom_dk_write
 #else
-#include "av1/encoder/bitwriter.h"
+#include "aom_dsp/bitwriter.h"
 #define tree_writer aom_writer
 #define tree_bit_write aom_write
 #endif
diff --git a/test/ans_test.cc b/test/ans_test.cc
new file mode 100644
index 0000000..ca38de2
--- /dev/null
+++ b/test/ans_test.cc
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <ctime>
+#include <utility>
+#include <vector>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "aom_dsp/ansreader.h"
+#include "aom_dsp/answriter.h"
+
+namespace {
+typedef std::vector<std::pair<uint8_t, bool> > PvVec;
+
+const int kPrintStats = 0;
+
+PvVec abs_encode_build_vals(int iters) {
+  PvVec ret;
+  libaom_test::ACMRandom gen(0x30317076);
+  double entropy = 0;
+  for (int i = 0; i < iters; ++i) {
+    uint8_t p;
+    do {
+      p = gen.Rand8();
+    } while (p == 0);  // zero is not a valid coding probability
+    bool b = gen.Rand8() < p;
+    ret.push_back(std::make_pair(static_cast<uint8_t>(p), b));
+    if (kPrintStats) {
+      double d = p / 256.;
+      entropy += -d * log2(d) - (1 - d) * log2(1 - d);
+    }
+  }
+  if (kPrintStats) printf("entropy %f\n", entropy);
+  return ret;
+}
+
+bool check_uabs(const PvVec &pv_vec, uint8_t *buf) {
+  AnsCoder a;
+  ans_write_init(&a, buf);
+
+  std::clock_t start = std::clock();
+  for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend();
+       ++it) {
+    uabs_write(&a, it->second, 256 - it->first);
+  }
+  std::clock_t enc_time = std::clock() - start;
+  int offset = ans_write_end(&a);
+  bool okay = true;
+  AnsDecoder d;
+  if (ans_read_init(&d, buf, offset)) return false;
+  start = std::clock();
+  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+    okay &= uabs_read(&d, 256 - it->first) == it->second;
+  }
+  std::clock_t dec_time = std::clock() - start;
+  if (!okay) return false;
+  if (kPrintStats)
+    printf("uABS size %d enc_time %f dec_time %f\n", offset,
+           static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+           static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+  return ans_read_end(&d);
+}
+
+// TODO(aconverse@google.com): replace this with a more representative
+// distribution from the codec.
+const rans_sym rans_sym_tab[] = {
+  { 67, 0 }, { 99, 67 }, { 575, 166 }, { 283, 741 },
+};
+
+std::vector<int> ans_encode_build_vals(const rans_sym *tab, int iters) {
+  std::vector<int> p_to_sym;
+  int i = 0;
+  while (p_to_sym.size() < RANS_PRECISION) {
+    p_to_sym.insert(p_to_sym.end(), tab[i].prob, i);
+    ++i;
+  }
+  assert(p_to_sym.size() == RANS_PRECISION);
+  std::vector<int> ret;
+  libaom_test::ACMRandom gen(18543637);
+  for (int i = 0; i < iters; ++i) {
+    int sym = p_to_sym[gen.Rand8() * 4];
+    ret.push_back(sym);
+  }
+  return ret;
+}
+
+void rans_build_dec_tab(const struct rans_sym sym_tab[], rans_lut dec_tab) {
+  dec_tab[0] = 0;
+  for (int i = 1; dec_tab[i - 1] < RANS_PRECISION; ++i) {
+    dec_tab[i] = dec_tab[i - 1] + sym_tab[i - 1].prob;
+  }
+}
+
+bool check_rans(const std::vector<int> &sym_vec, const rans_sym *const tab,
+                uint8_t *buf) {
+  AnsCoder a;
+  ans_write_init(&a, buf);
+  rans_lut dec_tab;
+  rans_build_dec_tab(tab, dec_tab);
+
+  std::clock_t start = std::clock();
+  for (std::vector<int>::const_reverse_iterator it = sym_vec.rbegin();
+       it != sym_vec.rend(); ++it) {
+    rans_write(&a, &tab[*it]);
+  }
+  std::clock_t enc_time = std::clock() - start;
+  int offset = ans_write_end(&a);
+  bool okay = true;
+  AnsDecoder d;
+  if (ans_read_init(&d, buf, offset)) return false;
+  start = std::clock();
+  for (std::vector<int>::const_iterator it = sym_vec.begin();
+       it != sym_vec.end(); ++it) {
+    okay &= rans_read(&d, dec_tab) == *it;
+  }
+  std::clock_t dec_time = std::clock() - start;
+  if (!okay) return false;
+  if (kPrintStats)
+    printf("rANS size %d enc_time %f dec_time %f\n", offset,
+           static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+           static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+  return ans_read_end(&d);
+}
+
+class AbsTest : public ::testing::Test {
+ protected:
+  static void SetUpTestCase() { pv_vec_ = abs_encode_build_vals(kNumBools); }
+  virtual void SetUp() { buf_ = new uint8_t[kNumBools / 8]; }
+  virtual void TearDown() { delete[] buf_; }
+  static const int kNumBools = 100000000;
+  static PvVec pv_vec_;
+  uint8_t *buf_;
+};
+PvVec AbsTest::pv_vec_;
+
+class AnsTest : public ::testing::Test {
+ protected:
+  static void SetUpTestCase() {
+    sym_vec_ = ans_encode_build_vals(rans_sym_tab, kNumSyms);
+  }
+  virtual void SetUp() { buf_ = new uint8_t[kNumSyms / 2]; }
+  virtual void TearDown() { delete[] buf_; }
+  static const int kNumSyms = 25000000;
+  static std::vector<int> sym_vec_;
+  uint8_t *buf_;
+};
+std::vector<int> AnsTest::sym_vec_;
+
+TEST_F(AbsTest, Uabs) { EXPECT_TRUE(check_uabs(pv_vec_, buf_)); }
+TEST_F(AnsTest, Rans) { EXPECT_TRUE(check_rans(sym_vec_, rans_sym_tab, buf_)); }
+}  // namespace
diff --git a/test/av1_ans_test.cc b/test/av1_ans_test.cc
deleted file mode 100644
index b0cec90..0000000
--- a/test/av1_ans_test.cc
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#define AV1_FORCE_AOMBOOL_TREEWRITER
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-#include <ctime>
-#include <utility>
-#include <vector>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "aom_dsp/ans.h"
-#include "aom_dsp/bitreader.h"
-#include "aom_dsp/bitwriter.h"
-#include "aom_dsp/dkboolwriter.h"
-#include "av1/encoder/treewriter.h"
-#include "test/acm_random.h"
-
-namespace {
-typedef std::vector<std::pair<uint8_t, bool> > PvVec;
-
-PvVec abs_encode_build_vals(int iters) {
-  PvVec ret;
-  libaom_test::ACMRandom gen(0x30317076);
-  double entropy = 0;
-  for (int i = 0; i < iters; ++i) {
-    uint8_t p;
-    do {
-      p = gen.Rand8();
-    } while (p == 0);  // zero is not a valid coding probability
-    bool b = gen.Rand8() < p;
-    ret.push_back(std::make_pair(static_cast<uint8_t>(p), b));
-    double d = p / 256.;
-    entropy += -d * log2(d) - (1 - d) * log2(1 - d);
-  }
-  printf("entropy %f\n", entropy);
-  return ret;
-}
-
-bool check_rabs(const PvVec &pv_vec, uint8_t *buf) {
-  AnsCoder a;
-  ans_write_init(&a, buf);
-
-  std::clock_t start = std::clock();
-  for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend();
-       ++it) {
-    rabs_write(&a, it->second, 256 - it->first);
-  }
-  std::clock_t enc_time = std::clock() - start;
-  int offset = ans_write_end(&a);
-  bool okay = true;
-  AnsDecoder d;
-  if (ans_read_init(&d, buf, offset)) return false;
-  start = std::clock();
-  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
-    okay &= rabs_read(&d, 256 - it->first) == it->second;
-  }
-  std::clock_t dec_time = std::clock() - start;
-  if (!okay) return false;
-  printf("rABS size %d enc_time %f dec_time %f\n", offset,
-         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
-         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
-  return ans_read_end(&d);
-}
-
-bool check_rabs_asc(const PvVec &pv_vec, uint8_t *buf) {
-  AnsCoder a;
-  ans_write_init(&a, buf);
-
-  std::clock_t start = std::clock();
-  for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend();
-       ++it) {
-    rabs_asc_write(&a, it->second, 256 - it->first);
-  }
-  std::clock_t enc_time = std::clock() - start;
-  int offset = ans_write_end(&a);
-  bool okay = true;
-  AnsDecoder d;
-  if (ans_read_init(&d, buf, offset)) return false;
-  start = std::clock();
-  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
-    okay &= rabs_asc_read(&d, 256 - it->first) == it->second;
-  }
-  std::clock_t dec_time = std::clock() - start;
-  if (!okay) return false;
-  printf("rABS (asc) size %d enc_time %f dec_time %f\n", offset,
-         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
-         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
-  return ans_read_end(&d);
-}
-
-bool check_uabs(const PvVec &pv_vec, uint8_t *buf) {
-  AnsCoder a;
-  ans_write_init(&a, buf);
-
-  std::clock_t start = std::clock();
-  for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend();
-       ++it) {
-    uabs_write(&a, it->second, 256 - it->first);
-  }
-  std::clock_t enc_time = std::clock() - start;
-  int offset = ans_write_end(&a);
-  bool okay = true;
-  AnsDecoder d;
-  if (ans_read_init(&d, buf, offset)) return false;
-  start = std::clock();
-  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
-    okay &= uabs_read(&d, 256 - it->first) == it->second;
-  }
-  std::clock_t dec_time = std::clock() - start;
-  if (!okay) return false;
-  printf("uABS size %d enc_time %f dec_time %f\n", offset,
-         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
-         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
-  return ans_read_end(&d);
-}
-
-bool check_aombool(const PvVec &pv_vec, uint8_t *buf) {
-  aom_dk_writer w;
-  aom_reader r;
-  aom_dk_start_encode(&w, buf);
-
-  std::clock_t start = std::clock();
-  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
-    aom_dk_write(&w, it->second, 256 - it->first);
-  }
-  std::clock_t enc_time = std::clock() - start;
-  aom_dk_stop_encode(&w);
-  bool okay = true;
-  aom_reader_init(&r, buf, w.pos, NULL, NULL);
-  start = std::clock();
-  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
-    okay &= aom_read(&r, 256 - it->first) == it->second;
-  }
-  std::clock_t dec_time = std::clock() - start;
-  printf("AOM size %d enc_time %f dec_time %f\n", w.pos,
-         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
-         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
-  return okay;
-}
-
-// TODO(aconverse): replace this with a more representative distribution from
-// the codec.
-const rans_sym rans_sym_tab[] = {
-  { 16 * 4, 0 * 4 },
-  { 100 * 4, 16 * 4 },
-  { 70 * 4, 116 * 4 },
-  { 70 * 4, 186 * 4 },
-};
-const int kDistinctSyms = sizeof(rans_sym_tab) / sizeof(rans_sym_tab[0]);
-
-std::vector<int> ans_encode_build_vals(const rans_sym *tab, int iters) {
-  std::vector<int> p_to_sym;
-  int i = 0;
-  while (p_to_sym.size() < rans_precision) {
-    p_to_sym.insert(p_to_sym.end(), tab[i].prob, i);
-    ++i;
-  }
-  assert(p_to_sym.size() == rans_precision);
-  std::vector<int> ret;
-  libaom_test::ACMRandom gen(18543637);
-  for (int i = 0; i < iters; ++i) {
-    int sym = p_to_sym[gen.Rand8() * 4];
-    ret.push_back(sym);
-  }
-  return ret;
-}
-
-void rans_build_dec_tab(const struct rans_sym sym_tab[], rans_dec_lut dec_tab) {
-  dec_tab[0] = 0;
-  for (int i = 1; dec_tab[i - 1] < rans_precision; ++i) {
-    dec_tab[i] = dec_tab[i - 1] + sym_tab[i - 1].prob;
-  }
-}
-
-bool check_rans(const std::vector<int> &sym_vec, const rans_sym *const tab,
-                uint8_t *buf) {
-  AnsCoder a;
-  ans_write_init(&a, buf);
-  rans_dec_lut dec_tab;
-  rans_build_dec_tab(tab, dec_tab);
-
-  std::clock_t start = std::clock();
-  for (std::vector<int>::const_reverse_iterator it = sym_vec.rbegin();
-       it != sym_vec.rend(); ++it) {
-    rans_write(&a, &tab[*it]);
-  }
-  std::clock_t enc_time = std::clock() - start;
-  int offset = ans_write_end(&a);
-  bool okay = true;
-  AnsDecoder d;
-  if (ans_read_init(&d, buf, offset)) return false;
-  start = std::clock();
-  for (std::vector<int>::const_iterator it = sym_vec.begin();
-       it != sym_vec.end(); ++it) {
-    okay &= rans_read(&d, dec_tab) == *it;
-  }
-  std::clock_t dec_time = std::clock() - start;
-  if (!okay) return false;
-  printf("rANS size %d enc_time %f dec_time %f\n", offset,
-         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
-         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
-  return ans_read_end(&d);
-}
-
-void build_tree(aom_tree_index *tree, int num_syms) {
-  aom_tree_index i;
-  int sym = 0;
-  for (i = 0; i < num_syms - 1; ++i) {
-    tree[2 * i] = sym--;
-    tree[2 * i + 1] = 2 * (i + 1);
-  }
-  tree[2 * i - 1] = sym;
-}
-
-/* The treep array contains the probabilities of nodes of a tree structured
- * like:
- *          *
- *         / \
- *    -sym0   *
- *           / \
- *       -sym1  *
- *             / \
- *        -sym2  -sym3
- */
-void tab2tree(const rans_sym *tab, int tab_size, aom_prob *treep) {
-  const unsigned basep = rans_precision;
-  unsigned pleft = basep;
-  for (int i = 0; i < tab_size - 1; ++i) {
-    unsigned prob = (tab[i].prob * basep + basep * 2) / (pleft * 4);
-    assert(prob > 0 && prob < 256);
-    treep[i] = prob;
-    pleft -= tab[i].prob;
-  }
-}
-
-struct sym_bools {
-  unsigned bits;
-  int len;
-};
-
-static void make_tree_bits_tab(sym_bools *tab, int num_syms) {
-  unsigned bits = 0;
-  int len = 0;
-  int i;
-  for (i = 0; i < num_syms - 1; ++i) {
-    bits *= 2;
-    ++len;
-    tab[i].bits = bits;
-    tab[i].len = len;
-    ++bits;
-  }
-  tab[i].bits = bits;
-  tab[i].len = len;
-}
-
-void build_tpb(aom_prob probs[/*num_syms*/],
-               aom_tree_index tree[/*2*num_syms*/],
-               sym_bools bit_len[/*num_syms*/],
-               const rans_sym sym_tab[/*num_syms*/], int num_syms) {
-  tab2tree(sym_tab, num_syms, probs);
-  build_tree(tree, num_syms);
-  make_tree_bits_tab(bit_len, num_syms);
-}
-
-bool check_aomtree(const std::vector<int> &sym_vec, const rans_sym *sym_tab,
-                   uint8_t *buf) {
-  aom_dk_writer w;
-  aom_reader r;
-  aom_dk_start_encode(&w, buf);
-
-  aom_prob probs[kDistinctSyms];
-  aom_tree_index tree[2 * kDistinctSyms];
-  sym_bools bit_len[kDistinctSyms];
-  build_tpb(probs, tree, bit_len, sym_tab, kDistinctSyms);
-
-  std::clock_t start = std::clock();
-  for (std::vector<int>::const_iterator it = sym_vec.begin();
-       it != sym_vec.end(); ++it) {
-    av1_write_tree(&w, tree, probs, bit_len[*it].bits, bit_len[*it].len, 0);
-  }
-  std::clock_t enc_time = std::clock() - start;
-  aom_dk_stop_encode(&w);
-  aom_reader_init(&r, buf, w.pos, NULL, NULL);
-  start = std::clock();
-  for (std::vector<int>::const_iterator it = sym_vec.begin();
-       it != sym_vec.end(); ++it) {
-    if (aom_read_tree(&r, tree, probs) != *it) return false;
-  }
-  std::clock_t dec_time = std::clock() - start;
-  printf("AOMtree size %u enc_time %f dec_time %f\n", w.pos,
-         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
-         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
-  return true;
-}
-
-class Av1AbsTest : public ::testing::Test {
- protected:
-  static void SetUpTestCase() { pv_vec_ = abs_encode_build_vals(kNumBools); }
-  virtual void SetUp() { buf_ = new uint8_t[kNumBools / 8]; }
-  virtual void TearDown() { delete[] buf_; }
-  static const int kNumBools = 100000000;
-  static PvVec pv_vec_;
-  uint8_t *buf_;
-};
-PvVec Av1AbsTest::pv_vec_;
-
-class Av1AnsTest : public ::testing::Test {
- protected:
-  static void SetUpTestCase() {
-    sym_vec_ = ans_encode_build_vals(rans_sym_tab, kNumSyms);
-  }
-  virtual void SetUp() { buf_ = new uint8_t[kNumSyms / 2]; }
-  virtual void TearDown() { delete[] buf_; }
-  static const int kNumSyms = 25000000;
-  static std::vector<int> sym_vec_;
-  uint8_t *buf_;
-};
-std::vector<int> Av1AnsTest::sym_vec_;
-
-TEST_F(Av1AbsTest, Avxbool) { EXPECT_TRUE(check_aombool(pv_vec_, buf_)); }
-TEST_F(Av1AbsTest, Rabs) { EXPECT_TRUE(check_rabs(pv_vec_, buf_)); }
-TEST_F(Av1AbsTest, RabsAsc) { EXPECT_TRUE(check_rabs_asc(pv_vec_, buf_)); }
-TEST_F(Av1AbsTest, Uabs) { EXPECT_TRUE(check_uabs(pv_vec_, buf_)); }
-
-TEST_F(Av1AnsTest, Rans) {
-  EXPECT_TRUE(check_rans(sym_vec_, rans_sym_tab, buf_));
-}
-TEST_F(Av1AnsTest, Avxtree) {
-  EXPECT_TRUE(check_aomtree(sym_vec_, rans_sym_tab, buf_));
-}
-}  // namespace
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 42fe699..227e2e2 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -910,6 +910,11 @@
                                  &idct16x16_10_add_12_sse2, 3167, AOM_BITS_12),
                       make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2,
                                  3167, AOM_BITS_12)));
+// TODO(luoyi):
+// For this test case, we should test function: aom_highbd_fdct16x16_1_sse2.
+// However, this function is not available yet. If we mistakenly test
+// aom_fdct16x16_1_sse2, it could only pass AOM_BITS_8/AOM_BITS_10 but not
+// AOM_BITS_12.
 INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
                         ::testing::Values(make_tuple(&aom_fdct16x16_1_sse2,
                                                      AOM_BITS_8)));
diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc
index 3177769..95a0eb5 100644
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -19,7 +19,7 @@
 namespace libaom_test {
 
 const char kVP8Name[] = "WebM Project VP8";
-const char kAV1Name[] = "WebM Project AV1";
+const char kAV1Name[] = "AOMedia Project AV1 Decoder";
 
 aom_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size,
                                     aom_codec_stream_info_t *stream_info) {
diff --git a/test/test.mk b/test/test.mk
index ce9e342..a41efbe 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -102,7 +102,7 @@
 LIBAOM_TEST_SRCS-yes                   += superframe_test.cc
 LIBAOM_TEST_SRCS-yes                   += tile_independence_test.cc
 ifeq ($(CONFIG_ANS),yes)
-LIBAOM_TEST_SRCS-yes                   += av1_ans_test.cc
+LIBAOM_TEST_SRCS-yes                   += ans_test.cc
 else
 LIBAOM_TEST_SRCS-yes                   += boolcoder_test.cc
 endif