Merge "Add min_tx_size variable to recursive transform block partition system" into nextgenv2
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bfaa1f6..6ff8cef 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,8 +12,12 @@
 project(AOM C CXX)
 
 set(AOM_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
+set(AOM_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}")
+include("${AOM_ROOT}/build/cmake/aom_configure.cmake")
 
 set(AOM_SRCS
+    "${AOM_CONFIG_DIR}/aom_config.c"
+    "${AOM_CONFIG_DIR}/aom_config.h"
     "${AOM_ROOT}/aom/aom.h"
     "${AOM_ROOT}/aom/aom_codec.h"
     "${AOM_ROOT}/aom/aom_decoder.h"
@@ -242,8 +246,10 @@
 
 # Targets
 add_library(aom_dsp ${AOM_DSP_SRCS})
+include_directories(${AOM_ROOT} ${AOM_CONFIG_DIR})
 add_library(aom_mem ${AOM_MEM_SRCS})
 add_library(aom_scale ${AOM_SCALE_SRCS})
+include_directories(${AOM_ROOT} ${AOM_CONFIG_DIR})
 add_library(aom_util ${AOM_UTIL_SRCS})
 add_library(aom_av1_decoder ${AOM_AV1_DECODER_SRCS})
 add_library(aom_av1_encoder ${AOM_AV1_ENCODER_SRCS})
diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk
index eebdc0c..07fbe02 100644
--- a/aom_dsp/aom_dsp.mk
+++ b/aom_dsp/aom_dsp.mk
@@ -216,6 +216,24 @@
 DSP_SRCS-$(HAVE_MSA)    += mips/fwd_dct32x32_msa.c
 endif  # CONFIG_AV1_ENCODER
 
+ifeq ($(CONFIG_PVQ),yes)
+DSP_SRCS-yes            += fwd_txfm.c
+DSP_SRCS-yes            += fwd_txfm.h
+DSP_SRCS-$(HAVE_SSE2)   += x86/fwd_txfm_sse2.h
+DSP_SRCS-$(HAVE_SSE2)   += x86/fwd_txfm_sse2.c
+DSP_SRCS-$(HAVE_SSE2)   += x86/fwd_txfm_impl_sse2.h
+DSP_SRCS-$(HAVE_SSE2)   += x86/fwd_dct32x32_impl_sse2.h
+ifeq ($(ARCH_X86_64),yes)
+DSP_SRCS-$(HAVE_SSSE3)  += x86/fwd_txfm_ssse3_x86_64.asm
+endif
+DSP_SRCS-$(HAVE_AVX2)   += x86/fwd_txfm_avx2.c
+DSP_SRCS-$(HAVE_AVX2)   += x86/fwd_dct32x32_impl_avx2.h
+DSP_SRCS-$(HAVE_NEON)   += arm/fwd_txfm_neon.c
+DSP_SRCS-$(HAVE_MSA)    += mips/fwd_txfm_msa.h
+DSP_SRCS-$(HAVE_MSA)    += mips/fwd_txfm_msa.c
+DSP_SRCS-$(HAVE_MSA)    += mips/fwd_dct32x32_msa.c
+endif  # CONFIG_PVQ
+
 # inverse transform
 ifeq ($(CONFIG_AV1), yes)
 DSP_SRCS-yes            += inv_txfm.h
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index b073b1b..94e2587 100644
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -699,7 +699,7 @@
 #
 # Forward transform
 #
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+if ((aom_config("CONFIG_AV1_ENCODER") eq "yes") || (aom_config("CONFIG_PVQ") eq "yes")){
   if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
     add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
     specialize qw/aom_fdct4x4 sse2/;
diff --git a/aom_dsp/daalaboolreader.h b/aom_dsp/daalaboolreader.h
index 9d6cebd..8977995 100644
--- a/aom_dsp/daalaboolreader.h
+++ b/aom_dsp/daalaboolreader.h
@@ -41,7 +41,7 @@
 
 static INLINE int aom_daala_read(daala_reader *r, int prob) {
   if (prob == 128) {
-    return od_ec_dec_bits(&r->ec, 1);
+    return od_ec_dec_bits(&r->ec, 1, "aom_bits");
   } else {
     int p = ((prob << 15) + (256 - prob)) >> 8;
     return od_ec_decode_bool_q15(&r->ec, p);
diff --git a/aom_dsp/entcode.c b/aom_dsp/entcode.c
index 49284b0..ff8e8e2 100644
--- a/aom_dsp/entcode.c
+++ b/aom_dsp/entcode.c
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2012 Daala project contributors.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
 
 #ifdef HAVE_CONFIG_H
 #include "./config.h"
diff --git a/aom_dsp/entcode.h b/aom_dsp/entcode.h
index 77ed171..91fcb67 100644
--- a/aom_dsp/entcode.h
+++ b/aom_dsp/entcode.h
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
 
 #if !defined(_entcode_H)
 #define _entcode_H (1)
diff --git a/aom_dsp/entdec.c b/aom_dsp/entdec.c
index 18563b2..b015956 100644
--- a/aom_dsp/entdec.c
+++ b/aom_dsp/entdec.c
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
 
 #ifdef HAVE_CONFIG_H
 #include "./config.h"
@@ -440,7 +427,7 @@
     ftb = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS;
     ft1 = (int)(ft >> ftb) + 1;
     t = od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft1), ft1);
-    t = t << ftb | od_ec_dec_bits(dec, ftb);
+    t = t << ftb | od_ec_dec_bits(dec, ftb, "");
     if (t <= ft) return t;
     dec->error = 1;
     return ft;
@@ -453,7 +440,7 @@
   ftb: The number of bits to extract.
        This must be between 0 and 25, inclusive.
   Return: The decoded bits.*/
-uint32_t od_ec_dec_bits(od_ec_dec *dec, unsigned ftb) {
+uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb) {
   od_ec_window window;
   int available;
   uint32_t ret;
diff --git a/aom_dsp/entdec.h b/aom_dsp/entdec.h
index 80363b5..6d6e2b5 100644
--- a/aom_dsp/entdec.h
+++ b/aom_dsp/entdec.h
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
 
 #if !defined(_entdec_H)
 #define _entdec_H (1)
@@ -33,6 +20,14 @@
 
 typedef struct od_ec_dec od_ec_dec;
 
+#if OD_ACCOUNTING
+#define OD_ACC_STR , char *acc_str
+#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb, str)
+#else
+#define OD_ACC_STR
+#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb)
+#endif
+
 /*The entropy decoder context.*/
 struct od_ec_dec {
   /*The start of the current input buffer.*/
@@ -91,7 +86,7 @@
 OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_uint(od_ec_dec *dec, uint32_t ft)
     OD_ARG_NONNULL(1);
 
-OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits(od_ec_dec *dec, unsigned ftb)
+OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb)
     OD_ARG_NONNULL(1);
 
 OD_WARN_UNUSED_RESULT int od_ec_dec_tell(const od_ec_dec *dec)
diff --git a/aom_dsp/entenc.c b/aom_dsp/entenc.c
index 3e9cb62..390f61b 100644
--- a/aom_dsp/entenc.c
+++ b/aom_dsp/entenc.c
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
 
 #ifdef HAVE_CONFIG_H
 #include "./config.h"
diff --git a/aom_dsp/entenc.h b/aom_dsp/entenc.h
index 32163f7..5e121b6 100644
--- a/aom_dsp/entenc.h
+++ b/aom_dsp/entenc.h
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
 
 #if !defined(_entenc_H)
 #define _entenc_H (1)
diff --git a/aom_ports/aom_timer.h b/aom_ports/aom_timer.h
index cc5203c..904f2fe 100644
--- a/aom_ports/aom_timer.h
+++ b/aom_ports/aom_timer.h
@@ -97,11 +97,14 @@
   void *dummy;
 };
 
-static INLINE void aom_usec_timer_start(struct aom_usec_timer *t) {}
+static INLINE void aom_usec_timer_start(struct aom_usec_timer *t) { (void)t; }
 
-static INLINE void aom_usec_timer_mark(struct aom_usec_timer *t) {}
+static INLINE void aom_usec_timer_mark(struct aom_usec_timer *t) { (void)t; }
 
-static INLINE int aom_usec_timer_elapsed(struct aom_usec_timer *t) { return 0; }
+static INLINE int aom_usec_timer_elapsed(struct aom_usec_timer *t) {
+  (void)t;
+  return 0;
+}
 
 #endif /* CONFIG_OS_SUPPORT */
 
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index 0fe4a89..3571323 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -106,6 +106,24 @@
 AV1_COMMON_SRCS-yes += common/odintrin.c
 AV1_COMMON_SRCS-yes += common/odintrin.h
 
+ifeq ($(CONFIG_PVQ),yes)
+# PVQ from daala
+AV1_COMMON_SRCS-yes += common/pvq.c
+AV1_COMMON_SRCS-yes += common/pvq.h
+AV1_COMMON_SRCS-yes += common/partition.c
+AV1_COMMON_SRCS-yes += common/partition.h
+AV1_COMMON_SRCS-yes += common/zigzag4.c
+AV1_COMMON_SRCS-yes += common/zigzag8.c
+AV1_COMMON_SRCS-yes += common/zigzag16.c
+AV1_COMMON_SRCS-yes += common/zigzag32.c
+AV1_COMMON_SRCS-yes += common/zigzag.h
+AV1_COMMON_SRCS-yes += common/generic_code.c
+AV1_COMMON_SRCS-yes += common/generic_code.h
+AV1_COMMON_SRCS-yes += common/pvq_state.c
+AV1_COMMON_SRCS-yes += common/pvq_state.h
+AV1_COMMON_SRCS-yes += common/laplace_tables.c
+endif
+
 ifneq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
 AV1_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/itrans4_dspr2.c
 AV1_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/itrans8_dspr2.c
diff --git a/av1/av1_cx.mk b/av1/av1_cx.mk
index 3f85a5f..35a0447 100644
--- a/av1/av1_cx.mk
+++ b/av1/av1_cx.mk
@@ -112,6 +112,16 @@
 AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/clpf_rdo_sse4_1.c
 AV1_CX_SRCS-$(HAVE_NEON) += encoder/clpf_rdo_neon.c
 endif
+ifeq ($(CONFIG_PVQ),yes)
+# PVQ from daala
+AV1_CX_SRCS-yes += encoder/daala_compat_enc.c
+AV1_CX_SRCS-yes += encoder/pvq_encoder.c
+AV1_CX_SRCS-yes += encoder/pvq_encoder.h
+AV1_CX_SRCS-yes += encoder/encint.h
+AV1_CX_SRCS-yes += encoder/generic_encoder.c
+AV1_CX_SRCS-yes += encoder/laplace_encoder.c
+endif
+
 AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
 AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
 ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
diff --git a/av1/av1_dx.mk b/av1/av1_dx.mk
index 36eec30..81f526c 100644
--- a/av1/av1_dx.mk
+++ b/av1/av1_dx.mk
@@ -31,4 +31,29 @@
 AV1_DX_SRCS-yes += decoder/dsubexp.c
 AV1_DX_SRCS-yes += decoder/dsubexp.h
 
+ifeq ($(CONFIG_PVQ),yes)
+# PVQ from daala
+AV1_DX_SRCS-yes += decoder/pvq_decoder.c
+AV1_DX_SRCS-yes += decoder/pvq_decoder.h
+AV1_DX_SRCS-yes += decoder/decint.h
+AV1_DX_SRCS-yes += decoder/generic_decoder.c
+AV1_DX_SRCS-yes += decoder/laplace_decoder.c
+AV1_DX_SRCS-yes += encoder/hybrid_fwd_txfm.c
+AV1_DX_SRCS-yes += encoder/hybrid_fwd_txfm.h
+
+AV1_DX_SRCS-yes += encoder/dct.c
+AV1_DX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
+AV1_DX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c
+AV1_DX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
+
+ifneq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
+AV1_DX_SRCS-$(HAVE_NEON) += encoder/arm/neon/dct_neon.c
+endif
+
+AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct4x4_msa.c
+AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct8x8_msa.c
+AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct16x16_msa.c
+AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct_msa.h
+endif
+
 AV1_DX_SRCS-yes := $(filter-out $(AV1_DX_SRCS_REMOVE-yes),$(AV1_DX_SRCS-yes))
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index ee46820..f96dcf2 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -572,6 +572,196 @@
 }
 # end encoder functions
 
+# If PVQ is enabled, fwd transforms are required by decoder
+if (aom_config("CONFIG_PVQ") eq "yes") {
+# fdct functions
+
+if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
+  add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/av1_fht4x4 sse2/;
+
+  add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/av1_fht8x8 sse2/;
+
+  add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/av1_fht16x16 sse2/;
+
+  add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+  specialize qw/av1_fwht4x4 sse2/;
+  if (aom_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+    add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct4x4/;
+
+    add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct4x4_1/;
+
+    add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct8x8/;
+
+    add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct8x8_1/;
+
+    add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct16x16/;
+
+    add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct16x16_1/;
+
+    add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32/;
+
+    add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32_rd/;
+
+    add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32_1/;
+
+    add_proto qw/void av1_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct4x4/;
+
+    add_proto qw/void av1_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct8x8/;
+
+    add_proto qw/void av1_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct8x8_1/;
+
+    add_proto qw/void av1_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct16x16/;
+
+    add_proto qw/void av1_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct16x16_1/;
+
+    add_proto qw/void av1_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct32x32/;
+
+    add_proto qw/void av1_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct32x32_rd/;
+
+    add_proto qw/void av1_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct32x32_1/;
+  } else {
+    add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct4x4 sse2/;
+
+    add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct4x4_1 sse2/;
+
+    add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct8x8 sse2/;
+
+    add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct8x8_1 sse2/;
+
+    add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct16x16 sse2/;
+
+    add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct16x16_1 sse2/;
+
+    add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32 sse2/;
+
+    add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32_rd sse2/;
+
+    add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32_1 sse2/;
+
+    add_proto qw/void av1_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct4x4 sse2/;
+
+    add_proto qw/void av1_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct8x8 sse2/;
+
+    add_proto qw/void av1_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct8x8_1/;
+
+    add_proto qw/void av1_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct16x16 sse2/;
+
+    add_proto qw/void av1_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct16x16_1/;
+
+    add_proto qw/void av1_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct32x32 sse2/;
+
+    add_proto qw/void av1_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct32x32_rd sse2/;
+
+    add_proto qw/void av1_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_highbd_fdct32x32_1/;
+  }
+} else {
+  add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/av1_fht4x4 sse2 msa/;
+
+  add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/av1_fht8x8 sse2 msa/;
+
+  add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+  specialize qw/av1_fht16x16 sse2 msa/;
+
+  add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+  specialize qw/av1_fwht4x4 msa sse2/;
+  if (aom_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+    add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct4x4/;
+
+    add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct4x4_1/;
+
+    add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct8x8/;
+
+    add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct8x8_1/;
+
+    add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct16x16/;
+
+    add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct16x16_1/;
+
+    add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32/;
+
+    add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32_rd/;
+
+    add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32_1/;
+  } else {
+    add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct4x4 sse2/;
+
+    add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct4x4_1 sse2/;
+
+    add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct8x8 sse2/;
+
+    add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct8x8_1 sse2/;
+
+    add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct16x16 sse2/;
+
+    add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct16x16_1 sse2/;
+
+    add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32 sse2/;
+
+    add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32_rd sse2/;
+
+    add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/av1_fdct32x32_1 sse2/;
+  }
+}
+
+}
+
 # Deringing Functions
 
 if (aom_config("CONFIG_DERING") eq "yes") {
diff --git a/av1/common/blockd.c b/av1/common/blockd.c
index 27c874a..8cfd223 100644
--- a/av1/common/blockd.c
+++ b/av1/common/blockd.c
@@ -94,6 +94,7 @@
     av1_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
 }
 
+#if !CONFIG_PVQ
 void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
                       TX_SIZE tx_size, int has_eob, int aoff, int loff) {
   ENTROPY_CONTEXT *const a = pd->above_context + aoff;
@@ -130,6 +131,7 @@
     memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * txs_high);
   }
 }
+#endif
 
 void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) {
   int i;
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 8649b7d..31836fb 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -26,6 +26,11 @@
 #include "av1/common/scale.h"
 #include "av1/common/seg_common.h"
 #include "av1/common/tile_common.h"
+#if CONFIG_PVQ
+#include "av1/common/pvq.h"
+#include "av1/common/pvq_state.h"
+#include "av1/decoder/decint.h"
+#endif
 
 #ifdef __cplusplus
 extern "C" {
@@ -53,6 +58,33 @@
 #endif  // CONFIG_EXT_INTER
 }
 
+#if CONFIG_PVQ
+typedef struct PVQ_INFO {
+  int theta[PVQ_MAX_PARTITIONS];
+  int max_theta[PVQ_MAX_PARTITIONS];
+  int qg[PVQ_MAX_PARTITIONS];
+  int k[PVQ_MAX_PARTITIONS];
+  od_coeff y[OD_TXSIZE_MAX * OD_TXSIZE_MAX];
+  int nb_bands;
+  int off[PVQ_MAX_PARTITIONS];
+  int size[PVQ_MAX_PARTITIONS];
+  int skip_rest;
+  int skip_dir;
+  int bs;           // log of the block size minus two,
+                    // i.e. equivalent to aom's TX_SIZE
+  int ac_dc_coded;  // block skip info, indicating whether DC/AC is coded.
+                    // bit0: DC coded, bit1 : AC coded (1 means coded)
+  tran_low_t dq_dc_residue;
+} PVQ_INFO;
+
+typedef struct PVQ_QUEUE {
+  PVQ_INFO *buf;  // buffer for pvq info, stored in encoding order
+  int curr_pos;   // curr position to write PVQ_INFO
+  int buf_len;    // allocated buffer length
+  int last_pos;   // last written position of PVQ_INFO in a tile
+} PVQ_QUEUE;
+#endif
+
 #if CONFIG_EXT_INTER
 static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
   return mode >= NEARESTMV && mode <= NEWFROMNEARMV;
@@ -179,6 +211,23 @@
 } FILTER_INTRA_MODE_INFO;
 #endif  // CONFIG_FILTER_INTRA
 
+#if CONFIG_VAR_TX
+#define TXB_COEFF_COST_MAP_SIZE (2 * MAX_MIB_SIZE)
+
+// TODO(angiebird): Merge RD_COST and RD_STATS
+typedef struct RD_STATS {
+  int rate;
+  int64_t dist;
+  int64_t sse;
+  int skip;
+#if CONFIG_RD_DEBUG
+  int txb_coeff_cost[MAX_MB_PLANE];
+  int txb_coeff_cost_map[MAX_MB_PLANE][TXB_COEFF_COST_MAP_SIZE]
+                        [TXB_COEFF_COST_MAP_SIZE];
+#endif
+} RD_STATS;
+#endif  // CONFIG_VAR_TX
+
 // This structure now relates to 8x8 block regions.
 typedef struct {
   // Common for both INTER and INTRA blocks
@@ -252,7 +301,7 @@
   int current_q_index;
 #endif
 #if CONFIG_RD_DEBUG
-  int64_t txb_coeff_cost[MAX_MB_PLANE];
+  RD_STATS rd_stats;
   int mi_row;
   int mi_col;
 #endif
@@ -327,6 +376,12 @@
 #if CONFIG_AOM_QM
   const qm_val_t *seg_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
 #endif
+
+#if CONFIG_PVQ
+  DECLARE_ALIGNED(16, int16_t, pred[MAX_SB_SQUARE]);
+  // PVQ: forward transformed predicted image, a reference for PVQ.
+  tran_low_t *pvq_ref_coeff;
+#endif
 } MACROBLOCKD_PLANE;
 
 #define BLOCK_OFFSET(x, i) ((x) + (i)*16)
@@ -400,6 +455,9 @@
   uint8_t is_sec_rect;
 #endif
 
+#if CONFIG_PVQ
+  daala_dec_ctx daala_dec;
+#endif
 #if CONFIG_AOM_HIGHBITDEPTH
   /* Bit depth: 8, 10, 12 */
   int bd;
diff --git a/av1/common/generic_code.c b/av1/common/generic_code.c
new file mode 100644
index 0000000..4022cf1
--- /dev/null
+++ b/av1/common/generic_code.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "generic_code.h"
+
+void od_cdf_init(uint16_t *cdf, int ncdfs, int nsyms, int val, int first) {
+  int i;
+  int j;
+  for (i = 0; i < ncdfs; i++) {
+    for (j = 0; j < nsyms; j++) {
+      cdf[i*nsyms + j] = val*j + first;
+    }
+  }
+}
+
+/** Adapts a Q15 cdf after encoding/decoding a symbol. */
+void od_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate) {
+  int i;
+  *count = OD_MINI(*count + 1, 1 << rate);
+  OD_ASSERT(cdf[n - 1] == 32768);
+  if (*count >= 1 << rate) {
+    /* Steady-state adaptation based on a simple IIR with dyadic rate. */
+    for (i = 0; i < n; i++) {
+      int tmp;
+      /* When (i < val), we want the adjustment ((cdf[i] - tmp) >> rate) to be
+         positive so long as (cdf[i] > i + 1), and 0 when (cdf[i] == i + 1),
+         to ensure we don't drive any probabilities to 0. Replacing cdf[i] with
+         (i + 2) and solving ((i + 2 - tmp) >> rate == 1) for tmp produces
+         tmp == i + 2 - (1 << rate). Using this value of tmp with
+         cdf[i] == i + 1 instead gives an adjustment of 0 as desired.
+
+         When (i >= val), we want ((cdf[i] - tmp) >> rate) to be negative so
+         long as cdf[i] < 32768 - (n - 1 - i), and 0 when
+         cdf[i] == 32768 - (n - 1 - i), again to ensure we don't drive any
+         probabilities to 0. Since right-shifting any negative value is still
+         negative, we can solve (32768 - (n - 1 - i) - tmp == 0) for tmp,
+         producing tmp = 32769 - n + i. Using this value of tmp with smaller
+         values of cdf[i] instead gives negative adjustments, as desired.
+
+         Combining the two cases gives the expression below. These could be
+         stored in a lookup table indexed by n and rate to avoid the
+         arithmetic. */
+      tmp = 2 - (1<<rate) + i + (32767 + (1<<rate) - n)*(i >= val);
+      cdf[i] -= (cdf[i] - tmp) >> rate;
+    }
+  }
+  else {
+    int alpha;
+    /* Initial adaptation for the first symbols. The adaptation rate is
+       computed to be equivalent to what od_{en,de}code_cdf_adapt() does
+       when the initial cdf is set to increment/4. */
+    alpha = 4*32768/(n + 4**count);
+    for (i = 0; i < n; i++) {
+      int tmp;
+      tmp = (32768 - n)*(i >= val) + i + 1;
+      cdf[i] -= ((cdf[i] - tmp)*alpha) >> 15;
+    }
+  }
+  OD_ASSERT(cdf[n - 1] == 32768);
+}
+
+/** Initializes the cdfs and freq counts for a model.
+ *
+ * @param [out] model model being initialized
+ */
+void generic_model_init(generic_encoder *model) {
+  int i;
+  int j;
+  model->increment = 64;
+  for (i = 0; i < GENERIC_TABLES; i++) {
+    for (j = 0; j < 16; j++) {
+      /* Do flat initialization equivalent to a single symbol in each bin. */
+      model->cdf[i][j] = (j + 1) * model->increment;
+    }
+  }
+}
+
+/** Takes the base-2 log of E(x) in Q1.
+ *
+ * @param [in] ExQ16 expectation of x in Q16
+ *
+ * @retval 2*log2(ExQ16/2^16)
+ */
+int log_ex(int ex_q16) {
+  int lg;
+  int lg_q1;
+  int odd;
+  lg = OD_ILOG(ex_q16);
+  if (lg < 15) {
+    odd = ex_q16*ex_q16 > 2 << 2*lg;
+  }
+  else {
+    int tmp;
+    tmp = ex_q16 >> (lg - 8);
+    odd = tmp*tmp > (1 << 15);
+  }
+  lg_q1 = OD_MAXI(0, 2*lg - 33 + odd);
+  return lg_q1;
+}
+
+/** Updates the probability model based on the encoded/decoded value
+ *
+ * @param [in,out] model generic prob model
+ * @param [in,out] ExQ16 expectation of x
+ * @param [in]     x     variable encoded/decoded (used for ExQ16)
+ * @param [in]     xs    variable x after shift (used for the model)
+ * @param [in]     id    id of the icdf to adapt
+ * @param [in]     integration integration period of ExQ16 (leaky average over
+ * 1<<integration samples)
+ */
+void generic_model_update(generic_encoder *model, int *ex_q16, int x, int xs,
+ int id, int integration) {
+  int i;
+  int xenc;
+  uint16_t *cdf;
+  cdf = model->cdf[id];
+  /* Renormalize if we cannot add increment */
+  if (cdf[15] + model->increment > 32767) {
+    for (i = 0; i < 16; i++) {
+      /* Second term ensures that the pdf is non-null */
+      cdf[i] = (cdf[i] >> 1) + i + 1;
+    }
+  }
+  /* Update freq count */
+  xenc = OD_MINI(15, xs);
+  /* This can be easily vectorized */
+  for (i = xenc; i < 16; i++) cdf[i] += model->increment;
+  /* We could have saturated ExQ16 directly, but this is safe and simpler */
+  x = OD_MINI(x, 32767);
+  OD_IIR_DIADIC(*ex_q16, x << 16, integration);
+}
diff --git a/av1/common/generic_code.h b/av1/common/generic_code.h
new file mode 100644
index 0000000..6059190
--- /dev/null
+++ b/av1/common/generic_code.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_generic_code_H)
+# define _generic_code_H
+
+# include "aom_dsp/entdec.h"
+# include "aom_dsp/entenc.h"
+
+# define GENERIC_TABLES 12
+
+#if OD_ACCOUNTING
+# define generic_decode(dec, model, max, ex_q16, integration, str) generic_decode_(dec, model, max, ex_q16, integration, str)
+# define od_decode_cdf_adapt_q15(ec, cdf, n, count, rate, str) od_decode_cdf_adapt_q15_(ec, cdf, n, count, rate, str)
+# define od_decode_cdf_adapt(ec, cdf, n, increment, str) od_decode_cdf_adapt_(ec, cdf, n, increment, str)
+#else
+# define generic_decode(dec, model, max, ex_q16, integration, str) generic_decode_(dec, model, max, ex_q16, integration)
+# define od_decode_cdf_adapt_q15(ec, cdf, n, count, rate, str) od_decode_cdf_adapt_q15_(ec, cdf, n, count, rate)
+# define od_decode_cdf_adapt(ec, cdf, n, increment, str) od_decode_cdf_adapt_(ec, cdf, n, increment)
+#endif
+
+typedef struct {
+  /** cdf for multiple expectations of x */
+  uint16_t cdf[GENERIC_TABLES][16];
+  /** Frequency increment for learning the cdfs */
+  int increment;
+} generic_encoder;
+
+#define OD_IIR_DIADIC(y, x, shift) ((y) += ((x) - (y)) >> (shift))
+
+void generic_model_init(generic_encoder *model);
+
+#define OD_CDFS_INIT(cdf, val) od_cdf_init(&cdf[0][0],\
+ sizeof(cdf)/sizeof(cdf[0]), sizeof(cdf[0])/sizeof(cdf[0][0]), val, val)
+
+#define OD_CDFS_INIT_FIRST(cdf, val, first) od_cdf_init(&cdf[0][0],\
+ sizeof(cdf)/sizeof(cdf[0]), sizeof(cdf[0])/sizeof(cdf[0][0]), val, first)
+
+#define OD_SINGLE_CDF_INIT(cdf, val) od_cdf_init(cdf,\
+ 1, sizeof(cdf)/sizeof(cdf[0]), val, val)
+
+#define OD_SINGLE_CDF_INIT_FIRST(cdf, val, first) od_cdf_init(cdf,\
+ 1, sizeof(cdf)/sizeof(cdf[0]), val, first)
+
+void od_cdf_init(uint16_t *cdf, int ncdfs, int nsyms, int val, int first);
+
+void od_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate);
+
+void od_encode_cdf_adapt_q15(od_ec_enc *ec, int val, uint16_t *cdf, int n,
+ int *count, int rate);
+
+void od_encode_cdf_adapt(od_ec_enc *ec, int val, uint16_t *cdf, int n,
+ int increment);
+
+int od_decode_cdf_adapt_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int increment OD_ACC_STR);
+
+void generic_encode(od_ec_enc *enc, generic_encoder *model, int x, int max,
+ int *ex_q16, int integration);
+double generic_encode_cost(generic_encoder *model, int x, int max,
+ int *ex_q16);
+
+double od_encode_cdf_cost(int val, uint16_t *cdf, int n);
+
+int od_decode_cdf_adapt_q15_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int *count, int rate OD_ACC_STR);
+
+int generic_decode_(od_ec_dec *dec, generic_encoder *model, int max,
+ int *ex_q16, int integration OD_ACC_STR);
+
+int log_ex(int ex_q16);
+
+void generic_model_update(generic_encoder *model, int *ex_q16, int x, int xs,
+ int id, int integration);
+
+#endif
diff --git a/av1/common/laplace_tables.c b/av1/common/laplace_tables.c
new file mode 100644
index 0000000..f1c3f9a
--- /dev/null
+++ b/av1/common/laplace_tables.c
@@ -0,0 +1,272 @@
+/* This file is auto-generated using "gen_laplace_tables 128 7" */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "pvq.h"
+
+const uint16_t EXP_CDF_TABLE[128][16] = {
+  {32753,32754,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {32499,32753,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {32243,32747,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {31987,32737,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {31732,32724,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {31476,32706,32754,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {31220,32684,32753,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {30964,32658,32751,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {30708,32628,32748,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {30452,32594,32745,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {30198,32558,32742,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {29941,32515,32736,32755,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {29686,32470,32731,32755,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {29429,32419,32723,32754,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {29174,32366,32715,32753,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {28918,32308,32705,32752,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {28662,32246,32694,32750,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {28406,32180,32681,32748,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {28150,32110,32667,32745,32756,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {27894,32036,32651,32742,32756,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {27639,31959,32634,32739,32755,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {27383,31877,32614,32735,32755,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {27126,31790,32592,32730,32754,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {26871,31701,32569,32725,32753,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {26615,31607,32543,32719,32752,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {26361,31511,32517,32713,32751,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {26104,31408,32485,32704,32748,32757,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {25848,31302,32452,32695,32746,32757,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {25591,31191,32416,32684,32743,32756,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {25336,31078,32379,32674,32741,32756,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {25080,30960,32338,32661,32737,32755,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {24824,30838,32295,32648,32733,32754,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {24568,30712,32248,32632,32728,32752,32758,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {24313,30583,32199,32616,32723,32751,32758,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {24057,30449,32147,32598,32718,32750,32758,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {23801,30311,32091,32578,32711,32747,32757,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {23546,30170,32033,32557,32704,32745,32757,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {23288,30022,31969,32532,32695,32742,32756,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {23033,29873,31904,32507,32686,32739,32755,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {22778,29720,31835,32479,32675,32735,32753,32759,32761,32762,32763,32764,32765,32766,32767,32768},
+  {22521,29561,31761,32449,32664,32731,32752,32759,32761,32762,32763,32764,32765,32766,32767,32768},
+  {22267,29401,31686,32418,32652,32727,32751,32759,32761,32762,32763,32764,32765,32766,32767,32768},
+  {22011,29235,31605,32383,32638,32722,32749,32758,32761,32762,32763,32764,32765,32766,32767,32768},
+  {21754,29064,31520,32345,32622,32715,32746,32757,32761,32762,32763,32764,32765,32766,32767,32768},
+  {21501,28893,31434,32307,32607,32710,32745,32757,32761,32762,32763,32764,32765,32766,32767,32768},
+  {21243,28713,31339,32262,32587,32701,32741,32755,32760,32762,32763,32764,32765,32766,32767,32768},
+  {20988,28532,31243,32217,32567,32693,32738,32754,32760,32762,32763,32764,32765,32766,32767,32768},
+  {20730,28344,31140,32167,32544,32682,32733,32752,32759,32762,32763,32764,32765,32766,32767,32768},
+  {20476,28156,31036,32116,32521,32673,32730,32751,32759,32762,32763,32764,32765,32766,32767,32768},
+  {20220,27962,30926,32061,32495,32661,32725,32749,32758,32762,32763,32764,32765,32766,32767,32768},
+  {19963,27763,30810,32000,32465,32647,32718,32746,32757,32761,32763,32764,32765,32766,32767,32768},
+  {19708,27562,30691,31938,32435,32633,32712,32743,32756,32761,32763,32764,32765,32766,32767,32768},
+  {19454,27358,30569,31873,32403,32618,32705,32741,32755,32761,32763,32764,32765,32766,32767,32768},
+  {19196,27146,30438,31801,32365,32599,32696,32736,32753,32760,32763,32764,32765,32766,32767,32768},
+  {18942,26934,30306,31728,32328,32581,32688,32733,32752,32760,32763,32764,32765,32766,32767,32768},
+  {18684,26714,30164,31647,32284,32558,32676,32727,32749,32758,32762,32764,32765,32766,32767,32768},
+  {18429,26493,30021,31565,32240,32535,32664,32721,32746,32757,32762,32764,32765,32766,32767,32768},
+  {18174,26268,29872,31477,32192,32510,32652,32715,32743,32756,32762,32764,32765,32766,32767,32768},
+  {17920,26040,29719,31386,32141,32483,32638,32708,32740,32754,32761,32764,32765,32766,32767,32768},
+  {17661,25803,29556,31286,32083,32451,32620,32698,32734,32751,32759,32763,32765,32766,32767,32768},
+  {17406,25566,29391,31184,32024,32418,32603,32690,32731,32750,32759,32763,32765,32766,32767,32768},
+  {17151,25325,29220,31076,31961,32383,32584,32680,32726,32748,32758,32763,32765,32766,32767,32768},
+  {16896,25080,29044,30964,31894,32344,32562,32668,32719,32744,32756,32762,32765,32766,32767,32768},
+  {16639,24829,28860,30844,31821,32302,32539,32655,32712,32740,32754,32761,32764,32766,32767,32768},
+  {16384,24576,28672,30720,31744,32256,32512,32640,32704,32736,32752,32760,32764,32766,32767,32768},
+  {16130,24320,28479,30591,31663,32208,32485,32625,32696,32732,32750,32759,32764,32766,32767,32768},
+  {15872,24056,28276,30452,31574,32152,32450,32604,32683,32724,32745,32756,32762,32765,32766,32768},
+  {15615,23789,28068,30308,31480,32094,32415,32583,32671,32717,32741,32754,32761,32764,32766,32768},
+  {15361,23521,27856,30159,31382,32032,32377,32560,32657,32709,32737,32752,32760,32764,32766,32768},
+  {15103,23245,27634,30000,31275,31963,32334,32534,32642,32700,32731,32748,32757,32762,32765,32768},
+  {14848,22968,27409,29837,31165,31891,32288,32505,32624,32689,32725,32744,32755,32761,32764,32768},
+  {14592,22686,27176,29666,31047,31813,32238,32474,32605,32678,32718,32740,32752,32759,32763,32768},
+  {14336,22400,26936,29488,30923,31730,32184,32439,32583,32664,32709,32735,32749,32757,32762,32768},
+  {14079,22109,26689,29301,30791,31641,32125,32401,32559,32649,32700,32729,32746,32756,32761,32768},
+  {13825,21817,26437,29108,30652,31545,32061,32359,32532,32632,32690,32723,32742,32753,32759,32768},
+  {13568,21518,26176,28905,30504,31441,31990,32312,32501,32611,32676,32714,32736,32749,32757,32768},
+  {13314,21218,25911,28697,30351,31333,31916,32262,32468,32590,32662,32705,32731,32746,32755,32768},
+  {13054,20908,25633,28475,30185,31214,31833,32205,32429,32564,32645,32694,32723,32741,32752,32768},
+  {12803,20603,25356,28252,30017,31093,31748,32147,32390,32538,32628,32683,32717,32737,32749,32768},
+  {12544,20286,25064,28013,29833,30956,31649,32077,32341,32504,32605,32667,32705,32729,32744,32768},
+  {12288,19968,24768,27768,29643,30815,31547,32005,32291,32470,32582,32652,32696,32723,32740,32768},
+  {12033,19647,24465,27514,29443,30664,31437,31926,32235,32431,32555,32633,32683,32714,32734,32768},
+  {11777,19321,24154,27250,29233,30504,31318,31839,32173,32387,32524,32612,32668,32704,32727,32768},
+  {11521,18991,23835,26976,29013,30334,31190,31745,32105,32338,32489,32587,32651,32692,32719,32768},
+  {11265,18657,23508,26691,28780,30151,31051,31641,32028,32282,32449,32559,32631,32678,32709,32768},
+  {11006,18316,23170,26394,28535,29957,30901,31528,31944,32220,32404,32526,32607,32661,32697,32768},
+  {10752,17976,22830,26091,28282,29754,30743,31408,31854,32154,32356,32491,32582,32643,32684,32768},
+  {10496,17630,22479,25775,28015,29538,30573,31276,31754,32079,32300,32450,32552,32621,32668,32768},
+  {10240,17280,22120,25448,27736,29309,30390,31133,31644,31995,32237,32403,32517,32595,32649,32768},
+  { 9984,16926,21753,25109,27443,29066,30194,30978,31523,31902,32166,32349,32476,32565,32627,32768},
+  { 9728,16568,21377,24759,27137,28809,29984,30811,31392,31801,32088,32290,32432,32532,32602,32768},
+  { 9474,16208,20995,24399,26819,28539,29762,30631,31249,31688,32000,32222,32380,32492,32572,32768},
+  { 9216,15840,20601,24023,26483,28251,29522,30435,31091,31563,31902,32146,32321,32447,32537,32768},
+  { 8959,15469,20199,23636,26133,27947,29265,30223,30919,31425,31792,32059,32253,32394,32496,32768},
+  { 8705,15097,19791,23238,25770,27629,28994,29997,30733,31274,31671,31963,32177,32334,32449,32768},
+  { 8449,14719,19373,22827,25390,27292,28704,29752,30530,31107,31535,31853,32089,32264,32394,32768},
+  { 8192,14336,18944,22400,24992,26936,28394,29488,30308,30923,31384,31730,31989,32184,32330,32768},
+  { 7936,13950,18507,21961,24578,26561,28064,29203,30066,30720,31216,31592,31877,32093,32256,32768},
+  { 7678,13558,18060,21507,24146,26166,27713,28897,29804,30498,31030,31437,31749,31988,32171,32768},
+  { 7423,13165,17606,21041,23698,25753,27342,28571,29522,30257,30826,31266,31606,31869,32073,32768},
+  { 7168,12768,17143,20561,23231,25317,26947,28220,29215,29992,30599,31073,31444,31734,31960,32768},
+  { 6911,12365,16669,20065,22744,24858,26526,27842,28881,29701,30348,30858,31261,31579,31830,32768},
+  { 6657,11961,16188,19556,22240,24379,26083,27441,28523,29385,30072,30620,31056,31404,31681,32768},
+  { 6400,11550,15694,19029,21712,23871,25609,27007,28132,29037,29766,30352,30824,31204,31509,32768},
+  { 6142,11134,15190,18486,21164,23340,25108,26544,27711,28659,29429,30055,30564,30977,31313,32768},
+  { 5890,10720,14682,17932,20598,22785,24579,26051,27258,28248,29060,29726,30273,30721,31089,32768},
+  { 5631,10295,14157,17356,20005,22199,24016,25520,26766,27798,28652,29359,29945,30430,30832,32768},
+  { 5377, 9871,13628,16768,19393,21587,23421,24954,26236,27308,28204,28953,29579,30102,30539,32768},
+  { 5121, 9441,13086,16161,18756,20945,22792,24351,25666,26776,27712,28502,29169,29731,30206,32768},
+  { 4865, 9007,12534,15538,18096,20274,22129,23708,25053,26198,27173,28004,28711,29313,29826,32768},
+  { 4608, 8568,11971,14896,17409,19569,21425,23020,24391,25569,26581,27451,28199,28842,29394,32768},
+  { 4351, 8125,11398,14236,16697,18831,20682,22287,23679,24886,25933,26841,27628,28311,28903,32768},
+  { 4096, 7680,10816,13560,15961,18062,19900,21508,22915,24146,25224,26167,26992,27714,28346,32768},
+  { 3840, 7230,10223,12865,15197,17256,19074,20679,22096,23347,24451,25426,26287,27047,27718,32768},
+  { 3584, 6776, 9619,12151,14406,16414,18203,19796,21215,22479,23604,24606,25499,26294,27002,32768},
+  { 3328, 6318, 9004,11417,13585,15533,17283,18856,20269,21538,22678,23703,24624,25451,26194,32768},
+  { 3072, 5856, 8379,10665,12737,14615,16317,17859,19257,20524,21672,22712,23655,24509,25283,32768},
+  { 2816, 5390, 7743, 9894,11860,13657,15299,16800,18172,19426,20573,21621,22579,23455,24255,32768},
+  { 2560, 4920, 7096, 9102,10951,12656,14227,15676,17011,18242,19377,20423,21388,22277,23097,32768},
+  { 2304, 4446, 6437, 8288,10009,11609,13097,14480,15766,16961,18072,19105,20066,20959,21789,32768},
+  { 2048, 3968, 5768, 7456, 9038,10521,11911,13215,14437,15583,16657,17664,18608,19493,20323,32768},
+  { 1792, 3486, 5087, 6601, 8032, 9385,10664,11873,13016,14096,15117,16082,16995,17858,18673,32768},
+  { 1536, 3000, 4395, 5725, 6993, 8201, 9353,10451,11497,12494,13444,14350,15213,16036,16820,32768},
+  { 1280, 2510, 3692, 4828, 5919, 6968, 7976, 8944, 9875,10769,11628,12454,13248,14011,14744,32768},
+  { 1024, 2016, 2977, 3908, 4810, 5684, 6530, 7350, 8144, 8913, 9658,10380,11080,11758,12415,32768},
+  {  768, 1518, 2250, 2965, 3663, 4345, 5011, 5662, 6297, 6917, 7523, 8115, 8693, 9257, 9808,32768},
+  {  512, 1016, 1512, 2000, 2481, 2954, 3420, 3879, 4330, 4774, 5211, 5642, 6066, 6483, 6894,32768},
+  {  256,  510,  762, 1012, 1260, 1506, 1750, 1992, 2232, 2471, 2708, 2943, 3176, 3407, 3636,32768},
+};
+
+
+const uint16_t LAPLACE_OFFSET[128] = {
+  0,
+  29871,
+  28672,
+  27751,
+  26975,
+  26291,
+  25673,
+  25105,
+  24576,
+  24079,
+  23609,
+  23162,
+  22734,
+  22325,
+  21931,
+  21550,
+  21182,
+  20826,
+  20480,
+  20143,
+  19815,
+  19495,
+  19183,
+  18877,
+  18579,
+  18286,
+  17999,
+  17718,
+  17442,
+  17170,
+  16904,
+  16642,
+  16384,
+  16129,
+  15879,
+  15633,
+  15390,
+  15150,
+  14913,
+  14680,
+  14450,
+  14222,
+  13997,
+  13775,
+  13556,
+  13338,
+  13124,
+  12911,
+  12701,
+  12493,
+  12288,
+  12084,
+  11882,
+  11682,
+  11484,
+  11288,
+  11094,
+  10901,
+  10710,
+  10521,
+  10333,
+  10147,
+  9962,
+  9779,
+  9597,
+  9417,
+  9238,
+  9060,
+  8884,
+  8709,
+  8535,
+  8363,
+  8192,
+  8021,
+  7853,
+  7685,
+  7518,
+  7352,
+  7188,
+  7025,
+  6862,
+  6701,
+  6540,
+  6381,
+  6222,
+  6065,
+  5908,
+  5753,
+  5598,
+  5444,
+  5291,
+  5138,
+  4987,
+  4837,
+  4687,
+  4538,
+  4390,
+  4242,
+  4096,
+  3950,
+  3804,
+  3660,
+  3516,
+  3373,
+  3231,
+  3089,
+  2948,
+  2808,
+  2668,
+  2529,
+  2391,
+  2253,
+  2116,
+  1979,
+  1843,
+  1708,
+  1573,
+  1439,
+  1306,
+  1172,
+  1040,
+  908,
+  777,
+  646,
+  516,
+  386,
+  257,
+  128,
+};
diff --git a/av1/common/mv.h b/av1/common/mv.h
index d49fc3f..e5400d9 100644
--- a/av1/common/mv.h
+++ b/av1/common/mv.h
@@ -36,7 +36,7 @@
 
 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
 // Bits of precision used for the model
-#define WARPEDMODEL_PREC_BITS 8
+#define WARPEDMODEL_PREC_BITS 12
 #define WARPEDMODEL_ROW3HOMO_PREC_BITS 12
 
 // Bits of subpel precision for warped interpolation
@@ -65,7 +65,7 @@
 
 typedef struct {
   TransformationType wmtype;
-  int_mv wmmat[4];  // For homography wmmat[9] is assumed to be 1
+  int32_t wmmat[8];  // For homographies, the ninth element wmmat[8] is
+                     // implicitly 1 and is not stored
 } WarpedMotionParams;
 #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
 
@@ -94,16 +94,16 @@
 //
 // XX_MIN, XX_MAX are also computed to avoid repeated computation
 
-#define GM_TRANS_PREC_BITS 8
+#define GM_TRANS_PREC_BITS 3
 #define GM_TRANS_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_TRANS_PREC_BITS)
 #define GM_TRANS_DECODE_FACTOR (1 << GM_TRANS_PREC_DIFF)
 
-#define GM_ALPHA_PREC_BITS 8
+#define GM_ALPHA_PREC_BITS 12
 #define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS)
 #define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF)
 
-#define GM_ABS_ALPHA_BITS 8
-#define GM_ABS_TRANS_BITS 8
+#define GM_ABS_ALPHA_BITS 9
+#define GM_ABS_TRANS_BITS 9
 
 #define GM_TRANS_MAX (1 << GM_ABS_TRANS_BITS)
 #define GM_ALPHA_MAX (1 << GM_ABS_ALPHA_BITS)
@@ -123,6 +123,17 @@
   WarpedMotionParams motion_params;
 } Global_Motion_Params;
 
+// Convert a global motion translation vector (which may have more bits than a
+// regular motion vector) into a motion vector
+static INLINE int_mv gm_get_motion_vector(const Global_Motion_Params *gm) {
+  int_mv res;
+  res.as_mv.row = (int16_t)ROUND_POWER_OF_TWO_SIGNED(gm->motion_params.wmmat[0],
+                                                     WARPEDMODEL_PREC_BITS - 3);
+  res.as_mv.col = (int16_t)ROUND_POWER_OF_TWO_SIGNED(gm->motion_params.wmmat[1],
+                                                     WARPEDMODEL_PREC_BITS - 3);
+  return res;
+}
+
 static INLINE TransformationType gm_to_trans_type(GLOBAL_MOTION_TYPE gmtype) {
   switch (gmtype) {
     case GLOBAL_ZERO: return UNKNOWN_TRANSFORM; break;
@@ -135,10 +146,11 @@
 }
 
 static INLINE GLOBAL_MOTION_TYPE get_gmtype(const Global_Motion_Params *gm) {
-  if (!gm->motion_params.wmmat[2].as_int) {
-    if (!gm->motion_params.wmmat[1].as_int) {
-      return (gm->motion_params.wmmat[0].as_int ? GLOBAL_TRANSLATION
-                                                : GLOBAL_ZERO);
+  if (!gm->motion_params.wmmat[5] && !gm->motion_params.wmmat[4]) {
+    if (!gm->motion_params.wmmat[3] && !gm->motion_params.wmmat[2]) {
+      return ((!gm->motion_params.wmmat[1] && !gm->motion_params.wmmat[0])
+                  ? GLOBAL_ZERO
+                  : GLOBAL_TRANSLATION);
     } else {
       return GLOBAL_ROTZOOM;
     }
diff --git a/av1/common/odintrin.c b/av1/common/odintrin.c
index bb36104..868efac 100644
--- a/av1/common/odintrin.c
+++ b/av1/common/odintrin.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
@@ -8,8 +8,21 @@
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
+
+/* clang-format off */
+
 #include "av1/common/odintrin.h"
 
+#if defined(OD_ENABLE_ASSERTIONS)
+# include <stdio.h>
+
+void od_fatal_impl(const char *_str, const char *_file, int _line) {
+  fprintf(stderr, "Fatal (internal) error in %s, line %d: %s\n",
+   _file, _line, _str);
+  abort();
+}
+#endif
+
 /*Constants for use with OD_DIVU_SMALL().
   See \cite{Rob05} for details on computing these constants.
   @INPROCEEDINGS{Rob05,
diff --git a/av1/common/odintrin.h b/av1/common/odintrin.h
index 417b714..96131f0 100644
--- a/av1/common/odintrin.h
+++ b/av1/common/odintrin.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
@@ -8,9 +8,16 @@
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
+
+/* clang-format off */
+
 #ifndef AV1_COMMON_ODINTRIN_H_
 #define AV1_COMMON_ODINTRIN_H_
 
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
 #include "aom/aom_integer.h"
 #include "aom_dsp/aom_dsp_common.h"
 #include "aom_ports/bitops.h"
@@ -20,14 +27,52 @@
 extern "C" {
 #endif
 
+# if !defined(M_LOG2E)
+#  define M_LOG2E (1.4426950408889634073599246810019)
+# endif
+
+# if !defined(M_LN2)
+#  define M_LN2 (0.69314718055994530941723212145818)
+# endif
+
 /*Smallest blocks are 4x4*/
 #define OD_LOG_BSIZE0 (2)
 /*There are 5 block sizes total (4x4, 8x8, 16x16, 32x32 and 64x64).*/
 #define OD_NBSIZES (5)
-/*The log of the maximum length of the side of a block.*/
-#define OD_LOG_BSIZE_MAX (OD_LOG_BSIZE0 + OD_NBSIZES - 1)
 /*The maximum length of the side of a block.*/
-#define OD_BSIZE_MAX (1 << OD_LOG_BSIZE_MAX)
+#define OD_BSIZE_MAX MAX_SB_SIZE
+
+/*There are 4 transform sizes total in AV1 (4x4, 8x8, 16x16 and 32x32).*/
+#define OD_TXSIZES TX_SIZES
+/*The log of the maximum length of the side of a transform.*/
+#define OD_LOG_TXSIZE_MAX (OD_LOG_BSIZE0 + OD_TXSIZES - 1)
+/*The maximum length of the side of a transform.*/
+#define OD_TXSIZE_MAX (1 << OD_LOG_TXSIZE_MAX)
+
+/**The maximum number of color planes allowed in a single frame.*/
+# define OD_NPLANES_MAX (3)
+
+# define OD_COEFF_SHIFT (4)
+
+# define OD_DISABLE_CFL (1)
+# define OD_DISABLE_FILTER (1)
+
+# define OD_ENABLE_ASSERTIONS (1)
+
+# define OD_LOG(a)
+# define OD_LOG_PARTIAL(a)
+
+/*Possible block sizes, note that OD_BLOCK_NXN = log2(N) - 2.*/
+#define OD_BLOCK_4X4 (0)
+#define OD_BLOCK_8X8 (1)
+#define OD_BLOCK_16X16 (2)
+#define OD_BLOCK_32X32 (3)
+#define OD_BLOCK_SIZES (OD_BLOCK_32X32 + 1)
+
+# define OD_LIMIT_BSIZE_MIN (OD_BLOCK_4X4)
+# define OD_LIMIT_BSIZE_MAX (OD_BLOCK_32X32)
+
+# define OD_ROBUST_STREAM (1)
 
 typedef int od_coeff;
 
@@ -58,7 +103,8 @@
   We define a special version of the macro to use when x can be zero.*/
 #define OD_ILOG(x) ((x) ? OD_ILOG_NZ(x) : 0)
 
-#define OD_LOG2 AOMLOG2
+#define OD_LOG2(x) (M_LOG2E*log(x))
+#define OD_EXP2(x) (exp(M_LN2*(x)))
 
 /*Enable special features for gcc and compatible compilers.*/
 #if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
@@ -121,10 +167,92 @@
 /** Copy n elements of memory from src to dst, allowing overlapping regions.
     The 0* term provides compile-time type checking */
 #if !defined(OVERRIDE_OD_MOVE)
-#define OD_MOVE(dst, src, n) \
-  (memmove((dst), (src), sizeof(*(dst)) * (n) + 0 * ((dst) - (src))))
+# define OD_MOVE(dst, src, n) \
+ (memmove((dst), (src), sizeof(*(dst))*(n) + 0*((dst) - (src)) ))
 #endif
 
+/** Linkage will break without this if using a C++ compiler, and will issue
+ * warnings without this for a C compiler*/
+#if defined(__cplusplus)
+# define OD_EXTERN extern
+#else
+# define OD_EXTERN
+#endif
+
+/** Set n elements of dst to zero */
+#if !defined(OVERRIDE_OD_CLEAR)
+# define OD_CLEAR(dst, n) (memset((dst), 0, sizeof(*(dst))*(n)))
+#endif
+
+/** Silence unused parameter/variable warnings */
+# define OD_UNUSED(expr) (void)(expr)
+
+#if defined(OD_FLOAT_PVQ)
+typedef double od_val16;
+typedef double od_val32;
+# define OD_QCONST32(x, bits) (x)
+# define OD_ROUND16(x) (x)
+# define OD_ROUND32(x) (x)
+# define OD_SHL(x, shift) (x)
+# define OD_SHR(x, shift) (x)
+# define OD_SHR_ROUND(x, shift) (x)
+# define OD_ABS(x) (fabs(x))
+# define OD_MULT16_16(a, b) ((a)*(b))
+# define OD_MULT16_32_Q16(a, b) ((a)*(b))
+#else
+typedef int16_t od_val16;
+typedef int32_t od_val32;
+/** Compile-time conversion of float constant to 32-bit value */
+# define OD_QCONST32(x, bits) ((od_val32)(.5 + (x)*(((od_val32)1) << (bits))))
+# define OD_ROUND16(x) (int16_t)(floor(.5 + (x)))
+# define OD_ROUND32(x) (int32_t)(floor(.5 + (x)))
+/*Shift x left by shift*/
+# define OD_SHL(a, shift) ((int32_t)((uint32_t)(a) << (shift)))
+/*Shift x right by shift (without rounding)*/
+# define OD_SHR(x, shift) \
+  ((int32_t)((x) >> (shift)))
+/*Shift x right by shift (with rounding)*/
+# define OD_SHR_ROUND(x, shift) \
+  ((int32_t)(((x) + (1 << (shift) >> 1)) >> (shift)))
+/*Shift x right by shift (without rounding) or left by -shift if shift
+  is negative.*/
+# define OD_VSHR(x, shift) \
+  (((shift) > 0) ? OD_SHR(x, shift) : OD_SHL(x, -(shift)))
+/*Shift x right by shift (with rounding) or left by -shift if shift
+  is negative.*/
+# define OD_VSHR_ROUND(x, shift) \
+  (((shift) > 0) ? OD_SHR_ROUND(x, shift) : OD_SHL(x, -(shift)))
+# define OD_ABS(x) (abs(x))
+/* (od_val32)(od_val16) gives TI compiler a hint that it's 16x16->32 multiply */
+/** 16x16 multiplication where the result fits in 32 bits */
+# define OD_MULT16_16(a, b) \
+ (((od_val32)(od_val16)(a))*((od_val32)(od_val16)(b)))
+/* Multiplies 16-bit a by 32-bit b and keeps bits [16:47]. */
+# define OD_MULT16_32_Q16(a, b) ((int16_t)(a)*(int64_t)(int32_t)(b) >> 16)
+/*16x16 multiplication where the result fits in 16 bits, without rounding.*/
+# define OD_MULT16_16_Q15(a, b) \
+  (((int16_t)(a)*((int32_t)(int16_t)(b))) >> 15)
+/*16x16 multiplication where the result fits in 16 bits, without rounding.*/
+# define OD_MULT16_16_Q16(a, b) \
+  ((((int16_t)(a))*((int32_t)(int16_t)(b))) >> 16)
+#endif
+
+/*All of these macros should expect floats as arguments.*/
+/*These two should compile as a single SSE instruction.*/
+# define OD_MINF(a, b) ((a) < (b) ? (a) : (b))
+# define OD_MAXF(a, b) ((a) > (b) ? (a) : (b))
+
+# define OD_DIV_R0(x, y) (((x) + OD_FLIPSIGNI((((y) + 1) >> 1) - 1, (x)))/(y))
+
+# define OD_SIGNMASK(a) (-((a) < 0))
+# define OD_FLIPSIGNI(a, b) (((a) + OD_SIGNMASK(b)) ^ OD_SIGNMASK(b))
+
+# define OD_MULT16_16_Q15(a, b) \
+  (((int16_t)(a)*((int32_t)(int16_t)(b))) >> 15)
+
+/* Multiplies 16-bit a by 32-bit b and keeps bits [16:47]. */
+# define OD_MULT16_32_Q16(a, b) ((int16_t)(a)*(int64_t)(int32_t)(b) >> 16)
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index 9bf672d..20270cb 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -28,6 +28,10 @@
 #include "av1/common/restoration.h"
 #endif  // CONFIG_LOOP_RESTORATION
 #include "av1/common/tile_common.h"
+#include "av1/common/odintrin.h"
+#if CONFIG_PVQ
+#include "av1/common/pvq.h"
+#endif
 
 #ifdef __cplusplus
 extern "C" {
@@ -475,10 +479,16 @@
 }
 
 static INLINE void av1_init_macroblockd(AV1_COMMON *cm, MACROBLOCKD *xd,
+#if CONFIG_PVQ
+                                        tran_low_t *pvq_ref_coeff,
+#endif
                                         tran_low_t *dqcoeff) {
   int i;
   for (i = 0; i < MAX_MB_PLANE; ++i) {
     xd->plane[i].dqcoeff = dqcoeff;
+#if CONFIG_PVQ
+    xd->plane[i].pvq_ref_coeff = pvq_ref_coeff;
+#endif
     xd->above_context[i] = cm->above_context[i];
     if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
       memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
diff --git a/av1/common/partition.c b/av1/common/partition.c
new file mode 100644
index 0000000..6b9b6fa
--- /dev/null
+++ b/av1/common/partition.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "enums.h"
+#include "odintrin.h"
+#include "partition.h"
+#include "zigzag.h"
+
+OD_EXTERN const index_pair *OD_ZIGZAG4[4] = {
+  OD_ZIGZAG4_DCT_DCT,
+  OD_ZIGZAG4_ADST_DCT,
+  OD_ZIGZAG4_DCT_ADST,
+  OD_ZIGZAG4_ADST_ADST
+};
+
+OD_EXTERN const index_pair *OD_ZIGZAG8[4] = {
+  OD_ZIGZAG8_DCT_DCT,
+  OD_ZIGZAG8_ADST_DCT,
+  OD_ZIGZAG8_DCT_ADST,
+  OD_ZIGZAG8_ADST_ADST
+};
+
+OD_EXTERN const index_pair *OD_ZIGZAG16[4] = {
+  OD_ZIGZAG16_DCT_DCT,
+  OD_ZIGZAG16_ADST_DCT,
+  OD_ZIGZAG16_DCT_ADST,
+  OD_ZIGZAG16_ADST_ADST
+};
+
+OD_EXTERN const index_pair *OD_ZIGZAG32[4] = {
+  OD_ZIGZAG32_DCT_DCT,
+  OD_ZIGZAG32_DCT_DCT,
+  OD_ZIGZAG32_DCT_DCT,
+  OD_ZIGZAG32_DCT_DCT
+};
+
+/* The tables below specify how coefficient blocks are translated to
+   and from PVQ partition coding scan order for 4x4, 8x8 and 16x16 */
+
+static const int OD_LAYOUT32_OFFSETS[4] = { 0, 128, 256, 768 };
+const band_layout OD_LAYOUT32 = {
+  OD_ZIGZAG32,
+  32,
+  3,
+  OD_LAYOUT32_OFFSETS
+};
+
+static const int OD_LAYOUT16_OFFSETS[4] = { 0, 32, 64, 192 };
+const band_layout OD_LAYOUT16 = {
+  OD_ZIGZAG16,
+  16,
+  3,
+  OD_LAYOUT16_OFFSETS
+};
+
+const int OD_LAYOUT8_OFFSETS[4] = { 0, 8, 16, 48 };
+const band_layout OD_LAYOUT8 = {
+  OD_ZIGZAG8,
+  8,
+  3,
+  OD_LAYOUT8_OFFSETS
+};
+
+static const int OD_LAYOUT4_OFFSETS[2] = { 0, 15 };
+const band_layout OD_LAYOUT4 = {
+  OD_ZIGZAG4,
+  4,
+  1,
+  OD_LAYOUT4_OFFSETS
+};
+
+/* First element is the number of bands, followed by the list all the band
+  boundaries. */
+static const int OD_BAND_OFFSETS4[] = {1, 1, 16};
+static const int OD_BAND_OFFSETS8[] = {4, 1, 16, 24, 32, 64};
+static const int OD_BAND_OFFSETS16[] = {7, 1, 16, 24, 32, 64, 96, 128, 256};
+static const int OD_BAND_OFFSETS32[] = {10, 1, 16, 24, 32, 64, 96, 128, 256,
+ 384, 512, 1024};
+static const int OD_BAND_OFFSETS64[] = {13, 1, 16, 24, 32, 64, 96, 128, 256,
+ 384, 512, 1024, 1536, 2048, 4096};
+
+const int *const OD_BAND_OFFSETS[OD_TXSIZES + 1] = {
+  OD_BAND_OFFSETS4,
+  OD_BAND_OFFSETS8,
+  OD_BAND_OFFSETS16,
+  OD_BAND_OFFSETS32,
+  OD_BAND_OFFSETS64
+};
+
+/** Perform a single stage of conversion from a coefficient block in
+ * raster order into coding scan order
+ *
+ * @param [in]     layout  scan order specification
+ * @param [out]    dst     destination vector
+ * @param [in]     src     source coefficient block
+ * @param [in]     stride  source vector row stride
+ */
+static void od_band_from_raster(const band_layout *layout, int16_t *dst,
+ const int16_t *src, int stride, TX_TYPE tx_type) {
+  int i;
+  int len;
+  len = layout->band_offsets[layout->nb_bands];
+  for (i = 0; i < len; i++) {
+    dst[i] = src[layout->dst_table[tx_type][i][1]*stride + layout->dst_table[tx_type][i][0]];
+  }
+}
+
+/** Perform a single stage of conversion from a vector in coding scan
+    order back into a coefficient block in raster order
+ *
+ * @param [in]     layout  scan order specification
+ * @param [out]    dst     destination coefficient block
+ * @param [in]     src     source vector
+ * @param [in]     stride  destination vector row stride
+ */
+static void od_raster_from_band(const band_layout *layout, int16_t *dst,
+ int stride, TX_TYPE tx_type, const int16_t *src) {
+  int i;
+  int len;
+  len = layout->band_offsets[layout->nb_bands];
+  for (i = 0; i < len; i++) {
+    dst[layout->dst_table[tx_type][i][1]*stride + layout->dst_table[tx_type][i][0]] = src[i];
+  }
+}
+
+static const band_layout *const OD_LAYOUTS[] = {&OD_LAYOUT4, &OD_LAYOUT8,
+ &OD_LAYOUT16, &OD_LAYOUT32};
+
+/** Converts a coefficient block in raster order into a vector in
+ * coding scan order with the PVQ partitions laid out one after
+ * another.  This works in stages; the 4x4 conversion is applied to
+ * the coefficients nearest DC, then the 8x8 applied to the 8x8 block
+ * nearest DC that was not already coded by 4x4, then 16x16 following
+ * the same pattern.
+ *
+ * @param [out]    dst        destination vector
+ * @param [in]     n          block size (along one side)
+ * @param [in]     ty_type    transform type
+ * @param [in]     src        source coefficient block
+ * @param [in]     stride     source vector row stride
+ */
+void od_raster_to_coding_order(int16_t *dst, int n, TX_TYPE ty_type,
+ const int16_t *src, int stride) {
+  int bs;
+  /* dst + 1 because DC is not included for 4x4 blocks. */
+  od_band_from_raster(OD_LAYOUTS[0], dst + 1, src, stride, ty_type);
+  for (bs = 1; bs < OD_TXSIZES; bs++) {
+    int size;
+    int offset;
+    /* Length of block size > 4. */
+    size = 1 << (OD_LOG_BSIZE0 + bs);
+    /* Offset is the size of the previous block squared. */
+    offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs);
+    if (n >= size) {
+      /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */
+      od_band_from_raster(OD_LAYOUTS[bs], dst + offset, src, stride, ty_type);
+    }
+  }
+  dst[0] = src[0];
+}
+
+/** Converts a vector in coding scan order with the PVQ partitions
+ * laid out one after another into a coefficient block in raster
+ * order. This works in stages in the reverse order of raster->scan
+ * order; the 16x16 conversion is applied to the coefficients that
+ * don't appear in an 8x8 block, then the 8x8 applied to the 8x8 block
+ * sans the 4x4 block it contains, then 4x4 is converted sans DC.
+ *
+ * @param [out]    dst        destination coefficient block
+ * @param [in]     stride     destination vector row stride
+ * @param [in]     src        source vector
+ * @param [in]     n          block size (along one side)
+ */
+void od_coding_order_to_raster(int16_t *dst, int stride, TX_TYPE ty_type,
+ const int16_t *src, int n) {
+  int bs;
+  /* src + 1 because DC is not included for 4x4 blocks. */
+  od_raster_from_band(OD_LAYOUTS[0], dst, stride, ty_type, src + 1);
+  for (bs = 1; bs < OD_TXSIZES; bs++) {
+    int size;
+    int offset;
+    /* Length of block size > 4 */
+    size = 1 << (OD_LOG_BSIZE0 + bs);
+    /* Offset is the size of the previous block squared. */
+    offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs);
+    if (n >= size) {
+      /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */
+      od_raster_from_band(OD_LAYOUTS[bs], dst, stride, ty_type, src + offset);
+    }
+  }
+  dst[0] = src[0];
+}
+
+/** Perform a single stage of conversion from a coefficient block in
+ * raster order into coding scan order
+ *
+ * @param [in]     layout  scan order specification
+ * @param [out]    dst     destination vector
+ * @param [in]     src     source coefficient block
+ * @param [in]     stride  source vector row stride
+ */
+static void od_band_from_raster_16(const band_layout *layout, int16_t *dst,
+ const int16_t *src, int stride) {
+  int i;
+  int len;
+  len = layout->band_offsets[layout->nb_bands];
+  for (i = 0; i < len; i++) {
+    dst[i] = src[layout->dst_table[DCT_DCT][i][1]*stride + layout->dst_table[DCT_DCT][i][0]];
+  }
+}
+
+/** Converts a coefficient block in raster order into a vector in
+ * coding scan order with the PVQ partitions laid out one after
+ * another.  This works in stages; the 4x4 conversion is applied to
+ * the coefficients nearest DC, then the 8x8 applied to the 8x8 block
+ * nearest DC that was not already coded by 4x4, then 16x16 following
+ * the same pattern.
+ *
+ * @param [out]    dst        destination vector
+ * @param [in]     n          block size (along one side)
+ * @param [in]     src        source coefficient block
+ * @param [in]     stride     source vector row stride
+ */
+void od_raster_to_coding_order_16(int16_t *dst, int n, const int16_t *src,
+ int stride) {
+  int bs;
+  /* dst + 1 because DC is not included for 4x4 blocks. */
+  od_band_from_raster_16(OD_LAYOUTS[0], dst + 1, src, stride);
+  for (bs = 1; bs < OD_TXSIZES; bs++) {
+    int size;
+    int offset;
+    /* Length of block size > 4. */
+    size = 1 << (OD_LOG_BSIZE0 + bs);
+    /* Offset is the size of the previous block squared. */
+    offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs);
+    if (n >= size) {
+      /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */
+      od_band_from_raster_16(OD_LAYOUTS[bs], dst + offset, src, stride);
+    }
+  }
+  dst[0] = src[0];
+}
diff --git a/av1/common/partition.h b/av1/common/partition.h
new file mode 100644
index 0000000..5ee7f15
--- /dev/null
+++ b/av1/common/partition.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_partition_H)
+# define _partition_H
+
+#include "av1/common/enums.h"
+#include "odintrin.h"
+
+typedef unsigned char index_pair[2];
+
+typedef struct {
+  const index_pair **const dst_table;
+  int size;
+  int nb_bands;
+  const int *const band_offsets;
+} band_layout;
+
+extern const int *const OD_BAND_OFFSETS[OD_TXSIZES + 1];
+
+void od_raster_to_coding_order(int16_t *dst, int n,  TX_TYPE ty_type,
+ const int16_t *src, int stride);
+
+void od_coding_order_to_raster(int16_t *dst, int stride,  TX_TYPE ty_type,
+ const int16_t *src, int n);
+
+void od_raster_to_coding_order_16(int16_t *dst, int n, const int16_t *src,
+ int stride);
+
+#endif
diff --git a/av1/common/pvq.c b/av1/common/pvq.c
new file mode 100644
index 0000000..81d0839
--- /dev/null
+++ b/av1/common/pvq.c
@@ -0,0 +1,954 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "odintrin.h"
+#include "partition.h"
+#include "pvq.h"
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Quantization matrices for 8x8. For other block sizes, we currently just do
+   resampling. */
+/* Flat quantization, i.e. optimize for PSNR. */
+const int OD_QM8_Q4_FLAT[] = {
+  16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16
+};
+# if 0
+/* M1: MPEG2 matrix for inter (which has a dead zone). */
+const int OD_QM8_Q4[] = {
+  16, 17, 18, 19, 20, 21, 22, 23,
+  17, 18, 19, 20, 21, 22, 23, 24,
+  18, 19, 20, 21, 22, 23, 24, 25,
+  19, 20, 21, 22, 23, 24, 26, 27,
+  20, 21, 22, 23, 25, 26, 27, 28,
+  21, 22, 23, 24, 26, 27, 28, 30,
+  22, 23, 24, 26, 27, 28, 30, 31,
+  23, 24, 25, 27, 28, 30, 31, 33};
+# endif
+# if 0
+/* M2: MPEG2 matrix for intra (no dead zone). */
+const int OD_QM8_Q4[] = {
+  16, 16, 19, 22, 22, 26, 26, 27,
+  16, 16, 22, 22, 26, 27, 27, 29,
+  19, 22, 26, 26, 27, 29, 29, 35,
+  22, 24, 27, 27, 29, 32, 34, 38,
+  26, 27, 29, 29, 32, 35, 38, 46,
+  27, 29, 34, 34, 35, 40, 46, 56,
+  29, 34, 34, 37, 40, 48, 56, 69,
+  34, 37, 38, 40, 48, 58, 69, 83
+};
+# endif
+# if 0
+/* M3: Taken from dump_psnrhvs. */
+const int OD_QM8_Q4[] = {
+  16, 16, 17, 20, 24, 29, 36, 42,
+  16, 17, 17, 19, 22, 26, 31, 37,
+  17, 17, 21, 23, 26, 30, 34, 40,
+  20, 19, 23, 28, 31, 35, 39, 45,
+  24, 22, 26, 31, 36, 41, 46, 51,
+  29, 26, 30, 35, 41, 47, 52, 58,
+  36, 31, 34, 39, 46, 52, 59, 66,
+  42, 37, 40, 45, 51, 58, 66, 73
+};
+# endif
+# if 1
+/* M4: a compromise equal to .5*(M3 + .5*(M2+transpose(M2))) */
+const int OD_QM8_Q4_HVS[] = {
+  16, 16, 18, 21, 24, 28, 32, 36,
+  16, 17, 20, 21, 24, 27, 31, 35,
+  18, 20, 24, 25, 27, 31, 33, 38,
+  21, 21, 25, 28, 30, 34, 37, 42,
+  24, 24, 27, 30, 34, 38, 43, 49,
+  28, 27, 31, 34, 38, 44, 50, 58,
+  32, 31, 33, 37, 43, 50, 58, 68,
+  36, 35, 38, 42, 49, 58, 68, 78
+};
+#endif
+
+/* Constants for the beta parameter, which controls how activity masking is
+   used.
+   beta = 1 / (1 - alpha), so when beta is 1, alpha is 0 and activity
+   masking is disabled. When beta is 1.5, activity masking is used. Note that
+   activity masking is neither used for 4x4 blocks nor for chroma. */
+#define OD_BETA(b) OD_QCONST32(b, OD_BETA_SHIFT)
+static const od_val16 OD_PVQ_BETA4_LUMA[1] = {OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA8_LUMA[4] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA16_LUMA[7] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA32_LUMA[10] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA64_LUMA[13] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
+
+static const od_val16 OD_PVQ_BETA4_LUMA_MASKING[1] = {OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA8_LUMA_MASKING[4] = {OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5)};
+static const od_val16 OD_PVQ_BETA16_LUMA_MASKING[7] = {OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
+ OD_BETA(1.5)};
+static const od_val16 OD_PVQ_BETA32_LUMA_MASKING[10] = {OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5)};
+static const od_val16 OD_PVQ_BETA64_LUMA_MASKING[13] = {OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5),
+ OD_BETA(1.5), OD_BETA(1.5)};
+
+static const od_val16 OD_PVQ_BETA4_CHROMA[1] = {OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA8_CHROMA[4] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA16_CHROMA[7] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA32_CHROMA[10] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.)};
+static const od_val16 OD_PVQ_BETA64_CHROMA[13] = {OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.),
+ OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)};
+
+const od_val16 *const OD_PVQ_BETA[2][OD_NPLANES_MAX][OD_TXSIZES + 1] = {
+ {{OD_PVQ_BETA4_LUMA, OD_PVQ_BETA8_LUMA,
+   OD_PVQ_BETA16_LUMA, OD_PVQ_BETA32_LUMA},
+  {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
+   OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA},
+  {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
+   OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}},
+ {{OD_PVQ_BETA4_LUMA_MASKING, OD_PVQ_BETA8_LUMA_MASKING,
+   OD_PVQ_BETA16_LUMA_MASKING, OD_PVQ_BETA32_LUMA_MASKING},
+  {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
+   OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA},
+  {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA,
+   OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}}
+};
+
+void od_adapt_pvq_ctx_reset(od_pvq_adapt_ctx *state, int is_keyframe) {
+  od_pvq_codeword_ctx *ctx;
+  int i;
+  int pli;
+  int bs;
+  ctx = &state->pvq_codeword_ctx;
+  generic_model_init(&state->pvq_param_model[0]);
+  generic_model_init(&state->pvq_param_model[1]);
+  generic_model_init(&state->pvq_param_model[2]);
+  for (i = 0; i < 2*OD_TXSIZES; i++) {
+    ctx->pvq_adapt[4*i + OD_ADAPT_K_Q8] = 384;
+    ctx->pvq_adapt[4*i + OD_ADAPT_SUM_EX_Q8] = 256;
+    ctx->pvq_adapt[4*i + OD_ADAPT_COUNT_Q8] = 104;
+    ctx->pvq_adapt[4*i + OD_ADAPT_COUNT_EX_Q8] = 128;
+  }
+  ctx->pvq_k1_increment = 128;
+  OD_CDFS_INIT(ctx->pvq_k1_cdf, ctx->pvq_k1_increment);
+  for (pli = 0; pli < OD_NPLANES_MAX; pli++) {
+    for (bs = 0; bs < OD_TXSIZES; bs++)
+    for (i = 0; i < PVQ_MAX_PARTITIONS; i++) {
+      state->pvq_exg[pli][bs][i] = 2 << 16;
+    }
+  }
+  for (i = 0; i < OD_TXSIZES*PVQ_MAX_PARTITIONS; i++) {
+    state->pvq_ext[i] = is_keyframe ? 24576 : 2 << 16;
+  }
+  state->pvq_gaintheta_increment = 128;
+  OD_CDFS_INIT(state->pvq_gaintheta_cdf, state->pvq_gaintheta_increment >> 2);
+  state->pvq_skip_dir_increment = 128;
+  OD_CDFS_INIT(state->pvq_skip_dir_cdf, state->pvq_skip_dir_increment >> 2);
+  ctx->pvq_split_increment = 128;
+  OD_CDFS_INIT(ctx->pvq_split_cdf, ctx->pvq_split_increment >> 1);
+}
+
+/* QMs are arranged from smallest to largest blocksizes, first for
+   blocks with decimation=0, followed by blocks with decimation=1.*/
+int od_qm_offset(int bs, int xydec)
+{
+    return xydec*OD_QM_STRIDE + OD_QM_OFFSET(bs);
+}
+
+/* Initialize the quantization matrix. */
+// Note: When varying scan orders for hybrid transform is used by PVQ,
+// since AOM does not use magnitude compensation (i.e. simply x16 for all coeffs),
+// we don't need separate qm and qm_inv for each transform type.
+void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm) {
+  int i;
+  int j;
+  int16_t y[OD_TXSIZE_MAX*OD_TXSIZE_MAX];
+  int16_t y_inv[OD_TXSIZE_MAX*OD_TXSIZE_MAX];
+  int16_t *x1;
+  int16_t *x1_inv;
+  int off;
+  int bs;
+  int xydec;
+  for (bs = 0; bs < OD_TXSIZES; bs++) {
+    for (xydec = 0; xydec < 2; xydec++) {
+      off = od_qm_offset(bs, xydec);
+      x1 = x + off;
+      x1_inv = x_inv + off;
+      for (i = 0; i < 4 << bs; i++) {
+        for (j = 0; j < 4 << bs; j++) {
+          double mag;
+          mag = 1.0;
+          if (i == 0 && j == 0) {
+            mag = 1.0;
+          }
+          else {
+            mag /= 0.0625*qm[(i << 1 >> bs)*8 + (j << 1 >> bs)];
+            OD_ASSERT(mag > 0.0);
+          }
+          /*Convert to fit in 16 bits.*/
+          y[i*(4 << bs) + j] = (int16_t)OD_MINI(OD_QM_SCALE_MAX,
+           (int32_t)floor(.5 + mag*OD_QM_SCALE));
+          y_inv[i*(4 << bs) + j] = (int16_t)floor(.5
+           + OD_QM_SCALE*OD_QM_INV_SCALE/(double)y[i*(4 << bs) + j]);
+        }
+      }
+      od_raster_to_coding_order_16(x1, 4 << bs, y, 4 << bs);
+      od_raster_to_coding_order_16(x1_inv, 4 << bs, y_inv, 4 << bs);
+    }
+  }
+}
+
+/* Maps each possible size (n) in the split k-tokenizer to a different value.
+   Possible values of n are:
+   2, 3, 4, 7, 8, 14, 15, 16, 31, 32, 63, 64, 127, 128
+   Since we don't care about the order (even in the bit-stream) the simplest
+   ordering (implemented here) is:
+   14, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 */
+int od_pvq_size_ctx(int n) {
+  int logn;
+  int odd;
+  logn = OD_ILOG(n - 1);
+  odd = n & 1;
+  return 2*logn - 1 - odd - 7*(n == 14);
+}
+
+/* Maps a length n to a context for the (k=1, n<=16) coder, with a special
+   case when n is the original length (orig_length=1) of the vector (i.e. we
+   haven't split it yet). For orig_length=0, we use the same mapping as
+   od_pvq_size_ctx() up to n=16. When orig_length=1, we map lengths
+   7, 8, 14, 15 to contexts 8 to 11. */
+int od_pvq_k1_ctx(int n, int orig_length) {
+  if (orig_length) return 8 + 2*(n > 8) + (n & 1);
+  else return od_pvq_size_ctx(n);
+}
+
+/* Indexing for the packed quantization matrices. */
+int od_qm_get_index(int bs, int band) {
+  /* The -band/3 term is due to the fact that we force corresponding horizontal
+     and vertical bands to have the same quantization. */
+  OD_ASSERT(bs >= 0 && bs < OD_TXSIZES);
+  return bs*(bs + 1) + band - band/3;
+}
+
+#if !defined(OD_FLOAT_PVQ)
+/*See celt/mathops.c in Opus and tools/cos_search.c.*/
+static int16_t od_pvq_cos_pi_2(int16_t x)
+{
+  int16_t x2;
+  x2 = OD_MULT16_16_Q15(x, x);
+  return OD_MINI(32767, (1073758164 - x*x + x2*(-7654 + OD_MULT16_16_Q16(x2,
+   16573 + OD_MULT16_16_Q16(-2529, x2)))) >> 15);
+}
+#endif
+
+/*Approximates cos(x) for -pi < x < pi.
+  Input is in OD_THETA_SCALE.*/
+od_val16 od_pvq_cos(od_val32 x) {
+#if defined(OD_FLOAT_PVQ)
+  return cos(x);
+#else
+  /*Wrap x around by masking, since cos is periodic.*/
+  x = x & 0x0001ffff;
+  if (x > (1 << 16)) {
+    x = (1 << 17) - x;
+  }
+  if (x & 0x00007fff) {
+    if (x < (1 << 15)) {
+       return od_pvq_cos_pi_2((int16_t)x);
+    }
+    else {
+      return -od_pvq_cos_pi_2((int16_t)(65536 - x));
+    }
+  }
+  else {
+    if (x & 0x0000ffff) {
+      return 0;
+    }
+    else if (x & 0x0001ffff) {
+      return -32767;
+    }
+    else {
+      return 32767;
+    }
+  }
+#endif
+}
+
+/*Approximates sin(x) for 0 <= x < pi.
+  Input is in OD_THETA_SCALE.*/
+od_val16 od_pvq_sin(od_val32 x) {
+#if defined(OD_FLOAT_PVQ)
+  return sin(x);
+#else
+  return od_pvq_cos(32768 - x);
+#endif
+}
+
+#if !defined(OD_FLOAT_PVQ)
+/* Computes an upper-bound on the number of bits required to store the L2 norm
+   of a vector (excluding sign). */
+int od_vector_log_mag(const od_coeff *x, int n) {
+  int i;
+  int32_t sum;
+  sum = 0;
+  for (i = 0; i < n; i++) {
+    int16_t tmp;
+    tmp = x[i] >> 8;
+    sum += tmp*(int32_t)tmp;
+  }
+  /* We add one full bit (instead of rounding OD_ILOG() up) for safety because
+     the >> 8 above causes the sum to be slightly underestimated. */
+  return 8 + 1 + OD_ILOG(n + sum)/2;
+}
+#endif
+
+/** Computes Householder reflection that aligns the reference r to the
+ *  dimension in r with the greatest absolute value. The reflection
+ *  vector is returned in r.
+ *
+ * @param [in,out]  r      reference vector to be reflected, reflection
+ *                         also returned in r
+ * @param [in]      n      number of dimensions in r
+ * @param [in]      gr     gain of reference vector
+ * @param [out]     sign   sign of reflection
+ * @return                 dimension number to which reflection aligns
+ **/
+int od_compute_householder(od_val16 *r, int n, od_val32 gr, int *sign,
+ int shift) {
+  int m;
+  int i;
+  int s;
+  od_val16 maxr;
+  OD_UNUSED(shift);
+  /* Pick component with largest magnitude. Not strictly
+   * necessary, but it helps numerical stability */
+  m = 0;
+  maxr = 0;
+  for (i = 0; i < n; i++) {
+    if (OD_ABS(r[i]) > maxr) {
+      maxr = OD_ABS(r[i]);
+      m = i;
+    }
+  }
+  s = r[m] > 0 ? 1 : -1;
+  /* This turns r into a Householder reflection vector that would reflect
+   * the original r[] to e_m */
+  r[m] += OD_SHR_ROUND(gr*s, shift);
+  *sign = s;
+  return m;
+}
+
+#if !defined(OD_FLOAT_PVQ)
+#define OD_RCP_INSHIFT 15
+#define OD_RCP_OUTSHIFT 14
+static od_val16 od_rcp(od_val16 x)
+{
+  int i;
+  od_val16 n;
+  od_val16 r;
+  i = OD_ILOG(x) - 1;
+  /*n is Q15 with range [0,1).*/
+  n = OD_VSHR_ROUND(x, i - OD_RCP_INSHIFT) - (1 << OD_RCP_INSHIFT);
+  /*Start with a linear approximation:
+    r = 1.8823529411764706-0.9411764705882353*n.
+    The coefficients and the result are Q14 in the range [15420,30840].*/
+  r = 30840 + OD_MULT16_16_Q15(-15420, n);
+  /*Perform two Newton iterations:
+    r -= r*((r*n)-1.Q15)
+       = r*((r*n)+(r-1.Q15)).*/
+  r = r - OD_MULT16_16_Q15(r, (OD_MULT16_16_Q15(r, n) + r - 32768));
+  /*We subtract an extra 1 in the second iteration to avoid overflow; it also
+     neatly compensates for truncation error in the rest of the process.*/
+  r = r - (1 + OD_MULT16_16_Q15(r, OD_MULT16_16_Q15(r, n) + r - 32768));
+  /*r is now the Q15 solution to 2/(n+1), with a maximum relative error
+     of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
+     error of 1.24665/32768.*/
+  return OD_VSHR_ROUND(r, i - OD_RCP_OUTSHIFT);
+}
+#endif
+
+/** Applies Householder reflection from compute_householder(). The
+ * reflection is its own inverse.
+ *
+ * @param [out]     out    reflected vector
+ * @param [in]      x      vector to be reflected
+ * @param [in]      r      reflection
+ * @param [in]      n      number of dimensions in x,r
+ */
+void od_apply_householder(od_val16 *out, const od_val16 *x, const od_val16 *r,
+ int n) {
+  int i;
+  od_val32 proj;
+  od_val16 proj_1;
+  od_val32 l2r;
+#if !defined(OD_FLOAT_PVQ)
+  od_val16 proj_norm;
+  od_val16 l2r_norm;
+  od_val16 rcp;
+  int proj_shift;
+  int l2r_shift;
+  int outshift;
+#endif
+  /*FIXME: Can we get l2r and/or l2r_shift from an earlier computation?*/
+  l2r = 0;
+  for (i = 0; i < n; i++) {
+    l2r += OD_MULT16_16(r[i], r[i]);
+  }
+  /* Apply Householder reflection */
+  proj = 0;
+  for (i = 0; i < n; i++) {
+    proj += OD_MULT16_16(r[i], x[i]);
+  }
+#if defined(OD_FLOAT_PVQ)
+  proj_1 = proj*2./(1e-100 + l2r);
+  for (i = 0; i < n; i++) {
+    out[i] = x[i] - r[i]*proj_1;
+  }
+#else
+  /*l2r_norm is [0.5, 1.0[ in Q15.*/
+  l2r_shift = (OD_ILOG(l2r) - 1) - 14;
+  l2r_norm = OD_VSHR_ROUND(l2r, l2r_shift);
+  rcp = od_rcp(l2r_norm);
+  proj_shift = (OD_ILOG(abs(proj)) - 1) - 14;
+  /*proj_norm is [0.5, 1.0[ in Q15.*/
+  proj_norm = OD_VSHR_ROUND(proj, proj_shift);
+  proj_1 = OD_MULT16_16_Q15(proj_norm, rcp);
+  /*The proj*2. in the float code becomes -1 in the final outshift.
+    The sign of l2r_shift is positive since we're taking the reciprocal of
+     l2r_norm and this is a right shift.*/
+  outshift = OD_MINI(30, OD_RCP_OUTSHIFT - proj_shift - 1 + l2r_shift);
+  if (outshift >= 0) {
+    for (i = 0; i < n; i++) {
+      int32_t tmp;
+      tmp = OD_MULT16_16(r[i], proj_1);
+      tmp = OD_SHR_ROUND(tmp, outshift);
+      out[i] = x[i] - tmp;
+    }
+  }
+  else {
+    /*FIXME: Can we make this case impossible?
+      Right now, if r[] is all zeros except for 1, 2, or 3 ones, and
+       if x[] is all zeros except for large values at the same position as the
+       ones in r[], then we can end up with a shift of -1.*/
+    for (i = 0; i < n; i++) {
+      int32_t tmp;
+      tmp = OD_MULT16_16(r[i], proj_1);
+      tmp = OD_SHL(tmp, -outshift);
+      out[i] = x[i] - tmp;
+    }
+  }
+#endif
+}
+
+#if !defined(OD_FLOAT_PVQ)
+#define OD_EXP2_INSHIFT 15
+#define OD_EXP2_FRACSHIFT 15
+#define OD_EXP2_OUTSHIFT 15
+static const int32_t OD_EXP2_C[5] = {32768, 22709, 7913, 1704, 443};
+/*Output is [1.0, 2.0) in Q(OD_EXP2_FRACSHIFT).
+  It does not include the integer offset, which is added in od_exp2 after the
+   final shift.*/
+static int32_t od_exp2_frac(int32_t x)
+{
+  return OD_MULT16_16_Q15(x, (OD_EXP2_C[1] + OD_MULT16_16_Q15(x,
+   (OD_EXP2_C[2] + OD_MULT16_16_Q15(x, (OD_EXP2_C[3]
+   + OD_MULT16_16_Q15(x, OD_EXP2_C[4])))))));
+}
+
+/** Base-2 exponential approximation (2^x) with Q15 input and output.*/
+static int32_t od_exp2(int32_t x)
+{
+  int integer;
+  int32_t frac;
+  integer = x >> OD_EXP2_INSHIFT;
+  if (integer > 14)
+    return 0x7f000000;
+  else if (integer < -15)
+    return 0;
+  frac = od_exp2_frac(x - OD_SHL(integer, OD_EXP2_INSHIFT));
+  return OD_VSHR_ROUND(OD_EXP2_C[0] + frac, -integer) + 1;
+}
+
+#define OD_LOG2_INSHIFT 15
+#define OD_LOG2_OUTSHIFT 15
+#define OD_LOG2_INSCALE_1 (1./(1 << OD_LOG2_INSHIFT))
+#define OD_LOG2_OUTSCALE (1 << OD_LOG2_OUTSHIFT)
+static int16_t od_log2(int16_t x)
+{
+  return x + OD_MULT16_16_Q15(x, (14482 + OD_MULT16_16_Q15(x, (-23234
+   + OD_MULT16_16_Q15(x, (13643 + OD_MULT16_16_Q15(x, (-6403
+   + OD_MULT16_16_Q15(x, 1515)))))))));
+}
+
+static int32_t od_pow(int32_t x, od_val16 beta)
+{
+  int16_t t;
+  int xshift;
+  int log2_x;
+  od_val32 logr;
+  /*FIXME: this conditional is to avoid doing log2(0).*/
+  if (x == 0)
+    return 0;
+  log2_x = (OD_ILOG(x) - 1);
+  xshift = log2_x - OD_LOG2_INSHIFT;
+  /*t should be in range [0.0, 1.0[ in Q(OD_LOG2_INSHIFT).*/
+  t = OD_VSHR(x, xshift) - (1 << OD_LOG2_INSHIFT);
+  /*log2(g/OD_COMPAND_SCALE) = log2(x) - OD_COMPAND_SHIFT in
+     Q(OD_LOG2_OUTSHIFT).*/
+  logr = od_log2(t) + (log2_x - OD_COMPAND_SHIFT)*OD_LOG2_OUTSCALE;
+  logr = OD_MULT16_32_QBETA(beta, logr);
+  return od_exp2(logr);
+}
+#endif
+
+/** Gain companding: raises gain to the power 1/beta for activity masking.
+ *
+ * @param [in]  g     real (uncompanded) gain
+ * @param [in]  q0    uncompanded quality parameter
+ * @param [in]  beta  activity masking beta param (exponent)
+ * @return            g^(1/beta)
+ */
+static od_val32 od_gain_compand(od_val32 g, int q0, od_val16 beta) {
+#if defined(OD_FLOAT_PVQ)
+  if (beta == 1) return OD_ROUND32(OD_CGAIN_SCALE*g/(double)q0);
+  else {
+    return OD_ROUND32(OD_CGAIN_SCALE*OD_COMPAND_SCALE*pow(g*OD_COMPAND_SCALE_1,
+     1./beta)/(double)q0);
+  }
+#else
+  if (beta == OD_BETA(1)) return (OD_CGAIN_SCALE*g + (q0 >> 1))/q0;
+  else {
+    int32_t expr;
+    /*FIXME: This is 1/beta in Q(BETA_SHIFT), should use od_rcp() instead.*/
+    expr = od_pow(g, OD_ROUND16((1 << (2*OD_BETA_SHIFT))/(double)beta));
+    expr <<= OD_CGAIN_SHIFT + OD_COMPAND_SHIFT - OD_EXP2_OUTSHIFT;
+    return (expr + (q0 >> 1))/q0;
+  }
+#endif
+}
+
+#if !defined(OD_FLOAT_PVQ)
+#define OD_SQRT_INSHIFT 16
+#define OD_SQRT_OUTSHIFT 15
+static int16_t od_rsqrt_norm(int16_t x);
+
+static int16_t od_sqrt_norm(int32_t x)
+{
+  OD_ASSERT(x < 65536);
+  return OD_MINI(OD_SHR_ROUND(x*od_rsqrt_norm(x), OD_SQRT_OUTSHIFT), 32767);
+}
+
+static int16_t od_sqrt(int32_t x, int *sqrt_shift)
+{
+  int k;
+  int s;
+  int32_t t;
+  if (x == 0) {
+    *sqrt_shift = 0;
+     return 0;
+  }
+  OD_ASSERT(x < (1 << 30));
+  k = ((OD_ILOG(x) - 1) >> 1);
+  /*t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s).
+    Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/
+  s = 2*k - (OD_SQRT_INSHIFT - 2);
+  t = OD_VSHR(x, s);
+  /*We want to express od_sqrt() in terms of od_sqrt_norm(), which is
+     defined as (2^OUTSHIFT)*sqrt(t*(2^-INSHIFT)) with t=x*(2^-s).
+    This simplifies to 2^(OUTSHIFT-(INSHIFT/2)-(s/2))*sqrt(x), so the caller
+     needs to shift right by OUTSHIFT - INSHIFT/2 - s/2.*/
+  *sqrt_shift = OD_SQRT_OUTSHIFT - ((s + OD_SQRT_INSHIFT) >> 1);
+  return od_sqrt_norm(t);
+}
+#endif
+
+/** Gain expanding: raises gain to the power beta for activity masking.
+ *
+ * @param [in]  cg    companded gain
+ * @param [in]  q0    uncompanded quality parameter
+ * @param [in]  beta  activity masking beta param (exponent)
+ * @return            g^beta
+ */
+od_val32 od_gain_expand(od_val32 cg0, int q0, od_val16 beta) {
+  if (beta == OD_BETA(1)) {
+    /*The multiply fits into 28 bits because the expanded gain has a range from
+       0 to 2^20.*/
+    return OD_SHR_ROUND(cg0*q0, OD_CGAIN_SHIFT);
+  }
+  else if (beta == OD_BETA(1.5)) {
+#if defined(OD_FLOAT_PVQ)
+    double cg;
+    cg = cg0*OD_CGAIN_SCALE_1;
+    cg *= q0*OD_COMPAND_SCALE_1;
+    return OD_ROUND32(OD_COMPAND_SCALE*cg*sqrt(cg));
+#else
+    int32_t irt;
+    int64_t tmp;
+    int sqrt_inshift;
+    int sqrt_outshift;
+    /*cg0 is in Q(OD_CGAIN_SHIFT) and we need to divide it by
+       2^OD_COMPAND_SHIFT.*/
+    irt = od_sqrt(cg0*q0, &sqrt_outshift);
+    sqrt_inshift = (OD_CGAIN_SHIFT + OD_COMPAND_SHIFT) >> 1;
+    /*tmp is in Q(OD_CGAIN_SHIFT + OD_COMPAND_SHIFT).*/
+    tmp = cg0*q0*(int64_t)irt;
+    /*Expanded gain must be in Q(OD_COMPAND_SHIFT), thus OD_COMPAND_SHIFT is
+       not included here.*/
+    return OD_VSHR_ROUND(tmp, OD_CGAIN_SHIFT + sqrt_outshift + sqrt_inshift);
+#endif
+  }
+  else {
+#if defined(OD_FLOAT_PVQ)
+    /*Expanded gain must be in Q(OD_COMPAND_SHIFT), hence the multiply by
+       OD_COMPAND_SCALE.*/
+    double cg;
+    cg = cg0*OD_CGAIN_SCALE_1;
+    return OD_ROUND32(OD_COMPAND_SCALE*pow(cg*q0*OD_COMPAND_SCALE_1, beta));
+#else
+    int32_t expr;
+    int32_t cg;
+    cg = OD_SHR_ROUND(cg0*q0, OD_CGAIN_SHIFT);
+    expr = od_pow(cg, beta);
+    /*Expanded gain must be in Q(OD_COMPAND_SHIFT), hence the subtraction by
+       OD_COMPAND_SHIFT.*/
+    return OD_SHR_ROUND(expr, OD_EXP2_OUTSHIFT - OD_COMPAND_SHIFT);
+#endif
+  }
+}
+
+/** Computes the raw and quantized/companded gain of a given input
+ * vector
+ *
+ * @param [in]      x      vector of input data
+ * @param [in]      n      number of elements in vector x
+ * @param [in]      q0     quantizer
+ * @param [out]     g      raw gain
+ * @param [in]      beta   activity masking beta param
+ * @param [in]      bshift shift to be applied to raw gain
+ * @return                 quantized/companded gain
+ */
+od_val32 od_pvq_compute_gain(const od_val16 *x, int n, int q0, od_val32 *g,
+ od_val16 beta, int bshift) {
+  int i;
+  od_val32 acc;
+#if !defined(OD_FLOAT_PVQ)
+  od_val32 irt;
+  int sqrt_shift;
+#else
+  OD_UNUSED(bshift);
+#endif
+  acc = 0;
+  for (i = 0; i < n; i++) {
+    acc += x[i]*(od_val32)x[i];
+  }
+#if defined(OD_FLOAT_PVQ)
+  *g = sqrt(acc);
+#else
+  irt = od_sqrt(acc, &sqrt_shift);
+  *g = OD_VSHR_ROUND(irt, sqrt_shift - bshift);
+#endif
+  /* Normalize gain by quantization step size and apply companding
+     (if ACTIVITY != 1). */
+  return od_gain_compand(*g, q0, beta);
+}
+
+static od_val16 od_beta_rcp(od_val16 beta){
+  if (beta == OD_BETA(1.))
+    return OD_BETA(1.);
+  else if (beta == OD_BETA(1.5))
+    return OD_BETA(1./1.5);
+  else {
+    od_val16 rcp_beta;
+    /*Shift by 1 less, transposing beta to range [.5, .75] and thus < 32768.*/
+    rcp_beta = od_rcp(beta << (OD_RCP_INSHIFT - 1 - OD_BETA_SHIFT));
+    return OD_SHR_ROUND(rcp_beta, OD_RCP_OUTSHIFT + 1 - OD_BETA_SHIFT);
+  }
+}
+
+/** Compute theta quantization range from quantized/companded gain
+ *
+ * @param [in]      qcg    quantized companded gain value
+ * @param [in]      beta   activity masking beta param
+ * @return                 max theta value
+ */
+int od_pvq_compute_max_theta(od_val32 qcg, od_val16 beta){
+  /* Set angular resolution (in ra) to match the encoded gain */
+#if defined(OD_FLOAT_PVQ)
+  int ts = (int)floor(.5 + qcg*OD_CGAIN_SCALE_1*M_PI/(2*beta));
+#else
+  int ts = OD_SHR_ROUND(qcg*OD_MULT16_16_QBETA(OD_QCONST32(M_PI/2,
+   OD_CGAIN_SHIFT), od_beta_rcp(beta)), OD_CGAIN_SHIFT*2);
+#endif
+  /* Special case for low gains -- will need to be tuned anyway */
+  if (qcg < OD_QCONST32(1.4, OD_CGAIN_SHIFT)) ts = 1;
+  return ts;
+}
+
+/** Decode quantized theta value from coded value
+ *
+ * @param [in]      t          quantized companded gain value
+ * @param [in]      max_theta  maximum theta value
+ * @return                     decoded theta value
+ */
+od_val32 od_pvq_compute_theta(int t, int max_theta) {
+  if (max_theta != 0) {
+#if defined(OD_FLOAT_PVQ)
+    return OD_MINI(t, max_theta - 1)*.5*M_PI/max_theta;
+#else
+    return (OD_MAX_THETA_SCALE*OD_MINI(t, max_theta - 1)
+     + (max_theta >> 1))/max_theta;
+#endif
+  }
+  else return 0;
+}
+
+#define OD_ITHETA_SHIFT 15
+/** Compute the number of pulses used for PVQ encoding a vector from
+ * available metrics (encode and decode side)
+ *
+ * @param [in]      qcg        quantized companded gain value
+ * @param [in]      itheta     quantized PVQ error angle theta
+ * @param [in]      theta      PVQ error angle theta
+ * @param [in]      noref      indicates present or lack of reference
+ *                             (prediction)
+ * @param [in]      n          number of elements to be coded
+ * @param [in]      beta       activity masking beta param
+ * @param [in]      nodesync   do not use info that depends on the reference
+ * @return                     number of pulses to use for coding
+ */
+int od_pvq_compute_k(od_val32 qcg, int itheta, od_val32 theta, int noref, int n,
+ od_val16 beta, int nodesync) {
+  if (noref) {
+    if (qcg == 0) return 0;
+    if (n == 15 && qcg == OD_CGAIN_SCALE && beta > OD_BETA(1.25)) {
+      return 1;
+    }
+    else {
+#if defined(OD_FLOAT_PVQ)
+      return OD_MAXI(1, (int)floor(.5 + (qcg*OD_CGAIN_SCALE_1 - .2)*
+       sqrt((n + 3)/2)/beta));
+#else
+      od_val32 rt;
+      int sqrt_shift;
+      rt = od_sqrt((n + 3) >> 1, &sqrt_shift);
+      /*FIXME: get rid of 64-bit mul.*/
+      return OD_MAXI(1, OD_SHR_ROUND((int64_t)((qcg
+       - (int64_t)OD_QCONST32(.2, OD_CGAIN_SHIFT))*rt/(beta*OD_BETA_SCALE_1)),
+       OD_CGAIN_SHIFT + sqrt_shift));
+#endif
+    }
+  }
+  else {
+    if (itheta == 0) return 0;
+    /* Sets K according to gain and theta, based on the high-rate
+       PVQ distortion curves (see PVQ document). Low-rate will have to be
+       perceptually tuned anyway. We subtract 0.2 from the radius as an
+       approximation for the fact that the coefficients aren't identically
+       distributed within a band so at low gain the number of dimensions that
+       are likely to have a pulse is less than n. */
+    if (nodesync) {
+#if defined(OD_FLOAT_PVQ)
+      return OD_MAXI(1, (int)floor(.5 + (itheta - .2)*sqrt((n + 2)/2)));
+#else
+      od_val32 rt;
+      int sqrt_outshift;
+      rt = od_sqrt((n + 2)/2, &sqrt_outshift);
+      /*FIXME: get rid of 64-bit mul.*/
+      return OD_MAXI(1, OD_VSHR_ROUND(((OD_SHL(itheta, OD_ITHETA_SHIFT)
+       - OD_QCONST32(.2, OD_ITHETA_SHIFT)))*(int64_t)rt,
+       sqrt_outshift + OD_ITHETA_SHIFT));
+#endif
+    }
+    else {
+      return OD_MAXI(1, (int)floor(.5 + (qcg*OD_CGAIN_SCALE_1*
+       od_pvq_sin(theta)*OD_TRIG_SCALE_1 - .2)*sqrt((n
+       + 2)/2)/(beta*OD_BETA_SCALE_1)));
+    }
+  }
+}
+
+#if !defined(OD_FLOAT_PVQ)
+#define OD_RSQRT_INSHIFT 16
+#define OD_RSQRT_OUTSHIFT 14
+/** Reciprocal sqrt approximation where the input is in the range [0.25,1) in
+     Q16 and the output is in the range (1.0, 2.0] in Q14.
+    Error is always within +/-1 of round(1/sqrt(t))*/
+static int16_t od_rsqrt_norm(int16_t t)
+{
+  int16_t n;
+  int32_t r;
+  int32_t r2;
+  int32_t ry;
+  int32_t y;
+  int32_t ret;
+  /* Range of n is [-16384,32767] ([-0.5,1) in Q15).*/
+  n = t - 32768;
+  OD_ASSERT(n >= -16384);
+  /*Get a rough initial guess for the root.
+    The optimal minimax quadratic approximation (using relative error) is
+     r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
+    Coefficients here, and the final result r, are Q14.*/
+  r = (23565 + OD_MULT16_16_Q15(n, (-13481 + OD_MULT16_16_Q15(n, 6711))));
+  /*We want y = t*r*r-1 in Q15, but t is 32-bit Q16 and r is Q14.
+    We can compute the result from n and r using Q15 multiplies with some
+     adjustment, carefully done to avoid overflow.*/
+  r2 = r*r;
+  y = (((r2 >> 15)*n + r2) >> 12) - 131077;
+  ry = r*y;
+  /*Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
+    This yields the Q14 reciprocal square root of the Q16 t, with a maximum
+     relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a peak
+     absolute error of 2.26591/16384.*/
+  ret = r + ((((ry >> 16)*(3*y) >> 3) - ry) >> 18);
+  OD_ASSERT(ret >= 16384 && ret < 32768);
+  return (int16_t)ret;
+}
+
+static int16_t od_rsqrt(int32_t x, int *rsqrt_shift)
+{
+   int k;
+   int s;
+   int16_t t;
+   k = (OD_ILOG(x) - 1) >> 1;
+  /*t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s).
+    Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/
+   s = 2*k - (OD_RSQRT_INSHIFT - 2);
+   t = OD_VSHR(x, s);
+   /*We want to express od_rsqrt() in terms of od_rsqrt_norm(), which is
+      defined as (2^OUTSHIFT)/sqrt(t*(2^-INSHIFT)) with t=x*(2^-s).
+     This simplifies to 2^(OUTSHIFT+(INSHIFT/2)+(s/2))/sqrt(x), so the caller
+      needs to shift right by OUTSHIFT + INSHIFT/2 + s/2.*/
+   *rsqrt_shift = OD_RSQRT_OUTSHIFT + ((s + OD_RSQRT_INSHIFT) >> 1);
+   return od_rsqrt_norm(t);
+}
+#endif
+
+/** Synthesizes one partition of coefficient values from a PVQ-encoded
+ * vector.  This 'partial' version is called by the encode loop where
+ * the Householder reflection has already been computed and there's no
+ * need to recompute it.
+ *
+ * @param [out]     xcoeff  output coefficient partition (x in math doc)
+ * @param [in]      ypulse  PVQ-encoded values (y in the math doc); in
+ *                          the noref case, this vector has n entries,
+ *                          in the reference case it contains n-1 entries
+ *                          (the m-th entry is not included)
+ * @param [in]      r       reference vector (prediction)
+ * @param [in]      n       number of elements in this partition
+ * @param [in]      noref   indicates presence or lack of prediction
+ * @param [in]      g       decoded quantized vector gain
+ * @param [in]      theta   decoded theta (prediction error)
+ * @param [in]      m       alignment dimension of Householder reflection
+ * @param [in]      s       sign of Householder reflection
+ * @param [in]      qm_inv  inverse of the QM with magnitude compensation
+ */
+void od_pvq_synthesis_partial(od_coeff *xcoeff, const od_coeff *ypulse,
+ const od_val16 *r16, int n, int noref, od_val32 g, od_val32 theta, int m, int s,
+ const int16_t *qm_inv) {
+  int i;
+  int yy;
+  od_val32 scale;
+  int nn;
+  int gshift;
+  int qshift;
+  OD_ASSERT(g != 0);
+  nn = n-(!noref); /* when noref==0, vector in is sized n-1 */
+  yy = 0;
+  for (i = 0; i < nn; i++)
+    yy += ypulse[i]*(int32_t)ypulse[i];
+  /* Shift required for the magnitude of the pre-qm synthesis to be guaranteed
+     to fit in 16 bits. In practice, the range will be 8192-16384 after scaling
+     most of the time. */
+  gshift = OD_MAXI(0, OD_ILOG(g) - 14);
+  /*scale is g/sqrt(yy) in Q(16-gshift) so that x[]*scale has a norm that fits
+     in 16 bits.*/
+  if (yy == 0) scale = 0;
+#if defined(OD_FLOAT_PVQ)
+  else {
+    scale = g/sqrt(yy);
+  }
+  OD_UNUSED(gshift);
+  OD_UNUSED(qshift);
+#else
+  else {
+    int rsqrt_shift;
+    int16_t rsqrt;
+    /*FIXME: should be < int64_t*/
+    int64_t tmp;
+    rsqrt = od_rsqrt(yy, &rsqrt_shift);
+    tmp = rsqrt*(int64_t)g;
+    scale = OD_VSHR_ROUND(tmp, rsqrt_shift + gshift - 16);
+  }
+  /* Shift to apply after multiplying by the inverse QM, taking into account
+     gshift. */
+  qshift = OD_QM_INV_SHIFT - gshift;
+#endif
+  if (noref) {
+    for (i = 0; i < n; i++) {
+      od_val32 x;
+      /* This multiply doesn't round, so it introduces some bias.
+         It would be nice (but not critical) to fix this. */
+      x = OD_MULT16_32_Q16(ypulse[i], scale);
+#if defined(OD_FLOAT_PVQ)
+      xcoeff[i] = (od_coeff)floor(.5
+       + x*(qm_inv[i]*OD_QM_INV_SCALE_1));
+#else
+      xcoeff[i] = OD_SHR_ROUND(x*qm_inv[i], qshift);
+#endif
+    }
+  }
+  else{
+    od_val16 x[MAXN];
+    scale = OD_ROUND32(scale*OD_TRIG_SCALE_1*od_pvq_sin(theta));
+    /* The following multiply doesn't round, but it's probably OK since
+       the Householder reflection is likely to undo most of the resulting
+       bias. */
+    for (i = 0; i < m; i++)
+      x[i] = OD_MULT16_32_Q16(ypulse[i], scale);
+    x[m] = OD_ROUND16(-s*(OD_SHR_ROUND(g, gshift))*OD_TRIG_SCALE_1*
+     od_pvq_cos(theta));
+    for (i = m; i < nn; i++)
+      x[i+1] = OD_MULT16_32_Q16(ypulse[i], scale);
+    od_apply_householder(x, x, r16, n);
+    for (i = 0; i < n; i++) {
+#if defined(OD_FLOAT_PVQ)
+      xcoeff[i] = (od_coeff)floor(.5 + (x[i]*(qm_inv[i]*OD_QM_INV_SCALE_1)));
+#else
+      xcoeff[i] = OD_SHR_ROUND(x[i]*qm_inv[i], qshift);
+#endif
+    }
+  }
+}
diff --git a/av1/common/pvq.h b/av1/common/pvq.h
new file mode 100644
index 0000000..5a49a84
--- /dev/null
+++ b/av1/common/pvq.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_pvq_H)
+# define _pvq_H (1)
+# include "generic_code.h"
+# include "odintrin.h"
+
+extern const int OD_QM8_Q4_FLAT[];
+extern const int OD_QM8_Q4_HVS[];
+
+extern const uint16_t EXP_CDF_TABLE[][16];
+extern const uint16_t LAPLACE_OFFSET[];
+
+# define PVQ_MAX_PARTITIONS (1 + 3*(OD_TXSIZES-1))
+
+# define OD_NOREF_ADAPT_SPEED (4)
+/* Normalized lambda for PVQ quantizer. Since we normalize the gain by q, the
+   distortion is normalized by q^2 and lambda does not need the q^2 factor.
+   At high rate, this would be log(2)/6, but we're using a slightly more
+   aggressive value, closer to:
+   Li, Xiang, et al. "Laplace distribution based Lagrangian rate distortion
+   optimization for hybrid video coding." Circuits and Systems for Video
+   Technology, IEEE Transactions on 19.2 (2009): 193-205.
+   */
+# define OD_PVQ_LAMBDA (.1146)
+
+#define OD_PVQ_SKIP_ZERO 1
+#define OD_PVQ_SKIP_COPY 2
+
+/* Maximum size for coding a PVQ band. */
+#define OD_MAX_PVQ_SIZE (1024)
+
+#if defined(OD_FLOAT_PVQ)
+#define OD_QM_SHIFT (15)
+#else
+#define OD_QM_SHIFT (11)
+#endif
+#define OD_QM_SCALE (1 << OD_QM_SHIFT)
+#if defined(OD_FLOAT_PVQ)
+#define OD_QM_SCALE_1 (1./OD_QM_SCALE)
+#endif
+#define OD_QM_SCALE_MAX 32767
+#define OD_QM_INV_SHIFT (12)
+#define OD_QM_INV_SCALE (1 << OD_QM_INV_SHIFT)
+#if defined(OD_FLOAT_PVQ)
+#define OD_QM_INV_SCALE_1 (1./OD_QM_INV_SCALE)
+#endif
+#define OD_QM_OFFSET(bs) ((((1 << 2*bs) - 1) << 2*OD_LOG_BSIZE0)/3)
+#define OD_QM_STRIDE (OD_QM_OFFSET(OD_TXSIZES))
+#define OD_QM_BUFFER_SIZE (2*OD_QM_STRIDE)
+
+#if !defined(OD_FLOAT_PVQ)
+#define OD_THETA_SHIFT (15)
+#define OD_THETA_SCALE ((1 << OD_THETA_SHIFT)*2./M_PI)
+#define OD_MAX_THETA_SCALE (1 << OD_THETA_SHIFT)
+#define OD_TRIG_SCALE (32768)
+#define OD_BETA_SHIFT (12)
+#define OD_BETA_SCALE_1 (1./(1 << OD_BETA_SHIFT))
+/*Multiplies 16-bit a by 32-bit b and keeps bits [16:64-OD_BETA_SHIFT-1].*/
+#define OD_MULT16_32_QBETA(a, b) \
+ ((int16_t)(a)*(int64_t)(int32_t)(b) >> OD_BETA_SHIFT)
+# define OD_MULT16_16_QBETA(a, b) \
+  ((((int16_t)(a))*((int32_t)(int16_t)(b))) >> OD_BETA_SHIFT)
+#define OD_CGAIN_SHIFT (8)
+#define OD_CGAIN_SCALE (1 << OD_CGAIN_SHIFT)
+#else
+#define OD_BETA_SCALE_1 (1.)
+#define OD_THETA_SCALE (1)
+#define OD_TRIG_SCALE (1)
+#define OD_CGAIN_SCALE (1)
+#endif
+#define OD_THETA_SCALE_1 (1./OD_THETA_SCALE)
+#define OD_TRIG_SCALE_1 (1./OD_TRIG_SCALE)
+#define OD_CGAIN_SCALE_1 (1./OD_CGAIN_SCALE)
+#define OD_CGAIN_SCALE_2 (OD_CGAIN_SCALE_1*OD_CGAIN_SCALE_1)
+
+/* Largest PVQ partition is half the coefficients of largest block size. */
+#define MAXN (OD_TXSIZE_MAX*OD_TXSIZE_MAX/2)
+
+#define OD_COMPAND_SHIFT (8 + OD_COEFF_SHIFT)
+#define OD_COMPAND_SCALE (1 << OD_COMPAND_SHIFT)
+#define OD_COMPAND_SCALE_1 (1./OD_COMPAND_SCALE)
+
+#define OD_QM_SIZE (OD_TXSIZES*(OD_TXSIZES + 1))
+
+#define OD_FLAT_QM 0
+#define OD_HVS_QM  1
+
+# define OD_NSB_ADAPT_CTXS (4)
+
+# define OD_ADAPT_K_Q8        0
+# define OD_ADAPT_SUM_EX_Q8   1
+# define OD_ADAPT_COUNT_Q8    2
+# define OD_ADAPT_COUNT_EX_Q8 3
+
+# define OD_ADAPT_NO_VALUE (-2147483647-1)
+
+typedef struct od_pvq_adapt_ctx  od_pvq_adapt_ctx;
+typedef struct od_pvq_codeword_ctx od_pvq_codeword_ctx;
+
+struct od_pvq_codeword_ctx {
+  int                 pvq_adapt[2*OD_TXSIZES*OD_NSB_ADAPT_CTXS];
+  int                 pvq_k1_increment;
+  /* CDFs are size 16 despite the fact that we're using less than that. */
+  uint16_t            pvq_k1_cdf[12][16];
+  uint16_t            pvq_split_cdf[22*7][8];
+  int                 pvq_split_increment;
+};
+
+struct od_pvq_adapt_ctx {
+  od_pvq_codeword_ctx pvq_codeword_ctx;
+  generic_encoder     pvq_param_model[3];
+  int                 pvq_ext[OD_TXSIZES*PVQ_MAX_PARTITIONS];
+  int                 pvq_exg[OD_NPLANES_MAX][OD_TXSIZES][PVQ_MAX_PARTITIONS];
+  int                 pvq_gaintheta_increment;
+  uint16_t        pvq_gaintheta_cdf[2*OD_TXSIZES*PVQ_MAX_PARTITIONS][16];
+  int                 pvq_skip_dir_increment;
+  uint16_t        pvq_skip_dir_cdf[2*(OD_TXSIZES-1)][7];
+};
+
+void od_adapt_pvq_ctx_reset(od_pvq_adapt_ctx *state, int is_keyframe);
+int od_pvq_size_ctx(int n);
+int od_pvq_k1_ctx(int n, int orig_size);
+
+od_val16 od_pvq_sin(od_val32 x);
+od_val16 od_pvq_cos(od_val32 x);
+#if !defined(OD_FLOAT_PVQ)
+int od_vector_log_mag(const od_coeff *x, int n);
+#endif
+
+int od_qm_get_index(int bs, int band);
+
+extern const od_val16 *const OD_PVQ_BETA[2][OD_NPLANES_MAX][OD_TXSIZES + 1];
+
+void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm);
+int od_compute_householder(od_val16 *r, int n, od_val32 gr, int *sign,
+ int shift);
+void od_apply_householder(od_val16 *out, const od_val16 *x, const od_val16 *r,
+ int n);
+void od_pvq_synthesis_partial(od_coeff *xcoeff, const od_coeff *ypulse,
+                                  const od_val16 *r, int n,
+                                  int noref, od_val32 g,
+                                  od_val32 theta, int m, int s,
+                                  const int16_t *qm_inv);
+od_val32 od_gain_expand(od_val32 cg, int q0, od_val16 beta);
+od_val32 od_pvq_compute_gain(const od_val16 *x, int n, int q0, od_val32 *g,
+ od_val16 beta, int bshift);
+int od_pvq_compute_max_theta(od_val32 qcg, od_val16 beta);
+od_val32 od_pvq_compute_theta(int t, int max_theta);
+int od_pvq_compute_k(od_val32 qcg, int itheta, od_val32 theta, int noref,
+ int n, od_val16 beta, int nodesync);
+
+int od_vector_is_null(const od_coeff *x, int len);
+int od_qm_offset(int bs, int xydec);
+
+#endif
diff --git a/av1/common/pvq_state.c b/av1/common/pvq_state.c
new file mode 100644
index 0000000..2329d66
--- /dev/null
+++ b/av1/common/pvq_state.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/common/pvq_state.h"
+#include "av1/common/odintrin.h"
+
+void od_adapt_ctx_reset(od_adapt_ctx *adapt, int is_keyframe) {
+  int i;
+  int pli;
+  od_adapt_pvq_ctx_reset(&adapt->pvq, is_keyframe);
+  adapt->skip_increment = 128;
+  OD_CDFS_INIT(adapt->skip_cdf, adapt->skip_increment >> 2);
+  for (pli = 0; pli < OD_NPLANES_MAX; pli++) {
+    generic_model_init(&adapt->model_dc[pli]);
+    for (i = 0; i < OD_TXSIZES; i++) {
+      adapt->ex_g[pli][i] = 8;
+    }
+    for (i = 0; i < 4; i++) {
+      int j;
+      for (j = 0; j < 3; j++) {
+        adapt->ex_dc[pli][i][j] = pli > 0 ? 8 : 32768;
+      }
+    }
+  }
+}
+
+void od_init_skipped_coeffs(int16_t *d, int16_t *pred, int is_keyframe, int bo,
+                            int n, int w) {
+  int i;
+  int j;
+  if (is_keyframe) {
+    for (i = 0; i < n; i++) {
+      for (j = 0; j < n; j++) {
+        /* skip DC */
+        if (i || j) d[bo + i * w + j] = 0;
+      }
+    }
+  } else {
+    for (i = 0; i < n; i++) {
+      for (j = 0; j < n; j++) {
+        d[bo + i * w + j] = pred[i * n + j];
+      }
+    }
+  }
+}
diff --git a/av1/common/pvq_state.h b/av1/common/pvq_state.h
new file mode 100644
index 0000000..0519451
--- /dev/null
+++ b/av1/common/pvq_state.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_state_H)
+# define _state_H (1)
+
+typedef struct od_state     od_state;
+typedef struct od_adapt_ctx od_adapt_ctx;
+
+# include "generic_code.h"
+# include "odintrin.h"
+# include "pvq.h"
+
+/*Adaptation speed of scalar Laplace encoding.*/
+# define OD_SCALAR_ADAPT_SPEED (4)
+
+struct od_adapt_ctx {
+  /* Support for PVQ encode/decode */
+  od_pvq_adapt_ctx pvq;
+
+  generic_encoder model_dc[OD_NPLANES_MAX];
+
+  int ex_dc[OD_NPLANES_MAX][OD_TXSIZES][3];
+  int ex_g[OD_NPLANES_MAX][OD_TXSIZES];
+
+  /* Joint skip flag for DC and AC */
+  uint16_t skip_cdf[OD_TXSIZES*2][4];
+  int skip_increment;
+};
+
+struct od_state {
+  od_adapt_ctx adapt;
+  /* TODO(yushin): Enable this for activity masking,
+     when pvq_qm_q4 is available in AOM. */
+  /* unsigned char pvq_qm_q4[OD_NPLANES_MAX][OD_QM_SIZE]; */
+
+  /* Quantization matrices and their inverses. */
+  int16_t qm[OD_QM_BUFFER_SIZE];
+  int16_t qm_inv[OD_QM_BUFFER_SIZE];
+};
+
+void od_adapt_ctx_reset(od_adapt_ctx *state, int is_keyframe);
+void od_init_skipped_coeffs(int16_t *d, int16_t *pred, int is_keyframe,
+ int bo, int n, int w);
+
+#endif
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index ad92150..7bbf20f 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -26,7 +26,7 @@
   }
 }
 
-void project_points_translation(int16_t *mat, int *points, int *proj,
+void project_points_translation(int32_t *mat, int *points, int *proj,
                                 const int n, const int stride_points,
                                 const int stride_proj, const int subsampling_x,
                                 const int subsampling_y) {
@@ -52,7 +52,7 @@
   }
 }
 
-void project_points_rotzoom(int16_t *mat, int *points, int *proj, const int n,
+void project_points_rotzoom(int32_t *mat, int *points, int *proj, const int n,
                             const int stride_points, const int stride_proj,
                             const int subsampling_x, const int subsampling_y) {
   int i;
@@ -79,7 +79,7 @@
   }
 }
 
-void project_points_affine(int16_t *mat, int *points, int *proj, const int n,
+void project_points_affine(int32_t *mat, int *points, int *proj, const int n,
                            const int stride_points, const int stride_proj,
                            const int subsampling_x, const int subsampling_y) {
   int i;
@@ -106,7 +106,7 @@
   }
 }
 
-void project_points_homography(int16_t *mat, int *points, int *proj,
+void project_points_homography(int32_t *mat, int *points, int *proj,
                                const int n, const int stride_points,
                                const int stride_proj, const int subsampling_x,
                                const int subsampling_y) {
@@ -186,9 +186,9 @@
 
 static int32_t do_cubic_filter(int32_t *p, int x) {
   if (x == 0) {
-    return p[0];
+    return p[0] * (1 << WARPEDPIXEL_FILTER_BITS);
   } else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
-    return p[1];
+    return p[1] * (1 << WARPEDPIXEL_FILTER_BITS);
   } else {
     const int64_t v1 = (int64_t)x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
     const int64_t v2 = x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
@@ -443,8 +443,7 @@
       int in[2], out[2];
       in[0] = j;
       in[1] = i;
-      projectpoints((int16_t *)wm->wmmat, in, out, 1, 2, 2, subsampling_x,
-                    subsampling_y);
+      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
       out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
       out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
       gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
@@ -475,8 +474,7 @@
       int in[2], out[2];
       in[0] = j;
       in[1] = i;
-      projectpoints((int16_t *)wm->wmmat, in, out, 1, 2, 2, subsampling_x,
-                    subsampling_y);
+      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
       out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
       out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
       if (ref_frm)
@@ -507,8 +505,7 @@
       int in[2], out[2];
       in[0] = j;
       in[1] = i;
-      projectpoints((int16_t *)wm->wmmat, in, out, 1, 2, 2, subsampling_x,
-                    subsampling_y);
+      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
       out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
       out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
       gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
@@ -535,8 +532,7 @@
       int in[2], out[2];
       in[0] = j;
       in[1] = i;
-      projectpoints((int16_t *)wm->wmmat, in, out, 1, 2, 2, subsampling_x,
-                    subsampling_y);
+      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
       out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
       out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
       if (ref_frm)
@@ -596,28 +592,22 @@
   switch (wmtype) {
     case HOMOGRAPHY:
       assert(fabs(model[8] - 1.0) < 1e-12);
-      wm->wmmat[3].as_mv.row =
-          (int16_t)lrint(model[6] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
-      wm->wmmat[3].as_mv.col =
-          (int16_t)lrint(model[7] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
+      wm->wmmat[6] =
+          (int32_t)lrint(model[6] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
+      wm->wmmat[7] =
+          (int32_t)lrint(model[7] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
     /* fallthrough intended */
     case AFFINE:
-      wm->wmmat[2].as_mv.row =
-          (int16_t)lrint(model[4] * (1 << WARPEDMODEL_PREC_BITS));
-      wm->wmmat[2].as_mv.col =
-          (int16_t)lrint(model[5] * (1 << WARPEDMODEL_PREC_BITS));
+      wm->wmmat[4] = (int32_t)lrint(model[4] * (1 << WARPEDMODEL_PREC_BITS));
+      wm->wmmat[5] = (int32_t)lrint(model[5] * (1 << WARPEDMODEL_PREC_BITS));
     /* fallthrough intended */
     case ROTZOOM:
-      wm->wmmat[1].as_mv.row =
-          (int16_t)lrint(model[2] * (1 << WARPEDMODEL_PREC_BITS));
-      wm->wmmat[1].as_mv.col =
-          (int16_t)lrint(model[3] * (1 << WARPEDMODEL_PREC_BITS));
+      wm->wmmat[2] = (int32_t)lrint(model[2] * (1 << WARPEDMODEL_PREC_BITS));
+      wm->wmmat[3] = (int32_t)lrint(model[3] * (1 << WARPEDMODEL_PREC_BITS));
     /* fallthrough intended */
     case TRANSLATION:
-      wm->wmmat[0].as_mv.row =
-          (int16_t)lrint(model[0] * (1 << WARPEDMODEL_PREC_BITS));
-      wm->wmmat[0].as_mv.col =
-          (int16_t)lrint(model[1] * (1 << WARPEDMODEL_PREC_BITS));
+      wm->wmmat[0] = (int32_t)lrint(model[0] * (1 << WARPEDMODEL_PREC_BITS));
+      wm->wmmat[1] = (int32_t)lrint(model[1] * (1 << WARPEDMODEL_PREC_BITS));
       break;
     default: assert(0 && "Invalid TransformationType");
   }
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index e7b9038..7c9919f 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -24,26 +24,26 @@
 
 #define MAX_PARAMDIM 9
 
-typedef void (*ProjectPointsFunc)(int16_t *mat, int *points, int *proj,
+typedef void (*ProjectPointsFunc)(int32_t *mat, int *points, int *proj,
                                   const int n, const int stride_points,
                                   const int stride_proj,
                                   const int subsampling_x,
                                   const int subsampling_y);
 
-void project_points_translation(int16_t *mat, int *points, int *proj,
+void project_points_translation(int32_t *mat, int *points, int *proj,
                                 const int n, const int stride_points,
                                 const int stride_proj, const int subsampling_x,
                                 const int subsampling_y);
 
-void project_points_rotzoom(int16_t *mat, int *points, int *proj, const int n,
+void project_points_rotzoom(int32_t *mat, int *points, int *proj, const int n,
                             const int stride_points, const int stride_proj,
                             const int subsampling_x, const int subsampling_y);
 
-void project_points_affine(int16_t *mat, int *points, int *proj, const int n,
+void project_points_affine(int32_t *mat, int *points, int *proj, const int n,
                            const int stride_points, const int stride_proj,
                            const int subsampling_x, const int subsampling_y);
 
-void project_points_homography(int16_t *mat, int *points, int *proj,
+void project_points_homography(int32_t *mat, int *points, int *proj,
                                const int n, const int stride_points,
                                const int stride_proj, const int subsampling_x,
                                const int subsampling_y);
diff --git a/av1/common/zigzag.h b/av1/common/zigzag.h
new file mode 100644
index 0000000..295ed23
--- /dev/null
+++ b/av1/common/zigzag.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_zigzag_H)
+# define _zigzag_H (1)
+
+extern const unsigned char OD_ZIGZAG4_DCT_DCT[15][2];
+extern const unsigned char OD_ZIGZAG4_ADST_DCT[15][2];
+extern const unsigned char OD_ZIGZAG4_DCT_ADST[15][2];
+extern const unsigned char OD_ZIGZAG4_ADST_ADST[15][2];
+
+extern const unsigned char OD_ZIGZAG8_DCT_DCT[48][2];
+extern const unsigned char OD_ZIGZAG8_ADST_DCT[48][2];
+extern const unsigned char OD_ZIGZAG8_DCT_ADST[48][2];
+extern const unsigned char OD_ZIGZAG8_ADST_ADST[48][2];
+
+extern const unsigned char OD_ZIGZAG16_DCT_DCT[192][2];
+extern const unsigned char OD_ZIGZAG16_ADST_DCT[192][2];
+extern const unsigned char OD_ZIGZAG16_DCT_ADST[192][2];
+extern const unsigned char OD_ZIGZAG16_ADST_ADST[192][2];
+
+extern const unsigned char OD_ZIGZAG32_DCT_DCT[768][2];
+#endif
diff --git a/av1/common/zigzag16.c b/av1/common/zigzag16.c
new file mode 100644
index 0000000..94c3487
--- /dev/null
+++ b/av1/common/zigzag16.c
@@ -0,0 +1,208 @@
+/* This file is generated by gen_zigzag16.m */
+
+/* clang-format off */
+
+#include "odintrin.h"
+OD_EXTERN const unsigned char OD_ZIGZAG16_DCT_DCT[192][2] = {
+  {8, 0}, {8, 1}, {8, 2}, {9, 0},
+  {8, 3}, {9, 1}, {9, 2}, {10, 0},
+  {9, 3}, {10, 1}, {10, 2}, {11, 0},
+  {10, 3}, {11, 1}, {11, 2}, {11, 3},
+  {12, 0}, {12, 1}, {13, 0}, {12, 2},
+  {12, 3}, {13, 1}, {13, 2}, {14, 0},
+  {13, 3}, {14, 1}, {15, 0}, {14, 2},
+  {14, 3}, {15, 1}, {15, 2}, {15, 3},
+  {0, 8}, {1, 8}, {0, 9}, {2, 8},
+  {1, 9}, {3, 8}, {0, 10}, {2, 9},
+  {1, 10}, {3, 9}, {0, 11}, {2, 10},
+  {1, 11}, {3, 10}, {0, 12}, {2, 11},
+  {1, 12}, {3, 11}, {0, 13}, {2, 12},
+  {1, 13}, {0, 14}, {3, 12}, {2, 13},
+  {1, 14}, {3, 13}, {0, 15}, {2, 14},
+  {1, 15}, {3, 14}, {2, 15}, {3, 15},
+  {4, 8}, {5, 8}, {4, 9}, {8, 4},
+  {8, 5}, {6, 8}, {5, 9}, {4, 10},
+  {9, 4}, {8, 6}, {7, 8}, {9, 5},
+  {5, 10}, {8, 7}, {6, 9}, {4, 11},
+  {10, 4}, {9, 6}, {7, 9}, {8, 8},
+  {10, 5}, {6, 10}, {5, 11}, {9, 7},
+  {8, 9}, {10, 6}, {7, 10}, {4, 12},
+  {11, 4}, {9, 8}, {6, 11}, {10, 7},
+  {11, 5}, {5, 12}, {8, 10}, {7, 11},
+  {9, 9}, {4, 13}, {10, 8}, {11, 6},
+  {11, 7}, {6, 12}, {8, 11}, {9, 10},
+  {12, 4}, {5, 13}, {10, 9}, {12, 5},
+  {7, 12}, {11, 8}, {4, 14}, {6, 13},
+  {10, 10}, {9, 11}, {12, 6}, {13, 4},
+  {11, 9}, {8, 12}, {5, 14}, {12, 7},
+  {7, 13}, {4, 15}, {13, 5}, {10, 11},
+  {11, 10}, {9, 12}, {13, 6}, {12, 8},
+  {6, 14}, {8, 13}, {5, 15}, {13, 7},
+  {14, 4}, {12, 9}, {7, 14}, {11, 11},
+  {10, 12}, {9, 13}, {14, 5}, {6, 15},
+  {13, 8}, {8, 14}, {12, 10}, {14, 6},
+  {7, 15}, {13, 9}, {15, 4}, {10, 13},
+  {11, 12}, {14, 7}, {9, 14}, {12, 11},
+  {8, 15}, {15, 5}, {13, 10}, {14, 8},
+  {11, 13}, {15, 6}, {9, 15}, {10, 14},
+  {14, 9}, {15, 7}, {13, 11}, {12, 12},
+  {10, 15}, {11, 14}, {15, 8}, {14, 10},
+  {12, 13}, {13, 12}, {15, 9}, {11, 15},
+  {14, 11}, {13, 13}, {15, 10}, {12, 14},
+  {13, 14}, {15, 11}, {14, 12}, {12, 15},
+  {14, 13}, {13, 15}, {15, 12}, {14, 14},
+  {15, 13}, {14, 15}, {15, 14}, {15, 15}
+  };
+
+OD_EXTERN const unsigned char OD_ZIGZAG16_ADST_DCT[192][2] = {
+  {8, 0}, {9, 0}, {10, 0}, {8, 1},
+  {11, 0}, {9, 1}, {8, 2}, {12, 0},
+  {10, 1}, {9, 2}, {8, 3}, {13, 0},
+  {11, 1}, {10, 2}, {9, 3}, {14, 0},
+  {12, 1}, {10, 3}, {15, 0}, {11, 2},
+  {13, 1}, {11, 3}, {12, 2}, {14, 1},
+  {12, 3}, {13, 2}, {15, 1}, {13, 3},
+  {14, 2}, {14, 3}, {15, 2}, {15, 3},
+  {0, 8}, {1, 8}, {2, 8}, {0, 9},
+  {3, 8}, {1, 9}, {2, 9}, {0, 10},
+  {3, 9}, {1, 10}, {2, 10}, {0, 11},
+  {3, 10}, {1, 11}, {2, 11}, {0, 12},
+  {3, 11}, {1, 12}, {2, 12}, {0, 13},
+  {3, 12}, {1, 13}, {0, 14}, {2, 13},
+  {0, 15}, {1, 14}, {3, 13}, {2, 14},
+  {1, 15}, {3, 14}, {2, 15}, {3, 15},
+  {8, 4}, {9, 4}, {8, 5}, {4, 8},
+  {10, 4}, {9, 5}, {5, 8}, {8, 6},
+  {4, 9}, {10, 5}, {9, 6}, {6, 8},
+  {8, 7}, {11, 4}, {7, 8}, {5, 9},
+  {9, 7}, {11, 5}, {10, 6}, {4, 10},
+  {6, 9}, {8, 8}, {5, 10}, {7, 9},
+  {12, 4}, {10, 7}, {9, 8}, {11, 6},
+  {8, 9}, {4, 11}, {6, 10}, {7, 10},
+  {12, 5}, {5, 11}, {10, 8}, {11, 7},
+  {9, 9}, {4, 12}, {13, 4}, {8, 10},
+  {6, 11}, {12, 6}, {5, 12}, {10, 9},
+  {7, 11}, {9, 10}, {11, 8}, {13, 5},
+  {8, 11}, {4, 13}, {6, 12}, {10, 10},
+  {12, 7}, {11, 9}, {7, 12}, {14, 4},
+  {5, 13}, {9, 11}, {13, 6}, {8, 12},
+  {4, 14}, {12, 8}, {6, 13}, {11, 10},
+  {10, 11}, {12, 9}, {5, 14}, {13, 7},
+  {14, 5}, {9, 12}, {4, 15}, {7, 13},
+  {8, 13}, {6, 14}, {13, 8}, {11, 11},
+  {10, 12}, {15, 4}, {12, 10}, {14, 6},
+  {13, 9}, {5, 15}, {9, 13}, {7, 14},
+  {15, 5}, {6, 15}, {8, 14}, {14, 7},
+  {11, 12}, {7, 15}, {9, 14}, {13, 10},
+  {10, 13}, {14, 8}, {15, 6}, {14, 9},
+  {12, 11}, {8, 15}, {15, 7}, {10, 14},
+  {11, 13}, {9, 15}, {13, 11}, {12, 12},
+  {15, 8}, {14, 10}, {15, 9}, {10, 15},
+  {11, 14}, {13, 12}, {12, 13}, {15, 10},
+  {14, 11}, {11, 15}, {13, 13}, {15, 11},
+  {14, 12}, {12, 14}, {15, 12}, {13, 14},
+  {12, 15}, {14, 13}, {13, 15}, {15, 13},
+  {14, 14}, {15, 14}, {14, 15}, {15, 15}
+  };
+
+OD_EXTERN const unsigned char OD_ZIGZAG16_DCT_ADST[192][2] = {
+  {8, 0}, {8, 1}, {8, 2}, {8, 3},
+  {9, 0}, {9, 1}, {9, 2}, {9, 3},
+  {10, 0}, {10, 1}, {10, 2}, {10, 3},
+  {11, 0}, {11, 1}, {11, 2}, {11, 3},
+  {12, 0}, {12, 1}, {12, 2}, {12, 3},
+  {13, 0}, {13, 1}, {13, 2}, {13, 3},
+  {14, 0}, {15, 0}, {14, 1}, {14, 2},
+  {14, 3}, {15, 1}, {15, 2}, {15, 3},
+  {0, 8}, {0, 9}, {0, 10}, {1, 8},
+  {0, 11}, {1, 9}, {2, 8}, {0, 12},
+  {1, 10}, {2, 9}, {0, 13}, {1, 11},
+  {3, 8}, {2, 10}, {0, 14}, {1, 12},
+  {3, 9}, {0, 15}, {2, 11}, {3, 10},
+  {1, 13}, {2, 12}, {3, 11}, {1, 14},
+  {2, 13}, {1, 15}, {3, 12}, {2, 14},
+  {3, 13}, {2, 15}, {3, 14}, {3, 15},
+  {4, 8}, {4, 9}, {5, 8}, {4, 10},
+  {5, 9}, {4, 11}, {6, 8}, {5, 10},
+  {8, 4}, {6, 9}, {4, 12}, {5, 11},
+  {8, 5}, {6, 10}, {7, 8}, {8, 6},
+  {4, 13}, {7, 9}, {5, 12}, {8, 7},
+  {9, 4}, {6, 11}, {8, 8}, {7, 10},
+  {5, 13}, {9, 5}, {4, 14}, {9, 6},
+  {8, 9}, {6, 12}, {9, 7}, {7, 11},
+  {4, 15}, {8, 10}, {9, 8}, {5, 14},
+  {10, 4}, {6, 13}, {10, 5}, {9, 9},
+  {7, 12}, {8, 11}, {10, 6}, {5, 15},
+  {10, 7}, {6, 14}, {9, 10}, {7, 13},
+  {8, 12}, {10, 8}, {9, 11}, {6, 15},
+  {11, 4}, {11, 5}, {10, 9}, {8, 13},
+  {7, 14}, {11, 6}, {9, 12}, {11, 7},
+  {10, 10}, {7, 15}, {8, 14}, {12, 4},
+  {11, 8}, {12, 5}, {9, 13}, {10, 11},
+  {8, 15}, {11, 9}, {12, 6}, {12, 7},
+  {10, 12}, {9, 14}, {11, 10}, {13, 4},
+  {12, 8}, {9, 15}, {13, 5}, {11, 11},
+  {12, 9}, {10, 13}, {13, 6}, {13, 7},
+  {12, 10}, {14, 4}, {11, 12}, {13, 8},
+  {10, 14}, {14, 5}, {12, 11}, {13, 9},
+  {14, 6}, {10, 15}, {11, 13}, {15, 4},
+  {14, 7}, {12, 12}, {13, 10}, {14, 8},
+  {15, 5}, {13, 11}, {15, 6}, {11, 14},
+  {14, 9}, {12, 13}, {11, 15}, {15, 7},
+  {14, 10}, {15, 8}, {13, 12}, {12, 14},
+  {15, 9}, {14, 11}, {13, 13}, {12, 15},
+  {15, 10}, {14, 12}, {13, 14}, {15, 11},
+  {13, 15}, {14, 13}, {14, 14}, {15, 12},
+  {14, 15}, {15, 13}, {15, 14}, {15, 15}
+  };
+
+OD_EXTERN const unsigned char OD_ZIGZAG16_ADST_ADST[192][2] = {
+  {8, 0}, {8, 1}, {8, 2}, {9, 0},
+  {8, 3}, {9, 1}, {9, 2}, {10, 0},
+  {9, 3}, {10, 1}, {10, 2}, {11, 0},
+  {10, 3}, {11, 1}, {11, 2}, {11, 3},
+  {12, 0}, {12, 1}, {13, 0}, {12, 2},
+  {12, 3}, {13, 1}, {13, 2}, {14, 0},
+  {13, 3}, {14, 1}, {15, 0}, {14, 2},
+  {14, 3}, {15, 1}, {15, 2}, {15, 3},
+  {0, 8}, {1, 8}, {0, 9}, {2, 8},
+  {1, 9}, {3, 8}, {0, 10}, {2, 9},
+  {1, 10}, {3, 9}, {0, 11}, {2, 10},
+  {1, 11}, {3, 10}, {0, 12}, {2, 11},
+  {1, 12}, {3, 11}, {0, 13}, {2, 12},
+  {1, 13}, {0, 14}, {3, 12}, {2, 13},
+  {1, 14}, {3, 13}, {0, 15}, {2, 14},
+  {1, 15}, {3, 14}, {2, 15}, {3, 15},
+  {4, 8}, {5, 8}, {4, 9}, {8, 4},
+  {8, 5}, {6, 8}, {5, 9}, {4, 10},
+  {9, 4}, {8, 6}, {7, 8}, {9, 5},
+  {5, 10}, {8, 7}, {6, 9}, {4, 11},
+  {10, 4}, {9, 6}, {7, 9}, {8, 8},
+  {10, 5}, {6, 10}, {5, 11}, {9, 7},
+  {8, 9}, {10, 6}, {7, 10}, {4, 12},
+  {11, 4}, {9, 8}, {6, 11}, {10, 7},
+  {11, 5}, {5, 12}, {8, 10}, {7, 11},
+  {9, 9}, {4, 13}, {10, 8}, {11, 6},
+  {11, 7}, {6, 12}, {8, 11}, {9, 10},
+  {12, 4}, {5, 13}, {10, 9}, {12, 5},
+  {7, 12}, {11, 8}, {4, 14}, {6, 13},
+  {10, 10}, {9, 11}, {12, 6}, {13, 4},
+  {11, 9}, {8, 12}, {5, 14}, {12, 7},
+  {7, 13}, {4, 15}, {13, 5}, {10, 11},
+  {11, 10}, {9, 12}, {13, 6}, {12, 8},
+  {6, 14}, {8, 13}, {5, 15}, {13, 7},
+  {14, 4}, {12, 9}, {7, 14}, {11, 11},
+  {10, 12}, {9, 13}, {14, 5}, {6, 15},
+  {13, 8}, {8, 14}, {12, 10}, {14, 6},
+  {7, 15}, {13, 9}, {15, 4}, {10, 13},
+  {11, 12}, {14, 7}, {9, 14}, {12, 11},
+  {8, 15}, {15, 5}, {13, 10}, {14, 8},
+  {11, 13}, {15, 6}, {9, 15}, {10, 14},
+  {14, 9}, {15, 7}, {13, 11}, {12, 12},
+  {10, 15}, {11, 14}, {15, 8}, {14, 10},
+  {12, 13}, {13, 12}, {15, 9}, {11, 15},
+  {14, 11}, {13, 13}, {15, 10}, {12, 14},
+  {13, 14}, {15, 11}, {14, 12}, {12, 15},
+  {14, 13}, {13, 15}, {15, 12}, {14, 14},
+  {15, 13}, {14, 15}, {15, 14}, {15, 15}
+  };
diff --git a/av1/common/zigzag32.c b/av1/common/zigzag32.c
new file mode 100644
index 0000000..cb3b9bc
--- /dev/null
+++ b/av1/common/zigzag32.c
@@ -0,0 +1,199 @@
+/* This file is generated by gen_zigzag32.m */
+
+/* clang-format off */
+
+#include "odintrin.h"
+OD_EXTERN const unsigned char OD_ZIGZAG32_DCT_DCT[768][2] = {
+  { 16, 0 }, { 17, 0 }, { 18, 0 }, { 19, 0 },
+  { 16, 1 }, { 17, 1 }, { 20, 0 }, { 16, 2 },
+  { 18, 1 }, { 21, 0 }, { 17, 2 }, { 16, 3 },
+  { 19, 1 }, { 22, 0 }, { 18, 2 }, { 17, 3 },
+  { 20, 1 }, { 16, 4 }, { 23, 0 }, { 19, 2 },
+  { 24, 0 }, { 16, 5 }, { 21, 1 }, { 17, 4 },
+  { 18, 3 }, { 20, 2 }, { 17, 5 }, { 16, 6 },
+  { 19, 3 }, { 18, 4 }, { 25, 0 }, { 22, 1 },
+  { 16, 7 }, { 21, 2 }, { 17, 6 }, { 20, 3 },
+  { 26, 0 }, { 18, 5 }, { 19, 4 }, { 17, 7 },
+  { 23, 1 }, { 22, 2 }, { 18, 6 }, { 27, 0 },
+  { 19, 5 }, { 24, 1 }, { 21, 3 }, { 28, 0 },
+  { 20, 4 }, { 18, 7 }, { 19, 6 }, { 23, 2 },
+  { 29, 0 }, { 25, 1 }, { 21, 4 }, { 30, 0 },
+  { 20, 5 }, { 22, 3 }, { 31, 0 }, { 19, 7 },
+  { 24, 2 }, { 26, 1 }, { 20, 6 }, { 21, 5 },
+  { 22, 4 }, { 23, 3 }, { 27, 1 }, { 25, 2 },
+  { 20, 7 }, { 28, 1 }, { 24, 3 }, { 21, 6 },
+  { 22, 5 }, { 23, 4 }, { 26, 2 }, { 21, 7 },
+  { 29, 1 }, { 25, 3 }, { 30, 1 }, { 27, 2 },
+  { 22, 6 }, { 23, 5 }, { 31, 1 }, { 24, 4 },
+  { 26, 3 }, { 28, 2 }, { 22, 7 }, { 23, 6 },
+  { 25, 4 }, { 24, 5 }, { 29, 2 }, { 30, 2 },
+  { 27, 3 }, { 23, 7 }, { 31, 2 }, { 24, 6 },
+  { 26, 4 }, { 25, 5 }, { 28, 3 }, { 24, 7 },
+  { 27, 4 }, { 29, 3 }, { 25, 6 }, { 26, 5 },
+  { 30, 3 }, { 31, 3 }, { 28, 4 }, { 27, 5 },
+  { 25, 7 }, { 29, 4 }, { 26, 6 }, { 28, 5 },
+  { 30, 4 }, { 26, 7 }, { 27, 6 }, { 31, 4 },
+  { 29, 5 }, { 27, 7 }, { 30, 5 }, { 28, 6 },
+  { 31, 5 }, { 29, 6 }, { 28, 7 }, { 30, 6 },
+  { 31, 6 }, { 29, 7 }, { 30, 7 }, { 31, 7 },
+  { 0, 16 }, { 0, 17 }, { 1, 16 }, { 0, 18 },
+  { 1, 17 }, { 0, 19 }, { 2, 16 }, { 1, 18 },
+  { 0, 20 }, { 2, 17 }, { 3, 16 }, { 1, 19 },
+  { 2, 18 }, { 0, 21 }, { 3, 17 }, { 4, 16 },
+  { 1, 20 }, { 2, 19 }, { 0, 22 }, { 3, 18 },
+  { 4, 17 }, { 5, 16 }, { 0, 23 }, { 3, 19 },
+  { 2, 20 }, { 1, 21 }, { 4, 18 }, { 6, 16 },
+  { 5, 17 }, { 3, 20 }, { 2, 21 }, { 1, 22 },
+  { 0, 24 }, { 0, 25 }, { 4, 19 }, { 7, 16 },
+  { 6, 17 }, { 5, 18 }, { 0, 26 }, { 3, 21 },
+  { 2, 22 }, { 1, 23 }, { 4, 20 }, { 5, 19 },
+  { 6, 18 }, { 1, 24 }, { 7, 17 }, { 0, 27 },
+  { 2, 23 }, { 3, 22 }, { 4, 21 }, { 1, 25 },
+  { 5, 20 }, { 7, 18 }, { 0, 28 }, { 6, 19 },
+  { 2, 24 }, { 1, 26 }, { 0, 29 }, { 4, 22 },
+  { 3, 23 }, { 2, 25 }, { 5, 21 }, { 0, 31 },
+  { 7, 19 }, { 6, 20 }, { 0, 30 }, { 1, 27 },
+  { 3, 24 }, { 2, 26 }, { 4, 23 }, { 5, 22 },
+  { 7, 20 }, { 1, 28 }, { 6, 21 }, { 3, 25 },
+  { 2, 27 }, { 1, 29 }, { 4, 24 }, { 2, 28 },
+  { 1, 30 }, { 7, 21 }, { 5, 23 }, { 3, 26 },
+  { 6, 22 }, { 1, 31 }, { 4, 25 }, { 7, 22 },
+  { 3, 27 }, { 2, 29 }, { 2, 30 }, { 5, 24 },
+  { 2, 31 }, { 6, 23 }, { 4, 26 }, { 3, 28 },
+  { 5, 25 }, { 3, 29 }, { 6, 24 }, { 7, 23 },
+  { 3, 30 }, { 4, 27 }, { 3, 31 }, { 5, 26 },
+  { 6, 25 }, { 4, 28 }, { 7, 24 }, { 4, 29 },
+  { 5, 27 }, { 4, 30 }, { 4, 31 }, { 6, 26 },
+  { 5, 28 }, { 7, 25 }, { 6, 27 }, { 5, 29 },
+  { 7, 26 }, { 5, 30 }, { 5, 31 }, { 6, 28 },
+  { 7, 27 }, { 6, 29 }, { 6, 30 }, { 7, 28 },
+  { 6, 31 }, { 7, 29 }, { 7, 30 }, { 7, 31 },
+  { 8, 16 }, { 9, 16 }, { 8, 17 }, { 10, 16 },
+  { 9, 17 }, { 16, 8 }, { 8, 18 }, { 16, 9 },
+  { 10, 17 }, { 11, 16 }, { 17, 8 }, { 9, 18 },
+  { 8, 19 }, { 16, 10 }, { 11, 17 }, { 12, 16 },
+  { 10, 18 }, { 17, 9 }, { 9, 19 }, { 16, 11 },
+  { 8, 20 }, { 18, 8 }, { 17, 10 }, { 10, 19 },
+  { 12, 17 }, { 11, 18 }, { 9, 20 }, { 16, 12 },
+  { 18, 9 }, { 8, 21 }, { 13, 16 }, { 17, 11 },
+  { 19, 8 }, { 18, 10 }, { 13, 17 }, { 16, 13 },
+  { 11, 19 }, { 12, 18 }, { 10, 20 }, { 17, 12 },
+  { 9, 21 }, { 19, 9 }, { 8, 22 }, { 14, 16 },
+  { 18, 11 }, { 11, 20 }, { 10, 21 }, { 20, 8 },
+  { 13, 18 }, { 16, 14 }, { 12, 19 }, { 17, 13 },
+  { 19, 10 }, { 14, 17 }, { 9, 22 }, { 18, 12 },
+  { 8, 23 }, { 17, 14 }, { 20, 9 }, { 15, 16 },
+  { 16, 15 }, { 13, 19 }, { 10, 22 }, { 19, 11 },
+  { 11, 21 }, { 14, 18 }, { 12, 20 }, { 18, 13 },
+  { 20, 10 }, { 21, 8 }, { 15, 17 }, { 9, 23 },
+  { 19, 12 }, { 11, 22 }, { 8, 24 }, { 21, 9 },
+  { 17, 15 }, { 16, 16 }, { 14, 19 }, { 18, 14 },
+  { 12, 21 }, { 13, 20 }, { 20, 11 }, { 10, 23 },
+  { 19, 13 }, { 15, 18 }, { 16, 17 }, { 21, 10 },
+  { 22, 8 }, { 9, 24 }, { 8, 25 }, { 20, 12 },
+  { 15, 19 }, { 11, 23 }, { 17, 16 }, { 18, 15 },
+  { 14, 20 }, { 12, 22 }, { 10, 24 }, { 22, 9 },
+  { 21, 11 }, { 19, 14 }, { 13, 21 }, { 16, 18 },
+  { 9, 25 }, { 17, 17 }, { 8, 26 }, { 20, 13 },
+  { 23, 8 }, { 12, 23 }, { 13, 22 }, { 22, 10 },
+  { 19, 15 }, { 15, 20 }, { 16, 19 }, { 21, 12 },
+  { 11, 24 }, { 14, 21 }, { 8, 27 }, { 18, 16 },
+  { 10, 25 }, { 9, 26 }, { 22, 11 }, { 20, 14 },
+  { 23, 9 }, { 18, 17 }, { 17, 18 }, { 17, 19 },
+  { 19, 16 }, { 21, 13 }, { 10, 26 }, { 12, 24 },
+  { 23, 10 }, { 24, 8 }, { 8, 28 }, { 16, 20 },
+  { 9, 27 }, { 15, 21 }, { 22, 12 }, { 14, 22 },
+  { 13, 23 }, { 20, 15 }, { 11, 25 }, { 24, 9 },
+  { 18, 18 }, { 19, 17 }, { 23, 11 }, { 10, 27 },
+  { 8, 29 }, { 12, 25 }, { 9, 28 }, { 8, 30 },
+  { 21, 14 }, { 13, 24 }, { 11, 26 }, { 25, 8 },
+  { 24, 10 }, { 20, 16 }, { 19, 18 }, { 14, 23 },
+  { 22, 13 }, { 8, 31 }, { 17, 20 }, { 9, 29 },
+  { 23, 12 }, { 15, 22 }, { 25, 9 }, { 11, 27 },
+  { 10, 28 }, { 20, 17 }, { 21, 15 }, { 18, 19 },
+  { 16, 21 }, { 24, 11 }, { 9, 30 }, { 12, 26 },
+  { 10, 29 }, { 22, 14 }, { 14, 24 }, { 9, 31 },
+  { 26, 8 }, { 13, 25 }, { 25, 10 }, { 18, 20 },
+  { 19, 19 }, { 11, 28 }, { 15, 23 }, { 20, 18 },
+  { 10, 30 }, { 12, 27 }, { 17, 21 }, { 23, 13 },
+  { 24, 12 }, { 21, 16 }, { 16, 22 }, { 26, 9 },
+  { 27, 8 }, { 13, 26 }, { 22, 15 }, { 10, 31 },
+  { 14, 25 }, { 12, 28 }, { 25, 11 }, { 21, 17 },
+  { 26, 10 }, { 20, 19 }, { 11, 29 }, { 15, 24 },
+  { 23, 14 }, { 27, 9 }, { 11, 30 }, { 13, 27 },
+  { 19, 20 }, { 24, 13 }, { 28, 8 }, { 11, 31 },
+  { 22, 16 }, { 17, 22 }, { 16, 23 }, { 25, 12 },
+  { 18, 21 }, { 12, 29 }, { 21, 18 }, { 28, 9 },
+  { 27, 10 }, { 26, 11 }, { 29, 8 }, { 14, 26 },
+  { 15, 25 }, { 13, 28 }, { 12, 30 }, { 23, 15 },
+  { 30, 8 }, { 16, 24 }, { 13, 29 }, { 25, 13 },
+  { 24, 14 }, { 20, 20 }, { 31, 8 }, { 12, 31 },
+  { 14, 27 }, { 28, 10 }, { 26, 12 }, { 22, 17 },
+  { 21, 19 }, { 17, 23 }, { 18, 22 }, { 29, 9 },
+  { 27, 11 }, { 19, 21 }, { 27, 12 }, { 30, 9 },
+  { 31, 9 }, { 13, 30 }, { 24, 15 }, { 23, 16 },
+  { 15, 26 }, { 14, 28 }, { 29, 10 }, { 28, 11 },
+  { 26, 13 }, { 17, 24 }, { 13, 31 }, { 25, 14 },
+  { 22, 18 }, { 16, 25 }, { 30, 10 }, { 14, 29 },
+  { 15, 27 }, { 19, 22 }, { 21, 20 }, { 20, 21 },
+  { 27, 13 }, { 29, 11 }, { 18, 23 }, { 23, 17 },
+  { 16, 26 }, { 31, 10 }, { 24, 16 }, { 14, 30 },
+  { 22, 19 }, { 14, 31 }, { 28, 12 }, { 26, 14 },
+  { 30, 11 }, { 15, 28 }, { 25, 15 }, { 17, 25 },
+  { 23, 18 }, { 18, 24 }, { 15, 30 }, { 29, 12 },
+  { 31, 11 }, { 16, 27 }, { 24, 17 }, { 28, 13 },
+  { 19, 23 }, { 15, 29 }, { 25, 16 }, { 17, 26 },
+  { 27, 14 }, { 22, 20 }, { 15, 31 }, { 20, 22 },
+  { 21, 21 }, { 16, 28 }, { 17, 27 }, { 30, 12 },
+  { 26, 15 }, { 19, 24 }, { 18, 25 }, { 23, 19 },
+  { 29, 13 }, { 31, 12 }, { 24, 18 }, { 26, 16 },
+  { 25, 17 }, { 16, 29 }, { 28, 14 }, { 20, 23 },
+  { 18, 26 }, { 21, 22 }, { 19, 25 }, { 22, 21 },
+  { 27, 15 }, { 17, 28 }, { 16, 30 }, { 26, 17 },
+  { 23, 20 }, { 16, 31 }, { 25, 18 }, { 27, 16 },
+  { 20, 24 }, { 24, 19 }, { 31, 13 }, { 30, 13 },
+  { 29, 14 }, { 18, 27 }, { 28, 15 }, { 17, 29 },
+  { 19, 26 }, { 17, 30 }, { 21, 23 }, { 22, 22 },
+  { 30, 14 }, { 20, 25 }, { 23, 21 }, { 17, 31 },
+  { 18, 28 }, { 25, 19 }, { 24, 20 }, { 28, 16 },
+  { 31, 14 }, { 26, 18 }, { 19, 27 }, { 29, 15 },
+  { 27, 17 }, { 30, 15 }, { 21, 24 }, { 22, 23 },
+  { 26, 19 }, { 23, 22 }, { 28, 17 }, { 29, 16 },
+  { 18, 30 }, { 24, 21 }, { 25, 20 }, { 18, 31 },
+  { 18, 29 }, { 20, 26 }, { 19, 28 }, { 27, 18 },
+  { 31, 15 }, { 20, 27 }, { 30, 16 }, { 19, 29 },
+  { 29, 17 }, { 31, 16 }, { 27, 19 }, { 21, 25 },
+  { 28, 18 }, { 26, 20 }, { 22, 24 }, { 25, 21 },
+  { 19, 30 }, { 24, 22 }, { 30, 17 }, { 21, 26 },
+  { 23, 23 }, { 19, 31 }, { 20, 28 }, { 31, 17 },
+  { 28, 19 }, { 27, 20 }, { 21, 27 }, { 29, 18 },
+  { 30, 18 }, { 25, 22 }, { 26, 21 }, { 20, 29 },
+  { 22, 25 }, { 24, 23 }, { 29, 19 }, { 23, 24 },
+  { 20, 31 }, { 20, 30 }, { 28, 20 }, { 21, 28 },
+  { 22, 26 }, { 31, 18 }, { 27, 21 }, { 30, 19 },
+  { 22, 27 }, { 29, 20 }, { 23, 25 }, { 24, 24 },
+  { 26, 22 }, { 21, 29 }, { 25, 23 }, { 31, 19 },
+  { 21, 30 }, { 23, 26 }, { 28, 21 }, { 21, 31 },
+  { 22, 28 }, { 30, 20 }, { 25, 24 }, { 27, 22 },
+  { 29, 21 }, { 26, 23 }, { 24, 25 }, { 31, 20 },
+  { 23, 27 }, { 22, 29 }, { 30, 21 }, { 28, 22 },
+  { 24, 26 }, { 25, 25 }, { 27, 23 }, { 22, 30 },
+  { 23, 28 }, { 22, 31 }, { 26, 24 }, { 31, 21 },
+  { 24, 27 }, { 29, 22 }, { 27, 24 }, { 30, 22 },
+  { 25, 26 }, { 28, 23 }, { 23, 30 }, { 23, 29 },
+  { 24, 28 }, { 25, 27 }, { 31, 22 }, { 23, 31 },
+  { 26, 25 }, { 28, 24 }, { 29, 23 }, { 24, 29 },
+  { 24, 30 }, { 27, 25 }, { 25, 28 }, { 26, 26 },
+  { 30, 23 }, { 26, 27 }, { 31, 23 }, { 28, 25 },
+  { 27, 26 }, { 25, 29 }, { 24, 31 }, { 29, 24 },
+  { 30, 24 }, { 27, 27 }, { 29, 25 }, { 26, 28 },
+  { 31, 24 }, { 25, 30 }, { 25, 31 }, { 28, 26 },
+  { 27, 28 }, { 26, 29 }, { 30, 25 }, { 29, 26 },
+  { 28, 27 }, { 26, 30 }, { 31, 25 }, { 27, 29 },
+  { 26, 31 }, { 30, 26 }, { 28, 28 }, { 31, 26 },
+  { 29, 27 }, { 27, 30 }, { 28, 29 }, { 27, 31 },
+  { 30, 27 }, { 31, 27 }, { 28, 30 }, { 29, 28 },
+  { 30, 28 }, { 29, 29 }, { 30, 29 }, { 31, 28 },
+  { 28, 31 }, { 29, 30 }, { 29, 31 }, { 31, 29 },
+  { 30, 30 }, { 30, 31 }, { 31, 30 }, { 31, 31 }
+};
diff --git a/av1/common/zigzag4.c b/av1/common/zigzag4.c
new file mode 100644
index 0000000..7ccc160
--- /dev/null
+++ b/av1/common/zigzag4.c
@@ -0,0 +1,28 @@
+/* This file is generated by gen_zigzag4.m */
+
+/* clang-format off */
+
+#include "odintrin.h"
+OD_EXTERN const unsigned char OD_ZIGZAG4_DCT_DCT[15][2] = {
+  {0, 1}, {1, 0}, {1, 1}, {0, 2},
+  {2, 0}, {0, 3}, {1, 2}, {3, 0},
+  {2, 1}, {1, 3}, {2, 2}, {3, 1},
+  {2, 3}, {3, 2}, {3, 3} };
+
+OD_EXTERN const unsigned char OD_ZIGZAG4_ADST_DCT[15][2] = {
+  {1, 0}, {0, 1}, {2, 0}, {1, 1},
+  {3, 0}, {2, 1}, {0, 2}, {1, 2},
+  {3, 1}, {0, 3}, {2, 2}, {1, 3},
+  {3, 2}, {2, 3}, {3, 3} };
+
+OD_EXTERN const unsigned char OD_ZIGZAG4_DCT_ADST[15][2] = {
+  {0, 1}, {0, 2}, {1, 0}, {0, 3},
+  {1, 1}, {1, 2}, {2, 0}, {1, 3},
+  {2, 1}, {2, 2}, {3, 0}, {3, 1},
+  {2, 3}, {3, 2}, {3, 3} };
+
+OD_EXTERN const unsigned char OD_ZIGZAG4_ADST_ADST[15][2] = {
+  {0, 1}, {1, 0}, {1, 1}, {0, 2},
+  {2, 0}, {0, 3}, {1, 2}, {3, 0},
+  {2, 1}, {1, 3}, {2, 2}, {3, 1},
+  {2, 3}, {3, 2}, {3, 3} };
diff --git a/av1/common/zigzag8.c b/av1/common/zigzag8.c
new file mode 100644
index 0000000..ba39ac0
--- /dev/null
+++ b/av1/common/zigzag8.c
@@ -0,0 +1,65 @@
+/* This file is generated by gen_zigzag8.m */
+
+/* clang-format off */
+
+#include "odintrin.h"
+
+OD_EXTERN const unsigned char OD_ZIGZAG8_DCT_DCT[48][2] = {
+  {4, 0}, {4, 1}, {5, 0}, {5, 1},
+  {6, 0}, {7, 0}, {6, 1}, {7, 1},
+  {0, 4}, {1, 4}, {0, 5}, {1, 5},
+  {0, 6}, {1, 6}, {0, 7}, {1, 7},
+  {2, 4}, {4, 2}, {3, 4}, {2, 5},
+  {4, 3}, {5, 2}, {4, 4}, {3, 5},
+  {5, 3}, {2, 6}, {4, 5}, {6, 2},
+  {5, 4}, {3, 6}, {2, 7}, {6, 3},
+  {5, 5}, {7, 2}, {4, 6}, {3, 7},
+  {6, 4}, {7, 3}, {4, 7}, {5, 6},
+  {6, 5}, {7, 4}, {5, 7}, {6, 6},
+  {7, 5}, {6, 7}, {7, 6}, {7, 7}
+  };
+
+OD_EXTERN const unsigned char OD_ZIGZAG8_ADST_DCT[48][2] = {
+  {4, 0}, {5, 0}, {4, 1}, {6, 0},
+  {5, 1}, {7, 0}, {6, 1}, {7, 1},
+  {0, 4}, {1, 4}, {0, 5}, {1, 5},
+  {0, 6}, {1, 6}, {0, 7}, {1, 7},
+  {4, 2}, {2, 4}, {5, 2}, {4, 3},
+  {3, 4}, {2, 5}, {5, 3}, {4, 4},
+  {6, 2}, {3, 5}, {5, 4}, {2, 6},
+  {4, 5}, {6, 3}, {7, 2}, {3, 6},
+  {2, 7}, {5, 5}, {6, 4}, {4, 6},
+  {7, 3}, {3, 7}, {5, 6}, {6, 5},
+  {4, 7}, {7, 4}, {5, 7}, {7, 5},
+  {6, 6}, {7, 6}, {6, 7}, {7, 7}
+  };
+
+OD_EXTERN const unsigned char OD_ZIGZAG8_DCT_ADST[48][2] = {
+  {4, 0}, {4, 1}, {5, 0}, {5, 1},
+  {6, 0}, {6, 1}, {7, 0}, {7, 1},
+  {0, 4}, {0, 5}, {1, 4}, {0, 6},
+  {1, 5}, {0, 7}, {1, 6}, {1, 7},
+  {2, 4}, {2, 5}, {3, 4}, {4, 2},
+  {2, 6}, {4, 3}, {3, 5}, {4, 4},
+  {2, 7}, {3, 6}, {5, 2}, {4, 5},
+  {5, 3}, {3, 7}, {5, 4}, {4, 6},
+  {6, 2}, {5, 5}, {4, 7}, {6, 3},
+  {6, 4}, {5, 6}, {7, 2}, {6, 5},
+  {7, 3}, {5, 7}, {7, 4}, {6, 6},
+  {7, 5}, {6, 7}, {7, 6}, {7, 7}
+  };
+
+OD_EXTERN const unsigned char OD_ZIGZAG8_ADST_ADST[48][2] = {
+  {4, 0}, {4, 1}, {5, 0}, {5, 1},
+  {6, 0}, {7, 0}, {6, 1}, {7, 1},
+  {0, 4}, {1, 4}, {0, 5}, {1, 5},
+  {0, 6}, {1, 6}, {0, 7}, {1, 7},
+  {2, 4}, {4, 2}, {3, 4}, {2, 5},
+  {4, 3}, {5, 2}, {4, 4}, {3, 5},
+  {5, 3}, {2, 6}, {4, 5}, {6, 2},
+  {5, 4}, {3, 6}, {2, 7}, {6, 3},
+  {5, 5}, {7, 2}, {4, 6}, {3, 7},
+  {6, 4}, {7, 3}, {4, 7}, {5, 6},
+  {6, 5}, {7, 4}, {5, 7}, {6, 6},
+  {7, 5}, {6, 7}, {7, 6}, {7, 7}
+  };
diff --git a/av1/decoder/decint.h b/av1/decoder/decint.h
new file mode 100644
index 0000000..99dbc43
--- /dev/null
+++ b/av1/decoder/decint.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_decint_H)
+# define _decint_H (1)
+# include "av1/common/pvq_state.h"
+# include "aom_dsp/entdec.h"
+
+typedef struct daala_dec_ctx daala_dec_ctx;
+
+typedef struct daala_dec_ctx od_dec_ctx;
+
+
+struct daala_dec_ctx {
+  /* Stores context-adaptive CDFs for PVQ. */
+  od_state state;
+  /* Daala entropy decoder. */
+  od_ec_dec *ec;
+  /* Mode of quantization matrix: FLAT (0) or HVS (1) */
+  int qm;
+};
+
+#endif
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 026dcbc..fb240d7 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -57,6 +57,16 @@
 #define MAX_AV1_HEADER_SIZE 80
 #define ACCT_STR __func__
 
+#if CONFIG_PVQ
+#include "av1/decoder/pvq_decoder.h"
+#include "av1/encoder/encodemb.h"
+
+#include "aom_dsp/entdec.h"
+#include "av1/common/partition.h"
+#include "av1/decoder/decint.h"
+#include "av1/encoder/hybrid_fwd_txfm.h"
+#endif
+
 static struct aom_read_bit_buffer *init_read_bit_buffer(
     AV1Decoder *pbi, struct aom_read_bit_buffer *rb, const uint8_t *data,
     const uint8_t *data_end, uint8_t clear_data[MAX_AV1_HEADER_SIZE]);
@@ -299,6 +309,142 @@
   memset(dqcoeff, 0, (scan_line + 1) * sizeof(dqcoeff[0]));
 }
 
+#if CONFIG_PVQ
+static int av1_pvq_decode_helper(od_dec_ctx *dec, int16_t *ref_coeff,
+                                 int16_t *dqcoeff, int16_t *quant, int pli,
+                                 int bs, TX_TYPE tx_type, int xdec,
+                                 int ac_dc_coded) {
+  unsigned int flags;  // used for daala's stream analyzer.
+  int off;
+  const int is_keyframe = 0;
+  const int has_dc_skip = 1;
+  int quant_shift = bs == TX_32X32 ? 1 : 0;
+  // DC quantizer for PVQ
+  int pvq_dc_quant;
+  int lossless = (quant[0] == 0);
+  const int blk_size = tx_size_wide[bs];
+  int eob = 0;
+  int i;
+  // TODO(yushin): To enable activity masking,
+  // int use_activity_masking = dec->use_activity_masking;
+  int use_activity_masking = 0;
+
+  DECLARE_ALIGNED(16, int16_t, dqcoeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+  DECLARE_ALIGNED(16, int16_t, ref_coeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+
+  od_coeff ref_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX];
+  od_coeff out_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX];
+
+  od_raster_to_coding_order(ref_coeff_pvq, blk_size, tx_type, ref_coeff,
+                            blk_size);
+
+  if (lossless)
+    pvq_dc_quant = 1;
+  else {
+    // TODO(yushin): Enable this for activity masking,
+    // when pvq_qm_q4 is available in AOM.
+    // pvq_dc_quant = OD_MAXI(1, quant*
+    // dec->state.pvq_qm_q4[pli][od_qm_get_index(bs, 0)] >> 4);
+    pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift);
+  }
+
+  off = od_qm_offset(bs, xdec);
+
+  // copy int16 inputs to int32
+  for (i = 0; i < blk_size * blk_size; i++) ref_int32[i] = ref_coeff_pvq[i];
+
+  od_pvq_decode(dec, ref_int32, out_int32, (int)quant[1] >> quant_shift, pli,
+                bs, OD_PVQ_BETA[use_activity_masking][pli][bs],
+                OD_ROBUST_STREAM, is_keyframe, &flags, ac_dc_coded,
+                dec->state.qm + off, dec->state.qm_inv + off);
+
+  // copy int32 result back to int16
+  for (i = 0; i < blk_size * blk_size; i++) dqcoeff_pvq[i] = out_int32[i];
+
+  if (!has_dc_skip || dqcoeff_pvq[0]) {
+    dqcoeff_pvq[0] =
+        has_dc_skip + generic_decode(dec->ec, &dec->state.adapt.model_dc[pli],
+                                     -1, &dec->state.adapt.ex_dc[pli][bs][0], 2,
+                                     "dc:mag");
+    if (dqcoeff_pvq[0])
+      dqcoeff_pvq[0] *= od_ec_dec_bits(dec->ec, 1, "dc:sign") ? -1 : 1;
+  }
+  dqcoeff_pvq[0] = dqcoeff_pvq[0] * pvq_dc_quant + ref_coeff_pvq[0];
+
+  od_coding_order_to_raster(dqcoeff, blk_size, tx_type, dqcoeff_pvq, blk_size);
+
+  eob = blk_size * blk_size;
+
+  return eob;
+}
+
+static int av1_pvq_decode_helper2(MACROBLOCKD *const xd,
+                                  MB_MODE_INFO *const mbmi, int plane, int row,
+                                  int col, TX_SIZE tx_size, TX_TYPE tx_type) {
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  // transform block size in pixels
+  int tx_blk_size = tx_size_wide[tx_size];
+  int i, j;
+  tran_low_t *pvq_ref_coeff = pd->pvq_ref_coeff;
+  const int diff_stride = tx_blk_size;
+  int16_t *pred = pd->pred;
+  tran_low_t *const dqcoeff = pd->dqcoeff;
+  int ac_dc_coded;  // bit0: DC coded, bit1 : AC coded
+  uint8_t *dst;
+  int eob;
+
+  eob = 0;
+  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
+
+  // decode ac/dc coded flag. bit0: DC coded, bit1 : AC coded
+  // NOTE : we don't use 5 symbols for luma here in aom codebase,
+  // since block partition is taken care of by aom.
+  // So, only AC/DC skip info is coded
+  ac_dc_coded = od_decode_cdf_adapt(
+      xd->daala_dec.ec,
+      xd->daala_dec.state.adapt.skip_cdf[2 * tx_size + (plane != 0)], 4,
+      xd->daala_dec.state.adapt.skip_increment, "skip");
+
+  if (ac_dc_coded) {
+    int xdec = pd->subsampling_x;
+    int seg_id = mbmi->segment_id;
+    int16_t *quant;
+    FWD_TXFM_PARAM fwd_txfm_param;
+    // TODO(yaowu): correct this with optimal number from decoding process.
+    const int max_scan_line = tx_size_2d[tx_size];
+
+    for (j = 0; j < tx_blk_size; j++)
+      for (i = 0; i < tx_blk_size; i++) {
+        pred[diff_stride * j + i] = dst[pd->dst.stride * j + i];
+      }
+
+    fwd_txfm_param.tx_type = tx_type;
+    fwd_txfm_param.tx_size = tx_size;
+    fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
+    fwd_txfm_param.rd_transform = 0;
+    fwd_txfm_param.lossless = xd->lossless[seg_id];
+
+    fwd_txfm(pred, pvq_ref_coeff, diff_stride, &fwd_txfm_param);
+
+    quant = &pd->seg_dequant[seg_id][0];  // aom's quantizer
+
+    eob = av1_pvq_decode_helper(&xd->daala_dec, pvq_ref_coeff, dqcoeff, quant,
+                                plane, tx_size, tx_type, xdec, ac_dc_coded);
+
+    // Since av1 does not have separate inverse transform
+    // but also contains adding to predicted image,
+    // pass blank dummy image to av1_inv_txfm_add_*x*(), i.e. set dst as zeros
+    for (j = 0; j < tx_blk_size; j++)
+      for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0;
+
+    inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
+                            max_scan_line, eob);
+  }
+
+  return eob;
+}
+#endif
+
 static void predict_and_reconstruct_intra_block(AV1_COMMON *cm,
                                                 MACROBLOCKD *const xd,
 #if CONFIG_ANS
@@ -314,6 +460,10 @@
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   uint8_t *dst;
   int block_idx = (row << 1) + col;
+#if CONFIG_PVQ
+  (void)cm;
+  (void)r;
+#endif
   dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
 
   if (mbmi->sb_type < BLOCK_8X8)
@@ -324,6 +474,7 @@
 
   if (!mbmi->skip) {
     TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+#if !CONFIG_PVQ
     const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
     int16_t max_scan_line = 0;
     const int eob =
@@ -335,6 +486,9 @@
     if (eob)
       inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
                               max_scan_line, eob);
+#else
+    av1_pvq_decode_helper2(xd, mbmi, plane, row, col, tx_size, tx_type);
+#endif
   }
 }
 
@@ -400,10 +554,19 @@
 #endif
                                    int segment_id, int plane, int row, int col,
                                    TX_SIZE tx_size) {
-  struct macroblockd_plane *const pd = &xd->plane[plane];
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   int block_idx = (row << 1) + col;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+#if CONFIG_PVQ
+  int eob;
+  (void)cm;
+  (void)r;
+  (void)segment_id;
+#else
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+#endif
+
+#if !CONFIG_PVQ
   const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
   int16_t max_scan_line = 0;
   const int eob =
@@ -416,6 +579,10 @@
     inverse_transform_block(xd, plane, tx_type, tx_size,
                             &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
                             pd->dst.stride, max_scan_line, eob);
+#else
+  eob = av1_pvq_decode_helper2(xd, &xd->mi[0]->mbmi, plane, row, col, tx_size,
+                               tx_type);
+#endif
   return eob;
 }
 #endif  // !CONFIG_VAR_TX || CONFIG_SUPER_TX
@@ -1507,6 +1674,11 @@
 #endif
                              n8x8_l2);
   subsize = subsize_lookup[partition][bsize];  // get_subsize(bsize, partition);
+
+#if CONFIG_PVQ
+  assert(partition < PARTITION_TYPES);
+  assert(subsize < BLOCK_SIZES);
+#endif
 #if CONFIG_SUPERTX
   if (!frame_is_intra_only(cm) && partition != PARTITION_NONE &&
       bsize <= MAX_SUPERTX_BLOCK_SIZE && !supertx_enabled && !xd->lossless[0]) {
@@ -1897,6 +2069,7 @@
 }
 #endif
 
+#if !CONFIG_PVQ
 static void read_coef_probs_common(av1_coeff_probs_model *coef_probs,
                                    aom_reader *r) {
   int i, j, k, l, m;
@@ -1921,6 +2094,7 @@
   for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
     read_coef_probs_common(fc->coef_probs[tx_size], r);
 }
+#endif
 
 static void setup_segmentation(AV1_COMMON *const cm,
                                struct aom_read_bit_buffer *rb) {
@@ -2767,6 +2941,18 @@
 }
 #endif  // CONFIG_EXT_TILE
 
+#if CONFIG_PVQ
+static void daala_dec_init(daala_dec_ctx *daala_dec, od_ec_dec *ec) {
+  daala_dec->ec = ec;
+  od_adapt_ctx_reset(&daala_dec->state.adapt, 0);
+
+  daala_dec->qm = OD_FLAT_QM;
+
+  od_init_qm(daala_dec->state.qm, daala_dec->state.qm_inv,
+             daala_dec->qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT);
+}
+#endif
+
 static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
                                    const uint8_t *data_end) {
   AV1_COMMON *const cm = &pbi->common;
@@ -2849,6 +3035,9 @@
               ? &cm->counts
               : NULL;
       av1_zero(td->dqcoeff);
+#if CONFIG_PVQ
+      av1_zero(td->pvq_ref_coeff);
+#endif
       av1_tile_init(&td->xd.tile, td->cm, tile_row, tile_col);
 #if !CONFIG_ANS
       setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
@@ -2864,7 +3053,14 @@
         td->bit_reader.accounting = NULL;
       }
 #endif
-      av1_init_macroblockd(cm, &td->xd, td->dqcoeff);
+      av1_init_macroblockd(cm, &td->xd,
+#if CONFIG_PVQ
+                           td->pvq_ref_coeff,
+#endif
+                           td->dqcoeff);
+#if CONFIG_PVQ
+      daala_dec_init(&td->xd.daala_dec, &td->bit_reader.ec);
+#endif
 #if CONFIG_PALETTE
       td->xd.plane[0].color_index_map = td->color_index_map[0];
       td->xd.plane[1].color_index_map = td->color_index_map[1];
@@ -3196,7 +3392,14 @@
                             &twd->bit_reader, pbi->decrypt_cb,
                             pbi->decrypt_state);
 #endif  // CONFIG_ANS
-        av1_init_macroblockd(cm, &twd->xd, twd->dqcoeff);
+        av1_init_macroblockd(cm, &twd->xd,
+#if CONFIG_PVQ
+                             twd->pvq_ref_coeff,
+#endif
+                             twd->dqcoeff);
+#if CONFIG_PVQ
+        daala_dec_init(&twd->xd.daala_dec, &twd->bit_reader.ec);
+#endif
 #if CONFIG_PALETTE
         twd->xd.plane[0].color_index_map = twd->color_index_map[0];
         twd->xd.plane[1].color_index_map = twd->color_index_map[1];
@@ -3657,28 +3860,28 @@
   switch (gmtype) {
     case GLOBAL_ZERO: break;
     case GLOBAL_AFFINE:
-      params->motion_params.wmmat[2].as_mv.row =
+      params->motion_params.wmmat[4] =
           (aom_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
            GM_ALPHA_DECODE_FACTOR);
-      params->motion_params.wmmat[2].as_mv.col =
+      params->motion_params.wmmat[5] =
           aom_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
               GM_ALPHA_DECODE_FACTOR +
           (1 << WARPEDMODEL_PREC_BITS);
     // fallthrough intended
     case GLOBAL_ROTZOOM:
-      params->motion_params.wmmat[1].as_mv.row =
+      params->motion_params.wmmat[2] =
           aom_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
           GM_ALPHA_DECODE_FACTOR;
-      params->motion_params.wmmat[1].as_mv.col =
+      params->motion_params.wmmat[3] =
           (aom_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
            GM_ALPHA_DECODE_FACTOR) +
           (1 << WARPEDMODEL_PREC_BITS);
     // fallthrough intended
     case GLOBAL_TRANSLATION:
-      params->motion_params.wmmat[0].as_mv.row =
+      params->motion_params.wmmat[0] =
           aom_read_primitive_symmetric(r, GM_ABS_TRANS_BITS) *
           GM_TRANS_DECODE_FACTOR;
-      params->motion_params.wmmat[0].as_mv.col =
+      params->motion_params.wmmat[1] =
           aom_read_primitive_symmetric(r, GM_ABS_TRANS_BITS) *
           GM_TRANS_DECODE_FACTOR;
       break;
@@ -3734,6 +3937,7 @@
 
   if (cm->tx_mode == TX_MODE_SELECT) read_tx_size_probs(fc, &r);
 
+#if !CONFIG_PVQ
   read_coef_probs(fc, cm->tx_mode, &r);
 
 #if CONFIG_VAR_TX
@@ -3745,8 +3949,8 @@
       av1_diff_update_prob(&r, &fc->rect_tx_prob[i], ACCT_STR);
   }
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
-#endif
-
+#endif  // CONFIG_VAR_TX
+#endif  // !CONFIG_PVQ
   for (k = 0; k < SKIP_CONTEXTS; ++k)
     av1_diff_update_prob(&r, &fc->skip_probs[k], ACCT_STR);
 
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index f302ad2..a318c2f 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -1191,10 +1191,10 @@
     case ZEROMV: {
 #if CONFIG_GLOBAL_MOTION
       mv[0].as_int =
-          cm->global_motion[ref_frame[0]].motion_params.wmmat[0].as_int;
+          gm_get_motion_vector(&cm->global_motion[ref_frame[0]]).as_int;
       if (is_compound)
         mv[1].as_int =
-            cm->global_motion[ref_frame[1]].motion_params.wmmat[0].as_int;
+            gm_get_motion_vector(&cm->global_motion[ref_frame[1]]).as_int;
 #else
       mv[0].as_int = 0;
       if (is_compound) mv[1].as_int = 0;
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index c3099ba..7547656 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -33,7 +33,10 @@
 
 #include "av1/decoder/decodeframe.h"
 #include "av1/decoder/decoder.h"
+
+#if !CONFIG_PVQ
 #include "av1/decoder/detokenize.h"
+#endif
 
 static void initialize_dec(void) {
   static volatile int init_done = 0;
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index 262995a..f50da1c 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -26,6 +26,12 @@
 #include "av1/common/accounting.h"
 #endif
 
+#if CONFIG_PVQ
+#include "aom_dsp/entdec.h"
+#include "av1/decoder/decint.h"
+#include "av1/encoder/encodemb.h"
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -37,6 +43,10 @@
   DECLARE_ALIGNED(16, MACROBLOCKD, xd);
   /* dqcoeff are shared by all the planes. So planes must be decoded serially */
   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
+#if CONFIG_PVQ
+  /* forward transformed predicted image, a reference for PVQ */
+  DECLARE_ALIGNED(16, tran_low_t, pvq_ref_coeff[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+#endif
 #if CONFIG_PALETTE
   DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
 #endif  // CONFIG_PALETTE
@@ -49,6 +59,10 @@
   DECLARE_ALIGNED(16, MACROBLOCKD, xd);
   /* dqcoeff are shared by all the planes. So planes must be decoded serially */
   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
+#if CONFIG_PVQ
+  /* forward transformed predicted image, a reference for PVQ */
+  DECLARE_ALIGNED(16, tran_low_t, pvq_ref_coeff[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+#endif
 #if CONFIG_PALETTE
   DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
 #endif  // CONFIG_PALETTE
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 795b1b0..0f183f2 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -9,9 +9,11 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
+#if !CONFIG_PVQ
 #include "aom_mem/aom_mem.h"
 #include "aom_ports/mem.h"
-
+#endif
+#if !CONFIG_PVQ
 #if CONFIG_ANS
 #include "aom_dsp/ans.h"
 #endif  // CONFIG_ANS
@@ -356,3 +358,4 @@
   av1_set_contexts(xd, pd, tx_size, eob > 0, x, y);
   return eob;
 }
+#endif
diff --git a/av1/decoder/detokenize.h b/av1/decoder/detokenize.h
index 1eb1e6c..ec68665 100644
--- a/av1/decoder/detokenize.h
+++ b/av1/decoder/detokenize.h
@@ -9,6 +9,7 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
+#if !CONFIG_PVQ
 #ifndef AV1_DECODER_DETOKENIZE_H_
 #define AV1_DECODER_DETOKENIZE_H_
 
@@ -39,5 +40,5 @@
 #ifdef __cplusplus
 }  // extern "C"
 #endif
-
 #endif  // AV1_DECODER_DETOKENIZE_H_
+#endif
diff --git a/av1/decoder/generic_decoder.c b/av1/decoder/generic_decoder.c
new file mode 100644
index 0000000..86187fa
--- /dev/null
+++ b/av1/decoder/generic_decoder.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "aom_dsp/entdec.h"
+#include "av1/common/generic_code.h"
+#include "av1/common/odintrin.h"
+#include "pvq_decoder.h"
+
+/** Decodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] ec    range decoder
+ * @param [in,out] cdf   CDF of the variable (Q15)
+ * @param [in]     n     number of values possible
+ * @param [in,out] count number of symbols encoded with that cdf so far
+ * @param [in]     rate  adaptation rate shift (smaller is faster)
+ * @return decoded variable
+ */
+int od_decode_cdf_adapt_q15_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int *count, int rate OD_ACC_STR) {
+  int val;
+  int i;
+  if (*count == 0) {
+    int ft;
+    ft = cdf[n - 1];
+    for (i = 0; i < n; i++) {
+      cdf[i] = cdf[i]*32768/ft;
+    }
+  }
+  val = od_ec_decode_cdf_q15(ec, cdf, n);
+  od_cdf_adapt_q15(val, cdf, n, count, rate);
+  return val;
+}
+
+/** Decodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] ec    range decoder
+ * @param [in]     cdf   CDF of the variable (Q15)
+ * @param [in]     n     number of values possible
+ * @param [in]     increment adaptation speed (Q15)
+ *
+ * @retval decoded variable
+ */
+int od_decode_cdf_adapt_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int increment OD_ACC_STR) {
+  int i;
+  int val;
+  val = od_ec_decode_cdf_unscaled(ec, cdf, n);
+  if (cdf[n-1] + increment > 32767) {
+    for (i = 0; i < n; i++) {
+      /* Second term ensures that the pdf is non-null */
+      cdf[i] = (cdf[i] >> 1) + i + 1;
+    }
+  }
+  for (i = val; i < n; i++) cdf[i] += increment;
+  return val;
+}
+
+/** Decodes a random variable using a "generic" model, assuming that the
+ * distribution is one-sided (zero and up), has a single mode, and decays
+ * exponentially past the model.
+ *
+ * @param [in,out] dec   range decoder
+ * @param [in,out] model generic probability model
+ * @param [in]     x     variable being encoded
+ * @param [in,out] ExQ16 expectation of x (adapted)
+ * @param [in]     integration integration period of ExQ16 (leaky average over
+ * 1<<integration samples)
+ *
+ * @retval decoded variable x
+ */
+int generic_decode_(od_ec_dec *dec, generic_encoder *model, int max,
+ int *ex_q16, int integration OD_ACC_STR) {
+  int lg_q1;
+  int shift;
+  int id;
+  uint16_t *cdf;
+  int xs;
+  int lsb;
+  int x;
+  int ms;
+  lsb = 0;
+  if (max == 0) return 0;
+  lg_q1 = log_ex(*ex_q16);
+  /* If expectation is too large, shift x to ensure that
+     all we have past xs=15 is the exponentially decaying tail
+     of the distribution. */
+  shift = OD_MAXI(0, (lg_q1 - 5) >> 1);
+  /* Choose the cdf to use: we have two per "octave" of ExQ16. */
+  id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
+  cdf = model->cdf[id];
+  ms = (max + (1 << shift >> 1)) >> shift;
+  if (max == -1) xs = od_ec_decode_cdf_unscaled(dec, cdf, 16);
+  else xs = od_ec_decode_cdf_unscaled(dec, cdf, OD_MINI(ms + 1, 16));
+  if (xs == 15) {
+    int e;
+    unsigned decay;
+    /* Estimate decay based on the assumption that the distribution is close
+       to Laplacian for large values. We should probably have an adaptive
+       estimate instead. Note: The 2* is a kludge that's not fully understood
+       yet. */
+    OD_ASSERT(*ex_q16 < INT_MAX >> 1);
+    e = ((2**ex_q16 >> 8) + (1 << shift >> 1)) >> shift;
+    decay = OD_MAXI(2, OD_MINI(254, 256*e/(e + 256)));
+    xs += laplace_decode_special(dec, decay, (max == -1) ? -1 : ms - 15, acc_str);
+  }
+  if (shift != 0) {
+    int special;
+    /* Because of the rounding, there's only half the number of possibilities
+       for xs=0 */
+    special = xs == 0;
+    if (shift - special > 0) lsb = od_ec_dec_bits(dec, shift - special, acc_str);
+    lsb -= !special << (shift - 1);
+  }
+  x = (xs << shift) + lsb;
+  generic_model_update(model, ex_q16, x, xs, id, integration);
+  OD_LOG((OD_LOG_ENTROPY_CODER, OD_LOG_DEBUG,
+   "dec: %d %d %d %d %d %x", *ex_q16, x, shift, id, xs, dec->rng));
+  return x;
+}
diff --git a/av1/decoder/laplace_decoder.c b/av1/decoder/laplace_decoder.c
new file mode 100644
index 0000000..4c3def5
--- /dev/null
+++ b/av1/decoder/laplace_decoder.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "aom_dsp/entdec.h"
+#include "av1/common/pvq.h"
+#include "pvq_decoder.h"
+
+#if OD_ACCOUNTING
+# define od_decode_pvq_split(ec, adapt, sum, ctx, str) od_decode_pvq_split_(ec, adapt, sum, ctx, str)
+#else
+# define od_decode_pvq_split(ec, adapt, sum, ctx, str) od_decode_pvq_split_(ec, adapt, sum, ctx)
+#endif
+
+static int od_decode_pvq_split_(od_ec_dec *ec, od_pvq_codeword_ctx *adapt,
+ int sum, int ctx OD_ACC_STR) {
+  int shift;
+  int count;
+  int msbs;
+  int fctx;
+  count = 0;
+  if (sum == 0) return 0;
+  shift = OD_MAXI(0, OD_ILOG(sum) - 3);
+  fctx = 7*ctx + (sum >> shift) - 1;
+  msbs = od_decode_cdf_adapt(ec, adapt->pvq_split_cdf[fctx],
+   (sum >> shift) + 1, adapt->pvq_split_increment, acc_str);
+  if (shift) count = od_ec_dec_bits(ec, shift, acc_str);
+  count += msbs << shift;
+  if (count > sum) {
+    count = sum;
+    ec->error = 1;
+  }
+  return count;
+}
+
+void od_decode_band_pvq_splits(od_ec_dec *ec, od_pvq_codeword_ctx *adapt,
+ od_coeff *y, int n, int k, int level) {
+  int mid;
+  int count_right;
+  if (n == 1) {
+    y[0] = k;
+  }
+  else if (k == 0) {
+    OD_CLEAR(y, n);
+  }
+  else if (k == 1 && n <= 16) {
+    int cdf_id;
+    int pos;
+    cdf_id = od_pvq_k1_ctx(n, level == 0);
+    OD_CLEAR(y, n);
+    pos = od_decode_cdf_adapt(ec, adapt->pvq_k1_cdf[cdf_id], n,
+     adapt->pvq_k1_increment, "pvq:k1");
+    y[pos] = 1;
+  }
+  else {
+    mid = n >> 1;
+    count_right = od_decode_pvq_split(ec, adapt, k, od_pvq_size_ctx(n),
+     "pvq:split");
+    od_decode_band_pvq_splits(ec, adapt, y, mid, k - count_right, level + 1);
+    od_decode_band_pvq_splits(ec, adapt, y + mid, n - mid, count_right,
+     level + 1);
+  }
+}
+
+/** Decodes the tail of a Laplace-distributed variable, i.e. it doesn't
+ * do anything special for the zero case.
+ *
+ * @param [dec] range decoder
+ * @param [decay] decay factor of the distribution, i.e. pdf ~= decay^x
+ * @param [max] maximum possible value of x (used to truncate the pdf)
+ *
+ * @retval decoded variable x
+ */
+int od_laplace_decode_special_(od_ec_dec *dec, unsigned decay, int max OD_ACC_STR) {
+  int pos;
+  int shift;
+  int xs;
+  int ms;
+  int sym;
+  const uint16_t *cdf;
+  shift = 0;
+  if (max == 0) return 0;
+  /* We don't want a large decay value because that would require too many
+     symbols. However, it's OK if the max is below 15. */
+  while (((max >> shift) >= 15 || max == -1) && decay > 235) {
+    decay = (decay*decay + 128) >> 8;
+    shift++;
+  }
+  decay = OD_MINI(decay, 254);
+  decay = OD_MAXI(decay, 2);
+  ms = max >> shift;
+  cdf = EXP_CDF_TABLE[(decay + 1) >> 1];
+  OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "decay = %d\n", decay));
+  xs = 0;
+  do {
+    sym = OD_MINI(xs, 15);
+    {
+      int i;
+      OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "%d %d %d %d", xs, shift, sym, max));
+      for (i = 0; i < 16; i++) {
+        OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "%d ", cdf[i]));
+      }
+      OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "\n"));
+    }
+    if (ms > 0 && ms < 15) {
+      /* Simple way of truncating the pdf when we have a bound. */
+      sym = od_ec_decode_cdf_unscaled(dec, cdf, ms + 1);
+    }
+    else sym = od_ec_decode_cdf_q15(dec, cdf, 16);
+    xs += sym;
+    ms -= 15;
+  }
+  while (sym >= 15 && ms != 0);
+  if (shift) pos = (xs << shift) + od_ec_dec_bits(dec, shift, acc_str);
+  else pos = xs;
+  OD_ASSERT(pos >> shift <= max >> shift || max == -1);
+  if (max != -1 && pos > max) {
+    pos = max;
+    dec->error = 1;
+  }
+  OD_ASSERT(pos <= max || max == -1);
+  return pos;
+}
+
+/** Decodes a Laplace-distributed variable for use in PVQ.
+ *
+ * @param [in,out] dec  range decoder
+ * @param [in]     ExQ8 expectation of the absolute value of x
+ * @param [in]     K    maximum value of |x|
+ *
+ * @retval decoded variable (including sign)
+ */
+int od_laplace_decode_(od_ec_dec *dec, unsigned ex_q8, int k OD_ACC_STR) {
+  int j;
+  int shift;
+  uint16_t cdf[16];
+  int sym;
+  int lsb;
+  int decay;
+  int offset;
+  lsb = 0;
+  /* Shift down x if expectation is too high. */
+  shift = OD_ILOG(ex_q8) - 11;
+  if (shift < 0) shift = 0;
+  /* Apply the shift with rounding to Ex, K and xs. */
+  ex_q8 = (ex_q8 + (1 << shift >> 1)) >> shift;
+  k = (k + (1 << shift >> 1)) >> shift;
+  decay = OD_MINI(254, OD_DIVU(256*ex_q8, (ex_q8 + 256)));
+  offset = LAPLACE_OFFSET[(decay + 1) >> 1];
+  for (j = 0; j < 16; j++) {
+    cdf[j] = EXP_CDF_TABLE[(decay + 1) >> 1][j] - offset;
+  }
+  /* Simple way of truncating the pdf when we have a bound */
+  if (k == 0) sym = 0;
+  else sym = od_ec_decode_cdf_unscaled(dec, cdf, OD_MINI(k + 1, 16));
+  if (shift) {
+    int special;
+    /* Because of the rounding, there's only half the number of possibilities
+       for xs=0 */
+    special = (sym == 0);
+    if (shift - special > 0) lsb = od_ec_dec_bits(dec, shift - special, acc_str);
+    lsb -= (!special << (shift - 1));
+  }
+  /* Handle the exponentially-decaying tail of the distribution */
+  if (sym == 15) sym += laplace_decode_special(dec, decay, k - 15, acc_str);
+  return (sym << shift) + lsb;
+}
+
+#if OD_ACCOUNTING
+# define laplace_decode_vector_delta(dec, y, n, k, curr, means, str) laplace_decode_vector_delta_(dec, y, n, k, curr, means, str)
+#else
+# define laplace_decode_vector_delta(dec, y, n, k, curr, means, str) laplace_decode_vector_delta_(dec, y, n, k, curr, means)
+#endif
+
+static void laplace_decode_vector_delta_(od_ec_dec *dec, od_coeff *y, int n, int k,
+                                        int32_t *curr, const int32_t *means
+                                        OD_ACC_STR) {
+  int i;
+  int prev;
+  int sum_ex;
+  int sum_c;
+  int coef;
+  int pos;
+  int k0;
+  int sign;
+  int first;
+  int k_left;
+  prev = 0;
+  sum_ex = 0;
+  sum_c = 0;
+  coef = 256*means[OD_ADAPT_COUNT_Q8]/
+   (1 + means[OD_ADAPT_COUNT_EX_Q8]);
+  pos = 0;
+  sign = 0;
+  first = 1;
+  k_left = k;
+  for (i = 0; i < n; i++) y[i] = 0;
+  k0 = k_left;
+  coef = OD_MAXI(coef, 1);
+  for (i = 0; i < k0; i++) {
+    int count;
+    if (first) {
+      int decay;
+      int ex = coef*(n - prev)/k_left;
+      if (ex > 65280) decay = 255;
+      else {
+        decay = OD_MINI(255,
+         (int)((256*ex/(ex + 256) + (ex>>5)*ex/((n + 1)*(n - 1)*(n - 1)))));
+      }
+      /*Update mean position.*/
+      count = laplace_decode_special(dec, decay, n - 1, acc_str);
+      first = 0;
+    }
+    else count = laplace_decode(dec, coef*(n - prev)/k_left, n - prev - 1, acc_str);
+    sum_ex += 256*(n - prev);
+    sum_c += count*k_left;
+    pos += count;
+    OD_ASSERT(pos < n);
+    if (y[pos] == 0)
+      sign = od_ec_dec_bits(dec, 1, acc_str);
+    y[pos] += sign ? -1 : 1;
+    prev = pos;
+    k_left--;
+    if (k_left == 0) break;
+  }
+  if (k > 0) {
+    curr[OD_ADAPT_COUNT_Q8] = 256*sum_c;
+    curr[OD_ADAPT_COUNT_EX_Q8] = sum_ex;
+  }
+  else {
+    curr[OD_ADAPT_COUNT_Q8] = -1;
+    curr[OD_ADAPT_COUNT_EX_Q8] = 0;
+  }
+  curr[OD_ADAPT_K_Q8] = 0;
+  curr[OD_ADAPT_SUM_EX_Q8] = 0;
+}
+
+/** Decodes a vector of integers assumed to come from rounding a sequence of
+ * Laplace-distributed real values in decreasing order of variance.
+ *
+ * @param [in,out] dec range decoder
+ * @param [in]     y     decoded vector
+ * @param [in]     N     dimension of the vector
+ * @param [in]     K     sum of the absolute value of components of y
+ * @param [out]    curr  Adaptation context output, may alias means.
+ * @param [in]     means Adaptation context input.
+ */
+void od_laplace_decode_vector_(od_ec_dec *dec, od_coeff *y, int n, int k,
+                           int32_t *curr, const int32_t *means OD_ACC_STR) {
+  int i;
+  int sum_ex;
+  int kn;
+  int exp_q8;
+  int mean_k_q8;
+  int mean_sum_ex_q8;
+  int ran_delta;
+  ran_delta = 0;
+  if (k <= 1) {
+    laplace_decode_vector_delta(dec, y, n, k, curr, means, acc_str);
+    return;
+  }
+  if (k == 0) {
+    curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE;
+    curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE;
+    curr[OD_ADAPT_K_Q8] = 0;
+    curr[OD_ADAPT_SUM_EX_Q8] = 0;
+    for (i = 0; i < n; i++) y[i] = 0;
+    return;
+  }
+  sum_ex = 0;
+  kn = k;
+  /* Estimates the factor relating pulses_left and positions_left to E(|x|).*/
+  mean_k_q8 = means[OD_ADAPT_K_Q8];
+  mean_sum_ex_q8 = means[OD_ADAPT_SUM_EX_Q8];
+  if (mean_k_q8 < 1 << 23) exp_q8 = 256*mean_k_q8/(1 + mean_sum_ex_q8);
+  else exp_q8 = mean_k_q8/(1 + (mean_sum_ex_q8 >> 8));
+  for (i = 0; i < n; i++) {
+    int ex;
+    int x;
+    if (kn == 0) break;
+    if (kn <= 1 && i != n - 1) {
+      laplace_decode_vector_delta(dec, y + i, n - i, kn, curr, means, acc_str);
+      ran_delta = 1;
+      i = n;
+      break;
+    }
+    /* Expected value of x (round-to-nearest) is
+       expQ8*pulses_left/positions_left. */
+    ex = (2*exp_q8*kn + (n - i))/(2*(n - i));
+    if (ex > kn*256) ex = kn*256;
+    sum_ex += (2*256*kn + (n - i))/(2*(n - i));
+    /* No need to encode the magnitude for the last bin. */
+    if (i != n - 1) x = laplace_decode(dec, ex, kn, acc_str);
+    else x = kn;
+    if (x != 0) {
+      if (od_ec_dec_bits(dec, 1, acc_str)) x = -x;
+    }
+    y[i] = x;
+    kn -= abs(x);
+  }
+  /* Adapting the estimates for expQ8. */
+  if (!ran_delta) {
+    curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE;
+    curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE;
+  }
+  curr[OD_ADAPT_K_Q8] = k - kn;
+  curr[OD_ADAPT_SUM_EX_Q8] = sum_ex;
+  for (; i < n; i++) y[i] = 0;
+}
diff --git a/av1/decoder/pvq_decoder.c b/av1/decoder/pvq_decoder.c
new file mode 100644
index 0000000..1cc75f8
--- /dev/null
+++ b/av1/decoder/pvq_decoder.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "./aom_config.h"
+#include "aom_dsp/entcode.h"
+#include "aom_dsp/entdec.h"
+#include "av1/common/odintrin.h"
+#include "av1/common/partition.h"
+#include "av1/common/pvq_state.h"
+#include "av1/decoder/decint.h"
+#include "av1/decoder/pvq_decoder.h"
+
+static void od_decode_pvq_codeword(od_ec_dec *ec, od_pvq_codeword_ctx *ctx,
+ od_coeff *y, int n, int k) {
+  int i;
+  od_decode_band_pvq_splits(ec, ctx, y, n, k, 0);
+  for (i = 0; i < n; i++) {
+    if (y[i] && od_ec_dec_bits(ec, 1, "pvq:sign")) y[i] = -y[i];
+  }
+}
+
+/** Inverse of neg_interleave; decodes the interleaved gain.
+ *
+ * @param [in]      x      quantized/interleaved gain to decode
+ * @param [in]      ref    quantized gain of the reference
+ * @return                 original quantized gain value
+ */
+static int neg_deinterleave(int x, int ref) {
+  if (x < 2*ref-1) {
+    if (x & 1) return ref - 1 - (x >> 1);
+    else return ref + (x >> 1);
+  }
+  else return x+1;
+}
+
+/** Synthesizes one partition of coefficient values from a PVQ-encoded
+ * vector.
+ *
+ * @param [out]     xcoeff  output coefficient partition (x in math doc)
+ * @param [in]      ypulse  PVQ-encoded values (y in math doc); in the noref
+ *                          case, this vector has n entries, in the
+ *                          reference case it contains n-1 entries
+ *                          (the m-th entry is not included)
+ * @param [in]      ref     reference vector (prediction)
+ * @param [in]      n       number of elements in this partition
+ * @param [in]      gr      gain of the reference vector (prediction)
+ * @param [in]      noref   indicates presence or lack of prediction
+ * @param [in]      g       decoded quantized vector gain
+ * @param [in]      theta   decoded theta (prediction error)
+ * @param [in]      qm      QM with magnitude compensation
+ * @param [in]      qm_inv  Inverse of QM with magnitude compensation
+ */
+static void pvq_synthesis(od_coeff *xcoeff, od_coeff *ypulse, od_val16 *r16,
+ int n, od_val32 gr, int noref, od_val32 g, od_val32 theta, const int16_t *qm_inv,
+ int shift) {
+  int s;
+  int m;
+  /* Sign of the Householder reflection vector */
+  s = 0;
+  /* Direction of the Householder reflection vector */
+  m = noref ? 0 : od_compute_householder(r16, n, gr, &s, shift);
+  od_pvq_synthesis_partial(xcoeff, ypulse, r16, n, noref, g, theta, m, s,
+   qm_inv);
+}
+
+typedef struct {
+  od_coeff *ref;
+  int nb_coeffs;
+  int allow_flip;
+} cfl_ctx;
+
+/** Decodes a single vector of integers (eg, a partition within a
+ *  coefficient block) encoded using PVQ
+ *
+ * @param [in,out] ec      range decoder
+ * @param [in]     q0      scale/quantizer
+ * @param [in]     n       number of coefficients in partition
+ * @param [in,out] model   entropy decoder state
+ * @param [in,out] adapt   adaptation context
+ * @param [in,out] exg     ExQ16 expectation of decoded gain value
+ * @param [in,out] ext     ExQ16 expectation of decoded theta value
+ * @param [in]     ref     'reference' (prediction) vector
+ * @param [out]    out     decoded partition
+ * @param [out]    noref   boolean indicating absence of reference
+ * @param [in]     beta    per-band activity masking beta param
+ * @param [in]     robust  stream is robust to error in the reference
+ * @param [in]     is_keyframe whether we're encoding a keyframe
+ * @param [in]     pli     plane index
+ * @param [in]     cdf_ctx selects which cdf context to use
+ * @param [in,out] skip_rest whether to skip further bands in each direction
+ * @param [in]     band    index of the band being decoded
+ * @param [in,out] cfl     CfL context (used to decode the chroma sign-flip bit)
+ * @param [out]    skip    skip flag with range [0,1]
+ * @param [in]     qm      QM with magnitude compensation
+ * @param [in]     qm_inv  Inverse of QM with magnitude compensation
+ */
+static void pvq_decode_partition(od_ec_dec *ec,
+                                 int q0,
+                                 int n,
+                                 generic_encoder model[3],
+                                 od_adapt_ctx *adapt,
+                                 int *exg,
+                                 int *ext,
+                                 od_coeff *ref,
+                                 od_coeff *out,
+                                 int *noref,
+                                 od_val16 beta,
+                                 int robust,
+                                 int is_keyframe,
+                                 int pli,
+                                 int cdf_ctx,
+                                 cfl_ctx *cfl,
+                                 int has_skip,
+                                 int *skip_rest,
+                                 int band,
+                                 int *skip,
+                                 const int16_t *qm,
+                                 const int16_t *qm_inv) {
+  int k;
+  od_val32 qcg;
+  int max_theta;
+  int itheta;
+  od_val32 theta;
+  od_val32 gr;
+  od_val32 gain_offset;
+  od_coeff y[MAXN];
+  int qg;
+  int nodesync;
+  int id;
+  int i;
+  od_val16 ref16[MAXN];
+  int rshift;
+  theta = 0;
+  gr = 0;
+  gain_offset = 0;
+  /* We always use the robust bitstream for keyframes to avoid having
+     PVQ and entropy decoding depending on each other, hurting parallelism. */
+  nodesync = robust || is_keyframe;
+  /* Skip is per-direction. For band=0, we can use any of the flags. */
+  if (skip_rest[(band + 2) % 3]) {
+    qg = 0;
+    if (is_keyframe) {
+      itheta = -1;
+      *noref = 1;
+    }
+    else {
+      itheta = 0;
+      *noref = 0;
+    }
+  }
+  else {
+    /* Jointly decode gain, itheta and noref for small values. Then we handle
+       larger gain. We need to wait for itheta because in the !nodesync case
+       it depends on max_theta, which depends on the gain. */
+    id = od_decode_cdf_adapt(ec, &adapt->pvq.pvq_gaintheta_cdf[cdf_ctx][0],
+     8 + 7*has_skip, adapt->pvq.pvq_gaintheta_increment,
+     "pvq:gaintheta");
+    if (!is_keyframe && id >= 10) id++;
+    if (is_keyframe && id >= 8) id++;
+    if (id >= 8) {
+      id -= 8;
+      skip_rest[0] = skip_rest[1] = skip_rest[2] = 1;
+    }
+    qg = id & 1;
+    itheta = (id >> 1) - 1;
+    *noref = (itheta == -1);
+  }
+  /* The CfL flip bit is only decoded on the first band that has noref=0. */
+  if (cfl->allow_flip && !*noref) {
+    int flip;
+    flip = od_ec_dec_bits(ec, 1, "cfl:flip");
+    if (flip) {
+      for (i = 0; i < cfl->nb_coeffs; i++) cfl->ref[i] = -cfl->ref[i];
+    }
+    cfl->allow_flip = 0;
+  }
+  if (qg > 0) {
+    int tmp;
+    tmp = *exg;
+    qg = 1 + generic_decode(ec, &model[!*noref], -1, &tmp, 2, "pvq:gain");
+    OD_IIR_DIADIC(*exg, qg << 16, 2);
+  }
+  *skip = 0;
+#if defined(OD_FLOAT_PVQ)
+  rshift = 0;
+#else
+  /* Shift needed to make the reference fit in 15 bits, so that the Householder
+     vector can fit in 16 bits. */
+  rshift = OD_MAXI(0, od_vector_log_mag(ref, n) - 14);
+#endif
+  for (i = 0; i < n; i++) {
+#if defined(OD_FLOAT_PVQ)
+    ref16[i] = ref[i]*(double)qm[i]*OD_QM_SCALE_1;
+#else
+    ref16[i] = OD_SHR_ROUND(ref[i]*qm[i], OD_QM_SHIFT + rshift);
+#endif
+  }
+  if(!*noref){
+    /* we have a reference; compute its gain */
+    od_val32 cgr;
+    int icgr;
+    int cfl_enabled;
+    cfl_enabled = pli != 0 && is_keyframe && !OD_DISABLE_CFL;
+    cgr = od_pvq_compute_gain(ref16, n, q0, &gr, beta, rshift);
+    if (cfl_enabled) cgr = OD_CGAIN_SCALE;
+#if defined(OD_FLOAT_PVQ)
+    icgr = (int)floor(.5 + cgr);
+#else
+    icgr = OD_SHR_ROUND(cgr, OD_CGAIN_SHIFT);
+#endif
+    /* quantized gain is interleave encoded when there's a reference;
+       deinterleave it now */
+    if (is_keyframe) qg = neg_deinterleave(qg, icgr);
+    else {
+      qg = neg_deinterleave(qg, icgr + 1) - 1;
+      if (qg == 0) *skip = (icgr ? OD_PVQ_SKIP_ZERO : OD_PVQ_SKIP_COPY);
+    }
+    if (qg == icgr && itheta == 0 && !cfl_enabled) *skip = OD_PVQ_SKIP_COPY;
+    gain_offset = cgr - OD_SHL(icgr, OD_CGAIN_SHIFT);
+    qcg = OD_SHL(qg, OD_CGAIN_SHIFT) + gain_offset;
+    /* read and decode first-stage PVQ error theta */
+    max_theta = od_pvq_compute_max_theta(qcg, beta);
+    if (itheta > 1 && (nodesync || max_theta > 3)) {
+      int tmp;
+      tmp = *ext;
+      itheta = 2 + generic_decode(ec, &model[2], nodesync ? -1 : max_theta - 3,
+       &tmp, 2, "pvq:theta");
+      OD_IIR_DIADIC(*ext, itheta << 16, 2);
+    }
+    theta = od_pvq_compute_theta(itheta, max_theta);
+  }
+  else{
+    itheta = 0;
+    if (!is_keyframe) qg++;
+    qcg = OD_SHL(qg, OD_CGAIN_SHIFT);
+    if (qg == 0) *skip = OD_PVQ_SKIP_ZERO;
+  }
+
+  k = od_pvq_compute_k(qcg, itheta, theta, *noref, n, beta, nodesync);
+  if (k != 0) {
+    /* when noref==0, y is actually size n-1 */
+    od_decode_pvq_codeword(ec, &adapt->pvq.pvq_codeword_ctx, y, n - !*noref,
+     k);
+  }
+  else {
+    OD_CLEAR(y, n);
+  }
+  if (*skip) {
+    if (*skip == OD_PVQ_SKIP_COPY) OD_COPY(out, ref, n);
+    else OD_CLEAR(out, n);
+  }
+  else {
+    od_val32 g;
+    g = od_gain_expand(qcg, q0, beta);
+    pvq_synthesis(out, y, ref16, n, gr, *noref, g, theta, qm_inv, rshift);
+  }
+  *skip = !!*skip;
+}
+
+/** Decodes a coefficient block (except for DC) encoded using PVQ
+ *
+ * @param [in,out] dec     daala decoder context
+ * @param [in]     ref     'reference' (prediction) vector
+ * @param [out]    out     decoded partition
+ * @param [in]     q0      quantizer
+ * @param [in]     pli     plane index
+ * @param [in]     bs      log of the block size minus two
+ * @param [in]     beta    per-band activity masking beta param
+ * @param [in]     robust  stream is robust to error in the reference
+ * @param [in]     is_keyframe whether we're encoding a keyframe
+ * @param [out]    flags   bitmask of the per band skip and noref flags
+ * @param [in]     block_skip skip flag for the block (range 0-3)
+ * @param [in]     qm      QM with magnitude compensation
+ * @param [in]     qm_inv  Inverse of QM with magnitude compensation
+ */
+void od_pvq_decode(daala_dec_ctx *dec,
+                   od_coeff *ref,
+                   od_coeff *out,
+                   int q0,
+                   int pli,
+                   int bs,
+                   const od_val16 *beta,
+                   int robust,
+                   int is_keyframe,
+                   unsigned int *flags,
+                   int block_skip,
+                   const int16_t *qm,
+                   const int16_t *qm_inv){
+
+  int noref[PVQ_MAX_PARTITIONS];
+  int skip[PVQ_MAX_PARTITIONS];
+  int *exg;
+  int *ext;
+  int nb_bands;
+  int i;
+  const int *off;
+  int size[PVQ_MAX_PARTITIONS];
+  generic_encoder *model;
+  int skip_rest[3] = {0};
+  cfl_ctx cfl;
+  /* const unsigned char *pvq_qm; */
+  /*Default to skip=1 and noref=0 for all bands.*/
+  for (i = 0; i < PVQ_MAX_PARTITIONS; i++) {
+    noref[i] = 0;
+    skip[i] = 1;
+  }
+  /* TODO(yushin): Enable this for activity masking,
+     when pvq_qm_q4 is available in AOM. */
+  /*pvq_qm = &dec->state.pvq_qm_q4[pli][0];*/
+  exg = &dec->state.adapt.pvq.pvq_exg[pli][bs][0];
+  ext = dec->state.adapt.pvq.pvq_ext + bs*PVQ_MAX_PARTITIONS;
+  model = dec->state.adapt.pvq.pvq_param_model;
+  nb_bands = OD_BAND_OFFSETS[bs][0];
+  off = &OD_BAND_OFFSETS[bs][1];
+  OD_ASSERT(block_skip < 4);
+  out[0] = block_skip & 1;
+  if (!(block_skip >> 1)) {
+    if (is_keyframe) for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = 0;
+    else for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = ref[i];
+  }
+  else {
+    for (i = 0; i < nb_bands; i++) size[i] = off[i+1] - off[i];
+    cfl.ref = ref;
+    cfl.nb_coeffs = off[nb_bands];
+    cfl.allow_flip = pli != 0 && is_keyframe;
+    for (i = 0; i < nb_bands; i++) {
+      int q;
+      /* TODO(yushin): Enable this for activity masking,
+         when pvq_qm_q4 is available in AOM. */
+      /*q = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, i + 1)] >> 4);*/
+      q = OD_MAXI(1, q0);
+      pvq_decode_partition(dec->ec, q, size[i],
+       model, &dec->state.adapt, exg + i, ext + i, ref + off[i], out + off[i],
+       &noref[i], beta[i], robust, is_keyframe, pli,
+       (pli != 0)*OD_TXSIZES*PVQ_MAX_PARTITIONS + bs*PVQ_MAX_PARTITIONS + i,
+       &cfl, i == 0 && (i < nb_bands - 1), skip_rest, i, &skip[i],
+       qm + off[i], qm_inv + off[i]);
+      if (i == 0 && !skip_rest[0] && bs > 0) {
+        int skip_dir;
+        int j;
+        skip_dir = od_decode_cdf_adapt(dec->ec,
+         &dec->state.adapt.pvq.pvq_skip_dir_cdf[(pli != 0) + 2*(bs - 1)][0], 7,
+         dec->state.adapt.pvq.pvq_skip_dir_increment, "pvq:skiprest");
+        for (j = 0; j < 3; j++) skip_rest[j] = !!(skip_dir & (1 << j));
+      }
+    }
+  }
+  *flags = 0;
+  for (i = nb_bands - 1; i >= 0; i--) {
+    *flags <<= 1;
+    *flags |= noref[i]&1;
+    *flags <<= 1;
+    *flags |= skip[i]&1;
+  }
+}
diff --git a/av1/decoder/pvq_decoder.h b/av1/decoder/pvq_decoder.h
new file mode 100644
index 0000000..d749040
--- /dev/null
+++ b/av1/decoder/pvq_decoder.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_pvq_decoder_H)
+# define _pvq_decoder_H (1)
+# include "aom_dsp/entdec.h"
+# include "av1/common/pvq.h"
+# include "av1/decoder/decint.h"
+
+void od_decode_band_pvq_splits(od_ec_dec *ec, od_pvq_codeword_ctx *adapt,
+ od_coeff *y, int n, int k, int level);
+
+#if OD_ACCOUNTING
+# define laplace_decode_special(dec, decay, max, str) od_laplace_decode_special_(dec, decay, max, str)
+# define laplace_decode(dec, ex_q8, k, str) od_laplace_decode_(dec, ex_q8, k, str)
+#define laplace_decode_vector(dec, y, n, k, curr, means, str) od_laplace_decode_vector_(dec, y, n, k, curr, means, str)
+#else
+# define laplace_decode_special(dec, decay, max, str) od_laplace_decode_special_(dec, decay, max)
+# define laplace_decode(dec, ex_q8, k, str) od_laplace_decode_(dec, ex_q8, k)
+#define laplace_decode_vector(dec, y, n, k, curr, means, str) od_laplace_decode_vector_(dec, y, n, k, curr, means)
+#endif
+
+int od_laplace_decode_special_(od_ec_dec *dec, unsigned decay, int max OD_ACC_STR);
+int od_laplace_decode_(od_ec_dec *dec, unsigned ex_q8, int k OD_ACC_STR);
+void od_laplace_decode_vector_(od_ec_dec *dec, od_coeff *y, int n, int k,
+                                  int32_t *curr, const int32_t *means
+                                  OD_ACC_STR);
+
+
+void od_pvq_decode(daala_dec_ctx *dec, od_coeff *ref, od_coeff *out, int q0,
+ int pli, int bs, const od_val16 *beta, int robust, int is_keyframe,
+ unsigned int *flags, int block_skip, const int16_t *qm,
+ const int16_t *qm_inv);
+
+#endif
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index e631989..3b66fa7 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -49,6 +49,9 @@
 #include "av1/encoder/segmentation.h"
 #include "av1/encoder/subexp.h"
 #include "av1/encoder/tokenize.h"
+#if CONFIG_PVQ
+#include "av1/encoder/pvq_encoder.h"
+#endif
 
 static struct av1_token intra_mode_encodings[INTRA_MODES];
 static struct av1_token switchable_interp_encodings[SWITCHABLE_FILTERS];
@@ -688,7 +691,7 @@
   *tp = p;
 }
 #endif  // CONFIG_PALETTE
-
+#if !CONFIG_PVQ
 #if CONFIG_SUPERTX
 static void update_supertx_probs(AV1_COMMON *cm, int probwt, aom_writer *w) {
   const int savings_thresh = av1_cost_one(GROUP_DIFF_UPDATE_PROB) -
@@ -821,7 +824,7 @@
 
   *tp = p;
 }
-
+#endif  // !CONFIG_PVQ
 #if CONFIG_VAR_TX
 static void pack_txb_tokens(aom_writer *w, const TOKENEXTRA **tp,
                             const TOKENEXTRA *const tok_end, MACROBLOCKD *xd,
@@ -1727,6 +1730,20 @@
 }
 #endif
 
+#if CONFIG_PVQ
+PVQ_INFO *get_pvq_block(PVQ_QUEUE *pvq_q) {
+  PVQ_INFO *pvq;
+
+  assert(pvq_q->curr_pos <= pvq_q->last_pos);
+  assert(pvq_q->curr_pos < pvq_q->buf_len);
+
+  pvq = pvq_q->buf + pvq_q->curr_pos;
+  ++pvq_q->curr_pos;
+
+  return pvq;
+}
+#endif
+
 static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
                           aom_writer *w, const TOKENEXTRA **tok,
                           const TOKENEXTRA *const tok_end,
@@ -1747,7 +1764,13 @@
   (void)tok_end;
   (void)plane;
 #endif  // !CONFIG_RANS
-
+#if CONFIG_PVQ
+  MB_MODE_INFO *mbmi;
+  BLOCK_SIZE bsize;
+  od_adapt_ctx *adapt;
+  (void)tok;
+  (void)tok_end;
+#endif
   xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
   m = xd->mi[0];
 
@@ -1759,6 +1782,12 @@
   cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
 
   set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+#if CONFIG_PVQ
+  mbmi = &m->mbmi;
+  bsize = mbmi->sb_type;
+  adapt = &cpi->td.mb.daala_enc.state.adapt;
+#endif
+
   if (frame_is_intra_only(cm)) {
     write_mb_modes_kf(cm, xd, xd->mi, w);
   } else {
@@ -1818,7 +1847,7 @@
     }
   }
 #endif  // CONFIG_PALETTE
-
+#if !CONFIG_PVQ
 #if CONFIG_SUPERTX
   if (supertx_enabled) return;
 #endif  // CONFIG_SUPERTX
@@ -1898,11 +1927,109 @@
 
 #if CONFIG_RD_DEBUG
   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    if (m->mbmi.txb_coeff_cost[plane] != txb_coeff_cost[plane]) {
+    if (m->mbmi.rd_stats.txb_coeff_cost[plane] != txb_coeff_cost[plane]) {
       dump_mode_info(m);
       assert(0);
     }
   }
+#endif  // CONFIG_RD_DEBUG
+#else
+  // PVQ writes its tokens (i.e. symbols) here.
+  if (!m->mbmi.skip) {
+    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+      PVQ_INFO *pvq;
+      TX_SIZE tx_size =
+          plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane]) : m->mbmi.tx_size;
+      int idx, idy;
+      const struct macroblockd_plane *const pd = &xd->plane[plane];
+      int num_4x4_w;
+      int num_4x4_h;
+      int max_blocks_wide;
+      int max_blocks_high;
+      int step = (1 << tx_size);
+      const int step_xy = 1 << (tx_size << 1);
+      int block = 0;
+
+      if (tx_size == TX_4X4 && bsize <= BLOCK_8X8) {
+        num_4x4_w = 2 >> xd->plane[plane].subsampling_x;
+        num_4x4_h = 2 >> xd->plane[plane].subsampling_y;
+      } else {
+        num_4x4_w =
+            num_4x4_blocks_wide_lookup[bsize] >> xd->plane[plane].subsampling_x;
+        num_4x4_h =
+            num_4x4_blocks_high_lookup[bsize] >> xd->plane[plane].subsampling_y;
+      }
+      // TODO(yushin): Do we need below for 4x4,4x8,8x4 cases as well?
+      max_blocks_wide =
+          num_4x4_w + (xd->mb_to_right_edge >= 0
+                           ? 0
+                           : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+      max_blocks_high =
+          num_4x4_h + (xd->mb_to_bottom_edge >= 0
+                           ? 0
+                           : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+
+      // TODO(yushin) Try to use av1_foreach_transformed_block_in_plane().
+      // Logic like the mb_to_right_edge/mb_to_bottom_edge stuff should
+      // really be centralized in one place.
+
+      for (idy = 0; idy < max_blocks_high; idy += step) {
+        for (idx = 0; idx < max_blocks_wide; idx += step) {
+          const int is_keyframe = 0;
+          const int encode_flip = 0;
+          const int flip = 0;
+          const int robust = 1;
+          int i;
+          const int has_dc_skip = 1;
+          int *exg = &adapt->pvq.pvq_exg[plane][tx_size][0];
+          int *ext = adapt->pvq.pvq_ext + tx_size * PVQ_MAX_PARTITIONS;
+          generic_encoder *model = adapt->pvq.pvq_param_model;
+
+          pvq = get_pvq_block(cpi->td.mb.pvq_q);
+
+          // encode block skip info
+          od_encode_cdf_adapt(&w->ec, pvq->ac_dc_coded,
+                              adapt->skip_cdf[2 * tx_size + (plane != 0)], 4,
+                              adapt->skip_increment);
+
+          // AC coeffs coded?
+          if (pvq->ac_dc_coded & 0x02) {
+            assert(pvq->bs <= tx_size);
+            for (i = 0; i < pvq->nb_bands; i++) {
+              if (i == 0 || (!pvq->skip_rest &&
+                             !(pvq->skip_dir & (1 << ((i - 1) % 3))))) {
+                pvq_encode_partition(
+                    &w->ec, pvq->qg[i], pvq->theta[i], pvq->max_theta[i],
+                    pvq->y + pvq->off[i], pvq->size[i], pvq->k[i], model, adapt,
+                    exg + i, ext + i, robust || is_keyframe,
+                    (plane != 0) * OD_TXSIZES * PVQ_MAX_PARTITIONS +
+                        pvq->bs * PVQ_MAX_PARTITIONS + i,
+                    is_keyframe, i == 0 && (i < pvq->nb_bands - 1),
+                    pvq->skip_rest, encode_flip, flip);
+              }
+              if (i == 0 && !pvq->skip_rest && pvq->bs > 0) {
+                od_encode_cdf_adapt(
+                    &w->ec, pvq->skip_dir,
+                    &adapt->pvq
+                         .pvq_skip_dir_cdf[(plane != 0) + 2 * (pvq->bs - 1)][0],
+                    7, adapt->pvq.pvq_skip_dir_increment);
+              }
+            }
+          }
+          // Encode residue of DC coeff, if exist.
+          if (!has_dc_skip || (pvq->ac_dc_coded & 1)) {  // DC coded?
+            generic_encode(&w->ec, &adapt->model_dc[plane],
+                           abs(pvq->dq_dc_residue) - has_dc_skip, -1,
+                           &adapt->ex_dc[plane][pvq->bs][0], 2);
+          }
+          if ((pvq->ac_dc_coded & 1)) {  // DC coded?
+            od_ec_enc_bits(&w->ec, pvq->dq_dc_residue < 0, 1);
+          }
+          block += step_xy;
+        }
+      }  // for (idy = 0;
+    }    // for (plane =
+  }      // if (!m->mbmi.skip)
 #endif
 }
 
@@ -2175,6 +2302,9 @@
   const int mi_col_end = tile->mi_col_end;
   int mi_row, mi_col;
   av1_zero_above_context(cm, mi_col_start, mi_col_end);
+#if CONFIG_PVQ
+  assert(cpi->td.mb.pvq_q->curr_pos == 0);
+#endif
 #if CONFIG_DELTA_Q
   if (cpi->common.delta_q_present_flag) {
     xd->prev_qindex = cpi->common.base_qindex;
@@ -2189,8 +2319,16 @@
                              cm->sb_size);
     }
   }
+#if CONFIG_PVQ
+  // Check that the number of PVQ blocks encoded matches the number of PVQ
+  // blocks written to the bitstream.
+  assert(cpi->td.mb.pvq_q->curr_pos == cpi->td.mb.pvq_q->last_pos);
+  // Reset curr_pos in case we repack the bitstream
+  cpi->td.mb.pvq_q->curr_pos = 0;
+#endif
 }
 
+#if !CONFIG_PVQ
 static void build_tree_distribution(AV1_COMP *cpi, TX_SIZE tx_size,
                                     av1_coeff_stats *coef_branch_ct,
                                     av1_coeff_probs_model *coef_probs) {
@@ -2671,6 +2809,7 @@
   if (update) av1_coef_pareto_cdfs(cpi->common.fc);
 #endif  // CONFIG_RANS
 }
+#endif
 
 #if CONFIG_LOOP_RESTORATION
 static void encode_restoration_mode(AV1_COMMON *cm,
@@ -3298,15 +3437,19 @@
     av1_tile_set_row(&tile_info, cm, tile_row);
 
     for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+      const int tile_idx = tile_row * tile_cols + tile_col;
       TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
       const int is_last_col = (tile_col == tile_cols - 1);
       unsigned int tile_size;
+#if CONFIG_PVQ
+      TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
+#endif
       const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
       const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
 #if !CONFIG_TILE_GROUPS
       const int is_last_tile = is_last_col && is_last_row;
+      (void)tile_idx;
 #else
-      const int tile_idx = tile_row * tile_cols + tile_col;
       // All tiles in a tile group have a length
       const int is_last_tile = 0;
       if (tile_count >= tg_size) {
@@ -3343,10 +3486,18 @@
       tile_size = ans_write_end(&token_ans);
 #else
       aom_start_encode(&mode_bc, dst + total_size);
+#if CONFIG_PVQ
+      // NOTE: This will not work with CONFIG_ANS turned on.
+      od_adapt_ctx_reset(&cpi->td.mb.daala_enc.state.adapt, 0);
+      cpi->td.mb.pvq_q = &this_tile->pvq_q;
+#endif
       write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
       assert(tok == tok_end);
       aom_stop_encode(&mode_bc);
       tile_size = mode_bc.pos;
+#endif
+#if CONFIG_PVQ
+      cpi->td.mb.pvq_q = NULL;
 #endif  // !CONFIG_ANS
 
       assert(tile_size > 0);
@@ -3656,28 +3807,28 @@
     case GLOBAL_ZERO: break;
     case GLOBAL_AFFINE:
       aom_write_primitive_symmetric(
-          w, (params->motion_params.wmmat[2].as_mv.row >> GM_ALPHA_PREC_DIFF),
+          w, (params->motion_params.wmmat[4] >> GM_ALPHA_PREC_DIFF),
           GM_ABS_ALPHA_BITS);
       aom_write_primitive_symmetric(
-          w, (params->motion_params.wmmat[2].as_mv.col >> GM_ALPHA_PREC_DIFF) -
+          w, (params->motion_params.wmmat[5] >> GM_ALPHA_PREC_DIFF) -
                  (1 << GM_ALPHA_PREC_BITS),
           GM_ABS_ALPHA_BITS);
     // fallthrough intended
     case GLOBAL_ROTZOOM:
       aom_write_primitive_symmetric(
-          w, (params->motion_params.wmmat[1].as_mv.row >> GM_ALPHA_PREC_DIFF),
+          w, (params->motion_params.wmmat[2] >> GM_ALPHA_PREC_DIFF),
           GM_ABS_ALPHA_BITS);
       aom_write_primitive_symmetric(
-          w, (params->motion_params.wmmat[1].as_mv.col >> GM_ALPHA_PREC_DIFF) -
+          w, (params->motion_params.wmmat[3] >> GM_ALPHA_PREC_DIFF) -
                  (1 << GM_ALPHA_PREC_BITS),
           GM_ABS_ALPHA_BITS);
     // fallthrough intended
     case GLOBAL_TRANSLATION:
       aom_write_primitive_symmetric(
-          w, (params->motion_params.wmmat[0].as_mv.row >> GM_TRANS_PREC_DIFF),
+          w, (params->motion_params.wmmat[0] >> GM_TRANS_PREC_DIFF),
           GM_ABS_TRANS_BITS);
       aom_write_primitive_symmetric(
-          w, (params->motion_params.wmmat[0].as_mv.col >> GM_TRANS_PREC_DIFF),
+          w, (params->motion_params.wmmat[1] >> GM_TRANS_PREC_DIFF),
           GM_ABS_TRANS_BITS);
       break;
     default: assert(0);
@@ -3736,8 +3887,9 @@
   encode_restoration(cm, header_bc);
 #endif  // CONFIG_LOOP_RESTORATION
   update_txfm_probs(cm, header_bc, counts);
+#if !CONFIG_PVQ
   update_coef_probs(cpi, header_bc);
-
+#endif
 #if CONFIG_VAR_TX
   update_txfm_partition_probs(cm, header_bc, counts, probwt);
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 5c8a2f6..aa04389 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -14,6 +14,9 @@
 
 #include "av1/common/entropymv.h"
 #include "av1/common/entropy.h"
+#if CONFIG_PVQ
+#include "av1/encoder/encint.h"
+#endif
 #if CONFIG_REF_MV
 #include "av1/common/mvref_common.h"
 #endif
@@ -22,6 +25,12 @@
 extern "C" {
 #endif
 
+#if CONFIG_PVQ
+// Maximum possible # of tx blocks in the luma plane, which is currently 256,
+// since there can be 16x16 4x4 tx blocks in a superblock.
+#define MAX_PVQ_BLOCKS_IN_SB (MAX_SB_SQUARE >> 2 * OD_LOG_BSIZE0)
+#endif
+
 typedef struct {
   unsigned int sse;
   int sum;
@@ -30,6 +39,9 @@
 
 typedef struct macroblock_plane {
   DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
+#if CONFIG_PVQ
+  DECLARE_ALIGNED(16, int16_t, src_int16[MAX_SB_SQUARE]);
+#endif
   tran_low_t *qcoeff;
   tran_low_t *coeff;
   uint16_t *eobs;
@@ -176,6 +188,25 @@
   int use_default_intra_tx_type;
   // use default transform and skip transform type search for inter modes
   int use_default_inter_tx_type;
+#if CONFIG_PVQ
+  int rate;
+  // 1 if neither AC nor DC is coded. Only used during RDO.
+  int pvq_skip[MAX_MB_PLANE];
+  PVQ_QUEUE *pvq_q;
+
+  // Storage for PVQ tx block encodings in a superblock.
+  // There can be at most 16x16 4x4 blocks (per YUV plane) encoded by PVQ.
+  // 256 is the max # of 4x4 blocks in a SB (64x64), which comes from:
+  // 1) PVQ is applied to each transformed block
+  // 2) 4x4 is the smallest tx size in AV1
+  // 3) AV1 allows using a smaller tx size than the block (i.e. partition) size
+  // TODO(yushin): The memory usage could be improved a lot, since this has
+  // storage for 10 bands and 128 coefficients for every 4x4 block.
+  PVQ_INFO pvq[MAX_PVQ_BLOCKS_IN_SB][MAX_MB_PLANE];
+  daala_enc_ctx daala_enc;
+  int pvq_speed;
+  int pvq_coded;  // Indicates whether pvq_info needs be stored to tokenize
+#endif
 };
 
 #ifdef __cplusplus
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c
index 6d1cb39..e1db4be 100644
--- a/av1/encoder/context_tree.c
+++ b/av1/encoder/context_tree.c
@@ -26,7 +26,7 @@
                                PICK_MODE_CONTEXT *ctx) {
   const int num_blk = (num_4x4_blk < 4 ? 4 : num_4x4_blk);
   const int num_pix = num_blk << 4;
-  int i, k;
+  int i;
   ctx->num_4x4_blk = num_blk;
 #if CONFIG_EXT_PARTITION_TYPES
   ctx->partition = partition;
@@ -36,16 +36,18 @@
 #if CONFIG_VAR_TX
     CHECK_MEM_ERROR(cm, ctx->blk_skip[i], aom_calloc(num_blk, sizeof(uint8_t)));
 #endif
-    for (k = 0; k < 3; ++k) {
-      CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
-                      aom_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
-      CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
-                      aom_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k])));
-      CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
-                      aom_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
-      CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
-                      aom_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
-    }
+    CHECK_MEM_ERROR(cm, ctx->coeff[i],
+                    aom_memalign(32, num_pix * sizeof(*ctx->coeff[i])));
+    CHECK_MEM_ERROR(cm, ctx->qcoeff[i],
+                    aom_memalign(32, num_pix * sizeof(*ctx->qcoeff[i])));
+    CHECK_MEM_ERROR(cm, ctx->dqcoeff[i],
+                    aom_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i])));
+    CHECK_MEM_ERROR(cm, ctx->eobs[i],
+                    aom_memalign(32, num_blk * sizeof(*ctx->eobs[i])));
+#if CONFIG_PVQ
+    CHECK_MEM_ERROR(cm, ctx->pvq_ref_coeff[i],
+                    aom_memalign(32, num_pix * sizeof(*ctx->pvq_ref_coeff[i])));
+#endif
   }
 
 #if CONFIG_PALETTE
@@ -60,22 +62,24 @@
 }
 
 static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
-  int i, k;
+  int i;
   for (i = 0; i < MAX_MB_PLANE; ++i) {
 #if CONFIG_VAR_TX
     aom_free(ctx->blk_skip[i]);
     ctx->blk_skip[i] = 0;
 #endif
-    for (k = 0; k < 3; ++k) {
-      aom_free(ctx->coeff[i][k]);
-      ctx->coeff[i][k] = 0;
-      aom_free(ctx->qcoeff[i][k]);
-      ctx->qcoeff[i][k] = 0;
-      aom_free(ctx->dqcoeff[i][k]);
-      ctx->dqcoeff[i][k] = 0;
-      aom_free(ctx->eobs[i][k]);
-      ctx->eobs[i][k] = 0;
-    }
+    aom_free(ctx->coeff[i]);
+    ctx->coeff[i] = 0;
+    aom_free(ctx->qcoeff[i]);
+    ctx->qcoeff[i] = 0;
+    aom_free(ctx->dqcoeff[i]);
+    ctx->dqcoeff[i] = 0;
+#if CONFIG_PVQ
+    aom_free(ctx->pvq_ref_coeff[i]);
+    ctx->pvq_ref_coeff[i] = 0;
+#endif
+    aom_free(ctx->eobs[i]);
+    ctx->eobs[i] = 0;
   }
 
 #if CONFIG_PALETTE
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h
index 9aff773..7496d11 100644
--- a/av1/encoder/context_tree.h
+++ b/av1/encoder/context_tree.h
@@ -35,10 +35,13 @@
 #endif
 
   // dual buffer pointers, 0: in use, 1: best in store
-  tran_low_t *coeff[MAX_MB_PLANE][3];
-  tran_low_t *qcoeff[MAX_MB_PLANE][3];
-  tran_low_t *dqcoeff[MAX_MB_PLANE][3];
-  uint16_t *eobs[MAX_MB_PLANE][3];
+  tran_low_t *coeff[MAX_MB_PLANE];
+  tran_low_t *qcoeff[MAX_MB_PLANE];
+  tran_low_t *dqcoeff[MAX_MB_PLANE];
+#if CONFIG_PVQ
+  tran_low_t *pvq_ref_coeff[MAX_MB_PLANE];
+#endif
+  uint16_t *eobs[MAX_MB_PLANE];
 
   int num_4x4_blk;
   int skip;
diff --git a/av1/encoder/daala_compat_enc.c b/av1/encoder/daala_compat_enc.c
new file mode 100644
index 0000000..c23b26d
--- /dev/null
+++ b/av1/encoder/daala_compat_enc.c
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "encint.h"
+
+void od_encode_checkpoint(const daala_enc_ctx *enc, od_rollback_buffer *rbuf) {
+  od_ec_enc_checkpoint(&rbuf->ec, &enc->ec);
+  OD_COPY(&rbuf->adapt, &enc->state.adapt, 1);
+}
+
+void od_encode_rollback(daala_enc_ctx *enc, const od_rollback_buffer *rbuf) {
+  od_ec_enc_rollback(&enc->ec, &rbuf->ec);
+  OD_COPY(&enc->state.adapt, &rbuf->adapt, 1);
+}
diff --git a/av1/encoder/encint.h b/av1/encoder/encint.h
new file mode 100644
index 0000000..1e3516c
--- /dev/null
+++ b/av1/encoder/encint.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+/* clang-format off */
+
+#if !defined(_encint_H)
+# define _encint_H (1)
+
+typedef struct daala_enc_ctx od_enc_ctx;
+typedef struct od_params_ctx od_params_ctx;
+typedef struct od_rollback_buffer od_rollback_buffer;
+
+# include "aom_dsp/entenc.h"
+# include "av1/common/odintrin.h"
+# include "av1/common/pvq_state.h"
+
+struct daala_enc_ctx{
+  /* Stores context-adaptive CDFs for PVQ. */
+  od_state state;
+  /* Daala entropy encoder. */
+  od_ec_enc ec;
+  int use_activity_masking;
+  /* Mode of quantization matrice : FLAT (0) or HVS (1) */
+  int qm;
+  /*Normalized PVQ lambda for use where we've already performed
+     quantization.*/
+  double pvq_norm_lambda;
+  double pvq_norm_lambda_dc;
+};
+
+// from daalaenc.h
+/**The encoder context.*/
+typedef struct daala_enc_ctx daala_enc_ctx;
+
+/** Holds important encoder information so we can roll back decisions */
+struct od_rollback_buffer {
+  od_ec_enc ec;
+  od_adapt_ctx adapt;
+};
+
+void od_encode_checkpoint(const daala_enc_ctx *enc, od_rollback_buffer *rbuf);
+void od_encode_rollback(daala_enc_ctx *enc, const od_rollback_buffer *rbuf);
+
+#endif
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 9de8695..baf61e9 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -53,7 +53,9 @@
 #include "av1/encoder/rdopt.h"
 #include "av1/encoder/segmentation.h"
 #include "av1/encoder/tokenize.h"
-
+#if CONFIG_PVQ
+#include "av1/encoder/pvq_encoder.h"
+#endif
 #if CONFIG_AOM_HIGHBITDEPTH
 #define IF_HBD(...) __VA_ARGS__
 #else
@@ -1034,7 +1036,6 @@
   const int mis = cm->mi_stride;
   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
-  int max_plane;
 
 #if CONFIG_REF_MV
   int8_t rf_type;
@@ -1084,21 +1085,15 @@
     }
   }
 
-  max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
-  for (i = 0; i < max_plane; ++i) {
-    p[i].coeff = ctx->coeff[i][1];
-    p[i].qcoeff = ctx->qcoeff[i][1];
-    pd[i].dqcoeff = ctx->dqcoeff[i][1];
-    p[i].eobs = ctx->eobs[i][1];
+  for (i = 0; i < MAX_MB_PLANE; ++i) {
+    p[i].coeff = ctx->coeff[i];
+    p[i].qcoeff = ctx->qcoeff[i];
+    pd[i].dqcoeff = ctx->dqcoeff[i];
+#if CONFIG_PVQ
+    pd[i].pvq_ref_coeff = ctx->pvq_ref_coeff[i];
+#endif
+    p[i].eobs = ctx->eobs[i];
   }
-
-  for (i = max_plane; i < MAX_MB_PLANE; ++i) {
-    p[i].coeff = ctx->coeff[i][2];
-    p[i].qcoeff = ctx->qcoeff[i][2];
-    pd[i].dqcoeff = ctx->dqcoeff[i][2];
-    p[i].eobs = ctx->eobs[i][2];
-  }
-
 #if CONFIG_PALETTE
   for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
 #endif  // CONFIG_PALETTE
@@ -1482,10 +1477,10 @@
 
   for (i = 0; i < MAX_MB_PLANE; ++i) {
     if (pmc != NULL) {
-      p[i].coeff = pmc->coeff[i][1];
-      p[i].qcoeff = pmc->qcoeff[i][1];
-      pd[i].dqcoeff = pmc->dqcoeff[i][1];
-      p[i].eobs = pmc->eobs[i][1];
+      p[i].coeff = pmc->coeff[i];
+      p[i].qcoeff = pmc->qcoeff[i];
+      pd[i].dqcoeff = pmc->dqcoeff[i];
+      p[i].eobs = pmc->eobs[i];
     } else {
       // These should never be used
       p[i].coeff = NULL;
@@ -1635,6 +1630,11 @@
   // Use the lower precision, but faster, 32x32 fdct for mode selection.
   x->use_lp32x32fdct = 1;
 
+#if CONFIG_PVQ
+  x->pvq_speed = 1;
+  x->pvq_coded = 0;
+#endif
+
   set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
   mbmi = &xd->mi[0]->mbmi;
   mbmi->sb_type = bsize;
@@ -1655,10 +1655,13 @@
 #endif
 
   for (i = 0; i < MAX_MB_PLANE; ++i) {
-    p[i].coeff = ctx->coeff[i][0];
-    p[i].qcoeff = ctx->qcoeff[i][0];
-    pd[i].dqcoeff = ctx->dqcoeff[i][0];
-    p[i].eobs = ctx->eobs[i][0];
+    p[i].coeff = ctx->coeff[i];
+    p[i].qcoeff = ctx->qcoeff[i];
+    pd[i].dqcoeff = ctx->dqcoeff[i];
+#if CONFIG_PVQ
+    pd[i].pvq_ref_coeff = ctx->pvq_ref_coeff[i];
+#endif
+    p[i].eobs = ctx->eobs[i];
   }
 
 #if CONFIG_PALETTE
@@ -2075,7 +2078,11 @@
 
 static void restore_context(MACROBLOCK *x,
                             const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row,
-                            int mi_col, BLOCK_SIZE bsize) {
+                            int mi_col,
+#if CONFIG_PVQ
+                            od_rollback_buffer *rdo_buf,
+#endif
+                            BLOCK_SIZE bsize) {
   MACROBLOCKD *xd = &x->e_mbd;
   int p;
   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
@@ -2105,10 +2112,17 @@
   memcpy(xd->left_txfm_context, ctx->tl,
          sizeof(*xd->left_txfm_context) * mi_height);
 #endif
+#if CONFIG_PVQ
+  od_encode_rollback(&x->daala_enc, rdo_buf);
+#endif
 }
 
 static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
-                         int mi_row, int mi_col, BLOCK_SIZE bsize) {
+                         int mi_row, int mi_col,
+#if CONFIG_PVQ
+                         od_rollback_buffer *rdo_buf,
+#endif
+                         BLOCK_SIZE bsize) {
   const MACROBLOCKD *xd = &x->e_mbd;
   int p;
   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
@@ -2140,6 +2154,9 @@
   ctx->p_ta = xd->above_txfm_context;
   ctx->p_tl = xd->left_txfm_context;
 #endif
+#if CONFIG_PVQ
+  od_encode_checkpoint(&x->daala_enc, rdo_buf);
+#endif
 }
 
 static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile,
@@ -2479,7 +2496,9 @@
   int none_rate_nocoef = INT_MAX;
   int chosen_rate_nocoef = INT_MAX;
 #endif
-
+#if CONFIG_PVQ
+  od_rollback_buffer pre_rdo_buf;
+#endif
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
 
   assert(num_4x4_blocks_wide_lookup[bsize] ==
@@ -2496,8 +2515,11 @@
   xd->left_txfm_context =
       xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
 #endif
-
+#if !CONFIG_PVQ
   save_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+  save_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
 
   if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
     set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
@@ -2543,8 +2565,11 @@
 #endif
       }
 
+#if !CONFIG_PVQ
       restore_context(x, &x_ctx, mi_row, mi_col, bsize);
-
+#else
+      restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
       mib[0]->mbmi.sb_type = bs_type;
       pc_tree->partitioning = partition;
     }
@@ -2729,9 +2754,11 @@
 #if CONFIG_SUPERTX
     chosen_rate_nocoef = 0;
 #endif
-
+#if !CONFIG_PVQ
     restore_context(x, &x_ctx, mi_row, mi_col, bsize);
-
+#else
+    restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
     pc_tree->partitioning = PARTITION_SPLIT;
 
     // Split partition.
@@ -2742,10 +2769,17 @@
 #if CONFIG_SUPERTX
       int rt_nocoef = 0;
 #endif
+#if CONFIG_PVQ
+      od_rollback_buffer buf;
+#endif
       if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
         continue;
 
+#if !CONFIG_PVQ
       save_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+      save_context(x, &x_ctx, mi_row, mi_col, &buf, bsize);
+#endif
       pc_tree->split[i]->partitioning = PARTITION_NONE;
       rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
                        &tmp_rdc,
@@ -2757,8 +2791,11 @@
 #endif
                        split_subsize, &pc_tree->split[i]->none, INT64_MAX);
 
+#if !CONFIG_PVQ
       restore_context(x, &x_ctx, mi_row, mi_col, bsize);
-
+#else
+      restore_context(x, &x_ctx, mi_row, mi_col, &buf, bsize);
+#endif
       if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
         av1_rd_cost_reset(&chosen_rdc);
 #if CONFIG_SUPERTX
@@ -2810,7 +2847,11 @@
 #endif
   }
 
+#if !CONFIG_PVQ
   restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+  restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
 
   // We must have chosen a partitioning and encoding or we'll fail later on.
   // No other opportunities for success.
@@ -3369,6 +3410,11 @@
       !force_vert_split && yss <= xss && bsize_at_least_8x8;
   int partition_vert_allowed =
       !force_horz_split && xss <= yss && bsize_at_least_8x8;
+
+#if CONFIG_PVQ
+  od_rollback_buffer pre_rdo_buf;
+#endif
+
   (void)*tp_orig;
 
   if (force_horz_split || force_vert_split) {
@@ -3448,8 +3494,11 @@
   xd->left_txfm_context =
       xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
 #endif
-
+#if !CONFIG_PVQ
   save_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+  save_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
 
 #if CONFIG_FP_MB_STATS
   if (cpi->use_fp_mb_stats) {
@@ -3606,8 +3655,11 @@
 #endif
       }
     }
-
+#if !CONFIG_PVQ
     restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+    restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
   }
 
   // store estimated motion vector
@@ -3796,8 +3848,11 @@
       // gives better rd cost
       do_rectangular_split &= !partition_none_allowed;
     }
-
+#if !CONFIG_PVQ
     restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+    restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
   }  // if (do_split)
 
   // PARTITION_HORZ
@@ -3936,8 +3991,11 @@
         pc_tree->partitioning = PARTITION_HORZ;
       }
     }
-
+#if !CONFIG_PVQ
     restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+    restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
   }
 
   // PARTITION_VERT
@@ -4076,7 +4134,11 @@
         pc_tree->partitioning = PARTITION_VERT;
       }
     }
+#if !CONFIG_PVQ
     restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+    restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
   }
 
 #if CONFIG_EXT_PARTITION_TYPES
@@ -4160,7 +4222,9 @@
   }
 
   if (bsize == cm->sb_size) {
+#if !CONFIG_PVQ
     assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip));
+#endif
     assert(best_rdc.rate < INT_MAX);
     assert(best_rdc.dist < INT64_MAX);
   } else {
@@ -4426,6 +4490,14 @@
             tile_data->mode_map[i][j] = j;
           }
         }
+#if CONFIG_PVQ
+        // This will be dynamically increased as more pvq block is encoded.
+        tile_data->pvq_q.buf_len = 1000;
+        CHECK_MEM_ERROR(
+            cm, tile_data->pvq_q.buf,
+            aom_malloc(tile_data->pvq_q.buf_len * sizeof(PVQ_INFO)));
+        tile_data->pvq_q.curr_pos = 0;
+#endif
       }
   }
 
@@ -4438,6 +4510,9 @@
       cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
       pre_tok = cpi->tile_tok[tile_row][tile_col];
       tile_tok = allocated_tokens(*tile_info);
+#if CONFIG_PVQ
+      cpi->tile_data[tile_row * tile_cols + tile_col].pvq_q.curr_pos = 0;
+#endif
     }
   }
 }
@@ -4450,6 +4525,9 @@
   const TileInfo *const tile_info = &this_tile->tile_info;
   TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
   int mi_row;
+#if CONFIG_PVQ
+  od_adapt_ctx *adapt;
+#endif
 
   av1_zero_above_context(cm, tile_info->mi_col_start, tile_info->mi_col_end);
 
@@ -4459,6 +4537,35 @@
   td->mb.m_search_count_ptr = &this_tile->m_search_count;
   td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
 
+#if CONFIG_PVQ
+  td->mb.pvq_q = &this_tile->pvq_q;
+
+  // TODO(yushin)
+  // If activity masking is enabled, change below to OD_HVS_QM
+  td->mb.daala_enc.qm = OD_FLAT_QM;  // Hard coded. Enc/dec required to sync.
+  {
+    // FIXME: Multiple segments support
+    int segment_id = 0;
+    int rdmult = set_segment_rdmult(cpi, &td->mb, segment_id);
+    int qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+    int64_t q_ac = av1_ac_quant(qindex, 0, cpi->common.bit_depth);
+    int64_t q_dc = av1_dc_quant(qindex, 0, cpi->common.bit_depth);
+    /* td->mb.daala_enc.pvq_norm_lambda = OD_PVQ_LAMBDA; */
+    td->mb.daala_enc.pvq_norm_lambda =
+        (double)rdmult * (64 / 16) / (q_ac * q_ac * (1 << RDDIV_BITS));
+    td->mb.daala_enc.pvq_norm_lambda_dc =
+        (double)rdmult * (64 / 16) / (q_dc * q_dc * (1 << RDDIV_BITS));
+    // printf("%f\n", td->mb.daala_enc.pvq_norm_lambda);
+  }
+  od_init_qm(td->mb.daala_enc.state.qm, td->mb.daala_enc.state.qm_inv,
+             td->mb.daala_enc.qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT);
+  od_ec_enc_init(&td->mb.daala_enc.ec, 65025);
+
+  adapt = &td->mb.daala_enc.state.adapt;
+  od_ec_enc_reset(&td->mb.daala_enc.ec);
+  od_adapt_ctx_reset(adapt, 0);
+#endif
+
   for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
        mi_row += cm->mib_size) {
     encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
@@ -4467,6 +4574,16 @@
   cpi->tok_count[tile_row][tile_col] =
       (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
   assert(cpi->tok_count[tile_row][tile_col] <= allocated_tokens(*tile_info));
+#if CONFIG_PVQ
+  od_ec_enc_clear(&td->mb.daala_enc.ec);
+
+  td->mb.pvq_q->last_pos = td->mb.pvq_q->curr_pos;
+  // rewind current position so that bitstream can be written
+  // from the 1st pvq block
+  td->mb.pvq_q->curr_pos = 0;
+
+  td->mb.pvq_q = NULL;
+#endif
 }
 
 static void encode_tiles(AV1_COMP *cpi) {
@@ -4502,8 +4619,8 @@
 // Adds some offset to a global motion parameter and handles
 // all of the necessary precision shifts, clamping, and
 // zero-centering.
-static int16_t add_param_offset(int param_index, int16_t param_value,
-                                int16_t offset) {
+static int32_t add_param_offset(int param_index, int32_t param_value,
+                                int32_t offset) {
   const int scale_vals[2] = { GM_ALPHA_PREC_DIFF, GM_TRANS_PREC_DIFF };
   const int clamp_vals[2] = { GM_ALPHA_MAX, GM_TRANS_MAX };
   const int is_trans_param = param_index < 2;
@@ -4517,7 +4634,7 @@
   param_value += offset;
   // Clamp the parameter so it does not overflow the number of bits allotted
   // to it in the bitstream
-  param_value = (int16_t)clamp(param_value, -clamp_vals[is_trans_param],
+  param_value = (int32_t)clamp(param_value, -clamp_vals[is_trans_param],
                                clamp_vals[is_trans_param]);
   // Rescale the parameter to WARPEDMODEL_PRECISION_BITS so it is compatible
   // with the warped motion library
@@ -4537,12 +4654,12 @@
                                      int n_refinements) {
   int i = 0, p;
   int n_params = n_trans_model_params[wm->wmtype];
-  int16_t *param_mat = (int16_t *)wm->wmmat;
+  int32_t *param_mat = wm->wmmat;
   double step_error;
-  int16_t step;
-  int16_t *param;
-  int16_t curr_param;
-  int16_t best_param;
+  int32_t step;
+  int32_t *param;
+  int32_t curr_param;
+  int32_t best_param;
 
   double best_error =
       av1_warp_erroradv(wm,
@@ -4601,22 +4718,22 @@
 }
 
 static void convert_to_params(const double *params, TransformationType type,
-                              int16_t *model) {
+                              int32_t *model) {
   int i, diag_value;
   int alpha_present = 0;
   int n_params = n_trans_model_params[type];
-  model[0] = (int16_t)floor(params[0] * (1 << GM_TRANS_PREC_BITS) + 0.5);
-  model[1] = (int16_t)floor(params[1] * (1 << GM_TRANS_PREC_BITS) + 0.5);
-  model[0] = (int16_t)clamp(model[0], GM_TRANS_MIN, GM_TRANS_MAX) *
+  model[0] = (int32_t)floor(params[0] * (1 << GM_TRANS_PREC_BITS) + 0.5);
+  model[1] = (int32_t)floor(params[1] * (1 << GM_TRANS_PREC_BITS) + 0.5);
+  model[0] = (int32_t)clamp(model[0], GM_TRANS_MIN, GM_TRANS_MAX) *
              GM_TRANS_DECODE_FACTOR;
-  model[1] = (int16_t)clamp(model[1], GM_TRANS_MIN, GM_TRANS_MAX) *
+  model[1] = (int32_t)clamp(model[1], GM_TRANS_MIN, GM_TRANS_MAX) *
              GM_TRANS_DECODE_FACTOR;
 
   for (i = 2; i < n_params; ++i) {
     diag_value = ((i & 1) ? (1 << GM_ALPHA_PREC_BITS) : 0);
-    model[i] = (int16_t)floor(params[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5);
+    model[i] = (int32_t)floor(params[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5);
     model[i] =
-        (int16_t)(clamp(model[i] - diag_value, GM_ALPHA_MIN, GM_ALPHA_MAX) +
+        (int32_t)(clamp(model[i] - diag_value, GM_ALPHA_MIN, GM_ALPHA_MAX) +
                   diag_value) *
         GM_ALPHA_DECODE_FACTOR;
     alpha_present |= (model[i] != 0);
@@ -4635,7 +4752,7 @@
                                     Global_Motion_Params *model) {
   // TODO(sarahparker) implement for homography
   if (type > HOMOGRAPHY)
-    convert_to_params(params, type, (int16_t *)model->motion_params.wmmat);
+    convert_to_params(params, type, model->motion_params.wmmat);
   model->gmtype = get_gmtype(model);
   model->motion_params.wmtype = gm_to_trans_type(model->gmtype);
 }
@@ -5140,6 +5257,11 @@
 
   x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
 
+#if CONFIG_PVQ
+  x->pvq_speed = 0;
+  x->pvq_coded = !dry_run ? 1 : 0;
+#endif
+
   if (!is_inter_block(mbmi)) {
     int plane;
     mbmi->skip = 1;
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index f7f9021..dfde235 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -28,6 +28,12 @@
 #include "av1/encoder/rd.h"
 #include "av1/encoder/tokenize.h"
 
+#if CONFIG_PVQ
+#include "av1/encoder/encint.h"
+#include "av1/common/partition.h"
+#include "av1/encoder/pvq_encoder.h"
+#endif
+
 void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
   struct macroblock_plane *const p = &x->plane[plane];
   const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
@@ -411,7 +417,8 @@
       { av1_quantize_dc_facade, av1_highbd_quantize_dc_facade },
       { NULL, NULL } };
 
-#else
+#elif !CONFIG_PVQ
+
 typedef enum QUANT_FUNC {
   QUANT_FUNC_LOWBD = 0,
   QUANT_FUNC_LAST = 1
@@ -434,8 +441,13 @@
                      int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
                      TX_SIZE tx_size, AV1_XFORM_QUANT xform_quant_idx) {
   MACROBLOCKD *const xd = &x->e_mbd;
+#if !CONFIG_PVQ
   const struct macroblock_plane *const p = &x->plane[plane];
   const struct macroblockd_plane *const pd = &xd->plane[plane];
+#else
+  struct macroblock_plane *const p = &x->plane[plane];
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+#endif
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
   const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
@@ -450,11 +462,51 @@
   const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][!is_inter][tx_size];
   const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!is_inter][tx_size];
 #endif
-  const int16_t *src_diff;
-  const int tx2d_size = tx_size_2d[tx_size];
 
   FWD_TXFM_PARAM fwd_txfm_param;
+
+#if !CONFIG_PVQ
+  const int tx2d_size = tx_size_2d[tx_size];
   QUANT_PARAM qparam;
+  const int16_t *src_diff;
+
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+  qparam.log_scale = get_tx_scale(xd, tx_type, tx_size);
+#else
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
+  uint8_t *src, *dst;
+  int16_t *src_int16, *pred;
+  const int src_stride = p->src.stride;
+  const int dst_stride = pd->dst.stride;
+  int tx_blk_size;
+  int i, j;
+  int skip = 1;
+  PVQ_INFO *pvq_info = NULL;
+
+  (void)scan_order;
+  (void)qcoeff;
+
+  if (x->pvq_coded) {
+    assert(block < MAX_PVQ_BLOCKS_IN_SB);
+    pvq_info = &x->pvq[block][plane];
+  }
+  dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
+  src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
+  src_int16 = &p->src_int16[4 * (blk_row * diff_stride + blk_col)];
+  pred = &pd->pred[4 * (blk_row * diff_stride + blk_col)];
+
+  // transform block size in pixels
+  tx_blk_size = tx_size_wide[tx_size];
+
+  // copy uint8 orig and predicted block to int16 buffer
+  // in order to use existing VP10 transform functions
+  for (j = 0; j < tx_blk_size; j++)
+    for (i = 0; i < tx_blk_size; i++) {
+      src_int16[diff_stride * j + i] = src[src_stride * j + i];
+      pred[diff_stride * j + i] = dst[dst_stride * j + i];
+    }
+#endif
 
   fwd_txfm_param.tx_type = tx_type;
   fwd_txfm_param.tx_size = tx_size;
@@ -462,9 +514,6 @@
   fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
   fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
 
-  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
-
-  qparam.log_scale = get_tx_scale(xd, tx_type, tx_size);
 #if CONFIG_AOM_HIGHBITDEPTH
   fwd_txfm_param.bd = xd->bd;
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -486,6 +535,7 @@
   }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
+#if !CONFIG_PVQ
   fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
   if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
     if (LIKELY(!x->skip_block)) {
@@ -500,6 +550,31 @@
       av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
     }
   }
+#else   // #if !CONFIG_PVQ
+  fwd_txfm_param.rd_transform = 0;
+
+  fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
+  fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
+
+  // PVQ for inter mode block
+  if (!x->skip_block)
+    skip = av1_pvq_encode_helper(&x->daala_enc,
+                                 coeff,        // target original vector
+                                 ref_coeff,    // reference vector
+                                 dqcoeff,      // de-quantized vector
+                                 eob,          // End of Block marker
+                                 pd->dequant,  // aom's quantizers
+                                 plane,        // image plane
+                                 tx_size,      // block size in log_2 - 2
+                                 tx_type,
+                                 &x->rate,  // rate measured
+                                 x->pvq_speed,
+                                 pvq_info);  // PVQ info for a block
+
+  x->pvq_skip[plane] = skip;
+
+  if (!skip) mbmi->skip = 0;
+#endif  // #if !CONFIG_PVQ
 }
 
 #if CONFIG_NEW_QUANT
@@ -783,6 +858,10 @@
   uint8_t *dst;
   ENTROPY_CONTEXT *a, *l;
   INV_TXFM_PARAM inv_txfm_param;
+#if CONFIG_PVQ
+  int tx_blk_size;
+  int i, j;
+#endif
 #if CONFIG_VAR_TX
   int i;
   const int bwl = b_width_log2_lookup[plane_bsize];
@@ -817,7 +896,7 @@
     p->eobs[block] = 0;
   }
 #endif
-
+#if !CONFIG_PVQ
   if (p->eobs[block]) {
     *a = *l = av1_optimize_b(cm, x, plane, block, tx_size, ctx) > 0;
   } else {
@@ -833,6 +912,24 @@
   if (p->eobs[block]) *(args->skip) = 0;
 
   if (p->eobs[block] == 0) return;
+#else
+  (void)ctx;
+  *a = *l = !x->pvq_skip[plane];
+
+  if (!x->pvq_skip[plane]) *(args->skip) = 0;
+
+  if (x->pvq_skip[plane]) return;
+
+  // transform block size in pixels
+  tx_blk_size = tx_size_wide[tx_size];
+
+  // Since av1 does not have separate function which does inverse transform
+  // but av1_inv_txfm_add_*x*() also does addition of predicted image to
+  // inverse transformed image,
+  // pass blank dummy image to av1_inv_txfm_add_*x*(), i.e. set dst as zeros
+  for (j = 0; j < tx_blk_size; j++)
+    for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0;
+#endif
 
   // inverse transform parameters
   inv_txfm_param.tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
@@ -928,8 +1025,26 @@
   av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                   AV1_XFORM_QUANT_B);
 #endif  // CONFIG_NEW_QUANT
-
+#if !CONFIG_PVQ
   if (p->eobs[block] > 0) {
+#else
+  if (!x->pvq_skip[plane]) {
+#endif
+#if CONFIG_PVQ
+    {
+      int tx_blk_size;
+      int i, j;
+      // transform block size in pixels
+      tx_blk_size = tx_size_wide[tx_size];
+
+      // Since av1 does not have separate function which does inverse transform
+      // but av1_inv_txfm_add_*x*() also does addition of predicted image to
+      // inverse transformed image,
+      // pass blank dummy image to av1_inv_txfm_add_*x*(), i.e. set dst as zeros
+      for (j = 0; j < tx_blk_size; j++)
+        for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0;
+    }
+#endif
 #if CONFIG_AOM_HIGHBITDEPTH
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
       if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
@@ -988,7 +1103,9 @@
     const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
     av1_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
 #endif
+#if !CONFIG_PVQ
     av1_subtract_plane(x, bsize, plane);
+#endif
     arg.ta = ctx.ta[plane];
     arg.tl = ctx.tl[plane];
 
@@ -1048,7 +1165,9 @@
                             BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                             void *arg) {
   struct encode_b_args *const args = arg;
+#if !CONFIG_PVQ
   AV1_COMMON *cm = args->cm;
+#endif
   MACROBLOCK *const x = args->x;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -1066,10 +1185,28 @@
   const int dst_stride = pd->dst.stride;
   const int tx1d_width = tx_size_wide[tx_size];
   const int tx1d_height = tx_size_high[tx_size];
+#if !CONFIG_PVQ
   ENTROPY_CONTEXT *a = NULL, *l = NULL;
   int ctx;
-
   INV_TXFM_PARAM inv_txfm_param;
+#else
+  FWD_TXFM_PARAM fwd_txfm_param;
+  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
+  int16_t *src_int16;
+  int tx_blk_size;
+  int i, j;
+  int16_t *pred = &pd->pred[4 * (blk_row * diff_stride + blk_col)];
+  int skip = 1;
+  PVQ_INFO *pvq_info = NULL;
+  int seg_id = xd->mi[0]->mbmi.segment_id;
+
+  if (x->pvq_coded) {
+    assert(block < MAX_PVQ_BLOCKS_IN_SB);
+    pvq_info = &x->pvq[block][plane];
+  }
+  src_int16 = &p->src_int16[4 * (blk_row * diff_stride + blk_col)];
+#endif
 
   assert(tx1d_width == tx1d_height);
 
@@ -1092,6 +1229,7 @@
                      src_stride, dst, dst_stride);
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
+#if !CONFIG_PVQ
   a = &args->ta[blk_col];
   l = &args->tl[blk_row];
   ctx = combine_entropy_contexts(*a, *l);
@@ -1134,6 +1272,81 @@
 
     *(args->skip) = 0;
   }
+#else   // #if !CONFIG_PVQ
+  // transform block size in pixels
+  tx_blk_size = tx_size_wide[tx_size];
+
+  // copy uint8 orig and predicted block to int16 buffer
+  // in order to use existing VP10 transform functions
+  for (j = 0; j < tx_blk_size; j++)
+    for (i = 0; i < tx_blk_size; i++) {
+      src_int16[diff_stride * j + i] = src[src_stride * j + i];
+      pred[diff_stride * j + i] = dst[dst_stride * j + i];
+    }
+
+  fwd_txfm_param.rd_transform = 0;
+  fwd_txfm_param.tx_type = tx_type;
+  fwd_txfm_param.tx_size = tx_size;
+  fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
+  fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id];
+  fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
+  fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
+
+  // PVQ for intra mode block
+  if (!x->skip_block)
+    skip = av1_pvq_encode_helper(&x->daala_enc,
+                                 coeff,        // target original vector
+                                 ref_coeff,    // reference vector
+                                 dqcoeff,      // de-quantized vector
+                                 eob,          // End of Block marker
+                                 pd->dequant,  // aom's quantizers
+                                 plane,        // image plane
+                                 tx_size,      // block size in log_2 - 2
+                                 tx_type,
+                                 &x->rate,  // rate measured
+                                 x->pvq_speed,
+                                 pvq_info);  // PVQ info for a block
+
+  x->pvq_skip[plane] = skip;
+
+  if (!skip) mbmi->skip = 0;
+
+  // Since av1 does not have separate function which does inverse transform
+  // but av1_inv_txfm_add_*x*() also does addition of predicted image to
+  // inverse transformed image,
+  // pass blank dummy image to av1_inv_txfm_add_*x*(), i.e. set dst as zeros
+
+  if (!skip) {
+    for (j = 0; j < tx_blk_size; j++)
+      for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0;
+
+    switch (tx_size) {
+      case TX_32X32:
+        av1_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, tx_type);
+        break;
+      case TX_16X16:
+        av1_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, tx_type);
+        break;
+      case TX_8X8:
+        av1_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, tx_type);
+        break;
+      case TX_4X4:
+        // this is like av1_short_idct4x4 but has a special case around eob<=1
+        // which is significant (not just an optimization) for the lossless
+        // case.
+        av1_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, tx_type,
+                             xd->lossless[seg_id]);
+        break;
+      default: assert(0); break;
+    }
+  }
+#endif  // #if !CONFIG_PVQ
+
+#if !CONFIG_PVQ
+  if (*eob) *(args->skip) = 0;
+#else
+// Note : *(args->skip) == mbmi->skip
+#endif
 }
 
 void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
@@ -1155,3 +1368,140 @@
   av1_foreach_transformed_block_in_plane(xd, bsize, plane,
                                          av1_encode_block_intra, &arg);
 }
+
+#if CONFIG_PVQ
+int av1_pvq_encode_helper(daala_enc_ctx *daala_enc, tran_low_t *const coeff,
+                          tran_low_t *ref_coeff, tran_low_t *const dqcoeff,
+                          uint16_t *eob, const int16_t *quant, int plane,
+                          int tx_size, TX_TYPE tx_type, int *rate, int speed,
+                          PVQ_INFO *pvq_info) {
+  const int tx_blk_size = tx_size_wide[tx_size];
+  int skip;
+  // TODO(yushin): Enable this later, when pvq_qm_q4 is available in AOM.
+  // int pvq_dc_quant = OD_MAXI(1,
+  //  quant * daala_enc->state.pvq_qm_q4[plane][od_qm_get_index(tx_size, 0)] >>
+  //  4);
+  int quant_shift = tx_size == TX_32X32 ? 1 : 0;
+  // DC quantizer for PVQ
+  int pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift);
+  int tell;
+  int has_dc_skip = 1;
+  int i;
+  int off = od_qm_offset(tx_size, plane ? 1 : 0);
+#if PVQ_CHROMA_RD
+  double save_pvq_lambda;
+#endif
+  DECLARE_ALIGNED(16, int16_t, coeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+  DECLARE_ALIGNED(16, int16_t, ref_coeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+  DECLARE_ALIGNED(16, int16_t, dqcoeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+
+  DECLARE_ALIGNED(16, int32_t, in_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+  DECLARE_ALIGNED(16, int32_t, ref_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+  DECLARE_ALIGNED(16, int32_t, out_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
+
+  *eob = 0;
+
+  tell = od_ec_enc_tell_frac(&daala_enc->ec);
+
+  // Change coefficient ordering for pvq encoding.
+  od_raster_to_coding_order(coeff_pvq, tx_blk_size, tx_type, coeff,
+                            tx_blk_size);
+  od_raster_to_coding_order(ref_coeff_pvq, tx_blk_size, tx_type, ref_coeff,
+                            tx_blk_size);
+
+  // copy int16 inputs to int32
+  for (i = 0; i < tx_blk_size * tx_blk_size; i++) {
+    ref_int32[i] = ref_coeff_pvq[i];
+    in_int32[i] = coeff_pvq[i];
+  }
+
+#if PVQ_CHROMA_RD
+  if (plane != 0) {
+    save_pvq_lambda = daala_enc->pvq_norm_lambda;
+    daala_enc->pvq_norm_lambda *= 0.8;
+  }
+#endif
+  if (abs(in_int32[0] - ref_int32[0]) < pvq_dc_quant * 141 / 256) { /* 0.55 */
+    out_int32[0] = 0;
+  } else {
+    out_int32[0] = OD_DIV_R0(in_int32[0] - ref_int32[0], pvq_dc_quant);
+  }
+
+  skip = od_pvq_encode(
+      daala_enc, ref_int32, in_int32, out_int32,
+      (int)quant[0] >> quant_shift,  // scale/quantizer
+      (int)quant[1] >> quant_shift,  // scale/quantizer
+      // TODO(yushin): Instead of 0,
+      //   use daala_enc->use_activity_masking for activity masking.
+      plane, tx_size, OD_PVQ_BETA[0][plane][tx_size], OD_ROBUST_STREAM,
+      0,        // is_keyframe,
+      0, 0, 0,  // q_scaling, bx, by,
+      daala_enc->state.qm + off, daala_enc->state.qm_inv + off,
+      speed,  // speed
+      pvq_info);
+
+  if (skip && pvq_info) assert(pvq_info->ac_dc_coded == 0);
+
+  if (!skip && pvq_info) assert(pvq_info->ac_dc_coded > 0);
+
+  // Encode residue of DC coeff, if required.
+  if (!has_dc_skip || out_int32[0]) {
+    generic_encode(&daala_enc->ec, &daala_enc->state.adapt.model_dc[plane],
+                   abs(out_int32[0]) - has_dc_skip, -1,
+                   &daala_enc->state.adapt.ex_dc[plane][tx_size][0], 2);
+  }
+  if (out_int32[0]) {
+    od_ec_enc_bits(&daala_enc->ec, out_int32[0] < 0, 1);
+    skip = 0;
+  }
+
+  // need to save quantized residue of DC coeff
+  // so that final pvq bitstream writing can know whether DC is coded.
+  if (pvq_info) pvq_info->dq_dc_residue = out_int32[0];
+
+  out_int32[0] = out_int32[0] * pvq_dc_quant;
+  out_int32[0] += ref_int32[0];
+
+  // copy int32 result back to int16
+  for (i = 0; i < tx_blk_size * tx_blk_size; i++) dqcoeff_pvq[i] = out_int32[i];
+
+  // Back to original coefficient order
+  od_coding_order_to_raster(dqcoeff, tx_blk_size, tx_type, dqcoeff_pvq,
+                            tx_blk_size);
+
+  *eob = tx_blk_size * tx_blk_size;
+
+  *rate = (od_ec_enc_tell_frac(&daala_enc->ec) - tell)
+          << (AV1_PROB_COST_SHIFT - OD_BITRES);
+  assert(*rate >= 0);
+#if PVQ_CHROMA_RD
+  if (plane != 0) daala_enc->pvq_norm_lambda = save_pvq_lambda;
+#endif
+  return skip;
+}
+
+void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta,
+                            int *max_theta, int *k, od_coeff *y, int nb_bands,
+                            const int *off, int *size, int skip_rest,
+                            int skip_dir,
+                            int bs) {  // block size in log_2 -2
+  int i;
+  const int tx_blk_size = tx_size_wide[bs];
+
+  for (i = 0; i < nb_bands; i++) {
+    pvq_info->qg[i] = qg[i];
+    pvq_info->theta[i] = theta[i];
+    pvq_info->max_theta[i] = max_theta[i];
+    pvq_info->k[i] = k[i];
+    pvq_info->off[i] = off[i];
+    pvq_info->size[i] = size[i];
+  }
+
+  memcpy(pvq_info->y, y, tx_blk_size * tx_blk_size * sizeof(od_coeff));
+
+  pvq_info->nb_bands = nb_bands;
+  pvq_info->skip_rest = skip_rest;
+  pvq_info->skip_dir = skip_dir;
+  pvq_info->bs = bs;
+}
+#endif
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index e9b6bc8..2f2b93b 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -13,6 +13,7 @@
 #define AV1_ENCODER_ENCODEMB_H_
 
 #include "./aom_config.h"
+#include "av1/common/onyxc_int.h"
 #include "av1/encoder/block.h"
 
 #ifdef __cplusplus
@@ -77,6 +78,19 @@
                                   BLOCK_SIZE bsize, int plane,
                                   int enable_optimize_b);
 
+#if CONFIG_PVQ
+int av1_pvq_encode_helper(daala_enc_ctx *daala_enc, tran_low_t *const coeff,
+                          tran_low_t *ref_coeff, tran_low_t *const dqcoeff,
+                          uint16_t *eob, const int16_t *quant, int plane,
+                          int tx_size, TX_TYPE tx_type, int *rate, int speed,
+                          PVQ_INFO *pvq_info);
+
+void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta,
+                            int *max_theta, int *k, od_coeff *y, int nb_bands,
+                            const int *off, int *size, int skip_rest,
+                            int skip_dir, int bs);
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 52408b9..2bd2001 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -415,6 +415,20 @@
   aom_free(cpi->mbmi_ext_base);
   cpi->mbmi_ext_base = NULL;
 
+#if CONFIG_PVQ
+  if (cpi->oxcf.pass != 1) {
+    const int tile_cols = 1 << cm->log2_tile_cols;
+    const int tile_rows = 1 << cm->log2_tile_rows;
+    int tile_col, tile_row;
+
+    for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+      for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+        TileDataEnc *tile_data =
+            &cpi->tile_data[tile_row * tile_cols + tile_col];
+        aom_free(tile_data->pvq_q.buf);
+      }
+  }
+#endif
   aom_free(cpi->tile_data);
   cpi->tile_data = NULL;
 
@@ -834,7 +848,11 @@
 
   av1_set_mb_mi(cm, cm->width, cm->height);
   av1_init_context_buffers(cm);
-  av1_init_macroblockd(cm, xd, NULL);
+  av1_init_macroblockd(cm, xd,
+#if CONFIG_PVQ
+                       NULL,
+#endif
+                       NULL);
   memset(cpi->mbmi_ext_base, 0,
          cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
 
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 00abc71..ae48474 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -270,6 +270,9 @@
   int mode_map[BLOCK_SIZES][MAX_MODES];
   int m_search_count;
   int ex_search_count;
+#if CONFIG_PVQ
+  PVQ_QUEUE pvq_q;
+#endif
 } TileDataEnc;
 
 typedef struct RD_COUNTS {
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 0f7fcca..1ea28f2 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -493,6 +493,9 @@
   double brightness_factor;
   BufferPool *const pool = cm->buffer_pool;
   const int qindex = find_fp_qindex(cm->bit_depth);
+#if CONFIG_PVQ
+  PVQ_QUEUE pvq_q;
+#endif
 
   // First pass code requires valid last and new frame buffers.
   assert(new_yv12 != NULL);
@@ -527,11 +530,45 @@
 
   av1_frame_init_quantizer(cpi);
 
+#if CONFIG_PVQ
+  // For pass 1 of 2-pass encoding, init here for PVQ for now.
+  {
+    od_adapt_ctx *adapt;
+
+    pvq_q.buf_len = 5000;
+    CHECK_MEM_ERROR(cm, pvq_q.buf,
+                    aom_malloc(pvq_q.buf_len * sizeof(PVQ_INFO)));
+    pvq_q.curr_pos = 0;
+    x->pvq_coded = 0;
+
+    x->pvq_q = &pvq_q;
+
+    // TODO(yushin): Since this init step is also called in 2nd pass,
+    // or 1-pass encoding, consider factoring out it as a function.
+    // TODO(yushin)
+    // If activity masking is enabled, change below to OD_HVS_QM
+    x->daala_enc.qm = OD_FLAT_QM;  // Hard coded. Enc/dec required to sync.
+    x->daala_enc.pvq_norm_lambda = OD_PVQ_LAMBDA;
+    x->daala_enc.pvq_norm_lambda_dc = OD_PVQ_LAMBDA;
+
+    od_init_qm(x->daala_enc.state.qm, x->daala_enc.state.qm_inv,
+               x->daala_enc.qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT);
+    od_ec_enc_init(&x->daala_enc.ec, 65025);
+
+    adapt = &x->daala_enc.state.adapt;
+    od_ec_enc_reset(&x->daala_enc.ec);
+    od_adapt_ctx_reset(adapt, 0);
+  }
+#endif
+
   for (i = 0; i < MAX_MB_PLANE; ++i) {
-    p[i].coeff = ctx->coeff[i][1];
-    p[i].qcoeff = ctx->qcoeff[i][1];
-    pd[i].dqcoeff = ctx->dqcoeff[i][1];
-    p[i].eobs = ctx->eobs[i][1];
+    p[i].coeff = ctx->coeff[i];
+    p[i].qcoeff = ctx->qcoeff[i];
+    pd[i].dqcoeff = ctx->dqcoeff[i];
+#if CONFIG_PVQ
+    pd[i].pvq_ref_coeff = ctx->pvq_ref_coeff[i];
+#endif
+    p[i].eobs = ctx->eobs[i];
   }
 
   av1_init_mv_probs(cm);
@@ -926,6 +963,16 @@
     aom_clear_system_state();
   }
 
+#if CONFIG_PVQ
+  od_ec_enc_clear(&x->daala_enc.ec);
+
+  x->pvq_q->last_pos = x->pvq_q->curr_pos;
+  x->pvq_q->curr_pos = 0;
+  x->pvq_q = NULL;
+
+  aom_free(pvq_q.buf);
+#endif
+
   // Clamp the image start to rows/2. This number of rows is discarded top
   // and bottom as dead data so rows / 2 means the frame is blank.
   if ((image_data_start_row > cm->mb_rows / 2) ||
diff --git a/av1/encoder/generic_encoder.c b/av1/encoder/generic_encoder.c
new file mode 100644
index 0000000..466ede3
--- /dev/null
+++ b/av1/encoder/generic_encoder.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "aom_dsp/entdec.h"
+#include "aom_dsp/entenc.h"
+#include "av1/common/generic_code.h"
+#include "av1/common/odintrin.h"
+#include "pvq_encoder.h"
+
+/** Encodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] enc   range encoder
+ * @param [in]     val   variable being encoded
+ * @param [in,out] cdf   CDF of the variable (Q15)
+ * @param [in]     n     number of values possible
+ * @param [in,out] count number of symbols encoded with that cdf so far
+ * @param [in]     rate  adaptation rate shift (smaller is faster)
+ */
+void od_encode_cdf_adapt_q15(od_ec_enc *ec, int val, uint16_t *cdf, int n,
+ int *count, int rate) {
+  int i;
+  if (*count == 0) {
+    /* On the first call, we normalize the cdf to (32768 - n). This should
+       eventually be moved to the state init, but for now it makes it much
+       easier to experiment and convert symbols to the Q15 adaptation.*/
+    int ft;
+    ft = cdf[n - 1];
+    for (i = 0; i < n; i++) {
+      cdf[i] = cdf[i]*32768/ft;
+    }
+  }
+  od_ec_encode_cdf_q15(ec, val, cdf, n);
+  od_cdf_adapt_q15(val, cdf, n, count, rate);
+}
+
+/** Encodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] enc   range encoder
+ * @param [in]     val   variable being encoded
+ * @param [in]     cdf   CDF of the variable (Q15)
+ * @param [in]     n     number of values possible
+ * @param [in]     increment adaptation speed (Q15)
+ */
+void od_encode_cdf_adapt(od_ec_enc *ec, int val, uint16_t *cdf, int n,
+ int increment) {
+  int i;
+  od_ec_encode_cdf_unscaled(ec, val, cdf, n);
+  if (cdf[n-1] + increment > 32767) {
+    for (i = 0; i < n; i++) {
+      /* Second term ensures that the pdf is non-null */
+      cdf[i] = (cdf[i] >> 1) + i + 1;
+    }
+  }
+  for (i = val; i < n; i++) cdf[i] += increment;
+}
+
+/** Encodes a random variable using a "generic" model, assuming that the
+ * distribution is one-sided (zero and up), has a single mode, and decays
+ * exponentially past the model.
+ *
+ * @param [in,out] enc   range encoder
+ * @param [in,out] model generic probability model
+ * @param [in]     x     variable being encoded
+ * @param [in]     max   largest value possible
+ * @param [in,out] ExQ16 expectation of x (adapted)
+ * @param [in]     integration integration period of ExQ16 (leaky average over
+ * 1<<integration samples)
+ */
+void generic_encode(od_ec_enc *enc, generic_encoder *model, int x, int max,
+ int *ex_q16, int integration) {
+  int lg_q1;
+  int shift;
+  int id;
+  uint16_t *cdf;
+  int xs;
+  int ms;
+  if (max == 0) return;
+  lg_q1 = log_ex(*ex_q16);
+  OD_LOG((OD_LOG_ENTROPY_CODER, OD_LOG_DEBUG,
+   "%d %d", *ex_q16, lg_q1));
+  /* If expectation is too large, shift x to ensure that
+     all we have past xs=15 is the exponentially decaying tail
+     of the distribution */
+  shift = OD_MAXI(0, (lg_q1 - 5) >> 1);
+  /* Choose the cdf to use: we have two per "octave" of ExQ16 */
+  id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
+  cdf = model->cdf[id];
+  xs = (x + (1 << shift >> 1)) >> shift;
+  ms = (max + (1 << shift >> 1)) >> shift;
+  OD_ASSERT(max == -1 || xs <= ms);
+  if (max == -1) od_ec_encode_cdf_unscaled(enc, OD_MINI(15, xs), cdf, 16);
+  else {
+    od_ec_encode_cdf_unscaled(enc, OD_MINI(15, xs), cdf, OD_MINI(ms + 1, 16));
+  }
+  if (xs >= 15) {
+    int e;
+    unsigned decay;
+    /* Estimate decay based on the assumption that the distribution is close
+       to Laplacian for large values. We should probably have an adaptive
+       estimate instead. Note: The 2* is a kludge that's not fully understood
+       yet. */
+    OD_ASSERT(*ex_q16 < INT_MAX >> 1);
+    e = ((2**ex_q16 >> 8) + (1 << shift >> 1)) >> shift;
+    decay = OD_MAXI(2, OD_MINI(254, 256*e/(e + 256)));
+    /* Encode the tail of the distribution assuming exponential decay. */
+    od_laplace_encode_special(enc, xs - 15, decay, (max == -1) ? -1 : ms - 15);
+  }
+  if (shift != 0) {
+    int special;
+    /* Because of the rounding, there's only half the number of possibilities
+       for xs=0. */
+    special = xs == 0;
+    if (shift - special > 0) {
+      od_ec_enc_bits(enc, x - (xs << shift) + (!special << (shift - 1)),
+       shift - special);
+    }
+  }
+  generic_model_update(model, ex_q16, x, xs, id, integration);
+  OD_LOG((OD_LOG_ENTROPY_CODER, OD_LOG_DEBUG,
+   "enc: %d %d %d %d %d %x", *ex_q16, x, shift, id, xs, enc->rng));
+}
+
+/** Estimates the cost of encoding a value with generic_encode().
+ *
+ * @param [in,out] model generic probability model
+ * @param [in]     x     variable being encoded
+ * @param [in]     max   largest value possible
+ * @param [in,out] ExQ16 expectation of x (adapted)
+ * @return number of bits (approximation)
+ */
+double generic_encode_cost(generic_encoder *model, int x, int max,
+ int *ex_q16) {
+  int lg_q1;
+  int shift;
+  int id;
+  uint16_t *cdf;
+  int xs;
+  int ms;
+  int extra;
+  if (max == 0) return 0;
+  lg_q1 = log_ex(*ex_q16);
+  /* If expectation is too large, shift x to ensure that
+       all we have past xs=15 is the exponentially decaying tail
+       of the distribution */
+  shift = OD_MAXI(0, (lg_q1 - 5) >> 1);
+  /* Choose the cdf to use: we have two per "octave" of ExQ16 */
+  id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
+  cdf = model->cdf[id];
+  xs = (x + (1 << shift >> 1)) >> shift;
+  ms = (max + (1 << shift >> 1)) >> shift;
+  OD_ASSERT(max == -1 || xs <= ms);
+  extra = 0;
+  if (shift) extra = shift - (xs == 0);
+  xs = OD_MINI(15, xs);
+  /* Shortcut: assume it's going to cost 2 bits for the Laplace coder. */
+  if (xs == 15) extra += 2;
+  if (max == -1) {
+    return extra - OD_LOG2((double)(cdf[xs] - (xs == 0 ? 0 : cdf[xs - 1]))/
+     cdf[15]);
+  }
+  else {
+    return extra - OD_LOG2((double)(cdf[xs] - (xs == 0 ? 0 : cdf[xs - 1]))/
+     cdf[OD_MINI(ms, 15)]);
+  }
+}
+
+/*Estimates the cost of encoding a value with a given CDF.*/
+double od_encode_cdf_cost(int val, uint16_t *cdf, int n) {
+  int total_prob;
+  int prev_prob;
+  double val_prob;
+  OD_ASSERT(n > 0);
+  total_prob = cdf[n - 1];
+  if (val == 0) {
+    prev_prob = 0;
+  }
+  else {
+    prev_prob = cdf[val - 1];
+  }
+  val_prob = (cdf[val] - prev_prob) / (double)total_prob;
+  return -OD_LOG2(val_prob);
+}
diff --git a/av1/encoder/laplace_encoder.c b/av1/encoder/laplace_encoder.c
new file mode 100644
index 0000000..07dcaca
--- /dev/null
+++ b/av1/encoder/laplace_encoder.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "aom_dsp/entdec.h"
+#include "aom_dsp/entenc.h"
+#include "av1/common/odintrin.h"
+#include "av1/common/pvq.h"
+#include "pvq_encoder.h"
+
+static void od_encode_pvq_split(od_ec_enc *ec, od_pvq_codeword_ctx *adapt,
+ int count, int sum, int ctx) {
+  int shift;
+  int rest;
+  int fctx;
+  if (sum == 0) return;
+  shift = OD_MAXI(0, OD_ILOG(sum) - 3);
+  if (shift) {
+    rest = count & ((1 << shift) - 1);
+    count >>= shift;
+    sum >>= shift;
+  }
+  fctx = 7*ctx + sum - 1;
+  od_encode_cdf_adapt(ec, count, adapt->pvq_split_cdf[fctx],
+   sum + 1, adapt->pvq_split_increment);
+  if (shift) od_ec_enc_bits(ec, rest, shift);
+}
+
+void od_encode_band_pvq_splits(od_ec_enc *ec, od_pvq_codeword_ctx *adapt,
+ const int *y, int n, int k, int level) {
+  int mid;
+  int i;
+  int count_right;
+  if (n <= 1 || k == 0) return;
+  if (k == 1 && n <= 16) {
+    int cdf_id;
+    int pos;
+    cdf_id = od_pvq_k1_ctx(n, level == 0);
+    for (pos = 0; !y[pos]; pos++);
+    OD_ASSERT(pos < n);
+    od_encode_cdf_adapt(ec, pos, adapt->pvq_k1_cdf[cdf_id], n,
+     adapt->pvq_k1_increment);
+  }
+  else {
+    mid = n >> 1;
+    count_right = k;
+    for (i = 0; i < mid; i++) count_right -= abs(y[i]);
+    od_encode_pvq_split(ec, adapt, count_right, k, od_pvq_size_ctx(n));
+    od_encode_band_pvq_splits(ec, adapt, y, mid, k - count_right, level + 1);
+    od_encode_band_pvq_splits(ec, adapt, y + mid, n - mid, count_right,
+     level + 1);
+  }
+}
+
+/** Encodes the tail of a Laplace-distributed variable, i.e. it doesn't
+ * do anything special for the zero case.
+ *
+ * @param [in,out] enc     range encoder
+ * @param [in]     x       variable to encode (has to be positive)
+ * @param [in]     decay   decay factor of the distribution in Q8 format,
+ * i.e. pdf ~= decay^x
+ * @param [in]     max     maximum possible value of x (used to truncate
+ * the pdf)
+ */
+void od_laplace_encode_special(od_ec_enc *enc, int x, unsigned decay, int max) {
+  int shift;
+  int xs;
+  int ms;
+  int sym;
+  const uint16_t *cdf;
+  shift = 0;
+  if (max == 0) return;
+  /* We don't want a large decay value because that would require too many
+     symbols. However, it's OK if the max is below 15. */
+  while (((max >> shift) >= 15 || max == -1) && decay > 235) {
+    decay = (decay*decay + 128) >> 8;
+    shift++;
+  }
+  OD_ASSERT(x <= max || max == -1);
+  decay = OD_MINI(decay, 254);
+  decay = OD_MAXI(decay, 2);
+  xs = x >> shift;
+  ms = max >> shift;
+  cdf = EXP_CDF_TABLE[(decay + 1) >> 1];
+  OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "decay = %d", decay));
+  do {
+    sym = OD_MINI(xs, 15);
+    {
+      int i;
+      OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "%d %d %d %d %d\n", x, xs, shift,
+       sym, max));
+      for (i = 0; i < 16; i++) {
+        OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "%d ", cdf[i]));
+      }
+      OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "\n"));
+    }
+    if (ms > 0 && ms < 15) {
+      /* Simple way of truncating the pdf when we have a bound */
+      od_ec_encode_cdf_unscaled(enc, sym, cdf, ms + 1);
+    }
+    else {
+      od_ec_encode_cdf_q15(enc, sym, cdf, 16);
+    }
+    xs -= 15;
+    ms -= 15;
+  }
+  while (sym >= 15 && ms != 0);
+  if (shift) od_ec_enc_bits(enc, x & ((1 << shift) - 1), shift);
+}
+
+/** Encodes a Laplace-distributed variable for use in PVQ
+ *
+ * @param [in,out] enc  range encoder
+ * @param [in]     x    variable to encode (including sign)
+ * @param [in]     ExQ8 expectation of the absolute value of x in Q8
+ * @param [in]     K    maximum value of |x|
+ */
+void od_laplace_encode(od_ec_enc *enc, int x, int ex_q8, int k) {
+  int j;
+  int shift;
+  int xs;
+  uint16_t cdf[16];
+  int sym;
+  int decay;
+  int offset;
+  /* shift down x if expectation is too high */
+  shift = OD_ILOG(ex_q8) - 11;
+  if (shift < 0) shift = 0;
+  /* Apply the shift with rounding to Ex, K and xs */
+  ex_q8 = (ex_q8 + (1 << shift >> 1)) >> shift;
+  k = (k + (1 << shift >> 1)) >> shift;
+  xs = (x + (1 << shift >> 1)) >> shift;
+  decay = OD_MINI(254, 256*ex_q8/(ex_q8 + 256));
+  offset = LAPLACE_OFFSET[(decay + 1) >> 1];
+  for (j = 0; j < 16; j++) {
+    cdf[j] = EXP_CDF_TABLE[(decay + 1) >> 1][j] - offset;
+  }
+  sym = xs;
+  if (sym > 15) sym = 15;
+  /* Simple way of truncating the pdf when we have a bound */
+  if (k != 0) od_ec_encode_cdf_unscaled(enc, sym, cdf, OD_MINI(k + 1, 16));
+  if (shift) {
+    int special;
+    /* Because of the rounding, there's only half the number of possibilities
+       for xs=0 */
+    special = xs == 0;
+    if (shift - special > 0) {
+      od_ec_enc_bits(enc, x - (xs << shift) + (!special << (shift - 1)),
+       shift - special);
+    }
+  }
+  /* Handle the exponentially-decaying tail of the distribution */
+  OD_ASSERT(xs - 15 <= k - 15);
+  if (xs >= 15) od_laplace_encode_special(enc, xs - 15, decay, k - 15);
+}
+
+static void laplace_encode_vector_delta(od_ec_enc *enc, const od_coeff *y, int n, int k,
+                                        int32_t *curr, const int32_t *means) {
+  int i;
+  int prev;
+  int sum_ex;
+  int sum_c;
+  int first;
+  int k_left;
+  int coef;
+  prev = 0;
+  sum_ex = 0;
+  sum_c = 0;
+  first = 1;
+  k_left = k;
+  coef = 256*means[OD_ADAPT_COUNT_Q8]/
+   (1 + means[OD_ADAPT_COUNT_EX_Q8]);
+  coef = OD_MAXI(coef, 1);
+  for (i = 0; i < n; i++) {
+    if (y[i] != 0) {
+      int j;
+      int count;
+      int mag;
+      mag = abs(y[i]);
+      count = i - prev;
+      if (first) {
+        int decay;
+        int ex = coef*(n - prev)/k_left;
+        if (ex > 65280) decay = 255;
+        else {
+          decay = OD_MINI(255,
+           (int)((256*ex/(ex + 256) + (ex>>5)*ex/((n + 1)*(n - 1)*(n - 1)))));
+        }
+        /*Update mean position.*/
+        OD_ASSERT(count <= n - 1);
+        od_laplace_encode_special(enc, count, decay, n - 1);
+        first = 0;
+      }
+      else od_laplace_encode(enc, count, coef*(n - prev)/k_left, n - prev - 1);
+      sum_ex += 256*(n - prev);
+      sum_c += count*k_left;
+      od_ec_enc_bits(enc, y[i] < 0, 1);
+      for (j = 0; j < mag - 1; j++) {
+        od_laplace_encode(enc, 0, coef*(n - i)/(k_left - 1 - j), n - i - 1);
+        sum_ex += 256*(n - i);
+      }
+      k_left -= mag;
+      prev = i;
+      if (k_left == 0) break;
+    }
+  }
+  if (k > 0) {
+    curr[OD_ADAPT_COUNT_Q8] = 256*sum_c;
+    curr[OD_ADAPT_COUNT_EX_Q8] = sum_ex;
+  }
+  else {
+    curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE;
+    curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE;
+  }
+  curr[OD_ADAPT_K_Q8] = 0;
+  curr[OD_ADAPT_SUM_EX_Q8] = 0;
+}
+
+/** Encodes a vector of integers assumed to come from rounding a sequence of
+ * Laplace-distributed real values in decreasing order of variance.
+ *
+ * @param [in,out] enc range encoder
+ * @param [in]     y     vector to encode
+ * @param [in]     N     dimension of the vector
+ * @param [in]     K     sum of the absolute value of components of y
+ * @param [out]    curr  Adaptation context output, may alias means.
+ * @param [in]     means Adaptation context input.
+ */
+void od_laplace_encode_vector(od_ec_enc *enc, const od_coeff *y, int n, int k,
+                           int32_t *curr, const int32_t *means) {
+  int i;
+  int sum_ex;
+  int kn;
+  int exp_q8;
+  int mean_k_q8;
+  int mean_sum_ex_q8;
+  int ran_delta;
+  ran_delta = 0;
+  if (k <= 1) {
+    laplace_encode_vector_delta(enc, y, n, k, curr, means);
+    return;
+  }
+  sum_ex = 0;
+  kn = k;
+  /* Estimates the factor relating pulses_left and positions_left to E(|x|) */
+  mean_k_q8 = means[OD_ADAPT_K_Q8];
+  mean_sum_ex_q8 = means[OD_ADAPT_SUM_EX_Q8];
+  if (mean_k_q8 < 1 << 23) exp_q8 = 256*mean_k_q8/(1 + mean_sum_ex_q8);
+  else exp_q8 = mean_k_q8/(1 + (mean_sum_ex_q8 >> 8));
+  for (i = 0; i < n; i++) {
+    int ex;
+    int x;
+    if (kn == 0) break;
+    if (kn <= 1 && i != n - 1) {
+      laplace_encode_vector_delta(enc, y + i, n - i, kn, curr, means);
+      ran_delta = 1;
+      break;
+    }
+    x = abs(y[i]);
+    /* Expected value of x (round-to-nearest) is
+       expQ8*pulses_left/positions_left */
+    ex = (2*exp_q8*kn + (n - i))/(2*(n - i));
+    if (ex > kn*256) ex = kn*256;
+    sum_ex += (2*256*kn + (n - i))/(2*(n - i));
+    /* No need to encode the magnitude for the last bin. */
+    if (i != n - 1) od_laplace_encode(enc, x, ex, kn);
+    if (x != 0) od_ec_enc_bits(enc, y[i] < 0, 1);
+    kn -= x;
+  }
+  /* Adapting the estimates for expQ8 */
+  if (!ran_delta) {
+    curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE;
+    curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE;
+  }
+  curr[OD_ADAPT_K_Q8] = k - kn;
+  curr[OD_ADAPT_SUM_EX_Q8] = sum_ex;
+}
diff --git a/av1/encoder/pvq_encoder.c b/av1/encoder/pvq_encoder.c
new file mode 100644
index 0000000..2d8340d
--- /dev/null
+++ b/av1/encoder/pvq_encoder.c
@@ -0,0 +1,1016 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "aom_dsp/entcode.h"
+#include "aom_dsp/entenc.h"
+#include "av1/common/blockd.h"
+#include "av1/common/odintrin.h"
+#include "av1/common/partition.h"
+#include "av1/common/pvq_state.h"
+#include "av1/encoder/encodemb.h"
+#include "av1/encoder/pvq_encoder.h"
+
+#define OD_PVQ_RATE_APPROX (0)
+/*Shift to ensure that the upper bound (i.e. for the max blocksize) of the
+   dot-product of the 1st band of chroma with the luma ref doesn't overflow.*/
+#define OD_CFL_FLIP_SHIFT (OD_LIMIT_BSIZE_MAX + 0)
+
+/* Encodes a PVQ codevector: the magnitudes are coded with recursive band
+   splits, then one raw sign bit is coded for each nonzero coefficient. */
+static void od_encode_pvq_codeword(od_ec_enc *ec, od_pvq_codeword_ctx *adapt,
+ const od_coeff *in, int n, int k) {
+  int i;
+  od_encode_band_pvq_splits(ec, adapt, in, n, k, 0);
+  for (i = 0; i < n; i++) if (in[i]) od_ec_enc_bits(ec, in[i] < 0, 1);
+}
+
+/* Computes 1/sqrt(i) using a table for small values.
+   NOTE: the table lookup is 1-based (table[i-1]), so callers must pass
+   i >= 1; all current callers pass start + 2*i + 1 >= 1. */
+static double od_rsqrt_table(int i) {
+  static double table[16] = {
+    1.000000, 0.707107, 0.577350, 0.500000,
+    0.447214, 0.408248, 0.377964, 0.353553,
+    0.333333, 0.316228, 0.301511, 0.288675,
+    0.277350, 0.267261, 0.258199, 0.250000};
+  if (i <= 16) return table[i-1];
+  else return 1./sqrt(i);
+}
+
+/*Computes 1/sqrt(start+2*i+1) using a lookup table containing the results
+   where 0 <= i < table_size.
+  Beyond the table, falls back to od_rsqrt_table(); start is truncated to
+   int by that call.*/
+static double od_custom_rsqrt_dynamic_table(const double* table,
+ const int table_size, const double start, const int i) {
+  if (i < table_size) return table[i];
+  else return od_rsqrt_table(start + 2*i + 1);
+}
+
+/*Fills tables used in od_custom_rsqrt_dynamic_table for a given start.
+  Entry i holds 1/sqrt(start + 2*i + 1), matching the in-table branch of
+   od_custom_rsqrt_dynamic_table.*/
+static void od_fill_dynamic_rqrt_table(double *table, const int table_size,
+ const double start) {
+  int i;
+  for (i = 0; i < table_size; i++)
+    table[i] = od_rsqrt_table(start + 2*i + 1);
+}
+
+/** Find the codepoint on the given PSphere closest to the desired
+ * vector. Double-precision PVQ search just to make sure our tests
+ * aren't limited by numerical accuracy.
+ *
+ * @param [in]      xcoeff  input vector to quantize (x in the math doc)
+ * @param [in]      n       number of dimensions
+ * @param [in]      k       number of pulses
+ * @param [out]     ypulse  optimal codevector found (y in the math doc)
+ * @param [out]     g2      multiplier for the distortion (typically squared
+ *                          gain units)
+ * @param [in] pvq_norm_lambda enc->pvq_norm_lambda for quantized RDO
+ * @param [in]      prev_k  number of pulses already in ypulse that we should
+ *                          reuse for the search (or 0 for a new search)
+ * @return                  cosine distance between x and y (between 0 and 1)
+ */
+static double pvq_search_rdo_double(const od_val16 *xcoeff, int n, int k,
+ od_coeff *ypulse, double g2, double pvq_norm_lambda, int prev_k) {
+  int i, j;
+  double xy;
+  double yy;
+  /* TODO - This blows our 8kB stack space budget and should be fixed when
+   converting PVQ to fixed point. */
+  double x[MAXN];
+  double xx;
+  double lambda;
+  double norm_1;
+  int rdo_pulses;
+  double delta_rate;
+  xx = xy = yy = 0;
+  /* Work on magnitudes only; signs are restored at the end. */
+  for (j = 0; j < n; j++) {
+    x[j] = fabs((float)xcoeff[j]);
+    xx += x[j]*x[j];
+  }
+  norm_1 = 1./sqrt(1e-30 + xx);
+  lambda = pvq_norm_lambda/(1e-30 + g2);
+  /* i counts pulses placed so far. */
+  i = 0;
+  if (prev_k > 0 && prev_k <= k) {
+    /* We reuse pulses from a previous search so we don't have to search them
+       again. */
+    for (j = 0; j < n; j++) {
+      ypulse[j] = abs(ypulse[j]);
+      xy += x[j]*ypulse[j];
+      yy += ypulse[j]*ypulse[j];
+      i += ypulse[j];
+    }
+  }
+  else if (k > 2) {
+    /* Initialize with an L1-projection of x scaled to roughly k pulses;
+       only the remaining pulses need the greedy search. */
+    double l1_norm;
+    double l1_inv;
+    l1_norm = 0;
+    for (j = 0; j < n; j++) l1_norm += x[j];
+    l1_inv = 1./OD_MAXF(l1_norm, 1e-100);
+    for (j = 0; j < n; j++) {
+      double tmp;
+      tmp = k*x[j]*l1_inv;
+      ypulse[j] = OD_MAXI(0, (int)floor(tmp));
+      xy += x[j]*ypulse[j];
+      yy += ypulse[j]*ypulse[j];
+      i += ypulse[j];
+    }
+  }
+  else OD_CLEAR(ypulse, n);
+
+  /* Only use RDO on the last few pulses. This not only saves CPU, but using
+     RDO on all pulses actually makes the results worse for reasons I don't
+     fully understand. */
+  rdo_pulses = 1 + k/4;
+  /* Rough assumption for now, the last position costs about 3 bits more than
+     the first. */
+  delta_rate = 3./n;
+  /* Search one pulse at a time */
+  for (; i < k - rdo_pulses; i++) {
+    int pos;
+    double best_xy;
+    double best_yy;
+    pos = 0;
+    best_xy = -10;
+    best_yy = 1;
+    /* Pick the position maximizing (xy + x[j])^2 / (yy + 2*y[j] + 1),
+       compared cross-multiplied to avoid divisions. */
+    for (j = 0; j < n; j++) {
+      double tmp_xy;
+      double tmp_yy;
+      tmp_xy = xy + x[j];
+      tmp_yy = yy + 2*ypulse[j] + 1;
+      tmp_xy *= tmp_xy;
+      if (j == 0 || tmp_xy*best_yy > best_xy*tmp_yy) {
+        best_xy = tmp_xy;
+        best_yy = tmp_yy;
+        pos = j;
+      }
+    }
+    xy = xy + x[pos];
+    yy = yy + 2*ypulse[pos] + 1;
+    ypulse[pos]++;
+  }
+  /* Search last pulses with RDO. Distortion is D = (x-y)^2 = x^2 - 2*x*y + y^2
+     and since x^2 and y^2 are constant, we just maximize x*y, plus a
+     lambda*rate term. Note that since x and y aren't normalized here,
+     we need to divide by sqrt(x^2)*sqrt(y^2). */
+  for (; i < k; i++) {
+    double rsqrt_table[4];
+    int rsqrt_table_size = 4;
+    int pos;
+    double best_cost;
+    pos = 0;
+    best_cost = -1e5;
+    /*Fill the small rsqrt lookup table with inputs relative to yy.
+      Specifically, the table of n values is filled with
+       rsqrt(yy + 1), rsqrt(yy + 2 + 1) .. rsqrt(yy + 2*(n-1) + 1).*/
+    od_fill_dynamic_rqrt_table(rsqrt_table, rsqrt_table_size, yy);
+    for (j = 0; j < n; j++) {
+      double tmp_xy;
+      double tmp_yy;
+      tmp_xy = xy + x[j];
+      /*Calculate rsqrt(yy + 2*ypulse[j] + 1) using an optimized method.*/
+      tmp_yy = od_custom_rsqrt_dynamic_table(rsqrt_table, rsqrt_table_size,
+       yy, ypulse[j]);
+      /* Cost = normalized correlation gain minus the rate penalty that grows
+         with the pulse position (later positions cost more to code). */
+      tmp_xy = 2*tmp_xy*norm_1*tmp_yy - lambda*j*delta_rate;
+      if (j == 0 || tmp_xy > best_cost) {
+        best_cost = tmp_xy;
+        pos = j;
+      }
+    }
+    xy = xy + x[pos];
+    yy = yy + 2*ypulse[pos] + 1;
+    ypulse[pos]++;
+  }
+  /* Restore the input signs on the output pulse vector. */
+  for (i = 0; i < n; i++) {
+    if (xcoeff[i] < 0) ypulse[i] = -ypulse[i];
+  }
+  return xy/(1e-100 + sqrt(xx*yy));
+}
+
+/** Encodes the gain so that the return value increases with the
+ * distance |x-ref|, so that we can encode a zero when x=ref. The
+ * value x=0 is not covered because it is only allowed in the noref
+ * case.
+ *
+ * @param [in]      x      quantized gain to encode
+ * @param [in]      ref    quantized gain of the reference
+ * @return                 interleave-encoded quantized gain value
+ */
+static int neg_interleave(int x, int ref) {
+  /* Values below ref map to odd codes, values in [ref, 2*ref) to even codes,
+     and values >= 2*ref are passed through (shifted down by one), so that
+     x == ref encodes as 0. */
+  if (x < ref) return -2*(x - ref) - 1;
+  else if (x < 2*ref) return 2*(x - ref);
+  else return x-1;
+}
+
+/* Returns 1 if all len coefficients of x are zero, 0 otherwise. */
+int od_vector_is_null(const od_coeff *x, int len) {
+  int i;
+  for (i = 0; i < len; i++) if (x[i]) return 0;
+  return 1;
+}
+
+/* Returns an estimate (speed > 0) or an exact measurement (speed <= 0) of the
+   rate, in bits, needed to code PVQ codevector y0 with k pulses in n
+   dimensions, plus an approximate cost for coding theta when prediction is
+   used (theta >= 0). */
+static double od_pvq_rate(int qg, int icgr, int theta, int ts,
+ const od_adapt_ctx *adapt, const od_coeff *y0, int k, int n,
+ int is_keyframe, int pli, int speed) {
+  double rate;
+  if (k == 0) rate = 0;
+  else if (speed > 0) {
+    int i;
+    int sum;
+    double f;
+    /* Compute "center of mass" of the pulse vector. */
+    sum = 0;
+    for (i = 0; i < n - (theta != -1); i++) sum += i*abs(y0[i]);
+    f = sum/(double)(k*n);
+    /* Estimates the number of bits it will cost to encode K pulses in
+       N dimensions based on hand-tuned fit for bitrate vs K, N and
+       "center of mass". */
+    rate = (1 + .4*f)*n*OD_LOG2(1 + OD_MAXF(0, log(n*2*(1*f + .025))*k/n)) + 3;
+  }
+  else {
+    /* Trial-encode into a throwaway entropy coder (with a copy of the
+       adaptation context so real state is untouched) to measure the exact
+       rate; od_ec_enc_tell_frac() reports in 1/8th-bit units, hence /8. */
+    od_ec_enc ec;
+    od_pvq_codeword_ctx cd;
+    int tell;
+    od_ec_enc_init(&ec, 1000);
+    OD_COPY(&cd, &adapt->pvq.pvq_codeword_ctx, 1);
+    tell = od_ec_enc_tell_frac(&ec);
+    od_encode_pvq_codeword(&ec, &cd, y0, n - (theta != -1), k);
+    rate = (od_ec_enc_tell_frac(&ec)-tell)/8.;
+    od_ec_enc_clear(&ec);
+  }
+  if (qg > 0 && theta >= 0) {
+    /* Approximate cost of entropy-coding theta */
+    rate += .9*OD_LOG2(ts);
+    /* Adding a cost to using the H/V pred because it's going to be off
+       most of the time. Cost is optimized on subset1, while making
+       sure we don't hurt the checkerboard image too much.
+       FIXME: Do real RDO instead of this arbitrary cost. */
+    if (is_keyframe && pli == 0) rate += 6;
+    if (qg == icgr) rate -= .5;
+  }
+  return rate;
+}
+
+#define MAX_PVQ_ITEMS (20)
+/* This stores the information about a PVQ search candidate, so we can sort
+   based on K. */
+typedef struct {
+  int gain;
+  int k;
+  od_val32 qtheta;
+  int theta;
+  int ts;
+  od_val32 qcg;
+} pvq_search_item;
+
+/* qsort() comparator: orders PVQ search candidates by increasing pulse
+   count K, so earlier (cheaper) searches can seed later ones. */
+int items_compare(pvq_search_item *a, pvq_search_item *b) {
+  return a->k - b->k;
+}
+
+/** Perform PVQ quantization with prediction, trying several
+ * possible gains and angles. See draft-valin-videocodec-pvq and
+ * http://jmvalin.ca/slides/pvq.pdf for more details.
+ *
+ * @param [out]    out       coefficients after quantization
+ * @param [in]     x0        coefficients before quantization
+ * @param [in]     r0        reference, aka predicted coefficients
+ * @param [in]     n         number of dimensions
+ * @param [in]     q0        quantization step size
+ * @param [out]    y         pulse vector (i.e. selected PVQ codevector)
+ * @param [out]    itheta    angle between input and reference (-1 if noref)
+ * @param [out]    max_theta maximum value of itheta that could have been
+ * @param [out]    vk        total number of pulses
+ * @param [in]     beta      per-band activity masking beta param
+ * @param [out]    skip_diff distortion cost of skipping this block
+ *                           (accumulated)
+ * @param [in]     robust    make stream robust to error in the reference
+ * @param [in]     is_keyframe whether we're encoding a keyframe
+ * @param [in]     pli       plane index
+ * @param [in]     adapt     probability adaptation context
+ * @param [in]     qm        QM with magnitude compensation
+ * @param [in]     qm_inv    Inverse of QM with magnitude compensation
+ * @param [in] pvq_norm_lambda enc->pvq_norm_lambda for quantized RDO
+ * @param [in]     speed     Make search faster by making approximations
+ * @return         gain      index of the quantized gain
+*/
+static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
+ int n, int q0, od_coeff *y, int *itheta, int *max_theta, int *vk,
+ od_val16 beta, double *skip_diff, int robust, int is_keyframe, int pli,
+ const od_adapt_ctx *adapt, const int16_t *qm,
+ const int16_t *qm_inv, double pvq_norm_lambda, int speed) {
+  od_val32 g;
+  od_val32 gr;
+  od_coeff y_tmp[MAXN];
+  int i;
+  /* Number of pulses. */
+  int k;
+  /* Companded gain of x and reference, normalized to q. */
+  od_val32 cg;
+  od_val32 cgr;
+  int icgr;
+  int qg;
+  /* Best RDO cost (D + lamdba*R) so far. */
+  double best_cost;
+  double dist0;
+  /* Distortion (D) that corresponds to the best RDO cost. */
+  double best_dist;
+  double dist;
+  /* Sign of Householder reflection. */
+  int s;
+  /* Dimension on which Householder reflects. */
+  int m;
+  od_val32 theta;
+  double corr;
+  int best_k;
+  od_val32 best_qtheta;
+  od_val32 gain_offset;
+  int noref;
+  double skip_dist;
+  int cfl_enabled;
+  int skip;
+  double gain_weight;
+  od_val16 x16[MAXN];
+  od_val16 r16[MAXN];
+  int xshift;
+  int rshift;
+  /* Give more weight to gain error when calculating the total distortion. */
+  gain_weight = 1.0;
+  OD_ASSERT(n > 1);
+  corr = 0;
+#if !defined(OD_FLOAT_PVQ)
+  /* Shift needed to make x fit in 16 bits even after rotation.
+     This shift value is not normative (it can be changed without breaking
+     the bitstream) */
+  xshift = OD_MAXI(0, od_vector_log_mag(x0, n) - 15);
+  /* Shift needed to make the reference fit in 15 bits, so that the Householder
+     vector can fit in 16 bits.
+     This shift value *is* normative, and has to match the decoder. */
+  rshift = OD_MAXI(0, od_vector_log_mag(r0, n) - 14);
+#else
+  xshift = 0;
+  rshift = 0;
+#endif
+  /* Apply the quantization matrix and accumulate the input/reference
+     correlation. */
+  for (i = 0; i < n; i++) {
+#if defined(OD_FLOAT_PVQ)
+    /*This is slightly different from the original float PVQ code,
+       where the qm was applied in the accumulation in od_pvq_compute_gain and
+       the vectors were od_coeffs, not od_val16 (i.e. double).*/
+    x16[i] = x0[i]*(double)qm[i]*OD_QM_SCALE_1;
+    r16[i] = r0[i]*(double)qm[i]*OD_QM_SCALE_1;
+#else
+    x16[i] = OD_SHR_ROUND(x0[i]*qm[i], OD_QM_SHIFT + xshift);
+    r16[i] = OD_SHR_ROUND(r0[i]*qm[i], OD_QM_SHIFT + rshift);
+#endif
+    corr += OD_MULT16_16(x16[i], r16[i]);
+  }
+  cfl_enabled = is_keyframe && pli != 0 && !OD_DISABLE_CFL;
+  cg  = od_pvq_compute_gain(x16, n, q0, &g, beta, xshift);
+  cgr = od_pvq_compute_gain(r16, n, q0, &gr, beta, rshift);
+  if (cfl_enabled) cgr = OD_CGAIN_SCALE;
+  /* gain_offset is meant to make sure one of the quantized gains has
+     exactly the same gain as the reference. */
+#if defined(OD_FLOAT_PVQ)
+  icgr = (int)floor(.5 + cgr);
+#else
+  icgr = OD_SHR_ROUND(cgr, OD_CGAIN_SHIFT);
+#endif
+  gain_offset = cgr - OD_SHL(icgr, OD_CGAIN_SHIFT);
+  /* Start search with null case: gain=0, no pulse. */
+  qg = 0;
+  dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2;
+  best_dist = dist;
+  best_cost = dist + pvq_norm_lambda*od_pvq_rate(0, 0, -1, 0, adapt, NULL, 0,
+   n, is_keyframe, pli, speed);
+  noref = 1;
+  best_k = 0;
+  *itheta = -1;
+  *max_theta = 0;
+  OD_CLEAR(y, n);
+  best_qtheta = 0;
+  m = 0;
+  s = 1;
+  /* Normalize the correlation to a cosine in [-1, 1]; g and gr carry the
+     (shifted) vector norms. */
+  corr = corr/(1e-100 + g*(double)gr/OD_SHL(1, xshift + rshift));
+  corr = OD_MAXF(OD_MINF(corr, 1.), -1.);
+  if (is_keyframe) skip_dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2;
+  else {
+    skip_dist = gain_weight*(cg - cgr)*(cg - cgr)
+     + cgr*(double)cg*(2 - 2*corr);
+    skip_dist *= OD_CGAIN_SCALE_2;
+  }
+  if (!is_keyframe) {
+    /* noref, gain=0 isn't allowed, but skip is allowed. */
+    od_val32 scgr;
+    scgr = OD_MAXF(0,gain_offset);
+    if (icgr == 0) {
+      best_dist = gain_weight*(cg - scgr)*(cg - scgr)
+       + scgr*(double)cg*(2 - 2*corr);
+      best_dist *= OD_CGAIN_SCALE_2;
+    }
+    best_cost = best_dist + pvq_norm_lambda*od_pvq_rate(0, icgr, 0, 0, adapt,
+     NULL, 0, n, is_keyframe, pli, speed);
+    best_qtheta = 0;
+    *itheta = 0;
+    *max_theta = 0;
+    noref = 0;
+  }
+  dist0 = best_dist;
+  /* Predicted (theta) search: only worthwhile when the reference is nonzero
+     and positively correlated with the input. */
+  if (n <= OD_MAX_PVQ_SIZE && !od_vector_is_null(r0, n) && corr > 0) {
+    od_val16 xr[MAXN];
+    int gain_bound;
+    int prev_k;
+    pvq_search_item items[MAX_PVQ_ITEMS];
+    int idx;
+    int nitems;
+    double cos_dist;
+    idx = 0;
+    gain_bound = OD_SHR(cg - gain_offset, OD_CGAIN_SHIFT);
+    /* Perform theta search only if prediction is useful. */
+    theta = OD_ROUND32(OD_THETA_SCALE*acos(corr));
+    m = od_compute_householder(r16, n, gr, &s, rshift);
+    od_apply_householder(xr, x16, r16, n);
+    prev_k = 0;
+    for (i = m; i < n - 1; i++) xr[i] = xr[i + 1];
+    /* Compute all candidate PVQ searches within a reasonable range of gain
+       and theta. */
+    for (i = OD_MAXI(1, gain_bound - 1); i <= gain_bound + 1; i++) {
+      int j;
+      od_val32 qcg;
+      int ts;
+      int theta_lower;
+      int theta_upper;
+      /* Quantized companded gain */
+      qcg = OD_SHL(i, OD_CGAIN_SHIFT) + gain_offset;
+      /* Set angular resolution (in ra) to match the encoded gain */
+      ts = od_pvq_compute_max_theta(qcg, beta);
+      theta_lower = OD_MAXI(0, (int)floor(.5 +
+       theta*OD_THETA_SCALE_1*2/M_PI*ts) - 2);
+      theta_upper = OD_MINI(ts - 1, (int)ceil(theta*OD_THETA_SCALE_1*2/M_PI*ts));
+      /* Include the angles within a reasonable range. */
+      for (j = theta_lower; j <= theta_upper; j++) {
+        od_val32 qtheta;
+        qtheta = od_pvq_compute_theta(j, ts);
+        k = od_pvq_compute_k(qcg, j, qtheta, 0, n, beta, robust || is_keyframe);
+        items[idx].gain = i;
+        items[idx].theta = j;
+        items[idx].k = k;
+        items[idx].qcg = qcg;
+        items[idx].qtheta = qtheta;
+        items[idx].ts = ts;
+        idx++;
+        OD_ASSERT(idx < MAX_PVQ_ITEMS);
+      }
+    }
+    nitems = idx;
+    cos_dist = 0;
+    /* Sort PVQ search candidates in ascending order of pulses K so that
+       we can reuse all the previously searched pulses across searches. */
+    qsort(items, nitems, sizeof(items[0]),
+     (int (*)(const void *, const void *))items_compare);
+    /* Search for the best gain/theta in order. */
+    for (idx = 0; idx < nitems; idx++) {
+      int j;
+      od_val32 qcg;
+      int ts;
+      double cost;
+      double dist_theta;
+      double sin_prod;
+      od_val32 qtheta;
+      /* Quantized companded gain */
+      qcg = items[idx].qcg;
+      i = items[idx].gain;
+      j = items[idx].theta;
+      /* Set angular resolution (in ra) to match the encoded gain */
+      ts = items[idx].ts;
+      /* Search for the best angle within a reasonable range. */
+      qtheta = items[idx].qtheta;
+      k = items[idx].k;
+      /* Compute the minimal possible distortion by not taking the PVQ
+         cos_dist into account. */
+      dist_theta = 2 - 2.*od_pvq_cos(theta - qtheta)*OD_TRIG_SCALE_1;
+      dist = gain_weight*(qcg - cg)*(qcg - cg) + qcg*(double)cg*dist_theta;
+      dist *= OD_CGAIN_SCALE_2;
+      /* If we have no hope of beating skip (including a 1-bit worst-case
+         penalty), stop now. */
+      if (dist > dist0 + 1.0*pvq_norm_lambda && k != 0) continue;
+      sin_prod = od_pvq_sin(theta)*OD_TRIG_SCALE_1*od_pvq_sin(qtheta)*
+       OD_TRIG_SCALE_1;
+      /* PVQ search, using a gain of qcg*cg*sin(theta)*sin(qtheta) since
+         that's the factor by which cos_dist is multiplied to get the
+         distortion metric. */
+      if (k == 0) {
+        cos_dist = 0;
+        OD_CLEAR(y_tmp, n-1);
+      }
+      else if (k != prev_k) {
+        cos_dist = pvq_search_rdo_double(xr, n - 1, k, y_tmp,
+         qcg*(double)cg*sin_prod*OD_CGAIN_SCALE_2, pvq_norm_lambda, prev_k);
+      }
+      prev_k = k;
+      /* See Jmspeex' Journal of Dubious Theoretical Results. */
+      dist_theta = 2 - 2.*od_pvq_cos(theta - qtheta)*OD_TRIG_SCALE_1
+       + sin_prod*(2 - 2*cos_dist);
+      dist = gain_weight*(qcg - cg)*(qcg - cg) + qcg*(double)cg*dist_theta;
+      dist *= OD_CGAIN_SCALE_2;
+      /* Do approximate RDO. */
+      cost = dist + pvq_norm_lambda*od_pvq_rate(i, icgr, j, ts, adapt, y_tmp,
+       k, n, is_keyframe, pli, speed);
+      if (cost < best_cost) {
+        best_cost = cost;
+        best_dist = dist;
+        qg = i;
+        best_k = k;
+        best_qtheta = qtheta;
+        *itheta = j;
+        *max_theta = ts;
+        noref = 0;
+        OD_COPY(y, y_tmp, n - 1);
+      }
+    }
+  }
+  /* Don't bother with no-reference version if there's a reasonable
+     correlation. The only exception is luma on a keyframe because
+     H/V prediction is unreliable. */
+  if (n <= OD_MAX_PVQ_SIZE &&
+   ((is_keyframe && pli == 0) || corr < .5
+   || cg < (od_val32)(OD_SHL(2, OD_CGAIN_SHIFT)))) {
+    int gain_bound;
+    int prev_k;
+    gain_bound = OD_SHR(cg, OD_CGAIN_SHIFT);
+    prev_k = 0;
+    /* Search for the best gain (haven't determined reasonable range yet). */
+    for (i = OD_MAXI(1, gain_bound); i <= gain_bound + 1; i++) {
+      double cos_dist;
+      double cost;
+      od_val32 qcg;
+      qcg = OD_SHL(i, OD_CGAIN_SHIFT);
+      k = od_pvq_compute_k(qcg, -1, -1, 1, n, beta, robust || is_keyframe);
+      /* Compute the minimal possible distortion by not taking the PVQ
+         cos_dist into account. */
+      dist = gain_weight*(qcg - cg)*(qcg - cg);
+      dist *= OD_CGAIN_SCALE_2;
+      if (dist > dist0 && k != 0) continue;
+      cos_dist = pvq_search_rdo_double(x16, n, k, y_tmp,
+       qcg*(double)cg*OD_CGAIN_SCALE_2, pvq_norm_lambda, prev_k);
+      prev_k = k;
+      /* See Jmspeex' Journal of Dubious Theoretical Results. */
+      dist = gain_weight*(qcg - cg)*(qcg - cg)
+       + qcg*(double)cg*(2 - 2*cos_dist);
+      dist *= OD_CGAIN_SCALE_2;
+      /* Do approximate RDO. */
+      cost = dist + pvq_norm_lambda*od_pvq_rate(i, 0, -1, 0, adapt, y_tmp, k,
+       n, is_keyframe, pli, speed);
+      if (cost <= best_cost) {
+        best_cost = cost;
+        best_dist = dist;
+        qg = i;
+        noref = 1;
+        best_k = k;
+        *itheta = -1;
+        *max_theta = 0;
+        OD_COPY(y, y_tmp, n);
+      }
+    }
+  }
+  /* Commit the best candidate found and classify the skip condition. */
+  k = best_k;
+  theta = best_qtheta;
+  skip = 0;
+  if (noref) {
+    if (qg == 0) skip = OD_PVQ_SKIP_ZERO;
+  }
+  else {
+    if (!is_keyframe && qg == 0) {
+      skip = (icgr ? OD_PVQ_SKIP_ZERO : OD_PVQ_SKIP_COPY);
+    }
+    if (qg == icgr && *itheta == 0 && !cfl_enabled) skip = OD_PVQ_SKIP_COPY;
+  }
+  /* Synthesize like the decoder would. */
+  if (skip) {
+    if (skip == OD_PVQ_SKIP_COPY) OD_COPY(out, r0, n);
+    else OD_CLEAR(out, n);
+  }
+  else {
+    if (noref) gain_offset = 0;
+    g = od_gain_expand(OD_SHL(qg, OD_CGAIN_SHIFT) + gain_offset, q0, beta);
+    od_pvq_synthesis_partial(out, y, r16, n, noref, g, theta, m, s,
+     qm_inv);
+  }
+  *vk = k;
+  *skip_diff += skip_dist - best_dist;
+  /* Encode gain differently depending on whether we use prediction or not.
+     Special encoding on inter frames where qg=0 is allowed for noref=0
+     but not noref=1.*/
+  if (is_keyframe) return noref ? qg : neg_interleave(qg, icgr);
+  else return noref ? qg - 1 : neg_interleave(qg + 1, icgr + 1);
+}
+
+/** Encodes a single vector of integers (eg, a partition within a
+ *  coefficient block) using PVQ
+ *
+ * @param [in,out] ec         range encoder
+ * @param [in]     qg         quantized gain
+ * @param [in]     theta      quantized post-prediction theta
+ * @param [in]     max_theta  maximum possible quantized theta value
+ * @param [in]     in         coefficient vector to code
+ * @param [in]     n          number of coefficients in partition
+ * @param [in]     k          number of pulses in partition
+ * @param [in,out] model      entropy encoder state
+ * @param [in,out] adapt      adaptation context
+ * @param [in,out] exg        ExQ16 expectation of gain value
+ * @param [in,out] ext        ExQ16 expectation of theta value
+ * @param [in]     nodesync   do not use info that depend on the reference
+ * @param [in]     cdf_ctx    selects which cdf context to use
+ * @param [in]     is_keyframe whether we're encoding a keyframe
+ * @param [in]     code_skip  whether the "skip rest" flag is allowed
+ * @param [in]     skip_rest  when set, we skip all higher bands
+ * @param [in]     encode_flip whether we need to encode the CfL flip flag now
+ * @param [in]     flip       value of the CfL flip flag
+ */
+void pvq_encode_partition(od_ec_enc *ec,
+                                 int qg,
+                                 int theta,
+                                 int max_theta,
+                                 const od_coeff *in,
+                                 int n,
+                                 int k,
+                                 generic_encoder model[3],
+                                 od_adapt_ctx *adapt,
+                                 int *exg,
+                                 int *ext,
+                                 int nodesync,
+                                 int cdf_ctx,
+                                 int is_keyframe,
+                                 int code_skip,
+                                 int skip_rest,
+                                 int encode_flip,
+                                 int flip) {
+  int noref;
+  int id;
+  noref = (theta == -1);
+  /* id jointly encodes gain-nonzero, theta (clamped to small values, with
+     -1 meaning noref) and the "skip rest" flag. */
+  id = (qg > 0) + 2*OD_MINI(theta + 1,3) + 8*code_skip*skip_rest;
+  /* One symbol value is impossible in each frame type; remove the hole so
+     the CDF stays dense. */
+  if (is_keyframe) {
+    OD_ASSERT(id != 8);
+    if (id >= 8) id--;
+  }
+  else {
+    OD_ASSERT(id != 10);
+    if (id >= 10) id--;
+  }
+  /* Jointly code gain, theta and noref for small values. Then we handle
+     larger gain and theta values. For noref, theta = -1. */
+  od_encode_cdf_adapt(ec, id, &adapt->pvq.pvq_gaintheta_cdf[cdf_ctx][0],
+   8 + 7*code_skip, adapt->pvq.pvq_gaintheta_increment);
+  if (encode_flip) {
+    /* We could eventually do some smarter entropy coding here, but it would
+       have to be good enough to overcome the overhead of the entropy coder.
+       An early attempt using a "toggle" flag with simple adaptation wasn't
+       worth the trouble. */
+    od_ec_enc_bits(ec, flip, 1);
+  }
+  /* Residual gain beyond the jointly-coded range, with IIR-adapted
+     expectation. */
+  if (qg > 0) {
+    int tmp;
+    tmp = *exg;
+    generic_encode(ec, &model[!noref], qg - 1, -1, &tmp, 2);
+    OD_IIR_DIADIC(*exg, qg << 16, 2);
+  }
+  /* Residual theta beyond the jointly-coded range; bounded by max_theta
+     only when we may rely on the reference (nodesync off). */
+  if (theta > 1 && (nodesync || max_theta > 3)) {
+    int tmp;
+    tmp = *ext;
+    generic_encode(ec, &model[2], theta - 2, nodesync ? -1 : max_theta - 3,
+     &tmp, 2);
+    OD_IIR_DIADIC(*ext, theta << 16, 2);
+  }
+  od_encode_pvq_codeword(ec, &adapt->pvq.pvq_codeword_ctx, in,
+   n - (theta != -1), k);
+}
+
+/** Quantizes a scalar with rate-distortion optimization (RDO)
+ * @param [in] x      unquantized value
+ * @param [in] q      quantization step size
+ * @param [in] delta0 rate increase for encoding a 1 instead of a 0
+ * @param [in] pvq_norm_lambda enc->pvq_norm_lambda for quantized RDO
+ * @retval quantized value
+ */
+int od_rdo_quant(od_coeff x, int q, double delta0, double pvq_norm_lambda) {
+  int n;
+  /* Optimal quantization threshold is 1/2 + lambda*delta_rate/2. See
+     Jmspeex' Journal of Dubious Theoretical Results for details. */
+  /* NOTE(review): OD_DIV_R0 presumably divides rounding toward zero --
+     confirm against odintrin.h. */
+  n = OD_DIV_R0(abs(x), q);
+  if ((double)abs(x)/q < (double)n/2 + pvq_norm_lambda*delta0/(2*n)) {
+    /* Below the RD threshold: cheaper to code a zero. */
+    return 0;
+  }
+  else {
+    return OD_DIV_R0(x, q);
+  }
+}
+
+#if OD_SIGNAL_Q_SCALING
+/* Records the per-superblock quantizer scaling in the encoder state and,
+   unless the superblock is skipped, entropy-codes it with a CDF conditioned
+   on the above and left neighbors' scaling values. */
+void od_encode_quantizer_scaling(daala_enc_ctx *enc, int q_scaling,
+ int sbx, int sby, int skip) {
+  int nhsb;
+  OD_ASSERT(skip == !!skip);
+  nhsb = enc->state.nhsb;
+  OD_ASSERT(sbx < nhsb);
+  OD_ASSERT(sby < enc->state.nvsb);
+  OD_ASSERT(!skip || q_scaling == 0);
+  enc->state.sb_q_scaling[sby*nhsb + sbx] = q_scaling;
+  if (!skip) {
+    int above;
+    int left;
+    /* use value from neighbour if possible, otherwise use 0 */
+    above = sby > 0 ? enc->state.sb_q_scaling[(sby - 1)*enc->state.nhsb + sbx]
+     : 0;
+    left = sbx > 0 ? enc->state.sb_q_scaling[sby*enc->state.nhsb + (sbx - 1)]
+     : 0;
+    od_encode_cdf_adapt(&enc->ec, q_scaling,
+     enc->state.adapt.q_cdf[above + left*4], 4,
+     enc->state.adapt.q_increment);
+  }
+}
+#endif
+
+/** Encode a coefficient block (excepting DC) using PVQ
+ *
+ * @param [in,out] enc     daala encoder context
+ * @param [in]     ref     'reference' (prediction) vector
+ * @param [in]     in      coefficient block to quantize and encode
+ * @param [out]    out     quantized coefficient block
+ * @param [in]     q0      scale/quantizer
+ * @param [in]     pli     plane index
+ * @param [in]     bs      log of the block size minus two
+ * @param [in]     beta    per-band activity masking beta param
+ * @param [in]     robust  make stream robust to error in the reference
+ * @param [in]     is_keyframe whether we're encoding a keyframe
+ * @param [in]     q_scaling scaling factor to apply to quantizer
+ * @param [in]     bx      x-coordinate of this block
+ * @param [in]     by      y-coordinate of this block
+ * @param [in]     qm      QM with magnitude compensation
+ * @param [in]     qm_inv  Inverse of QM with magnitude compensation
+ * @param [in]     speed   Make search faster by making approximations
+ * @param [in]     pvq_info If null, this call is considered an RDO search
+ * @return         Returns 1 if both DC and AC coefficients are skipped,
+ *                 zero otherwise
+ */
+int od_pvq_encode(daala_enc_ctx *enc,
+                   od_coeff *ref,
+                   const od_coeff *in,
+                   od_coeff *out,
+                   int q_dc,
+                   int q_ac,
+                   int pli,
+                   int bs,
+                   const od_val16 *beta,
+                   int robust,
+                   int is_keyframe,
+                   int q_scaling,
+                   int bx,
+                   int by,
+                   const int16_t *qm,
+                   const int16_t *qm_inv,
+                   int speed,
+                   PVQ_INFO *pvq_info){
+  int theta[PVQ_MAX_PARTITIONS];
+  int max_theta[PVQ_MAX_PARTITIONS];
+  int qg[PVQ_MAX_PARTITIONS];
+  int k[PVQ_MAX_PARTITIONS];
+  od_coeff y[OD_TXSIZE_MAX*OD_TXSIZE_MAX];
+  int *exg;
+  int *ext;
+  int nb_bands;
+  int i;
+  const int *off;
+  int size[PVQ_MAX_PARTITIONS];
+  generic_encoder *model;
+  double skip_diff;
+  int tell;
+  uint16_t *skip_cdf;
+  od_rollback_buffer buf;
+  int dc_quant;
+  int flip;
+  int cfl_encoded;
+  int skip_rest;
+  int skip_dir;
+  int skip_theta_value;
+  /* const unsigned char *pvq_qm; */
+  double dc_rate;
+#if !OD_SIGNAL_Q_SCALING
+  OD_UNUSED(q_scaling);
+  OD_UNUSED(bx);
+  OD_UNUSED(by);
+#endif
+  /* TODO(yushin): Enable this for activity masking,
+     when pvq_qm_q4 is available in AOM. */
+  /* pvq_qm = &enc->state.pvq_qm_q4[pli][0]; */
+  exg = &enc->state.adapt.pvq.pvq_exg[pli][bs][0];
+  ext = enc->state.adapt.pvq.pvq_ext + bs*PVQ_MAX_PARTITIONS;
+  skip_cdf = enc->state.adapt.skip_cdf[2*bs + (pli != 0)];
+  model = enc->state.adapt.pvq.pvq_param_model;
+  nb_bands = OD_BAND_OFFSETS[bs][0];
+  off = &OD_BAND_OFFSETS[bs][1];
+  /*dc_quant = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, 0)] >> 4);*/
+  dc_quant = OD_MAXI(1, q_dc);
+  tell = 0;
+  for (i = 0; i < nb_bands; i++) size[i] = off[i+1] - off[i];
+  skip_diff = 0;
+  flip = 0;
+  /*If we are coding a chroma block of a keyframe, we are doing CfL.*/
+  if (pli != 0 && is_keyframe) {
+    od_val32 xy;
+    xy = 0;
+    /*Compute the dot-product of the first band of chroma with the luma ref.*/
+    for (i = off[0]; i < off[1]; i++) {
+#if defined(OD_FLOAT_PVQ)
+      xy += ref[i]*(double)qm[i]*OD_QM_SCALE_1*
+       (double)in[i]*(double)qm[i]*OD_QM_SCALE_1;
+#else
+      od_val32 rq;
+      od_val32 inq;
+      rq = ref[i]*qm[i];
+      inq = in[i]*qm[i];
+      xy += OD_SHR(rq*(int64_t)inq, OD_SHL(OD_QM_SHIFT + OD_CFL_FLIP_SHIFT,
+       1));
+#endif
+    }
+    /*If cos(theta) < 0, then |theta| > pi/2 and we should negate the ref.*/
+    if (xy < 0) {
+      flip = 1;
+      for(i = off[0]; i < off[nb_bands]; i++) ref[i] = -ref[i];
+    }
+  }
+  for (i = 0; i < nb_bands; i++) {
+    int q;
+    /* TODO(yushin): Enable this for activity masking,
+       when pvq_qm_q4 is available in AOM. */
+    /*q = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, i + 1)] >> 4);*/
+    q = OD_MAXI(1, q_ac);
+    qg[i] = pvq_theta(out + off[i], in + off[i], ref + off[i], size[i],
+     q, y + off[i], &theta[i], &max_theta[i],
+     &k[i], beta[i], &skip_diff, robust, is_keyframe, pli, &enc->state.adapt,
+     qm + off[i], qm_inv + off[i], enc->pvq_norm_lambda, speed);
+  }
+  od_encode_checkpoint(enc, &buf);
+  if (is_keyframe) out[0] = 0;
+  else {
+    int n;
+    n = OD_DIV_R0(abs(in[0] - ref[0]), dc_quant);
+    if (n == 0) {
+      out[0] = 0;
+#if PVQ_CHROMA_RD
+    } else if (pli == 0) {
+#else
+    } else {
+#endif
+      int tell2;
+      od_rollback_buffer dc_buf;
+
+      dc_rate = -OD_LOG2((double)(skip_cdf[3] - skip_cdf[2])/
+       (double)(skip_cdf[2] - skip_cdf[1]));
+      dc_rate += 1;
+
+      tell2 = od_ec_enc_tell_frac(&enc->ec);
+      od_encode_checkpoint(enc, &dc_buf);
+      generic_encode(&enc->ec, &enc->state.adapt.model_dc[pli],
+       n - 1, -1, &enc->state.adapt.ex_dc[pli][bs][0], 2);
+      tell2 = od_ec_enc_tell_frac(&enc->ec) - tell2;
+      dc_rate += tell2/8.0;
+      od_encode_rollback(enc, &dc_buf);
+
+      out[0] = od_rdo_quant(in[0] - ref[0], dc_quant, dc_rate,
+       enc->pvq_norm_lambda);
+    }
+  }
+  tell = od_ec_enc_tell_frac(&enc->ec);
+  /* Code as if we're not skipping. */
+  od_encode_cdf_adapt(&enc->ec, 2 + (out[0] != 0), skip_cdf,
+   4, enc->state.adapt.skip_increment);
+  if (pvq_info)
+    pvq_info->ac_dc_coded = 2 + (out[0] != 0);
+#if OD_SIGNAL_Q_SCALING
+  if (bs == OD_TXSIZES - 1 && pli == 0) {
+    od_encode_quantizer_scaling(enc, q_scaling, bx >> (OD_TXSIZES - 1),
+     by >> (OD_TXSIZES - 1), 0);
+  }
+#endif
+  cfl_encoded = 0;
+  skip_rest = 1;
+  skip_theta_value = is_keyframe ? -1 : 0;
+  for (i = 1; i < nb_bands; i++) {
+    if (theta[i] != skip_theta_value || qg[i]) skip_rest = 0;
+  }
+  skip_dir = 0;
+  if (nb_bands > 1) {
+    for (i = 0; i < 3; i++) {
+      int j;
+      int tmp;
+      tmp = 1;
+      // ToDo(yaowu): figure out better stop condition without gcc warning.
+      for (j = i + 1; j < nb_bands && j < PVQ_MAX_PARTITIONS; j += 3) {
+        if (theta[j] != skip_theta_value || qg[j]) tmp = 0;
+      }
+      skip_dir |= tmp << i;
+    }
+  }
+  if (theta[0] == skip_theta_value && qg[0] == 0 && skip_rest) nb_bands = 0;
+
+  /* NOTE: There was no other better place to put this function. */
+  if (pvq_info)
+    av1_store_pvq_enc_info(pvq_info, qg, theta, max_theta, k,
+      y, nb_bands, off, size,
+      skip_rest, skip_dir, bs);
+
+  for (i = 0; i < nb_bands; i++) {
+    int encode_flip;
+    /* Encode CFL flip bit just after the first time it's used. */
+    encode_flip = pli != 0 && is_keyframe && theta[i] != -1 && !cfl_encoded;
+    if (i == 0 || (!skip_rest && !(skip_dir & (1 << ((i - 1)%3))))) {
+      pvq_encode_partition(&enc->ec, qg[i], theta[i], max_theta[i], y + off[i],
+       size[i], k[i], model, &enc->state.adapt, exg + i, ext + i,
+       robust || is_keyframe, (pli != 0)*OD_TXSIZES*PVQ_MAX_PARTITIONS
+       + bs*PVQ_MAX_PARTITIONS + i, is_keyframe, i == 0 && (i < nb_bands - 1),
+       skip_rest, encode_flip, flip);
+    }
+    if (i == 0 && !skip_rest && bs > 0) {
+      od_encode_cdf_adapt(&enc->ec, skip_dir,
+       &enc->state.adapt.pvq.pvq_skip_dir_cdf[(pli != 0) + 2*(bs - 1)][0], 7,
+       enc->state.adapt.pvq.pvq_skip_dir_increment);
+    }
+    if (encode_flip) cfl_encoded = 1;
+  }
+  tell = od_ec_enc_tell_frac(&enc->ec) - tell;
+  /* Account for the rate of skipping the AC, based on the same DC decision
+     we made when trying to not skip AC. */
+  {
+    double skip_rate;
+    if (out[0] != 0) {
+      skip_rate = -OD_LOG2((skip_cdf[1] - skip_cdf[0])/
+     (double)skip_cdf[3]);
+    }
+    else {
+      skip_rate = -OD_LOG2(skip_cdf[0]/
+     (double)skip_cdf[3]);
+    }
+    tell -= (int)floor(.5+8*skip_rate);
+  }
+  if (nb_bands == 0 || skip_diff <= enc->pvq_norm_lambda/8*tell) {
+    if (is_keyframe) out[0] = 0;
+    else {
+      int n;
+      n = OD_DIV_R0(abs(in[0] - ref[0]), dc_quant);
+      if (n == 0) {
+        out[0] = 0;
+#if PVQ_CHROMA_RD
+      } else if (pli == 0) {
+#else
+      } else {
+#endif
+        int tell2;
+        od_rollback_buffer dc_buf;
+
+        dc_rate = -OD_LOG2((double)(skip_cdf[1] - skip_cdf[0])/
+         (double)skip_cdf[0]);
+        dc_rate += 1;
+
+        tell2 = od_ec_enc_tell_frac(&enc->ec);
+        od_encode_checkpoint(enc, &dc_buf);
+        generic_encode(&enc->ec, &enc->state.adapt.model_dc[pli],
+         n - 1, -1, &enc->state.adapt.ex_dc[pli][bs][0], 2);
+        tell2 = od_ec_enc_tell_frac(&enc->ec) - tell2;
+        dc_rate += tell2/8.0;
+        od_encode_rollback(enc, &dc_buf);
+
+        out[0] = od_rdo_quant(in[0] - ref[0], dc_quant, dc_rate,
+         enc->pvq_norm_lambda);
+      }
+    }
+    /* We decide to skip, roll back everything as it was before. */
+    od_encode_rollback(enc, &buf);
+    od_encode_cdf_adapt(&enc->ec, out[0] != 0, skip_cdf,
+     4, enc->state.adapt.skip_increment);
+    if (pvq_info)
+      pvq_info->ac_dc_coded = (out[0] != 0);
+#if OD_SIGNAL_Q_SCALING
+    if (bs == OD_TXSIZES - 1 && pli == 0) {
+      int skip;
+      skip = out[0] == 0;
+      if (skip) {
+        q_scaling = 0;
+      }
+      od_encode_quantizer_scaling(enc, q_scaling, bx >> (OD_TXSIZES - 1),
+       by >> (OD_TXSIZES - 1), skip);
+    }
+#endif
+    if (is_keyframe) for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = 0;
+    else for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = ref[i];
+    if (out[0] == 0) return 1;
+  }
+  return 0;
+}
diff --git a/av1/encoder/pvq_encoder.h b/av1/encoder/pvq_encoder.h
new file mode 100644
index 0000000..6cf1c3b
--- /dev/null
+++ b/av1/encoder/pvq_encoder.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_pvq_encoder_H)
+# define _pvq_encoder_H (1)
+# include "aom_dsp/entenc.h"
+# include "av1/common/blockd.h"
+# include "av1/common/pvq.h"
+# include "av1/encoder/encint.h"
+
+#define PVQ_CHROMA_RD 1
+
+void od_encode_band_pvq_splits(od_ec_enc *ec, od_pvq_codeword_ctx *adapt,
+ const int *y, int n, int k, int level);
+
+void od_laplace_encode_special(od_ec_enc *enc, int x, unsigned decay, int max);
+void od_laplace_encode(od_ec_enc *enc, int x, int ex_q8, int k);
+void od_laplace_encode_vector(od_ec_enc *enc, const od_coeff *y, int n, int k,
+                                  int32_t *curr, const int32_t *means);
+
+#if OD_SIGNAL_Q_SCALING
+void od_encode_quantizer_scaling(daala_enc_ctx *enc, int q_scaling, int bx,
+ int by, int skip);
+#endif
+
+void pvq_encode_partition(od_ec_enc *ec,
+                                 int qg,
+                                 int theta,
+                                 int max_theta,
+                                 const od_coeff *in,
+                                 int n,
+                                 int k,
+                                 generic_encoder model[3],
+                                 od_adapt_ctx *adapt,
+                                 int *exg,
+                                 int *ext,
+                                 int nodesync,
+                                 int cdf_ctx,
+                                 int is_keyframe,
+                                 int code_skip,
+                                 int skip_rest,
+                                 int encode_flip,
+                                 int flip);
+
+int od_pvq_encode(daala_enc_ctx *enc, od_coeff *ref, const od_coeff *in,
+ od_coeff *out, int q_dc, int q_ac, int pli, int bs, const od_val16 *beta, int robust,
+ int is_keyframe, int q_scaling, int bx, int by, const int16_t *qm,
+ const int16_t *qm_inv, int speed, PVQ_INFO *pvq_info);
+
+#endif
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 792654a..7daa992 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -49,7 +49,9 @@
 #include "av1/encoder/rd.h"
 #include "av1/encoder/rdopt.h"
 #include "av1/encoder/tokenize.h"
-
+#if CONFIG_PVQ
+#include "av1/encoder/pvq_encoder.h"
+#endif
 #if CONFIG_DUAL_FILTER
 #if CONFIG_EXT_INTERP
 static const int filter_sets[25][2] = {
@@ -819,6 +821,33 @@
   *out_dist_sum = dist_sum;
 }
 
+#if CONFIG_PVQ
+// Without PVQ, av1_block_error_c() return two kind of errors,
+// 1) reconstruction (i.e. decoded) error and
+// 2) Squared sum of transformed residue (i.e. 'coeff')
+// However, if PVQ is enabled, coeff does not keep the transformed residue
+// but instead a transformed original is kept.
+// Hence, new parameter ref vector (i.e. transformed predicted signal)
+// is required to derive the residue signal,
+// i.e. coeff - ref = residue (all transformed).
+
+// TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
+// a separate function that does not do the extra computations for ssz.
+int64_t av1_block_error2_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
+                           const tran_low_t *ref, intptr_t block_size,
+                           int64_t *ssz) {
+  int64_t error;
+
+  // Use the existing sse codes for calculating distortion of decoded signal:
+  // i.e. (orig - decoded)^2
+  error = av1_block_error_fp(coeff, dqcoeff, block_size);
+  // prediction residue^2 = (orig - ref)^2
+  *ssz = av1_block_error_fp(coeff, ref, block_size);
+
+  return error;
+}
+#endif
+
 int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                           intptr_t block_size, int64_t *ssz) {
   int i;
@@ -870,6 +899,7 @@
 }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
+#if !CONFIG_PVQ
 /* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
  * decide whether to include cost of a trailing EOB node or not (i.e. we
  * can skip this if the last coefficient in this transform block, e.g. the
@@ -982,6 +1012,7 @@
 
   return cost;
 }
+#endif
 
 static void dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block,
                        int blk_row, int blk_col, TX_SIZE tx_size,
@@ -998,11 +1029,18 @@
     int shift = (MAX_TX_SCALE - get_tx_scale(xd, tx_type, tx_size)) * 2;
     tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
     tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+#if CONFIG_PVQ
+    tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
+#endif
 #if CONFIG_AOM_HIGHBITDEPTH
     const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
     *out_dist =
         av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
         shift;
+#elif CONFIG_PVQ
+    *out_dist = av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length,
+                                   &this_sse) >>
+                shift;
 #else
     *out_dist =
         av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
@@ -1072,6 +1110,7 @@
   }
 }
 
+#if !CONFIG_PVQ
 static int rate_block(int plane, int block, int coeff_ctx, TX_SIZE tx_size,
                       struct rdcost_block_args *args) {
   return av1_cost_coeffs(&args->cpi->common, args->x, plane, block, coeff_ctx,
@@ -1079,6 +1118,7 @@
                          args->scan_order->neighbors,
                          args->use_fast_coef_costing);
 }
+#endif
 
 static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
                                TX_SIZE tx_size) {
@@ -1194,11 +1234,13 @@
     args->exit_early = 1;
     return;
   }
-
+#if !CONFIG_PVQ
   rate = rate_block(plane, block, coeff_ctx, tx_size, args);
   args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0);
   args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0);
-
+#else
+  rate = x->rate;
+#endif
   rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
   rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);
 
@@ -1214,8 +1256,11 @@
     args->exit_early = 1;
     return;
   }
-
+#if !CONFIG_PVQ
   args->skippable &= !x->plane[plane].eobs[block];
+#else
+  args->skippable &= x->pvq_skip[plane];
+#endif
 }
 
 static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi, int *rate,
@@ -1552,7 +1597,14 @@
 #if CONFIG_EXT_TX
   if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
       !xd->lossless[mbmi->segment_id]) {
-    for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+#if CONFIG_PVQ
+    od_rollback_buffer pre_buf, post_buf;
+
+    od_encode_checkpoint(&x->daala_enc, &pre_buf);
+    od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
+
+    for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
       if (is_inter) {
         if (x->use_default_inter_tx_type &&
             tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
@@ -1575,7 +1627,9 @@
 
       txfm_rd_in_plane(x, cpi, &r, &d, &s, &psse, ref_best_rd, 0, bs,
                        mbmi->tx_size, cpi->sf.use_fast_coef_costing);
-
+#if CONFIG_PVQ
+      od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
       if (r == INT_MAX) continue;
       if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) {
         if (is_inter) {
@@ -1603,8 +1657,14 @@
         *rate = r;
         *skip = s;
         *sse = psse;
+#if CONFIG_PVQ
+        od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
       }
     }
+#if CONFIG_PVQ
+    od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
   } else {
     mbmi->tx_type = DCT_DCT;
     txfm_rd_in_plane(x, cpi, rate, distortion, skip, sse, ref_best_rd, 0, bs,
@@ -1691,6 +1751,9 @@
   TX_TYPE tx_type, best_tx_type = DCT_DCT;
   int prune = 0;
 
+#if CONFIG_PVQ
+  od_rollback_buffer buf;
+#endif
   if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
     // passing -1 in for tx_type indicates that all 1D
     // transforms should be considered for pruning
@@ -1701,6 +1764,10 @@
   *skip = 0;
   *psse = INT64_MAX;
 
+#if CONFIG_PVQ
+  od_encode_checkpoint(&x->daala_enc, &buf);
+#endif
+
   for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
 #if CONFIG_REF_MV
     if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
@@ -1728,6 +1795,11 @@
 #if !CONFIG_EXT_TX
   if (mbmi->tx_size >= TX_32X32) assert(mbmi->tx_type == DCT_DCT);
 #endif
+#if CONFIG_PVQ
+  if (best_tx < TX_SIZES)
+    txfm_rd_in_plane(x, cpi, &r, &d, &s, &sse, ref_best_rd, 0, bs, best_tx,
+                     cpi->sf.use_fast_coef_costing);
+#endif
 }
 
 static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate,
@@ -1915,7 +1987,9 @@
     PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a,
     ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion,
     BLOCK_SIZE bsize, int *y_skip, int64_t rd_thresh) {
+#if !CONFIG_PVQ
   const AV1_COMMON *const cm = &cpi->common;
+#endif
   PREDICTION_MODE mode;
   MACROBLOCKD *const xd = &x->e_mbd;
   int64_t best_rd = rd_thresh;
@@ -1936,6 +2010,12 @@
   uint16_t best_dst16[8 * 8];
 #endif
 
+#if CONFIG_PVQ
+  od_rollback_buffer pre_buf, post_buf;
+  od_encode_checkpoint(&x->daala_enc, &pre_buf);
+  od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
+
   memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
   memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
   xd->mi[0]->mbmi.tx_size = TX_4X4;
@@ -2066,6 +2146,10 @@
   }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
+#if CONFIG_PVQ
+  od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
+
   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
     int64_t this_rd;
     int ratey = 0;
@@ -2089,15 +2173,54 @@
         const int block = (row + idy) * 2 + (col + idx);
         const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
         uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
+#if !CONFIG_PVQ
         int16_t *const src_diff =
             av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
+#else
+        int lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+        const int diff_stride = 8;
+        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
+        tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, block);
+        tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
+        int16_t *pred = &pd->pred[4 * (row * diff_stride + col)];
+        int16_t *src_int16 = &p->src_int16[4 * (row * diff_stride + col)];
+        int i, j, tx_blk_size;
+        TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+        int rate_pvq;
+        int skip;
+#endif
         xd->mi[0]->bmi[block].as_mode = mode;
         av1_predict_intra_block(xd, pd->width, pd->height, TX_4X4, mode, dst,
                                 dst_stride, dst, dst_stride, col + idx,
                                 row + idy, 0);
+#if !CONFIG_PVQ
         aom_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
+#else
+        if (lossless) tx_type = DCT_DCT;
+        // transform block size in pixels
+        tx_blk_size = 4;
+
+        // copy uint8 orig and predicted block to int16 buffer
+        // in order to use existing VP10 transform functions
+        for (j = 0; j < tx_blk_size; j++)
+          for (i = 0; i < tx_blk_size; i++) {
+            src_int16[diff_stride * j + i] = src[src_stride * j + i];
+            pred[diff_stride * j + i] = dst[dst_stride * j + i];
+          }
+        {
+          FWD_TXFM_PARAM fwd_txfm_param;
+          fwd_txfm_param.tx_type = tx_type;
+          fwd_txfm_param.tx_size = TX_4X4;
+          fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
+          fwd_txfm_param.rd_transform = 0;
+          fwd_txfm_param.lossless = lossless;
+          fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
+          fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
+        }
+#endif
 
         if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+#if !CONFIG_PVQ
           TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
           const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type, 0);
           const int coeff_ctx =
@@ -2115,13 +2238,28 @@
           *(tempa + idx) = !(p->eobs[block] == 0);
           *(templ + idy) = !(p->eobs[block] == 0);
           can_skip &= (p->eobs[block] == 0);
+#else
+          skip = av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff,
+                                       &p->eobs[block], pd->dequant, 0, TX_4X4,
+                                       tx_type, &rate_pvq, x->pvq_speed, NULL);
+          ratey += rate_pvq;
+#endif
           if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
             goto next;
-          av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
-                               dst_stride, p->eobs[block], DCT_DCT, 1);
+#if CONFIG_PVQ
+          if (!skip) {
+            for (j = 0; j < tx_blk_size; j++)
+              for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0;
+#endif
+            av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
+                                 dst_stride, p->eobs[block], DCT_DCT, 1);
+#if CONFIG_PVQ
+          }
+#endif
         } else {
           int64_t dist;
           unsigned int tmp;
+#if !CONFIG_PVQ
           TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
           const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type, 0);
           const int coeff_ctx =
@@ -2137,9 +2275,13 @@
           ratey += av1_cost_coeffs(cm, x, 0, block, coeff_ctx, TX_4X4,
                                    scan_order->scan, scan_order->neighbors,
                                    cpi->sf.use_fast_coef_costing);
-          *(tempa + idx) = !(p->eobs[block] == 0);
-          *(templ + idy) = !(p->eobs[block] == 0);
-          can_skip &= (p->eobs[block] == 0);
+#else
+          skip = av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff,
+                                       &p->eobs[block], pd->dequant, 0, TX_4X4,
+                                       tx_type, &rate_pvq, x->pvq_speed, NULL);
+          ratey += rate_pvq;
+#endif
+          // No need for av1_block_error2_c because the ssz is unused
           av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
                                dst_stride, p->eobs[block], tx_type, 0);
           cpi->fn_ptr[BLOCK_4X4].vf(src, src_stride, dst, dst_stride, &tmp);
@@ -2150,6 +2292,14 @@
           // in the frequency domain, the overhead of encoding effort is low.
           if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
             goto next;
+#if CONFIG_PVQ
+          if (!skip) {
+            for (j = 0; j < tx_blk_size; j++)
+              for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0;
+#endif
+#if CONFIG_PVQ
+          }
+#endif
         }
       }
     }
@@ -2166,15 +2316,25 @@
       *best_mode = mode;
       memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
       memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
+#if CONFIG_PVQ
+      od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
       for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
         memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                num_4x4_blocks_wide * 4);
     }
   next : {}
-  }
+#if CONFIG_PVQ
+    od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
+  }  // mode decision loop
 
   if (best_rd >= rd_thresh) return best_rd;
 
+#if CONFIG_PVQ
+  od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
+
   if (y_skip) *y_skip &= best_can_skip;
 
   for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
@@ -2681,6 +2841,12 @@
   const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, 0);
   const PREDICTION_MODE FINAL_MODE_SEARCH = TM_PRED + 1;
   const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+#if CONFIG_PVQ
+  od_rollback_buffer pre_buf, post_buf;
+
+  od_encode_checkpoint(&x->daala_enc, &pre_buf);
+  od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
   bmode_costs = cpi->y_mode_costs[A][L];
 
 #if CONFIG_EXT_INTRA
@@ -2722,6 +2888,9 @@
     } else {
       mic->mbmi.mode = mode_idx;
     }
+#if CONFIG_PVQ
+    od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
 #if CONFIG_EXT_INTRA
     is_directional_mode =
         (mic->mbmi.mode != DC_PRED && mic->mbmi.mode != TM_PRED);
@@ -2802,9 +2971,16 @@
       *rate_tokenonly = this_rate_tokenonly;
       *distortion = this_distortion;
       *skippable = s;
+#if CONFIG_PVQ
+      od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
     }
   }
 
+#if CONFIG_PVQ
+  od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
+
 #if CONFIG_PALETTE
   if (cpi->common.allow_screen_content_tools)
     rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED],
@@ -2870,12 +3046,12 @@
   int is_cost_valid = 1;
 
   if (ref_best_rd < 0) is_cost_valid = 0;
-
+#if !CONFIG_PVQ
   if (is_inter_block(mbmi) && is_cost_valid) {
     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
       av1_subtract_plane(x, bsize, plane);
   }
-
+#endif
   *rate = 0;
   *distortion = 0;
   *sse = 0;
@@ -3050,7 +3226,19 @@
   rd_stats->rate += txb_coeff_cost;
   rd_stats->skip &= (p->eobs[block] == 0);
 #if CONFIG_RD_DEBUG
-  rd_stats->txb_coeff_cost[plane] += txb_coeff_cost;
+  {
+    int idx, idy;
+    rd_stats->txb_coeff_cost[plane] += txb_coeff_cost;
+
+    for (idy = 0; idy < txb_h; ++idy)
+      for (idx = 0; idx < txb_w; ++idx)
+        rd_stats->txb_coeff_cost_map[plane][blk_row + idy][blk_col + idx] = 0;
+
+    rd_stats->txb_coeff_cost_map[plane][blk_row][blk_col] = txb_coeff_cost;
+
+    assert(blk_row < 16);
+    assert(blk_col < 16);
+  }
 #endif
 }
 
@@ -3438,7 +3626,7 @@
   mbmi->min_tx_size = best_min_tx_size;
 #if CONFIG_RD_DEBUG
   // record plane y's transform block coefficient cost
-  mbmi->txb_coeff_cost[0] = rd_stats->txb_coeff_cost[0];
+  mbmi->rd_stats = *rd_stats;
 #endif
   memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
 }
@@ -3906,6 +4094,11 @@
   int64_t best_rd = INT64_MAX, this_rd;
   int this_rate_tokenonly, this_rate, s;
   int64_t this_distortion, this_sse;
+#if CONFIG_PVQ
+  od_rollback_buffer buf;
+
+  od_encode_checkpoint(&x->daala_enc, &buf);
+#endif
 #if CONFIG_PALETTE
   const int rows =
       (4 * num_4x4_blocks_high_lookup[bsize]) >> (xd->plane[1].subsampling_y);
@@ -3944,8 +4137,12 @@
         continue;
     } else {
       if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
-                            &this_sse, bsize, best_rd))
+                            &this_sse, bsize, best_rd)) {
+#if CONFIG_PVQ
+        od_encode_rollback(&x->daala_enc, &buf);
+#endif
         continue;
+      }
     }
     this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mbmi->mode][mode];
     if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode)
@@ -3953,8 +4150,12 @@
                                       MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
 #else
     if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
-                          &this_sse, bsize, best_rd))
+                          &this_sse, bsize, best_rd)) {
+#if CONFIG_PVQ
+      od_encode_rollback(&x->daala_enc, &buf);
+#endif
       continue;
+    }
     this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mbmi->mode][mode];
 #endif  // CONFIG_EXT_INTRA
 #if CONFIG_FILTER_INTRA
@@ -3968,7 +4169,21 @@
           av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
 #endif  // CONFIG_PALETTE
 
+#if CONFIG_PVQ
+    // For chroma channels, multiply lambda by 0.5 when doing intra prediction
+    // NOTE: Chroma intra prediction itself has a separate RDO,
+    // though final chroma intra mode's D and R is simply added to
+    // those of luma then global RDO is performed to decide the modes of SB.
+    // Also, for chroma, the RDO cannot decide tx_size (follow luma's decision)
+    // or tx_type (DCT only), then only the intra prediction is
+    // chroma's own mode decision based on separate RDO.
+    // TODO(yushin) : Seek for more reasonable solution than this.
+    this_rd = RDCOST(x->rdmult >> (1 * PVQ_CHROMA_RD), x->rddiv, this_rate,
+                     this_distortion);
+    od_encode_rollback(&x->daala_enc, &buf);
+#else
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+#endif
 
     if (this_rd < best_rd) {
       mode_selected = mode;
@@ -4121,13 +4336,14 @@
   int gmtype_cost[GLOBAL_MOTION_TYPES];
   int bits;
   av1_cost_tokens(gmtype_cost, probs, av1_global_motion_types_tree);
-  if (gm->motion_params.wmmat[2].as_int) {
+  if (gm->motion_params.wmmat[5] || gm->motion_params.wmmat[4]) {
     bits = (GM_ABS_TRANS_BITS + 1) * 2 + 4 * GM_ABS_ALPHA_BITS + 4;
-  } else if (gm->motion_params.wmmat[1].as_int) {
+  } else if (gm->motion_params.wmmat[3] || gm->motion_params.wmmat[2]) {
     bits = (GM_ABS_TRANS_BITS + 1) * 2 + 2 * GM_ABS_ALPHA_BITS + 2;
   } else {
-    bits =
-        (gm->motion_params.wmmat[0].as_int ? ((GM_ABS_TRANS_BITS + 1) * 2) : 0);
+    bits = ((gm->motion_params.wmmat[1] || gm->motion_params.wmmat[0])
+                ? ((GM_ABS_TRANS_BITS + 1) * 2)
+                : 0);
   }
   return bits ? (bits << AV1_PROB_COST_SHIFT) + gmtype_cost[gm->gmtype] : 0;
 }
@@ -4205,14 +4421,14 @@
       break;
     case ZEROMV:
 #if CONFIG_GLOBAL_MOTION
-      this_mv[0].as_int = cpi->common.global_motion[mbmi->ref_frame[0]]
-                              .motion_params.wmmat[0]
-                              .as_int;
+      this_mv[0].as_int =
+          gm_get_motion_vector(&cpi->common.global_motion[mbmi->ref_frame[0]])
+              .as_int;
       thismvcost += GLOBAL_MOTION_RATE(mbmi->ref_frame[0]);
       if (is_compound) {
-        this_mv[1].as_int = cpi->common.global_motion[mbmi->ref_frame[1]]
-                                .motion_params.wmmat[0]
-                                .as_int;
+        this_mv[1].as_int =
+            gm_get_motion_vector(&cpi->common.global_motion[mbmi->ref_frame[1]])
+                .as_int;
         thismvcost += GLOBAL_MOTION_RATE(mbmi->ref_frame[1]);
       }
 #else   // CONFIG_GLOBAL_MOTION
@@ -4315,7 +4531,9 @@
                                        int64_t *distortion, int64_t *sse,
                                        ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                                        int ir, int ic, int mi_row, int mi_col) {
+#if !CONFIG_PVQ
   const AV1_COMMON *const cm = &cpi->common;
+#endif
   int k;
   MACROBLOCKD *xd = &x->e_mbd;
   struct macroblockd_plane *const pd = &xd->plane[0];
@@ -4334,9 +4552,15 @@
   TX_SIZE tx_size = mi->mbmi.tx_size;
 
   TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, tx_size);
-  const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
   const int num_4x4_w = tx_size_wide_unit[tx_size];
   const int num_4x4_h = tx_size_high_unit[tx_size];
+#if !CONFIG_PVQ
+  const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
+#else
+  (void)cpi;
+  (void)ta;
+  (void)tl;
+#endif
 
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
   assert(IMPLIES(xd->lossless[mi->mbmi.segment_id], tx_size == TX_4X4));
@@ -4349,6 +4573,7 @@
 
   av1_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);
 
+#if !CONFIG_PVQ
 #if CONFIG_AOM_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     aom_highbd_subtract_block(
@@ -4364,19 +4589,33 @@
                      av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                      8, src, p->src.stride, dst, pd->dst.stride);
 #endif  // CONFIG_AOM_HIGHBITDEPTH
+#endif  // !CONFIG_PVQ
 
   k = i;
   for (idy = 0; idy < height / 4; idy += num_4x4_h) {
     for (idx = 0; idx < width / 4; idx += num_4x4_w) {
       int64_t dist, ssz, rd, rd1, rd2;
       int block;
+#if !CONFIG_PVQ
       int coeff_ctx;
+#else
+      const int src_stride = p->src.stride;
+      const int dst_stride = pd->dst.stride;
+      const int diff_stride = 8;
+      tran_low_t *coeff;
+      tran_low_t *dqcoeff;
+      tran_low_t *ref_coeff;
+      int16_t *pred = &pd->pred[4 * (ir * diff_stride + ic)];
+      int16_t *src_int16 = &p->src_int16[4 * (ir * diff_stride + ic)];
+      int ii, j, tx_blk_size;
+      int rate_pvq;
+#endif
       k += (idy * 2 + idx);
       if (tx_size == TX_4X4)
         block = k;
       else
         block = (i ? 2 : 0);
-
+#if !CONFIG_PVQ
       coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)), *(tl + (k >> 1)));
 #if CONFIG_NEW_QUANT
       av1_xform_quant_fp_nuq(cm, x, 0, block, idy + (i >> 1), idx + (i & 0x01),
@@ -4387,13 +4626,51 @@
 #endif  // CONFIG_NEW_QUANT
       if (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)
         av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx);
+#else
+      coeff = BLOCK_OFFSET(p->coeff, k);
+      dqcoeff = BLOCK_OFFSET(pd->dqcoeff, k);
+      ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, k);
+
+      // transform block size in pixels
+      tx_blk_size = 4;
+
+      // copy uint8 orig and predicted block to int16 buffer
+      // in order to use existing VP10 transform functions
+      for (j = 0; j < tx_blk_size; j++)
+        for (ii = 0; ii < tx_blk_size; ii++) {
+          src_int16[diff_stride * j + ii] =
+              src[src_stride * (j + 4 * idy) + (ii + 4 * idx)];
+          pred[diff_stride * j + ii] =
+              dst[dst_stride * (j + 4 * idy) + (ii + 4 * idx)];
+        }
+
+      {
+        FWD_TXFM_PARAM fwd_txfm_param;
+        fwd_txfm_param.tx_type = DCT_DCT;
+        fwd_txfm_param.tx_size = TX_4X4;
+        fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
+        fwd_txfm_param.rd_transform = 0;
+        fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+        fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
+        fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
+      }
+      av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff,
+                            &p->eobs[k], pd->dequant, 0, TX_4X4, tx_type,
+                            &rate_pvq, x->pvq_speed, NULL);
+#endif
+
       dist_block(cpi, x, 0, block, idy + (i >> 1), idx + (i & 0x1), tx_size,
                  &dist, &ssz);
       thisdistortion += dist;
       thissse += ssz;
+#if !CONFIG_PVQ
       thisrate +=
           av1_cost_coeffs(cm, x, 0, block, coeff_ctx, tx_size, scan_order->scan,
                           scan_order->neighbors, cpi->sf.use_fast_coef_costing);
+#else
+      thisrate += rate_pvq;
+#endif
       *(ta + (k & 1)) = !(p->eobs[block] == 0);
       *(tl + (k >> 1)) = !(p->eobs[block] == 0);
 #if CONFIG_EXT_TX
@@ -4859,6 +5136,11 @@
   const int has_second_rf = has_second_ref(mbmi);
   const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+#if CONFIG_PVQ
+  od_rollback_buffer pre_buf;
+
+  od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
   mbmi->tx_size =
       xd->lossless[mbmi->segment_id] ? TX_4X4 : max_txsize_rect_lookup[bsize];
@@ -4916,6 +5198,11 @@
       int mv_idx;
       int_mv ref_mvs_sub8x8[2][2];
 #endif  // CONFIG_EXT_INTER
+#if CONFIG_PVQ
+      od_rollback_buffer idx_buf, post_buf;
+      od_encode_checkpoint(&x->daala_enc, &idx_buf);
+      od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
 
       for (ref = 0; ref < 1 + has_second_rf; ++ref) {
         const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
@@ -4926,7 +5213,7 @@
 #endif  // CONFIG_EXT_INTER
 #if CONFIG_GLOBAL_MOTION
         frame_mv[ZEROMV][frame].as_int =
-            cm->global_motion[frame].motion_params.wmmat[0].as_int;
+            gm_get_motion_vector(&cm->global_motion[frame]).as_int;
 #else   // CONFIG_GLOBAL_MOTION
         frame_mv[ZEROMV][frame].as_int = 0;
 #endif  // CONFIG_GLOBAL_MOTION
@@ -5085,6 +5372,9 @@
                sizeof(bsi->rdstat[index][mode_idx].ta));
         memcpy(bsi->rdstat[index][mode_idx].tl, t_left,
                sizeof(bsi->rdstat[index][mode_idx].tl));
+#if CONFIG_PVQ
+        od_encode_rollback(&x->daala_enc, &idx_buf);
+#endif
 
         // motion search for newmv (single predictor case only)
         if (!has_second_rf &&
@@ -5468,6 +5758,9 @@
 #endif
               mode_selected = this_mode;
               new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
+#if CONFIG_PVQ
+              od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
             }
             continue;
           }
@@ -5513,6 +5806,10 @@
 #endif
           mode_selected = this_mode;
           new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
+
+#if CONFIG_PVQ
+          od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
         }
       } /*for each 4x4 mode*/
 
@@ -5526,12 +5823,18 @@
 #endif  // CONFIG_EXT_INTER
             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
         bsi->segment_rd = INT64_MAX;
+#if CONFIG_PVQ
+        od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
         return INT64_MAX;
       }
 
       mode_idx = INTER_OFFSET(mode_selected);
       memcpy(t_above, bsi->rdstat[index][mode_idx].ta, sizeof(t_above));
       memcpy(t_left, bsi->rdstat[index][mode_idx].tl, sizeof(t_left));
+#if CONFIG_PVQ
+      od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
 
 #if CONFIG_EXT_INTER
       mv_idx = (mode_selected == NEWFROMNEARMV) ? 1 : 0;
@@ -5564,10 +5867,16 @@
 #endif  // CONFIG_EXT_INTER
             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
         bsi->segment_rd = INT64_MAX;
+#if CONFIG_PVQ
+        od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
         return INT64_MAX;
       }
     }
   } /* for each label */
+#if CONFIG_PVQ
+  od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
 
   bsi->r = br;
   bsi->d = bd;
@@ -6806,11 +7115,14 @@
 
 #if CONFIG_REF_MV
 #if CONFIG_EXT_INTER
-  if (this_mode == NEAREST_NEARESTMV) {
+  if (this_mode == NEAREST_NEARESTMV)
 #else
-  if (this_mode == NEARESTMV && is_comp_pred) {
-    uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+  if (this_mode == NEARESTMV && is_comp_pred)
 #endif  // CONFIG_EXT_INTER
+  {
+#if !CONFIG_EXT_INTER
+    uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+#endif
     if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
       cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
       cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
@@ -7495,8 +7807,10 @@
       RD_STATS rd_stats_uv;
 #endif
 
-      // Y cost and distortion
+// Y cost and distortion
+#if !CONFIG_PVQ
       av1_subtract_plane(x, bsize, 0);
+#endif
 #if CONFIG_VAR_TX
       if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
         RD_STATS rd_stats_y;
@@ -7546,8 +7860,7 @@
           inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, ref_best_rd - rdcosty);
 #if CONFIG_RD_DEBUG
       // record uv planes' transform block coefficient cost
-      mbmi->txb_coeff_cost[1] = rd_stats_uv.txb_coeff_cost[1];
-      mbmi->txb_coeff_cost[2] = rd_stats_uv.txb_coeff_cost[2];
+      if (is_cost_valid_uv) av1_merge_rd_stats(&mbmi->rd_stats, &rd_stats_uv);
 #endif
       *rate_uv = rd_stats_uv.rate;
       distortion_uv = rd_stats_uv.dist;
@@ -8161,6 +8474,10 @@
   int *mode_map = tile_data->mode_map[bsize];
   const int mode_search_skip_flags = sf->mode_search_skip_flags;
   const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+#if CONFIG_PVQ
+  od_rollback_buffer pre_buf;
+#endif
+
 #if CONFIG_PALETTE || CONFIG_EXT_INTRA
   const int rows = 4 * num_4x4_blocks_high_lookup[bsize];
   const int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
@@ -8259,7 +8576,7 @@
     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
 #if CONFIG_GLOBAL_MOTION
     frame_mv[ZEROMV][ref_frame].as_int =
-        cm->global_motion[ref_frame].motion_params.wmmat[0].as_int;
+        gm_get_motion_vector(&cm->global_motion[ref_frame]).as_int;
 #else   // CONFIG_GLOBAL_MOTION
     frame_mv[ZEROMV][ref_frame].as_int = 0;
 #endif  // CONFIG_GLOBAL_MOTION
@@ -8353,7 +8670,7 @@
       mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
 #if CONFIG_GLOBAL_MOTION
       zeromv.as_int =
-          cm->global_motion[ALTREF_FRAME].motion_params.wmmat[0].as_int;
+          gm_get_motion_vector(&cm->global_motion[ALTREF_FRAME]).as_int;
 #else
       zeromv.as_int = 0;
 #endif  // CONFIG_GLOBAL_MOTION
@@ -8430,7 +8747,9 @@
     x->use_default_inter_tx_type = 1;
   else
     x->use_default_inter_tx_type = 0;
-
+#if CONFIG_PVQ
+  od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
 #if CONFIG_EXT_INTER
   for (i = 0; i < MB_MODE_COUNT; ++i)
     for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
@@ -8455,6 +8774,9 @@
 #if CONFIG_REF_MV
     uint8_t ref_frame_type;
 #endif
+#if CONFIG_PVQ
+    od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
     mode_index = mode_map[midx];
     this_mode = av1_mode_order[mode_index].mode;
     ref_frame = av1_mode_order[mode_index].ref_frame[0];
@@ -9450,11 +9772,11 @@
     const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame);
 #endif  // CONFIG_REF_MV
 #if CONFIG_GLOBAL_MOTION
-    zeromv[0].as_int = cm->global_motion[refs[0]].motion_params.wmmat[0].as_int;
-    if (comp_pred_mode) {
-      zeromv[1].as_int =
-          cm->global_motion[refs[1]].motion_params.wmmat[0].as_int;
-    }
+    zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]]).as_int;
+    zeromv[1].as_int =
+        comp_pred_mode
+            ? gm_get_motion_vector(&cm->global_motion[refs[1]]).as_int
+            : 0;
 #else
     zeromv[0].as_int = 0;
     zeromv[1].as_int = 0;
@@ -9714,7 +10036,7 @@
   mbmi->ref_frame[1] = NONE;
 #if CONFIG_GLOBAL_MOTION
   mbmi->mv[0].as_int =
-      cm->global_motion[mbmi->ref_frame[0]].motion_params.wmmat[0].as_int;
+      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]]).as_int;
 #else   // CONFIG_GLOBAL_MOTION
   mbmi->mv[0].as_int = 0;
 #endif  // CONFIG_GLOBAL_MOTION
@@ -9867,6 +10189,11 @@
   int ref_frame_skip_mask[2] = { 0 };
   int internal_active_edge =
       av1_active_edge_sb(cpi, mi_row, mi_col) && av1_internal_image_edge(cpi);
+#if CONFIG_PVQ
+  od_rollback_buffer pre_buf;
+
+  od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
 
 #if CONFIG_SUPERTX
   best_rd_so_far = INT64_MAX;
@@ -9945,6 +10272,10 @@
     int this_skip2 = 0;
     int64_t total_sse = INT_MAX;
 
+#if CONFIG_PVQ
+    od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
+
     ref_frame = av1_ref_order[ref_index].ref_frame[0];
     second_ref_frame = av1_ref_order[ref_index].ref_frame[1];
 
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index cb9666a..678c0db 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -27,17 +27,6 @@
 struct RD_COST;
 
 #if CONFIG_VAR_TX
-// TODO(angiebird): Merge RD_COST and RD_STATS
-typedef struct RD_STATS {
-  int rate;
-  int64_t dist;
-  int64_t sse;
-  int skip;
-#if CONFIG_RD_DEBUG
-  int txb_coeff_cost[MAX_MB_PLANE];
-#endif
-} RD_STATS;
-
 static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
 #if CONFIG_RD_DEBUG
   int plane;
@@ -47,8 +36,13 @@
   rd_stats->sse = 0;
   rd_stats->skip = 1;
 #if CONFIG_RD_DEBUG
-  for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    int r, c;
     rd_stats->txb_coeff_cost[plane] = 0;
+    for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
+      for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
+        rd_stats->txb_coeff_cost_map[plane][r][c] = 0;
+  }
 #endif
 }
 
@@ -61,8 +55,13 @@
   rd_stats->sse = INT64_MAX;
   rd_stats->skip = 0;
 #if CONFIG_RD_DEBUG
-  for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    int r, c;
     rd_stats->txb_coeff_cost[plane] = INT_MAX;
+    for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
+      for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
+        rd_stats->txb_coeff_cost_map[plane][r][c] = INT_MAX;
+  }
 #endif
 }
 
@@ -76,8 +75,19 @@
   rd_stats_dst->sse += rd_stats_src->sse;
   rd_stats_dst->skip &= rd_stats_src->skip;
 #if CONFIG_RD_DEBUG
-  for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    int r, c;
+    int ref_txb_coeff_cost = 0;
     rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
+    // TODO(angiebird): optimize this part
+    for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
+      for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
+        rd_stats_dst->txb_coeff_cost_map[plane][r][c] +=
+            rd_stats_src->txb_coeff_cost_map[plane][r][c];
+        ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c];
+      }
+    assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]);
+  }
 #endif
 }
 #endif
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 212f5d7..5aafa79 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -320,6 +320,7 @@
   int this_rate;
 };
 
+#if !CONFIG_PVQ
 static void cost_coeffs_b(int plane, int block, int blk_row, int blk_col,
                           BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
   struct tokenize_b_args *const args = arg;
@@ -514,6 +515,7 @@
 
   av1_set_contexts(xd, pd, tx_size, c > 0, blk_col, blk_row);
 }
+#endif
 
 struct is_skippable_args {
   uint16_t *eobs;
@@ -560,7 +562,42 @@
                                          has_high_freq_coeff, &args);
   return result;
 }
+#if CONFIG_PVQ
+void add_pvq_block(AV1_COMMON *const cm, MACROBLOCK *const x, PVQ_INFO *pvq) {
+  PVQ_QUEUE *q = x->pvq_q;
+  if (q->curr_pos >= q->buf_len) {
+    q->buf_len = 2 * q->buf_len + 1;
+    CHECK_MEM_ERROR(cm, q->buf,
+                    aom_realloc(q->buf, q->buf_len * sizeof(PVQ_INFO)));
+  }
+  // OD_COPY performs a typed one-element copy, equivalent to memcpy of a PVQ_INFO.
+  OD_COPY(q->buf + q->curr_pos, pvq, 1);
+  ++q->curr_pos;
+}
 
+// NOTE: This does not actually generate tokens, instead we store the encoding
+// decisions made for PVQ in a queue that we will read from when
+// actually writing the bitstream in write_modes_b
+static void tokenize_pvq(int plane, int block, int blk_row, int blk_col,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
+  struct tokenize_b_args *const args = arg;
+  const AV1_COMP *cpi = args->cpi;
+  const AV1_COMMON *const cm = &cpi->common;
+  ThreadData *const td = args->td;
+  MACROBLOCK *const x = &td->mb;
+  PVQ_INFO *pvq_info;
+
+  (void)block;
+  (void)blk_row;
+  (void)blk_col;
+  (void)plane_bsize;
+  (void)tx_size;
+
+  assert(block < MAX_PVQ_BLOCKS_IN_SB);
+  pvq_info = &x->pvq[block][plane];
+  add_pvq_block((AV1_COMMON * const)cm, x, pvq_info);
+}
+#endif
 #if CONFIG_VAR_TX
 void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
                     TX_SIZE tx_size, BLOCK_SIZE plane_bsize, int blk_row,
@@ -688,11 +725,11 @@
     return;
   }
 
+#if !CONFIG_PVQ
   if (!dry_run) {
     int plane;
 
     td->counts->skip[ctx][0] += skip_inc;
-
     for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
       av1_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b,
                                              &arg);
@@ -704,6 +741,17 @@
   } else if (dry_run == DRY_RUN_COSTCOEFFS) {
     av1_foreach_transformed_block(xd, bsize, cost_coeffs_b, &arg);
   }
+#else
+  if (!dry_run) {
+    int plane;
+
+    td->counts->skip[ctx][0] += skip_inc;
+
+    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+      av1_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_pvq,
+                                             &arg);
+  }
+#endif
   if (rate) *rate += arg.this_rate;
 }
 
diff --git a/build/cmake/aom_config.c.cmake b/build/cmake/aom_config.c.cmake
new file mode 100644
index 0000000..70bf950
--- /dev/null
+++ b/build/cmake/aom_config.c.cmake
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include "aom/aom_codec.h"
+static const char* const cfg = "${AOM_CMAKE_CONFIG}";
+static const char* const aom_git_hash = "${AOM_GIT_HASH}";
+const char *aom_codec_build_config(void) {return cfg;}
+const char *aom_codec_git_hash(void) {return aom_git_hash;}
diff --git a/build/cmake/aom_config.h.cmake b/build/cmake/aom_config.h.cmake
new file mode 100644
index 0000000..f5f2583
--- /dev/null
+++ b/build/cmake/aom_config.h.cmake
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* This file is processed by cmake and used to produce aom_config.h in the
+ * directory where cmake was executed. */
+#ifndef AOM_CONFIG_H
+#define AOM_CONFIG_H
+#define RESTRICT ${RESTRICT}
+#define INLINE ${INLINE}
+#define ARCH_ARM ${ARCH_ARM}
+#define ARCH_MIPS ${ARCH_MIPS}
+#define ARCH_X86 ${ARCH_X86}
+#define ARCH_X86_64 ${ARCH_X86_64}
+#define HAVE_EDSP ${HAVE_EDSP}
+#define HAVE_MEDIA ${HAVE_MEDIA}
+#define HAVE_NEON ${HAVE_NEON}
+#define HAVE_NEON_ASM ${HAVE_NEON_ASM}
+#define HAVE_MIPS32 ${HAVE_MIPS32}
+#define HAVE_DSPR2 ${HAVE_DSPR2}
+#define HAVE_MSA ${HAVE_MSA}
+#define HAVE_MIPS64 ${HAVE_MIPS64}
+#define HAVE_MMX ${HAVE_MMX}
+#define HAVE_SSE ${HAVE_SSE}
+#define HAVE_SSE2 ${HAVE_SSE2}
+#define HAVE_SSE3 ${HAVE_SSE3}
+#define HAVE_SSSE3 ${HAVE_SSSE3}
+#define HAVE_SSE4_1 ${HAVE_SSE4_1}
+#define HAVE_AVX ${HAVE_AVX}
+#define HAVE_AVX2 ${HAVE_AVX2}
+#define HAVE_AOM_PORTS ${HAVE_AOM_PORTS}
+#define HAVE_PTHREAD_H ${HAVE_PTHREAD_H}
+#define HAVE_UNISTD_H ${HAVE_UNISTD_H}
+#define CONFIG_DEPENDENCY_TRACKING ${CONFIG_DEPENDENCY_TRACKING}
+#define CONFIG_EXTERNAL_BUILD ${CONFIG_EXTERNAL_BUILD}
+#define CONFIG_INSTALL_DOCS ${CONFIG_INSTALL_DOCS}
+#define CONFIG_INSTALL_BINS ${CONFIG_INSTALL_BINS}
+#define CONFIG_INSTALL_LIBS ${CONFIG_INSTALL_LIBS}
+#define CONFIG_INSTALL_SRCS ${CONFIG_INSTALL_SRCS}
+#define CONFIG_USE_X86INC ${CONFIG_USE_X86INC}
+#define CONFIG_DEBUG ${CONFIG_DEBUG}
+#define CONFIG_GPROF ${CONFIG_GPROF}
+#define CONFIG_GCOV ${CONFIG_GCOV}
+#define CONFIG_RVCT ${CONFIG_RVCT}
+#define CONFIG_GCC ${CONFIG_GCC}
+#define CONFIG_MSVS ${CONFIG_MSVS}
+#define CONFIG_PIC ${CONFIG_PIC}
+#define CONFIG_BIG_ENDIAN ${CONFIG_BIG_ENDIAN}
+#define CONFIG_CODEC_SRCS ${CONFIG_CODEC_SRCS}
+#define CONFIG_DEBUG_LIBS ${CONFIG_DEBUG_LIBS}
+#define CONFIG_DEQUANT_TOKENS ${CONFIG_DEQUANT_TOKENS}
+#define CONFIG_DC_RECON ${CONFIG_DC_RECON}
+#define CONFIG_RUNTIME_CPU_DETECT ${CONFIG_RUNTIME_CPU_DETECT}
+#define CONFIG_MULTITHREAD ${CONFIG_MULTITHREAD}
+#define CONFIG_INTERNAL_STATS ${CONFIG_INTERNAL_STATS}
+#define CONFIG_AV1_ENCODER ${CONFIG_AV1_ENCODER}
+#define CONFIG_AV1_DECODER ${CONFIG_AV1_DECODER}
+#define CONFIG_AV1 ${CONFIG_AV1}
+#define CONFIG_ENCODERS ${CONFIG_ENCODERS}
+#define CONFIG_DECODERS ${CONFIG_DECODERS}
+#define CONFIG_STATIC_MSVCRT ${CONFIG_STATIC_MSVCRT}
+#define CONFIG_SPATIAL_RESAMPLING ${CONFIG_SPATIAL_RESAMPLING}
+#define CONFIG_REALTIME_ONLY ${CONFIG_REALTIME_ONLY}
+#define CONFIG_ONTHEFLY_BITPACKING ${CONFIG_ONTHEFLY_BITPACKING}
+#define CONFIG_ERROR_CONCEALMENT ${CONFIG_ERROR_CONCEALMENT}
+#define CONFIG_SHARED ${CONFIG_SHARED}
+#define CONFIG_STATIC ${CONFIG_STATIC}
+#define CONFIG_SMALL ${CONFIG_SMALL}
+#define CONFIG_OS_SUPPORT ${CONFIG_OS_SUPPORT}
+#define CONFIG_UNIT_TESTS ${CONFIG_UNIT_TESTS}
+#define CONFIG_WEBM_IO ${CONFIG_WEBM_IO}
+#define CONFIG_LIBYUV ${CONFIG_LIBYUV}
+#define CONFIG_ACCOUNTING ${CONFIG_ACCOUNTING}
+#define CONFIG_DECODE_PERF_TESTS ${CONFIG_DECODE_PERF_TESTS}
+#define CONFIG_ENCODE_PERF_TESTS ${CONFIG_ENCODE_PERF_TESTS}
+#define CONFIG_MULTI_RES_ENCODING ${CONFIG_MULTI_RES_ENCODING}
+#define CONFIG_TEMPORAL_DENOISING ${CONFIG_TEMPORAL_DENOISING}
+#define CONFIG_COEFFICIENT_RANGE_CHECKING ${CONFIG_COEFFICIENT_RANGE_CHECKING}
+#define CONFIG_AOM_HIGHBITDEPTH ${CONFIG_AOM_HIGHBITDEPTH}
+#define CONFIG_EXPERIMENTAL ${CONFIG_EXPERIMENTAL}
+#define CONFIG_SIZE_LIMIT ${CONFIG_SIZE_LIMIT}
+#define CONFIG_AOM_QM ${CONFIG_AOM_QM}
+#define CONFIG_SPATIAL_SVC ${CONFIG_SPATIAL_SVC}
+#define CONFIG_FP_MB_STATS ${CONFIG_FP_MB_STATS}
+#define CONFIG_EMULATE_HARDWARE ${CONFIG_EMULATE_HARDWARE}
+#define CONFIG_CLPF ${CONFIG_CLPF}
+#define CONFIG_DERING ${CONFIG_DERING}
+#define CONFIG_REF_MV ${CONFIG_REF_MV}
+#define CONFIG_SUB8X8_MC ${CONFIG_SUB8X8_MC}
+#define CONFIG_EXT_INTRA ${CONFIG_EXT_INTRA}
+#define CONFIG_EXT_INTERP ${CONFIG_EXT_INTERP}
+#define CONFIG_EXT_TX ${CONFIG_EXT_TX}
+#define CONFIG_MOTION_VAR ${CONFIG_MOTION_VAR}
+#define CONFIG_EXT_REFS ${CONFIG_EXT_REFS}
+#define CONFIG_EXT_COMPOUND ${CONFIG_EXT_COMPOUND}
+#define CONFIG_SUPERTX ${CONFIG_SUPERTX}
+#define CONFIG_ANS ${CONFIG_ANS}
+#define CONFIG_EC_MULTISYMBOL ${CONFIG_EC_MULTISYMBOL}
+#define CONFIG_DAALA_EC ${CONFIG_DAALA_EC}
+#define CONFIG_PARALLEL_DEBLOCKING ${CONFIG_PARALLEL_DEBLOCKING}
+#define CONFIG_CB4X4 ${CONFIG_CB4X4}
+#define CONFIG_PALETTE ${CONFIG_PALETTE}
+#define CONFIG_FRAME_SIZE ${CONFIG_FRAME_SIZE}
+#define CONFIG_FILTER_7BIT ${CONFIG_FILTER_7BIT}
+#define CONFIG_DELTA_Q ${CONFIG_DELTA_Q}
+#define CONFIG_ADAPT_SCAN ${CONFIG_ADAPT_SCAN}
+#define CONFIG_BITSTREAM_DEBUG ${CONFIG_BITSTREAM_DEBUG}
+#define CONFIG_TILE_GROUPS ${CONFIG_TILE_GROUPS}
+#define CONFIG_EC_ADAPT ${CONFIG_EC_ADAPT}
+#endif /* AOM_CONFIG_H */
diff --git a/build/cmake/aom_configure.cmake b/build/cmake/aom_configure.cmake
new file mode 100644
index 0000000..f1c7691
--- /dev/null
+++ b/build/cmake/aom_configure.cmake
@@ -0,0 +1,152 @@
+##
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+cmake_minimum_required(VERSION 3.2)
+
+include("${AOM_ROOT}/build/cmake/compiler_flags.cmake")
+
+include(FindGit)
+
+# Defaults for every libaom configuration variable.
+set(RESTRICT)
+set(INLINE)
+set(ARCH_ARM 0)
+set(ARCH_MIPS 0)
+set(ARCH_X86 0)
+set(ARCH_X86_64 0)
+set(HAVE_EDSP 0)
+set(HAVE_MEDIA 0)
+set(HAVE_NEON 0)
+set(HAVE_NEON_ASM 0)
+set(HAVE_MIPS32 0)
+set(HAVE_DSPR2 0)
+set(HAVE_MSA 0)
+set(HAVE_MIPS64 0)
+set(HAVE_MMX 0)
+set(HAVE_SSE 0)
+set(HAVE_SSE2 0)
+set(HAVE_SSE3 0)
+set(HAVE_SSSE3 0)
+set(HAVE_SSE4_1 0)
+set(HAVE_AVX 0)
+set(HAVE_AVX2 0)
+set(HAVE_AOM_PORTS 0)
+set(HAVE_PTHREAD_H 0)
+set(HAVE_UNISTD_H 0)
+set(CONFIG_DEPENDENCY_TRACKING 1)
+set(CONFIG_EXTERNAL_BUILD 0)
+set(CONFIG_INSTALL_DOCS 0)
+set(CONFIG_INSTALL_BINS 0)
+set(CONFIG_INSTALL_LIBS 0)
+set(CONFIG_INSTALL_SRCS 0)
+set(CONFIG_USE_X86INC 0)
+set(CONFIG_DEBUG 0)
+set(CONFIG_GPROF 0)
+set(CONFIG_GCOV 0)
+set(CONFIG_RVCT 0)
+set(CONFIG_GCC 0)
+set(CONFIG_MSVS 0)
+set(CONFIG_PIC 0)
+set(CONFIG_BIG_ENDIAN 0)
+set(CONFIG_CODEC_SRCS 0)
+set(CONFIG_DEBUG_LIBS 0)
+set(CONFIG_DEQUANT_TOKENS 0)
+set(CONFIG_DC_RECON 0)
+set(CONFIG_RUNTIME_CPU_DETECT 0)
+set(CONFIG_MULTITHREAD 0)
+set(CONFIG_INTERNAL_STATS 0)
+set(CONFIG_AV1_ENCODER 1)
+set(CONFIG_AV1_DECODER 1)
+set(CONFIG_AV1 1)
+set(CONFIG_ENCODERS 1)
+set(CONFIG_DECODERS 1)
+set(CONFIG_STATIC_MSVCRT 0)
+set(CONFIG_SPATIAL_RESAMPLING 1)
+set(CONFIG_REALTIME_ONLY 0)
+set(CONFIG_ONTHEFLY_BITPACKING 0)
+set(CONFIG_ERROR_CONCEALMENT 0)
+set(CONFIG_SHARED 0)
+set(CONFIG_STATIC 1)
+set(CONFIG_SMALL 0)
+set(CONFIG_OS_SUPPORT 0)
+set(CONFIG_UNIT_TESTS 0)
+set(CONFIG_WEBM_IO 0)
+set(CONFIG_LIBYUV 0)
+set(CONFIG_ACCOUNTING 0)
+set(CONFIG_DECODE_PERF_TESTS 0)
+set(CONFIG_ENCODE_PERF_TESTS 0)
+set(CONFIG_MULTI_RES_ENCODING 0)
+set(CONFIG_TEMPORAL_DENOISING 1)
+set(CONFIG_COEFFICIENT_RANGE_CHECKING 0)
+set(CONFIG_AOM_HIGHBITDEPTH 0)
+set(CONFIG_EXPERIMENTAL 0)
+set(CONFIG_SIZE_LIMIT 0)
+set(CONFIG_AOM_QM 0)
+set(CONFIG_SPATIAL_SVC 0)
+set(CONFIG_FP_MB_STATS 0)
+set(CONFIG_EMULATE_HARDWARE 0)
+set(CONFIG_CLPF 0)
+set(CONFIG_DERING 0)
+set(CONFIG_REF_MV 0)
+set(CONFIG_SUB8X8_MC 0)
+set(CONFIG_EXT_INTRA 0)
+set(CONFIG_EXT_INTERP 0)
+set(CONFIG_EXT_TX 0)
+set(CONFIG_MOTION_VAR 0)
+set(CONFIG_EXT_REFS 0)
+set(CONFIG_EXT_COMPOUND 0)
+set(CONFIG_SUPERTX 0)
+set(CONFIG_ANS 0)
+set(CONFIG_EC_MULTISYMBOL 0)
+set(CONFIG_DAALA_EC 0)
+set(CONFIG_PARALLEL_DEBLOCKING 0)
+set(CONFIG_CB4X4 0)
+set(CONFIG_PALETTE 0)
+set(CONFIG_FRAME_SIZE 0)
+set(CONFIG_FILTER_7BIT 0)
+set(CONFIG_DELTA_Q 0)
+set(CONFIG_ADAPT_SCAN 0)
+set(CONFIG_BITSTREAM_DEBUG 0)
+set(CONFIG_TILE_GROUPS 0)
+set(CONFIG_EC_ADAPT 0)
+
+# TODO(tomfinegan): consume trailing whitespace after configure_file().
+configure_file("${AOM_ROOT}/build/cmake/aom_config.h.cmake"
+               "${CMAKE_CURRENT_BINARY_DIR}/aom_config.h")
+
+# Read the current git hash.
+find_package(Git)
+if (GIT_FOUND)
+  # TODO(tomfinegan): Make this smart enough to write a proper version string
+  # when in a repo that is on a label and clean.
+  # TODO(tomfinegan): In addition to the one above, also make this a custom
+  # build rule so users don't have to re-run cmake to create accurately
+  # versioned cmake builds.
+  execute_process(COMMAND ${GIT_EXECUTABLE}
+                  --git-dir=${AOM_ROOT}/.git rev-parse HEAD
+                  OUTPUT_VARIABLE AOM_GIT_HASH)
+  # Consume the newline at the end of the git output.
+  string(STRIP "${AOM_GIT_HASH}" AOM_GIT_HASH)
+else ()
+  set(AOM_GIT_HASH)
+endif ()
+
+# TODO(tomfinegan): An alternative to dumping the configure command line to
+# aom_config.c is needed in cmake. Normal cmake generation runs do not make the
+# command line available in the cmake script. For now, we just set the variable
+# to the following. The configure_file() command will expand the message in
+# aom_config.c.
+# Note: This message isn't strictly true. When cmake is run in script mode (with
+# the -P argument), CMAKE_ARGC and CMAKE_ARGVn are defined (n = 0 through
+# n = CMAKE_ARGC become valid). Normal cmake generation runs do not make the
+# information available.
+set(AOM_CMAKE_CONFIG "cmake")
+configure_file("${AOM_ROOT}/build/cmake/aom_config.c.cmake"
+               "${CMAKE_CURRENT_BINARY_DIR}/aom_config.c")
diff --git a/build/cmake/compiler_flags.cmake b/build/cmake/compiler_flags.cmake
new file mode 100644
index 0000000..3ccbc0f
--- /dev/null
+++ b/build/cmake/compiler_flags.cmake
@@ -0,0 +1,56 @@
+##
+## Copyright (c) 2016, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+cmake_minimum_required(VERSION 3.2)
+
+include(CheckCCompilerFlag)
+include(CheckCXXCompilerFlag)
+
+function (add_c_flag_if_supported c_flag)
+  unset(C_FLAG_SUPPORTED CACHE)
+  message("Checking C compiler flag support for: " ${c_flag})
+  check_c_compiler_flag("${c_flag}" C_FLAG_SUPPORTED)
+  if (C_FLAG_SUPPORTED)
+    set(CMAKE_C_FLAGS "${c_flag} ${CMAKE_C_FLAGS}" CACHE STRING "" FORCE)
+  endif ()
+endfunction ()
+
+function (add_cxx_flag_if_supported cxx_flag)
+  unset(CXX_FLAG_SUPPORTED CACHE)
+  message("Checking CXX compiler flag support for: " ${cxx_flag})
+  check_cxx_compiler_flag("${cxx_flag}" CXX_FLAG_SUPPORTED)
+  if (CXX_FLAG_SUPPORTED)
+    set(CMAKE_CXX_FLAGS "${cxx_flag} ${CMAKE_CXX_FLAGS}" CACHE STRING "" FORCE)
+  endif ()
+endfunction ()
+
+function (add_compiler_flag_if_supported flag)
+  add_c_flag_if_supported(${flag})
+  add_cxx_flag_if_supported(${flag})
+endfunction ()
+
+# Set warning levels.
+if (MSVC)
+  add_compiler_flag_if_supported("/W3")
+  # Disable MSVC warnings that suggest making code non-portable.
+  add_compiler_flag_if_supported("/wd4996")
+  if (ENABLE_WERROR)
+    add_compiler_flag_if_supported("/WX")
+  endif ()
+else ()
+  add_compiler_flag_if_supported("-Wall")
+  add_compiler_flag_if_supported("-Wextra")
+  add_compiler_flag_if_supported("-Wno-deprecated")
+  add_compiler_flag_if_supported("-Wshorten-64-to-32")
+  add_compiler_flag_if_supported("-Wnarrowing")
+  if (ENABLE_WERROR)
+    add_compiler_flag_if_supported("-Werror")
+  endif ()
+endif ()
diff --git a/configure b/configure
index 9505b05..75a8844 100755
--- a/configure
+++ b/configure
@@ -285,6 +285,7 @@
     alt_intra
     palette
     daala_ec
+    pvq
     cb4x4
     frame_size
     delta_q
@@ -457,6 +458,9 @@
         enabled ${c} && enable_feature ${c##*_}s
     done
 
+    # Enable daala_ec by default
+    ! enabled ans && soft_enable daala_ec
+
     # Fix up experiment dependencies
     enabled ec_adapt && enable_feature ec_multisymbol
     enabled ec_multisymbol && ! enabled ans && soft_enable daala_ec
diff --git a/test/accounting_test.cc b/test/accounting_test.cc
index 122f9b8..e0dfaa2 100644
--- a/test/accounting_test.cc
+++ b/test/accounting_test.cc
@@ -35,7 +35,7 @@
   }
   aom_stop_encode(&bw);
   aom_reader br;
-  aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+  aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
 
   Accounting accounting;
   aom_accounting_init(&accounting);
@@ -51,7 +51,7 @@
   GTEST_ASSERT_EQ(accounting.syms.num_syms, 0);
 
   // Should record 2 * kSymbols accounting symbols.
-  aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+  aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
   br.accounting = &accounting;
   for (int i = 0; i < kSymbols; i++) {
     aom_read(&br, 32, "A");
diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc
index e2ebdda..c6a3288 100644
--- a/test/boolcoder_test.cc
+++ b/test/boolcoder_test.cc
@@ -103,7 +103,7 @@
   uint8_t bw_buffer[kBufferSize];
   const int kSymbols = 1024;
   // Coders are noisier at low probabilities, so we start at p = 4.
-  for (int p = 4; p <= 256; p++) {
+  for (int p = 4; p < 256; p++) {
     double probability = p / 256.;
     aom_start_encode(&bw, bw_buffer);
     for (int i = 0; i < kSymbols; i++) {
@@ -111,7 +111,7 @@
     }
     aom_stop_encode(&bw);
     aom_reader br;
-    aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+    aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
     uint32_t last_tell = aom_reader_tell(&br);
     uint32_t last_tell_frac = aom_reader_tell_frac(&br);
     double frac_diff_total = 0;
diff --git a/test/divu_small_test.cc b/test/divu_small_test.cc
index ea6da47..d3a134a 100644
--- a/test/divu_small_test.cc
+++ b/test/divu_small_test.cc
@@ -1,26 +1,13 @@
-/*Daala video codec
-Copyright (c) 2013 Daala project contributors.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
 
 #include <stdlib.h>