New experiment: Perceptual Vector Quantization from Daala
PVQ replaces the scalar quantizer and coefficient coding with a new
design originally developed in Daala. It currently depends on the
Daala entropy coder although it could be adapted to work with another
entropy coder if needed:
./configure --enable-experimental --enable-daala_ec --enable-pvq
The version of PVQ in this commit is adapted from the following
revision of Daala:
https://github.com/xiph/daala/commit/fb51c1ade6a31b668a0157d89de8f0a4493162a8
More information about PVQ:
- https://people.xiph.org/~jm/daala/pvq_demo/
- https://jmvalin.ca/papers/spie_pvq.pdf
The following files are copied as-is from Daala with minimal
adaptations, therefore we disable clang-format on those files
to make it easier to synchronize the AV1 and Daala codebases in the future:
av1/common/generic_code.c
av1/common/generic_code.h
av1/common/laplace_tables.c
av1/common/partition.c
av1/common/partition.h
av1/common/pvq.c
av1/common/pvq.h
av1/common/state.c
av1/common/state.h
av1/common/zigzag.h
av1/common/zigzag16.c
av1/common/zigzag32.c
av1/common/zigzag4.c
av1/common/zigzag64.c
av1/common/zigzag8.c
av1/decoder/decint.h
av1/decoder/generic_decoder.c
av1/decoder/laplace_decoder.c
av1/decoder/pvq_decoder.c
av1/decoder/pvq_decoder.h
av1/encoder/daala_compat_enc.c
av1/encoder/encint.h
av1/encoder/generic_encoder.c
av1/encoder/laplace_encoder.c
av1/encoder/pvq_encoder.c
av1/encoder/pvq_encoder.h
Known issues:
- Lossless mode is not supported, '--lossless=1' will give the same result as
'--end-usage=q --cq-level=1'.
- High bit depth is not supported by PVQ.
Change-Id: I1ae0d6517b87f4c1ccea944b2e12dc906979f25e
diff --git a/av1/decoder/decint.h b/av1/decoder/decint.h
new file mode 100644
index 0000000..99dbc43
--- /dev/null
+++ b/av1/decoder/decint.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_decint_H)
+# define _decint_H (1)
+# include "av1/common/pvq_state.h"
+# include "aom_dsp/entdec.h"
+
+typedef struct daala_dec_ctx daala_dec_ctx;
+
+typedef struct daala_dec_ctx od_dec_ctx;
+
+
+struct daala_dec_ctx {
+ /* Stores context-adaptive CDFs for PVQ. */
+ od_state state;
+ /* Daala entropy decoder. */
+ od_ec_dec *ec;
+ /* Mode of quantization matrice : FLAT (0) or HVS (1) */
+ int qm;
+};
+
+#endif
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 026dcbc..ee067f7 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -57,6 +57,16 @@
#define MAX_AV1_HEADER_SIZE 80
#define ACCT_STR __func__
+#if CONFIG_PVQ
+#include "av1/decoder/pvq_decoder.h"
+#include "av1/encoder/encodemb.h"
+
+#include "aom_dsp/entdec.h"
+#include "av1/common/partition.h"
+#include "av1/decoder/decint.h"
+#include "av1/encoder/hybrid_fwd_txfm.h"
+#endif
+
static struct aom_read_bit_buffer *init_read_bit_buffer(
AV1Decoder *pbi, struct aom_read_bit_buffer *rb, const uint8_t *data,
const uint8_t *data_end, uint8_t clear_data[MAX_AV1_HEADER_SIZE]);
@@ -299,6 +309,141 @@
memset(dqcoeff, 0, (scan_line + 1) * sizeof(dqcoeff[0]));
}
+#if CONFIG_PVQ
+static int av1_pvq_decode_helper(od_dec_ctx *dec, int16_t *ref_coeff,
+ int16_t *dqcoeff, int16_t *quant, int pli,
+ int bs, TX_TYPE tx_type, int xdec,
+ int ac_dc_coded) {
+ unsigned int flags; // used for daala's stream analyzer.
+ int off;
+ const int is_keyframe = 0;
+ const int has_dc_skip = 1;
+ int quant_shift = bs == TX_32X32 ? 1 : 0;
+ // DC quantizer for PVQ
+ int pvq_dc_quant;
+ int lossless = (quant[0] == 0);
+ const int blk_size = tx_size_wide[bs];
+ int eob = 0;
+ int i;
+ // TODO(yushin) : To enable activity masking,
+ // int use_activity_masking = dec->use_activity_masking;
+ int use_activity_masking = 0;
+
+ DECLARE_ALIGNED(16, int16_t, dqcoeff_pvq[OD_BSIZE_MAX * OD_BSIZE_MAX]);
+ DECLARE_ALIGNED(16, int16_t, ref_coeff_pvq[OD_BSIZE_MAX * OD_BSIZE_MAX]);
+
+ od_coeff ref_int32[OD_BSIZE_MAX * OD_BSIZE_MAX];
+ od_coeff out_int32[OD_BSIZE_MAX * OD_BSIZE_MAX];
+
+ od_raster_to_coding_order(ref_coeff_pvq, blk_size, tx_type, ref_coeff,
+ blk_size);
+
+ if (lossless)
+ pvq_dc_quant = 1;
+ else {
+ // TODO(yushin): Enable this for activity masking,
+ // when pvq_qm_q4 is available in AOM.
+ // pvq_dc_quant = OD_MAXI(1, quant*
+ // dec->state.pvq_qm_q4[pli][od_qm_get_index(bs, 0)] >> 4);
+ pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift);
+ }
+
+ off = od_qm_offset(bs, xdec);
+
+ // copy int16 inputs to int32
+ for (i = 0; i < blk_size * blk_size; i++) ref_int32[i] = ref_coeff_pvq[i];
+
+ od_pvq_decode(dec, ref_int32, out_int32, (int)quant[1] >> quant_shift, pli,
+ bs, OD_PVQ_BETA[use_activity_masking][pli][bs],
+ OD_ROBUST_STREAM,
+ is_keyframe, &flags, ac_dc_coded, dec->state.qm + off,
+ dec->state.qm_inv + off);
+
+ // copy int32 result back to int16
+ for (i = 0; i < blk_size * blk_size; i++) dqcoeff_pvq[i] = out_int32[i];
+
+ if (!has_dc_skip || dqcoeff_pvq[0]) {
+ dqcoeff_pvq[0] =
+ has_dc_skip + generic_decode(dec->ec, &dec->state.adapt.model_dc[pli],
+ -1, &dec->state.adapt.ex_dc[pli][bs][0], 2,
+ "dc:mag");
+ if (dqcoeff_pvq[0])
+ dqcoeff_pvq[0] *= od_ec_dec_bits(dec->ec, 1, "dc:sign") ? -1 : 1;
+ }
+ dqcoeff_pvq[0] = dqcoeff_pvq[0] * pvq_dc_quant + ref_coeff_pvq[0];
+
+ od_coding_order_to_raster(dqcoeff, blk_size, tx_type, dqcoeff_pvq, blk_size);
+
+ eob = blk_size * blk_size;
+
+ return eob;
+}
+
+static int av1_pvq_decode_helper2(
+ MACROBLOCKD *const xd, MB_MODE_INFO *const mbmi, int plane, int row,
+ int col, TX_SIZE tx_size, TX_TYPE tx_type ) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ // transform block size in pixels
+ int tx_blk_size = tx_size_wide[tx_size];
+ int i, j;
+ tran_low_t *pvq_ref_coeff = pd->pvq_ref_coeff;
+ const int diff_stride = tx_blk_size;
+ int16_t *pred = pd->pred;
+ tran_low_t *const dqcoeff = pd->dqcoeff;
+ int ac_dc_coded; // bit0: DC coded, bit1 : AC coded
+ uint8_t *dst;
+ int eob;
+
+ eob = 0;
+ dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
+
+ // decode ac/dc coded flag. bit0: DC coded, bit1 : AC coded
+ // NOTE : we don't use 5 symbols for luma here in aom codebase,
+ // since block partition is taken care of by aom.
+ // So, only AC/DC skip info is coded
+ ac_dc_coded = od_decode_cdf_adapt(
+ xd->daala_dec.ec,
+ xd->daala_dec.state.adapt.skip_cdf[2 * tx_size + (plane != 0)], 4,
+ xd->daala_dec.state.adapt.skip_increment, "skip");
+
+ if (ac_dc_coded) {
+ int xdec = pd->subsampling_x;
+ int seg_id = mbmi->segment_id;
+ int16_t *quant;
+ FWD_TXFM_PARAM fwd_txfm_param;
+
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) {
+ pred[diff_stride * j + i] = dst[pd->dst.stride * j + i];
+ }
+
+ fwd_txfm_param.tx_type = tx_type;
+ fwd_txfm_param.tx_size = tx_size;
+ fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
+ fwd_txfm_param.rd_transform = 0;
+ fwd_txfm_param.lossless = xd->lossless[seg_id];
+
+ fwd_txfm(pred, pvq_ref_coeff, diff_stride, &fwd_txfm_param);
+
+ quant = &pd->seg_dequant[seg_id][0]; // aom's quantizer
+
+ eob = av1_pvq_decode_helper(&xd->daala_dec, pvq_ref_coeff, dqcoeff, quant,
+ plane, tx_size, tx_type, xdec, ac_dc_coded);
+
+ // Since av1 does not have separate inverse transform
+ // but also contains adding to predicted image,
+ // pass blank dummy image to av1_inv_txfm_add_*x*(), i.e. set dst as zeros
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0;
+
+ inverse_transform_block(xd, plane, tx_type, tx_size, dst,
+ pd->dst.stride, eob);
+ }
+
+ return eob;
+}
+#endif
+
static void predict_and_reconstruct_intra_block(AV1_COMMON *cm,
MACROBLOCKD *const xd,
#if CONFIG_ANS
@@ -314,6 +459,10 @@
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
uint8_t *dst;
int block_idx = (row << 1) + col;
+#if CONFIG_PVQ
+ (void)cm;
+ (void)r;
+#endif
dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
if (mbmi->sb_type < BLOCK_8X8)
@@ -324,6 +473,7 @@
if (!mbmi->skip) {
TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+#if !CONFIG_PVQ
const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
int16_t max_scan_line = 0;
const int eob =
@@ -335,6 +485,9 @@
if (eob)
inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
max_scan_line, eob);
+#else
+ av1_pvq_decode_helper2(xd, mbmi, plane, row, col, tx_size, tx_type);
+#endif
}
}
@@ -404,6 +557,13 @@
PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
int block_idx = (row << 1) + col;
TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+#if CONFIG_PVQ
+ int eob;
+ (void)cm;
+ (void)r;
+#endif
+
+#if !CONFIG_PVQ
const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
int16_t max_scan_line = 0;
const int eob =
@@ -416,6 +576,9 @@
inverse_transform_block(xd, plane, tx_type, tx_size,
&pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
pd->dst.stride, max_scan_line, eob);
+#else
+ eob = av1_pvq_decode_helper2(xd, mbmi, plane, row, col, tx_size, tx_type);
+#endif
return eob;
}
#endif // !CONFIG_VAR_TX || CONFIG_SUPER_TX
@@ -1507,6 +1670,11 @@
#endif
n8x8_l2);
subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition);
+
+#if CONFIG_PVQ
+ assert(partition < PARTITION_TYPES);
+ assert(subsize < BLOCK_SIZES);
+#endif
#if CONFIG_SUPERTX
if (!frame_is_intra_only(cm) && partition != PARTITION_NONE &&
bsize <= MAX_SUPERTX_BLOCK_SIZE && !supertx_enabled && !xd->lossless[0]) {
@@ -1897,6 +2065,7 @@
}
#endif
+#if !CONFIG_PVQ
static void read_coef_probs_common(av1_coeff_probs_model *coef_probs,
aom_reader *r) {
int i, j, k, l, m;
@@ -1921,6 +2090,7 @@
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
read_coef_probs_common(fc->coef_probs[tx_size], r);
}
+#endif
static void setup_segmentation(AV1_COMMON *const cm,
struct aom_read_bit_buffer *rb) {
@@ -2767,6 +2937,18 @@
}
#endif // CONFIG_EXT_TILE
+#if CONFIG_PVQ
+static void daala_dec_init(daala_dec_ctx *daala_dec, od_ec_dec *ec) {
+ daala_dec->ec = ec;
+ od_adapt_ctx_reset(&daala_dec->state.adapt, 0);
+
+ daala_dec->qm = OD_FLAT_QM;
+
+ od_init_qm(daala_dec->state.qm, daala_dec->state.qm_inv,
+ daala_dec->qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT);
+}
+#endif
+
static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
const uint8_t *data_end) {
AV1_COMMON *const cm = &pbi->common;
@@ -2849,6 +3031,9 @@
? &cm->counts
: NULL;
av1_zero(td->dqcoeff);
+#if CONFIG_PVQ
+ av1_zero(tile_data->pvq_ref_coeff);
+#endif
av1_tile_init(&td->xd.tile, td->cm, tile_row, tile_col);
#if !CONFIG_ANS
setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
@@ -2864,7 +3049,14 @@
td->bit_reader.accounting = NULL;
}
#endif
- av1_init_macroblockd(cm, &td->xd, td->dqcoeff);
+ av1_init_macroblockd(cm, &td->xd,
+#if CONFIG_PVQ
+ td->pvq_ref_coeff,
+#endif
+ td->dqcoeff);
+#if CONFIG_PVQ
+ daala_dec_init(&td->xd.daala_dec, &td->bit_reader.ec);
+#endif
#if CONFIG_PALETTE
td->xd.plane[0].color_index_map = td->color_index_map[0];
td->xd.plane[1].color_index_map = td->color_index_map[1];
@@ -3196,7 +3388,14 @@
&twd->bit_reader, pbi->decrypt_cb,
pbi->decrypt_state);
#endif // CONFIG_ANS
- av1_init_macroblockd(cm, &twd->xd, twd->dqcoeff);
+ av1_init_macroblockd(cm, &twd->xd,
+#if CONFIG_PVQ
+ twd->pvq_ref_coeff,
+#endif
+ twd->dqcoeff);
+#if CONFIG_PVQ
+ daala_dec_init(&twd->xd.daala_dec, &twd->bit_reader.ec);
+#endif
#if CONFIG_PALETTE
twd->xd.plane[0].color_index_map = twd->color_index_map[0];
twd->xd.plane[1].color_index_map = twd->color_index_map[1];
@@ -3734,6 +3933,7 @@
if (cm->tx_mode == TX_MODE_SELECT) read_tx_size_probs(fc, &r);
+#if !CONFIG_PVQ
read_coef_probs(fc, cm->tx_mode, &r);
#if CONFIG_VAR_TX
@@ -3745,8 +3945,8 @@
av1_diff_update_prob(&r, &fc->rect_tx_prob[i], ACCT_STR);
}
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
-#endif
-
+#endif // CONFIG_VAR_TX
+#endif // !CONFIG_PVQ
for (k = 0; k < SKIP_CONTEXTS; ++k)
av1_diff_update_prob(&r, &fc->skip_probs[k], ACCT_STR);
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index c3099ba..7547656 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -33,7 +33,10 @@
#include "av1/decoder/decodeframe.h"
#include "av1/decoder/decoder.h"
+
+#if !CONFIG_PVQ
#include "av1/decoder/detokenize.h"
+#endif
static void initialize_dec(void) {
static volatile int init_done = 0;
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index 262995a..0ee922f 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -26,6 +26,12 @@
#include "av1/common/accounting.h"
#endif
+#if CONFIG_PVQ
+#include "aom_dsp/entdec.h"
+#include "av1/decoder/decint.h"
+#include "av1/encoder/encodemb.h"
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -37,6 +43,10 @@
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
+#if CONFIG_PVQ
+ /* forward transformed predicted image, a reference for PVQ */
+ DECLARE_ALIGNED(16, tran_low_t, pvq_ref_coeff[OD_BSIZE_MAX * OD_BSIZE_MAX]);
+#endif
#if CONFIG_PALETTE
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
#endif // CONFIG_PALETTE
@@ -49,6 +59,10 @@
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
+#if CONFIG_PVQ
+ /* forward transformed predicted image, a reference for PVQ */
+ DECLARE_ALIGNED(16, tran_low_t, pvq_ref_coeff[OD_BSIZE_MAX * OD_BSIZE_MAX]);
+#endif
#if CONFIG_PALETTE
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
#endif // CONFIG_PALETTE
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 795b1b0..0f183f2 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -9,9 +9,11 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#if !CONFIG_PVQ
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
-
+#endif
+#if !CONFIG_PVQ
#if CONFIG_ANS
#include "aom_dsp/ans.h"
#endif // CONFIG_ANS
@@ -356,3 +358,4 @@
av1_set_contexts(xd, pd, tx_size, eob > 0, x, y);
return eob;
}
+#endif
diff --git a/av1/decoder/detokenize.h b/av1/decoder/detokenize.h
index 1eb1e6c..ec68665 100644
--- a/av1/decoder/detokenize.h
+++ b/av1/decoder/detokenize.h
@@ -9,6 +9,7 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#if !CONFIG_PVQ
#ifndef AV1_DECODER_DETOKENIZE_H_
#define AV1_DECODER_DETOKENIZE_H_
@@ -39,5 +40,5 @@
#ifdef __cplusplus
} // extern "C"
#endif
-
#endif // AV1_DECODER_DETOKENIZE_H_
+#endif
diff --git a/av1/decoder/generic_decoder.c b/av1/decoder/generic_decoder.c
new file mode 100644
index 0000000..86187fa
--- /dev/null
+++ b/av1/decoder/generic_decoder.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "aom_dsp/entdec.h"
+#include "av1/common/generic_code.h"
+#include "av1/common/odintrin.h"
+#include "pvq_decoder.h"
+
+/** Decodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] enc range encoder
+ * @param [in,out] cdf CDF of the variable (Q15)
+ * @param [in] n number of values possible
+ * @param [in,out] count number of symbols encoded with that cdf so far
+ * @param [in] rate adaptation rate shift (smaller is faster)
+ * @return decoded variable
+ */
+int od_decode_cdf_adapt_q15_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int *count, int rate OD_ACC_STR) {
+ int val;
+ int i;
+ if (*count == 0) {
+ int ft;
+ ft = cdf[n - 1];
+ for (i = 0; i < n; i++) {
+ cdf[i] = cdf[i]*32768/ft;
+ }
+ }
+ val = od_ec_decode_cdf_q15(ec, cdf, n);
+ od_cdf_adapt_q15(val, cdf, n, count, rate);
+ return val;
+}
+
+/** Decodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] enc range encoder
+ * @param [in] cdf CDF of the variable (Q15)
+ * @param [in] n number of values possible
+ * @param [in] increment adaptation speed (Q15)
+ *
+ * @retval decoded variable
+ */
+int od_decode_cdf_adapt_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int increment OD_ACC_STR) {
+ int i;
+ int val;
+ val = od_ec_decode_cdf_unscaled(ec, cdf, n);
+ if (cdf[n-1] + increment > 32767) {
+ for (i = 0; i < n; i++) {
+ /* Second term ensures that the pdf is non-null */
+ cdf[i] = (cdf[i] >> 1) + i + 1;
+ }
+ }
+ for (i = val; i < n; i++) cdf[i] += increment;
+ return val;
+}
+
+/** Encodes a random variable using a "generic" model, assuming that the
+ * distribution is one-sided (zero and up), has a single mode, and decays
+ * exponentially past the model.
+ *
+ * @param [in,out] dec range decoder
+ * @param [in,out] model generic probability model
+ * @param [in] x variable being encoded
+ * @param [in,out] ExQ16 expectation of x (adapted)
+ * @param [in] integration integration period of ExQ16 (leaky average over
+ * 1<<integration samples)
+ *
+ * @retval decoded variable x
+ */
+int generic_decode_(od_ec_dec *dec, generic_encoder *model, int max,
+ int *ex_q16, int integration OD_ACC_STR) {
+ int lg_q1;
+ int shift;
+ int id;
+ uint16_t *cdf;
+ int xs;
+ int lsb;
+ int x;
+ int ms;
+ lsb = 0;
+ if (max == 0) return 0;
+ lg_q1 = log_ex(*ex_q16);
+ /* If expectation is too large, shift x to ensure that
+ all we have past xs=15 is the exponentially decaying tail
+ of the distribution. */
+ shift = OD_MAXI(0, (lg_q1 - 5) >> 1);
+ /* Choose the cdf to use: we have two per "octave" of ExQ16. */
+ id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
+ cdf = model->cdf[id];
+ ms = (max + (1 << shift >> 1)) >> shift;
+ if (max == -1) xs = od_ec_decode_cdf_unscaled(dec, cdf, 16);
+ else xs = od_ec_decode_cdf_unscaled(dec, cdf, OD_MINI(ms + 1, 16));
+ if (xs == 15) {
+ int e;
+ unsigned decay;
+ /* Estimate decay based on the assumption that the distribution is close
+ to Laplacian for large values. We should probably have an adaptive
+ estimate instead. Note: The 2* is a kludge that's not fully understood
+ yet. */
+ OD_ASSERT(*ex_q16 < INT_MAX >> 1);
+ e = ((2**ex_q16 >> 8) + (1 << shift >> 1)) >> shift;
+ decay = OD_MAXI(2, OD_MINI(254, 256*e/(e + 256)));
+ xs += laplace_decode_special(dec, decay, (max == -1) ? -1 : ms - 15, acc_str);
+ }
+ if (shift != 0) {
+ int special;
+ /* Because of the rounding, there's only half the number of possibilities
+ for xs=0 */
+ special = xs == 0;
+ if (shift - special > 0) lsb = od_ec_dec_bits(dec, shift - special, acc_str);
+ lsb -= !special << (shift - 1);
+ }
+ x = (xs << shift) + lsb;
+ generic_model_update(model, ex_q16, x, xs, id, integration);
+ OD_LOG((OD_LOG_ENTROPY_CODER, OD_LOG_DEBUG,
+ "dec: %d %d %d %d %d %x", *ex_q16, x, shift, id, xs, dec->rng));
+ return x;
+}
diff --git a/av1/decoder/laplace_decoder.c b/av1/decoder/laplace_decoder.c
new file mode 100644
index 0000000..4c3def5
--- /dev/null
+++ b/av1/decoder/laplace_decoder.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "aom_dsp/entdec.h"
+#include "av1/common/pvq.h"
+#include "pvq_decoder.h"
+
+#if OD_ACCOUNTING
+# define od_decode_pvq_split(ec, adapt, sum, ctx, str) od_decode_pvq_split_(ec, adapt, sum, ctx, str)
+#else
+# define od_decode_pvq_split(ec, adapt, sum, ctx, str) od_decode_pvq_split_(ec, adapt, sum, ctx)
+#endif
+
+static int od_decode_pvq_split_(od_ec_dec *ec, od_pvq_codeword_ctx *adapt,
+ int sum, int ctx OD_ACC_STR) {
+ int shift;
+ int count;
+ int msbs;
+ int fctx;
+ count = 0;
+ if (sum == 0) return 0;
+ shift = OD_MAXI(0, OD_ILOG(sum) - 3);
+ fctx = 7*ctx + (sum >> shift) - 1;
+ msbs = od_decode_cdf_adapt(ec, adapt->pvq_split_cdf[fctx],
+ (sum >> shift) + 1, adapt->pvq_split_increment, acc_str);
+ if (shift) count = od_ec_dec_bits(ec, shift, acc_str);
+ count += msbs << shift;
+ if (count > sum) {
+ count = sum;
+ ec->error = 1;
+ }
+ return count;
+}
+
+void od_decode_band_pvq_splits(od_ec_dec *ec, od_pvq_codeword_ctx *adapt,
+ od_coeff *y, int n, int k, int level) {
+ int mid;
+ int count_right;
+ if (n == 1) {
+ y[0] = k;
+ }
+ else if (k == 0) {
+ OD_CLEAR(y, n);
+ }
+ else if (k == 1 && n <= 16) {
+ int cdf_id;
+ int pos;
+ cdf_id = od_pvq_k1_ctx(n, level == 0);
+ OD_CLEAR(y, n);
+ pos = od_decode_cdf_adapt(ec, adapt->pvq_k1_cdf[cdf_id], n,
+ adapt->pvq_k1_increment, "pvq:k1");
+ y[pos] = 1;
+ }
+ else {
+ mid = n >> 1;
+ count_right = od_decode_pvq_split(ec, adapt, k, od_pvq_size_ctx(n),
+ "pvq:split");
+ od_decode_band_pvq_splits(ec, adapt, y, mid, k - count_right, level + 1);
+ od_decode_band_pvq_splits(ec, adapt, y + mid, n - mid, count_right,
+ level + 1);
+ }
+}
+
+/** Decodes the tail of a Laplace-distributed variable, i.e. it doesn't
+ * do anything special for the zero case.
+ *
+ * @param [dec] range decoder
+ * @param [decay] decay factor of the distribution, i.e. pdf ~= decay^x
+ * @param [max] maximum possible value of x (used to truncate the pdf)
+ *
+ * @retval decoded variable x
+ */
+int od_laplace_decode_special_(od_ec_dec *dec, unsigned decay, int max OD_ACC_STR) {
+ int pos;
+ int shift;
+ int xs;
+ int ms;
+ int sym;
+ const uint16_t *cdf;
+ shift = 0;
+ if (max == 0) return 0;
+ /* We don't want a large decay value because that would require too many
+ symbols. However, it's OK if the max is below 15. */
+ while (((max >> shift) >= 15 || max == -1) && decay > 235) {
+ decay = (decay*decay + 128) >> 8;
+ shift++;
+ }
+ decay = OD_MINI(decay, 254);
+ decay = OD_MAXI(decay, 2);
+ ms = max >> shift;
+ cdf = EXP_CDF_TABLE[(decay + 1) >> 1];
+ OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "decay = %d\n", decay));
+ xs = 0;
+ do {
+ sym = OD_MINI(xs, 15);
+ {
+ int i;
+ OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "%d %d %d %d", xs, shift, sym, max));
+ for (i = 0; i < 16; i++) {
+ OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "%d ", cdf[i]));
+ }
+ OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "\n"));
+ }
+ if (ms > 0 && ms < 15) {
+ /* Simple way of truncating the pdf when we have a bound. */
+ sym = od_ec_decode_cdf_unscaled(dec, cdf, ms + 1);
+ }
+ else sym = od_ec_decode_cdf_q15(dec, cdf, 16);
+ xs += sym;
+ ms -= 15;
+ }
+ while (sym >= 15 && ms != 0);
+ if (shift) pos = (xs << shift) + od_ec_dec_bits(dec, shift, acc_str);
+ else pos = xs;
+ OD_ASSERT(pos >> shift <= max >> shift || max == -1);
+ if (max != -1 && pos > max) {
+ pos = max;
+ dec->error = 1;
+ }
+ OD_ASSERT(pos <= max || max == -1);
+ return pos;
+}
+
+/** Decodes a Laplace-distributed variable for use in PVQ.
+ *
+ * @param [in,out] dec range decoder
+ * @param [in] ExQ8 expectation of the absolute value of x
+ * @param [in] K maximum value of |x|
+ *
+ * @retval decoded variable (including sign)
+ */
+int od_laplace_decode_(od_ec_dec *dec, unsigned ex_q8, int k OD_ACC_STR) {
+ int j;
+ int shift;
+ uint16_t cdf[16];
+ int sym;
+ int lsb;
+ int decay;
+ int offset;
+ lsb = 0;
+ /* Shift down x if expectation is too high. */
+ shift = OD_ILOG(ex_q8) - 11;
+ if (shift < 0) shift = 0;
+ /* Apply the shift with rounding to Ex, K and xs. */
+ ex_q8 = (ex_q8 + (1 << shift >> 1)) >> shift;
+ k = (k + (1 << shift >> 1)) >> shift;
+ decay = OD_MINI(254, OD_DIVU(256*ex_q8, (ex_q8 + 256)));
+ offset = LAPLACE_OFFSET[(decay + 1) >> 1];
+ for (j = 0; j < 16; j++) {
+ cdf[j] = EXP_CDF_TABLE[(decay + 1) >> 1][j] - offset;
+ }
+ /* Simple way of truncating the pdf when we have a bound */
+ if (k == 0) sym = 0;
+ else sym = od_ec_decode_cdf_unscaled(dec, cdf, OD_MINI(k + 1, 16));
+ if (shift) {
+ int special;
+ /* Because of the rounding, there's only half the number of possibilities
+ for xs=0 */
+ special = (sym == 0);
+ if (shift - special > 0) lsb = od_ec_dec_bits(dec, shift - special, acc_str);
+ lsb -= (!special << (shift - 1));
+ }
+ /* Handle the exponentially-decaying tail of the distribution */
+ if (sym == 15) sym += laplace_decode_special(dec, decay, k - 15, acc_str);
+ return (sym << shift) + lsb;
+}
+
+#if OD_ACCOUNTING
+# define laplace_decode_vector_delta(dec, y, n, k, curr, means, str) laplace_decode_vector_delta_(dec, y, n, k, curr, means, str)
+#else
+# define laplace_decode_vector_delta(dec, y, n, k, curr, means, str) laplace_decode_vector_delta_(dec, y, n, k, curr, means)
+#endif
+
+static void laplace_decode_vector_delta_(od_ec_dec *dec, od_coeff *y, int n, int k,
+ int32_t *curr, const int32_t *means
+ OD_ACC_STR) {
+ int i;
+ int prev;
+ int sum_ex;
+ int sum_c;
+ int coef;
+ int pos;
+ int k0;
+ int sign;
+ int first;
+ int k_left;
+ prev = 0;
+ sum_ex = 0;
+ sum_c = 0;
+ coef = 256*means[OD_ADAPT_COUNT_Q8]/
+ (1 + means[OD_ADAPT_COUNT_EX_Q8]);
+ pos = 0;
+ sign = 0;
+ first = 1;
+ k_left = k;
+ for (i = 0; i < n; i++) y[i] = 0;
+ k0 = k_left;
+ coef = OD_MAXI(coef, 1);
+ for (i = 0; i < k0; i++) {
+ int count;
+ if (first) {
+ int decay;
+ int ex = coef*(n - prev)/k_left;
+ if (ex > 65280) decay = 255;
+ else {
+ decay = OD_MINI(255,
+ (int)((256*ex/(ex + 256) + (ex>>5)*ex/((n + 1)*(n - 1)*(n - 1)))));
+ }
+ /*Update mean position.*/
+ count = laplace_decode_special(dec, decay, n - 1, acc_str);
+ first = 0;
+ }
+ else count = laplace_decode(dec, coef*(n - prev)/k_left, n - prev - 1, acc_str);
+ sum_ex += 256*(n - prev);
+ sum_c += count*k_left;
+ pos += count;
+ OD_ASSERT(pos < n);
+ if (y[pos] == 0)
+ sign = od_ec_dec_bits(dec, 1, acc_str);
+ y[pos] += sign ? -1 : 1;
+ prev = pos;
+ k_left--;
+ if (k_left == 0) break;
+ }
+ if (k > 0) {
+ curr[OD_ADAPT_COUNT_Q8] = 256*sum_c;
+ curr[OD_ADAPT_COUNT_EX_Q8] = sum_ex;
+ }
+ else {
+ curr[OD_ADAPT_COUNT_Q8] = -1;
+ curr[OD_ADAPT_COUNT_EX_Q8] = 0;
+ }
+ curr[OD_ADAPT_K_Q8] = 0;
+ curr[OD_ADAPT_SUM_EX_Q8] = 0;
+}
+
+/** Decodes a vector of integers assumed to come from rounding a sequence of
+ * Laplace-distributed real values in decreasing order of variance.
+ *
+ * @param [in,out] dec range decoder
+ * @param [in] y decoded vector
+ * @param [in] N dimension of the vector
+ * @param [in] K sum of the absolute value of components of y
+ * @param [out] curr Adaptation context output, may alias means.
+ * @param [in] means Adaptation context input.
+ */
+void od_laplace_decode_vector_(od_ec_dec *dec, od_coeff *y, int n, int k,
+ int32_t *curr, const int32_t *means OD_ACC_STR) {
+ int i;
+ int sum_ex;
+ int kn;
+ int exp_q8;
+ int mean_k_q8;
+ int mean_sum_ex_q8;
+ int ran_delta;
+ ran_delta = 0;
+ if (k <= 1) {
+ laplace_decode_vector_delta(dec, y, n, k, curr, means, acc_str);
+ return;
+ }
+ if (k == 0) {
+ curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE;
+ curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE;
+ curr[OD_ADAPT_K_Q8] = 0;
+ curr[OD_ADAPT_SUM_EX_Q8] = 0;
+ for (i = 0; i < n; i++) y[i] = 0;
+ return;
+ }
+ sum_ex = 0;
+ kn = k;
+ /* Estimates the factor relating pulses_left and positions_left to E(|x|).*/
+ mean_k_q8 = means[OD_ADAPT_K_Q8];
+ mean_sum_ex_q8 = means[OD_ADAPT_SUM_EX_Q8];
+ if (mean_k_q8 < 1 << 23) exp_q8 = 256*mean_k_q8/(1 + mean_sum_ex_q8);
+ else exp_q8 = mean_k_q8/(1 + (mean_sum_ex_q8 >> 8));
+ for (i = 0; i < n; i++) {
+ int ex;
+ int x;
+ if (kn == 0) break;
+ if (kn <= 1 && i != n - 1) {
+ laplace_decode_vector_delta(dec, y + i, n - i, kn, curr, means, acc_str);
+ ran_delta = 1;
+ i = n;
+ break;
+ }
+ /* Expected value of x (round-to-nearest) is
+ expQ8*pulses_left/positions_left. */
+ ex = (2*exp_q8*kn + (n - i))/(2*(n - i));
+ if (ex > kn*256) ex = kn*256;
+ sum_ex += (2*256*kn + (n - i))/(2*(n - i));
+ /* No need to encode the magnitude for the last bin. */
+ if (i != n - 1) x = laplace_decode(dec, ex, kn, acc_str);
+ else x = kn;
+ if (x != 0) {
+ if (od_ec_dec_bits(dec, 1, acc_str)) x = -x;
+ }
+ y[i] = x;
+ kn -= abs(x);
+ }
+ /* Adapting the estimates for expQ8. */
+ if (!ran_delta) {
+ curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE;
+ curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE;
+ }
+ curr[OD_ADAPT_K_Q8] = k - kn;
+ curr[OD_ADAPT_SUM_EX_Q8] = sum_ex;
+ for (; i < n; i++) y[i] = 0;
+}
diff --git a/av1/decoder/pvq_decoder.c b/av1/decoder/pvq_decoder.c
new file mode 100644
index 0000000..2340605
--- /dev/null
+++ b/av1/decoder/pvq_decoder.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "./aom_config.h"
+#include "aom_dsp/entcode.h"
+#include "aom_dsp/entdec.h"
+#include "av1/common/odintrin.h"
+#include "av1/common/partition.h"
+#include "av1/common/pvq_state.h"
+#include "av1/decoder/decint.h"
+#include "av1/decoder/pvq_decoder.h"
+
+static void od_decode_pvq_codeword(od_ec_dec *ec, od_pvq_codeword_ctx *ctx,
+ od_coeff *y, int n, int k) {
+ int i;
+ od_decode_band_pvq_splits(ec, ctx, y, n, k, 0);
+ for (i = 0; i < n; i++) {
+ if (y[i] && od_ec_dec_bits(ec, 1, "pvq:sign")) y[i] = -y[i];
+ }
+}
+
+/** Inverse of neg_interleave; decodes the interleaved gain.
+ *
+ * @param [in] x quantized/interleaved gain to decode
+ * @param [in] ref quantized gain of the reference
+ * @return original quantized gain value
+ */
+static int neg_deinterleave(int x, int ref) {
+ if (x < 2*ref-1) {
+ if (x & 1) return ref - 1 - (x >> 1);
+ else return ref + (x >> 1);
+ }
+ else return x+1;
+}
+
+/** Synthesizes one parition of coefficient values from a PVQ-encoded
+ * vector.
+ *
+ * @param [out] xcoeff output coefficient partition (x in math doc)
+ * @param [in] ypulse PVQ-encoded values (y in math doc); in the noref
+ * case, this vector has n entries, in the
+ * reference case it contains n-1 entries
+ * (the m-th entry is not included)
+ * @param [in] ref reference vector (prediction)
+ * @param [in] n number of elements in this partition
+ * @param [in] gr gain of the reference vector (prediction)
+ * @param [in] noref indicates presence or lack of prediction
+ * @param [in] g decoded quantized vector gain
+ * @param [in] theta decoded theta (prediction error)
+ * @param [in] qm QM with magnitude compensation
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ */
+static void pvq_synthesis(od_coeff *xcoeff, od_coeff *ypulse, od_val16 *r16,
+ int n, od_val32 gr, int noref, od_val32 g, od_val32 theta, const int16_t *qm_inv,
+ int shift) {
+ int s;
+ int m;
+ /* Sign of the Householder reflection vector */
+ s = 0;
+ /* Direction of the Householder reflection vector */
+ m = noref ? 0 : od_compute_householder(r16, n, gr, &s, shift);
+ od_pvq_synthesis_partial(xcoeff, ypulse, r16, n, noref, g, theta, m, s,
+ qm_inv);
+}
+
+typedef struct {
+ od_coeff *ref;
+ int nb_coeffs;
+ int allow_flip;
+} cfl_ctx;
+
+/** Decodes a single vector of integers (eg, a partition within a
+ * coefficient block) encoded using PVQ
+ *
+ * @param [in,out] ec range encoder
+ * @param [in] q0 scale/quantizer
+ * @param [in] n number of coefficients in partition
+ * @param [in,out] model entropy decoder state
+ * @param [in,out] adapt adaptation context
+ * @param [in,out] exg ExQ16 expectation of decoded gain value
+ * @param [in,out] ext ExQ16 expectation of decoded theta value
+ * @param [in] ref 'reference' (prediction) vector
+ * @param [out] out decoded partition
+ * @param [out] noref boolean indicating absence of reference
+ * @param [in] beta per-band activity masking beta param
+ * @param [in] robust stream is robust to error in the reference
+ * @param [in] is_keyframe whether we're encoding a keyframe
+ * @param [in] pli plane index
+ * @param [in] cdf_ctx selects which cdf context to use
+ * @param [in,out] skip_rest whether to skip further bands in each direction
+ * @param [in] band index of the band being decoded
+ * @param [in] band index of the band being decoded
+ * @param [out] skip skip flag with range [0,1]
+ * @param [in] qm QM with magnitude compensation
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ */
+static void pvq_decode_partition(od_ec_dec *ec,
+ int q0,
+ int n,
+ generic_encoder model[3],
+ od_adapt_ctx *adapt,
+ int *exg,
+ int *ext,
+ od_coeff *ref,
+ od_coeff *out,
+ int *noref,
+ od_val16 beta,
+ int robust,
+ int is_keyframe,
+ int pli,
+ int cdf_ctx,
+ cfl_ctx *cfl,
+ int has_skip,
+ int *skip_rest,
+ int band,
+ int *skip,
+ const int16_t *qm,
+ const int16_t *qm_inv) {
+ int k;
+ od_val32 qcg;
+ int max_theta;
+ int itheta;
+ od_val32 theta;
+ od_val32 gr;
+ od_val32 gain_offset;
+ od_coeff y[MAXN];
+ int qg;
+ int nodesync;
+ int id;
+ int i;
+ od_val16 ref16[MAXN];
+ int rshift;
+ theta = 0;
+ gr = 0;
+ gain_offset = 0;
+ /* We always use the robust bitstream for keyframes to avoid having
+ PVQ and entropy decoding depending on each other, hurting parallelism. */
+ nodesync = robust || is_keyframe;
+ /* Skip is per-direction. For band=0, we can use any of the flags. */
+ if (skip_rest[(band + 2) % 3]) {
+ qg = 0;
+ if (is_keyframe) {
+ itheta = -1;
+ *noref = 1;
+ }
+ else {
+ itheta = 0;
+ *noref = 0;
+ }
+ }
+ else {
+ /* Jointly decode gain, itheta and noref for small values. Then we handle
+ larger gain. We need to wait for itheta because in the !nodesync case
+ it depends on max_theta, which depends on the gain. */
+ id = od_decode_cdf_adapt(ec, &adapt->pvq.pvq_gaintheta_cdf[cdf_ctx][0],
+ 8 + 7*has_skip, adapt->pvq.pvq_gaintheta_increment,
+ "pvq:gaintheta");
+ if (!is_keyframe && id >= 10) id++;
+ if (is_keyframe && id >= 8) id++;
+ if (id >= 8) {
+ id -= 8;
+ skip_rest[0] = skip_rest[1] = skip_rest[2] = 1;
+ }
+ qg = id & 1;
+ itheta = (id >> 1) - 1;
+ *noref = (itheta == -1);
+ }
+ /* The CfL flip bit is only decoded on the first band that has noref=0. */
+ if (cfl->allow_flip && !*noref) {
+ int flip;
+ flip = od_ec_dec_bits(ec, 1, "cfl:flip");
+ if (flip) {
+ for (i = 0; i < cfl->nb_coeffs; i++) cfl->ref[i] = -cfl->ref[i];
+ }
+ cfl->allow_flip = 0;
+ }
+ if (qg > 0) {
+ int tmp;
+ tmp = *exg;
+ qg = 1 + generic_decode(ec, &model[!*noref], -1, &tmp, 2, "pvq:gain");
+ OD_IIR_DIADIC(*exg, qg << 16, 2);
+ }
+ *skip = 0;
+#if defined(OD_FLOAT_PVQ)
+ rshift = 0;
+#else
+ /* Shift needed to make the reference fit in 15 bits, so that the Householder
+ vector can fit in 16 bits. */
+ rshift = OD_MAXI(0, od_vector_log_mag(ref, n) - 14);
+#endif
+ for (i = 0; i < n; i++) {
+#if defined(OD_FLOAT_PVQ)
+ ref16[i] = ref[i]*(double)qm[i]*OD_QM_SCALE_1;
+#else
+ ref16[i] = OD_SHR_ROUND(ref[i]*qm[i], OD_QM_SHIFT + rshift);
+#endif
+ }
+ if(!*noref){
+ /* we have a reference; compute its gain */
+ od_val32 cgr;
+ int icgr;
+ int cfl_enabled;
+ cfl_enabled = pli != 0 && is_keyframe && !OD_DISABLE_CFL;
+ cgr = od_pvq_compute_gain(ref16, n, q0, &gr, beta, rshift);
+ if (cfl_enabled) cgr = OD_CGAIN_SCALE;
+#if defined(OD_FLOAT_PVQ)
+ icgr = (int)floor(.5 + cgr);
+#else
+ icgr = OD_SHR_ROUND(cgr, OD_CGAIN_SHIFT);
+#endif
+ /* quantized gain is interleave encoded when there's a reference;
+ deinterleave it now */
+ if (is_keyframe) qg = neg_deinterleave(qg, icgr);
+ else {
+ qg = neg_deinterleave(qg, icgr + 1) - 1;
+ if (qg == 0) *skip = (icgr ? OD_PVQ_SKIP_ZERO : OD_PVQ_SKIP_COPY);
+ }
+ if (qg == icgr && itheta == 0 && !cfl_enabled) *skip = OD_PVQ_SKIP_COPY;
+ gain_offset = cgr - OD_SHL(icgr, OD_CGAIN_SHIFT);
+ qcg = OD_SHL(qg, OD_CGAIN_SHIFT) + gain_offset;
+ /* read and decode first-stage PVQ error theta */
+ max_theta = od_pvq_compute_max_theta(qcg, beta);
+ if (itheta > 1 && (nodesync || max_theta > 3)) {
+ int tmp;
+ tmp = *ext;
+ itheta = 2 + generic_decode(ec, &model[2], nodesync ? -1 : max_theta - 3,
+ &tmp, 2, "pvq:theta");
+ OD_IIR_DIADIC(*ext, itheta << 16, 2);
+ }
+ theta = od_pvq_compute_theta(itheta, max_theta);
+ }
+ else{
+ itheta = 0;
+ if (!is_keyframe) qg++;
+ qcg = OD_SHL(qg, OD_CGAIN_SHIFT);
+ if (qg == 0) *skip = OD_PVQ_SKIP_ZERO;
+ }
+
+ k = od_pvq_compute_k(qcg, itheta, theta, *noref, n, beta, nodesync);
+ if (k != 0) {
+ /* when noref==0, y is actually size n-1 */
+ od_decode_pvq_codeword(ec, &adapt->pvq.pvq_codeword_ctx, y, n - !*noref,
+ k);
+ }
+ else {
+ OD_CLEAR(y, n);
+ }
+ if (*skip) {
+ if (*skip == OD_PVQ_SKIP_COPY) OD_COPY(out, ref, n);
+ else OD_CLEAR(out, n);
+ }
+ else {
+ od_val32 g;
+ g = od_gain_expand(qcg, q0, beta);
+ pvq_synthesis(out, y, ref16, n, gr, *noref, g, theta, qm_inv, rshift);
+ }
+ *skip = !!*skip;
+}
+
+/** Decodes a coefficient block (except for DC) encoded using PVQ
+ *
+ * @param [in,out] dec daala decoder context
+ * @param [in] ref 'reference' (prediction) vector
+ * @param [out] out decoded partition
+ * @param [in] q0 quantizer
+ * @param [in] pli plane index
+ * @param [in] bs log of the block size minus two
+ * @param [in] beta per-band activity masking beta param
+ * @param [in] robust stream is robust to error in the reference
+ * @param [in] is_keyframe whether we're encoding a keyframe
+ * @param [out] flags bitmask of the per band skip and noref flags
+ * @param [in] block_skip skip flag for the block (range 0-3)
+ * @param [in] qm QM with magnitude compensation
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ */
+void od_pvq_decode(daala_dec_ctx *dec,
+ od_coeff *ref,
+ od_coeff *out,
+ int q0,
+ int pli,
+ int bs,
+ const od_val16 *beta,
+ int robust,
+ int is_keyframe,
+ unsigned int *flags,
+ int block_skip,
+ const int16_t *qm,
+ const int16_t *qm_inv){
+
+ int noref[PVQ_MAX_PARTITIONS];
+ int skip[PVQ_MAX_PARTITIONS];
+ int *exg;
+ int *ext;
+ int nb_bands;
+ int i;
+ const int *off;
+ int size[PVQ_MAX_PARTITIONS];
+ generic_encoder *model;
+ int skip_rest[3] = {0};
+ cfl_ctx cfl;
+ /* const unsigned char *pvq_qm; */
+ /*Default to skip=1 and noref=0 for all bands.*/
+ for (i = 0; i < PVQ_MAX_PARTITIONS; i++) {
+ noref[i] = 0;
+ skip[i] = 1;
+ }
+ /* TODO(yushin): Enable this for activity masking,
+ when pvq_qm_q4 is available in AOM. */
+ /*pvq_qm = &dec->state.pvq_qm_q4[pli][0];*/
+ exg = &dec->state.adapt.pvq.pvq_exg[pli][bs][0];
+ ext = dec->state.adapt.pvq.pvq_ext + bs*PVQ_MAX_PARTITIONS;
+ model = dec->state.adapt.pvq.pvq_param_model;
+ nb_bands = OD_BAND_OFFSETS[bs][0];
+ off = &OD_BAND_OFFSETS[bs][1];
+ OD_ASSERT(block_skip < 4);
+ out[0] = block_skip & 1;
+ if (!(block_skip >> 1)) {
+ if (is_keyframe) for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = 0;
+ else for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = ref[i];
+ }
+ else {
+ for (i = 0; i < nb_bands; i++) size[i] = off[i+1] - off[i];
+ cfl.ref = ref;
+ cfl.nb_coeffs = off[nb_bands];
+ cfl.allow_flip = pli != 0 && is_keyframe;
+ for (i = 0; i < nb_bands; i++) {
+ int q;
+ /* TODO(yushin): Enable this for activity masking,
+ when pvq_qm_q4 is available in AOM. */
+ /*q = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, i + 1)] >> 4);*/
+ q = OD_MAXI(1, q0);
+ pvq_decode_partition(dec->ec, q, size[i],
+ model, &dec->state.adapt, exg + i, ext + i, ref + off[i], out + off[i],
+ &noref[i], beta[i], robust, is_keyframe, pli,
+ (pli != 0)*OD_NBSIZES*PVQ_MAX_PARTITIONS + bs*PVQ_MAX_PARTITIONS + i,
+ &cfl, i == 0 && (i < nb_bands - 1), skip_rest, i, &skip[i],
+ qm + off[i], qm_inv + off[i]);
+ if (i == 0 && !skip_rest[0] && bs > 0) {
+ int skip_dir;
+ int j;
+ skip_dir = od_decode_cdf_adapt(dec->ec,
+ &dec->state.adapt.pvq.pvq_skip_dir_cdf[(pli != 0) + 2*(bs - 1)][0], 7,
+ dec->state.adapt.pvq.pvq_skip_dir_increment, "pvq:skiprest");
+ for (j = 0; j < 3; j++) skip_rest[j] = !!(skip_dir & (1 << j));
+ }
+ }
+ }
+ *flags = 0;
+ for (i = nb_bands - 1; i >= 0; i--) {
+ *flags <<= 1;
+ *flags |= noref[i]&1;
+ *flags <<= 1;
+ *flags |= skip[i]&1;
+ }
+}
diff --git a/av1/decoder/pvq_decoder.h b/av1/decoder/pvq_decoder.h
new file mode 100644
index 0000000..d749040
--- /dev/null
+++ b/av1/decoder/pvq_decoder.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_pvq_decoder_H)
+# define _pvq_decoder_H (1)
+# include "aom_dsp/entdec.h"
+# include "av1/common/pvq.h"
+# include "av1/decoder/decint.h"
+
+void od_decode_band_pvq_splits(od_ec_dec *ec, od_pvq_codeword_ctx *adapt,
+ od_coeff *y, int n, int k, int level);
+
+#if OD_ACCOUNTING
+# define laplace_decode_special(dec, decay, max, str) od_laplace_decode_special_(dec, decay, max, str)
+# define laplace_decode(dec, ex_q8, k, str) od_laplace_decode_(dec, ex_q8, k, str)
+#define laplace_decode_vector(dec, y, n, k, curr, means, str) od_laplace_decode_vector_(dec, y, n, k, curr, means, str)
+#else
+# define laplace_decode_special(dec, decay, max, str) od_laplace_decode_special_(dec, decay, max)
+# define laplace_decode(dec, ex_q8, k, str) od_laplace_decode_(dec, ex_q8, k)
+#define laplace_decode_vector(dec, y, n, k, curr, means, str) od_laplace_decode_vector_(dec, y, n, k, curr, means)
+#endif
+
+int od_laplace_decode_special_(od_ec_dec *dec, unsigned decay, int max OD_ACC_STR);
+int od_laplace_decode_(od_ec_dec *dec, unsigned ex_q8, int k OD_ACC_STR);
+void od_laplace_decode_vector_(od_ec_dec *dec, od_coeff *y, int n, int k,
+ int32_t *curr, const int32_t *means
+ OD_ACC_STR);
+
+
+void od_pvq_decode(daala_dec_ctx *dec, od_coeff *ref, od_coeff *out, int q0,
+ int pli, int bs, const od_val16 *beta, int robust, int is_keyframe,
+ unsigned int *flags, int block_skip, const int16_t *qm,
+ const int16_t *qm_inv);
+
+#endif