New experiment: Perceptual Vector Quantization from Daala
PVQ replaces the scalar quantizer and coefficient coding with a new
design originally developed in Daala. It currently depends on the
Daala entropy coder, although it could be adapted to work with another
entropy coder if needed. To enable it, configure with:
./configure --enable-experimental --enable-daala_ec --enable-pvq
The version of PVQ in this commit is adapted from the following
revision of Daala:
https://github.com/xiph/daala/commit/fb51c1ade6a31b668a0157d89de8f0a4493162a8
More information about PVQ:
- https://people.xiph.org/~jm/daala/pvq_demo/
- https://jmvalin.ca/papers/spie_pvq.pdf
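For intuition, PVQ is a gain-shape quantizer: the gain (magnitude) of a band
of coefficients is coded as a scalar, and the shape (direction) is coded as a
point on the integer pyramid sum(|y_i|) = K. The toy search below is a minimal
sketch rather than the Daala implementation (the real search lives in
av1/common/pvq.c); it illustrates the greedy pulse allocation commonly used to
find the shape codeword:

#include <math.h>
#include <stdlib.h>

/* Toy PVQ shape search: find an integer vector y with sum(|y[i]|) == k
 * that best matches the direction of x, adding one pulse at a time so
 * as to maximize <x,y>^2 / <y,y>. Illustrative sketch only. */
static void toy_pvq_search(const float *x, int *y, int n, int k) {
  float xy = 0.f; /* running <x, y>, with y built in the sign of x */
  float yy = 0.f; /* running <y, y> */
  int i, pulse;
  for (i = 0; i < n; i++) y[i] = 0;
  for (pulse = 0; pulse < k; pulse++) {
    int best_i = 0;
    float best_cost = -1.f;
    for (i = 0; i < n; i++) {
      /* Adding a pulse at i (signed like x[i]) grows <x,y> by |x[i]|
       * and <y,y> by 2*|y[i]| + 1. */
      const float num = xy + (float)fabs(x[i]);
      const float den = yy + 2.f * (float)abs(y[i]) + 1.f;
      const float cost = num * num / den;
      if (cost > best_cost) {
        best_cost = cost;
        best_i = i;
      }
    }
    xy += (float)fabs(x[best_i]);
    yy += 2.f * (float)abs(y[best_i]) + 1.f;
    y[best_i] += x[best_i] < 0 ? -1 : 1;
  }
}

Each pulse greedily maximizes the squared correlation per unit energy, so the
search is O(n*k); the decoder rebuilds the band as gain * y / ||y||.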
The following files are copied from Daala with only minimal
adaptations; we therefore disable clang-format on them to make it
easier to synchronize the AV1 and Daala codebases in the future:
av1/common/generic_code.c
av1/common/generic_code.h
av1/common/laplace_tables.c
av1/common/partition.c
av1/common/partition.h
av1/common/pvq.c
av1/common/pvq.h
av1/common/state.c
av1/common/state.h
av1/common/zigzag.h
av1/common/zigzag16.c
av1/common/zigzag32.c
av1/common/zigzag4.c
av1/common/zigzag64.c
av1/common/zigzag8.c
av1/decoder/decint.h
av1/decoder/generic_decoder.c
av1/decoder/laplace_decoder.c
av1/decoder/pvq_decoder.c
av1/decoder/pvq_decoder.h
av1/encoder/daala_compat_enc.c
av1/encoder/encint.h
av1/encoder/generic_encoder.c
av1/encoder/laplace_encoder.c
av1/encoder/pvq_encoder.c
av1/encoder/pvq_encoder.h
Known issues:
- Lossless mode is not supported; '--lossless=1' gives the same result as
  '--end-usage=q --cq-level=1'.
- High bit depth is not supported by PVQ.
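A note on the distortion plumbing in av1/encoder/rdopt.c below: with PVQ, the
'coeff' buffer holds the transformed original rather than the transformed
residue, which is why a pvq_ref_coeff buffer (the transformed prediction) is
threaded through dist_block(). A self-contained sketch of the semantics of the
new av1_block_error2_c() (toy_* names are illustrative; tran_low_t is assumed
to be the 32-bit typedef):

#include <stddef.h>
#include <stdint.h>

typedef int32_t tran_low_t; /* assumption: the high-bitdepth typedef */

static int64_t toy_sse(const tran_low_t *a, const tran_low_t *b, size_t n) {
  int64_t acc = 0;
  size_t i;
  for (i = 0; i < n; i++) {
    const int64_t d = (int64_t)a[i] - b[i];
    acc += d * d;
  }
  return acc;
}

/* Same contract as av1_block_error2_c() below: returns the reconstruction
 * error and writes the prediction-residue energy to *ssz. */
static int64_t toy_block_error2(const tran_low_t *coeff,   /* transformed orig  */
                                const tran_low_t *dqcoeff, /* transformed recon */
                                const tran_low_t *ref,     /* transformed pred  */
                                size_t n, int64_t *ssz) {
  *ssz = toy_sse(coeff, ref, n);     /* residue energy: (orig - pred)^2 */
  return toy_sse(coeff, dqcoeff, n); /* distortion: (orig - recon)^2 */
}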
Change-Id: I1ae0d6517b87f4c1ccea944b2e12dc906979f25e
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 947fefd..dfd7163 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -49,7 +49,9 @@
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/tokenize.h"
-
+#if CONFIG_PVQ
+#include "av1/encoder/pvq_encoder.h"
+#endif
#if CONFIG_DUAL_FILTER
#if CONFIG_EXT_INTERP
static const int filter_sets[25][2] = {
@@ -66,6 +68,7 @@
#endif
#endif
+
#if CONFIG_EXT_REFS
#define LAST_FRAME_MODE_MASK \
@@ -819,6 +822,33 @@
*out_dist_sum = dist_sum;
}
+#if CONFIG_PVQ
+// Without PVQ, av1_block_error_c() returns two kinds of errors:
+// 1) the reconstruction (i.e. decoded) error, and
+// 2) the squared sum of the transformed residue (i.e. 'coeff').
+// However, when PVQ is enabled, coeff does not hold the transformed residue;
+// the transformed original is kept instead.
+// Hence, a new parameter, the ref vector (i.e. the transformed predicted
+// signal), is required to derive the residue signal,
+// i.e. coeff - ref = residue (all in the transform domain).
+
+// TODO(yushin): Since the 4x4 case does not need ssz, it would be better to
+// refactor this into a separate function that skips the extra computation.
+int64_t av1_block_error2_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
+ const tran_low_t *ref, intptr_t block_size,
+ int64_t *ssz) {
+ int64_t error;
+
+ // Use the existing sse code to calculate the distortion of the decoded
+ // signal, i.e. (orig - decoded)^2
+ error = av1_block_error_fp(coeff, dqcoeff, block_size);
+ // prediction residue^2 = (orig - ref)^2
+ *ssz = av1_block_error_fp(coeff, ref, block_size);
+
+ return error;
+}
+#endif
+
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
intptr_t block_size, int64_t *ssz) {
int i;
@@ -870,6 +900,7 @@
}
#endif // CONFIG_AOM_HIGHBITDEPTH
+#if !CONFIG_PVQ
/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
* decide whether to include cost of a trailing EOB node or not (i.e. we
* can skip this if the last coefficient in this transform block, e.g. the
@@ -982,6 +1013,7 @@
return cost;
}
+#endif
static void dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col, TX_SIZE tx_size,
@@ -998,11 +1030,18 @@
int shift = (MAX_TX_SCALE - get_tx_scale(xd, tx_type, tx_size)) * 2;
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+#if CONFIG_PVQ
+ tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
+#endif
#if CONFIG_AOM_HIGHBITDEPTH
const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
*out_dist =
av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
shift;
+#elif CONFIG_PVQ
+ *out_dist = av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length,
+ &this_sse) >>
+ shift;
#else
*out_dist =
av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
@@ -1072,6 +1111,7 @@
}
}
+#if !CONFIG_PVQ
static int rate_block(int plane, int block, int coeff_ctx, TX_SIZE tx_size,
struct rdcost_block_args *args) {
return av1_cost_coeffs(&args->cpi->common, args->x, plane, block, coeff_ctx,
@@ -1079,6 +1119,7 @@
args->scan_order->neighbors,
args->use_fast_coef_costing);
}
+#endif
static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
TX_SIZE tx_size) {
@@ -1194,11 +1235,13 @@
args->exit_early = 1;
return;
}
-
+#if !CONFIG_PVQ
rate = rate_block(plane, block, coeff_ctx, tx_size, args);
args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0);
args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0);
-
+#else
+ rate = x->rate;
+#endif
rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);
@@ -1214,8 +1257,11 @@
args->exit_early = 1;
return;
}
-
+#if !CONFIG_PVQ
args->skippable &= !x->plane[plane].eobs[block];
+#else
+ args->skippable &= x->pvq_skip[plane];
+#endif
}
static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi, int *rate,
@@ -1550,7 +1596,14 @@
#if CONFIG_EXT_TX
if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
!xd->lossless[mbmi->segment_id]) {
- for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf, post_buf;
+
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
+
+ for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
if (is_inter) {
if (x->use_default_inter_tx_type &&
tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
@@ -1573,7 +1626,9 @@
txfm_rd_in_plane(x, cpi, &r, &d, &s, &psse, ref_best_rd, 0, bs,
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
-
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
if (r == INT_MAX) continue;
if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) {
if (is_inter) {
@@ -1601,8 +1656,14 @@
*rate = r;
*skip = s;
*sse = psse;
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
}
}
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
} else {
mbmi->tx_type = DCT_DCT;
txfm_rd_in_plane(x, cpi, rate, distortion, skip, sse, ref_best_rd, 0, bs,
@@ -1686,6 +1747,9 @@
TX_TYPE tx_type, best_tx_type = DCT_DCT;
int prune = 0;
+#if CONFIG_PVQ
+ od_rollback_buffer buf;
+#endif
if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
// passing -1 in for tx_type indicates that all 1D
// transforms should be considered for pruning
@@ -1696,6 +1760,10 @@
*skip = 0;
*psse = INT64_MAX;
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &buf);
+#endif
+
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
#if CONFIG_REF_MV
if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
@@ -1719,6 +1787,11 @@
#if !CONFIG_EXT_TX
if (mbmi->tx_size >= TX_32X32) assert(mbmi->tx_type == DCT_DCT);
#endif
+#if CONFIG_PVQ
+ if (best_tx < TX_SIZES)
+ txfm_rd_in_plane(x, cpi, &r, &d, &s, &sse, ref_best_rd, 0, bs, best_tx,
+ cpi->sf.use_fast_coef_costing);
+#endif
}
static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate,
@@ -1906,7 +1979,9 @@
PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a,
ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion,
BLOCK_SIZE bsize, int *y_skip, int64_t rd_thresh) {
+#if !CONFIG_PVQ
const AV1_COMMON *const cm = &cpi->common;
+#endif
PREDICTION_MODE mode;
MACROBLOCKD *const xd = &x->e_mbd;
int64_t best_rd = rd_thresh;
@@ -1927,6 +2002,12 @@
uint16_t best_dst16[8 * 8];
#endif
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf, post_buf;
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
+
memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
xd->mi[0]->mbmi.tx_size = TX_4X4;
@@ -2057,6 +2138,10 @@
}
#endif // CONFIG_AOM_HIGHBITDEPTH
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
+
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
int64_t this_rd;
int ratey = 0;
@@ -2080,15 +2165,54 @@
const int block = (row + idy) * 2 + (col + idx);
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
+#if !CONFIG_PVQ
int16_t *const src_diff =
av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
+#else
+ int lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+ const int diff_stride = 8;
+ tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, block);
+ tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
+ int16_t *pred = &pd->pred[4 * (row * diff_stride + col)];
+ int16_t *src_int16 = &p->src_int16[4 * (row * diff_stride + col)];
+ int i, j, tx_blk_size;
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ int rate_pvq;
+ int skip;
+#endif
xd->mi[0]->bmi[block].as_mode = mode;
av1_predict_intra_block(xd, pd->width, pd->height, TX_4X4, mode, dst,
dst_stride, dst, dst_stride, col + idx,
row + idy, 0);
+#if !CONFIG_PVQ
aom_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
+#else
+ if (lossless) tx_type = DCT_DCT;
+ // transform block size in pixels
+ tx_blk_size = 4;
+
+ // copy uint8 orig and predicted block to int16 buffer
+ // in order to use existing VP10 transform functions
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) {
+ src_int16[diff_stride * j + i] = src[src_stride * j + i];
+ pred[diff_stride * j + i] = dst[dst_stride * j + i];
+ }
+ {
+ FWD_TXFM_PARAM fwd_txfm_param;
+ fwd_txfm_param.tx_type = tx_type;
+ fwd_txfm_param.tx_size = TX_4X4;
+ fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
+ fwd_txfm_param.rd_transform = 0;
+ fwd_txfm_param.lossless = lossless;
+ fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
+ fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
+ }
+#endif
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+#if !CONFIG_PVQ
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type, 0);
const int coeff_ctx =
@@ -2106,13 +2230,28 @@
*(tempa + idx) = !(p->eobs[block] == 0);
*(templ + idy) = !(p->eobs[block] == 0);
can_skip &= (p->eobs[block] == 0);
+#else
+ skip = av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff,
+ &p->eobs[block], pd->dequant, 0, TX_4X4,
+ tx_type, &rate_pvq, x->pvq_speed, NULL);
+ ratey += rate_pvq;
+#endif
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next;
- av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
- dst_stride, p->eobs[block], DCT_DCT, 1);
+#if CONFIG_PVQ
+ if (!skip) {
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0;
+#endif
+ av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
+ dst_stride, p->eobs[block], DCT_DCT, 1);
+#if CONFIG_PVQ
+ }
+#endif
} else {
int64_t dist;
unsigned int tmp;
+#if !CONFIG_PVQ
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type, 0);
const int coeff_ctx =
@@ -2128,9 +2267,13 @@
ratey += av1_cost_coeffs(cm, x, 0, block, coeff_ctx, TX_4X4,
scan_order->scan, scan_order->neighbors,
cpi->sf.use_fast_coef_costing);
- *(tempa + idx) = !(p->eobs[block] == 0);
- *(templ + idy) = !(p->eobs[block] == 0);
- can_skip &= (p->eobs[block] == 0);
+#else
+ skip = av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff,
+ &p->eobs[block], pd->dequant, 0, TX_4X4,
+ tx_type, &rate_pvq, x->pvq_speed, NULL);
+ ratey += rate_pvq;
+#endif
+ // No need for av1_block_error2_c here because ssz is unused
av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
dst_stride, p->eobs[block], tx_type, 0);
cpi->fn_ptr[BLOCK_4X4].vf(src, src_stride, dst, dst_stride, &tmp);
@@ -2141,6 +2284,14 @@
// in the frequency domain, the overhead of encoding effort is low.
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next;
+#if CONFIG_PVQ
+ if (!skip) {
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0;
+#endif
+#if CONFIG_PVQ
+ }
+#endif
}
}
}
@@ -2157,15 +2308,25 @@
*best_mode = mode;
memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
num_4x4_blocks_wide * 4);
}
next : {}
- }
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
+ } // mode decision loop
if (best_rd >= rd_thresh) return best_rd;
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
+
if (y_skip) *y_skip &= best_can_skip;
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
@@ -2672,6 +2833,12 @@
const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, 0);
const PREDICTION_MODE FINAL_MODE_SEARCH = TM_PRED + 1;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf, post_buf;
+
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
bmode_costs = cpi->y_mode_costs[A][L];
#if CONFIG_EXT_INTRA
@@ -2713,6 +2880,9 @@
} else {
mic->mbmi.mode = mode_idx;
}
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
#if CONFIG_EXT_INTRA
is_directional_mode =
(mic->mbmi.mode != DC_PRED && mic->mbmi.mode != TM_PRED);
@@ -2793,9 +2963,16 @@
*rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion;
*skippable = s;
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
}
}
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
+
#if CONFIG_PALETTE
if (cpi->common.allow_screen_content_tools)
rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED],
@@ -2861,12 +3038,12 @@
int is_cost_valid = 1;
if (ref_best_rd < 0) is_cost_valid = 0;
-
+#if !CONFIG_PVQ
if (is_inter_block(mbmi) && is_cost_valid) {
for (plane = 1; plane < MAX_MB_PLANE; ++plane)
av1_subtract_plane(x, bsize, plane);
}
-
+#endif
*rate = 0;
*distortion = 0;
*sse = 0;
@@ -3887,6 +4064,11 @@
int64_t best_rd = INT64_MAX, this_rd;
int this_rate_tokenonly, this_rate, s;
int64_t this_distortion, this_sse;
+#if CONFIG_PVQ
+ od_rollback_buffer buf;
+
+ od_encode_checkpoint(&x->daala_enc, &buf);
+#endif
#if CONFIG_PALETTE
const int rows =
(4 * num_4x4_blocks_high_lookup[bsize]) >> (xd->plane[1].subsampling_y);
@@ -3925,8 +4107,12 @@
continue;
} else {
if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
- &this_sse, bsize, best_rd))
+ &this_sse, bsize, best_rd)) {
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &buf);
+#endif
continue;
+ }
}
this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mbmi->mode][mode];
if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode)
@@ -3934,8 +4120,12 @@
MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
#else
if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
- &this_sse, bsize, best_rd))
+ &this_sse, bsize, best_rd)) {
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &buf);
+#endif
continue;
+ }
this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mbmi->mode][mode];
#endif // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
@@ -3949,7 +4139,21 @@
av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
#endif // CONFIG_PALETTE
+#if CONFIG_PVQ
+ // For chroma channels, multiply lambda by 0.5 when doing intra prediction.
+ // NOTE: Chroma intra prediction has its own separate RDO, but the final
+ // chroma intra mode's D and R are simply added to those of luma, and a
+ // global RDO is then performed to decide the modes of the SB.
+ // Also, for chroma the RDO cannot decide tx_size (it follows luma's
+ // decision) or tx_type (DCT only); only the intra prediction mode is
+ // chroma's own decision, based on its separate RDO.
+ // TODO(yushin): Seek a more reasonable solution than this.
+ this_rd = RDCOST(x->rdmult >> (1 * PVQ_CHROMA_RD), x->rddiv, this_rate,
+ this_distortion);
+ od_encode_rollback(&x->daala_enc, &buf);
+#else
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+#endif
if (this_rd < best_rd) {
mode_selected = mode;
@@ -4296,7 +4500,9 @@
int64_t *distortion, int64_t *sse,
ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
int ir, int ic, int mi_row, int mi_col) {
+#if !CONFIG_PVQ
const AV1_COMMON *const cm = &cpi->common;
+#endif
int k;
MACROBLOCKD *xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -4315,9 +4521,15 @@
TX_SIZE tx_size = mi->mbmi.tx_size;
TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, tx_size);
- const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
const int num_4x4_w = tx_size_wide_unit[tx_size];
const int num_4x4_h = tx_size_high_unit[tx_size];
+#if !CONFIG_PVQ
+ const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
+#else
+ (void)cpi;
+ (void)ta;
+ (void)tl;
+#endif
#if CONFIG_EXT_TX && CONFIG_RECT_TX
assert(IMPLIES(xd->lossless[mi->mbmi.segment_id], tx_size == TX_4X4));
@@ -4330,6 +4542,7 @@
av1_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);
+#if !CONFIG_PVQ
#if CONFIG_AOM_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
aom_highbd_subtract_block(
@@ -4345,6 +4558,7 @@
av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
8, src, p->src.stride, dst, pd->dst.stride);
#endif // CONFIG_AOM_HIGHBITDEPTH
+#endif // !CONFIG_PVQ
k = i;
for (idy = 0; idy < height / 4; idy += num_4x4_h) {
@@ -4352,12 +4566,24 @@
int64_t dist, ssz, rd, rd1, rd2;
int block;
int coeff_ctx;
+#if CONFIG_PVQ
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+ const int diff_stride = 8;
+ tran_low_t *coeff;
+ tran_low_t *dqcoeff;
+ tran_low_t *ref_coeff;
+ int16_t *pred = &pd->pred[4 * (ir * diff_stride + ic)];
+ int16_t *src_int16 = &p->src_int16[4 * (ir * diff_stride + ic)];
+ int ii, j, tx_blk_size;
+ int rate_pvq;
+#endif
k += (idy * 2 + idx);
if (tx_size == TX_4X4)
block = k;
else
block = (i ? 2 : 0);
-
+#if !CONFIG_PVQ
coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)), *(tl + (k >> 1)));
#if CONFIG_NEW_QUANT
av1_xform_quant_fp_nuq(cm, x, 0, block, idy + (i >> 1), idx + (i & 0x01),
@@ -4368,13 +4594,43 @@
#endif // CONFIG_NEW_QUANT
if (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)
av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx);
+#else
+ coeff = BLOCK_OFFSET(p->coeff, k);
+ dqcoeff = BLOCK_OFFSET(pd->dqcoeff, k);
+ ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, k);
+
+ // transform block size in pixels
+ tx_blk_size = 4;
+
+ // copy uint8 orig and predicted block to int16 buffer
+ // in order to use existing VP10 transform functions
+ for (j = 0; j < tx_blk_size; j++)
+ for (ii = 0; ii < tx_blk_size; ii++) {
+ src_int16[diff_stride * j + ii] =
+ src[src_stride * (j + 4 * idy) + (ii + 4 * idx)];
+ pred[diff_stride * j + ii] =
+ dst[dst_stride * (j + 4 * idy) + (ii + 4 * idx)];
+ }
+
+ fwd_txm4x4(src_int16, coeff, diff_stride);
+ fwd_txm4x4(pred, ref_coeff, diff_stride);
+
+ av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff,
+ &p->eobs[k], pd->dequant, 0, TX_4X4, tx_type,
+ &rate_pvq, x->pvq_speed, NULL);
+#endif
+
dist_block(cpi, x, 0, block, idy + (i >> 1), idx + (i & 0x1), tx_size,
&dist, &ssz);
thisdistortion += dist;
thissse += ssz;
+#if !CONFIG_PVQ
thisrate +=
av1_cost_coeffs(cm, x, 0, block, coeff_ctx, tx_size, scan_order->scan,
scan_order->neighbors, cpi->sf.use_fast_coef_costing);
+#else
+ thisrate += rate_pvq;
+#endif
*(ta + (k & 1)) = !(p->eobs[block] == 0);
*(tl + (k >> 1)) = !(p->eobs[block] == 0);
#if CONFIG_EXT_TX
@@ -4840,6 +5096,11 @@
const int has_second_rf = has_second_ref(mbmi);
const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf;
+
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
#if CONFIG_EXT_TX && CONFIG_RECT_TX
mbmi->tx_size =
xd->lossless[mbmi->segment_id] ? TX_4X4 : max_txsize_rect_lookup[bsize];
@@ -4897,6 +5158,11 @@
int mv_idx;
int_mv ref_mvs_sub8x8[2][2];
#endif // CONFIG_EXT_INTER
+#if CONFIG_PVQ
+ od_rollback_buffer idx_buf, post_buf;
+ od_encode_checkpoint(&x->daala_enc, &idx_buf);
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
for (ref = 0; ref < 1 + has_second_rf; ++ref) {
const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
@@ -5066,6 +5332,9 @@
sizeof(bsi->rdstat[index][mode_idx].ta));
memcpy(bsi->rdstat[index][mode_idx].tl, t_left,
sizeof(bsi->rdstat[index][mode_idx].tl));
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &idx_buf);
+#endif
// motion search for newmv (single predictor case only)
if (!has_second_rf &&
@@ -5449,6 +5718,9 @@
#endif
mode_selected = this_mode;
new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
}
continue;
}
@@ -5494,6 +5766,10 @@
#endif
mode_selected = this_mode;
new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
+
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &post_buf);
+#endif
}
} /*for each 4x4 mode*/
@@ -5507,12 +5783,18 @@
#endif // CONFIG_EXT_INTER
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
return INT64_MAX;
}
mode_idx = INTER_OFFSET(mode_selected);
memcpy(t_above, bsi->rdstat[index][mode_idx].ta, sizeof(t_above));
memcpy(t_left, bsi->rdstat[index][mode_idx].tl, sizeof(t_left));
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &post_buf);
+#endif
#if CONFIG_EXT_INTER
mv_idx = (mode_selected == NEWFROMNEARMV) ? 1 : 0;
@@ -5545,10 +5827,16 @@
#endif // CONFIG_EXT_INTER
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
return INT64_MAX;
}
}
} /* for each label */
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
bsi->r = br;
bsi->d = bd;
@@ -7476,8 +7764,10 @@
RD_STATS rd_stats_uv;
#endif
- // Y cost and distortion
+// Y cost and distortion
+#if !CONFIG_PVQ
av1_subtract_plane(x, bsize, 0);
+#endif
#if CONFIG_VAR_TX
if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
RD_STATS rd_stats_y;
@@ -8142,6 +8432,10 @@
int *mode_map = tile_data->mode_map[bsize];
const int mode_search_skip_flags = sf->mode_search_skip_flags;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf;
+#endif
+
#if CONFIG_PALETTE || CONFIG_EXT_INTRA
const int rows = 4 * num_4x4_blocks_high_lookup[bsize];
const int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
@@ -8411,7 +8705,9 @@
x->use_default_inter_tx_type = 1;
else
x->use_default_inter_tx_type = 0;
-
+#if CONFIG_PVQ
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
#if CONFIG_EXT_INTER
for (i = 0; i < MB_MODE_COUNT; ++i)
for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
@@ -8436,6 +8732,9 @@
#if CONFIG_REF_MV
uint8_t ref_frame_type;
#endif
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
mode_index = mode_map[midx];
this_mode = av1_mode_order[mode_index].mode;
ref_frame = av1_mode_order[mode_index].ref_frame[0];
@@ -9846,6 +10145,11 @@
int ref_frame_skip_mask[2] = { 0 };
int internal_active_edge =
av1_active_edge_sb(cpi, mi_row, mi_col) && av1_internal_image_edge(cpi);
+#if CONFIG_PVQ
+ od_rollback_buffer pre_buf;
+
+ od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
#if CONFIG_SUPERTX
best_rd_so_far = INT64_MAX;
@@ -9924,6 +10228,10 @@
int this_skip2 = 0;
int64_t total_sse = INT_MAX;
+#if CONFIG_PVQ
+ od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
+
ref_frame = av1_ref_order[ref_index].ref_frame[0];
second_ref_frame = av1_ref_order[ref_index].ref_frame[1];
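One more note on a pattern that recurs throughout this diff: PVQ measures rate
by actually running the adaptive Daala entropy coder, so every RD trial has
side effects on coder state. Each search loop therefore brackets its trials
with od_encode_checkpoint()/od_encode_rollback(): pre_buf undoes each
candidate, and post_buf re-applies the winner once the search is over. A
minimal self-contained sketch of that discipline (toy_* types stand in for the
real daala_enc and od_* API):

#include <limits.h>

/* Toy stand-ins for daala_enc and od_rollback_buffer; the real coder
 * state is of course richer than a single int. */
typedef struct { int adapt_state; } toy_enc;
typedef struct { int saved_state; } toy_rollback_buffer;

static void toy_checkpoint(const toy_enc *enc, toy_rollback_buffer *buf) {
  buf->saved_state = enc->adapt_state; /* snapshot coder state */
}

static void toy_rollback(toy_enc *enc, const toy_rollback_buffer *buf) {
  enc->adapt_state = buf->saved_state; /* restore coder state */
}

/* Stand-in for an RD trial that codes a candidate and adapts the coder. */
static long toy_encode_trial(toy_enc *enc, int mode) {
  enc->adapt_state += mode + 1;        /* probabilities adapt as we code */
  return (long)(enc->adapt_state % 7); /* pretend RD cost */
}

static int toy_mode_search(toy_enc *enc, int num_modes) {
  toy_rollback_buffer pre_buf, post_buf;
  long best_rd = LONG_MAX;
  int mode, best_mode = 0;
  toy_checkpoint(enc, &pre_buf);  /* state before any trial */
  toy_checkpoint(enc, &post_buf); /* best-so-far state */
  for (mode = 0; mode < num_modes; mode++) {
    const long rd = toy_encode_trial(enc, mode); /* mutates coder state */
    if (rd < best_rd) {
      best_rd = rd;
      best_mode = mode;
      toy_checkpoint(enc, &post_buf); /* remember the winner's state */
    }
    toy_rollback(enc, &pre_buf); /* undo the trial before the next one */
  }
  toy_rollback(enc, &post_buf); /* leave the coder in the winner's state */
  return best_mode;
}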