port ext_quant experiment into research branch
Change-Id: I46db3720e3dd75a8b74615824684f0dfb1d6b6df
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 02b450e..67af89d 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -983,6 +983,37 @@
rc_cfg->worst_allowed_q = extra_cfg->lossless ? 0 : cfg->rc_max_quantizer;
rc_cfg->qp = extra_cfg->qp;
+#if CONFIG_EXTQUANT
+ int offset_best_allowed_q;
+ int offset_worst_allowed_q;
+ int offset_qp;
+ switch (cfg->g_bit_depth) {
+ case AOM_BITS_8:
+ offset_best_allowed_q = 0;
+ offset_worst_allowed_q = 0;
+ offset_qp = 0;
+ break;
+ case AOM_BITS_10:
+ offset_best_allowed_q = qindex_10b_offset[rc_cfg->best_allowed_q != 0];
+ offset_worst_allowed_q = qindex_10b_offset[rc_cfg->worst_allowed_q != 0];
+ offset_qp = qindex_10b_offset[rc_cfg->qp != 0];
+ break;
+ case AOM_BITS_12:
+ offset_best_allowed_q = qindex_12b_offset[rc_cfg->best_allowed_q != 0];
+ offset_worst_allowed_q = qindex_12b_offset[rc_cfg->worst_allowed_q != 0];
+ offset_qp = qindex_12b_offset[rc_cfg->qp != 0];
+ break;
+ default:
+ offset_best_allowed_q = 0;
+ offset_worst_allowed_q = 0;
+ offset_qp = 0;
+ break;
+ }
+ rc_cfg->best_allowed_q += offset_best_allowed_q;
+ rc_cfg->worst_allowed_q += offset_worst_allowed_q;
+ rc_cfg->qp += offset_qp;
+#endif
+
rc_cfg->under_shoot_pct = cfg->rc_undershoot_pct;
rc_cfg->over_shoot_pct = cfg->rc_overshoot_pct;
rc_cfg->maximum_buffer_size_ms = is_vbr ? 240000 : cfg->rc_buf_sz;
@@ -1243,6 +1274,34 @@
(uint8_t)cfg->rc_superres_kf_denominator;
superres_cfg->superres_qthresh = cfg->rc_superres_qthresh;
superres_cfg->superres_kf_qthresh = cfg->rc_superres_kf_qthresh;
+#if CONFIG_EXTQUANT
+ int offset_superres_qthresh;
+ int offset_superres_kf_qthresh;
+ switch (cfg->g_bit_depth) {
+ case AOM_BITS_8:
+ offset_superres_qthresh = 0;
+ offset_superres_kf_qthresh = 0;
+ break;
+ case AOM_BITS_10:
+ offset_superres_qthresh =
+ qindex_10b_offset[superres_cfg->superres_qthresh != 0];
+ offset_superres_kf_qthresh =
+ qindex_10b_offset[superres_cfg->superres_kf_qthresh != 0];
+ break;
+ case AOM_BITS_12:
+ offset_superres_qthresh =
+ qindex_12b_offset[superres_cfg->superres_qthresh != 0];
+ offset_superres_kf_qthresh =
+ qindex_12b_offset[superres_cfg->superres_kf_qthresh != 0];
+ break;
+ default:
+ offset_superres_qthresh = 0;
+ offset_superres_kf_qthresh = 0;
+ break;
+ }
+ superres_cfg->superres_qthresh += offset_superres_qthresh;
+ superres_cfg->superres_kf_qthresh += offset_superres_kf_qthresh;
+#endif
if (superres_cfg->superres_mode == AOM_SUPERRES_FIXED &&
superres_cfg->superres_scale_denominator == SCALE_NUMERATOR &&
superres_cfg->superres_kf_scale_denominator == SCALE_NUMERATOR) {
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h
index 0f39579..74a754e 100644
--- a/av1/common/av1_common_int.h
+++ b/av1/common/av1_common_int.h
@@ -59,6 +59,14 @@
#define FRAME_ID_LENGTH 15
#define DELTA_FRAME_ID_LENGTH 14
+#if CONFIG_EXTQUANT
+#define DELTA_DCQUANT_BITS 5  // width of the literal the decoder reads for base_y_dc_delta_q / base_uv_dc_delta_q
+#define DELTA_DCQUANT_MAX (1 << (DELTA_DCQUANT_BITS - 2))  // evaluates to 8
+#define DELTA_DCQUANT_MIN (DELTA_DCQUANT_MAX - (1 << DELTA_DCQUANT_BITS) + 1)  // evaluates to -23; MIN + a 5-bit literal spans [-23, 8]
+#endif // CONFIG_EXTQUANT
+
+#define DEBUG_EXTQUANT 0
+
#define FRAME_CONTEXTS (FRAME_BUFFERS + 1)
// Extra frame context which is always kept at default values
#define FRAME_CONTEXT_DEFAULTS (FRAME_CONTEXTS - 1)
@@ -300,6 +308,10 @@
int subsampling_y; // Chroma subsampling for y
aom_chroma_sample_position_t chroma_sample_position;
uint8_t separate_uv_delta_q;
+#if CONFIG_EXTQUANT
+ int8_t base_y_dc_delta_q;
+ int8_t base_uv_dc_delta_q;
+#endif // CONFIG_EXTQUANT
uint8_t film_grain_params_present;
// Operating point info.
@@ -639,9 +651,15 @@
* shift/scale as TX.
*/
/**@{*/
+#if CONFIG_EXTQUANT
+ int32_t y_dequant_QTX[MAX_SEGMENTS][2]; /*!< Dequant for Y plane */
+ int32_t u_dequant_QTX[MAX_SEGMENTS][2]; /*!< Dequant for U plane */
+ int32_t v_dequant_QTX[MAX_SEGMENTS][2]; /*!< Dequant for V plane */
+#else
int16_t y_dequant_QTX[MAX_SEGMENTS][2]; /*!< Dequant for Y plane */
int16_t u_dequant_QTX[MAX_SEGMENTS][2]; /*!< Dequant for U plane */
int16_t v_dequant_QTX[MAX_SEGMENTS][2]; /*!< Dequant for V plane */
+#endif
/**@}*/
/**
@@ -1068,6 +1086,11 @@
#if CONFIG_LPF_MASK
int is_decoding;
#endif // CONFIG_LPF_MASK
+
+#if DEBUG_EXTQUANT
+ FILE *fEncCoeffLog;
+ FILE *fDecCoeffLog;
+#endif
} AV1_COMMON;
/*!\cond */
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 447c0ce..b61b431 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -277,21 +277,28 @@
add_proto qw/int64_t av1_block_error_lp/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size";
specialize qw/av1_block_error_lp avx2 neon/;
- add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_quantize_fp sse2 avx2 neon/;
+ if (aom_config("CONFIG_EXTQUANT") eq "yes") {
+ add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr, const int32_t *round_ptr, const int32_t *quant_ptr, const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr, const int32_t *round_ptr, const int32_t *quant_ptr, const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr, const int32_t *round_ptr, const int32_t *quant_ptr, const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void av1_quantize_lp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *round_ptr, const int32_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan";
+ add_proto qw/void aom_quantize_b_helper/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr, const int32_t *round_ptr, const int32_t *quant_ptr, const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale";
+ } else {
+ add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/av1_quantize_fp sse2 avx2 neon/;
- add_proto qw/void av1_quantize_lp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan";
- specialize qw/av1_quantize_lp avx2 neon/;
+ add_proto qw/void av1_quantize_lp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan";
+ specialize qw/av1_quantize_lp avx2 neon/;
+ add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/av1_quantize_fp_32x32 neon avx2/;
- add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_quantize_fp_32x32 neon avx2/;
+ add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/av1_quantize_fp_64x64 neon avx2/;
- add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_quantize_fp_64x64 neon avx2/;
-
- add_proto qw/void aom_quantize_b_helper/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale";
- specialize qw/aom_quantize_b_helper neon/;
+ add_proto qw/void aom_quantize_b_helper/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale";
+ specialize qw/aom_quantize_b_helper neon/;
+ }
# fdct functions
@@ -351,14 +358,23 @@
add_proto qw/void av1_highbd_apply_temporal_filter/, "const struct yv12_buffer_config *ref_frame, const struct macroblockd *mbd, const BLOCK_SIZE block_size, const int mb_row, const int mb_col, const int num_planes, const double *noise_levels, const MV *subblock_mvs, const int *subblock_mses, const int q_factor, const int filter_strength, const uint8_t *pred, uint32_t *accum, uint16_t *count";
specialize qw/av1_highbd_apply_temporal_filter sse2/;
}
- add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
+
+ if (aom_config("CONFIG_EXTQUANT") eq "yes") {
+ add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr, const int32_t *round_ptr, const int32_t *quant_ptr, const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
+ } else {
+ add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
+ }
# ENCODEMB INVOKE
add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/av1_highbd_block_error sse2 avx2/;
- add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
- specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
+ if (aom_config("CONFIG_EXTQUANT") eq "yes") {
+ add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr, const int32_t *round_ptr, const int32_t *quant_ptr, const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
+ } else {
+ add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
+ specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
+ }
add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/av1_highbd_fwht4x4 neon/;
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 80b72ce..b1759bb 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -430,7 +430,11 @@
// The dequantizers below are true dequantizers used only in the
// dequantization process. They have the same coefficient
// shift/scale as TX.
+#if CONFIG_EXTQUANT
+ int32_t seg_dequant_QTX[MAX_SEGMENTS][2];
+#else
int16_t seg_dequant_QTX[MAX_SEGMENTS][2];
+#endif
// Pointer to color index map of:
// - Current coding block, on encoder side.
// - Current superblock, on decoder side.
diff --git a/av1/common/quant_common.c b/av1/common/quant_common.c
index e96d71a..fcf2582 100644
--- a/av1/common/quant_common.c
+++ b/av1/common/quant_common.c
@@ -16,6 +16,8 @@
#include "av1/common/quant_common.h"
#include "av1/common/seg_common.h"
+// clang-format off
+#if !CONFIG_EXTQUANT
static const int16_t dc_qlookup_QTX[QINDEX_RANGE] = {
4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18,
19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30,
@@ -87,7 +89,119 @@
13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949,
19718, 20521, 21387,
};
+#endif // !CONFIG_EXTQUANT
+#if CONFIG_EXTQUANT
+// 32, q_index = 0
+// qstep = 40 * 2^((q_index - 1)/24) q_index in [1, 255]
+#if 0
+static const uint16_t ac_qlookup_QTX[QINDEX_RANGE_8_BITS] = {
+ 32, 40, 41, 42, 44, 45, 46, 48, 49, 50, 52,
+ 53, 55, 57, 58, 60, 62, 63, 65, 67, 69, 71,
+ 73, 76, 78, 80, 82, 85, 87, 90, 92, 95, 98,
+ 101, 104, 107, 110, 113, 116, 120, 123, 127, 131, 135,
+ 138, 143, 147, 151, 155, 160, 165, 170, 174, 180, 185,
+ 190, 196, 202, 207, 214, 220, 226, 233, 240, 247, 254,
+ 261, 269, 277, 285, 293, 302, 311, 320, 329, 339, 349,
+ 359, 370, 381, 392, 403, 415, 427, 440, 453, 466, 479,
+ 494, 508, 523, 538, 554, 570, 587, 604, 622, 640, 659,
+ 678, 698, 718, 739, 761, 783, 806, 830, 854, 879, 905,
+ 932, 959, 987, 1016, 1046, 1076, 1108, 1140, 1174, 1208, 1244,
+ 1280, 1318, 1356, 1396, 1437, 1479, 1522, 1567, 1613, 1660, 1709,
+ 1759, 1810, 1863, 1918, 1974, 2032, 2091, 2153, 2216, 2281, 2348,
+ 2416, 2487, 2560, 2635, 2712, 2792, 2874, 2958, 3044, 3134, 3225,
+ 3320, 3417, 3517, 3620, 3726, 3836, 3948, 4064, 4183, 4305, 4432,
+ 4561, 4695, 4833, 4974, 5120, 5270, 5424, 5583, 5747, 5915, 6089,
+ 6267, 6451, 6640, 6834, 7035, 7241, 7453, 7671, 7896, 8127, 8366,
+ 8611, 8863, 9123, 9390, 9665, 9948, 10240, 10540, 10849, 11167, 11494,
+ 11831, 12177, 12534, 12902, 13280, 13669, 14069, 14482, 14906, 15343, 15792,
+ 16255, 16731, 17222, 17726, 18246, 18780, 19331, 19897, 20480, 21080, 21698,
+ 22334, 22988, 23662, 24355, 25069, 25803, 26559, 27338, 28139, 28963, 29812,
+ 30685, 31584, 32510, 33463, 34443, 35452, 36491, 37560, 38661, 39794, 40960,
+ 42160, 43396, 44667, 45976, 47323, 48710, 50137, 51606, 53119, 54675, 56277,
+ 57926, 59624, 61371
+};
+#else
+#if 0
+// 32, q_index = 0
+// Q = 40 * 2^((q_index - 1)/24) q_index in [1, 24]
+// Q[(q_index - 1) % 24) + 1] * 2^((q_index-1)/24) q_index in [25, 255]
+static const uint16_t ac_qlookup_QTX[25] = {
+ 32, 40, 41, 42, 44, 45, 46, 48, 49, 50, 52,
+ 53, 55, 57, 58, 60, 62, 63, 65, 67, 69, 71,
+ 73, 76, 78
+};
+
+#ifndef NDEBUG
+static const uint16_t ac_qlookup_QTX_full[QINDEX_RANGE_8_BITS] = {
+ 32, 40, 41, 42, 44, 45, 46, 48, 49, 50, 52,
+ 53, 55, 57, 58, 60, 62, 63, 65, 67, 69, 71,
+ 73, 76, 78, 80, 82, 84, 88, 90, 92, 96, 98,
+ 100, 104, 106, 110, 114, 116, 120, 124, 126, 130, 134,
+ 138, 142, 146, 152, 156, 160, 164, 168, 176, 180, 184,
+ 192, 196, 200, 208, 212, 220, 228, 232, 240, 248, 252,
+ 260, 268, 276, 284, 292, 304, 312, 320, 328, 336, 352,
+ 360, 368, 384, 392, 400, 416, 424, 440, 456, 464, 480,
+ 496, 504, 520, 536, 552, 568, 584, 608, 624, 640, 656,
+ 672, 704, 720, 736, 768, 784, 800, 832, 848, 880, 912,
+ 928, 960, 992, 1008, 1040, 1072, 1104, 1136, 1168, 1216, 1248,
+ 1280, 1312, 1344, 1408, 1440, 1472, 1536, 1568, 1600, 1664, 1696,
+ 1760, 1824, 1856, 1920, 1984, 2016, 2080, 2144, 2208, 2272, 2336,
+ 2432, 2496, 2560, 2624, 2688, 2816, 2880, 2944, 3072, 3136, 3200,
+ 3328, 3392, 3520, 3648, 3712, 3840, 3968, 4032, 4160, 4288, 4416,
+ 4544, 4672, 4864, 4992, 5120, 5248, 5376, 5632, 5760, 5888, 6144,
+ 6272, 6400, 6656, 6784, 7040, 7296, 7424, 7680, 7936, 8064, 8320,
+ 8576, 8832, 9088, 9344, 9728, 9984, 10240, 10496, 10752, 11264, 11520,
+ 11776, 12288, 12544, 12800, 13312, 13568, 14080, 14592, 14848, 15360, 15872,
+ 16128, 16640, 17152, 17664, 18176, 18688, 19456, 19968, 20480, 20992, 21504,
+ 22528, 23040, 23552, 24576, 25088, 25600, 26624, 27136, 28160, 29184, 29696,
+ 30720, 31744, 32256, 33280, 34304, 35328, 36352, 37376, 38912, 39936, 40960,
+ 41984, 43008, 45056, 46080, 47104, 49152, 50176, 51200, 53248, 54272, 56320,
+ 58368, 59392, 61440
+};
+#endif // !NDEBUG
+#endif // #if 0
+
+// Q(0) = 32
+// Q(q_index) = round(2^((q_index + 127) / 24)) for q_index in [1, 24]
+// Q(q_index) = Q(((q_index - 1) % 24) + 1) << ((q_index - 1) / 24) (integer division) for q_index in [25, 255]
+static const uint16_t ac_qlookup_QTX[25] = {
+ 32, 40, 41, 43, 44, 45, 47, 48, 49, 51, 52,
+ 54, 55, 57, 59, 60, 62, 64, 66, 68, 70, 72,
+ 74, 76, 78
+};
+
+#ifndef NDEBUG
+static const uint16_t ac_qlookup_QTX_full[QINDEX_RANGE_8_BITS] = {
+ 32, 40, 41, 43, 44, 45, 47, 48, 49, 51, 52,
+ 54, 55, 57, 59, 60, 62, 64, 66, 68, 70, 72,
+ 74, 76, 78, 80, 82, 86, 88, 90, 94, 96, 98,
+ 102, 104, 108, 110, 114, 118, 120, 124, 128, 132, 136,
+ 140, 144, 148, 152, 156, 160, 164, 172, 176, 180, 188,
+ 192, 196, 204, 208, 216, 220, 228, 236, 240, 248, 256,
+ 264, 272, 280, 288, 296, 304, 312, 320, 328, 344, 352,
+ 360, 376, 384, 392, 408, 416, 432, 440, 456, 472, 480,
+ 496, 512, 528, 544, 560, 576, 592, 608, 624, 640, 656,
+ 688, 704, 720, 752, 768, 784, 816, 832, 864, 880, 912,
+ 944, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248,
+ 1280, 1312, 1376, 1408, 1440, 1504, 1536, 1568, 1632, 1664, 1728,
+ 1760, 1824, 1888, 1920, 1984, 2048, 2112, 2176, 2240, 2304, 2368,
+ 2432, 2496, 2560, 2624, 2752, 2816, 2880, 3008, 3072, 3136, 3264,
+ 3328, 3456, 3520, 3648, 3776, 3840, 3968, 4096, 4224, 4352, 4480,
+ 4608, 4736, 4864, 4992, 5120, 5248, 5504, 5632, 5760, 6016, 6144,
+ 6272, 6528, 6656, 6912, 7040, 7296, 7552, 7680, 7936, 8192, 8448,
+ 8704, 8960, 9216, 9472, 9728, 9984, 10240, 10496, 11008, 11264, 11520,
+ 12032, 12288, 12544, 13056, 13312, 13824, 14080, 14592, 15104, 15360, 15872,
+ 16384, 16896, 17408, 17920, 18432, 18944, 19456, 19968, 20480, 20992, 22016,
+ 22528, 23040, 24064, 24576, 25088, 26112, 26624, 27648, 28160, 29184, 30208,
+ 30720, 31744, 32768, 33792, 34816, 35840, 36864, 37888, 38912, 39936, 40960,
+ 41984, 44032, 45056, 46080, 48128, 49152, 50176, 52224, 53248, 55296, 56320,
+ 58368, 60416, 61440
+};
+#endif // !NDEBUG
+
+#endif
+#else
static const int16_t ac_qlookup_QTX[QINDEX_RANGE] = {
4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -110,7 +224,9 @@
1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537,
1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
};
+#endif
+#if !CONFIG_EXTQUANT
static const int16_t ac_qlookup_10_QTX[QINDEX_RANGE] = {
4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40,
44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92,
@@ -160,6 +276,8 @@
22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599,
28143, 28687, 29247,
};
+#endif
+// clang-format on
// Coefficient scaling and quantization with AV1 TX are tailored to
// the AV1 TX transforms. Regardless of the bit-depth of the input,
@@ -190,6 +308,68 @@
// addition, the minimum allowable quantizer is 4; smaller values will
// underflow to 0 in the actual quantization routines.
+#if CONFIG_EXTQUANT
+int32_t av1_dc_quant_QTX(int qindex, int delta, int base_dc_delta_q,
+ aom_bit_depth_t bit_depth) {  // Returns the DC quantizer value for q index (qindex + base_dc_delta_q + delta) at the given bit depth.
+ int q_clamped;  // effective q index after applying both deltas and clamping
+ if ((qindex == 0) && (delta + base_dc_delta_q <= 0))
+ q_clamped = 0;  // index 0 survives only when qindex is 0 and the combined delta is not positive
+ else
+ q_clamped = clamp(qindex + base_dc_delta_q + delta, 1,  // otherwise the index is kept in [1, per-bit-depth maximum]
+ bit_depth == AOM_BITS_8
+ ? MAXQ_8_BITS
+ : bit_depth == AOM_BITS_10 ? MAXQ_10_BITS : MAXQ);
+
+ if (q_clamped == 0) return (int32_t)ac_qlookup_QTX[q_clamped];  // entry 0 of the table, returned unscaled for every bit depth
+
+ int qindex_offset = MAXQ_OFFSET * (bit_depth - 8);  // NOTE(review): relies on aom_bit_depth_t enumerators being numerically 8/10/12 - confirm
+
+ // Q is derived from the 25-entry ac_qlookup_QTX table (the DC path uses it too):
+ // Q(0) = table[0]; Q(q) = table[q] for q in [1, 24];
+ // Q(q) = table[((q - 1) % 24) + 1] << ((q - 1) / 24) for q in [25, 255].
+ // Indices above MAXQ_8_BITS are lowered by qindex_offset first and the result is scaled by 4 or 16.
+ if (q_clamped > MAXQ_8_BITS) {
+ switch (bit_depth) {
+ case AOM_BITS_8: assert(q_clamped <= MAXQ_8_BITS);  // fall through - not expected to be reached: 8-bit indices were clamped to MAXQ_8_BITS above
+ case AOM_BITS_10: {
+ int32_t Q;
+ if ((q_clamped - qindex_offset) < 25) {  // NOTE(review): looks unreachable here, since q_clamped > MAXQ_8_BITS - confirm
+ Q = ac_qlookup_QTX[q_clamped - qindex_offset];
+ } else {
+ Q = ac_qlookup_QTX[(q_clamped - qindex_offset - 1) % 24 + 1]
+ << ((q_clamped - qindex_offset - 1) / 24);
+ assert(Q == ac_qlookup_QTX_full[q_clamped - qindex_offset]);  // cross-check against the full table, which exists only when NDEBUG is not defined
+ }
+ return 4 * Q;  // the table doubles every 24 indices, so the 2 * MAXQ_OFFSET index reduction is undone by a factor of 4
+ }
+ case AOM_BITS_12: {
+ int32_t Q;
+ if ((q_clamped - qindex_offset) < 25) {  // NOTE(review): looks unreachable here, since q_clamped > MAXQ_8_BITS - confirm
+ Q = ac_qlookup_QTX[q_clamped - qindex_offset];
+ } else {
+ Q = ac_qlookup_QTX[(q_clamped - qindex_offset - 1) % 24 + 1]
+ << ((q_clamped - qindex_offset - 1) / 24);
+ assert(Q == ac_qlookup_QTX_full[q_clamped - qindex_offset]);  // cross-check against the full table, which exists only when NDEBUG is not defined
+ }
+ return 16 * Q;  // the 4 * MAXQ_OFFSET index reduction is undone by a factor of 16
+ }
+ default:
+ assert(0 &&
+ "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
+ return -1;  // unrecognized bit depth: asserts in debug builds, returns -1 otherwise
+ }
+ } else {
+ int32_t Q;  // index fits the 8-bit range: no offset and no extra scaling, whatever the bit depth
+ if (q_clamped < 25) {
+ Q = ac_qlookup_QTX[q_clamped];
+ } else {
+ Q = ac_qlookup_QTX[((q_clamped - 1) % 24) + 1] << ((q_clamped - 1) / 24);
+ assert(Q == ac_qlookup_QTX_full[q_clamped]);  // cross-check against the full table, which exists only when NDEBUG is not defined
+ }
+ return Q;
+ }
+}
+#else
int16_t av1_dc_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) {
const int q_clamped = clamp(qindex + delta, 0, MAXQ);
switch (bit_depth) {
@@ -201,7 +381,69 @@
return -1;
}
}
+#endif // CONFIG_EXTQUANT
+#if CONFIG_EXTQUANT
+int32_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) {  // Returns the AC quantizer value for q index (qindex + delta) at the given bit depth.
+ int q_clamped;  // effective q index after applying delta and clamping
+ if ((qindex == 0) && (delta <= 0))
+ q_clamped = 0;  // index 0 survives only when qindex is 0 and delta is not positive
+ else
+ q_clamped = clamp(qindex + delta, 1,  // otherwise the index is kept in [1, per-bit-depth maximum]
+ bit_depth == AOM_BITS_8
+ ? MAXQ_8_BITS
+ : bit_depth == AOM_BITS_10 ? MAXQ_10_BITS : MAXQ);
+
+ if (q_clamped == 0) return (int32_t)ac_qlookup_QTX[q_clamped];  // entry 0 of the table, returned unscaled for every bit depth
+
+ int qindex_offset = MAXQ_OFFSET * (bit_depth - 8);  // NOTE(review): relies on aom_bit_depth_t enumerators being numerically 8/10/12 - confirm
+
+ // Q is derived from the 25-entry ac_qlookup_QTX table:
+ // Q(0) = table[0]; Q(q) = table[q] for q in [1, 24];
+ // Q(q) = table[((q - 1) % 24) + 1] << ((q - 1) / 24) for q in [25, 255].
+ // Indices above MAXQ_8_BITS are lowered by qindex_offset first and the result is scaled by 4 or 16.
+ if (q_clamped > MAXQ_8_BITS) {
+ switch (bit_depth) {
+ case AOM_BITS_8: assert(q_clamped <= MAXQ_8_BITS);  // fall through - not expected to be reached: 8-bit indices were clamped to MAXQ_8_BITS above
+ case AOM_BITS_10: {
+ int32_t Q;
+ if ((q_clamped - qindex_offset) < 25) {  // NOTE(review): looks unreachable here, since q_clamped > MAXQ_8_BITS - confirm
+ Q = ac_qlookup_QTX[q_clamped - qindex_offset];
+ } else {
+ Q = ac_qlookup_QTX[(q_clamped - qindex_offset - 1) % 24 + 1]
+ << ((q_clamped - qindex_offset - 1) / 24);
+ assert(Q == ac_qlookup_QTX_full[q_clamped - qindex_offset]);  // cross-check against the full table, which exists only when NDEBUG is not defined
+ }
+ return 4 * Q;  // the table doubles every 24 indices, so the 2 * MAXQ_OFFSET index reduction is undone by a factor of 4
+ }
+ case AOM_BITS_12: {
+ int32_t Q;
+ if ((q_clamped - qindex_offset) < 25) {  // NOTE(review): looks unreachable here, since q_clamped > MAXQ_8_BITS - confirm
+ Q = ac_qlookup_QTX[q_clamped - qindex_offset];
+ } else {
+ Q = ac_qlookup_QTX[(q_clamped - qindex_offset - 1) % 24 + 1]
+ << ((q_clamped - qindex_offset - 1) / 24);
+ assert(Q == ac_qlookup_QTX_full[q_clamped - qindex_offset]);  // cross-check against the full table, which exists only when NDEBUG is not defined
+ }
+ return 16 * Q;  // the 4 * MAXQ_OFFSET index reduction is undone by a factor of 16
+ }
+ default:
+ assert(0 &&
+ "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
+ return -1;  // unrecognized bit depth: asserts in debug builds, returns -1 otherwise
+ }
+ } else {
+ int32_t Q;  // index fits the 8-bit range: no offset and no extra scaling, whatever the bit depth
+ if (q_clamped < 25) {
+ Q = ac_qlookup_QTX[q_clamped];
+ } else {
+ Q = ac_qlookup_QTX[((q_clamped - 1) % 24) + 1] << ((q_clamped - 1) / 24);
+ assert(Q == ac_qlookup_QTX_full[q_clamped]);  // cross-check against the full table, which exists only when NDEBUG is not defined
+ }
+ return Q;
+ }
+}
+#else
int16_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) {
const int q_clamped = clamp(qindex + delta, 0, MAXQ);
switch (bit_depth) {
@@ -213,13 +455,26 @@
return -1;
}
}
+#endif
int av1_get_qindex(const struct segmentation *seg, int segment_id,
- int base_qindex) {
+ int base_qindex
+#if CONFIG_EXTQUANT
+ ,
+ aom_bit_depth_t bit_depth
+#endif
+) {
if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
const int seg_qindex = base_qindex + data;
+#if CONFIG_EXTQUANT
+ return clamp(seg_qindex, 0,
+ bit_depth == AOM_BITS_8
+ ? MAXQ_8_BITS
+ : bit_depth == AOM_BITS_10 ? MAXQ_10_BITS : MAXQ);
+#else
return clamp(seg_qindex, 0, MAXQ);
+#endif
} else {
return base_qindex;
}
diff --git a/av1/common/quant_common.h b/av1/common/quant_common.h
index 9c30204..5cde276 100644
--- a/av1/common/quant_common.h
+++ b/av1/common/quant_common.h
@@ -22,10 +22,23 @@
extern "C" {
#endif
+#if CONFIG_EXTQUANT
+#define MINQ 0  // smallest q index
+#define QINDEX_BITS 9  // bits the decoder reads for base_qindex when the bit depth is not 8
+#define QINDEX_BITS_UNEXT 8  // bits the decoder reads for base_qindex at 8-bit depth
+#define MAXQ_8_BITS 255  // largest q index for 8-bit streams
+#define MAXQ_OFFSET 24  // additional q indices granted per extra bit of depth beyond 8
+#define MAXQ (255 + 4 * MAXQ_OFFSET)  // evaluates to 351; upper clamp used when the depth is neither 8 nor 10 bit
+#define MAXQ_10_BITS (255 + 2 * MAXQ_OFFSET)  // evaluates to 303; largest q index for 10-bit streams
+#define QINDEX_RANGE (MAXQ - MINQ + 1)  // evaluates to 352
+#define QINDEX_RANGE_8_BITS (MAXQ_8_BITS - MINQ + 1)  // evaluates to 256
+#define QINDEX_RANGE_10_BITS (MAXQ_10_BITS - MINQ + 1)  // evaluates to 304
+#else
#define MINQ 0
#define MAXQ 255
-#define QINDEX_RANGE (MAXQ - MINQ + 1)
#define QINDEX_BITS 8
+#define QINDEX_RANGE (MAXQ - MINQ + 1)
+#endif
// Total number of QM sets stored
#define QM_LEVEL_BITS 4
#define NUM_QM_LEVELS (1 << QM_LEVEL_BITS)
@@ -41,11 +54,22 @@
struct CommonQuantParams;
struct macroblockd;
+#if CONFIG_EXTQUANT
+int32_t av1_dc_quant_QTX(int qindex, int delta, int base_dc_delta_q,
+ aom_bit_depth_t bit_depth);
+int32_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth);
+#else
int16_t av1_dc_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth);
int16_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth);
+#endif
int av1_get_qindex(const struct segmentation *seg, int segment_id,
- int base_qindex);
+ int base_qindex
+#if CONFIG_EXTQUANT
+ ,
+ aom_bit_depth_t bit_depth
+#endif
+);
// Returns true if we are using quantization matrix.
bool av1_use_qmatrix(const struct CommonQuantParams *quant_params,
@@ -53,8 +77,21 @@
// Reduce the large number of quantizers to a smaller number of levels for which
// different matrices may be defined
-static INLINE int aom_get_qmlevel(int qindex, int first, int last) {
+static INLINE int aom_get_qmlevel(int qindex, int first, int last  // maps qindex linearly onto the level range [first, last]
+#if CONFIG_EXTQUANT
+ ,
+ aom_bit_depth_t bit_depth  // selects which q-index range the mapping divides by
+#endif
+) {
+#if CONFIG_EXTQUANT
+ return first + (qindex * (last + 1 - first)) /  // integer division by the q-index range of the given bit depth
+ (bit_depth == AOM_BITS_8
+ ? QINDEX_RANGE_8_BITS
+ : bit_depth == AOM_BITS_10 ? QINDEX_RANGE_10_BITS
+ : QINDEX_RANGE);
+#else
return first + (qindex * (last + 1 - first)) / QINDEX_RANGE;
+#endif
}
// Initialize all global quant/dequant matrices.
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 89806a8..f2fc8f2 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1329,8 +1329,13 @@
if (cm->delta_q_info.delta_q_present_flag) {
for (int i = 0; i < MAX_SEGMENTS; i++) {
+#if CONFIG_EXTQUANT
+ const int current_qindex = av1_get_qindex(
+ &cm->seg, i, xd->current_base_qindex, cm->seq_params.bit_depth);
+#else
const int current_qindex =
av1_get_qindex(&cm->seg, i, xd->current_base_qindex);
+#endif
const CommonQuantParams *const quant_params = &cm->quant_params;
for (int j = 0; j < num_planes; ++j) {
const int dc_delta_q = j == 0 ? quant_params->y_dc_delta_q
@@ -1339,8 +1344,13 @@
const int ac_delta_q = j == 0 ? 0
: (j == 1 ? quant_params->u_ac_delta_q
: quant_params->v_ac_delta_q);
- xd->plane[j].seg_dequant_QTX[i][0] = av1_dc_quant_QTX(
- current_qindex, dc_delta_q, cm->seq_params.bit_depth);
+ xd->plane[j].seg_dequant_QTX[i][0] =
+ av1_dc_quant_QTX(current_qindex, dc_delta_q,
+#if CONFIG_EXTQUANT
+ j == 0 ? cm->seq_params.base_y_dc_delta_q
+ : cm->seq_params.base_uv_dc_delta_q,
+#endif
+ cm->seq_params.bit_depth);
xd->plane[j].seg_dequant_QTX[i][1] = av1_ac_quant_QTX(
current_qindex, ac_delta_q, cm->seq_params.bit_depth);
}
@@ -2045,9 +2055,16 @@
static AOM_INLINE void setup_quantization(CommonQuantParams *quant_params,
int num_planes,
+ aom_bit_depth_t bit_depth,
bool separate_uv_delta_q,
struct aom_read_bit_buffer *rb) {
+#if CONFIG_EXTQUANT
+ quant_params->base_qindex = aom_rb_read_literal(
+ rb, bit_depth == AOM_BITS_8 ? QINDEX_BITS_UNEXT : QINDEX_BITS);
+#else
+ (void)bit_depth;
quant_params->base_qindex = aom_rb_read_literal(rb, QINDEX_BITS);
+#endif
quant_params->y_dc_delta_q = read_delta_q(rb);
if (num_planes > 1) {
int diff_uv_delta = 0;
@@ -2093,14 +2110,26 @@
for (int i = 0; i < max_segments; ++i) {
const int qindex = xd->qindex[i];
quant_params->y_dequant_QTX[i][0] =
- av1_dc_quant_QTX(qindex, quant_params->y_dc_delta_q, bit_depth);
+ av1_dc_quant_QTX(qindex, quant_params->y_dc_delta_q,
+#if CONFIG_EXTQUANT
+ cm->seq_params.base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ bit_depth);
quant_params->y_dequant_QTX[i][1] = av1_ac_quant_QTX(qindex, 0, bit_depth);
quant_params->u_dequant_QTX[i][0] =
- av1_dc_quant_QTX(qindex, quant_params->u_dc_delta_q, bit_depth);
+ av1_dc_quant_QTX(qindex, quant_params->u_dc_delta_q,
+#if CONFIG_EXTQUANT
+ cm->seq_params.base_uv_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ bit_depth);
quant_params->u_dequant_QTX[i][1] =
av1_ac_quant_QTX(qindex, quant_params->u_ac_delta_q, bit_depth);
quant_params->v_dequant_QTX[i][0] =
- av1_dc_quant_QTX(qindex, quant_params->v_dc_delta_q, bit_depth);
+ av1_dc_quant_QTX(qindex, quant_params->v_dc_delta_q,
+#if CONFIG_EXTQUANT
+ cm->seq_params.base_uv_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ bit_depth);
quant_params->v_dequant_QTX[i][1] =
av1_ac_quant_QTX(qindex, quant_params->v_ac_delta_q, bit_depth);
const int use_qmatrix = av1_use_qmatrix(quant_params, xd, i);
@@ -4354,6 +4383,12 @@
}
}
seq_params->separate_uv_delta_q = aom_rb_read_bit(rb);
+#if CONFIG_EXTQUANT
+ seq_params->base_y_dc_delta_q =
+ DELTA_DCQUANT_MIN + aom_rb_read_literal(rb, DELTA_DCQUANT_BITS);
+ seq_params->base_uv_dc_delta_q =
+ DELTA_DCQUANT_MIN + aom_rb_read_literal(rb, DELTA_DCQUANT_BITS);
+#endif // CONFIG_EXTQUANT
}
void av1_read_timing_info_header(aom_timing_info_t *timing_info,
@@ -5229,7 +5264,7 @@
}
CommonQuantParams *const quant_params = &cm->quant_params;
- setup_quantization(quant_params, av1_num_planes(cm),
+ setup_quantization(quant_params, av1_num_planes(cm), cm->seq_params.bit_depth,
cm->seq_params.separate_uv_delta_q, rb);
xd->bd = (int)seq_params->bit_depth;
@@ -5273,11 +5308,23 @@
xd->cur_frame_force_integer_mv = features->cur_frame_force_integer_mv;
for (int i = 0; i < MAX_SEGMENTS; ++i) {
+#if CONFIG_EXTQUANT
+ const int qindex = av1_get_qindex(&cm->seg, i, quant_params->base_qindex,
+ cm->seq_params.bit_depth);
+ xd->lossless[i] =
+ qindex == 0 &&
+ (quant_params->y_dc_delta_q + cm->seq_params.base_y_dc_delta_q <= 0) &&
+ (quant_params->u_dc_delta_q + cm->seq_params.base_uv_dc_delta_q <= 0) &&
+ quant_params->u_ac_delta_q <= 0 &&
+ (quant_params->v_dc_delta_q + cm->seq_params.base_uv_dc_delta_q <= 0) &&
+ quant_params->v_ac_delta_q <= 0;
+#else
const int qindex = av1_get_qindex(&cm->seg, i, quant_params->base_qindex);
xd->lossless[i] =
qindex == 0 && quant_params->y_dc_delta_q == 0 &&
quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
+#endif
xd->qindex[i] = qindex;
}
features->coded_lossless = is_coded_lossless(cm, xd);
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index b8f3a78..dcc2a7c 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -824,7 +824,15 @@
xd->current_base_qindex +=
read_delta_qindex(cm, xd, r, mbmi) * delta_q_info->delta_q_res;
/* Normative: Clamp to [1,MAXQ] to not interfere with lossless mode */
+#if CONFIG_EXTQUANT
+ xd->current_base_qindex = clamp(
+ xd->current_base_qindex, 1,
+ cm->seq_params.bit_depth == AOM_BITS_8
+ ? MAXQ_8_BITS
+ : cm->seq_params.bit_depth == AOM_BITS_10 ? MAXQ_10_BITS : MAXQ);
+#else
xd->current_base_qindex = clamp(xd->current_base_qindex, 1, MAXQ);
+#endif
FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
if (delta_q_info->delta_lf_present_flag) {
const int mi_row = xd->mi_row;
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index 347f064..5bdc0d4 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -171,6 +171,10 @@
aom_get_worker_interface()->init(&pbi->lf_worker);
pbi->lf_worker.thread_name = "aom lf worker";
+#if DEBUG_EXTQUANT
+ cm->fDecCoeffLog = fopen("DecCoeffLog.txt", "wt");
+#endif
+
return pbi;
}
@@ -248,6 +252,13 @@
#endif
av1_free_mc_tmp_buf(&pbi->td);
aom_img_metadata_array_free(pbi->metadata);
+
+#if DEBUG_EXTQUANT
+ if (pbi->common.fDecCoeffLog != NULL) {
+ fclose(pbi->common.fDecCoeffLog);
+ }
+#endif
+
aom_free(pbi);
}
diff --git a/av1/decoder/decodetxb.c b/av1/decoder/decodetxb.c
index d49809d..a969b45 100644
--- a/av1/decoder/decodetxb.c
+++ b/av1/decoder/decodetxb.c
@@ -50,7 +50,11 @@
return eob;
}
+#if CONFIG_EXTQUANT
+static INLINE int get_dqv(const int32_t *dequant, int coeff_idx,
+#else
static INLINE int get_dqv(const int16_t *dequant, int coeff_idx,
+#endif
const qm_val_t *iqmatrix) {
int dqv = dequant[!!coeff_idx];
if (iqmatrix != NULL)
@@ -120,7 +124,11 @@
const PLANE_TYPE plane_type = get_plane_type(plane);
MB_MODE_INFO *const mbmi = xd->mi[0];
struct macroblockd_plane *const pd = &xd->plane[plane];
+#if CONFIG_EXTQUANT
+ const int32_t *const dequant = pd->seg_dequant_QTX[mbmi->segment_id];
+#else
const int16_t *const dequant = pd->seg_dequant_QTX[mbmi->segment_id];
+#endif
tran_low_t *const tcoeffs = dcb->dqcoeff_block[plane] + dcb->cb_offset[plane];
const int shift = av1_get_tx_scale(tx_size);
const int bwl = get_txb_bwl(tx_size);
@@ -150,6 +158,13 @@
}
#endif
+#if DEBUG_EXTQUANT
+ fprintf(cm->fDecCoeffLog,
+ "\nmi_row = %d, mi_col = %d, blk_row = %d,"
+ " blk_col = %d, plane = %d, tx_size = %d ",
+ xd->mi_row, xd->mi_col, blk_row, blk_col, plane, tx_size);
+#endif
+
if (all_zero) {
*max_scan_line = 0;
if (plane == 0) {
@@ -245,6 +260,10 @@
((width + TX_PAD_HOR) * (height + TX_PAD_VER) + TX_PAD_END));
}
+#if DEBUG_EXTQUANT
+ fprintf(cm->fDecCoeffLog, "tx_type = %d, eob = %d\n", tx_type, *eob);
+#endif
+
{
// Read the non-zero coefficient with scan index eob-1
// TODO(angiebird): Put this into a function
@@ -307,8 +326,15 @@
tran_low_t dq_coeff;
// Bitmasking to clamp dq_coeff to valid range:
// The valid range for 8/10/12 bit video is at most 17/19/21 bit
+#if CONFIG_EXTQUANT
+ const int64_t dq_coeff_hp =
+ (int64_t)level * get_dqv(dequant, scan[c], iqmatrix) & 0xffffff;
+ dq_coeff =
+ (tran_low_t)(ROUND_POWER_OF_TWO_64(dq_coeff_hp, QUANT_TABLE_BITS));
+#else
dq_coeff = (tran_low_t)(
(int64_t)level * get_dqv(dequant, scan[c], iqmatrix) & 0xffffff);
+#endif // CONFIG_EXTQUANT
dq_coeff = dq_coeff >> shift;
if (sign) {
dq_coeff = -dq_coeff;
@@ -316,6 +342,12 @@
tcoeffs[pos] = clamp(dq_coeff, min_value, max_value);
}
}
+#if DEBUG_EXTQUANT
+ for (int c = 0; c < tx_size_wide[tx_size] * tx_size_high[tx_size]; c++) {
+ fprintf(cm->fDecCoeffLog, "%d ", tcoeffs[c]);
+ }
+ fprintf(cm->fDecCoeffLog, "\n\n");
+#endif
cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
diff --git a/av1/decoder/inspection.h b/av1/decoder/inspection.h
index b963f6a..9a17254 100644
--- a/av1/decoder/inspection.h
+++ b/av1/decoder/inspection.h
@@ -72,9 +72,15 @@
int mi_cols;
int tile_mi_rows;
int tile_mi_cols;
+#if CONFIG_EXTQUANT
+ int32_t y_dequant[MAX_SEGMENTS][2];
+ int32_t u_dequant[MAX_SEGMENTS][2];
+ int32_t v_dequant[MAX_SEGMENTS][2];
+#else
int16_t y_dequant[MAX_SEGMENTS][2];
int16_t u_dequant[MAX_SEGMENTS][2];
int16_t v_dequant[MAX_SEGMENTS][2];
+#endif
// TODO(negge): add per frame CDEF data
int delta_q_present_flag;
int delta_q_res;
diff --git a/av1/encoder/aq_complexity.c b/av1/encoder/aq_complexity.c
index f912095..4a45fc7 100644
--- a/av1/encoder/aq_complexity.c
+++ b/av1/encoder/aq_complexity.c
@@ -41,7 +41,8 @@
static int get_aq_c_strength(int q_index, aom_bit_depth_t bit_depth) {
// Approximate base quatizer (truncated to int)
- const int base_quant = av1_ac_quant_QTX(q_index, 0, bit_depth) / 4;
+ const int base_quant =
+ av1_ac_quant_QTX(q_index, 0, bit_depth) / (4 << QUANT_TABLE_BITS);
return (base_quant > 10) + (base_quant > 25);
}
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index a4ca9e5..0a2ba35 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -19,7 +19,12 @@
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/system_state.h"
-CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
+CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols
+#if CONFIG_EXTQUANT
+ ,
+ aom_bit_depth_t bit_depth
+#endif
+) {
size_t last_coded_q_map_size;
CYCLIC_REFRESH *const cr = aom_calloc(1, sizeof(*cr));
if (cr == NULL) return NULL;
@@ -30,13 +35,30 @@
return NULL;
}
last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
- cr->last_coded_q_map = aom_malloc(last_coded_q_map_size);
+#if CONFIG_EXTQUANT
+ cr->last_coded_q_map = (uint16_t *)aom_malloc(last_coded_q_map_size);
+#else
+ cr->last_coded_q_map = (uint8_t *)aom_malloc(last_coded_q_map_size);
+#endif // CONFIG_EXTQUANT
if (cr->last_coded_q_map == NULL) {
av1_cyclic_refresh_free(cr);
return NULL;
}
+#if CONFIG_EXTQUANT
+ assert(bit_depth == AOM_BITS_8
+ ? (MAXQ_8_BITS <= (QINDEX_RANGE_8_BITS - 1))
+ : bit_depth == AOM_BITS_10
+ ? (MAXQ_10_BITS <= (QINDEX_RANGE_10_BITS - 1))
+ : (MAXQ <= (QINDEX_RANGE - 1)));
+ const uint16_t qinit = bit_depth == AOM_BITS_8
+ ? MAXQ_8_BITS
+ : bit_depth == AOM_BITS_10 ? MAXQ_10_BITS : MAXQ;
+ for (int i = 0; i < mi_rows * mi_cols; ++i) cr->last_coded_q_map[i] = qinit;
+#else
assert(MAXQ <= 255);
memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
+#endif
+
cr->avg_frame_low_motion = 0.0;
return cr;
}
@@ -283,8 +305,14 @@
// cpi->common.features.allow_screen_content_tools and use the same instead
// of cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN
int qindex_thresh = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN
+#if CONFIG_EXTQUANT
+ ? av1_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2,
+ cm->quant_params.base_qindex,
+ cm->seq_params.bit_depth)
+#else
? av1_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2,
cm->quant_params.base_qindex)
+#endif
: 0;
assert(mi_row >= 0 && mi_row < mi_params->mi_rows);
assert(mi_col >= 0 && mi_col < mi_params->mi_cols);
@@ -413,9 +441,17 @@
memset(seg_map, 0, cm->mi_params.mi_rows * cm->mi_params.mi_cols);
av1_disable_segmentation(&cm->seg);
if (cm->current_frame.frame_type == KEY_FRAME) {
+#if CONFIG_EXTQUANT
+ for (int i = 0; i < (cm->mi_params.mi_rows * cm->mi_params.mi_cols); i++)
+ cr->last_coded_q_map[i] =  // map holds exactly mi_rows * mi_cols entries
+ cm->seq_params.bit_depth == AOM_BITS_8
+ ? MAXQ_8_BITS
+ : cm->seq_params.bit_depth == AOM_BITS_10 ? MAXQ_10_BITS : MAXQ;
+#else
memset(cr->last_coded_q_map, MAXQ,
cm->mi_params.mi_rows * cm->mi_params.mi_cols *
sizeof(*cr->last_coded_q_map));
+#endif
cr->sb_index = 0;
}
return;
@@ -458,9 +494,18 @@
cr->qindex_delta[1] = qindex_delta;
// Compute rd-mult for segment BOOST1.
+#if CONFIG_EXTQUANT
+ const int qindex2 = clamp(
+ quant_params->base_qindex + quant_params->y_dc_delta_q + qindex_delta,
+ 0,
+ cm->seq_params.bit_depth == AOM_BITS_8
+ ? MAXQ_8_BITS
+ : cm->seq_params.bit_depth == AOM_BITS_10 ? MAXQ_10_BITS : MAXQ);
+#else
const int qindex2 = clamp(
quant_params->base_qindex + quant_params->y_dc_delta_q + qindex_delta,
0, MAXQ);
+#endif
cr->rdmult = av1_compute_rd_mult(cpi, qindex2);
av1_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta);
diff --git a/av1/encoder/aq_cyclicrefresh.h b/av1/encoder/aq_cyclicrefresh.h
index 82f2b25..93a408a 100644
--- a/av1/encoder/aq_cyclicrefresh.h
+++ b/av1/encoder/aq_cyclicrefresh.h
@@ -76,7 +76,11 @@
/*!
* Map of the last q a block was coded at.
*/
+#if CONFIG_EXTQUANT
+ uint16_t *last_coded_q_map;
+#else
uint8_t *last_coded_q_map;
+#endif // CONFIG_EXTQUANT
/*!
* Threshold applied to the projected rate of the coding block,
* when deciding whether block should be refreshed.
@@ -115,7 +119,12 @@
typedef struct CYCLIC_REFRESH CYCLIC_REFRESH;
-CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols);
+CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols
+#if CONFIG_EXTQUANT
+ ,
+ aom_bit_depth_t bit_depth
+#endif
+);
void av1_cyclic_refresh_free(CYCLIC_REFRESH *cr);
diff --git a/av1/encoder/arm/neon/quantize_neon.c b/av1/encoder/arm/neon/quantize_neon.c
index 4eadbbc..077d793 100644
--- a/av1/encoder/arm/neon/quantize_neon.c
+++ b/av1/encoder/arm/neon/quantize_neon.c
@@ -120,7 +120,11 @@
static INLINE void calculate_dqcoeff_lp_and_store(const int16x8_t qcoeff,
const int16x8_t dequant,
+#if CONFIG_EXTQUANT
+ int32_t *dqcoeff) {
+#else
int16_t *dqcoeff) {
+#endif
const int32x4_t dqcoeff_0 =
vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant));
const int32x4_t dqcoeff_1 =
@@ -129,11 +133,19 @@
vst1q_s16(dqcoeff, vcombine_s16(vmovn_s32(dqcoeff_0), vmovn_s32(dqcoeff_1)));
}
+#if CONFIG_EXTQUANT
+void av1_quantize_lp_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int32_t *round_ptr, const int32_t *quant_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan) {
+#else
void av1_quantize_lp_neon(const int16_t *coeff_ptr, intptr_t count,
const int16_t *round_ptr, const int16_t *quant_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan) {
+#endif
// Quantization pass: All coefficients with index >= zero_flag are
// skippable. Note: zero_flag can be zero.
const int16x8_t v_zero = vdupq_n_s16(0);
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index be7a97a..1e04ea4 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -33,6 +33,15 @@
*eob_ptr = 0;
}
+#if CONFIG_EXTQUANT
+static void quantize_fp_helper_c(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr,
+ const int32_t *round_ptr, const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, int log_scale) {
+#else
static void quantize_fp_helper_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
@@ -40,6 +49,7 @@
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr, int log_scale) {
+#endif
int i, eob = -1;
const int rounding[2] = { ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
ROUND_POWER_OF_TWO(round_ptr[1], log_scale) };
@@ -48,6 +58,9 @@
(void)zbin_ptr;
(void)quant_shift_ptr;
(void)iscan;
+#if CONFIG_EXTQUANT
+ const int shift = 16 - log_scale + QUANT_FP_BITS;
+#endif
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -55,7 +68,12 @@
if (qm_ptr == NULL && iqm_ptr == NULL) {
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
+#if CONFIG_EXTQUANT
+ const int32_t thresh =
+ (int32_t)ROUND_POWER_OF_TWO(dequant_ptr[rc != 0], QUANT_TABLE_BITS);
+#else
const int32_t thresh = (int32_t)(dequant_ptr[rc != 0]);
+#endif
const int coeff = coeff_ptr[rc];
const int coeff_sign = AOMSIGN(coeff);
int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
@@ -63,11 +81,23 @@
if ((abs_coeff << (1 + log_scale)) >= thresh) {
abs_coeff =
clamp64(abs_coeff + rounding[rc != 0], INT16_MIN, INT16_MAX);
+#if CONFIG_EXTQUANT
+ tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (shift));
+#else
tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale));
+#endif
if (tmp32) {
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+#if CONFIG_EXTQUANT
+ const tran_low_t abs_dqcoeff =
+ (tran_low_t)ROUND_POWER_OF_TWO_64(
+ (tran_high_t)tmp32 * dequant_ptr[rc != 0],
+ QUANT_TABLE_BITS) >>
+ log_scale;
+#else
const tran_low_t abs_dqcoeff =
(tmp32 * dequant_ptr[rc != 0]) >> log_scale;
+#endif // CONFIG_EXTQUANT
dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
}
}
@@ -87,14 +117,32 @@
const int coeff_sign = AOMSIGN(coeff);
int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp32 = 0;
+#if CONFIG_EXTQUANT
+ if ((((tran_high_t)abs_coeff * wt) << QUANT_TABLE_BITS) >=
+ ((tran_high_t)dequant_ptr[rc != 0]
+ << (AOM_QM_BITS - (1 + log_scale)))) {
+#else
if (abs_coeff * wt >=
(dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
+#endif
abs_coeff += rounding[rc != 0];
abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX);
+#if CONFIG_EXTQUANT
+ tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >>
+ (shift + AOM_QM_BITS));
+#else
tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >>
(16 - log_scale + AOM_QM_BITS));
+#endif
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+#if CONFIG_EXTQUANT
+ const tran_low_t abs_dqcoeff =
+ (tran_low_t)ROUND_POWER_OF_TWO_64((tran_high_t)tmp32 * dequant,
+ QUANT_TABLE_BITS) >>
+ log_scale;
+#else
const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale;
+#endif
dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
}
@@ -104,6 +152,15 @@
*eob_ptr = eob + 1;
}
+#if CONFIG_EXTQUANT
+static void highbd_quantize_fp_helper_c(
+ const tran_low_t *coeff_ptr, intptr_t count, const int32_t *zbin_ptr,
+ const int32_t *round_ptr, const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, int log_scale) {
+#else
static void highbd_quantize_fp_helper_c(
const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
@@ -111,9 +168,10 @@
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr, int log_scale) {
+#endif
int i;
int eob = -1;
- const int shift = 16 - log_scale;
+ const int shift = 16 - log_scale + QUANT_FP_BITS;
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
(void)zbin_ptr;
@@ -134,14 +192,27 @@
const int coeff_sign = AOMSIGN(coeff);
const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int abs_qcoeff = 0;
+#if CONFIG_EXTQUANT
+ if ((((tran_high_t)abs_coeff * wt) << QUANT_TABLE_BITS) >=
+ ((tran_high_t)dequant_ptr[rc != 0]
+ << (AOM_QM_BITS - (1 + log_scale)))) {
+#else
if (abs_coeff * wt >=
(dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
+#endif
const int64_t tmp =
abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
abs_qcoeff =
(int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+#if CONFIG_EXTQUANT
+ const tran_low_t abs_dqcoeff =
+ (tran_low_t)ROUND_POWER_OF_TWO_64((tran_high_t)abs_qcoeff * dequant,
+ QUANT_TABLE_BITS) >>
+ log_scale;
+#else
const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
+#endif
dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
if (abs_qcoeff) eob = i;
} else {
@@ -161,13 +232,25 @@
const int coeff_sign = AOMSIGN(coeff);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int log_scaled_round = log_scaled_round_arr[rc01];
+#if CONFIG_EXTQUANT
+ if (((tran_high_t)abs_coeff << (1 + log_scale + QUANT_TABLE_BITS)) >=
+ (tran_high_t)dequant_ptr[rc01]) {
+#else
if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) {
+#endif
const int quant = quant_ptr[rc01];
const int dequant = dequant_ptr[rc01];
const int64_t tmp = (int64_t)abs_coeff + log_scaled_round;
const int abs_qcoeff = (int)((tmp * quant) >> shift);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+#if CONFIG_EXTQUANT
+ const tran_low_t abs_dqcoeff =
+ (tran_low_t)ROUND_POWER_OF_TWO_64((tran_high_t)abs_qcoeff * dequant,
+ QUANT_TABLE_BITS) >>
+ log_scale;
+#else
const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
+#endif
if (abs_qcoeff) eob = i;
dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
} else {
@@ -179,22 +262,39 @@
*eob_ptr = eob + 1;
}
+#if CONFIG_EXTQUANT
+void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int32_t *zbin_ptr, const int32_t *round_ptr,
+ const int32_t *quant_ptr, const int32_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+#else
void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
+#endif
quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
eob_ptr, scan, iscan, NULL, NULL, 0);
}
+#if CONFIG_EXTQUANT
+void av1_quantize_lp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int32_t *round_ptr, const int32_t *quant_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan) {
+#else
void av1_quantize_lp_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *round_ptr, const int16_t *quant_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan) {
+#endif
int eob = -1;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
@@ -219,6 +319,15 @@
*eob_ptr = eob + 1;
}
+#if CONFIG_EXTQUANT
+void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int32_t *zbin_ptr, const int32_t *round_ptr,
+ const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+#else
void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
@@ -226,11 +335,21 @@
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
+#endif
quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
eob_ptr, scan, iscan, NULL, NULL, 1);
}
+#if CONFIG_EXTQUANT
+void av1_quantize_fp_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int32_t *zbin_ptr, const int32_t *round_ptr,
+ const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+#else
void av1_quantize_fp_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
@@ -238,6 +357,7 @@
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
+#endif
quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
eob_ptr, scan, iscan, NULL, NULL, 2);
@@ -348,12 +468,21 @@
}
}
+#if CONFIG_EXTQUANT
+static void quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
+ int skip_block, const int32_t *round_ptr,
+ const int32_t quant, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int32_t dequant_ptr,
+ uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale) {
+#else
static void quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t quant, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
uint16_t *eob_ptr, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr, const int log_scale) {
+#endif
const int rc = 0;
const int coeff = coeff_ptr[rc];
const int coeff_sign = AOMSIGN(coeff);
@@ -371,10 +500,22 @@
const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
INT16_MIN, INT16_MAX);
+#if CONFIG_EXTQUANT
+ const int shift = 16 - log_scale + QUANT_FP_BITS;
+ tmp32 = (int32_t)((tmp * wt * quant) >> (shift + AOM_QM_BITS));
+#else
tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS));
+#endif
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+#if CONFIG_EXTQUANT
+ const tran_low_t abs_dqcoeff =
+ (tran_low_t)ROUND_POWER_OF_TWO_64((tran_high_t)tmp32 * dequant,
+ QUANT_TABLE_BITS) >>
+ log_scale;
+#else
const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale;
+#endif
dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
if (tmp32) eob = 0;
}
@@ -486,11 +627,19 @@
}
}
+#if CONFIG_EXTQUANT
+static INLINE void highbd_quantize_dc(
+ const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
+ const int32_t *round_ptr, const int32_t quant, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int32_t dequant_ptr, uint16_t *eob_ptr,
+ const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale) {
+#else
static INLINE void highbd_quantize_dc(
const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale) {
+#endif
int eob = -1;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
@@ -504,13 +653,25 @@
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
const int64_t tmpw = tmp * wt;
+#if CONFIG_EXTQUANT
+ const int shift = 16 - log_scale + QUANT_FP_BITS;
+ const int abs_qcoeff = (int)((tmpw * quant) >> (shift + AOM_QM_BITS));
+#else
const int abs_qcoeff =
(int)((tmpw * quant) >> (16 - log_scale + AOM_QM_BITS));
+#endif
qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
const int dequant =
(dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+#if CONFIG_EXTQUANT
+ const tran_low_t abs_dqcoeff =
+ (tran_low_t)ROUND_POWER_OF_TWO_64((tran_high_t)abs_qcoeff * dequant,
+ QUANT_TABLE_BITS) >>
+ log_scale;
+#else
const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
+#endif
dqcoeff_ptr[0] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
if (abs_qcoeff) eob = 0;
}
@@ -535,6 +696,16 @@
qparam->log_scale);
}
+#if CONFIG_EXTQUANT
+void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
+ const int32_t *zbin_ptr, const int32_t *round_ptr,
+ const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan,
+ int log_scale) {
+#else
void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
@@ -543,28 +714,55 @@
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan,
int log_scale) {
+#endif
highbd_quantize_fp_helper_c(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
dequant_ptr, eob_ptr, scan, iscan, NULL, NULL,
log_scale);
}
+#if CONFIG_EXTQUANT
+static void invert_quant(int32_t *quant, int32_t *shift, int d) {
+#else
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
+#endif
uint32_t t;
int l, m;
t = d;
for (l = 0; t > 1; l++) t >>= 1;
m = 1 + (1 << (16 + l)) / d;
+#if CONFIG_EXTQUANT
+ *quant = (int32_t)(m - (1 << 16));
+ *shift = 1 << (16 - l + QUANT_TABLE_BITS);
+#else
*quant = (int16_t)(m - (1 << 16));
*shift = 1 << (16 - l);
+#endif
}
-static int get_qzbin_factor(int q, aom_bit_depth_t bit_depth) {
- const int quant = av1_dc_quant_QTX(q, 0, bit_depth);
+static int get_qzbin_factor(int q,
+#if CONFIG_EXTQUANT
+ int base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ aom_bit_depth_t bit_depth) {
+ const int quant = av1_dc_quant_QTX(q, 0,
+#if CONFIG_EXTQUANT
+ base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ bit_depth);
switch (bit_depth) {
+#if CONFIG_EXTQUANT
+ case AOM_BITS_8:
+ return q == 0 ? 64 : (quant < (148 << QUANT_TABLE_BITS) ? 84 : 80);
+ case AOM_BITS_10:
+ return q == 0 ? 64 : (quant < (592 << QUANT_TABLE_BITS) ? 84 : 80);
+ case AOM_BITS_12:
+ return q == 0 ? 64 : (quant < (2368 << QUANT_TABLE_BITS) ? 84 : 80);
+#else
case AOM_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80);
case AOM_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80);
case AOM_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
+#endif
default:
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1;
@@ -573,14 +771,78 @@
void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
int u_dc_delta_q, int u_ac_delta_q, int v_dc_delta_q,
- int v_ac_delta_q, QUANTS *const quants,
- Dequants *const deq) {
+ int v_ac_delta_q,
+#if CONFIG_EXTQUANT
+ int base_y_dc_delta_q, int base_uv_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ QUANTS *const quants, Dequants *const deq) {
int i, q, quant_QTX;
- for (q = 0; q < QINDEX_RANGE; q++) {
- const int qzbin_factor = get_qzbin_factor(q, bit_depth);
- const int qrounding_factor = q == 0 ? 64 : 48;
+#if CONFIG_EXTQUANT
+ int qindex_range =
+ (bit_depth == AOM_BITS_8
+ ? QINDEX_RANGE_8_BITS
+ : bit_depth == AOM_BITS_10 ? QINDEX_RANGE_10_BITS : QINDEX_RANGE);
+#else
+ int qindex_range = QINDEX_RANGE;
+#endif
+ for (q = 0; q < qindex_range; q++) {
+ const int qrounding_factor = q == 0 ? 64 : 48;
+#if CONFIG_EXTQUANT
+ const int qzbin_factor = get_qzbin_factor(q, base_y_dc_delta_q, bit_depth);
+ for (i = 0; i < 2; ++i) {
+ int qrounding_factor_fp = 64;
+ // y quantizer with TX scale
+ quant_QTX = i == 0 ? av1_dc_quant_QTX(q, y_dc_delta_q, base_y_dc_delta_q,
+ bit_depth)
+ : av1_ac_quant_QTX(q, 0, bit_depth);
+ invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i],
+ quant_QTX);
+ quants->y_quant_fp[q][i] =
+ (1 << (16 + QUANT_FP_BITS + QUANT_TABLE_BITS)) / quant_QTX;
+ quants->y_round_fp[q][i] =
+ (qrounding_factor_fp * quant_QTX) >> (7 + QUANT_TABLE_BITS);
+ quants->y_zbin[q][i] =
+ ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, (7 + QUANT_TABLE_BITS));
+ quants->y_round[q][i] =
+ (qrounding_factor * quant_QTX) >> (7 + QUANT_TABLE_BITS);
+ deq->y_dequant_QTX[q][i] = quant_QTX;
+
+ // u quantizer with TX scale
+ quant_QTX = i == 0 ? av1_dc_quant_QTX(q, u_dc_delta_q, base_uv_dc_delta_q,
+ bit_depth)
+ : av1_ac_quant_QTX(q, u_ac_delta_q, bit_depth);
+ invert_quant(&quants->u_quant[q][i], &quants->u_quant_shift[q][i],
+ quant_QTX);
+ quants->u_quant_fp[q][i] =
+ (1 << (16 + QUANT_FP_BITS + QUANT_TABLE_BITS)) / quant_QTX;
+ quants->u_round_fp[q][i] =
+ (qrounding_factor_fp * quant_QTX) >> (7 + QUANT_TABLE_BITS);
+ quants->u_zbin[q][i] =
+ ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, (7 + QUANT_TABLE_BITS));
+ quants->u_round[q][i] =
+ (qrounding_factor * quant_QTX) >> (7 + QUANT_TABLE_BITS);
+ deq->u_dequant_QTX[q][i] = quant_QTX;
+
+ // v quantizer with TX scale
+ quant_QTX = i == 0 ? av1_dc_quant_QTX(q, v_dc_delta_q, base_uv_dc_delta_q,
+ bit_depth)
+ : av1_ac_quant_QTX(q, v_ac_delta_q, bit_depth);
+ invert_quant(&quants->v_quant[q][i], &quants->v_quant_shift[q][i],
+ quant_QTX);
+ quants->v_quant_fp[q][i] =
+ (1 << (16 + QUANT_FP_BITS + QUANT_TABLE_BITS)) / quant_QTX;
+ quants->v_round_fp[q][i] =
+ (qrounding_factor_fp * quant_QTX) >> (7 + QUANT_TABLE_BITS);
+ quants->v_zbin[q][i] =
+ ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, (7 + QUANT_TABLE_BITS));
+ quants->v_round[q][i] =
+ (qrounding_factor * quant_QTX) >> (7 + QUANT_TABLE_BITS);
+ deq->v_dequant_QTX[q][i] = quant_QTX;
+ }
+#else
+ const int qzbin_factor = get_qzbin_factor(q, bit_depth);
for (i = 0; i < 2; ++i) {
int qrounding_factor_fp = 64;
// y quantizer with TX scale
@@ -616,7 +878,7 @@
quants->v_round[q][i] = (qrounding_factor * quant_QTX) >> 7;
deq->v_dequant_QTX[q][i] = quant_QTX;
}
-
+#endif
for (i = 2; i < 8; i++) { // 8: SIMD width
quants->y_quant[q][i] = quants->y_quant[q][1];
quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
@@ -644,14 +906,18 @@
}
}
-void av1_init_quantizer(EncQuantDequantParams *const enc_quant_dequant_params,
- const CommonQuantParams *quant_params,
- aom_bit_depth_t bit_depth) {
+void av1_init_quantizer(SequenceHeader *seq_params,
+ EncQuantDequantParams *const enc_quant_dequant_params,
+ const CommonQuantParams *quant_params) {
QUANTS *const quants = &enc_quant_dequant_params->quants;
Dequants *const dequants = &enc_quant_dequant_params->dequants;
- av1_build_quantizer(bit_depth, quant_params->y_dc_delta_q,
+ av1_build_quantizer(seq_params->bit_depth, quant_params->y_dc_delta_q,
quant_params->u_dc_delta_q, quant_params->u_ac_delta_q,
quant_params->v_dc_delta_q, quant_params->v_ac_delta_q,
+#if CONFIG_EXTQUANT
+ seq_params->base_y_dc_delta_q,
+ seq_params->base_uv_dc_delta_q,
+#endif // CONFIG_EXTQUANT
quants, dequants);
}
@@ -662,13 +928,26 @@
MACROBLOCKD *const xd = &x->e_mbd;
const QUANTS *const quants = &cpi->enc_quant_dequant_params.quants;
const Dequants *const dequants = &cpi->enc_quant_dequant_params.dequants;
-
+#if CONFIG_EXTQUANT
+ int current_qindex =
+ AOMMAX(0, AOMMIN(cm->seq_params.bit_depth == AOM_BITS_8
+ ? QINDEX_RANGE_8_BITS - 1
+ : cm->seq_params.bit_depth == AOM_BITS_10
+ ? QINDEX_RANGE_10_BITS - 1
+ : QINDEX_RANGE - 1,
+ cm->delta_q_info.delta_q_present_flag
+ ? quant_params->base_qindex + x->delta_qindex
+ : quant_params->base_qindex));
+ const int qindex = av1_get_qindex(&cm->seg, segment_id, current_qindex,
+ cm->seq_params.bit_depth);
+#else
const int current_qindex = AOMMAX(
0,
AOMMIN(QINDEX_RANGE - 1, cm->delta_q_info.delta_q_present_flag
? quant_params->base_qindex + x->delta_qindex
: quant_params->base_qindex));
const int qindex = av1_get_qindex(&cm->seg, segment_id, current_qindex);
+#endif
const int rdmult =
av1_compute_rd_mult(cpi, qindex + quant_params->y_dc_delta_q);
const int use_qmatrix = av1_use_qmatrix(quant_params, xd, segment_id);
@@ -725,6 +1004,32 @@
x->seg_skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
x->qindex = qindex;
+#if DEBUG_EXTQUANT
+ fprintf(cm->fEncCoeffLog, "\ninit_plane_quantizers\n");
+ fprintf(cm->fEncCoeffLog, "qindex = %d\n", qindex);
+ fprintf(cm->fEncCoeffLog, "\nquant_QTX = [%d, %d, %d]",
+ x->plane[0].quant_QTX[0], x->plane[1].quant_QTX[0],
+ x->plane[2].quant_QTX[0]);
+ fprintf(cm->fEncCoeffLog, "\nquant_fp_QTX = [%d, %d, %d]",
+ x->plane[0].quant_fp_QTX[0], x->plane[1].quant_fp_QTX[0],
+ x->plane[2].quant_fp_QTX[0]);
+ fprintf(cm->fEncCoeffLog, "\nround_fp_QTX = [%d, %d, %d]",
+ x->plane[0].round_fp_QTX[0], x->plane[1].round_fp_QTX[0],
+ x->plane[2].round_fp_QTX[0]);
+ fprintf(cm->fEncCoeffLog, "\nquant_shift_QTX = [%d, %d, %d]",
+ x->plane[0].quant_shift_QTX[0], x->plane[1].quant_shift_QTX[0],
+ x->plane[2].quant_shift_QTX[0]);
+ fprintf(cm->fEncCoeffLog, "\nzbin_QTX = [%d, %d, %d]",
+ x->plane[0].zbin_QTX[0], x->plane[1].zbin_QTX[0],
+ x->plane[2].zbin_QTX[0]);
+ fprintf(cm->fEncCoeffLog, "\nround_QTX = [%d, %d, %d]",
+ x->plane[0].round_QTX[0], x->plane[1].round_QTX[0],
+ x->plane[2].round_QTX[0]);
+ fprintf(cm->fEncCoeffLog, "\ndequant_QTX = [%d, %d, %d]\n",
+ x->plane[0].dequant_QTX[0], x->plane[1].dequant_QTX[0],
+ x->plane[2].dequant_QTX[0]);
+#endif
+
MvCosts *mv_costs = &x->mv_costs;
av1_set_error_per_bit(mv_costs, rdmult);
av1_set_sad_per_bit(cpi, mv_costs, qindex);
@@ -736,37 +1041,145 @@
av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);
}
+// Fills in the per-frame DC/AC delta-q values. All five outputs are first
+// reset to 0 and then overridden depending on the build configuration:
+//  - CONFIG_EXTQUANT: only intra-only frames get non-zero DC deltas, picked
+//    from the smaller frame dimension (< 360: 0 / 0, < 720: -2 / -1,
+//    otherwise -4 / -2 for luma / chroma). AC deltas stay 0, and the chroma
+//    DC deltas are applied regardless of the caller's enable_chroma_deltaq.
+//  - otherwise: the four chroma deltas are 2 when enable_chroma_deltaq is
+//    set and 0 when it is not; the luma DC delta stays 0.
+void set_frame_dc_delta_q(const AV1_COMMON *const cm, int *y_dc_delta_q,
+                          int enable_chroma_deltaq, int *u_dc_delta_q,
+                          int *v_dc_delta_q, int *u_ac_delta_q,
+                          int *v_ac_delta_q) {
+  (void)cm;  // Only read when CONFIG_EXTQUANT is enabled.
+  *y_dc_delta_q = 0;
+  *u_dc_delta_q = 0;
+  *v_dc_delta_q = 0;
+  *u_ac_delta_q = 0;
+  *v_ac_delta_q = 0;
+#if CONFIG_EXTQUANT
+  // Inter frames keep the all-zero deltas set above.
+  if (!frame_is_intra_only(cm)) return;
+
+  // Chroma DC deltas are always enabled on intra-only frames.
+  enable_chroma_deltaq = 1;
+  const int min_dim = AOMMIN(cm->width, cm->height);
+  int luma_dc_delta;
+  int chroma_dc_delta;
+  if (min_dim < 360) {
+    luma_dc_delta = 0;
+    chroma_dc_delta = 0;
+  } else if (min_dim < 720) {
+    luma_dc_delta = -2;
+    chroma_dc_delta = -1;
+  } else {
+    luma_dc_delta = -4;
+    chroma_dc_delta = -2;
+  }
+  *y_dc_delta_q = luma_dc_delta;
+  if (enable_chroma_deltaq) {
+    *u_dc_delta_q = chroma_dc_delta;
+    *v_dc_delta_q = chroma_dc_delta;
+  }
+#else
+  // TODO(aomedia:2717): need to design better delta
+  const int chroma_delta = enable_chroma_deltaq ? 2 : 0;
+  *u_ac_delta_q = chroma_delta;
+  *v_ac_delta_q = chroma_delta;
+  *u_dc_delta_q = chroma_delta;
+  *v_dc_delta_q = chroma_delta;
+#endif  // CONFIG_EXTQUANT
+}
+
void av1_set_quantizer(AV1_COMMON *const cm, int min_qmlevel, int max_qmlevel,
int q, int enable_chroma_deltaq) {
// quantizer has to be reinitialized with av1_init_quantizer() if any
// delta_q changes.
CommonQuantParams *quant_params = &cm->quant_params;
quant_params->base_qindex = AOMMAX(cm->delta_q_info.delta_q_present_flag, q);
+ set_frame_dc_delta_q(cm, &quant_params->y_dc_delta_q, enable_chroma_deltaq,
+ &quant_params->u_dc_delta_q, &quant_params->v_dc_delta_q,
+ &quant_params->u_ac_delta_q,
+ &quant_params->v_ac_delta_q);
- quant_params->y_dc_delta_q = 0;
- if (enable_chroma_deltaq) {
- // TODO(aomedia:2717): need to design better delta
- quant_params->u_dc_delta_q = 2;
- quant_params->u_ac_delta_q = 2;
- quant_params->v_dc_delta_q = 2;
- quant_params->v_ac_delta_q = 2;
- } else {
- quant_params->u_dc_delta_q = 0;
- quant_params->u_ac_delta_q = 0;
- quant_params->v_dc_delta_q = 0;
- quant_params->v_ac_delta_q = 0;
- }
-
+#if CONFIG_EXTQUANT
+ quant_params->qmatrix_level_y =
+ aom_get_qmlevel(quant_params->base_qindex, min_qmlevel, max_qmlevel,
+ cm->seq_params.bit_depth);
+ quant_params->qmatrix_level_u =
+ aom_get_qmlevel(quant_params->base_qindex + quant_params->u_ac_delta_q,
+ min_qmlevel, max_qmlevel, cm->seq_params.bit_depth);
+#else
quant_params->qmatrix_level_y =
aom_get_qmlevel(quant_params->base_qindex, min_qmlevel, max_qmlevel);
quant_params->qmatrix_level_u =
aom_get_qmlevel(quant_params->base_qindex + quant_params->u_ac_delta_q,
min_qmlevel, max_qmlevel);
+#endif
if (!cm->seq_params.separate_uv_delta_q)
quant_params->qmatrix_level_v = quant_params->qmatrix_level_u;
else
+#if CONFIG_EXTQUANT
+ quant_params->qmatrix_level_v =
+ aom_get_qmlevel(quant_params->base_qindex + quant_params->v_ac_delta_q,
+ min_qmlevel, max_qmlevel, cm->seq_params.bit_depth);
+#else
quant_params->qmatrix_level_v =
aom_get_qmlevel(quant_params->base_qindex + quant_params->v_ac_delta_q,
min_qmlevel, max_qmlevel);
+#endif
+}
+
+// Table that converts 0-63 Q-range values passed in outside to the Qindex
+// range used internally.
+// clang-format off
+static const int quantizer_to_qindex[] = {
+ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48,
+ 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100,
+ 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
+ 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
+ 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
+};
+// clang-format on
+
+// Maps an external quantizer in [0, 63] to the internal qindex through the
+// quantizer_to_qindex table. With CONFIG_EXTQUANT, 10- and 12-bit streams
+// additionally add an entry of qindex_10b_offset / qindex_12b_offset: entry 0
+// for quantizer 0 and entry 1 for every other quantizer. An unsupported bit
+// depth asserts, and returns -1 when asserts are compiled out.
+int av1_quantizer_to_qindex(int quantizer
+#if CONFIG_EXTQUANT
+                            ,
+                            aom_bit_depth_t bit_depth
+#endif
+) {
+  // The lookup table has exactly 64 entries; guard both ends of the index in
+  // every build configuration, not only the upper end under CONFIG_EXTQUANT.
+  assert(quantizer >= 0 && quantizer <= 63);
+#if CONFIG_EXTQUANT
+  switch (bit_depth) {
+    case AOM_BITS_8: return quantizer_to_qindex[quantizer];
+    case AOM_BITS_10:
+      return (quantizer_to_qindex[quantizer] +
+              qindex_10b_offset[quantizer != 0]);
+    case AOM_BITS_12:
+      return (quantizer_to_qindex[quantizer] +
+              qindex_12b_offset[quantizer != 0]);
+    default:
+      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
+      return -1;
+  }
+#else
+  return quantizer_to_qindex[quantizer];
+#endif  // CONFIG_EXTQUANT
+}
+
+// Inverse of the quantizer -> qindex mapping: returns the smallest quantizer
+// in [0, 63] whose mapped qindex is at least `qindex`, or 63 when no entry
+// reaches it.
+int av1_qindex_to_quantizer(int qindex
+#if CONFIG_EXTQUANT
+                            ,
+                            aom_bit_depth_t bit_depth
+#endif
+) {
+  for (int quantizer = 0; quantizer < 64; ++quantizer) {
+#if CONFIG_EXTQUANT
+    const int mapped_qindex = av1_quantizer_to_qindex(quantizer, bit_depth);
+#else
+    const int mapped_qindex = quantizer_to_qindex[quantizer];
+#endif
+    if (mapped_qindex >= qindex) return quantizer;
+  }
+
+  return 63;
}
diff --git a/av1/encoder/av1_quantize.h b/av1/encoder/av1_quantize.h
index ec012d3..1d1d0c0 100644
--- a/av1/encoder/av1_quantize.h
+++ b/av1/encoder/av1_quantize.h
@@ -42,11 +42,49 @@
const SCAN_ORDER *sc,
const QUANT_PARAM *qparam);
+#if CONFIG_EXTQUANT
+#define QUANT_FP_BITS 4
+static const int qindex_10b_offset[] = {
+ 0,
+ 48,
+};
+static const int qindex_12b_offset[] = {
+ 0,
+ 96,
+};
+#else
+#define QUANT_FP_BITS 0
+#endif // CONFIG_EXTQUANT
+
// The QUANTS structure is used only for internal quantizer setup in
// av1_quantize.c.
// All of its fields use the same coefficient shift/scaling at TX.
typedef struct {
// 0: dc 1: ac 2-8: ac repeated to SIMD width
+#if CONFIG_EXTQUANT
+ DECLARE_ALIGNED(32, int32_t, y_quant[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, y_quant_shift[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, y_zbin[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, y_round[QINDEX_RANGE][8]);
+
+ // TODO(jingning): in progress of re-working the quantization. will decide
+ // if we want to deprecate the current use of y_quant.
+ DECLARE_ALIGNED(32, int32_t, y_quant_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, u_quant_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, v_quant_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, y_round_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, u_round_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, v_round_fp[QINDEX_RANGE][8]);
+
+ DECLARE_ALIGNED(32, int32_t, u_quant[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, v_quant[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, u_quant_shift[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, v_quant_shift[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, u_zbin[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, v_zbin[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, u_round[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(32, int32_t, v_round[QINDEX_RANGE][8]);
+#else
DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]);
@@ -69,6 +107,7 @@
DECLARE_ALIGNED(16, int16_t, v_zbin[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, u_round[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, v_round[QINDEX_RANGE][8]);
+#endif
} QUANTS;
// The Dequants structure is used only for internal quantizer setup in
@@ -76,12 +115,21 @@
// Fields are suffixed according to whether or not they're expressed in
// the same coefficient shift/precision as TX or a fixed Q3 format.
typedef struct {
+#if CONFIG_EXTQUANT
+ DECLARE_ALIGNED(32, int32_t,
+ y_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width
+ DECLARE_ALIGNED(32, int32_t,
+ u_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width
+ DECLARE_ALIGNED(32, int32_t,
+ v_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width
+#else
DECLARE_ALIGNED(16, int16_t,
y_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width
DECLARE_ALIGNED(16, int16_t,
u_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width
DECLARE_ALIGNED(16, int16_t,
v_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width
+#endif
} Dequants;
typedef struct {
@@ -101,16 +149,33 @@
void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
int u_dc_delta_q, int u_ac_delta_q, int v_dc_delta_q,
- int v_ac_delta_q, QUANTS *const quants,
- Dequants *const deq);
+ int v_ac_delta_q,
+#if CONFIG_EXTQUANT
+ int base_y_dc_delta_q, int base_uv_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ QUANTS *const quants, Dequants *const deq);
-void av1_init_quantizer(EncQuantDequantParams *const enc_quant_dequant_params,
- const CommonQuantParams *quant_params,
- aom_bit_depth_t bit_depth);
+void av1_init_quantizer(SequenceHeader *seq_params,
+ EncQuantDequantParams *const enc_quant_dequant_params,
+ const CommonQuantParams *quant_params);
void av1_set_quantizer(struct AV1Common *const cm, int min_qmlevel,
int max_qmlevel, int q, int enable_chroma_deltaq);
+int av1_quantizer_to_qindex(int quantizer
+#if CONFIG_EXTQUANT
+ ,
+ aom_bit_depth_t bit_depth
+#endif
+);
+
+int av1_qindex_to_quantizer(int qindex
+#if CONFIG_EXTQUANT
+ ,
+ aom_bit_depth_t bit_depth
+#endif
+);
+
void av1_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 4b4961f..59347a8 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2351,8 +2351,16 @@
static AOM_INLINE void encode_quantization(
const CommonQuantParams *const quant_params, int num_planes,
- bool separate_uv_delta_q, struct aom_write_bit_buffer *wb) {
+ aom_bit_depth_t bit_depth, bool separate_uv_delta_q,
+ struct aom_write_bit_buffer *wb) {
+#if CONFIG_EXTQUANT
+ aom_wb_write_literal(
+ wb, quant_params->base_qindex,
+ bit_depth == AOM_BITS_8 ? QINDEX_BITS_UNEXT : QINDEX_BITS);
+#else
+ (void)bit_depth;
aom_wb_write_literal(wb, quant_params->base_qindex, QINDEX_BITS);
+#endif
write_delta_q(wb, quant_params->y_dc_delta_q);
if (num_planes > 1) {
int diff_uv_delta =
@@ -2743,6 +2751,16 @@
}
}
aom_wb_write_bit(wb, seq_params->separate_uv_delta_q);
+#if CONFIG_EXTQUANT
+ // Each base DC delta is written as (value - DELTA_DCQUANT_MIN) in
+ // DELTA_DCQUANT_BITS unsigned bits, so a value below DELTA_DCQUANT_MIN would
+ // wrap and a value above DELTA_DCQUANT_MAX is outside the asserted range.
+ // Check both bounds for both fields before writing.
+ assert(seq_params->base_y_dc_delta_q >= DELTA_DCQUANT_MIN);
+ assert(seq_params->base_y_dc_delta_q <= DELTA_DCQUANT_MAX);
+ assert(seq_params->base_uv_dc_delta_q >= DELTA_DCQUANT_MIN);
+ assert(seq_params->base_uv_dc_delta_q <= DELTA_DCQUANT_MAX);
+ aom_wb_write_unsigned_literal(
+ wb, seq_params->base_y_dc_delta_q - DELTA_DCQUANT_MIN,
+ DELTA_DCQUANT_BITS);
+ aom_wb_write_unsigned_literal(
+ wb, seq_params->base_uv_dc_delta_q - DELTA_DCQUANT_MIN,
+ DELTA_DCQUANT_BITS);
+#endif // CONFIG_EXTQUANT
}
static AOM_INLINE void write_timing_info_header(
@@ -3379,6 +3397,7 @@
write_tile_info(cm, saved_wb, wb);
encode_quantization(quant_params, av1_num_planes(cm),
+ cm->seq_params.bit_depth,
cm->seq_params.separate_uv_delta_q, wb);
encode_segmentation(cm, xd, wb);
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 618be87..445440c 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -123,6 +123,21 @@
*/
/**@{*/
//! Quantization step size used by AV1_XFORM_QUANT_FP.
+#if CONFIG_EXTQUANT
+ const int32_t *quant_fp_QTX;
+ //! Offset used for rounding in the quantizer process by AV1_XFORM_QUANT_FP.
+ const int32_t *round_fp_QTX;
+ //! Quantization step size used by AV1_XFORM_QUANT_B.
+ const int32_t *quant_QTX;
+ //! Offset used for rounding in the quantizer process by AV1_XFORM_QUANT_B.
+ const int32_t *round_QTX;
+ //! Scale factor to shift coefficients toward zero. Only used by QUANT_B.
+ const int32_t *quant_shift_QTX;
+ //! Size of the quantization bin around 0. Only used by QUANT_B.
+ const int32_t *zbin_QTX;
+ //! Dequantizer
+ const int32_t *dequant_QTX;
+#else
const int16_t *quant_fp_QTX;
//! Offset used for rounding in the quantizer process by AV1_XFORM_QUANT_FP.
const int16_t *round_fp_QTX;
@@ -136,6 +151,7 @@
const int16_t *zbin_QTX;
//! Dequantizer
const int16_t *dequant_QTX;
+#endif // CONFIG_EXTQUANT
/**@}*/
} MACROBLOCK_PLANE;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index f99e334..12a1b80 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1355,12 +1355,29 @@
const CommonQuantParams *quant_params = &cm->quant_params;
for (i = 0; i < MAX_SEGMENTS; ++i) {
const int qindex =
- cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
- : quant_params->base_qindex;
+ cm->seg.enabled
+#if CONFIG_EXTQUANT
+ ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex,
+ cm->seq_params.bit_depth)
+#else
+ ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
+#endif
+ : quant_params->base_qindex;
+
+#if CONFIG_EXTQUANT
+ xd->lossless[i] =
+ qindex == 0 &&
+ (quant_params->y_dc_delta_q + cm->seq_params.base_y_dc_delta_q <= 0) &&
+ (quant_params->u_dc_delta_q + cm->seq_params.base_uv_dc_delta_q <= 0) &&
+ quant_params->u_ac_delta_q <= 0 &&
+ (quant_params->v_dc_delta_q + cm->seq_params.base_uv_dc_delta_q <= 0) &&
+ quant_params->v_ac_delta_q <= 0;
+#else
xd->lossless[i] =
qindex == 0 && quant_params->y_dc_delta_q == 0 &&
quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
+#endif
if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
xd->qindex[i] = qindex;
if (xd->lossless[i]) {
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 7a9ab51..430b8b6 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -1085,7 +1085,15 @@
offset = AOMMIN(offset, delta_q_info->delta_q_res * 9 - 1);
offset = AOMMAX(offset, -delta_q_info->delta_q_res * 9 + 1);
int qindex = cm->quant_params.base_qindex + offset;
+#if CONFIG_EXTQUANT
+ qindex = AOMMIN(qindex, cm->seq_params.bit_depth == AOM_BITS_8
+ ? MAXQ_8_BITS
+ : cm->seq_params.bit_depth == AOM_BITS_10
+ ? MAXQ_10_BITS
+ : MAXQ);
+#else
qindex = AOMMIN(qindex, MAXQ);
+#endif
qindex = AOMMAX(qindex, MINQ);
return qindex;
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h
index 0a3edea..88ba0eb 100644
--- a/av1/encoder/encodeframe_utils.h
+++ b/av1/encoder/encodeframe_utils.h
@@ -195,8 +195,14 @@
const AV1_COMMON *const cm = &cpi->common;
av1_init_plane_quantizers(cpi, x, segment_id);
aom_clear_system_state();
+#if CONFIG_EXTQUANT
+ int segment_qindex =
+ av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex,
+ cm->seq_params.bit_depth);
+#else
const int segment_qindex =
av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
+#endif
return av1_compute_rd_mult(cpi,
segment_qindex + cm->quant_params.y_dc_delta_q);
}
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index b07859f..76d0eff 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -726,10 +726,24 @@
TX_TYPE tx_type = DCT_DCT;
const int bw = mi_size_wide[plane_bsize];
+#if DEBUG_EXTQUANT
+ if (args->dry_run == OUTPUT_ENABLED) {
+ fprintf(cm->fEncCoeffLog,
+ "\nmi_row = %d, mi_col = %d, blk_row = %d,"
+ " blk_col = %d, plane = %d, tx_size = %d ",
+ xd->mi_row, xd->mi_col, blk_row, blk_col, plane, tx_size);
+ }
+#endif
+
if (plane == 0 && is_blk_skip(x->txfm_search_info.blk_skip, plane,
blk_row * bw + blk_col)) {
*eob = 0;
p->txb_entropy_ctx[block] = 0;
+#if DEBUG_EXTQUANT
+ if (args->dry_run == OUTPUT_ENABLED) {
+ fprintf(cm->fEncCoeffLog, "tx_type = %d, eob = %d", tx_type, *eob);
+ }
+#endif
} else {
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
@@ -756,7 +770,15 @@
av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, &txfm_param,
&quant_param);
-
+#if DEBUG_EXTQUANT
+ if (args->dry_run == OUTPUT_ENABLED) {
+ fprintf(cm->fEncCoeffLog, "tx_type = %d, eob = %d\n", tx_type, *eob);
+ for (int c = 0; c < tx_size_wide[tx_size] * tx_size_high[tx_size]; c++) {
+ fprintf(cm->fEncCoeffLog, "%d ", dqcoeff[c]);
+ }
+ fprintf(cm->fEncCoeffLog, "\n\n");
+ }
+#endif
// Whether trellis or dropout optimization is required for key frames and
// intra frames.
const bool do_trellis = (frame_is_intra_only(cm) &&
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index af0a429..b93230d 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -472,6 +472,22 @@
}
}
}
+#if CONFIG_EXTQUANT
+ const int is_360p_or_larger =
+ AOMMIN(seq->max_frame_width, seq->max_frame_height) >= 360;
+ const int is_720p_or_larger =
+ AOMMIN(seq->max_frame_width, seq->max_frame_height) >= 720;
+ if (!is_360p_or_larger) {
+ seq->base_y_dc_delta_q = -7;
+ seq->base_uv_dc_delta_q = -6;
+ } else if (!is_720p_or_larger) {
+ seq->base_y_dc_delta_q = -5;
+ seq->base_uv_dc_delta_q = -4;
+ } else {
+ seq->base_y_dc_delta_q = -4;
+ seq->base_uv_dc_delta_q = -3;
+ }
+#endif // CONFIG_EXTQUANT
}
static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
@@ -885,6 +901,10 @@
return 0;
}
+#if DEBUG_EXTQUANT
+ cm->fEncCoeffLog = fopen("EncCoeffLog.txt", "wt");
+#endif
+
cm->error.setjmp = 1;
cpi->lap_enabled = num_lap_buffers > 0;
cpi->compressor_stage = stage;
@@ -1326,8 +1346,8 @@
* called later when needed. This will avoid unnecessary calls of
* av1_init_quantizer() for every frame.
*/
- av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
- cm->seq_params.bit_depth);
+ av1_init_quantizer(&cm->seq_params, &cpi->enc_quant_dequant_params,
+ &cm->quant_params);
av1_qm_init(&cm->quant_params, av1_num_planes(cm));
av1_loop_filter_init(cm);
@@ -1543,6 +1563,12 @@
av1_remove_common(cm);
av1_free_ref_frame_buffers(cm->buffer_pool);
+#if DEBUG_EXTQUANT
+ if (cpi->common.fEncCoeffLog != NULL) {
+ fclose(cpi->common.fEncCoeffLog);
+ }
+#endif
+
aom_free(cpi->subgop_config_str);
aom_free(cpi->subgop_config_path);
aom_free(cpi);
@@ -2212,9 +2238,11 @@
av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
q_cfg->enable_chroma_deltaq);
av1_set_speed_features_qindex_dependent(cpi, cpi->oxcf.speed);
- if ((q_cfg->deltaq_mode != NO_DELTA_Q) || q_cfg->enable_chroma_deltaq)
- av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
- cm->seq_params.bit_depth);
+#if !CONFIG_EXTQUANT
+ if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
+#endif
+ av1_init_quantizer(&cm->seq_params, &cpi->enc_quant_dequant_params,
+ &cm->quant_params);
av1_set_variance_partition_thresholds(cpi, q, 0);
av1_setup_frame(cpi);
@@ -2227,9 +2255,11 @@
av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
q_cfg->enable_chroma_deltaq);
av1_set_speed_features_qindex_dependent(cpi, cpi->oxcf.speed);
+#if !CONFIG_EXTQUANT
if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
- av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
- cm->seq_params.bit_depth);
+#endif
+ av1_init_quantizer(&cm->seq_params, &cpi->enc_quant_dequant_params,
+ &cm->quant_params);
av1_set_variance_partition_thresholds(cpi, q, 0);
if (frame_is_intra_only(cm) || cm->features.error_resilient_mode)
av1_setup_frame(cpi);
@@ -2393,9 +2423,11 @@
q_cfg->enable_chroma_deltaq);
av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
+#if !CONFIG_EXTQUANT
if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
- av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
- cm->seq_params.bit_depth);
+#endif
+ av1_init_quantizer(&cm->seq_params, &cpi->enc_quant_dequant_params,
+ &cm->quant_params);
av1_set_variance_partition_thresholds(cpi, q, 0);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 9310714..91005df 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -501,7 +501,7 @@
*/
int best_allowed_q;
/*!
- * Indicates the Constant/Constrained Quality level.
+ * Indicates the Constant/Constrained Quality level in [0, 255] range.
*/
int qp;
/*!
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 1e06e62..b4291d2 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -89,9 +89,16 @@
// Create a map used for cyclic background refresh.
if (cpi->cyclic_refresh) av1_cyclic_refresh_free(cpi->cyclic_refresh);
+#if CONFIG_EXTQUANT
+ CHECK_MEM_ERROR(
+ cm, cpi->cyclic_refresh,
+ av1_cyclic_refresh_alloc(mi_params->mi_rows, mi_params->mi_cols,
+ cm->seq_params.bit_depth));
+#else
CHECK_MEM_ERROR(
cm, cpi->cyclic_refresh,
av1_cyclic_refresh_alloc(mi_params->mi_rows, mi_params->mi_cols));
+#endif
// Create a map used to mark inactive areas.
aom_free(cpi->active_map.map);
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 9afe39e..f4e4630 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -824,9 +824,11 @@
q_for_screen_content_quick_run,
q_cfg->enable_chroma_deltaq);
av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
+#if !CONFIG_EXTQUANT
if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
- av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
- cm->seq_params.bit_depth);
+#endif
+ av1_init_quantizer(&cm->seq_params, &cpi->enc_quant_dequant_params,
+ &cm->quant_params);
av1_set_variance_partition_thresholds(cpi, q_for_screen_content_quick_run,
0);
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index f6addbb..b3de273 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -41,7 +41,11 @@
int new_eob;
} LevelDownStats;
+#if CONFIG_EXTQUANT
+static INLINE int get_dqv(const int32_t *dequant, int coeff_idx,
+#else
static INLINE int get_dqv(const int16_t *dequant, int coeff_idx,
+#endif
const qm_val_t *iqmatrix) {
int dqv = dequant[!!coeff_idx];
if (iqmatrix != NULL)
@@ -351,6 +355,11 @@
av1_write_tx_type(cm, xd, tx_type, tx_size, w);
}
+#if DEBUG_EXTQUANT
+ fprintf(cm->fEncCoeffLog, "\nblk_row=%d,blk_col=%d,plane=%d,tx_size=%d",
+ blk_row, blk_col, plane, tx_size);
+#endif
+
int eob_extra;
const int eob_pt = get_eob_pos_token(eob, &eob_extra);
const int eob_multi_size = txsize_log2_minus4[tx_size];
@@ -401,6 +410,10 @@
}
}
+#if DEBUG_EXTQUANT
+ fprintf(cm->fEncCoeffLog, "tx_type=%d, eob=%d\n", tx_type, eob);
+#endif
+
const int width = get_txb_wide(tx_size);
const int height = get_txb_high(tx_size);
uint8_t levels_buf[TX_PAD_2D];
@@ -450,6 +463,15 @@
}
}
+#if DEBUG_EXTQUANT
+ for (int c = 0; c < eob; ++c) {
+ const tran_low_t v = tcoeff[scan[c]];
+ const tran_low_t level = abs(v);
+ fprintf(cm->fEncCoeffLog, "c=%d,pos=%d,level=%d,dq_coeff=%d\n", c, scan[c],
+ level, v);
+ }
+#endif
+
// Loop to code all signs in the transform block,
// starting with the sign of DC (if applicable)
for (int c = 0; c < eob; ++c) {
@@ -561,16 +583,26 @@
}
static INLINE void update_coeff_eob_fast(int *eob, int shift,
+#if CONFIG_EXTQUANT
+ const int32_t *dequant_ptr,
+#else
const int16_t *dequant_ptr,
+#endif
const int16_t *scan,
const tran_low_t *coeff_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr) {
// TODO(sarahparker) make this work for aomqm
int eob_out = *eob;
+#if CONFIG_EXTQUANT
+ int zbin[2] = { dequant_ptr[0] + ROUND_POWER_OF_TWO(dequant_ptr[0] * 70,
+ 7 + QUANT_TABLE_BITS),
+ dequant_ptr[1] + ROUND_POWER_OF_TWO(dequant_ptr[1] * 70,
+ 7 + QUANT_TABLE_BITS) };
+#else
int zbin[2] = { dequant_ptr[0] + ROUND_POWER_OF_TWO(dequant_ptr[0] * 70, 7),
dequant_ptr[1] + ROUND_POWER_OF_TWO(dequant_ptr[1] * 70, 7) };
-
+#endif
for (int i = *eob - 1; i >= 0; i--) {
const int rc = scan[i];
const int qcoeff = qcoeff_ptr[rc];
@@ -890,7 +922,13 @@
tran_low_t abs_qc_low = abs_qc - 1;
*qc_low = (-sign ^ abs_qc_low) + sign;
assert((sign ? -abs_qc_low : abs_qc_low) == *qc_low);
+#if CONFIG_EXTQUANT
+ tran_low_t abs_dqc_low = (tran_low_t)(
+ ROUND_POWER_OF_TWO_64((tran_high_t)abs_qc_low * dqv, QUANT_TABLE_BITS) >>
+ shift);
+#else
tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
+#endif // CONFIG_EXTQUANT
*dqc_low = (-sign ^ abs_dqc_low) + sign;
assert((sign ? -abs_dqc_low : abs_dqc_low) == *dqc_low);
}
@@ -898,7 +936,11 @@
static INLINE void update_coeff_general(
int *accu_rate, int64_t *accu_dist, int si, int eob, TX_SIZE tx_size,
TX_CLASS tx_class, int bwl, int height, int64_t rdmult, int shift,
+#if CONFIG_EXTQUANT
+ int dc_sign_ctx, const int32_t *dequant, const int16_t *scan,
+#else
int dc_sign_ctx, const int16_t *dequant, const int16_t *scan,
+#endif
const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels,
const qm_val_t *iqmatrix) {
@@ -955,7 +997,11 @@
static AOM_FORCE_INLINE void update_coeff_simple(
int *accu_rate, int si, int eob, TX_SIZE tx_size, TX_CLASS tx_class,
+#if CONFIG_EXTQUANT
+ int bwl, int64_t rdmult, int shift, const int32_t *dequant,
+#else
int bwl, int64_t rdmult, int shift, const int16_t *dequant,
+#endif
const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs,
const tran_low_t *tcoeff, tran_low_t *qcoeff, tran_low_t *dqcoeff,
uint8_t *levels, const qm_val_t *iqmatrix) {
@@ -987,7 +1033,14 @@
const int64_t rd = RDCOST(rdmult, rate, dist);
const tran_low_t abs_qc_low = abs_qc - 1;
+#if CONFIG_EXTQUANT
+ // Keep the product at 64-bit width through both shifts and narrow to
+ // tran_low_t only at the end, so no high bits are dropped before `shift`
+ // is applied. This is the same form as the other CONFIG_EXTQUANT
+ // abs_dqc_low computation in this file.
+ const tran_low_t abs_dqc_low = (tran_low_t)(
+ ROUND_POWER_OF_TWO_64((tran_high_t)abs_qc_low * dqv, QUANT_TABLE_BITS) >>
+ shift);
+#else
const tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
+#endif // CONFIG_EXTQUANT
const int64_t dist_low = get_coeff_dist(abs_tqc, abs_dqc_low, shift);
const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
@@ -1006,7 +1059,11 @@
static AOM_FORCE_INLINE void update_coeff_eob(
int *accu_rate, int64_t *accu_dist, int *eob, int *nz_num, int *nz_ci,
int si, TX_SIZE tx_size, TX_CLASS tx_class, int bwl, int height,
+#if CONFIG_EXTQUANT
+ int dc_sign_ctx, int64_t rdmult, int shift, const int32_t *dequant,
+#else
int dc_sign_ctx, int64_t rdmult, int shift, const int16_t *dequant,
+#endif
const int16_t *scan, const LV_MAP_EOB_COST *txb_eob_costs,
const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels, int sharpness,
@@ -1146,7 +1203,11 @@
const int16_t *scan = scan_order->scan;
const int shift = av1_get_tx_scale(tx_size);
int eob = p->eobs[block];
+#if CONFIG_EXTQUANT
+ const int32_t *dequant = p->dequant_QTX;
+#else
const int16_t *dequant = p->dequant_QTX;
+#endif
const qm_val_t *iqmatrix =
av1_get_iqmatrix(&cpi->common.quant_params, xd, plane, tx_size, tx_type);
const int block_offset = BLOCK_OFFSET(block);
diff --git a/av1/encoder/encodetxb.h b/av1/encoder/encodetxb.h
index 2cd3160..64072b2 100644
--- a/av1/encoder/encodetxb.h
+++ b/av1/encoder/encodetxb.h
@@ -34,7 +34,11 @@
uint8_t *levels; // absolute values and clamped to 255.
tran_low_t *dqcoeff;
const tran_low_t *tcoeff;
+#if CONFIG_EXTQUANT
+ const int32_t *dequant;
+#else
const int16_t *dequant;
+#endif
int shift;
TX_SIZE tx_size;
TX_SIZE txs_ctx;
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index b987396..62e520b 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -275,7 +275,16 @@
static int find_fp_qindex(aom_bit_depth_t bit_depth) {
aom_clear_system_state();
+#if CONFIG_EXTQUANT
+ return av1_find_qindex(FIRST_PASS_Q, bit_depth, 0,
+ bit_depth == AOM_BITS_8
+ ? QINDEX_RANGE_8_BITS - 1
+ : bit_depth == AOM_BITS_10
+ ? QINDEX_RANGE_10_BITS - 1
+ : QINDEX_RANGE - 1);
+#else
return av1_find_qindex(FIRST_PASS_Q, bit_depth, 0, QINDEX_RANGE - 1);
+#endif
}
static double raw_motion_error_stdev(int *raw_motion_err_list,
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c
index 2c76fe3..f40fe96 100644
--- a/av1/encoder/intra_mode_search.c
+++ b/av1/encoder/intra_mode_search.c
@@ -909,6 +909,9 @@
mode_costs->mbmode_cost[size_group_lookup[bsize]][mode] + ref_frame_cost;
const int intra_cost_penalty = av1_get_intra_cost_penalty(
cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q,
+#if CONFIG_EXTQUANT
+ cm->seq_params.base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
cm->seq_params.bit_depth);
const int skip_ctx = av1_get_skip_txfm_context(xd);
diff --git a/av1/encoder/model_rd.h b/av1/encoder/model_rd.h
index 5de0649..d4d6e4d 100644
--- a/av1/encoder/model_rd.h
+++ b/av1/encoder/model_rd.h
@@ -94,7 +94,12 @@
// Fast approximate the modelling function.
if (cpi->sf.rd_sf.simple_model_rd_from_var) {
const int64_t square_error = sse;
+#if CONFIG_EXTQUANT
+ int quantizer = ROUND_POWER_OF_TWO(p->dequant_QTX[1], QUANT_TABLE_BITS) >>
+ dequant_shift;
+#else
int quantizer = p->dequant_QTX[1] >> dequant_shift;
+#endif
if (quantizer < 120)
*rate = (int)AOMMIN(
(square_error * (280 - quantizer)) >> (16 - AV1_PROB_COST_SHIFT),
@@ -123,7 +128,13 @@
const MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const int dequant_shift = (is_cur_buf_hbd(xd)) ? xd->bd - 5 : 3;
+#if CONFIG_EXTQUANT
+ const int qstep = AOMMAX(
+ ROUND_POWER_OF_TWO(p->dequant_QTX[1], QUANT_TABLE_BITS) >> dequant_shift,
+ 1);
+#else
const int qstep = AOMMAX(p->dequant_QTX[1] >> dequant_shift, 1);
+#endif
if (sse == 0) {
if (rate) *rate = 0;
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index ee0486c..a65c545 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -649,8 +649,15 @@
unsigned int sse;
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
+#if CONFIG_EXTQUANT
+ const uint32_t dc_quant =
+ ROUND_POWER_OF_TWO(p->dequant_QTX[0], QUANT_TABLE_BITS);
+ const uint32_t ac_quant =
+ ROUND_POWER_OF_TWO(p->dequant_QTX[1], QUANT_TABLE_BITS);
+#else
const uint32_t dc_quant = p->dequant_QTX[0];
const uint32_t ac_quant = p->dequant_QTX[1];
+#endif
const int64_t dc_thr = dc_quant * dc_quant >> 6;
int64_t ac_thr = ac_quant * ac_quant >> 6;
unsigned int var;
@@ -1144,8 +1151,15 @@
for (i = start_plane; i <= stop_plane; ++i) {
struct macroblock_plane *const p = &x->plane[i];
struct macroblockd_plane *const pd = &xd->plane[i];
+#if CONFIG_EXTQUANT
+ const uint32_t dc_quant =
+ ROUND_POWER_OF_TWO(p->dequant_QTX[0], QUANT_TABLE_BITS);
+ const uint32_t ac_quant =
+ ROUND_POWER_OF_TWO(p->dequant_QTX[1], QUANT_TABLE_BITS);
+#else
const uint32_t dc_quant = p->dequant_QTX[0];
const uint32_t ac_quant = p->dequant_QTX[1];
+#endif
const BLOCK_SIZE bs = plane_bsize;
unsigned int var;
if (!x->color_sensitivity[i - 1]) continue;
@@ -1739,6 +1753,9 @@
int intra_cost_penalty = av1_get_intra_cost_penalty(
quant_params->base_qindex, quant_params->y_dc_delta_q,
+#if CONFIG_EXTQUANT
+ cm->seq_params.base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
cm->seq_params.bit_depth);
int64_t inter_mode_thresh = RDCOST(x->rdmult, intra_cost_penalty, 0);
int perform_intra_pred = cpi->sf.rt_sf.check_intra_pred_nonrd;
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c
index a973923..3716307 100644
--- a/av1/encoder/partition_strategy.c
+++ b/av1/encoder/partition_strategy.c
@@ -106,9 +106,18 @@
// Prepare the input
const MACROBLOCKD *xd = &x->e_mbd;
const int bit_depth = xd->bd;
+#if CONFIG_EXTQUANT
+ const int dc_q =
+ av1_dc_quant_QTX(x->qindex, 0, cm->seq_params.base_y_dc_delta_q,
+ bit_depth) >>
+ (bit_depth - 8);
+ part_info->log_q = logf(1.0f + (float)((int64_t)dc_q * (int64_t)dc_q) /
+ (256 << (2 * QUANT_TABLE_BITS)));
+#else
const int dc_q =
av1_dc_quant_QTX(x->qindex, 0, bit_depth) >> (bit_depth - 8);
part_info->log_q = logf(1.0f + (float)(dc_q * dc_q) / 256.0f);
+#endif
part_info->log_q =
(part_info->log_q - av1_intra_mode_cnn_partition_mean[0]) /
av1_intra_mode_cnn_partition_std[0];
@@ -494,9 +503,17 @@
set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize);
// Q_INDEX
+#if CONFIG_EXTQUANT
+ const int dc_q =
+ av1_dc_quant_QTX(x->qindex, 0, cpi->common.seq_params.base_y_dc_delta_q,
+ xd->bd) >>
+ (xd->bd - 8);
+ features[f_idx++] = logf(1.0f + (float)((int64_t)dc_q * (int64_t)dc_q) /
+ (256 << (2 * QUANT_TABLE_BITS)));
+#else
const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8);
features[f_idx++] = logf(1.0f + (float)(dc_q * dc_q) / 256.0f);
-
+#endif
// Neighbor stuff
const int has_above = !!xd->above_mbmi;
const int has_left = !!xd->left_mbmi;
@@ -647,10 +664,18 @@
assert(sb_size == BLOCK_128X128);
int f_idx = 0;
-
+#if CONFIG_EXTQUANT
+ const int dc_q = av1_dc_quant_QTX(x->qindex, 0,
+ cm->seq_params.base_y_dc_delta_q, xd->bd) >>
+ (xd->bd - 8);
+ aom_clear_system_state();
+ const float log_q_sq = logf(1.0f + (float)((int64_t)dc_q * (int64_t)dc_q) /
+ (256 << (2 * QUANT_TABLE_BITS)));
+#else
const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8);
aom_clear_system_state();
const float log_q_sq = logf(1.0f + (float)(dc_q * dc_q) / 256.0f);
+#endif
// Perform full-pixel single motion search in Y plane of 16x16 mbs in the sb
float sum_mv_row_sq = 0;
@@ -877,14 +902,23 @@
if (cpi->sf.part_sf.ml_early_term_after_part_split_level < 2) thresh -= 0.3f;
const MACROBLOCKD *const xd = &x->e_mbd;
+#if CONFIG_EXTQUANT
+ const int dc_q = av1_dc_quant_QTX(x->qindex, 0,
+ cm->seq_params.base_y_dc_delta_q, xd->bd) >>
+ (xd->bd - 8);
+#else
const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8);
+#endif
const int bs = block_size_wide[bsize];
int f_idx = 0;
float features[FEATURES] = { 0.0f };
aom_clear_system_state();
-
+#if CONFIG_EXTQUANT
+ features[f_idx++] = logf(1.0f + (float)dc_q / (4 << QUANT_TABLE_BITS));
+#else
features[f_idx++] = logf(1.0f + (float)dc_q / 4.0f);
+#endif
features[f_idx++] = logf(1.0f + (float)best_rd / bs / bs / 1024.0f);
add_rd_feature(part_none_rd, best_rd, features, &f_idx);
@@ -1293,7 +1327,12 @@
features[feature_index++] = (float)pb_source_variance;
const int dc_q = (int)x->plane[0].dequant_QTX[0];
+#if CONFIG_EXTQUANT
+ features[feature_index++] = (float)((int64_t)dc_q * (int64_t)dc_q) /
+ (256 << (2 * QUANT_TABLE_BITS));  // square in 64 bits: dc_q still carries the QUANT_TABLE_BITS scale
+#else
features[feature_index++] = (float)(dc_q * dc_q) / 256.0f;
+#endif
assert(feature_index == FEATURES);
// Calculate score using the NN model.
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index c3a84d3..e88cf87 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -324,12 +324,29 @@
static void pick_cdef_from_qp(AV1_COMMON *const cm) {
const int bd = cm->seq_params.bit_depth;
+#if CONFIG_EXTQUANT
+ const int q = av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >>
+ (bd - 8 + QUANT_TABLE_BITS);
+#else
const int q =
av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
+#endif
CdefInfo *const cdef_info = &cm->cdef_info;
cdef_info->cdef_bits = 0;
cdef_info->nb_cdef_strengths = 1;
+#if CONFIG_EXTQUANT
+ int damping_offset = clamp(cm->quant_params.base_qindex -
+ (cm->seq_params.bit_depth == AOM_BITS_8
+ ? 0
+ : cm->seq_params.bit_depth == AOM_BITS_10
+ ? 2 * MAXQ_OFFSET
+ : 4 * MAXQ_OFFSET),
+ MINQ, MAXQ_8_BITS) >>
+ 6;
+ cdef_info->cdef_damping = AOMMIN(3 + damping_offset, 6);
+#else
cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);
+#endif
int predicted_y_f1 = 0;
int predicted_y_f2 = 0;
@@ -396,7 +413,19 @@
const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
+#if CONFIG_EXTQUANT
+ int damping_offset = clamp(cm->quant_params.base_qindex -
+ (cm->seq_params.bit_depth == AOM_BITS_8
+ ? 0
+ : cm->seq_params.bit_depth == AOM_BITS_10
+ ? 2 * MAXQ_OFFSET
+ : 4 * MAXQ_OFFSET),
+ MINQ, MAXQ_8_BITS) >>
+ 6;
+ const int damping = AOMMIN(3 + damping_offset, 6);
+#else
const int damping = 3 + (cm->quant_params.base_qindex >> 6);
+#endif
const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
pick_method <= CDEF_FAST_SEARCH_LVL3);
const int total_strengths = nb_cdef_strengths[pick_method];
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index 52ea6af..b715a40 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -210,8 +210,15 @@
} else if (method >= LPF_PICK_FROM_Q) {
const int min_filter_level = 0;
const int max_filter_level = av1_get_max_filter_level(cpi);
+#if CONFIG_EXTQUANT
+ const int q =
+ ROUND_POWER_OF_TWO(av1_ac_quant_QTX(cm->quant_params.base_qindex, 0,
+ cm->seq_params.bit_depth),
+ QUANT_TABLE_BITS);
+#else
const int q = av1_ac_quant_QTX(cm->quant_params.base_qindex, 0,
cm->seq_params.bit_depth);
+#endif
// based on tests result for rtc test set
// 0.04590 boosted or 0.02295 non-booseted in 18-bit fixed point
const int strength_boost_q_treshold = 700;
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 2d0021e..4a0a066 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -1445,6 +1445,9 @@
const int scale[3] = { 0, 1, 2 };
// Obtain the normalized Qscale
const int qs = av1_dc_quant_QTX(rsc->cm->quant_params.base_qindex, 0,
+#if CONFIG_EXTQUANT
+ rsc->cm->seq_params.base_y_dc_delta_q,
+#endif
rsc->cm->seq_params.bit_depth) >>
3;
// Derive threshold as sqr(normalized Qscale) * scale / 16,
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index eea406f..9fe515a 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -118,7 +118,16 @@
// down to lossless mode represented by q 1.0.
if (minqtarget <= 2.0) return 0;
+#if CONFIG_EXTQUANT
+ return av1_find_qindex(minqtarget, bit_depth, 0,
+ bit_depth == AOM_BITS_8
+ ? QINDEX_RANGE_8_BITS - 1
+ : bit_depth == AOM_BITS_10
+ ? QINDEX_RANGE_10_BITS - 1
+ : QINDEX_RANGE - 1);
+#else
return av1_find_qindex(minqtarget, bit_depth, 0, QINDEX_RANGE - 1);
+#endif
}
static void init_minq_luts(int *kf_low_m, int *kf_high_m, int *arfgf_low,
@@ -126,7 +135,15 @@
int *arfgf_ld_high, int *inter, int *rtc,
aom_bit_depth_t bit_depth) {
int i;
+#if CONFIG_EXTQUANT
+ for (i = 0; i < (bit_depth == AOM_BITS_8
+ ? QINDEX_RANGE_8_BITS
+ : bit_depth == AOM_BITS_10 ? QINDEX_RANGE_10_BITS
+ : QINDEX_RANGE);
+ i++) {
+#else
for (i = 0; i < QINDEX_RANGE; i++) {
+#endif
const double maxq = av1_convert_qindex_to_q(i, bit_depth);
kf_low_m[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.150, bit_depth);
kf_high_m[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.45, bit_depth);
@@ -161,9 +178,21 @@
double av1_convert_qindex_to_q(int qindex, aom_bit_depth_t bit_depth) {
// Convert the index to a real Q value (scaled down to match old Q values)
switch (bit_depth) {
+#if CONFIG_EXTQUANT
+ case AOM_BITS_8:
+ return av1_ac_quant_QTX(qindex, 0, bit_depth) /
+ (4.0 * (1 << QUANT_TABLE_BITS));
+ case AOM_BITS_10:
+ return av1_ac_quant_QTX(qindex, 0, bit_depth) /
+ (16.0 * (1 << QUANT_TABLE_BITS));
+ case AOM_BITS_12:
+ return av1_ac_quant_QTX(qindex, 0, bit_depth) /
+ (64.0 * (1 << QUANT_TABLE_BITS));
+#else
case AOM_BITS_8: return av1_ac_quant_QTX(qindex, 0, bit_depth) / 4.0;
case AOM_BITS_10: return av1_ac_quant_QTX(qindex, 0, bit_depth) / 16.0;
case AOM_BITS_12: return av1_ac_quant_QTX(qindex, 0, bit_depth) / 64.0;
+#endif
default:
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1.0;
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index b01ad9e..ada9d17 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -44,6 +44,16 @@
#define RD_THRESH_POW 1.25
+#if CONFIG_EXTQUANT
+#define RD_THRESH_MUL 4.40
+#define RDMULT_FROM_Q2_NUM 96
+#define RDMULT_FROM_Q2_DEN 32
+#else
+#define RD_THRESH_MUL 5.12
+#define RDMULT_FROM_Q2_NUM 88
+#define RDMULT_FROM_Q2_DEN 24
+#endif // CONFIG_EXTQUANT
+
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
@@ -365,9 +375,15 @@
}
void av1_init_me_luts(void) {
+#if CONFIG_EXTQUANT
+ init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE_8_BITS, AOM_BITS_8);
+ init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE_10_BITS, AOM_BITS_10);
+ init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
+#else
init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
+#endif
}
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
@@ -377,9 +393,18 @@
};
int av1_compute_rd_mult_based_on_qindex(const AV1_COMP *cpi, int qindex) {
+#if CONFIG_EXTQUANT
+ const int q =
+ av1_dc_quant_QTX(qindex, 0, cpi->common.seq_params.base_y_dc_delta_q,
+ cpi->common.seq_params.bit_depth);
+ int64_t rdmult = ROUND_POWER_OF_TWO_64(
+ (int64_t)((int64_t)q * q * RDMULT_FROM_Q2_NUM / RDMULT_FROM_Q2_DEN),
+ 2 * QUANT_TABLE_BITS);
+#else
const int q = av1_dc_quant_QTX(qindex, 0, cpi->common.seq_params.bit_depth);
int rdmult = q * q;
rdmult = rdmult * 3 + (rdmult * 2 / 3);
+#endif
switch (cpi->common.seq_params.bit_depth) {
case AOM_BITS_8: break;
case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
@@ -388,7 +413,7 @@
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1;
}
- return rdmult > 0 ? rdmult : 1;
+ return (int)(rdmult > 0 ? rdmult : 1);
}
int av1_compute_rd_mult(const AV1_COMP *cpi, int qindex) {
@@ -407,19 +432,40 @@
int av1_get_deltaq_offset(const AV1_COMP *cpi, int qindex, double beta) {
assert(beta > 0.0);
- int q = av1_dc_quant_QTX(qindex, 0, cpi->common.seq_params.bit_depth);
+ int q = av1_dc_quant_QTX(qindex, 0,
+#if CONFIG_EXTQUANT
+ cpi->common.seq_params.base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ cpi->common.seq_params.bit_depth);
int newq = (int)rint(q / sqrt(beta));
int orig_qindex = qindex;
if (newq < q) {
do {
qindex--;
- q = av1_dc_quant_QTX(qindex, 0, cpi->common.seq_params.bit_depth);
+ q = av1_dc_quant_QTX(qindex, 0,
+#if CONFIG_EXTQUANT
+ cpi->common.seq_params.base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ cpi->common.seq_params.bit_depth);
} while (newq < q && qindex > 0);
} else {
do {
qindex++;
- q = av1_dc_quant_QTX(qindex, 0, cpi->common.seq_params.bit_depth);
+ q = av1_dc_quant_QTX(qindex, 0,
+#if CONFIG_EXTQUANT
+ cpi->common.seq_params.base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ cpi->common.seq_params.bit_depth);
+#if CONFIG_EXTQUANT
+ } while (newq > q &&
+ (qindex < (cpi->common.seq_params.bit_depth == AOM_BITS_8
+ ? MAXQ_8_BITS
+ : cpi->common.seq_params.bit_depth == AOM_BITS_10
+ ? MAXQ_10_BITS
+ : MAXQ)));
+#else
} while (newq > q && qindex < MAXQ);
+#endif
}
return qindex - orig_qindex;
}
@@ -428,17 +474,43 @@
assert(beta > 0.0);
const AV1_COMMON *cm = &cpi->common;
int64_t q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0,
+#if CONFIG_EXTQUANT
+ cm->seq_params.base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
cm->seq_params.bit_depth);
int64_t rdmult = 0;
switch (cm->seq_params.bit_depth) {
- case AOM_BITS_8: rdmult = (int)((88 * q * q / beta) / 24); break;
- case AOM_BITS_10:
- rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 4);
+ case AOM_BITS_8:
+#if CONFIG_EXTQUANT
+ rdmult = ROUND_POWER_OF_TWO_64(
+ (int64_t)((RDMULT_FROM_Q2_NUM * (double)q * q / beta) /
+ RDMULT_FROM_Q2_DEN),
+ 2 * QUANT_TABLE_BITS);
+#else
+ rdmult = (int)((88 * q * q / beta) / 24);
+#endif
break;
- default:
- assert(cm->seq_params.bit_depth == AOM_BITS_12);
+ case AOM_BITS_10:
+#if CONFIG_EXTQUANT
+ rdmult = ROUND_POWER_OF_TWO_64(
+ (int64_t)((RDMULT_FROM_Q2_NUM * (double)q * q / beta) /
+ RDMULT_FROM_Q2_DEN),
+ 4 + 2 * QUANT_TABLE_BITS);
+#else
+ rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 4);
+#endif
+ break;
+ case AOM_BITS_12:
+ default: assert(cm->seq_params.bit_depth == AOM_BITS_12);
+#if CONFIG_EXTQUANT
+ rdmult = ROUND_POWER_OF_TWO_64(
+ (int64_t)((RDMULT_FROM_Q2_NUM * (double)q * q / beta) /
+ RDMULT_FROM_Q2_DEN),
+ 8 + 2 * QUANT_TABLE_BITS);
+#else
rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 8);
+#endif
break;
}
@@ -456,22 +528,48 @@
return (int)rdmult;
}
-static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
+static int compute_rd_thresh_factor(int qindex,
+#if CONFIG_EXTQUANT
+ int base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ aom_bit_depth_t bit_depth) {
double q;
switch (bit_depth) {
- case AOM_BITS_8: q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_8) / 4.0; break;
+ case AOM_BITS_8:
+ q = av1_dc_quant_QTX(qindex, 0,
+#if CONFIG_EXTQUANT
+ base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ AOM_BITS_8) /
+ 4.0;
+ break;
case AOM_BITS_10:
- q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_10) / 16.0;
+ q = av1_dc_quant_QTX(qindex, 0,
+#if CONFIG_EXTQUANT
+ base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ AOM_BITS_10) /
+ 16.0;
break;
case AOM_BITS_12:
- q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_12) / 64.0;
+ q = av1_dc_quant_QTX(qindex, 0,
+#if CONFIG_EXTQUANT
+ base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ AOM_BITS_12) /
+ 64.0;
break;
default:
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1;
}
- // TODO(debargha): Adjust the function below.
+ // TODO(debargha): Adjust the function below.
+#if CONFIG_EXTQUANT
+ q /= (1 << QUANT_TABLE_BITS);
+ return AOMMAX((int)(pow(q, RD_THRESH_POW) * RD_THRESH_MUL), 8);
+#else
return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
+#endif
}
void av1_set_sad_per_bit(const AV1_COMP *cpi, MvCosts *mv_costs, int qindex) {
@@ -488,11 +586,27 @@
int i, bsize, segment_id;
for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
+#if CONFIG_EXTQUANT
+ const int qindex = clamp(
+ av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex,
+ cm->seq_params.bit_depth) +
+ cm->quant_params.y_dc_delta_q,
+ 0,
+ cm->seq_params.bit_depth == AOM_BITS_8
+ ? MAXQ_8_BITS
+ : cm->seq_params.bit_depth == AOM_BITS_10 ? MAXQ_10_BITS : MAXQ);
+#else
const int qindex = clamp(
av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
cm->quant_params.y_dc_delta_q,
0, MAXQ);
- const int q = compute_rd_thresh_factor(qindex, cm->seq_params.bit_depth);
+#endif
+
+ const int q = compute_rd_thresh_factor(qindex,
+#if CONFIG_EXTQUANT
+ cm->seq_params.base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ cm->seq_params.bit_depth);
for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
// Threshold here seems unnecessarily harsh but fine given actual
@@ -1368,13 +1482,44 @@
}
}
+#if CONFIG_EXTQUANT
+#define INTRA_COST_PENALTY_Q_FACTOR 8
+#else
+#define INTRA_COST_PENALTY_Q_FACTOR 20
+#endif // CONFIG_EXTQUANT
+
int av1_get_intra_cost_penalty(int qindex, int qdelta,
+#if CONFIG_EXTQUANT
+ int base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
aom_bit_depth_t bit_depth) {
- const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);
+ const int q = av1_dc_quant_QTX(qindex, qdelta,
+#if CONFIG_EXTQUANT
+ base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
+ bit_depth);
switch (bit_depth) {
- case AOM_BITS_8: return 20 * q;
- case AOM_BITS_10: return 5 * q;
- case AOM_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2);
+ case AOM_BITS_8:
+#if CONFIG_EXTQUANT
+ return ROUND_POWER_OF_TWO(INTRA_COST_PENALTY_Q_FACTOR * q,
+ 0 + QUANT_TABLE_BITS);
+#else
+ return 20 * q;
+#endif
+ case AOM_BITS_10:
+#if CONFIG_EXTQUANT
+ return ROUND_POWER_OF_TWO(INTRA_COST_PENALTY_Q_FACTOR * q,
+ 2 + QUANT_TABLE_BITS);
+#else
+ return 5 * q;
+#endif
+ case AOM_BITS_12:
+#if CONFIG_EXTQUANT
+ return ROUND_POWER_OF_TWO(INTRA_COST_PENALTY_Q_FACTOR * q,
+ 4 + QUANT_TABLE_BITS);
+#else
+ return ROUND_POWER_OF_TWO(5 * q, 2);
+#endif
default:
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1;
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index ecc101f..41c8eec 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -352,6 +352,9 @@
const int num_planes);
int av1_get_intra_cost_penalty(int qindex, int qdelta,
+#if CONFIG_EXTQUANT
+ int base_y_dc_delta_q,
+#endif // CONFIG_EXTQUANT
aom_bit_depth_t bit_depth);
#if CONFIG_SDP
void av1_fill_mode_rates(AV1_COMMON *const cm, const MACROBLOCKD *xd,
diff --git a/av1/encoder/svc_layercontext.c b/av1/encoder/svc_layercontext.c
index 087d395..5a16790 100644
--- a/av1/encoder/svc_layercontext.c
+++ b/av1/encoder/svc_layercontext.c
@@ -75,8 +75,13 @@
if (lc->last_coded_q_map) aom_free(lc->last_coded_q_map);
CHECK_MEM_ERROR(cm, lc->last_coded_q_map,
aom_malloc(last_coded_q_map_size));
+#if CONFIG_EXTQUANT
+ for (int i = 0; i < mi_rows * mi_cols; ++i)
+ lc->last_coded_q_map[i] = MAXQ;
+#else
assert(MAXQ <= 255);
memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size);
+#endif // CONFIG_EXTQUANT
}
}
svc->downsample_filter_type[sl] = BILINEAR;
@@ -227,7 +232,11 @@
cpi->svc.number_spatial_layers > 1 && svc->temporal_layer_id == 0) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
signed char *temp = lc->map;
+#if CONFIG_EXTQUANT
+ uint16_t *temp2 = lc->last_coded_q_map;
+#else
uint8_t *temp2 = lc->last_coded_q_map;
+#endif // CONFIG_EXTQUANT
lc->map = cr->map;
cr->map = temp;
lc->last_coded_q_map = cr->last_coded_q_map;
diff --git a/av1/encoder/svc_layercontext.h b/av1/encoder/svc_layercontext.h
index a34843e..15160e8 100644
--- a/av1/encoder/svc_layercontext.h
+++ b/av1/encoder/svc_layercontext.h
@@ -50,7 +50,11 @@
/*!
* Segmentation map for last coded quantization paramters.
*/
+#if CONFIG_EXTQUANT
+ uint16_t *last_coded_q_map;
+#else
uint8_t *last_coded_q_map;
+#endif // CONFIG_EXTQUANT
/*!
* Number of blocks on segment 1
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index d42d53f..5de5295 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -1168,7 +1168,13 @@
const int q = av1_rc_pick_q_and_bounds(
cpi, &cpi->rc, cpi->oxcf.frm_dim_cfg.width,
cpi->oxcf.frm_dim_cfg.height, group_idx, &bottom_index, &top_index);
+#if CONFIG_EXTQUANT
+ const int ac_q = ROUND_POWER_OF_TWO(
+ av1_ac_quant_QTX(q, 0, cpi->common.seq_params.bit_depth),
+ QUANT_TABLE_BITS);
+#else
const int ac_q = av1_ac_quant_QTX(q, 0, cpi->common.seq_params.bit_depth);
+#endif
const float threshold = 0.7f * ac_q * ac_q;
if (!is_second_arf) {
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 0c0af18..71e3312 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -436,20 +436,31 @@
// Uses simple features on top of DCT coefficients to quickly predict
// whether optimal RD decision is to skip encoding the residual.
// The sse value is stored in dist.
-static int predict_skip_txfm(MACROBLOCK *x, BLOCK_SIZE bsize, int64_t *dist,
+static int predict_skip_txfm(const AV1_COMMON *cm, MACROBLOCK *x,
+ BLOCK_SIZE bsize, int64_t *dist,
int reduced_tx_set) {
const TxfmSearchParams *txfm_params = &x->txfm_search_params;
const int bw = block_size_wide[bsize];
const int bh = block_size_high[bsize];
const MACROBLOCKD *xd = &x->e_mbd;
- const int16_t dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd);
+ (void)cm;
+ const int32_t dc_q = av1_dc_quant_QTX(x->qindex, 0,
+#if CONFIG_EXTQUANT
+ cm->seq_params.base_y_dc_delta_q,
+#endif
+ xd->bd);  // int32_t: the EXTQUANT step is scaled by QUANT_TABLE_BITS and may not fit in 16 bits
*dist = pixel_diff_dist(x, 0, 0, 0, bsize, bsize, NULL);
const int64_t mse = *dist / bw / bh;
// Normalized quantizer takes the transform upscaling factor (8 for tx size
// smaller than 32) into account.
+#if CONFIG_EXTQUANT
+ const int16_t normalized_dc_q =
+ ROUND_POWER_OF_TWO(dc_q, (3 + QUANT_TABLE_BITS));
+#else
const int16_t normalized_dc_q = dc_q >> 3;
+#endif
const int64_t mse_thresh = (int64_t)normalized_dc_q * normalized_dc_q / 8;
// For faster early skip decision, use dist to compare against threshold so
// that quality risk is less for the skip=1 decision. Otherwise, use mse
@@ -479,8 +490,15 @@
const int16_t *src_diff = x->plane[0].src_diff;
const int n_coeff = tx_w * tx_h;
- const int16_t ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
+ const int32_t ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);  // int32_t: EXTQUANT step may exceed 16 bits
+#if CONFIG_EXTQUANT
+ const uint32_t dc_thresh =
+ ROUND_POWER_OF_TWO((max_qcoef_thresh * dc_q), QUANT_TABLE_BITS);
+ const uint32_t ac_thresh =
+ ROUND_POWER_OF_TWO((max_qcoef_thresh * ac_q), QUANT_TABLE_BITS);
+#else
const uint32_t dc_thresh = max_qcoef_thresh * dc_q;
const uint32_t ac_thresh = max_qcoef_thresh * ac_q;
+#endif
for (int row = 0; row < bh; row += tx_h) {
for (int col = 0; col < bw; col += tx_w) {
av1_fwd_txfm(src_diff + col, coefs, bw, &param);
@@ -815,7 +833,12 @@
const int txw = tx_size_wide[tx_size];
const int txh = tx_size_high[tx_size];
const int dequant_shift = (is_cur_buf_hbd(xd)) ? xd->bd - 5 : 3;
+#if CONFIG_EXTQUANT
+ const int q_step =
+ ROUND_POWER_OF_TWO(p->dequant_QTX[1], QUANT_TABLE_BITS) >> dequant_shift;
+#else
const int q_step = p->dequant_QTX[1] >> dequant_shift;
+#endif
const int num_samples = txw * txh;
const double rate_norm = (double)rd_stats->rate / num_samples;
@@ -1008,7 +1031,12 @@
&bh);
const int num_samples = bw * bh;
const int dequant_shift = (is_cur_buf_hbd(xd)) ? xd->bd - 5 : 3;
+#if CONFIG_EXTQUANT
+ const int q_step =
+ ROUND_POWER_OF_TWO(p->dequant_QTX[1], QUANT_TABLE_BITS) >> dequant_shift;
+#else
const int q_step = p->dequant_QTX[1] >> dequant_shift;
+#endif
const int shift = (xd->bd - 8);
const double rate_norm = (double)rd_stats->rate / num_samples;
@@ -2166,9 +2194,18 @@
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = xd->mi[0];
const int dequant_shift = (is_cur_buf_hbd(xd)) ? xd->bd - 5 : 3;
+#if CONFIG_EXTQUANT
+ const int qstep =
+ ROUND_POWER_OF_TWO(x->plane[plane].dequant_QTX[1], QUANT_TABLE_BITS) >>
+ dequant_shift;
+ const int dc_qstep =
+ ROUND_POWER_OF_TWO(x->plane[plane].dequant_QTX[0], QUANT_TABLE_BITS) >>
+ dequant_shift;
+#else
const int qstep = x->plane[plane].dequant_QTX[1] >> dequant_shift;
- uint64_t block_var = UINT64_MAX;
const int dc_qstep = x->plane[plane].dequant_QTX[0] >> 3;
+#endif
+ uint64_t block_var = UINT64_MAX;
*block_sse = pixel_diff_stats(x, plane, blk_row, blk_col, plane_bsize,
txsize_to_bsize[tx_size], block_mse_q8,
per_px_mean, &block_var);
@@ -2293,8 +2330,13 @@
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
const int dequant_shift = (is_cur_buf_hbd(xd)) ? xd->bd - 5 : 3;
+#if CONFIG_EXTQUANT
+ const int qstep =
+ ROUND_POWER_OF_TWO(x->plane[plane].dequant_QTX[1], QUANT_TABLE_BITS) >>
+ dequant_shift;
+#else
const int qstep = x->plane[plane].dequant_QTX[1] >> dequant_shift;
-
+#endif
const uint8_t txw = tx_size_wide[tx_size];
const uint8_t txh = tx_size_high[tx_size];
int64_t block_sse;
@@ -3457,7 +3499,7 @@
// context and terminate early.
int64_t dist;
if (txfm_params->skip_txfm_level &&
- predict_skip_txfm(x, bsize, &dist,
+ predict_skip_txfm(&cpi->common, x, bsize, &dist,
cpi->common.features.reduced_tx_set_used)) {
set_skip_txfm(x, rd_stats, bsize, dist);
// Save the RD search results into tx_rd_record.
@@ -3545,7 +3587,7 @@
int64_t dist;
if (tx_params->skip_txfm_level && is_inter &&
!xd->lossless[mbmi->segment_id] &&
- predict_skip_txfm(x, bs, &dist,
+ predict_skip_txfm(&cpi->common, x, bs, &dist,
cpi->common.features.reduced_tx_set_used)) {
// Populate rdstats as per skip decision
set_skip_txfm(x, rd_stats, bs, dist);
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 7442aa4..f54c3fc 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -328,10 +328,16 @@
AV1_COMMON *const cm = &cpi->common;
const int is_key_frame = frame_is_intra_only(cm);
const int threshold_multiplier = is_key_frame ? 40 : 1;
+ EncQuantDequantParams *quant_params = &cpi->enc_quant_dequant_params;
+#if CONFIG_EXTQUANT
int64_t threshold_base =
(int64_t)(threshold_multiplier *
- cpi->enc_quant_dequant_params.dequants.y_dequant_QTX[q][1]);
-
+ ROUND_POWER_OF_TWO(quant_params->dequants.y_dequant_QTX[q][1],
+ QUANT_TABLE_BITS));
+#else
+ int64_t threshold_base = (int64_t)(
+ threshold_multiplier * quant_params->dequants.y_dequant_QTX[q][1]);
+#endif
if (is_key_frame) {
thresholds[0] = threshold_base;
thresholds[1] = threshold_base;
@@ -903,7 +909,12 @@
if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
cyclic_refresh_segment_id_boosted(segment_id) &&
cpi->sf.rt_sf.use_nonrd_pick_mode) {
+#if CONFIG_EXTQUANT
+ int q = av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex,
+ cm->seq_params.bit_depth);
+#else
int q = av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
+#endif
set_vbp_thresholds(cpi, thresholds, q, content_state);
} else {
set_vbp_thresholds(cpi, thresholds, cm->quant_params.base_qindex,
diff --git a/av1/encoder/x86/av1_highbd_quantize_avx2.c b/av1/encoder/x86/av1_highbd_quantize_avx2.c
index b58911f..cc8d6e2 100644
--- a/av1/encoder/x86/av1_highbd_quantize_avx2.c
+++ b/av1/encoder/x86/av1_highbd_quantize_avx2.c
@@ -29,9 +29,15 @@
qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11);
}
+#if CONFIG_EXTQUANT
+static INLINE void init_qp(const int32_t *round_ptr, const int32_t *quant_ptr,
+ const int32_t *dequant_ptr, int log_scale,
+ __m256i *qp) {
+#else
static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *dequant_ptr, int log_scale,
__m256i *qp) {
+#endif
__m128i round = _mm_loadu_si128((const __m128i *)round_ptr);
if (log_scale) {
const __m128i round_scale = _mm_set1_epi16(1 << (15 - log_scale));
@@ -87,12 +93,21 @@
*eob = _mm256_max_epi32(cur_eob, *eob);
}
+#if CONFIG_EXTQUANT
+void av1_highbd_quantize_fp_avx2(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr,
+ const int32_t *round_ptr, const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, int log_scale) {
+#else
void av1_highbd_quantize_fp_avx2(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, int log_scale) {
+#endif
(void)scan;
(void)zbin_ptr;
(void)quant_shift_ptr;
diff --git a/av1/encoder/x86/av1_highbd_quantize_sse4.c b/av1/encoder/x86/av1_highbd_quantize_sse4.c
index 40b3b46..c9c2df1 100644
--- a/av1/encoder/x86/av1_highbd_quantize_sse4.c
+++ b/av1/encoder/x86/av1_highbd_quantize_sse4.c
@@ -112,12 +112,21 @@
return eobValue;
}
+#if CONFIG_EXTQUANT
+void av1_highbd_quantize_fp_sse4_1(
+ const tran_low_t *coeff_ptr, intptr_t count, const int32_t *zbin_ptr,
+ const int32_t *round_ptr, const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, int log_scale) {
+#else
void av1_highbd_quantize_fp_sse4_1(
const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, int log_scale) {
+#endif
__m128i coeff[2], qcoeff[3], dequant[2], qparam[4], coeff_sign;
__m128i eob = _mm_setzero_si128();
const tran_low_t *src = coeff_ptr;
diff --git a/av1/encoder/x86/av1_quantize_avx2.c b/av1/encoder/x86/av1_quantize_avx2.c
index f5f7ee1..4a82bd7 100644
--- a/av1/encoder/x86/av1_quantize_avx2.c
+++ b/av1/encoder/x86/av1_quantize_avx2.c
@@ -42,9 +42,15 @@
*qp = _mm256_insertf128_si256(_mm256_castsi128_si256(*p), ac, 1);
}
+#if CONFIG_EXTQUANT
+static INLINE void init_qp(const int32_t *round_ptr, const int32_t *quant_ptr,
+ const int32_t *dequant_ptr, int log_scale,
+ __m256i *thr, __m256i *qp) {
+#else
static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *dequant_ptr, int log_scale,
__m256i *thr, __m256i *qp) {
+#endif
__m128i round = _mm_loadu_si128((const __m128i *)round_ptr);
const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr);
const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr);
@@ -154,16 +160,28 @@
return _mm_extract_epi16(eob, 1);
}
+#if CONFIG_EXTQUANT
+static INLINE void store_zero_tran_low(tran_low_t *a) {
+#else
static INLINE void store_zero_tran_low(int16_t *a) {
+#endif
const __m256i zero = _mm256_setzero_si256();
_mm256_storeu_si256((__m256i *)(a), zero);
}
+#if CONFIG_EXTQUANT
+void av1_quantize_lp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int32_t *round_ptr, const int32_t *quant_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan) {
+#else
void av1_quantize_lp_avx2(const int16_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *round_ptr, const int16_t *quant_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan) {
+#endif
__m128i eob;
__m256i round256, quant256, dequant256;
__m256i eob256, thr256;
@@ -247,6 +265,15 @@
*eob_ptr = accumulate_eob(eob);
}
+#if CONFIG_EXTQUANT
+void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int32_t *zbin_ptr, const int32_t *round_ptr,
+ const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+#else
void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
@@ -254,6 +281,7 @@
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+#endif
(void)scan_ptr;
(void)zbin_ptr;
(void)quant_shift_ptr;
@@ -323,12 +351,21 @@
}
}
+#if CONFIG_EXTQUANT
+void av1_quantize_fp_32x32_avx2(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr,
+ const int32_t *round_ptr, const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+#else
void av1_quantize_fp_32x32_avx2(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+#endif
(void)scan_ptr;
(void)zbin_ptr;
(void)quant_shift_ptr;
@@ -402,12 +439,21 @@
}
}
+#if CONFIG_EXTQUANT
+void av1_quantize_fp_64x64_avx2(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int32_t *zbin_ptr,
+ const int32_t *round_ptr, const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+#else
void av1_quantize_fp_64x64_avx2(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+#endif
(void)scan_ptr;
(void)zbin_ptr;
(void)quant_shift_ptr;
diff --git a/av1/encoder/x86/av1_quantize_sse2.c b/av1/encoder/x86/av1_quantize_sse2.c
index 5497c7e..2d2d595 100644
--- a/av1/encoder/x86/av1_quantize_sse2.c
+++ b/av1/encoder/x86/av1_quantize_sse2.c
@@ -135,6 +135,15 @@
}
}
+#if CONFIG_EXTQUANT
+void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int32_t *zbin_ptr, const int32_t *round_ptr,
+ const int32_t *quant_ptr,
+ const int32_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int32_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+#else
void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
@@ -142,6 +151,7 @@
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+#endif
(void)scan_ptr;
(void)zbin_ptr;
(void)quant_shift_ptr;