Tweak rdmult with `tune=ssimulacra2`
Tweak rdmult derivation in two places:
1. When computing rdmult in many parts of the encoder:
- `av1_compute_rd_mult()`
- `av1_compute_rd_mult_based_on_qindex()`
2. While performing trellis quantization:
- `av1_optimize_txb()`
These two tweaks work synergistically to improve subjective
quality and SSIMULACRA2 scores.
Approximate BD-Rate gains compared to encoding without the rdmult tweaks,
at cpu-used=6 on Daala's subset1:
- SSIMULACRA2 60: -1.2%
- SSIMULACRA2 70: -1.4%
- SSIMULACRA2 80: -1.2%
- SSIMULACRA2 90: -1.3%
Bug: aomedia:375221136
Change-Id: I559ee11f1924e6ea069e56b2accbfd00f41c24c7
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index 5c08726..ea337e3 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -645,7 +645,7 @@
qindex2, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi));
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
av1_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta);
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index f948405..12f58e3 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -815,7 +815,7 @@
qindex_rdmult, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi));
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
const int qindex_change = x->qindex != qindex;
if (qindex_change || do_update) {
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h
index 05afd61..46d36c8 100644
--- a/av1/encoder/encodeframe_utils.h
+++ b/av1/encoder/encodeframe_utils.h
@@ -317,7 +317,8 @@
return av1_compute_rd_mult(
qindex, bit_depth, update_type, layer_depth, boost_index, frame_type,
- cpi->oxcf.q_cfg.use_fixed_qp_offsets, is_stat_consumption_stage(cpi));
+ cpi->oxcf.q_cfg.use_fixed_qp_offsets, is_stat_consumption_stage(cpi),
+ cpi->oxcf.tune_cfg.tuning);
}
static inline int do_split_check(BLOCK_SIZE bsize) {
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 2d623fa..1067288 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3382,7 +3382,8 @@
// Note: Both use common rdmult based on base qindex of fullres.
const int64_t rdmult = av1_compute_rd_mult_based_on_qindex(
- bit_depth, update_type, cm->quant_params.base_qindex);
+ bit_depth, update_type, cm->quant_params.base_qindex,
+ cpi->oxcf.tune_cfg.tuning);
// Find the best rdcost among all superres denoms.
int best_denom = -1;
@@ -3446,7 +3447,8 @@
// Note: Both use common rdmult based on base qindex of fullres.
const int64_t rdmult = av1_compute_rd_mult_based_on_qindex(
- bit_depth, update_type, cm->quant_params.base_qindex);
+ bit_depth, update_type, cm->quant_params.base_qindex,
+ cpi->oxcf.tune_cfg.tuning);
proj_rdcost1 =
RDCOST_DBL_WITH_NATIVE_BD_DIST(rdmult, rate1, sse1, bit_depth);
const double proj_rdcost2 =
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index e77dbc7..2c26688 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -386,7 +386,7 @@
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
FRAME_UPDATE_TYPE update_type,
- int qindex) {
+ int qindex, aom_tune_metric tuning) {
const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
int64_t rdmult = q * q;
if (update_type == KF_UPDATE) {
@@ -400,6 +400,25 @@
rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
}
+ if (tuning == AOM_TUNE_SSIMULACRA2) {
+ // Further multiply rdmult (by up to 200/128 = 1.5625) to improve image
+ // quality. The most noticeable effect is a mild bias towards choosing
+ // larger transform sizes (e.g. one 16x16 transform instead of 4 8x8
+ // transforms).
+ // For very high qindexes, start progressively reducing the weight towards
+ // unity (128/128), as transforms are large enough and making them even
+ // larger actually harms subjective quality and SSIMULACRA 2 scores.
+ // This weight part of the equation was determined by iteratively increasing
+ // weight on CID22 and Daala's subset1, and observing its effects on visual
+ // quality and SSIMULACRA 2 scores along the usable (0-100) range.
+ // The ramp-down part of the equation was determined by choosing a fixed
+ // starting qindex [159, where (255 - 159) * 3 / 4 = 72 reaches the clamp
+ // limit] such that SSIMULACRA 2 scores for encodes with qindexes greater
+ // than 159 were at or above their equivalents with no rdmult adjustment.
+ const int weight = clamp(((255 - qindex) * 3) / 4, 0, 72) + 128;
+ rdmult = (int64_t)((double)rdmult * weight / 128.0);
+ }
+
switch (bit_depth) {
case AOM_BITS_8: break;
case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
@@ -416,9 +435,10 @@
const int layer_depth, const int boost_index,
const FRAME_TYPE frame_type,
const int use_fixed_qp_offsets,
- const int is_stat_consumption_stage) {
- int64_t rdmult =
- av1_compute_rd_mult_based_on_qindex(bit_depth, update_type, qindex);
+ const int is_stat_consumption_stage,
+ const aom_tune_metric tuning) {
+ int64_t rdmult = av1_compute_rd_mult_based_on_qindex(bit_depth, update_type,
+ qindex, tuning);
if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
(frame_type != KEY_FRAME)) {
// Layer depth adjustment
@@ -426,7 +446,7 @@
// ARF boost adjustment
rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
}
- return (int)rdmult;
+ return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
}
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
@@ -486,7 +506,7 @@
cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
layer_depth, boost_index, frame_type,
cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi)) /
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning) /
beta);
}
#endif // !CONFIG_REALTIME_ONLY
@@ -778,7 +798,7 @@
qindex_rdmult, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi));
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
#if CONFIG_RD_COMMAND
if (cpi->oxcf.pass == 2) {
const RD_COMMAND *rd_command = &cpi->rd_command;
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index d4db276..0b79fd9 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -14,8 +14,8 @@
#include <limits.h>
+#include "aom/aomcx.h"
#include "av1/common/blockd.h"
-
#include "av1/encoder/block.h"
#include "av1/encoder/context_tree.h"
#include "av1/encoder/cost.h"
@@ -232,19 +232,21 @@
* \param[in] bit_depth bit depth
* \param[in] update_type frame update type
* \param[in] qindex q index
+ * \param[in] tuning visual tuning metric
*
* \return rdmult
*/
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
FRAME_UPDATE_TYPE update_type,
- int qindex);
+ int qindex, aom_tune_metric tuning);
int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
const FRAME_UPDATE_TYPE update_type,
const int layer_depth, const int boost_index,
const FRAME_TYPE frame_type,
const int use_fixed_qp_offsets,
- const int is_stat_consumption_stage);
+ const int is_stat_consumption_stage,
+ const aom_tune_metric tuning);
void av1_initialize_rd_consts(struct AV1_COMP *cpi);
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 202ac1f..a70f0c9 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1375,12 +1375,17 @@
const int base_qindex =
cpi->use_ducky_encode ? gf_group->q_val[frame_idx] : pframe_qindex;
+ // The TPL model is only meant to be run in inter mode, so ensure that we are
+ // not running in all intra mode, which implies we are not tuning for
+ // SSIMULACRA 2.
+ assert(cpi->oxcf.tune_cfg.tuning != AOM_TUNE_SSIMULACRA2 &&
+ cpi->oxcf.mode != ALLINTRA);
// Get rd multiplier set up.
- rdmult = (int)av1_compute_rd_mult(
+ rdmult = av1_compute_rd_mult(
base_qindex, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi));
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
if (rdmult < 1) rdmult = 1;
av1_set_error_per_bit(&x->errorperbit, rdmult);
@@ -1395,7 +1400,8 @@
const FRAME_UPDATE_TYPE update_type =
gf_group->update_type[cpi->gf_frame_index];
tpl_frame->base_rdmult = av1_compute_rd_mult_based_on_qindex(
- bd_info.bit_depth, update_type, base_qindex) /
+ bd_info.bit_depth, update_type, base_qindex,
+ cpi->oxcf.tune_cfg.tuning) /
6;
if (cpi->use_ducky_encode)
@@ -2105,7 +2111,7 @@
orig_qindex_rdmult, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi));
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
const int new_qindex_rdmult = quant_params->base_qindex +
x->rdmult_delta_qindex +
@@ -2114,7 +2120,7 @@
new_qindex_rdmult, cm->seq_params->bit_depth,
cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
- is_stat_consumption_stage(cpi));
+ is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
const double scaling_factor = (double)new_rdmult / (double)orig_rdmult;
diff --git a/av1/encoder/txb_rdopt.c b/av1/encoder/txb_rdopt.c
index 4fc4ba3..68e3c3c 100644
--- a/av1/encoder/txb_rdopt.c
+++ b/av1/encoder/txb_rdopt.c
@@ -335,12 +335,19 @@
const LV_MAP_EOB_COST *txb_eob_costs =
&coeff_costs->eob_costs[eob_multi_size][plane_type];
- const int rshift = 2;
+ // For the SSIMULACRA 2 tune, increase rshift from 2 to 4.
+ // This biases trellis quantization towards keeping more coefficients, and
+ // together with the SSIMULACRA 2 rdmult adjustment in
+ // av1_compute_rd_mult_based_on_qindex(), this helps preserve image
+ // features (like repeating patterns and camera noise/film grain), which
+ // improves SSIMULACRA 2 scores.
+ const int rshift = cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2 ? 4 : 2;
+ const int rounding = (1 << rshift) >> 1;
const int64_t rdmult =
(((int64_t)x->rdmult *
(plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8)))) +
- 2) >>
+ rounding) >>
rshift;
uint8_t levels_buf[TX_PAD_2D];