Optimize tune IQ and SSIMULACRA2 for inter-frame encoding

- Adjust speed features for better image coding performance
- Bias encoder toward picking intra-coded block candidates

Approximate gains in good-quality mode, cpu-used=6, on two-layer
progressive image encoding in 4:4:4 chroma subsampling mode
(Daala's subset1):
- SSIMULACRA 2 60: -2.1%
- SSIMULACRA 2 70: -3.1%
- SSIMULACRA 2 80: -3.2%
- SSIMULACRA 2 90: -3.2%

Change-Id: I7d88835ac993a11f74d7644be501ed0b56d7ba9a
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 01e4bf7..3e49001 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -784,7 +784,25 @@ } static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x, - RD_STATS *rd_cost) { + RD_STATS *rd_cost, bool is_inter_pred) { + if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ || + cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) && + is_inter_pred) { + // Tune IQ and SSIMULACRA2 are often used to encode layered AVIFs, where + // keyframes can be encoded at a lower quality (i.e. higher QP) than + // inter-coded frames. + // In this case, libaom tends to underestimate the true RD cost of inter + // prediction candidates, causing encoded file size to increase without a + // corresponding increase in quality. + // To compensate for this effect, make inter block candidates appear more + // expensive to the encoder to slightly bias toward intra prediction. + // Doing this increases overall compression efficiency, while still allowing + // the encoder to pick inter prediction when it's beneficial. + rd_cost->dist += rd_cost->dist >> 3; + rd_cost->rdcost += rd_cost->rdcost >> 3; + return; + } + if (cpi->oxcf.algo_cfg.sharpness != 3) return; if (frame_is_kf_gf_arf(cpi)) return; @@ -807,7 +825,14 @@ } static void adjust_cost(const AV1_COMP *cpi, const MACROBLOCK *x, - int64_t *rd_cost) { + int64_t *rd_cost, bool is_inter_pred) { + if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ || + cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) && + is_inter_pred) { + *rd_cost += *rd_cost >> 3; + return; + } + if (cpi->oxcf.algo_cfg.sharpness != 3) return; if (frame_is_kf_gf_arf(cpi)) return; @@ -1851,9 +1876,13 @@ } } - adjust_cost(cpi, x, &this_yrd); - adjust_rdcost(cpi, x, rd_stats); - adjust_rdcost(cpi, x, rd_stats_y); + if (this_yrd < INT64_MAX) { + adjust_cost(cpi, x, &this_yrd, /*is_inter_pred=*/true); + } + adjust_rdcost(cpi, x, rd_stats, /*is_inter_pred=*/true); + if (rd_stats_y->rdcost < INT64_MAX) { + adjust_rdcost(cpi, x, rd_stats_y, /*is_inter_pred=*/true); + } const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); if 
(mode_index == 0) { @@ -5787,7 +5816,7 @@ &best_model_rd, top_intra_model_rd); if (intra_rd_y < INT64_MAX) { - adjust_cost(cpi, x, &intra_rd_y); + adjust_cost(cpi, x, &intra_rd_y, /*is_inter_pred=*/false); } if (is_luma_result_valid && intra_rd_y < yrd_threshold) { @@ -5871,7 +5900,7 @@ intra_rd_stats.rdcost = this_rd; - adjust_rdcost(cpi, x, &intra_rd_stats); + adjust_rdcost(cpi, x, &intra_rd_stats, /*is_inter_pred=*/false); // Collect mode stats for multiwinner mode processing const int txfm_search_done = 1; @@ -6343,8 +6372,8 @@ ref_frame_rd[ref_frame] = this_rd; } - adjust_cost(cpi, x, &this_rd); - adjust_rdcost(cpi, x, &rd_stats); + adjust_cost(cpi, x, &this_rd, /*is_inter_pred=*/true); + adjust_rdcost(cpi, x, &rd_stats, /*is_inter_pred=*/true); // Did this mode help, i.e., is it the new best mode if (this_rd < search_state.best_rd) {
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c index 876ed33..7c14eab 100644 --- a/av1/encoder/speed_features.c +++ b/av1/encoder/speed_features.c
@@ -1062,6 +1062,11 @@ if (cpi->oxcf.enable_low_complexity_decode) set_good_speed_features_lc_dec_framesize_dependent(cpi, sf, speed); + + if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ || + cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) { + sf->intra_sf.skip_intra_in_interframe = 0; + } } static void set_good_speed_features_framesize_independent( @@ -1475,6 +1480,32 @@ sf->tx_sf.adaptive_txb_search_level = 0; sf->tx_sf.tx_type_search.use_skip_flag_prediction = 0; } + + // Set speed features for the IQ and SSIMULACRA2 tuning modes + // Layered image encoding has different requirements than regular video + // coding. + // Mainly, most of these speed features undo an implicit assumption that + // keyframes are encoded at a better quality than inter-coded frames. + // This means the encoder needs to be more thorough at considering and + // performing RDO on intra block candidates vs. inter block candidates for + // the best compression efficiency. + // Finally, enabling certain coding tools is beneficial for layered image + // encoding in general. 
+ if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ || + cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) { + sf->intra_sf.skip_intra_in_interframe = 0; + sf->inter_sf.inter_mode_rd_model_estimation = 0; + sf->mv_sf.use_intrabc = 1; + + // Don't prune intra candidates too aggressively, as it can cause more + // expensive inter candidates to be chosen instead + if (sf->intra_sf.intra_pruning_with_hog > 3) { + sf->intra_sf.intra_pruning_with_hog = 3; + } + if (sf->intra_sf.chroma_intra_pruning_with_hog > 3) { + sf->intra_sf.chroma_intra_pruning_with_hog = 3; + } + } } static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi, @@ -2163,6 +2194,11 @@ cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) { sf->winner_mode_sf.dc_blk_pred_level = 3; } + + if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ || + cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) { + sf->intra_sf.skip_intra_in_interframe = 0; + } } static inline void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {