Optimize tune IQ and SSIMULACRA2 for inter-frame encoding

- Adjust speed features for better image coding performance
- Bias encoder toward picking intra-coded block candidates

Approximate gains in good-quality mode, cpu-used=6, on two-layer
progressive image encoding in 4:4:4 chroma subsampling mode
(Daala's subset1):
- SSIMULACRA 2 60: -2.1%
- SSIMULACRA 2 70: -3.1%
- SSIMULACRA 2 80: -3.2%
- SSIMULACRA 2 90: -3.2%

Change-Id: I7d88835ac993a11f74d7644be501ed0b56d7ba9a

diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 01e4bf7..3e49001 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c

@@ -784,7 +784,25 @@
 }
 
 static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x,
-                          RD_STATS *rd_cost) {
+                          RD_STATS *rd_cost, bool is_inter_pred) {
+  if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+       cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) &&
+      is_inter_pred) {
+    // Tune IQ and SSIMULACRA2 are often used to encode layered AVIFs, where
+    // keyframes can be encoded at a lower quality (i.e. higher QP) than
+    // inter-coded frames.
+    // In this case, libaom tends to underestimate the true RD cost of inter
+    // prediction candidates, causing encoded file size to increase without a
+    // corresponding increase in quality.
+    // To compensate for this effect, make inter block candidates appear more
+    // expensive to the encoder to slightly bias toward intra prediction.
+    // Doing this increases overall compression efficiency, while still allowing
+    // the encoder to pick inter prediction when it's beneficial.
+    rd_cost->dist += rd_cost->dist >> 3;
+    rd_cost->rdcost += rd_cost->rdcost >> 3;
+    return;
+  }
+
   if (cpi->oxcf.algo_cfg.sharpness != 3) return;
 
   if (frame_is_kf_gf_arf(cpi)) return;
@@ -807,7 +825,14 @@
 }
 
 static void adjust_cost(const AV1_COMP *cpi, const MACROBLOCK *x,
-                        int64_t *rd_cost) {
+                        int64_t *rd_cost, bool is_inter_pred) {
+  if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+       cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) &&
+      is_inter_pred) {
+    *rd_cost += *rd_cost >> 3;
+    return;
+  }
+
   if (cpi->oxcf.algo_cfg.sharpness != 3) return;
 
   if (frame_is_kf_gf_arf(cpi)) return;
@@ -1851,9 +1876,13 @@
       }
     }
 
-    adjust_cost(cpi, x, &this_yrd);
-    adjust_rdcost(cpi, x, rd_stats);
-    adjust_rdcost(cpi, x, rd_stats_y);
+    if (this_yrd < INT64_MAX) {
+      adjust_cost(cpi, x, &this_yrd, /*is_inter_pred=*/true);
+    }
+    adjust_rdcost(cpi, x, rd_stats, /*is_inter_pred=*/true);
+    if (rd_stats_y->rdcost < INT64_MAX) {
+      adjust_rdcost(cpi, x, rd_stats_y, /*is_inter_pred=*/true);
+    }
 
     const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
     if (mode_index == 0) {
@@ -5787,7 +5816,7 @@
         &best_model_rd, top_intra_model_rd);
 
     if (intra_rd_y < INT64_MAX) {
-      adjust_cost(cpi, x, &intra_rd_y);
+      adjust_cost(cpi, x, &intra_rd_y, /*is_inter_pred=*/false);
     }
 
     if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
@@ -5871,7 +5900,7 @@
 
   intra_rd_stats.rdcost = this_rd;
 
-  adjust_rdcost(cpi, x, &intra_rd_stats);
+  adjust_rdcost(cpi, x, &intra_rd_stats, /*is_inter_pred=*/false);
 
   // Collect mode stats for multiwinner mode processing
   const int txfm_search_done = 1;
@@ -6343,8 +6372,8 @@
       ref_frame_rd[ref_frame] = this_rd;
     }
 
-    adjust_cost(cpi, x, &this_rd);
-    adjust_rdcost(cpi, x, &rd_stats);
+    adjust_cost(cpi, x, &this_rd, /*is_inter_pred=*/true);
+    adjust_rdcost(cpi, x, &rd_stats, /*is_inter_pred=*/true);
 
     // Did this mode help, i.e., is it the new best mode
     if (this_rd < search_state.best_rd) {

diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 876ed33..7c14eab 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c

@@ -1062,6 +1062,11 @@
 
   if (cpi->oxcf.enable_low_complexity_decode)
     set_good_speed_features_lc_dec_framesize_dependent(cpi, sf, speed);
+
+  if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) {
+    sf->intra_sf.skip_intra_in_interframe = 0;
+  }
 }
 
 static void set_good_speed_features_framesize_independent(
@@ -1475,6 +1480,32 @@
     sf->tx_sf.adaptive_txb_search_level = 0;
     sf->tx_sf.tx_type_search.use_skip_flag_prediction = 0;
   }
+
+  // Set speed features for the IQ and SSIMULACRA2 tuning modes
+  // Layered image encoding has different requirements than regular video
+  // coding.
+  // Mainly, most of these speed features undo an implicit assumption that
+  // keyframes are encoded at a better quality than inter-coded frames.
+  // This means the encoder needs to be more thorough at considering and
+  // performing RDO on intra block candidates vs. inter block candidates for
+  // the best compression efficiency.
+  // Finally, enabling certain coding tools is beneficial for layered image
+  // encoding in general.
+  if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) {
+    sf->intra_sf.skip_intra_in_interframe = 0;
+    sf->inter_sf.inter_mode_rd_model_estimation = 0;
+    sf->mv_sf.use_intrabc = 1;
+
+    // Don't prune intra candidates too aggressively, as it can cause more
+    // expensive inter candidates to be chosen instead
+    if (sf->intra_sf.intra_pruning_with_hog > 3) {
+      sf->intra_sf.intra_pruning_with_hog = 3;
+    }
+    if (sf->intra_sf.chroma_intra_pruning_with_hog > 3) {
+      sf->intra_sf.chroma_intra_pruning_with_hog = 3;
+    }
+  }
 }
 
 static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
@@ -2163,6 +2194,11 @@
       cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
     sf->winner_mode_sf.dc_blk_pred_level = 3;
   }
+
+  if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
+      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) {
+    sf->intra_sf.skip_intra_in_interframe = 0;
+  }
 }
 
 static inline void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {