Tweak 4:4:4 chroma QM levels with `tune=ssimulacra2`

At the same QM level, chroma QMs are flatter in 4:4:4 chroma subsampling
mode than in 4:2:0, so this commit introduces a formula that picks a
chroma QM level for a given frame qindex when encoding 4:4:4 content
with `tune=ssimulacra2`.

Approximate BD-Rate gains over no chroma QM adjustment (cpu-used=6,
Daala's subset1):
- SSIMULACRA2 60: -0.3%
- SSIMULACRA2 70: -0.6%
- SSIMULACRA2 80: -0.5%
- SSIMULACRA2 90: -0.4%

Bug: aomedia:375221136
Change-Id: I358e2701e327520583107aa860eae80fe32f2d1a
diff --git a/aom/aomcx.h b/aom/aomcx.h
index 26e3697..2534349 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -1672,6 +1672,8 @@
  * Setting the tuning option to AOM_TUNE_SSIMULACRA2 causes the following
  * options to be set (expressed as command-line options):
  *   * --enable-qm=1
+ *   * --qm-min=2
+ *   * --qm-max=10
  *   * --sharpness=7
  *   * --dist-metric=qm-psnr
  *   * --enable-cdef=3
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 5d951da..6fd6edc 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -1790,8 +1790,11 @@
   if (extra_cfg->tuning == AOM_TUNE_SSIMULACRA2) {
     if (ctx->cfg.g_usage != AOM_USAGE_ALL_INTRA) return AOM_CODEC_INCAPABLE;
     // Enable QMs as they've been found to be beneficial for images, when used
-    // with an alternative QM formula (see aom_get_qmlevel_allintra()).
+    // with alternative QM formulas (see aom_get_qmlevel_allintra() and
+    // aom_get_qmlevel_444_chroma_ssimulacra2()).
     extra_cfg->enable_qm = 1;
+    extra_cfg->qm_min = QM_FIRST_SSIMULACRA2;
+    extra_cfg->qm_max = QM_LAST_SSIMULACRA2;
     // We can turn on loop filter sharpness, as frames do not have to serve as
     // references to others.
     extra_cfg->sharpness = 7;
diff --git a/av1/common/quant_common.h b/av1/common/quant_common.h
index 6d2fb97..ed4f23a 100644
--- a/av1/common/quant_common.h
+++ b/av1/common/quant_common.h
@@ -38,6 +38,8 @@
 #define DEFAULT_QM_LAST 9
 #define DEFAULT_QM_FIRST_ALLINTRA 4
 #define DEFAULT_QM_LAST_ALLINTRA 10
+#define QM_FIRST_SSIMULACRA2 2
+#define QM_LAST_SSIMULACRA2 10
 #define LOSSLESS_Q_STEP 4  // this should equal to dc/ac_qlookup_QTX[0]
 
 struct AV1Common;
@@ -60,11 +62,18 @@
   return first + (qindex * (last + 1 - first)) / QINDEX_RANGE;
 }
 
-// QM levels tuned for allintra mode (including still images)
+// QM levels tuned for all intra mode (including still images)
 // This formula was empirically derived by encoding the CID22 validation
 // testset for each QP/QM tuple, and building a convex hull that
-// maximizes SSIMU2 scores, and a final subjective visual quality pass
+// maximizes SSIMULACRA 2 scores, and a final subjective visual quality pass
 // as a sanity check. This is a decreasing function in qindex.
+// There are a total of 16 luma QM levels, and the higher the level, the
+// flatter these QMs are.
+// QM level 15 is a completely-flat matrix and level 0 is the steepest.
+// This formula only uses levels 4 through 10, unless qm-min and qm-max are
+// both set below or above this range.
+// For more information on quantization matrices, please refer to
+// https://arxiv.org/pdf/2008.06091, section F.
 static inline int aom_get_qmlevel_allintra(int qindex, int first, int last) {
   int qm_level = 0;
 
@@ -87,6 +96,45 @@
   return clamp(qm_level, first, last);
 }
 
+// Chroma QM levels for 4:4:4 subsampling, for use with tune=ssimulacra2.
+// This formula was empirically derived by encoding Daala's subset1 validation
+// testset for each QP/QM tuple, and building a convex hull that maximizes
+// SSIMULACRA 2 scores, and a final subjective visual quality pass as a sanity
+// check. This is a decreasing function in qindex.
+// As with luma QMs, there are a total of 16 chroma QM levels, and the higher
+// the level, the flatter these QMs are.
+// QM level 15 is a completely-flat matrix and level 0 is the steepest.
+// This formula only uses levels 2 through 10, unless qm-min and qm-max are
+// both set below or above this range.
+// For more information on quantization matrices, please refer to
+// https://arxiv.org/pdf/2008.06091, section F.
+static inline int aom_get_qmlevel_444_chroma_ssimulacra2(int qindex, int first,
+                                                         int last) {
+  int chroma_qm_level = 0;
+
+  if (qindex <= 12) {
+    chroma_qm_level = 10;
+  } else if (qindex <= 24) {
+    chroma_qm_level = 9;
+  } else if (qindex <= 32) {
+    chroma_qm_level = 8;
+  } else if (qindex <= 36) {
+    chroma_qm_level = 7;
+  } else if (qindex <= 44) {
+    chroma_qm_level = 6;
+  } else if (qindex <= 48) {
+    chroma_qm_level = 5;
+  } else if (qindex <= 56) {
+    chroma_qm_level = 4;
+  } else if (qindex <= 88) {
+    chroma_qm_level = 3;
+  } else {
+    chroma_qm_level = 2;
+  }
+
+  return clamp(chroma_qm_level, first, last);
+}
+
 // Initialize all global quant/dequant matrices.
 void av1_qm_init(struct CommonQuantParams *quant_params, int num_planes);
 
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index c4fa7df..f948405 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -943,26 +943,42 @@
     }
   }
 
-  // Select the best QM formula based on whether we're encoding in allintra mode
-  // or any other mode
-  int (*get_qmlevel)(int, int, int);
+  // Select the best luma and chroma QM formulas based on encoding mode and
+  // tuning
+  int (*get_luma_qmlevel)(int, int, int);
+  int (*get_chroma_qmlevel)(int, int, int);
 
   if (is_allintra) {
-    get_qmlevel = aom_get_qmlevel_allintra;
+    get_luma_qmlevel = aom_get_qmlevel_allintra;
   } else {
-    get_qmlevel = aom_get_qmlevel;
+    get_luma_qmlevel = aom_get_qmlevel;
+  }
+
+  if (is_allintra) {
+    if (tuning == AOM_TUNE_SSIMULACRA2 && cm->seq_params->subsampling_x == 0 &&
+        cm->seq_params->subsampling_y == 0) {
+      // 4:4:4 subsampling mode has 4x the number of chroma coefficients
+      // compared to 4:2:0 (2x on each dimension). This means the encoder should
+      // use lower chroma QM levels that more closely match the scaling of an
+      // equivalent 4:2:0 chroma QM.
+      get_chroma_qmlevel = aom_get_qmlevel_444_chroma_ssimulacra2;
+    } else {
+      get_chroma_qmlevel = aom_get_qmlevel_allintra;
+    }
+  } else {
+    get_chroma_qmlevel = aom_get_qmlevel;
   }
 
   quant_params->qmatrix_level_y =
-      get_qmlevel(quant_params->base_qindex, min_qmlevel, max_qmlevel);
+      get_luma_qmlevel(quant_params->base_qindex, min_qmlevel, max_qmlevel);
   quant_params->qmatrix_level_u =
-      get_qmlevel(quant_params->base_qindex + quant_params->u_ac_delta_q,
-                  min_qmlevel, max_qmlevel);
+      get_chroma_qmlevel(quant_params->base_qindex + quant_params->u_ac_delta_q,
+                         min_qmlevel, max_qmlevel);
 
   if (cm->seq_params->separate_uv_delta_q) {
-    quant_params->qmatrix_level_v =
-        get_qmlevel(quant_params->base_qindex + quant_params->v_ac_delta_q,
-                    min_qmlevel, max_qmlevel);
+    quant_params->qmatrix_level_v = get_chroma_qmlevel(
+        quant_params->base_qindex + quant_params->v_ac_delta_q, min_qmlevel,
+        max_qmlevel);
   } else {
     quant_params->qmatrix_level_v = quant_params->qmatrix_level_u;
   }