Tune the MSE/Butteraugli model

The model was adjusted based on data collected at different QP levels.
This yields big improvements on the test files that previously showed a
loss. Currently, only 2 out of 250+ test images report a Butteraugli
BD-rate loss.

BD-rate change relative to the tune=psnr baseline (negative values are
reductions):

Google HDR+ 153 images JPG test set
psnr    ssim     butteraugli
5.652%  -3.177%  -19.881%

Kodak 24 images PNG test set
psnr    ssim     butteraugli
3.088%  -1.620%  -15.013%

Note: Butteraugli results were measured with the Butteraugli
implementation from JPEG XL v0.3.

BUG=aomedia:2965

Change-Id: Ibefbb5aa5289a824962425d716670e535af0b26b
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index e0c07d3..ed20447 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1075,7 +1075,7 @@
 
 #if CONFIG_TUNE_BUTTERAUGLI
   {
-    const int bsize = BLOCK_8X8;
+    const int bsize = BLOCK_16X16;
     const int w = mi_size_wide[bsize];
     const int h = mi_size_high[bsize];
     const int num_cols = (mi_params->mi_cols + w - 1) / w;
diff --git a/av1/encoder/tune_butteraugli.c b/av1/encoder/tune_butteraugli.c
index dbd770f..9468eea 100644
--- a/av1/encoder/tune_butteraugli.c
+++ b/av1/encoder/tune_butteraugli.c
@@ -50,7 +50,7 @@
   CHECK_MEM_ERROR(cm, diffmap, aom_malloc(width * height * sizeof(*diffmap)));
   aom_calc_butteraugli(source, recon, bit_depth, diffmap);
 
-  const int block_size = BLOCK_8X8;
+  const int block_size = BLOCK_16X16;
   const int num_mi_w = mi_size_wide[block_size];
   const int num_mi_h = mi_size_high[block_size];
   const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
@@ -72,7 +72,7 @@
       // Loop through each pixel.
       for (int y = y_start; y < y_start + block_h && y < height; y++) {
         for (int x = x_start; x < x_start + block_w && x < width; x++) {
-          dbutteraugli += powf(diffmap[y * width + x], 12.0f);
+          dbutteraugli += powf(diffmap[y * width + x], 6.0f);
           float px_diff = source->y_buffer[y * source->y_stride + x] -
                           recon->y_buffer[y * recon->y_stride + x];
           dmse += px_diff * px_diff;
@@ -89,16 +89,20 @@
         }
       }
 
-      dbutteraugli = powf(dbutteraugli, 1.0f / 12.0f);
+      dbutteraugli = powf(dbutteraugli, 1.0f / 6.0f);
       dmse = dmse / (2.0f * (float)block_w * (float)block_h);
-      const double K = 0.4;
+      // 'K' is used to balance the rate-distortion distribution between PSNR
+      // and Butteraugli.
+      const double K = 0.28;
       const float eps = 0.01f;
       double weight;
       if (dbutteraugli < eps || dmse < eps) {
         weight = -1.0;
       } else {
         blk_count += 1.0;
-        weight = dmse / dbutteraugli + K;
+        weight = dmse / dbutteraugli;
+        weight = AOMMIN(weight, 4.0);
+        weight += K;
         log_sum += log(weight);
       }
       cpi->butteraugli_info.rdmult_scaling_factors[index] = weight;
@@ -132,7 +136,7 @@
   }
   const AV1_COMMON *const cm = &cpi->common;
 
-  const int bsize_base = BLOCK_8X8;
+  const int bsize_base = BLOCK_16X16;
   const int num_mi_w = mi_size_wide[bsize_base];
   const int num_mi_h = mi_size_high[bsize_base];
   const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;