Add tune=butteraugli

Kodak Image Dataset with baseline tune=psnr
avg_psnr  ovr_psnr  ssim    butteraugli
4.295%    4.295%   -1.739%  -16.660%

TODO:
1) replace the Butteraugli interface with the official release;
2) use a test dataset with high resolution images;
3) 7 out of the 150+ test images report a BD-rate loss;
4) fine tune the parameters of the RD multiplier scaling model;
5) currently one recode pass is needed; this can be made much faster.

BUG=aomedia:2965

Change-Id: I746ec52e6384f116606ea481bbbab01a2960c2cf
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5b5fe05..02f20a6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -437,6 +437,35 @@
   list(APPEND AOM_APP_TARGETS ${AOM_ENCODER_EXAMPLE_TARGETS}
               ${AOM_ENCODER_TOOL_TARGETS})
 
+  if(CONFIG_TUNE_BUTTERAUGLI)
+    find_package(PkgConfig REQUIRED)
+    pkg_check_modules(LIBJXL REQUIRED libjxl)
+    target_link_libraries(aom PRIVATE ${LIBJXL_LDFLAGS} ${LIBJXL_LIBRARIES})
+    target_include_directories(aom PRIVATE ${LIBJXL_INCLUDE_DIRS})
+    if(LIBJXL_CFLAGS)
+      append_compiler_flag("${LIBJXL_CFLAGS}")
+    endif()
+
+    pkg_check_modules(LIBHWY REQUIRED libhwy)
+    target_link_libraries(aom PRIVATE ${LIBHWY_LDFLAGS} ${LIBHWY_LIBRARIES})
+    target_include_directories(aom PRIVATE ${LIBHWY_INCLUDE_DIRS})
+    if(LIBHWY_CFLAGS)
+      append_compiler_flag("${LIBHWY_CFLAGS}")
+    endif()
+
+    pkg_check_modules(LIBJXLT REQUIRED libjxl_threads)
+    target_link_libraries(aom PRIVATE ${LIBJXLT_LDFLAGS} ${LIBJXLT_LIBRARIES})
+    target_include_directories(aom PRIVATE ${LIBJXLT_INCLUDE_DIRS})
+    if(LIBJXLT_CFLAGS)
+      append_compiler_flag("${LIBJXLT_CFLAGS}")
+    endif()
+    # Link aom with the C++ driver (presumably because libjxl is C++).
+    set_target_properties(aom PROPERTIES LINKER_LANGUAGE CXX)
+    if(BUILD_SHARED_LIBS)
+      set_target_properties(aom_static PROPERTIES LINKER_LANGUAGE CXX)
+    endif()
+  endif()
+
   if(CONFIG_USE_VMAF_RC AND NOT CONFIG_TUNE_VMAF)
     message(FATAL_ERROR "Turn on CONFIG_TUNE_VMAF to use CONFIG_USE_VMAF_RC.")
   endif()
diff --git a/aom/aomcx.h b/aom/aomcx.h
index bf3212a..7f0f99a 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -1410,6 +1410,7 @@
   AOM_TUNE_VMAF_WITHOUT_PREPROCESSING = 5,
   AOM_TUNE_VMAF_MAX_GAIN = 6,
   AOM_TUNE_VMAF_NEG_MAX_GAIN = 7,
+  AOM_TUNE_BUTTERAUGLI = 8,
 } aom_tune_metric;
 
 #define AOM_MAX_LAYERS 32   /**< Max number of layers */
diff --git a/aom_dsp/aom_dsp.cmake b/aom_dsp/aom_dsp.cmake
index fa58f85..cf7072d 100644
--- a/aom_dsp/aom_dsp.cmake
+++ b/aom_dsp/aom_dsp.cmake
@@ -319,6 +319,11 @@
     list(APPEND AOM_DSP_ENCODER_SOURCES "${AOM_ROOT}/aom_dsp/vmaf.c"
                 "${AOM_ROOT}/aom_dsp/vmaf.h")
   endif()
+
+  if(CONFIG_TUNE_BUTTERAUGLI)
+    list(APPEND AOM_DSP_ENCODER_SOURCES "${AOM_ROOT}/aom_dsp/butteraugli.c"
+                "${AOM_ROOT}/aom_dsp/butteraugli.h")
+  endif()
 endif()
 
 # Creates aom_dsp build targets. Must not be called until after libaom target
diff --git a/aom_dsp/butteraugli.c b/aom_dsp/butteraugli.c
new file mode 100644
index 0000000..7ba38a9
--- /dev/null
+++ b/aom_dsp/butteraugli.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+// TODO(sdeng): update the jxl api.
+#include <assert.h>
+#include <jxl/encode.h>
+
+#include "aom_dsp/butteraugli.h"
+
+// Runs the libjxl Butteraugli comparison of `distorted` against `source` and
+// writes the resulting map into dist_map (presumably y_width * y_height
+// floats -- confirm against the caller's allocation). Asserts 8-bit input
+// whose chroma planes are half the luma width.
+void aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
+                          const YV12_BUFFER_CONFIG *distorted, int bit_depth,
+                          float *dist_map) {
+  assert(bit_depth == 8);
+  assert(source->y_width == source->uv_width * 2);
+  (void)bit_depth;  // Only referenced by the assert above.
+
+  // The aggregate score is written by the call below but not used here.
+  double butteraugli_score;
+  JxlCalcButteraugliYuv420(
+      source->y_width, source->y_height, source->y_buffer, source->y_stride,
+      source->u_buffer, source->v_buffer, source->uv_stride,
+      distorted->y_buffer, distorted->y_stride, distorted->u_buffer,
+      distorted->v_buffer, distorted->uv_stride, dist_map,
+      &butteraugli_score);
+}
diff --git a/aom_dsp/butteraugli.h b/aom_dsp/butteraugli.h
new file mode 100644
index 0000000..95314ce
--- /dev/null
+++ b/aom_dsp/butteraugli.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AOM_DSP_BUTTERAUGLI_H_
+#define AOM_AOM_DSP_BUTTERAUGLI_H_
+
+#include "aom_scale/yv12config.h"
+
+void aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
+                          const YV12_BUFFER_CONFIG *distorted, int bit_depth,
+                          float *dist_map);
+
+#endif  // AOM_AOM_DSP_BUTTERAUGLI_H_
diff --git a/av1/arg_defs.c b/av1/arg_defs.c
index dcf9afb..8d5727a 100644
--- a/av1/arg_defs.c
+++ b/av1/arg_defs.c
@@ -46,6 +46,7 @@
   { "vmaf_without_preprocessing", AOM_TUNE_VMAF_WITHOUT_PREPROCESSING },
   { "vmaf", AOM_TUNE_VMAF_MAX_GAIN },
   { "vmaf_neg", AOM_TUNE_VMAF_NEG_MAX_GAIN },
+  { "butteraugli", AOM_TUNE_BUTTERAUGLI },
   { NULL, 0 }
 };
 
diff --git a/av1/av1.cmake b/av1/av1.cmake
index b48c614..3687459 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -269,6 +269,12 @@
               "${AOM_ROOT}/av1/encoder/tune_vmaf.h")
 endif()
 
+if(CONFIG_TUNE_BUTTERAUGLI)
+  list(APPEND AOM_AV1_ENCODER_SOURCES
+              "${AOM_ROOT}/av1/encoder/tune_butteraugli.c"
+              "${AOM_ROOT}/av1/encoder/tune_butteraugli.h")
+endif()
+
 if(CONFIG_OPTICAL_FLOW_API)
   list(APPEND AOM_AV1_ENCODER_SOURCES "${AOM_ROOT}/av1/encoder/optical_flow.c"
               "${AOM_ROOT}/av1/encoder/optical_flow.h")
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 9c098b1..fab9092 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -523,6 +523,14 @@
         "VBR corpus complexity is supported only in the case of single pass "
         "VBR mode.");
 
+#if !CONFIG_TUNE_BUTTERAUGLI
+  if (extra_cfg->tuning == AOM_TUNE_BUTTERAUGLI) {
+    ERROR(
+        "This error may be related to the wrong configuration options: try to "
+        "set -DCONFIG_TUNE_BUTTERAUGLI=1 at the time CMake is run.");
+  }
+#endif
+
 #if !CONFIG_TUNE_VMAF
   if (extra_cfg->tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
       extra_cfg->tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN) {
@@ -541,11 +549,7 @@
   }
 #endif
 
-#if CONFIG_TUNE_VMAF
-  RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_VMAF_NEG_MAX_GAIN);
-#else
-  RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_SSIM);
-#endif
+  RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_BUTTERAUGLI);
 
   RANGE_CHECK(extra_cfg, timing_info_type, AOM_TIMING_UNSPECIFIED,
               AOM_TIMING_DEC_MODEL);
@@ -613,6 +617,12 @@
   if (img->d_w != ctx->cfg.g_w || img->d_h != ctx->cfg.g_h)
     ERROR("Image size must match encoder init configuration size");
 
+#if CONFIG_TUNE_BUTTERAUGLI
+  if (ctx->extra_cfg.tuning == AOM_TUNE_BUTTERAUGLI &&
+      (img->x_chroma_shift != 1 || img->y_chroma_shift != 1))
+    ERROR("Only I420 images supported in tune=butteraugli mode.");
+#endif
+
   return AOM_CODEC_OK;
 }
 
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index dbd3a8b..e0c07d3 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1073,6 +1073,23 @@
   }
 #endif
 
+#if CONFIG_TUNE_BUTTERAUGLI
+  {
+    const int bsize = BLOCK_8X8;
+    const int w = mi_size_wide[bsize];
+    const int h = mi_size_high[bsize];
+    const int num_cols = (mi_params->mi_cols + w - 1) / w;
+    const int num_rows = (mi_params->mi_rows + h - 1) / h;
+    CHECK_MEM_ERROR(
+        cm, cpi->butteraugli_info.rdmult_scaling_factors,
+        aom_malloc(num_rows * num_cols *
+                   sizeof(*cpi->butteraugli_info.rdmult_scaling_factors)));
+    memset(&cpi->butteraugli_info.recon, 0,
+           sizeof(cpi->butteraugli_info.recon));
+    cpi->butteraugli_info.recon_set = false;
+  }
+#endif
+
 #if !CONFIG_REALTIME_ONLY
   if (!is_stat_generation_stage(cpi)) {
     av1_setup_tpl_buffers(cm, &cpi->tpl_data, cpi->oxcf.gf_cfg.lag_in_frames);
@@ -2475,6 +2492,11 @@
   }
 #endif
 
+#if CONFIG_TUNE_BUTTERAUGLI
+  cpi->butteraugli_info.recon_set = false;
+  int original_q = 0;
+#endif
+
   // Loop variables
   int loop = 0;
   int loop_count = 0;
@@ -2518,6 +2540,18 @@
       q = av1_get_vmaf_base_qindex(cpi, q);
     }
 #endif
+#if CONFIG_TUNE_BUTTERAUGLI
+    if (oxcf->tune_cfg.tuning == AOM_TUNE_BUTTERAUGLI) {
+      if (loop_count == 0) {
+        original_q = q;
+        // TODO(sdeng): different q here does not make big difference. Use a
+        //  faster pass instead.
+        q = 96;
+      } else {
+        q = original_q;
+      }
+    }
+#endif
     av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
                       q_cfg->enable_chroma_deltaq);
     av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
@@ -2577,6 +2611,12 @@
       last_loop_allow_hp = cm->features.allow_high_precision_mv;
     }
 
+#if CONFIG_TUNE_BUTTERAUGLI
+    if (oxcf->tune_cfg.tuning == AOM_TUNE_BUTTERAUGLI) {
+      av1_set_mb_butteraugli_rdmult_scaling(cpi);
+    }
+#endif
+
     // transform / motion compensation build reconstruction frame
     av1_encode_frame(cpi);
 
@@ -2629,6 +2669,13 @@
                            &low_cr_seen, loop_count);
     }
 
+#if CONFIG_TUNE_BUTTERAUGLI
+    if (oxcf->tune_cfg.tuning == AOM_TUNE_BUTTERAUGLI && loop_count == 0) {
+      loop = 1;  // Force one recode that uses this pass's reconstruction.
+      av1_setup_butteraugli_recon(cpi, &cm->cur_frame->buf);
+    }
+#endif
+
     if (loop) {
       ++loop_count;
 
@@ -3134,8 +3181,9 @@
     }
   }
 
-  if (oxcf->tune_cfg.tuning == AOM_TUNE_SSIM)
+  if (oxcf->tune_cfg.tuning == AOM_TUNE_SSIM) {
     av1_set_mb_ssim_rdmult_scaling(cpi);
+  }
 
 #if CONFIG_TUNE_VMAF
   if (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 677ef75..a23892e 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -63,6 +63,9 @@
 #if CONFIG_AV1_TEMPORAL_DENOISING
 #include "av1/encoder/av1_temporal_denoiser.h"
 #endif
+#if CONFIG_TUNE_BUTTERAUGLI
+#include "av1/encoder/tune_butteraugli.h"
+#endif
 
 #include "aom/internal/aom_codec_internal.h"
 #include "aom_util/aom_thread.h"
@@ -2583,6 +2586,13 @@
   TuneVMAFInfo vmaf_info;
 #endif
 
+#if CONFIG_TUNE_BUTTERAUGLI
+  /*!
+   * Parameters for Butteraugli tuning.
+   */
+  TuneButteraugliInfo butteraugli_info;
+#endif
+
   /*!
    * Indicates whether to use SVC.
    */
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 43cc74c..feed49e 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -201,6 +201,12 @@
 #endif
 #endif
 
+#if CONFIG_TUNE_BUTTERAUGLI
+  aom_free(cpi->butteraugli_info.rdmult_scaling_factors);
+  cpi->butteraugli_info.rdmult_scaling_factors = NULL;
+  aom_free_frame_buffer(&cpi->butteraugli_info.recon);
+#endif
+
   release_obmc_buffers(&cpi->td.mb.obmc_buffer);
 
   if (cpi->td.mb.mv_costs) {
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index b50bbd5..77a9fe6 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -443,6 +443,11 @@
     av1_set_vmaf_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
   }
 #endif
+#if CONFIG_TUNE_BUTTERAUGLI
+  if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_BUTTERAUGLI) {
+    av1_set_butteraugli_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
+  }
+#endif
 }
 
 void av1_set_offsets_without_segment_id(const AV1_COMP *const cpi,
diff --git a/av1/encoder/tune_butteraugli.c b/av1/encoder/tune_butteraugli.c
new file mode 100644
index 0000000..dbd770f
--- /dev/null
+++ b/av1/encoder/tune_butteraugli.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+
+#include "av1/encoder/tune_butteraugli.h"
+
+#include "aom_dsp/butteraugli.h"
+#include "aom_ports/system_state.h"
+#include "av1/encoder/rdopt.h"
+#include "av1/encoder/extend.h"
+
+// Copies recon into butteraugli_info.recon (allocated on first use).
+void av1_setup_butteraugli_recon(AV1_COMP *cpi,
+                                 const YV12_BUFFER_CONFIG *recon) {
+  YV12_BUFFER_CONFIG *const dst = &cpi->butteraugli_info.recon;
+  if (dst->buffer_alloc_sz == 0 &&
+      aom_alloc_frame_buffer(
+          dst, recon->y_width, recon->y_height, 1, 1,
+          cpi->common.seq_params.use_highbitdepth, cpi->oxcf.border_in_pixels,
+          cpi->common.features.byte_alignment))
+    aom_internal_error(&cpi->common.error, AOM_CODEC_MEM_ERROR,
+                       "Failed to allocate butteraugli recon buffer");
+  av1_copy_and_extend_frame(recon, dst);
+  cpi->butteraugli_info.recon_set = true;
+}
+
+// Builds cpi->butteraugli_info.rdmult_scaling_factors: one weight per 8x8
+// block, from the Butteraugli map of cpi->source vs. the stored recon.
+void av1_set_mb_butteraugli_rdmult_scaling(AV1_COMP *cpi) {
+  if (!cpi->butteraugli_info.recon_set) return;
+  AV1_COMMON *const cm = &cpi->common;
+  const CommonModeInfoParams *const mi_params = &cm->mi_params;
+  YV12_BUFFER_CONFIG *source = cpi->source;
+  const int width = source->y_width;
+  const int height = source->y_height;
+  const int bit_depth = cpi->td.mb.e_mbd.bd;
+
+  aom_clear_system_state();
+  const YV12_BUFFER_CONFIG *recon = &cpi->butteraugli_info.recon;
+  float *diffmap;
+  CHECK_MEM_ERROR(cm, diffmap, aom_malloc(width * height * sizeof(*diffmap)));
+  aom_calc_butteraugli(source, recon, bit_depth, diffmap);
+
+  const int block_size = BLOCK_8X8;
+  const int num_mi_w = mi_size_wide[block_size];
+  const int num_mi_h = mi_size_high[block_size];
+  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
+  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;
+  const int block_w = num_mi_w << 2;
+  const int block_h = num_mi_h << 2;
+  double log_sum = 0.0;
+  double blk_count = 0.0;
+
+  // Loop through each block.
+  for (int row = 0; row < num_rows; ++row) {
+    for (int col = 0; col < num_cols; ++col) {
+      const int index = row * num_cols + col;
+      const int y_start = row * block_h;
+      const int x_start = col * block_w;
+      float dbutteraugli = 0.0f;
+      float dmse = 0.0f;
+
+      // Luma: accumulate the 12-norm of the map and the squared pixel error.
+      for (int y = y_start; y < y_start + block_h && y < height; y++) {
+        for (int x = x_start; x < x_start + block_w && x < width; x++) {
+          dbutteraugli += powf(diffmap[y * width + x], 12.0f);
+          float px_diff = source->y_buffer[y * source->y_stride + x] -
+                          recon->y_buffer[y * recon->y_stride + x];
+          dmse += px_diff * px_diff;
+        }
+      }
+      for (int y = y_start; y < y_start + block_h && y < height; y += 2) {
+        for (int x = x_start; x < x_start + block_w && x < width; x += 2) {
+          // Source and recon may have different chroma strides.
+          const int src_idx = y / 2 * source->uv_stride + x / 2;
+          const int rec_idx = y / 2 * recon->uv_stride + x / 2;
+          const float du = source->u_buffer[src_idx] - recon->u_buffer[rec_idx];
+          const float dv = source->v_buffer[src_idx] - recon->v_buffer[rec_idx];
+          dmse += du * du + dv * dv;
+        }
+      }
+
+      dbutteraugli = powf(dbutteraugli, 1.0f / 12.0f);
+      dmse = dmse / (2.0f * (float)block_w * (float)block_h);
+      const double K = 0.4;
+      const float eps = 0.01f;
+      double weight;
+      if (dbutteraugli < eps || dmse < eps) {
+        weight = -1.0;  // Sentinel: replaced by 1.0 in the loop below.
+      } else {
+        blk_count += 1.0;
+        weight = dmse / dbutteraugli + K;
+        log_sum += log(weight);
+      }
+      cpi->butteraugli_info.rdmult_scaling_factors[index] = weight;
+    }
+  }
+  // Geometric average of the weights.
+  log_sum = exp(log_sum / blk_count);
+
+  for (int row = 0; row < num_rows; ++row) {
+    for (int col = 0; col < num_cols; ++col) {
+      const int index = row * num_cols + col;
+      double *weight = &cpi->butteraugli_info.rdmult_scaling_factors[index];
+      if (*weight <= 0.0) {
+        *weight = 1.0;
+      } else {
+        *weight /= log_sum;
+      }
+    }
+  }
+
+  aom_clear_system_state();
+  aom_free(diffmap);
+}
+
+// Scales *rdmult by the geometric mean of the per-8x8 scaling factors that
+// the block (mi_row, mi_col, bsize) covers, then refreshes x->errorperbit.
+void av1_set_butteraugli_rdmult(const AV1_COMP *cpi, MACROBLOCK *x,
+                                BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                int *rdmult) {
+  assert(cpi->oxcf.tune_cfg.tuning == AOM_TUNE_BUTTERAUGLI);
+  if (!cpi->butteraugli_info.recon_set) return;
+  const AV1_COMMON *const cm = &cpi->common;
+
+  const int bsize_base = BLOCK_8X8;
+  const int num_mi_w = mi_size_wide[bsize_base];
+  const int num_mi_h = mi_size_high[bsize_base];
+  const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
+  const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
+  const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
+  const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;
+  double num_of_mi = 0.0;
+  double geom_mean_of_scale = 0.0;
+
+  aom_clear_system_state();
+  for (int row = mi_row / num_mi_h;
+       row < num_rows && row < mi_row / num_mi_h + num_brows; ++row) {
+    for (int col = mi_col / num_mi_w;
+         col < num_cols && col < mi_col / num_mi_w + num_bcols; ++col) {
+      const int index = row * num_cols + col;
+      geom_mean_of_scale +=
+          log(cpi->butteraugli_info.rdmult_scaling_factors[index]);
+      num_of_mi += 1.0;
+    }
+  }
+  geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi);
+
+  *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5);
+  *rdmult = AOMMAX(*rdmult, 0);
+  av1_set_error_per_bit(&x->errorperbit, *rdmult);
+  aom_clear_system_state();
+}
diff --git a/av1/encoder/tune_butteraugli.h b/av1/encoder/tune_butteraugli.h
new file mode 100644
index 0000000..b402b8c
--- /dev/null
+++ b/av1/encoder/tune_butteraugli.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AV1_ENCODER_TUNE_BUTTERAUGLI_H_
+#define AOM_AV1_ENCODER_TUNE_BUTTERAUGLI_H_
+
+#include "aom_scale/yv12config.h"
+#include "av1/common/enums.h"
+#include "av1/encoder/ratectrl.h"
+#include "av1/encoder/block.h"
+
+typedef struct {
+  // Stores the scaling factors for rdmult when tuning for Butteraugli.
+  // rdmult_scaling_factors[row * num_cols + col] stores the scaling factor for
+  // the 8x8 block at (row, col).
+  double *rdmult_scaling_factors;
+  YV12_BUFFER_CONFIG recon;
+  bool recon_set;
+} TuneButteraugliInfo;
+
+typedef struct AV1_COMP AV1_COMP;
+
+void av1_set_mb_butteraugli_rdmult_scaling(AV1_COMP *cpi);
+
+void av1_set_butteraugli_rdmult(const AV1_COMP *cpi, MACROBLOCK *x,
+                                BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                int *rdmult);
+
+void av1_setup_butteraugli_recon(AV1_COMP *cpi,
+                                 const YV12_BUFFER_CONFIG *recon);
+
+#endif  // AOM_AV1_ENCODER_TUNE_BUTTERAUGLI_H_
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index f1b1e57..294af80 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -116,6 +116,8 @@
 set_aom_config_var(DECODE_WIDTH_LIMIT 0 "Set limit for decode width.")
 set_aom_config_var(CONFIG_TUNE_VMAF 0 "Enable encoding tuning for VMAF.")
 set_aom_config_var(CONFIG_USE_VMAF_RC 0 "Use libvmaf_rc tune for VMAF_NEG.")
+set_aom_config_var(CONFIG_TUNE_BUTTERAUGLI 0
+                   "Enable encoding tuning for Butteraugli.")
 
 # AV1 experiment flags.
 set_aom_config_var(CONFIG_SPEED_STATS 0 "AV1 experiment flag.")