Add tune=vmaf_neg mode

Use the adaptive video pre-processing method together with VMAF-based
RDO multiplier scaling and QP adjustment.

               avg_psnr   ssim    vmaf   vmaf_neg
lowres_bd10    12.804    5.381  -20.251   -8.154
midres_bd10    11.517    6.221  -21.000   -7.722
midres         12.095    8.089  -16.744   -6.106
hdres          11.036    7.410  -20.237   -7.762

1.8% VMAF_NEG gain compared to the method using a fixed pre-processing
strength.

Change-Id: Ie4bc57ea70cab5eb7622cbefec5cc877e150757a
diff --git a/aom/aomcx.h b/aom/aomcx.h
index 3ab1dd2..d789dd2 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -1374,7 +1374,8 @@
   /* NOTE: enums 2 and 3 unused */
   AOM_TUNE_VMAF_WITH_PREPROCESSING = 4,
   AOM_TUNE_VMAF_WITHOUT_PREPROCESSING = 5,
-  AOM_TUNE_VMAF_MAX_GAIN = 6
+  AOM_TUNE_VMAF_MAX_GAIN = 6,
+  AOM_TUNE_VMAF_NEG_MAX_GAIN = 7,
 } aom_tune_metric;
 
 #define AOM_MAX_LAYERS 32   /**< Max number of layers */
diff --git a/aom_dsp/vmaf.h b/aom_dsp/vmaf.h
index 246e9b6..775c9cb 100644
--- a/aom_dsp/vmaf.h
+++ b/aom_dsp/vmaf.h
@@ -34,6 +34,9 @@
   // Stores the filter strength of the last frame.
   double last_frame_unsharp_amount;
 
+  // Stores the best unsharp amount found for video pre-processing.
+  double best_unsharp_amount;
+
   // Stores the origial qindex before scaling.
   int original_qindex;
 
diff --git a/apps/aomenc.c b/apps/aomenc.c
index 9814190..82c1f73 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -401,6 +401,7 @@
   { "vmaf_with_preprocessing", AOM_TUNE_VMAF_WITH_PREPROCESSING },
   { "vmaf_without_preprocessing", AOM_TUNE_VMAF_WITHOUT_PREPROCESSING },
   { "vmaf", AOM_TUNE_VMAF_MAX_GAIN },
+  { "vmaf_neg", AOM_TUNE_VMAF_NEG_MAX_GAIN },
   { NULL, 0 }
 };
 static const arg_def_t tune_metric =
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 6463ad6..70112e4 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -518,17 +518,25 @@
         "VBR mode.");
 
 #if !CONFIG_TUNE_VMAF
-  if (extra_cfg->tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
-      extra_cfg->tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
-      extra_cfg->tuning == AOM_TUNE_VMAF_MAX_GAIN) {
+  if (extra_cfg->tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
+      extra_cfg->tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN) {
     ERROR(
         "This error may be related to the wrong configuration options: try to "
         "set -DCONFIG_TUNE_VMAF=1 at the time CMake is run.");
   }
 #endif
 
+#if !CONFIG_USE_VMAF_RC
+  if (extra_cfg->tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
+    ERROR(
+        "This error may be related to the wrong configuration options: try to "
+        "set -DCONFIG_TUNE_VMAF=1 and -DCONFIG_USE_VMAF_RC=1 at the time CMake"
+        " is run.");
+  }
+#endif
+
 #if CONFIG_TUNE_VMAF
-  RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_VMAF_MAX_GAIN);
+  RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_VMAF_NEG_MAX_GAIN);
 #else
   RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_SSIM);
 #endif
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index e67fde0..6583998 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -34,6 +34,10 @@
 #include "av1/encoder/temporal_filter.h"
 #include "av1/encoder/tpl_model.h"
 
+#if CONFIG_TUNE_VMAF
+#include "av1/encoder/tune_vmaf.h"
+#endif
+
 #define TEMPORAL_FILTER_KEY_FRAME (CONFIG_REALTIME_ONLY ? 0 : 1)
 
 static INLINE void set_refresh_frame_flags(
@@ -1373,6 +1377,14 @@
   }
 #endif  // !CONFIG_REALTIME_ONLY
 
+#if CONFIG_TUNE_VMAF
+  if (!is_stat_generation_stage(cpi) &&
+      (oxcf->tune_cfg.tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
+       oxcf->tune_cfg.tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN)) {
+    av1_update_vmaf_curve(cpi, cpi->source, &cpi->common.cur_frame->buf);
+  }
+#endif
+
   if (!is_stat_generation_stage(cpi)) {
     update_fb_of_context_type(cpi, &frame_params, cpi->fb_of_context_type);
     set_additional_frame_flags(cm, frame_flags);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 8284b96..c46c500 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1040,7 +1040,12 @@
                     aom_calloc(num_rows * num_cols,
                                sizeof(*cpi->vmaf_info.rdmult_scaling_factors)));
     cpi->vmaf_info.last_frame_unsharp_amount = 0.0;
+    cpi->vmaf_info.best_unsharp_amount = 0.0;
     cpi->vmaf_info.original_qindex = -1;
+
+#if CONFIG_USE_VMAF_RC
+    cpi->vmaf_info.vmaf_model = NULL;
+#endif
   }
 #endif
 
@@ -2366,6 +2371,12 @@
   // Determine whether to use screen content tools using two fast encoding.
   av1_determine_sc_tools_with_encoding(cpi, q);
 
+#if CONFIG_USE_VMAF_RC
+  if (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
+    av1_vmaf_neg_preprocessing(cpi, cpi->unscaled_source);
+  }
+#endif
+
   // Loop variables
   int loop = 0;
   int loop_count = 0;
@@ -2402,9 +2413,8 @@
       av1_scale_references(cpi, EIGHTTAP_REGULAR, 0, 0);
     }
 #if CONFIG_TUNE_VMAF
-    if (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
-        oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
-        oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN) {
+    if (oxcf->tune_cfg.tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
+        oxcf->tune_cfg.tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN) {
       cpi->vmaf_info.original_qindex = q;
       q = av1_get_vmaf_base_qindex(cpi, q);
     }
@@ -2507,9 +2517,8 @@
     }
 
 #if CONFIG_TUNE_VMAF
-    if (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
-        oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
-        oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN) {
+    if (oxcf->tune_cfg.tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
+        oxcf->tune_cfg.tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN) {
       q = cpi->vmaf_info.original_qindex;
     }
 #endif
@@ -2982,7 +2991,8 @@
 
 #if CONFIG_TUNE_VMAF
   if (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
-      oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN) {
+      oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN ||
+      oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
     av1_set_mb_vmaf_rdmult_scaling(cpi);
   }
 #endif
@@ -3520,15 +3530,6 @@
     }
   }
 
-#if CONFIG_TUNE_VMAF
-  if (!is_stat_generation_stage(cpi) &&
-      (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
-       oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
-       oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN)) {
-    av1_update_vmaf_curve(cpi, cpi->source, &cpi->common.cur_frame->buf);
-  }
-#endif
-
   if (cpi->level_params.keep_level_stats && !is_stat_generation_stage(cpi)) {
     // Initialize level info. at the beginning of each sequence.
     if (cm->current_frame.frame_type == KEY_FRAME && !cpi->no_show_fwd_kf) {
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index 95de618..032c40d 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -432,7 +432,8 @@
   }
 #if CONFIG_TUNE_VMAF
   if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
-      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN) {
+      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN ||
+      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
     av1_set_vmaf_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
   }
 #endif
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index 12d0b41..d974b49 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -3047,9 +3047,8 @@
       rc->active_best_quality[i] = cpi->common.quant_params.base_qindex;
 #if CONFIG_TUNE_VMAF
       if (cpi->vmaf_info.original_qindex != -1 &&
-          (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
-           cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
-           cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN)) {
+          (cpi->oxcf.tune_cfg.tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
+           cpi->oxcf.tune_cfg.tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN)) {
         rc->active_best_quality[i] = cpi->vmaf_info.original_qindex;
       }
 #endif
diff --git a/av1/encoder/tune_vmaf.c b/av1/encoder/tune_vmaf.c
index 3481a90..62c29d4 100644
--- a/av1/encoder/tune_vmaf.c
+++ b/av1/encoder/tune_vmaf.c
@@ -237,6 +237,30 @@
   return unsharp_amount;
 }
 
+#if CONFIG_USE_VMAF_RC
+void av1_vmaf_neg_preprocessing(AV1_COMP *const cpi,
+                                YV12_BUFFER_CONFIG *const source) {
+  aom_clear_system_state();
+  const AV1_COMMON *const cm = &cpi->common;
+  const int bit_depth = cpi->td.mb.e_mbd.bd;
+  const int width = source->y_width;
+  const int height = source->y_height;
+  const double best_frame_unsharp_amount = cpi->vmaf_info.best_unsharp_amount;
+  if (best_frame_unsharp_amount == 0.0) return;
+
+  YV12_BUFFER_CONFIG blurred;
+  memset(&blurred, 0, sizeof(blurred));
+  aom_alloc_frame_buffer(
+      &blurred, width, height, 1, 1, cm->seq_params.use_highbitdepth,
+      cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+
+  gaussian_blur(bit_depth, source, &blurred);
+  unsharp(cpi, source, &blurred, source, best_frame_unsharp_amount);
+  aom_free_frame_buffer(&blurred);
+  aom_clear_system_state();
+}
+#endif
+
 void av1_vmaf_frame_preprocessing(AV1_COMP *const cpi,
                                   YV12_BUFFER_CONFIG *const source) {
   aom_clear_system_state();
@@ -793,6 +817,68 @@
   return qindex;
 }
 
+#if CONFIG_USE_VMAF_RC
+static double cal_approx_score(const AV1_COMP *const cpi,
+                               YV12_BUFFER_CONFIG *const ref,
+                               YV12_BUFFER_CONFIG *const sharpened) {
+  double score;
+  const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
+  aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, ref, sharpened, bit_depth, 1,
+                   &score);
+  return score;
+}
+
+static double find_best_frame_unsharp_amount_loop_neg(
+    const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const ref,
+    YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred,
+    YV12_BUFFER_CONFIG *const sharpened, double best_score,
+    const double unsharp_amount_start, const double step_size,
+    const int max_loop_count, const double max_amount) {
+  const double min_amount = 0.0;
+  int loop_count = 0;
+  double approx_score = best_score;
+  double unsharp_amount = unsharp_amount_start;
+  do {
+    best_score = approx_score;
+    unsharp_amount += step_size;
+    if (unsharp_amount > max_amount || unsharp_amount < min_amount) break;
+    unsharp(cpi, source, blurred, sharpened, unsharp_amount);
+    approx_score = cal_approx_score(cpi, ref, sharpened);
+
+    loop_count++;
+  } while (approx_score > best_score && loop_count < max_loop_count);
+  unsharp_amount =
+      approx_score > best_score ? unsharp_amount : unsharp_amount - step_size;
+  return AOMMIN(max_amount, AOMMAX(unsharp_amount, min_amount));
+}
+
+static double find_best_frame_unsharp_amount_neg(
+    const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const ref,
+    YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred,
+    const double unsharp_amount_start, const double step_size,
+    const int max_loop_count, const double max_filter_amount) {
+  const AV1_COMMON *const cm = &cpi->common;
+  const int width = source->y_width;
+  const int height = source->y_height;
+
+  double best_score = 0.0;
+  aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, ref, source, cpi->td.mb.e_mbd.bd,
+                   1, &best_score);
+  YV12_BUFFER_CONFIG sharpened;
+  memset(&sharpened, 0, sizeof(sharpened));
+  aom_alloc_frame_buffer(
+      &sharpened, width, height, 1, 1, cm->seq_params.use_highbitdepth,
+      cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+
+  const double unsharp_amount = find_best_frame_unsharp_amount_loop_neg(
+      cpi, ref, source, blurred, &sharpened, best_score, unsharp_amount_start,
+      step_size, max_loop_count, max_filter_amount);
+
+  aom_free_frame_buffer(&sharpened);
+  return unsharp_amount;
+}
+#endif
+
 void av1_update_vmaf_curve(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source,
                            YV12_BUFFER_CONFIG *recon) {
   const int bit_depth = cpi->td.mb.e_mbd.bd;
@@ -806,4 +892,24 @@
   } else {
     cpi->vmaf_info.last_frame_ysse = (double)aom_get_y_sse(source, recon);
   }
+
+#if CONFIG_USE_VMAF_RC
+  if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
+    const AV1_COMMON *const cm = &cpi->common;
+    const int width = source->y_width;
+    const int height = source->y_height;
+
+    YV12_BUFFER_CONFIG blurred;
+    memset(&blurred, 0, sizeof(blurred));
+    aom_alloc_frame_buffer(
+        &blurred, width, height, 1, 1, cm->seq_params.use_highbitdepth,
+        cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+
+    gaussian_blur(bit_depth, recon, &blurred);
+    cpi->vmaf_info.best_unsharp_amount = find_best_frame_unsharp_amount_neg(
+        cpi, source, recon, &blurred, 0.0, 0.025, 20, 1.01);
+
+    aom_free_frame_buffer(&blurred);
+  }
+#endif
 }
diff --git a/av1/encoder/tune_vmaf.h b/av1/encoder/tune_vmaf.h
index c4cf072..c71fee5 100644
--- a/av1/encoder/tune_vmaf.h
+++ b/av1/encoder/tune_vmaf.h
@@ -19,6 +19,10 @@
 
 void av1_vmaf_frame_preprocessing(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source);
 
+#if CONFIG_USE_VMAF_RC
+void av1_vmaf_neg_preprocessing(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source);
+#endif  // CONFIG_USE_VMAF_RC
+
 void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi);
 
 void av1_set_vmaf_rdmult(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,