Add tune=vmaf_neg mode
Use the adaptive video pre-processing method together with VMAF based RDO
multiplier scaling and QP adjustment.
             avg_psnr  ssim   vmaf     vmaf_neg
lowres_bd10  12.804    5.381  -20.251  -8.154
midres_bd10  11.517    6.221  -21.000  -7.722
midres       12.095    8.089  -16.744  -6.106
hdres        11.036    7.410  -20.237  -7.762
1.8% VMAF_NEG gain compared to the method using a fixed pre-processing
strength.
Change-Id: Ie4bc57ea70cab5eb7622cbefec5cc877e150757a
diff --git a/aom/aomcx.h b/aom/aomcx.h
index 3ab1dd2..d789dd2 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -1374,7 +1374,8 @@
/* NOTE: enums 2 and 3 unused */
AOM_TUNE_VMAF_WITH_PREPROCESSING = 4,
AOM_TUNE_VMAF_WITHOUT_PREPROCESSING = 5,
- AOM_TUNE_VMAF_MAX_GAIN = 6
+ AOM_TUNE_VMAF_MAX_GAIN = 6,
+ AOM_TUNE_VMAF_NEG_MAX_GAIN = 7,
} aom_tune_metric;
#define AOM_MAX_LAYERS 32 /**< Max number of layers */
diff --git a/aom_dsp/vmaf.h b/aom_dsp/vmaf.h
index 246e9b6..775c9cb 100644
--- a/aom_dsp/vmaf.h
+++ b/aom_dsp/vmaf.h
@@ -34,6 +34,9 @@
// Stores the filter strength of the last frame.
double last_frame_unsharp_amount;
+ // Stores the base unsharp amount in video pre-processing.
+ double best_unsharp_amount;
+
// Stores the origial qindex before scaling.
int original_qindex;
diff --git a/apps/aomenc.c b/apps/aomenc.c
index 9814190..82c1f73 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -401,6 +401,7 @@
{ "vmaf_with_preprocessing", AOM_TUNE_VMAF_WITH_PREPROCESSING },
{ "vmaf_without_preprocessing", AOM_TUNE_VMAF_WITHOUT_PREPROCESSING },
{ "vmaf", AOM_TUNE_VMAF_MAX_GAIN },
+ { "vmaf_neg", AOM_TUNE_VMAF_NEG_MAX_GAIN },
{ NULL, 0 }
};
static const arg_def_t tune_metric =
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 6463ad6..70112e4 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -518,17 +518,25 @@
"VBR mode.");
#if !CONFIG_TUNE_VMAF
- if (extra_cfg->tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
- extra_cfg->tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
- extra_cfg->tuning == AOM_TUNE_VMAF_MAX_GAIN) {
+ if (extra_cfg->tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
+ extra_cfg->tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN) {
ERROR(
"This error may be related to the wrong configuration options: try to "
"set -DCONFIG_TUNE_VMAF=1 at the time CMake is run.");
}
#endif
+#if !CONFIG_USE_VMAF_RC
+ if (extra_cfg->tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
+ ERROR(
+ "This error may be related to the wrong configuration options: try to "
+ "set -DCONFIG_TUNE_VMAF=1 and -DCONFIG_USE_VMAF_RC=1 at the time CMake"
+ " is run.");
+ }
+#endif
+
#if CONFIG_TUNE_VMAF
- RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_VMAF_MAX_GAIN);
+ RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_VMAF_NEG_MAX_GAIN);
#else
RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_SSIM);
#endif
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index e67fde0..6583998 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -34,6 +34,10 @@
#include "av1/encoder/temporal_filter.h"
#include "av1/encoder/tpl_model.h"
+#if CONFIG_TUNE_VMAF
+#include "av1/encoder/tune_vmaf.h"
+#endif
+
#define TEMPORAL_FILTER_KEY_FRAME (CONFIG_REALTIME_ONLY ? 0 : 1)
static INLINE void set_refresh_frame_flags(
@@ -1373,6 +1377,14 @@
}
#endif // !CONFIG_REALTIME_ONLY
+#if CONFIG_TUNE_VMAF
+ if (!is_stat_generation_stage(cpi) &&
+ (oxcf->tune_cfg.tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
+ oxcf->tune_cfg.tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN)) {
+ av1_update_vmaf_curve(cpi, cpi->source, &cpi->common.cur_frame->buf);
+ }
+#endif
+
if (!is_stat_generation_stage(cpi)) {
update_fb_of_context_type(cpi, &frame_params, cpi->fb_of_context_type);
set_additional_frame_flags(cm, frame_flags);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 8284b96..c46c500 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1040,7 +1040,12 @@
aom_calloc(num_rows * num_cols,
sizeof(*cpi->vmaf_info.rdmult_scaling_factors)));
cpi->vmaf_info.last_frame_unsharp_amount = 0.0;
+ cpi->vmaf_info.best_unsharp_amount = 0.0;
cpi->vmaf_info.original_qindex = -1;
+
+#if CONFIG_USE_VMAF_RC
+ cpi->vmaf_info.vmaf_model = NULL;
+#endif
}
#endif
@@ -2366,6 +2371,12 @@
// Determine whether to use screen content tools using two fast encoding.
av1_determine_sc_tools_with_encoding(cpi, q);
+#if CONFIG_USE_VMAF_RC
+ if (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
+ av1_vmaf_neg_preprocessing(cpi, cpi->unscaled_source);
+ }
+#endif
+
// Loop variables
int loop = 0;
int loop_count = 0;
@@ -2402,9 +2413,8 @@
av1_scale_references(cpi, EIGHTTAP_REGULAR, 0, 0);
}
#if CONFIG_TUNE_VMAF
- if (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
- oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
- oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN) {
+ if (oxcf->tune_cfg.tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
+ oxcf->tune_cfg.tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN) {
cpi->vmaf_info.original_qindex = q;
q = av1_get_vmaf_base_qindex(cpi, q);
}
@@ -2507,9 +2517,8 @@
}
#if CONFIG_TUNE_VMAF
- if (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
- oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
- oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN) {
+ if (oxcf->tune_cfg.tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
+ oxcf->tune_cfg.tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN) {
q = cpi->vmaf_info.original_qindex;
}
#endif
@@ -2982,7 +2991,8 @@
#if CONFIG_TUNE_VMAF
if (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
- oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN) {
+ oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN ||
+ oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
av1_set_mb_vmaf_rdmult_scaling(cpi);
}
#endif
@@ -3520,15 +3530,6 @@
}
}
-#if CONFIG_TUNE_VMAF
- if (!is_stat_generation_stage(cpi) &&
- (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
- oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
- oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN)) {
- av1_update_vmaf_curve(cpi, cpi->source, &cpi->common.cur_frame->buf);
- }
-#endif
-
if (cpi->level_params.keep_level_stats && !is_stat_generation_stage(cpi)) {
// Initialize level info. at the beginning of each sequence.
if (cm->current_frame.frame_type == KEY_FRAME && !cpi->no_show_fwd_kf) {
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index 95de618..032c40d 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -432,7 +432,8 @@
}
#if CONFIG_TUNE_VMAF
if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
- cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN) {
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN ||
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
av1_set_vmaf_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
}
#endif
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index 12d0b41..d974b49 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -3047,9 +3047,8 @@
rc->active_best_quality[i] = cpi->common.quant_params.base_qindex;
#if CONFIG_TUNE_VMAF
if (cpi->vmaf_info.original_qindex != -1 &&
- (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
- cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
- cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN)) {
+ (cpi->oxcf.tune_cfg.tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
+ cpi->oxcf.tune_cfg.tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN)) {
rc->active_best_quality[i] = cpi->vmaf_info.original_qindex;
}
#endif
diff --git a/av1/encoder/tune_vmaf.c b/av1/encoder/tune_vmaf.c
index 3481a90..62c29d4 100644
--- a/av1/encoder/tune_vmaf.c
+++ b/av1/encoder/tune_vmaf.c
@@ -237,6 +237,30 @@
return unsharp_amount;
}
+#if CONFIG_USE_VMAF_RC
+void av1_vmaf_neg_preprocessing(AV1_COMP *const cpi,  // Applies unsharp() to source using
+ YV12_BUFFER_CONFIG *const source) {  // cpi->vmaf_info.best_unsharp_amount.
+ aom_clear_system_state();
+ const AV1_COMMON *const cm = &cpi->common;
+ const int bit_depth = cpi->td.mb.e_mbd.bd;
+ const int width = source->y_width;
+ const int height = source->y_height;
+ const double best_frame_unsharp_amount = cpi->vmaf_info.best_unsharp_amount;
+ if (best_frame_unsharp_amount == 0.0) return;  // Zero amount: return without touching source.
+
+ YV12_BUFFER_CONFIG blurred;  // Scratch frame allocated with source's y_width x y_height.
+ memset(&blurred, 0, sizeof(blurred));
+ aom_alloc_frame_buffer(  // NOTE(review): the return value is not checked.
+ &blurred, width, height, 1, 1, cm->seq_params.use_highbitdepth,
+ cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+
+ gaussian_blur(bit_depth, source, &blurred);  // Presumably writes a blurred copy of source into blurred.
+ unsharp(cpi, source, &blurred, source, best_frame_unsharp_amount);  // source is passed as input and (presumably) as destination.
+ aom_free_frame_buffer(&blurred);
+ aom_clear_system_state();
+}
+#endif
+
void av1_vmaf_frame_preprocessing(AV1_COMP *const cpi,
YV12_BUFFER_CONFIG *const source) {
aom_clear_system_state();
@@ -793,6 +817,68 @@
return qindex;
}
+#if CONFIG_USE_VMAF_RC
+static double cal_approx_score(const AV1_COMP *const cpi,  // Returns the score written by
+ YV12_BUFFER_CONFIG *const ref,  // aom_calc_vmaf_rc() for sharpened
+ YV12_BUFFER_CONFIG *const sharpened) {  // measured against ref.
+ double score;
+ const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
+ aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, ref, sharpened, bit_depth, 1,  // NOTE(review): meaning of the literal 1 is not visible here.
+ &score);
+ return score;
+}
+
+static double find_best_frame_unsharp_amount_loop_neg(  // Steps the unsharp amount by step_size
+ const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const ref,  // from unsharp_amount_start while the
+ YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred,  // score of the sharpened frame keeps
+ YV12_BUFFER_CONFIG *const sharpened, double best_score,  // rising above best_score; returns the
+ const double unsharp_amount_start, const double step_size,  // chosen amount clamped to
+ const int max_loop_count, const double max_amount) {  // [0.0, max_amount].
+ const double min_amount = 0.0;
+ int loop_count = 0;
+ double approx_score = best_score;  // Seeded so the first iteration keeps the caller's baseline.
+ double unsharp_amount = unsharp_amount_start;
+ do {
+ best_score = approx_score;  // Last accepted score becomes the bar to beat.
+ unsharp_amount += step_size;
+ if (unsharp_amount > max_amount || unsharp_amount < min_amount) break;  // Out of range: stop without scoring this step.
+ unsharp(cpi, source, blurred, sharpened, unsharp_amount);
+ approx_score = cal_approx_score(cpi, ref, sharpened);
+
+ loop_count++;
+ } while (approx_score > best_score && loop_count < max_loop_count);  // Continue only on strict improvement, within the iteration budget.
+ unsharp_amount =  // If the last step did not improve the score, back off by one step.
+ approx_score > best_score ? unsharp_amount : unsharp_amount - step_size;
+ return AOMMIN(max_amount, AOMMAX(unsharp_amount, min_amount));  // Clamp to [min_amount, max_amount].
+}
+
+static double find_best_frame_unsharp_amount_neg(  // Scores source against ref as the baseline,
+ const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const ref,  // then runs the step search with a
+ YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred,  // temporary sharpened frame and
+ const double unsharp_amount_start, const double step_size,  // returns the amount it selects.
+ const int max_loop_count, const double max_filter_amount) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const int width = source->y_width;
+ const int height = source->y_height;
+
+ double best_score = 0.0;
+ aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, ref, source, cpi->td.mb.e_mbd.bd,  // Baseline: score of the unsharpened source.
+ 1, &best_score);
+ YV12_BUFFER_CONFIG sharpened;  // Scratch output for each trial amount.
+ memset(&sharpened, 0, sizeof(sharpened));
+ aom_alloc_frame_buffer(  // NOTE(review): the return value is not checked.
+ &sharpened, width, height, 1, 1, cm->seq_params.use_highbitdepth,
+ cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+
+ const double unsharp_amount = find_best_frame_unsharp_amount_loop_neg(
+ cpi, ref, source, blurred, &sharpened, best_score, unsharp_amount_start,
+ step_size, max_loop_count, max_filter_amount);
+
+ aom_free_frame_buffer(&sharpened);
+ return unsharp_amount;
+}
+#endif
+
void av1_update_vmaf_curve(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *recon) {
const int bit_depth = cpi->td.mb.e_mbd.bd;
@@ -806,4 +892,24 @@
} else {
cpi->vmaf_info.last_frame_ysse = (double)aom_get_y_sse(source, recon);
}
+
+#if CONFIG_USE_VMAF_RC
+ if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const int width = source->y_width;
+ const int height = source->y_height;
+
+ YV12_BUFFER_CONFIG blurred;
+ memset(&blurred, 0, sizeof(blurred));
+ aom_alloc_frame_buffer(
+ &blurred, width, height, 1, 1, cm->seq_params.use_highbitdepth,
+ cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+
+ gaussian_blur(bit_depth, recon, &blurred);
+ cpi->vmaf_info.best_unsharp_amount = find_best_frame_unsharp_amount_neg(
+ cpi, source, recon, &blurred, 0.0, 0.025, 20, 1.01);
+
+ aom_free_frame_buffer(&blurred);
+ }
+#endif
}
diff --git a/av1/encoder/tune_vmaf.h b/av1/encoder/tune_vmaf.h
index c4cf072..c71fee5 100644
--- a/av1/encoder/tune_vmaf.h
+++ b/av1/encoder/tune_vmaf.h
@@ -19,6 +19,10 @@
void av1_vmaf_frame_preprocessing(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source);
+#if CONFIG_USE_VMAF_RC  // Value-tested so the prototype is visible exactly when the definition is built.
+void av1_vmaf_neg_preprocessing(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source);
+#endif  // CONFIG_USE_VMAF_RC
+
void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi);
void av1_set_vmaf_rdmult(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,