Add the required library and flags for tuning for VMAF
Change-Id: I7cc5620d4a3d51d05fab335ef7d5e0a80c704b0d
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c0e4463..942f9f2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -382,6 +382,15 @@
# Add encoder examples and tools to the targets list.
list(APPEND AOM_APP_TARGETS ${AOM_ENCODER_EXAMPLE_TARGETS}
${AOM_ENCODER_TOOL_TARGETS})
+
+  if(CONFIG_TUNE_VMAF)  # Link libvmaf into aom when VMAF tuning is enabled.
+    find_library(VMAF vmaf)
+    if(NOT VMAF)
+      message(FATAL_ERROR "VMAF library not found.")
+    endif()
+    message(STATUS "Found VMAF library: ${VMAF}")
+    target_link_libraries(aom PRIVATE ${VMAF} -static)
+  endif()
endif()
if(ENABLE_EXAMPLES)
diff --git a/README.md b/README.md
index e8b06e1..6fea7b0 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,7 @@
- [Xcode builds](#xcode-builds)
- [Emscripten builds](#emscripten-builds)
- [Extra Build Flags](#extra-build-flags)
+ - [Build with VMAF support](#build-with-vmaf-support)
2. [Testing the library](#testing-the-av1-codec)
- [Basics](#testing-basics)
- [Unit tests](#1_unit-tests)
@@ -295,6 +296,24 @@
-DAOM_EXTRA_CXX_FLAGS=-UNDEBUG
~~~
+### Build with VMAF support
+
+After installing
+[libvmaf.a](https://github.com/Netflix/vmaf/blob/master/resource/doc/libvmaf.md),
+you can use it with the encoder:
+
+~~~
+ $ cmake path/to/aom -DCONFIG_TUNE_VMAF=1
+~~~
+
+Please note that the default VMAF model
+("/usr/local/share/model/vmaf_v0.6.1.pkl")
+will be used unless you set the following flag when running the encoder:
+
+~~~
+ # --vmaf-model-path=path/to/model
+~~~
+
## Testing the AV1 codec
### Testing basics
diff --git a/aom/aomcx.h b/aom/aomcx.h
index 790daca..8b4207d 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -1176,7 +1176,12 @@
/*!\brief Codec control function to set reference frame config:
* the ref_idx and the refresh flags for each buffer slot.
*/
- AV1E_SET_SVC_REF_FRAME_CONFIG = 152
+ AV1E_SET_SVC_REF_FRAME_CONFIG = 152,
+
+ /*!\brief Codec control function to set the path to the VMAF model used when
+ * tuning the encoder for VMAF.
+ */
+ AV1E_SET_VMAF_MODEL_PATH = 153,
};
/*!\brief aom 1-D scaling mode
@@ -1263,7 +1268,9 @@
AOM_TUNE_PSNR,
AOM_TUNE_SSIM,
AOM_TUNE_CDEF_DIST,
- AOM_TUNE_DAALA_DIST
+ AOM_TUNE_DAALA_DIST,
+ AOM_TUNE_VMAF_WITH_PREPROCESSING,
+ AOM_TUNE_VMAF_WITHOUT_PREPROCESSING,
} aom_tune_metric;
#define AOM_MAX_LAYERS 32 /**< Max number of layers */
@@ -1583,6 +1590,9 @@
AOM_CTRL_USE_TYPE(AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST, unsigned int)
#define AOM_CTRL_AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST
+AOM_CTRL_USE_TYPE(AV1E_SET_VMAF_MODEL_PATH, const char *)
+#define AOM_CTRL_AV1E_SET_VMAF_MODEL_PATH
+
AOM_CTRL_USE_TYPE(AV1E_SET_FILM_GRAIN_TEST_VECTOR, int)
#define AOM_CTRL_AV1E_SET_FILM_GRAIN_TEST_VECTOR
diff --git a/aom_dsp/aom_dsp.cmake b/aom_dsp/aom_dsp.cmake
index 7f4a6f6..58f1a1f 100644
--- a/aom_dsp/aom_dsp.cmake
+++ b/aom_dsp/aom_dsp.cmake
@@ -289,6 +289,11 @@
"${AOM_ROOT}/aom_dsp/psnrhvs.c" "${AOM_ROOT}/aom_dsp/ssim.c"
"${AOM_ROOT}/aom_dsp/ssim.h")
endif()
+
+ if(CONFIG_TUNE_VMAF)  # Add the VMAF sources only when VMAF tuning is on.
+ list(APPEND AOM_DSP_ENCODER_SOURCES "${AOM_ROOT}/aom_dsp/vmaf.c"
+ "${AOM_ROOT}/aom_dsp/vmaf.h")
+ endif()
endif()
# Creates aom_dsp build targets. Must not be called until after libaom target
diff --git a/aom_dsp/vmaf.c b/aom_dsp/vmaf.c
new file mode 100644
index 0000000..4e20155
--- /dev/null
+++ b/aom_dsp/vmaf.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <libvmaf.h>
+#include <stdlib.h>
+#include "aom_dsp/vmaf.h"
+#include "aom_ports/system_state.h"
+
+typedef struct FrameData {
+ const YV12_BUFFER_CONFIG *source;  // Frame copied into VMAF's ref_data.
+ const YV12_BUFFER_CONFIG *distorted;  // Frame copied into VMAF's main_data.
+ int frame_set;  // Set to 1 once read_frame_8bd() has delivered the frames.
+} FrameData;
+
+// Frame-reader callback handed to compute_vmaf().
+// On the first call it converts the luma plane of both frames to float and
+// returns 0; on every later call it returns 2 (no more frames to read).
+static int read_frame_8bd(float *ref_data, float *main_data, float *temp_data,
+                          int stride, void *user_data) {
+  FrameData *const frame_pair = (FrameData *)user_data;
+  (void)temp_data;
+
+  // Exactly one frame pair is delivered per FrameData.
+  if (frame_pair->frame_set) return 2;
+
+  const YV12_BUFFER_CONFIG *const src = frame_pair->source;
+  const YV12_BUFFER_CONFIG *const dist = frame_pair->distorted;
+  const int width = src->y_width;
+  const int height = src->y_height;
+  assert(width == dist->y_width);
+  assert(height == dist->y_height);
+
+  // |stride| is divided by sizeof(float), i.e. it is treated as a byte count.
+  const size_t float_step = stride / sizeof(*ref_data);
+  const uint8_t *src_row = src->y_buffer;
+  for (int r = 0; r < height; ++r) {
+    for (int c = 0; c < width; ++c) ref_data[c] = (float)src_row[c];
+    src_row += src->y_stride;
+    ref_data += float_step;
+  }
+
+  const uint8_t *dist_row = dist->y_buffer;
+  for (int r = 0; r < height; ++r) {
+    for (int c = 0; c < width; ++c) main_data[c] = (float)dist_row[c];
+    dist_row += dist->y_stride;
+    main_data += float_step;
+  }
+
+  frame_pair->frame_set = 1;
+  return 0;
+}
+
+// Scores |distorted| against |source| with libvmaf's compute_vmaf() using the
+// model at |model_path|; writes the score to *vmaf and returns its status.
+int aom_calc_vmaf(const char *model_path, const YV12_BUFFER_CONFIG *source,
+                  const YV12_BUFFER_CONFIG *distorted, double *vmaf) {
+  aom_clear_system_state();
+  const int width = source->y_width;
+  const int height = source->y_height;
+  FrameData frames = { source, distorted, 0 };
+  // Start from a defined value: on failure compute_vmaf() may leave it unset.
+  double vmaf_score = 0.0;
+  const int ret =
+      compute_vmaf(&vmaf_score, (char *)"yuv420p", width, height,
+                   read_frame_8bd, /*user_data=*/&frames, (char *)model_path,
+                   /*log_path=*/NULL, /*log_fmt=*/NULL, /*disable_clip=*/0,
+                   /*disable_avx=*/0, /*enable_transform=*/0,
+                   /*phone_model=*/0, /*do_psnr=*/0, /*do_ssim=*/0,
+                   /*do_ms_ssim=*/0, /*pool_method=*/NULL, /*n_thread=*/1,
+                   /*n_subsample=*/1, /*enable_conf_interval=*/0);
+
+  aom_clear_system_state();
+  *vmaf = vmaf_score;
+  return ret;
+}
diff --git a/aom_dsp/vmaf.h b/aom_dsp/vmaf.h
new file mode 100644
index 0000000..8a9e56c
--- /dev/null
+++ b/aom_dsp/vmaf.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2019, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AOM_DSP_VMAF_H_
+#define AOM_AOM_DSP_VMAF_H_
+
+#include "aom_scale/yv12config.h"
+// Writes the VMAF score of |distorted| relative to |source| to *vmaf.
+int aom_calc_vmaf(const char *model_path, const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *distorted, double *vmaf);
+
+#endif // AOM_AOM_DSP_VMAF_H_
diff --git a/apps/aomenc.c b/apps/aomenc.c
index 56441c8..ce5ad12 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -394,6 +394,10 @@
{ "cdef-dist", AOM_TUNE_CDEF_DIST },
{ "daala-dist", AOM_TUNE_DAALA_DIST },
#endif
+#if CONFIG_TUNE_VMAF
+ { "vmaf_with_preprocessing", AOM_TUNE_VMAF_WITH_PREPROCESSING },
+ { "vmaf_without_preprocessing", AOM_TUNE_VMAF_WITHOUT_PREPROCESSING },
+#endif
{ NULL, 0 }
};
static const arg_def_t tune_metric =
@@ -612,6 +616,10 @@
"Signal timing info in the bitstream (model unly works for no "
"hidden frames, no super-res yet):",
timing_info_enum);
+#if CONFIG_TUNE_VMAF
+static const arg_def_t vmaf_model_path =
+ ARG_DEF(NULL, "vmaf-model-path", 1, "Path to the VMAF model file");
+#endif
static const arg_def_t film_grain_test =
ARG_DEF(NULL, "film-grain-test", 1,
"Film grain test vectors (0: none (default), 1: test-1 2: test-2, "
@@ -900,6 +908,9 @@
&sframe_dist,
&sframe_mode,
&save_as_annexb,
+#if CONFIG_TUNE_VMAF
+ &vmaf_model_path,
+#endif
NULL };
static const int av1_arg_ctrl_map[] = { AOME_SET_CPUUSED,
AOME_SET_ENABLEAUTOALTREF,
@@ -998,6 +1009,9 @@
AV1E_SET_TARGET_SEQ_LEVEL_IDX,
AV1E_SET_TIER_MASK,
AV1E_SET_MIN_CR,
+#if CONFIG_TUNE_VMAF
+ AV1E_SET_VMAF_MODEL_PATH,
+#endif
0 };
#endif // CONFIG_AV1_ENCODER
@@ -1072,6 +1086,9 @@
int write_ivf;
// whether to use 16bit internal buffers
int use_16bit_internal;
+#if CONFIG_TUNE_VMAF
+ const char *vmaf_model_path;
+#endif
};
struct stream_state {
@@ -1564,6 +1581,10 @@
} else if (arg_match(&arg, &tile_height, argi)) {
config->cfg.tile_height_count =
arg_parse_list(&arg, config->cfg.tile_heights, MAX_TILE_HEIGHTS);
+#if CONFIG_TUNE_VMAF
+ } else if (arg_match(&arg, &vmaf_model_path, argi)) {
+ config->vmaf_model_path = arg.val;
+#endif
} else if (global->usage == AOM_USAGE_REALTIME &&
arg_match(&arg, &enable_restoration, argi)) {
if (arg_parse_uint(&arg) == 1) {
@@ -1870,6 +1891,14 @@
ctx_exit_on_error(&stream->encoder, "Failed to control codec");
}
+
+#if CONFIG_TUNE_VMAF
+ if (stream->config.vmaf_model_path) {
+ aom_codec_control_(&stream->encoder, AV1E_SET_VMAF_MODEL_PATH,
+ stream->config.vmaf_model_path);
+ }
+#endif
+
if (stream->config.film_grain_filename) {
aom_codec_control_(&stream->encoder, AV1E_SET_FILM_GRAIN_TABLE,
stream->config.film_grain_filename);
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 7e0a296..6fa4c87 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -47,6 +47,7 @@
unsigned int max_gf_interval;
unsigned int gf_max_pyr_height;
aom_tune_metric tuning;
+ const char *vmaf_model_path;
unsigned int cq_level; // constrained quality level
unsigned int rc_max_intra_bitrate_pct;
unsigned int rc_max_inter_bitrate_pct;
@@ -152,39 +153,40 @@
};
static struct av1_extracfg default_extra_cfg = {
- 0, // cpu_used
- 1, // enable_auto_alt_ref
- 0, // enable_auto_bwd_ref
- 0, // noise_sensitivity
- CONFIG_SHARP_SETTINGS, // sharpness
- 0, // static_thresh
- 1, // row_mt
- 0, // tile_columns
- 0, // tile_rows
- 1, // enable_tpl_model
- 1, // enable_keyframe_filtering
- 7, // arnr_max_frames
- 5, // arnr_strength
- 0, // min_gf_interval; 0 -> default decision
- 0, // max_gf_interval; 0 -> default decision
- 4, // gf_max_pyr_height
- AOM_TUNE_PSNR, // tuning
- 10, // cq_level
- 0, // rc_max_intra_bitrate_pct
- 0, // rc_max_inter_bitrate_pct
- 0, // gf_cbr_boost_pct
- 0, // lossless
- !CONFIG_SHARP_SETTINGS, // enable_cdef
- 1, // enable_restoration
- 1, // force_video_mode
- 1, // enable_obmc
- 3, // disable_trellis_quant
- 0, // enable_qm
- DEFAULT_QM_Y, // qm_y
- DEFAULT_QM_U, // qm_u
- DEFAULT_QM_V, // qm_v
- DEFAULT_QM_FIRST, // qm_min
- DEFAULT_QM_LAST, // qm_max
+ 0, // cpu_used
+ 1, // enable_auto_alt_ref
+ 0, // enable_auto_bwd_ref
+ 0, // noise_sensitivity
+ CONFIG_SHARP_SETTINGS, // sharpness
+ 0, // static_thresh
+ 1, // row_mt
+ 0, // tile_columns
+ 0, // tile_rows
+ 1, // enable_tpl_model
+ 1, // enable_keyframe_filtering
+ 7, // arnr_max_frames
+ 5, // arnr_strength
+ 0, // min_gf_interval; 0 -> default decision
+ 0, // max_gf_interval; 0 -> default decision
+ 4, // gf_max_pyr_height
+ AOM_TUNE_PSNR, // tuning
+ "/usr/local/share/model/vmaf_v0.6.1.pkl", // VMAF model path
+ 10, // cq_level
+ 0, // rc_max_intra_bitrate_pct
+ 0, // rc_max_inter_bitrate_pct
+ 0, // gf_cbr_boost_pct
+ 0, // lossless
+ !CONFIG_SHARP_SETTINGS, // enable_cdef
+ 1, // enable_restoration
+ 1, // force_video_mode
+ 1, // enable_obmc
+ 3, // disable_trellis_quant
+ 0, // enable_qm
+ DEFAULT_QM_Y, // qm_y
+ DEFAULT_QM_U, // qm_u
+ DEFAULT_QM_V, // qm_v
+ DEFAULT_QM_FIRST, // qm_min
+ DEFAULT_QM_LAST, // qm_max
#if CONFIG_DIST_8X8
0,
#endif
@@ -474,7 +476,10 @@
AOM_CICP_MC_ICTCP);
RANGE_CHECK(extra_cfg, color_range, 0, 1);
-#if CONFIG_DIST_8X8
+#if CONFIG_TUNE_VMAF
+ RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR,
+ AOM_TUNE_VMAF_WITHOUT_PREPROCESSING);
+#elif CONFIG_DIST_8X8
RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_DAALA_DIST);
#else
RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_SSIM);
@@ -824,6 +829,7 @@
oxcf->gf_max_pyr_height = extra_cfg->gf_max_pyr_height;
oxcf->tuning = extra_cfg->tuning;
+ oxcf->vmaf_model_path = extra_cfg->vmaf_model_path;
oxcf->content = extra_cfg->content;
oxcf->cdf_update_mode = (uint8_t)extra_cfg->cdf_update_mode;
oxcf->superblock_size = extra_cfg->superblock_size;
@@ -1601,6 +1607,13 @@
return update_extra_cfg(ctx, &extra_cfg);
}
+static aom_codec_err_t ctrl_set_vmaf_model_path(aom_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct av1_extracfg extra_cfg = ctx->extra_cfg;  // edit a copy of the config
+ extra_cfg.vmaf_model_path = CAST(AV1E_SET_VMAF_MODEL_PATH, args);
+ return update_extra_cfg(ctx, &extra_cfg);  // string is not copied here
+}
+
static aom_codec_err_t ctrl_set_film_grain_test_vector(
aom_codec_alg_priv_t *ctx, va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
@@ -2519,6 +2532,7 @@
{ AV1E_SET_RENDER_SIZE, ctrl_set_render_size },
{ AV1E_SET_SUPERBLOCK_SIZE, ctrl_set_superblock_size },
{ AV1E_SET_SINGLE_TILE_DECODING, ctrl_set_single_tile_decoding },
+ { AV1E_SET_VMAF_MODEL_PATH, ctrl_set_vmaf_model_path },
{ AV1E_SET_FILM_GRAIN_TEST_VECTOR, ctrl_set_film_grain_test_vector },
{ AV1E_SET_FILM_GRAIN_TABLE, ctrl_set_film_grain_table },
{ AV1E_SET_DENOISE_NOISE_LEVEL, ctrl_set_denoise_noise_level },
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index efd28a8..371fa0f 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -30,6 +30,9 @@
#if CONFIG_INTERNAL_STATS
#include "aom_dsp/ssim.h"
#endif
+#if CONFIG_TUNE_VMAF
+#include "aom_dsp/vmaf.h"
+#endif
#include "aom_ports/aom_timer.h"
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
@@ -5939,6 +5942,16 @@
if (oxcf->tuning == AOM_TUNE_SSIM) set_mb_ssim_rdmult_scaling(cpi);
+#if CONFIG_TUNE_VMAF
+ if (oxcf->tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING ||
+ oxcf->tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING) {
+ double vmaf;
+ aom_calc_vmaf(oxcf->vmaf_model_path, cpi->source, cpi->source, &vmaf);
+ printf("Tune for VMAF is still a WIP.\n");
+ exit(0);
+ }
+#endif
+
aom_clear_system_state();
#if CONFIG_INTERNAL_STATS
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index e3b806c..23ac388 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -343,6 +343,7 @@
aom_fixed_buf_t two_pass_stats_in;
aom_tune_metric tuning;
+ const char *vmaf_model_path;
aom_tune_content content;
int use_highbitdepth;
aom_color_primaries_t color_primaries;
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 5a043ec..0798f01 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -108,6 +108,7 @@
set_aom_config_var(CONFIG_SPATIAL_RESAMPLING 1 "Spatial resampling.")
set_aom_config_var(DECODE_HEIGHT_LIMIT 0 "Set limit for decode height.")
set_aom_config_var(DECODE_WIDTH_LIMIT 0 "Set limit for decode width.")
+set_aom_config_var(CONFIG_TUNE_VMAF 0 "Enable encoding tuning for VMAF.")
# AV1 experiment flags.
set_aom_config_var(CONFIG_SPEED_STATS 0 "AV1 experiment flag.")