Add libvmaf v2.x support for tune=vmaf* Removed the libvmaf v1.x code and replaced the libvmaf_rc functions with their libvmaf 2.0 equivalents. baseline=psnr, 150 frames, cpu-used=3, vbr ------------------------------------------ data set=hdres PSNR SSIM VMAF VMAF_NEG tune=vmaf -- -- -36.5% -- tune=vmaf_neg 11.9% 6.0% -22.6% -8.6% tune=vmaf_with_preprocessing -- -- -35.2% -- tune=vmaf_without_preprocessing 7.4% 7.5% -2.9% -2.7% ------------------------------------------ data set=midres_bd10 PSNR SSIM VMAF VMAF_NEG tune=vmaf -- -- -33.1% -- tune=vmaf_neg 10.0% 4.2% -22.7% -10.2% tune=vmaf_with_preprocessing -- -- -30.5% -- tune=vmaf_without_preprocessing 4.2% 3.7% -3.5% -3.0% Please note that, because of some outstanding issues in libvmaf, multi-threaded VMAF calculation cannot be enabled yet. Change-Id: Ic94cedbc3719dbbd341b3dcccec758b68a42b71c
diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ebe580..b731c54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt
@@ -507,42 +507,18 @@ endif() endif() - if(CONFIG_USE_VMAF_RC AND NOT CONFIG_TUNE_VMAF) - message(FATAL_ERROR "Turn on CONFIG_TUNE_VMAF to use CONFIG_USE_VMAF_RC.") - endif() - if(CONFIG_TUNE_VMAF) find_package(PkgConfig) - if(CONFIG_USE_VMAF_RC) - if(PKG_CONFIG_FOUND) - pkg_check_modules(VMAF_RC REQUIRED libvmaf_rc) - target_link_libraries(aom - PRIVATE ${VMAF_RC_LDFLAGS} ${VMAF_RC_LIBRARIES}) - target_include_directories(aom PRIVATE ${VMAF_RC_INCLUDE_DIRS}) - if(VMAF_RC_CFLAGS) - append_compiler_flag("${VMAF_RC_CFLAGS}") - endif() - else() - message(FATAL_ERROR "CONFIG_USE_VMAF_RC error: pkg-config not found.") - endif() - else() - if(PKG_CONFIG_FOUND) - pkg_check_modules(VMAF REQUIRED libvmaf) - else() - find_library(VMAF_LIBRARIES vmaf) - find_path(VMAF_INCLUDE_DIRS libvmaf.h PATH_SUFFIXES libvmaf) - if(VMAF_LIBRARIES AND VMAF_INCLUDE_DIRS) - message(STATUS "Found VMAF library: ${VMAF_LIBRARIES}") - message(STATUS "Found VMAF include: ${VMAF_INCLUDE_DIRS}") - else() - message(FATAL_ERROR "VMAF library not found.") - endif() - endif() - target_link_libraries(aom PRIVATE ${VMAF_LDFLAGS} ${VMAF_LIBRARIES}) + if(PKG_CONFIG_FOUND) + pkg_check_modules(VMAF REQUIRED libvmaf) + target_link_libraries(aom + PRIVATE ${VMAF_LDFLAGS} ${VMAF_LIBRARIES} -static) target_include_directories(aom PRIVATE ${VMAF_INCLUDE_DIRS}) if(VMAF_CFLAGS) append_compiler_flag("${VMAF_CFLAGS}") endif() + else() + message(FATAL_ERROR "CONFIG_TUNE_VMAF error: pkg-config not found.") endif() set_target_properties(aom PROPERTIES LINKER_LANGUAGE CXX) if(BUILD_SHARED_LIBS)
diff --git a/README.md b/README.md index a458f52..c4e7e6c 100644 --- a/README.md +++ b/README.md
@@ -326,7 +326,7 @@ ~~~ Please note that the default VMAF model -("/usr/local/share/model/vmaf_v0.6.1.pkl") +("/usr/local/share/model/vmaf_v0.6.1.json") will be used unless you set the following flag when running the encoder: ~~~
diff --git a/aom_dsp/vmaf.c b/aom_dsp/vmaf.c index 4165343..219e278 100644 --- a/aom_dsp/vmaf.c +++ b/aom_dsp/vmaf.c
@@ -12,9 +12,6 @@ #include "aom_dsp/vmaf.h" #include <assert.h> -#if !CONFIG_USE_VMAF_RC -#include <libvmaf.h> -#endif #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -24,10 +21,7 @@ #include <unistd.h> #endif -#if CONFIG_USE_VMAF_RC -#include <libvmaf/libvmaf.rc.h> -#endif - +#include <libvmaf/libvmaf.h> #include "aom_dsp/blend.h" #include "aom_ports/system_state.h" @@ -36,162 +30,18 @@ exit(EXIT_FAILURE); } -#if !CONFIG_USE_VMAF_RC -typedef struct FrameData { - const YV12_BUFFER_CONFIG *source; - const YV12_BUFFER_CONFIG *distorted; - int frame_set; - int bit_depth; -} FrameData; - -// A callback function used to pass data to VMAF. -// Returns 0 after reading a frame. -// Returns 2 when there is no more frame to read. -static int read_frame(float *ref_data, float *main_data, float *temp_data, - int stride, void *user_data) { - FrameData *frames = (FrameData *)user_data; - - if (!frames->frame_set) { - const int width = frames->source->y_width; - const int height = frames->source->y_height; - assert(width == frames->distorted->y_width); - assert(height == frames->distorted->y_height); - - if (frames->source->flags & YV12_FLAG_HIGHBITDEPTH) { - const float scale_factor = 1.0f / (float)(1 << (frames->bit_depth - 8)); - uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(frames->source->y_buffer); - uint16_t *main_ptr = CONVERT_TO_SHORTPTR(frames->distorted->y_buffer); - - for (int row = 0; row < height; ++row) { - for (int col = 0; col < width; ++col) { - ref_data[col] = scale_factor * (float)ref_ptr[col]; - } - ref_ptr += frames->source->y_stride; - ref_data += stride / sizeof(*ref_data); - } - - for (int row = 0; row < height; ++row) { - for (int col = 0; col < width; ++col) { - main_data[col] = scale_factor * (float)main_ptr[col]; - } - main_ptr += frames->distorted->y_stride; - main_data += stride / sizeof(*main_data); - } - } else { - uint8_t *ref_ptr = frames->source->y_buffer; - uint8_t *main_ptr = frames->distorted->y_buffer; - - for (int row = 0; row < 
height; ++row) { - for (int col = 0; col < width; ++col) { - ref_data[col] = (float)ref_ptr[col]; - } - ref_ptr += frames->source->y_stride; - ref_data += stride / sizeof(*ref_data); - } - - for (int row = 0; row < height; ++row) { - for (int col = 0; col < width; ++col) { - main_data[col] = (float)main_ptr[col]; - } - main_ptr += frames->distorted->y_stride; - main_data += stride / sizeof(*main_data); - } - } - frames->frame_set = 1; - return 0; - } - - (void)temp_data; - return 2; -} - -void aom_calc_vmaf(const char *model_path, const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *distorted, const int bit_depth, - double *const vmaf) { - aom_clear_system_state(); - const int width = source->y_width; - const int height = source->y_height; - FrameData frames = { source, distorted, 0, bit_depth }; - char *fmt = bit_depth == 10 ? "yuv420p10le" : "yuv420p"; - double vmaf_score; - const int ret = - compute_vmaf(&vmaf_score, fmt, width, height, read_frame, - /*user_data=*/&frames, (char *)model_path, - /*log_path=*/NULL, /*log_fmt=*/NULL, /*disable_clip=*/1, - /*disable_avx=*/0, /*enable_transform=*/0, - /*phone_model=*/0, /*do_psnr=*/0, /*do_ssim=*/0, - /*do_ms_ssim=*/0, /*pool_method=*/NULL, /*n_thread=*/0, - /*n_subsample=*/1, /*enable_conf_interval=*/0); - if (ret) vmaf_fatal_error("Failed to compute VMAF scores."); - - aom_clear_system_state(); - *vmaf = vmaf_score; -} - -void aom_calc_vmaf_multi_frame(void *user_data, const char *model_path, - int (*rd_frm)(float *ref_data, float *main_data, - float *temp_data, int stride_byte, - void *user_data), - int frame_width, int frame_height, int bit_depth, - double *vmaf) { - aom_clear_system_state(); - - char *fmt = bit_depth == 10 ? 
"yuv420p10le" : "yuv420p"; - int log_path_length = snprintf(NULL, 0, "vmaf_scores_%d.xml", getpid()) + 1; - char *log_path = malloc(log_path_length); - snprintf(log_path, log_path_length, "vmaf_scores_%d.xml", getpid()); - double vmaf_score; - const int ret = - compute_vmaf(&vmaf_score, fmt, frame_width, frame_height, rd_frm, - /*user_data=*/user_data, (char *)model_path, - /*log_path=*/log_path, /*log_fmt=*/NULL, /*disable_clip=*/0, - /*disable_avx=*/0, /*enable_transform=*/0, - /*phone_model=*/0, /*do_psnr=*/0, /*do_ssim=*/0, - /*do_ms_ssim=*/0, /*pool_method=*/NULL, /*n_thread=*/0, - /*n_subsample=*/1, /*enable_conf_interval=*/0); - FILE *vmaf_log = fopen(log_path, "r"); - free(log_path); - log_path = NULL; - if (vmaf_log == NULL || ret) { - vmaf_fatal_error("Failed to compute VMAF scores."); - } - - int frame_index = 0; - char buf[512]; - while (fgets(buf, 511, vmaf_log) != NULL) { - if (memcmp(buf, "\t\t<frame ", 9) == 0) { - char *p = strstr(buf, "vmaf="); - if (p != NULL && p[5] == '"') { - char *p2 = strstr(&p[6], "\""); - *p2 = '\0'; - const double score = atof(&p[6]); - if (score < 0.0 || score > 100.0) { - vmaf_fatal_error("Failed to compute VMAF scores."); - } - vmaf[frame_index++] = score; - } - } - } - fclose(vmaf_log); - - aom_clear_system_state(); -} -#endif - -#if CONFIG_USE_VMAF_RC -void aom_init_vmaf_model_rc(VmafModel **vmaf_model, const char *model_path) { +void aom_init_vmaf_model(VmafModel **vmaf_model, const char *model_path) { if (*vmaf_model != NULL) return; VmafModelConfig model_cfg; model_cfg.flags = VMAF_MODEL_FLAG_DISABLE_CLIP; model_cfg.name = "vmaf"; - model_cfg.path = (char *)model_path; - if (vmaf_model_load_from_path(vmaf_model, &model_cfg)) { + if (vmaf_model_load_from_path(vmaf_model, &model_cfg, model_path)) { vmaf_fatal_error("Failed to load VMAF model."); } } -void aom_close_vmaf_model_rc(VmafModel *vmaf_model) { +void aom_close_vmaf_model(VmafModel *vmaf_model) { vmaf_model_destroy(vmaf_model); } @@ -221,8 +71,9 @@ } } -void 
aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model, - bool cal_vmaf_neg) { +void aom_init_vmaf_context(VmafContext **vmaf_context, VmafModel *vmaf_model, + bool cal_vmaf_neg) { + // TODO(sdeng): make them CLI arguments. VmafConfiguration cfg; cfg.log_level = VMAF_LOG_LEVEL_NONE; cfg.n_threads = 0; @@ -233,41 +84,82 @@ vmaf_fatal_error("Failed to init VMAF context."); } - if (vmaf_use_features_from_model(*vmaf_context, vmaf_model)) { - vmaf_fatal_error("Failed to load feature extractors from VMAF model."); - } - if (cal_vmaf_neg) { VmafFeatureDictionary *vif_feature = NULL; - vmaf_feature_dictionary_set(&vif_feature, "vif_enhn_gain_limit", "1.0"); - if (vmaf_use_feature(*vmaf_context, "float_vif", vif_feature)) { + if (vmaf_feature_dictionary_set(&vif_feature, "vif_enhn_gain_limit", + "1.0")) { + vmaf_fatal_error("Failed to set vif_enhn_gain_limit."); + } + if (vmaf_model_feature_overload(vmaf_model, "float_vif", vif_feature)) { vmaf_fatal_error("Failed to use feature float_vif."); } VmafFeatureDictionary *adm_feature = NULL; - vmaf_feature_dictionary_set(&adm_feature, "adm_enhn_gain_limit", "1.0"); - if (vmaf_use_feature(*vmaf_context, "float_adm", adm_feature)) { + if (vmaf_feature_dictionary_set(&adm_feature, "adm_enhn_gain_limit", + "1.0")) { + vmaf_fatal_error("Failed to set adm_enhn_gain_limit."); + } + if (vmaf_model_feature_overload(vmaf_model, "adm", adm_feature)) { vmaf_fatal_error("Failed to use feature float_adm."); } } VmafFeatureDictionary *motion_force_zero = NULL; - vmaf_feature_dictionary_set(&motion_force_zero, "motion_force_zero", "true"); - if (vmaf_use_feature(*vmaf_context, "float_motion", motion_force_zero)) { + if (vmaf_feature_dictionary_set(&motion_force_zero, "motion_force_zero", + "1")) { + vmaf_fatal_error("Failed to set motion_force_zero."); + } + if (vmaf_model_feature_overload(vmaf_model, "float_motion", + motion_force_zero)) { vmaf_fatal_error("Failed to use feature float_motion."); } + + if 
(vmaf_use_features_from_model(*vmaf_context, vmaf_model)) { + vmaf_fatal_error("Failed to load feature extractors from VMAF model."); + } } -void aom_close_vmaf_context_rc(VmafContext *vmaf_context) { +void aom_close_vmaf_context(VmafContext *vmaf_context) { if (vmaf_close(vmaf_context)) { vmaf_fatal_error("Failed to close VMAF context."); } } -void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model, - const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *distorted, - int bit_depth, int frame_index, double *vmaf) { +void aom_calc_vmaf(VmafModel *vmaf_model, const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *distorted, int bit_depth, + bool cal_vmaf_neg, double *vmaf) { + VmafContext *vmaf_context; + aom_init_vmaf_context(&vmaf_context, vmaf_model, cal_vmaf_neg); + const int frame_index = 0; + VmafPicture ref, dist; + if (vmaf_picture_alloc(&ref, VMAF_PIX_FMT_YUV420P, bit_depth, source->y_width, + source->y_height) || + vmaf_picture_alloc(&dist, VMAF_PIX_FMT_YUV420P, bit_depth, + source->y_width, source->y_height)) { + vmaf_fatal_error("Failed to alloc VMAF pictures."); + } + copy_picture(bit_depth, source, &ref); + copy_picture(bit_depth, distorted, &dist); + if (vmaf_read_pictures(vmaf_context, &ref, &dist, + /*picture index=*/frame_index)) { + vmaf_fatal_error("Failed to read VMAF pictures."); + } + + if (vmaf_read_pictures(vmaf_context, NULL, NULL, 0)) { + vmaf_fatal_error("Failed to flush context."); + } + + vmaf_picture_unref(&ref); + vmaf_picture_unref(&dist); + + vmaf_score_at_index(vmaf_context, vmaf_model, vmaf, frame_index); + aom_close_vmaf_context(vmaf_context); +} + +void aom_read_vmaf_image(VmafContext *vmaf_context, + const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *distorted, int bit_depth, + int frame_index) { VmafPicture ref, dist; if (vmaf_picture_alloc(&ref, VMAF_PIX_FMT_YUV420P, bit_depth, source->y_width, source->y_height) || @@ -284,8 +176,19 @@ vmaf_picture_unref(&ref); 
vmaf_picture_unref(&dist); - - vmaf_score_at_index(vmaf_context, vmaf_model, vmaf, frame_index); } -#endif // CONFIG_USE_VMAF_RC +double aom_calc_vmaf_at_index(VmafContext *vmaf_context, VmafModel *vmaf_model, + int frame_index) { + double vmaf; + if (vmaf_score_at_index(vmaf_context, vmaf_model, &vmaf, frame_index)) { + vmaf_fatal_error("Failed to calc VMAF scores."); + } + return vmaf; +} + +void aom_flush_vmaf_context(VmafContext *vmaf_context) { + if (vmaf_read_pictures(vmaf_context, NULL, NULL, 0)) { + vmaf_fatal_error("Failed to flush context."); + } +}
diff --git a/aom_dsp/vmaf.h b/aom_dsp/vmaf.h index d9da223..3ba8c8d 100644 --- a/aom_dsp/vmaf.h +++ b/aom_dsp/vmaf.h
@@ -15,33 +15,28 @@ #include <stdbool.h> #include "aom_scale/yv12config.h" -#if CONFIG_USE_VMAF_RC typedef struct VmafContext VmafContext; typedef struct VmafModel VmafModel; -#endif -#if CONFIG_USE_VMAF_RC -void aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model, - bool cal_vmaf_neg); -void aom_close_vmaf_context_rc(VmafContext *vmaf_context); +void aom_init_vmaf_context(VmafContext **vmaf_context, VmafModel *vmaf_model, + bool cal_vmaf_neg); +void aom_close_vmaf_context(VmafContext *vmaf_context); -void aom_init_vmaf_model_rc(VmafModel **vmaf_model, const char *model_path); -void aom_close_vmaf_model_rc(VmafModel *vmaf_model); +void aom_init_vmaf_model(VmafModel **vmaf_model, const char *model_path); +void aom_close_vmaf_model(VmafModel *vmaf_model); -void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model, - const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *distorted, - int bit_depth, int frame_index, double *vmaf); -#else -void aom_calc_vmaf(const char *model_path, const YV12_BUFFER_CONFIG *source, +void aom_calc_vmaf(VmafModel *vmaf_model, const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *distorted, int bit_depth, - double *vmaf); + bool cal_vmaf_neg, double *vmaf); -void aom_calc_vmaf_multi_frame( - void *user_data, const char *model_path, - int (*read_frame)(float *ref_data, float *main_data, float *temp_data, - int stride_byte, void *user_data), - int frame_width, int frame_height, int bit_depth, double *vmaf); -#endif // CONFIG_USE_VMAF_RC +void aom_read_vmaf_image(VmafContext *vmaf_context, + const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *distorted, int bit_depth, + int frame_index); + +double aom_calc_vmaf_at_index(VmafContext *vmaf_context, VmafModel *vmaf_model, + int frame_index); + +void aom_flush_vmaf_context(VmafContext *vmaf_context); #endif // AOM_AOM_DSP_VMAF_H_
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c index 5cfde3a..2171415 100644 --- a/av1/av1_cx_iface.c +++ b/av1/av1_cx_iface.c
@@ -177,25 +177,25 @@ 0, // gf_min_pyr_height 5, // gf_max_pyr_height AOM_TUNE_PSNR, // tuning - "/usr/local/share/model/vmaf_v0.6.1.pkl", // VMAF model path - 10, // cq_level - 0, // rc_max_intra_bitrate_pct - 0, // rc_max_inter_bitrate_pct - 0, // gf_cbr_boost_pct - 0, // lossless - 1, // enable_cdef - 1, // enable_restoration - 0, // force_video_mode - 1, // enable_obmc - 3, // disable_trellis_quant - 0, // enable_qm - DEFAULT_QM_Y, // qm_y - DEFAULT_QM_U, // qm_u - DEFAULT_QM_V, // qm_v - DEFAULT_QM_FIRST, // qm_min - DEFAULT_QM_LAST, // qm_max - 1, // max number of tile groups - 0, // mtu_size + "/usr/local/share/model/vmaf_v0.6.1.json", // VMAF model path + 10, // cq_level + 0, // rc_max_intra_bitrate_pct + 0, // rc_max_inter_bitrate_pct + 0, // gf_cbr_boost_pct + 0, // lossless + 1, // enable_cdef + 1, // enable_restoration + 0, // force_video_mode + 1, // enable_obmc + 3, // disable_trellis_quant + 0, // enable_qm + DEFAULT_QM_Y, // qm_y + DEFAULT_QM_U, // qm_u + DEFAULT_QM_V, // qm_v + DEFAULT_QM_FIRST, // qm_min + DEFAULT_QM_LAST, // qm_max + 1, // max number of tile groups + 0, // mtu_size AOM_TIMING_UNSPECIFIED, // No picture timing signaling in bitstream 0, // frame_parallel_decoding_mode 1, // enable dual filter @@ -540,15 +540,6 @@ } #endif -#if !CONFIG_USE_VMAF_RC - if (extra_cfg->tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) { - ERROR( - "This error may be related to the wrong configuration options: try to " - "set -DCONFIG_TUNE_VMAF=1 and -DCONFIG_USE_VMAF_RC=1 at the time CMake" - " is run."); - } -#endif - RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_BUTTERAUGLI); RANGE_CHECK(extra_cfg, timing_info_type, AOM_TIMING_UNSPECIFIED, @@ -2249,9 +2240,9 @@ av1_apply_encoding_flags(cpi_lap, flags); } -#if CONFIG_USE_VMAF_RC - aom_init_vmaf_model_rc(&cpi->vmaf_info.vmaf_model, - cpi->oxcf.tune_cfg.vmaf_model_path); +#if CONFIG_TUNE_VMAF + aom_init_vmaf_model(&cpi->vmaf_info.vmaf_model, + cpi->oxcf.tune_cfg.vmaf_model_path); #endif // Handle fixed 
keyframe intervals
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index 7203ff9..c37ed29 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c
@@ -1079,10 +1079,7 @@ cpi->vmaf_info.last_frame_vmaf[i] = -1.0; } cpi->vmaf_info.original_qindex = -1; - -#if CONFIG_USE_VMAF_RC cpi->vmaf_info.vmaf_model = NULL; -#endif } #endif @@ -2506,7 +2503,7 @@ if (!cpi->sf.hl_sf.disable_extra_sc_testing) av1_determine_sc_tools_with_encoding(cpi, q); -#if CONFIG_USE_VMAF_RC +#if CONFIG_TUNE_VMAF if (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) { av1_vmaf_neg_preprocessing(cpi, cpi->unscaled_source); }
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h index 57ab2f2..ef24a31 100644 --- a/av1/encoder/encoder_alloc.h +++ b/av1/encoder/encoder_alloc.h
@@ -202,10 +202,7 @@ #if CONFIG_TUNE_VMAF aom_free(cpi->vmaf_info.rdmult_scaling_factors); cpi->vmaf_info.rdmult_scaling_factors = NULL; - -#if CONFIG_USE_VMAF_RC - aom_close_vmaf_model_rc(cpi->vmaf_info.vmaf_model); -#endif + aom_close_vmaf_model(cpi->vmaf_info.vmaf_model); #endif #if CONFIG_TUNE_BUTTERAUGLI
diff --git a/av1/encoder/tune_vmaf.c b/av1/encoder/tune_vmaf.c index f5b6129..36af861 100644 --- a/av1/encoder/tune_vmaf.c +++ b/av1/encoder/tune_vmaf.c
@@ -15,9 +15,7 @@ #include "aom_ports/system_state.h" #include "av1/encoder/extend.h" #include "av1/encoder/rdopt.h" -#if CONFIG_USE_VMAF_RC #include "config/aom_scale_rtcd.h" -#endif static const double kBaselineVmaf = 97.42773; @@ -294,38 +292,27 @@ } static AOM_INLINE double cal_approx_vmaf(const AV1_COMP *const cpi, -#if CONFIG_USE_VMAF_RC - VmafContext *vmaf_context, - int *vmaf_cal_index, -#endif double source_variance, YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const sharpened) { const int bit_depth = cpi->td.mb.e_mbd.bd; + const bool cal_vmaf_neg = + cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN; double new_vmaf; -#if CONFIG_USE_VMAF_RC - aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, source, - sharpened, bit_depth, *vmaf_cal_index, &new_vmaf); - (*vmaf_cal_index)++; -#else - aom_calc_vmaf(cpi->oxcf.tune_cfg.vmaf_model_path, source, sharpened, - bit_depth, &new_vmaf); -#endif + aom_calc_vmaf(cpi->vmaf_info.vmaf_model, source, sharpened, bit_depth, + cal_vmaf_neg, &new_vmaf); const double sharpened_var = frame_average_variance(cpi, sharpened); return source_variance / sharpened_var * (new_vmaf - kBaselineVmaf); } static double find_best_frame_unsharp_amount_loop( - const AV1_COMP *const cpi, -#if CONFIG_USE_VMAF_RC - VmafContext *vmaf_context, int *vmaf_cal_index, -#endif - YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred, - YV12_BUFFER_CONFIG *const sharpened, double best_vmaf, - const double baseline_variance, const double unsharp_amount_start, - const double step_size, const int max_loop_count, const double max_amount) { + const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source, + YV12_BUFFER_CONFIG *const blurred, YV12_BUFFER_CONFIG *const sharpened, + double best_vmaf, const double baseline_variance, + const double unsharp_amount_start, const double step_size, + const int max_loop_count, const double max_amount) { const double min_amount = 0.0; int loop_count = 0; double approx_vmaf = 
best_vmaf; @@ -335,11 +322,7 @@ unsharp_amount += step_size; if (unsharp_amount > max_amount || unsharp_amount < min_amount) break; unsharp(cpi, source, blurred, sharpened, unsharp_amount); - approx_vmaf = cal_approx_vmaf(cpi, -#if CONFIG_USE_VMAF_RC - vmaf_context, vmaf_cal_index, -#endif - baseline_variance, source, sharpened); + approx_vmaf = cal_approx_vmaf(cpi, baseline_variance, source, sharpened); loop_count++; } while (approx_vmaf > best_vmaf && loop_count < max_loop_count); @@ -358,13 +341,6 @@ const AV1_COMMON *const cm = &cpi->common; const int width = source->y_width; const int height = source->y_height; -#if CONFIG_USE_VMAF_RC - VmafContext *vmaf_context; - aom_init_vmaf_context_rc( - &vmaf_context, cpi->vmaf_info.vmaf_model, - cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN); - int vmaf_cal_index = 0; -#endif YV12_BUFFER_CONFIG sharpened; memset(&sharpened, 0, sizeof(sharpened)); aom_alloc_frame_buffer( @@ -375,56 +351,32 @@ double unsharp_amount; if (unsharp_amount_start <= step_size) { unsharp_amount = find_best_frame_unsharp_amount_loop( - cpi, -#if CONFIG_USE_VMAF_RC - vmaf_context, &vmaf_cal_index, -#endif - source, blurred, &sharpened, 0.0, baseline_variance, 0.0, step_size, - max_loop_count, max_filter_amount); + cpi, source, blurred, &sharpened, 0.0, baseline_variance, 0.0, + step_size, max_loop_count, max_filter_amount); } else { double a0 = unsharp_amount_start - step_size, a1 = unsharp_amount_start; double v0, v1; unsharp(cpi, source, blurred, &sharpened, a0); - v0 = cal_approx_vmaf(cpi, -#if CONFIG_USE_VMAF_RC - vmaf_context, &vmaf_cal_index, -#endif - baseline_variance, source, &sharpened); + v0 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened); unsharp(cpi, source, blurred, &sharpened, a1); - v1 = cal_approx_vmaf(cpi, -#if CONFIG_USE_VMAF_RC - vmaf_context, &vmaf_cal_index, -#endif - baseline_variance, source, &sharpened); + v1 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened); if (fabs(v0 - v1) < 0.01) { 
unsharp_amount = a0; } else if (v0 > v1) { unsharp_amount = find_best_frame_unsharp_amount_loop( - cpi, -#if CONFIG_USE_VMAF_RC - vmaf_context, &vmaf_cal_index, -#endif - source, blurred, &sharpened, v0, baseline_variance, a0, -step_size, - max_loop_count, max_filter_amount); + cpi, source, blurred, &sharpened, v0, baseline_variance, a0, + -step_size, max_loop_count, max_filter_amount); } else { unsharp_amount = find_best_frame_unsharp_amount_loop( - cpi, -#if CONFIG_USE_VMAF_RC - vmaf_context, &vmaf_cal_index, -#endif - source, blurred, &sharpened, v1, baseline_variance, a1, step_size, - max_loop_count, max_filter_amount); + cpi, source, blurred, &sharpened, v1, baseline_variance, a1, + step_size, max_loop_count, max_filter_amount); } } aom_free_frame_buffer(&sharpened); -#if CONFIG_USE_VMAF_RC - aom_close_vmaf_context_rc(vmaf_context); -#endif return unsharp_amount; } -#if CONFIG_USE_VMAF_RC void av1_vmaf_neg_preprocessing(AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source) { aom_clear_system_state(); @@ -452,7 +404,6 @@ aom_free_frame_buffer(&blurred); aom_clear_system_state(); } -#endif void av1_vmaf_frame_preprocessing(AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source) { @@ -654,93 +605,6 @@ aom_clear_system_state(); } -#if !CONFIG_USE_VMAF_RC -typedef struct FrameData { - const YV12_BUFFER_CONFIG *source, *blurred; - int block_w, block_h, num_rows, num_cols, row, col, bit_depth; -} FrameData; - -// A callback function used to pass data to VMAF. -// Returns 0 after reading a frame. -// Returns 2 when there is no more frame to read. 
-static int update_frame(float *ref_data, float *main_data, float *temp_data, - int stride, void *user_data) { - FrameData *frames = (FrameData *)user_data; - const int width = frames->source->y_width; - const int height = frames->source->y_height; - const int row = frames->row; - const int col = frames->col; - const int num_rows = frames->num_rows; - const int num_cols = frames->num_cols; - const int block_w = frames->block_w; - const int block_h = frames->block_h; - const YV12_BUFFER_CONFIG *source = frames->source; - const YV12_BUFFER_CONFIG *blurred = frames->blurred; - const int bit_depth = frames->bit_depth; - const float scale_factor = 1.0f / (float)(1 << (bit_depth - 8)); - (void)temp_data; - stride /= (int)sizeof(*ref_data); - - for (int i = 0; i < height; ++i) { - float *ref, *main; - ref = ref_data + i * stride; - main = main_data + i * stride; - if (source->flags & YV12_FLAG_HIGHBITDEPTH) { - uint16_t *src; - src = CONVERT_TO_SHORTPTR(source->y_buffer) + i * source->y_stride; - for (int j = 0; j < width; ++j) { - ref[j] = main[j] = scale_factor * (float)src[j]; - } - } else { - uint8_t *src; - src = source->y_buffer + i * source->y_stride; - for (int j = 0; j < width; ++j) { - ref[j] = main[j] = (float)src[j]; - } - } - } - if (row < num_rows && col < num_cols) { - // Set current block - const int row_offset = row * block_h; - const int col_offset = col * block_w; - const int block_width = AOMMIN(width - col_offset, block_w); - const int block_height = AOMMIN(height - row_offset, block_h); - - float *main_buf = main_data + col_offset + row_offset * stride; - if (source->flags & YV12_FLAG_HIGHBITDEPTH) { - uint16_t *blurred_buf = CONVERT_TO_SHORTPTR(blurred->y_buffer) + - row_offset * blurred->y_stride + col_offset; - for (int i = 0; i < block_height; ++i) { - for (int j = 0; j < block_width; ++j) { - main_buf[j] = scale_factor * (float)blurred_buf[j]; - } - main_buf += stride; - blurred_buf += blurred->y_stride; - } - } else { - uint8_t *blurred_buf = - 
blurred->y_buffer + row_offset * blurred->y_stride + col_offset; - for (int i = 0; i < block_height; ++i) { - for (int j = 0; j < block_width; ++j) { - main_buf[j] = (float)blurred_buf[j]; - } - main_buf += stride; - blurred_buf += blurred->y_stride; - } - } - - frames->col++; - if (frames->col >= num_cols) { - frames->col = 0; - frames->row++; - } - return 0; - } else { - return 2; - } -} -#endif - void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) { AV1_COMMON *cm = &cpi->common; const int y_width = cpi->source->y_width; @@ -776,7 +640,6 @@ cm->features.byte_alignment); gaussian_blur(bit_depth, &resized_source, &blurred); -#if CONFIG_USE_VMAF_RC YV12_BUFFER_CONFIG recon; memset(&recon, 0, sizeof(recon)); aom_alloc_frame_buffer(&recon, resized_y_width, resized_y_height, 1, 1, @@ -786,26 +649,11 @@ aom_yv12_copy_frame(&resized_source, &recon, 1); VmafContext *vmaf_context; - aom_init_vmaf_context_rc( - &vmaf_context, cpi->vmaf_info.vmaf_model, - cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN); -#else - double *scores = aom_malloc(sizeof(*scores) * (num_rows * num_cols)); - memset(scores, 0, sizeof(*scores) * (num_rows * num_cols)); - FrameData frame_data; - frame_data.source = &resized_source; - frame_data.blurred = &blurred; - frame_data.block_w = resized_block_w; - frame_data.block_h = resized_block_h; - frame_data.num_rows = num_rows; - frame_data.num_cols = num_cols; - frame_data.row = 0; - frame_data.col = 0; - frame_data.bit_depth = bit_depth; - aom_calc_vmaf_multi_frame(&frame_data, cpi->oxcf.tune_cfg.vmaf_model_path, - update_frame, resized_y_width, resized_y_height, - bit_depth, scores); -#endif + const bool cal_vmaf_neg = + cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN; + aom_init_vmaf_context(&vmaf_context, cpi->vmaf_info.vmaf_model, cal_vmaf_neg); + unsigned int *sses = aom_malloc(sizeof(*sses) * (num_rows * num_cols)); + memset(sses, 0, sizeof(*sses) * (num_rows * num_cols)); // Loop through each 'block_size' block. 
for (int row = 0; row < num_rows; ++row) { @@ -820,11 +668,10 @@ uint8_t *const blurred_buf = blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y; - unsigned int sse; cpi->fn_ptr[resized_block_size].vf(orig_buf, resized_source.y_stride, - blurred_buf, blurred.y_stride, &sse); + blurred_buf, blurred.y_stride, + &sses[index]); -#if CONFIG_USE_VMAF_RC uint8_t *const recon_buf = recon.y_buffer + row_offset_y * recon.y_stride + col_offset_y; // Set recon buf @@ -839,10 +686,8 @@ resized_block_w, resized_block_h, 0.0); } - double vmaf; - aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, - &resized_source, &recon, bit_depth, index, - &vmaf); + aom_read_vmaf_image(vmaf_context, &resized_source, &recon, bit_depth, + index); // Restore recon buf if (cpi->common.seq_params.use_highbitdepth) { @@ -856,13 +701,18 @@ resized_source.y_stride, recon_buf, recon.y_stride, resized_block_w, resized_block_h, 0.0); } -#else - const double vmaf = scores[index]; -#endif + } + } + aom_flush_vmaf_context(vmaf_context); + for (int row = 0; row < num_rows; ++row) { + for (int col = 0; col < num_cols; ++col) { + const int index = row * num_cols + col; + const double vmaf = aom_calc_vmaf_at_index( + vmaf_context, cpi->vmaf_info.vmaf_model, index); const double dvmaf = kBaselineVmaf - vmaf; const double mse = - (double)sse / (double)(resized_y_width * resized_y_height); + (double)sses[index] / (double)(resized_y_width * resized_y_height); double weight; const double eps = 0.01 / (num_rows * num_cols); if (dvmaf < eps || mse < eps) { @@ -879,11 +729,8 @@ aom_free_frame_buffer(&resized_source); aom_free_frame_buffer(&blurred); -#if CONFIG_USE_VMAF_RC - aom_close_vmaf_context_rc(vmaf_context); -#else - aom_free(scores); -#endif + aom_close_vmaf_context(vmaf_context); + aom_free(sses); aom_clear_system_state(); } @@ -1094,23 +941,23 @@ return qindex; } -#if CONFIG_USE_VMAF_RC static AOM_INLINE double cal_approx_score( - AV1_COMP *const cpi, VmafContext *vmaf_context, 
int vmaf_cal_index, - double src_variance, double new_variance, double src_score, - YV12_BUFFER_CONFIG *const src, YV12_BUFFER_CONFIG *const recon_sharpened) { + AV1_COMP *const cpi, double src_variance, double new_variance, + double src_score, YV12_BUFFER_CONFIG *const src, + YV12_BUFFER_CONFIG *const recon_sharpened) { double score; const uint32_t bit_depth = cpi->td.mb.e_mbd.bd; - aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, src, - recon_sharpened, bit_depth, vmaf_cal_index, &score); + const bool cal_vmaf_neg = + cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN; + aom_calc_vmaf(cpi->vmaf_info.vmaf_model, src, recon_sharpened, bit_depth, + cal_vmaf_neg, &score); return src_variance / new_variance * (score - src_score); } static double find_best_frame_unsharp_amount_loop_neg( - AV1_COMP *const cpi, VmafContext *vmaf_context, double src_variance, - double base_score, YV12_BUFFER_CONFIG *const src, - YV12_BUFFER_CONFIG *const recon, YV12_BUFFER_CONFIG *const ref, - YV12_BUFFER_CONFIG *const src_blurred, + AV1_COMP *const cpi, double src_variance, double base_score, + YV12_BUFFER_CONFIG *const src, YV12_BUFFER_CONFIG *const recon, + YV12_BUFFER_CONFIG *const ref, YV12_BUFFER_CONFIG *const src_blurred, YV12_BUFFER_CONFIG *const recon_blurred, YV12_BUFFER_CONFIG *const src_sharpened, YV12_BUFFER_CONFIG *const recon_sharpened, FULLPEL_MV *mvs, @@ -1120,7 +967,6 @@ int loop_count = 0; double approx_score = best_score; double unsharp_amount = unsharp_amount_start; - int vmaf_cal_index = 3; do { best_score = approx_score; @@ -1130,9 +976,8 @@ unsharp(cpi, src, src_blurred, src_sharpened, unsharp_amount); const double new_variance = residual_frame_average_variance(cpi, src_sharpened, ref, mvs); - approx_score = - cal_approx_score(cpi, vmaf_context, vmaf_cal_index++, src_variance, - new_variance, base_score, src, recon_sharpened); + approx_score = cal_approx_score(cpi, src_variance, new_variance, base_score, + src, recon_sharpened); loop_count++; 
} while (approx_score > best_score && loop_count < max_loop_count); @@ -1143,11 +988,11 @@ } static double find_best_frame_unsharp_amount_neg( - AV1_COMP *const cpi, VmafContext *vmaf_context, - YV12_BUFFER_CONFIG *const src, YV12_BUFFER_CONFIG *const recon, - YV12_BUFFER_CONFIG *const ref, double base_score, - const double unsharp_amount_start, const double step_size, - const int max_loop_count, const double max_filter_amount) { + AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const src, + YV12_BUFFER_CONFIG *const recon, YV12_BUFFER_CONFIG *const ref, + double base_score, const double unsharp_amount_start, + const double step_size, const int max_loop_count, + const double max_filter_amount) { FULLPEL_MV *mvs = NULL; const double src_variance = residual_frame_average_variance(cpi, src, ref, mvs); @@ -1181,32 +1026,28 @@ unsharp(cpi, src, &src_blurred, &src_sharpened, unsharp_amount_start); const double variance_start = residual_frame_average_variance(cpi, &src_sharpened, ref, mvs); - const double score_start = - cal_approx_score(cpi, vmaf_context, 1, src_variance, variance_start, - base_score, src, &recon_sharpened); + const double score_start = cal_approx_score( + cpi, src_variance, variance_start, base_score, src, &recon_sharpened); const double unsharp_amount_next = unsharp_amount_start + step_size; unsharp(cpi, recon, &recon_blurred, &recon_sharpened, unsharp_amount_next); unsharp(cpi, src, &src_blurred, &src_sharpened, unsharp_amount_next); const double variance_next = residual_frame_average_variance(cpi, &src_sharpened, ref, mvs); - const double score_next = - cal_approx_score(cpi, vmaf_context, 2, src_variance, variance_next, - base_score, src, &recon_sharpened); + const double score_next = cal_approx_score(cpi, src_variance, variance_next, + base_score, src, &recon_sharpened); double unsharp_amount; if (score_next > score_start) { unsharp_amount = find_best_frame_unsharp_amount_loop_neg( - cpi, vmaf_context, src_variance, base_score, src, recon, ref, - 
&src_blurred, &recon_blurred, &src_sharpened, &recon_sharpened, mvs, - score_next, unsharp_amount_next, step_size, max_loop_count, - max_filter_amount); + cpi, src_variance, base_score, src, recon, ref, &src_blurred, + &recon_blurred, &src_sharpened, &recon_sharpened, mvs, score_next, + unsharp_amount_next, step_size, max_loop_count, max_filter_amount); } else { unsharp_amount = find_best_frame_unsharp_amount_loop_neg( - cpi, vmaf_context, src_variance, base_score, src, recon, ref, - &src_blurred, &recon_blurred, &src_sharpened, &recon_sharpened, mvs, - score_start, unsharp_amount_start, -step_size, max_loop_count, - max_filter_amount); + cpi, src_variance, base_score, src, recon, ref, &src_blurred, + &recon_blurred, &src_sharpened, &recon_sharpened, mvs, score_start, + unsharp_amount_start, -step_size, max_loop_count, max_filter_amount); } aom_free_frame_buffer(&recon_sharpened); @@ -1216,7 +1057,6 @@ aom_free(mvs); return unsharp_amount; } -#endif // CONFIG_USE_VMAF_RC void av1_update_vmaf_curve(AV1_COMP *cpi) { YV12_BUFFER_CONFIG *source = cpi->source; @@ -1225,19 +1065,12 @@ const GF_GROUP *const gf_group = &cpi->gf_group; const int layer_depth = AOMMIN(gf_group->layer_depth[gf_group->index], MAX_ARF_LAYERS - 1); -#if CONFIG_USE_VMAF_RC double base_score; - VmafContext *vmaf_context; - aom_init_vmaf_context_rc( - &vmaf_context, cpi->vmaf_info.vmaf_model, - cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN); - aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, source, - recon, bit_depth, 0, &base_score); + const bool cal_vmaf_neg = + cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN; + aom_calc_vmaf(cpi->vmaf_info.vmaf_model, source, recon, bit_depth, + cal_vmaf_neg, &base_score); cpi->vmaf_info.last_frame_vmaf[layer_depth] = base_score; -#else - aom_calc_vmaf(cpi->oxcf.tune_cfg.vmaf_model_path, source, recon, bit_depth, - &cpi->vmaf_info.last_frame_vmaf[layer_depth]); -#endif // CONFIG_USE_VMAF_RC if 
(cpi->common.seq_params.use_highbitdepth) { assert(source->flags & YV12_FLAG_HIGHBITDEPTH); assert(recon->flags & YV12_FLAG_HIGHBITDEPTH); @@ -1248,7 +1081,6 @@ (double)aom_get_y_sse(source, recon); } -#if CONFIG_USE_VMAF_RC if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) { YV12_BUFFER_CONFIG *last, *next; get_neighbor_frames(cpi, &last, &next); @@ -1256,10 +1088,8 @@ get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth); const int max_loop_count = 5; cpi->vmaf_info.last_frame_unsharp_amount[layer_depth] = - find_best_frame_unsharp_amount_neg( - cpi, vmaf_context, source, recon, last, base_score, - best_unsharp_amount_start, 0.025, max_loop_count, 1.01); + find_best_frame_unsharp_amount_neg(cpi, source, recon, last, base_score, + best_unsharp_amount_start, 0.025, + max_loop_count, 1.01); } - aom_close_vmaf_context_rc(vmaf_context); -#endif // CONFIG_USE_VMAF_RC }
diff --git a/av1/encoder/tune_vmaf.h b/av1/encoder/tune_vmaf.h index 01c3068..4625fb9 100644 --- a/av1/encoder/tune_vmaf.h +++ b/av1/encoder/tune_vmaf.h
@@ -36,10 +36,8 @@ // Stores the original qindex before scaling. int original_qindex; -#if CONFIG_USE_VMAF_RC // VMAF model used in VMAF calculations. VmafModel *vmaf_model; -#endif } TuneVMAFInfo; typedef struct AV1_COMP AV1_COMP; @@ -48,9 +46,7 @@ void av1_vmaf_frame_preprocessing(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source); -#ifdef CONFIG_USE_VMAF_RC void av1_vmaf_neg_preprocessing(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source); -#endif void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi);
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake index 2680b91..e075d49 100644 --- a/build/cmake/aom_config_defaults.cmake +++ b/build/cmake/aom_config_defaults.cmake
@@ -115,7 +115,6 @@ set_aom_config_var(DECODE_HEIGHT_LIMIT 0 "Set limit for decode height.") set_aom_config_var(DECODE_WIDTH_LIMIT 0 "Set limit for decode width.") set_aom_config_var(CONFIG_TUNE_VMAF 0 "Enable encoding tuning for VMAF.") -set_aom_config_var(CONFIG_USE_VMAF_RC 0 "Use libvmaf_rc tune for VMAF_NEG.") set_aom_config_var(CONFIG_TUNE_BUTTERAUGLI 0 "Enable encoding tuning for Butteraugli.") set_aom_config_var(STATIC_LINK_JXL 0 "Statically link the JPEG-XL library.")