Speed up VMAF calculations when using libvmaf_rc
by reducing the number of vmaf_context initializations. This saves 4% of
encoding time in the 1080p test (30 frames, speed 1).
Compared to encoding with libvmaf, User time drops by 40% and Sys time
by 96%. However, Real (elapsed) time is slower because multithreading
is not enabled.
Performance counter stats for './aomenc_old basketballdrive_1080p50.y4m
--limit=30 -o output --tune=vmaf --cpu-used=1':
libvmaf
441.762382891 seconds time elapsed
1422.326568000 seconds user
51.411417000 seconds sys
libvmaf_rc
843.705030306 seconds time elapsed
841.818651000 seconds user
1.819919000 seconds sys
Change-Id: I5e29aff63ccd3fdfe268ef299b06df5a7126842c
(cherry picked from commit b2a3ecb3d95c036f9a9810ea716080cf4c2c9f34)
diff --git a/aom_dsp/vmaf.c b/aom_dsp/vmaf.c
index 35970f3..636aa64 100644
--- a/aom_dsp/vmaf.c
+++ b/aom_dsp/vmaf.c
@@ -169,7 +169,7 @@
#endif
#if CONFIG_USE_VMAF_RC
-void aom_init_vmaf_rc(VmafModel **vmaf_model, const char *model_path) {
+void aom_init_vmaf_model_rc(VmafModel **vmaf_model, const char *model_path) {
if (*vmaf_model != NULL) return;
VmafModelConfig model_cfg;
model_cfg.flags = VMAF_MODEL_FLAG_DISABLE_CLIP;
@@ -181,7 +181,7 @@
}
}
-void aom_close_vmaf_rc(VmafModel *vmaf_model) {
+void aom_close_vmaf_model_rc(VmafModel *vmaf_model) {
vmaf_model_destroy(vmaf_model);
}
@@ -211,38 +211,53 @@
}
}
-void aom_calc_vmaf_rc(VmafModel *vmaf_model, const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *distorted, int bit_depth,
- int cal_vmaf_neg, double *vmaf) {
+void aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model,
+ bool cal_vmaf_neg) {
VmafConfiguration cfg;
cfg.log_level = VMAF_LOG_LEVEL_NONE;
cfg.n_threads = 0;
cfg.n_subsample = 0;
cfg.cpumask = 0;
- VmafContext *vmaf_context;
- if (vmaf_init(&vmaf_context, cfg)) {
+ if (vmaf_init(vmaf_context, cfg)) {
vmaf_fatal_error("Failed to init VMAF context.");
}
- if (vmaf_use_features_from_model(vmaf_context, vmaf_model)) {
+ if (vmaf_use_features_from_model(*vmaf_context, vmaf_model)) {
vmaf_fatal_error("Failed to load feature extractors from VMAF model.");
}
if (cal_vmaf_neg) {
VmafFeatureDictionary *vif_feature = NULL;
vmaf_feature_dictionary_set(&vif_feature, "vif_enhn_gain_limit", "1.0");
- if (vmaf_use_feature(vmaf_context, "float_vif", vif_feature)) {
+ if (vmaf_use_feature(*vmaf_context, "float_vif", vif_feature)) {
vmaf_fatal_error("Failed to use feature float_vif.");
}
VmafFeatureDictionary *adm_feature = NULL;
vmaf_feature_dictionary_set(&adm_feature, "adm_enhn_gain_limit", "1.0");
- if (vmaf_use_feature(vmaf_context, "float_adm", adm_feature)) {
+ if (vmaf_use_feature(*vmaf_context, "float_adm", adm_feature)) {
vmaf_fatal_error("Failed to use feature float_adm.");
}
}
+ VmafFeatureDictionary *motion_force_zero = NULL;
+ vmaf_feature_dictionary_set(&motion_force_zero, "motion_force_zero", "true");
+ if (vmaf_use_feature(*vmaf_context, "float_motion", motion_force_zero)) {
+ vmaf_fatal_error("Failed to use feature float_motion.");
+ }
+}
+
+void aom_close_vmaf_context_rc(VmafContext *vmaf_context) {
+ if (vmaf_close(vmaf_context)) {
+ vmaf_fatal_error("Failed to close VMAF context.");
+ }
+}
+
+void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model,
+ const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *distorted,
+ int bit_depth, int frame_index, double *vmaf) {
VmafPicture ref, dist;
if (vmaf_picture_alloc(&ref, VMAF_PIX_FMT_YUV420P, bit_depth, source->y_width,
source->y_height) ||
@@ -252,18 +267,15 @@
}
copy_picture(bit_depth, source, &ref);
copy_picture(bit_depth, distorted, &dist);
- if (vmaf_read_pictures(vmaf_context, &ref, &dist, /*picture index=*/0)) {
+ if (vmaf_read_pictures(vmaf_context, &ref, &dist,
+ /*picture index=*/frame_index)) {
vmaf_fatal_error("Failed to read VMAF pictures.");
}
vmaf_picture_unref(&ref);
vmaf_picture_unref(&dist);
- vmaf_score_at_index(vmaf_context, vmaf_model, vmaf, 0);
-
- if (vmaf_close(vmaf_context)) {
- vmaf_fatal_error("Failed to close VMAF context.");
- }
+ vmaf_score_at_index(vmaf_context, vmaf_model, vmaf, frame_index);
}
#endif // CONFIG_USE_VMAF_RC
diff --git a/aom_dsp/vmaf.h b/aom_dsp/vmaf.h
index 607d4bd..65ba199 100644
--- a/aom_dsp/vmaf.h
+++ b/aom_dsp/vmaf.h
@@ -12,6 +12,7 @@
#ifndef AOM_AOM_DSP_VMAF_H_
#define AOM_AOM_DSP_VMAF_H_
+#include <stdbool.h>
#include "aom_scale/yv12config.h"
#if CONFIG_USE_VMAF_RC
@@ -47,13 +48,17 @@
} TuneVMAFInfo;
#if CONFIG_USE_VMAF_RC
-void aom_init_vmaf_rc(VmafModel **vmaf_model, const char *model_path);
+void aom_init_vmaf_context_rc(VmafContext **vmaf_context, VmafModel *vmaf_model,
+ bool cal_vmaf_neg);
+void aom_close_vmaf_context_rc(VmafContext *vmaf_context);
-void aom_calc_vmaf_rc(VmafModel *vmaf_model, const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *distorted, int bit_depth,
- int cal_vmaf_neg, double *vmaf);
+void aom_init_vmaf_model_rc(VmafModel **vmaf_model, const char *model_path);
+void aom_close_vmaf_model_rc(VmafModel *vmaf_model);
-void aom_close_vmaf_rc(VmafModel *vmaf_model);
+void aom_calc_vmaf_at_index_rc(VmafContext *vmaf_context, VmafModel *vmaf_model,
+ const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *distorted,
+ int bit_depth, int frame_index, double *vmaf);
#else
void aom_calc_vmaf(const char *model_path, const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *distorted, int bit_depth,
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 838229a..bb8fb9a 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -2453,8 +2453,8 @@
}
#if CONFIG_USE_VMAF_RC
- aom_init_vmaf_rc(&cpi->vmaf_info.vmaf_model,
- cpi->oxcf.tune_cfg.vmaf_model_path);
+ aom_init_vmaf_model_rc(&cpi->vmaf_info.vmaf_model,
+ cpi->oxcf.tune_cfg.vmaf_model_path);
#endif
// Handle fixed keyframe intervals
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 4f84710..b60ae8d 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -240,7 +240,7 @@
cpi->vmaf_info.rdmult_scaling_factors = NULL;
#if CONFIG_USE_VMAF_RC
- aom_close_vmaf_rc(cpi->vmaf_info.vmaf_model);
+ aom_close_vmaf_model_rc(cpi->vmaf_info.vmaf_model);
#endif
#endif
diff --git a/av1/encoder/tune_vmaf.c b/av1/encoder/tune_vmaf.c
index ec63379..08c3d4e 100644
--- a/av1/encoder/tune_vmaf.c
+++ b/av1/encoder/tune_vmaf.c
@@ -158,16 +158,20 @@
return var;
}
-static double cal_approx_vmaf(const AV1_COMP *const cpi, double source_variance,
+static double cal_approx_vmaf(const AV1_COMP *const cpi,
+#if CONFIG_USE_VMAF_RC
+ VmafContext *vmaf_context, int *vmaf_cal_index,
+#endif
+ double source_variance,
YV12_BUFFER_CONFIG *const source,
YV12_BUFFER_CONFIG *const sharpened) {
const int bit_depth = cpi->td.mb.e_mbd.bd;
double new_vmaf;
#if CONFIG_USE_VMAF_RC
- aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, source, sharpened, bit_depth,
- cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN,
- &new_vmaf);
+ aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, source,
+ sharpened, bit_depth, *vmaf_cal_index, &new_vmaf);
+ (*vmaf_cal_index)++;
#else
aom_calc_vmaf(cpi->oxcf.tune_cfg.vmaf_model_path, source, sharpened,
bit_depth, &new_vmaf);
@@ -178,11 +182,14 @@
}
static double find_best_frame_unsharp_amount_loop(
- const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source,
- YV12_BUFFER_CONFIG *const blurred, YV12_BUFFER_CONFIG *const sharpened,
- double best_vmaf, const double baseline_variance,
- const double unsharp_amount_start, const double step_size,
- const int max_loop_count, const double max_amount) {
+ const AV1_COMP *const cpi,
+#if CONFIG_USE_VMAF_RC
+ VmafContext *vmaf_context, int *vmaf_cal_index,
+#endif
+ YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred,
+ YV12_BUFFER_CONFIG *const sharpened, double best_vmaf,
+ const double baseline_variance, const double unsharp_amount_start,
+ const double step_size, const int max_loop_count, const double max_amount) {
const double min_amount = 0.0;
int loop_count = 0;
double approx_vmaf = best_vmaf;
@@ -192,7 +199,11 @@
unsharp_amount += step_size;
if (unsharp_amount > max_amount || unsharp_amount < min_amount) break;
unsharp(cpi, source, blurred, sharpened, unsharp_amount);
- approx_vmaf = cal_approx_vmaf(cpi, baseline_variance, source, sharpened);
+ approx_vmaf = cal_approx_vmaf(cpi,
+#if CONFIG_USE_VMAF_RC
+ vmaf_context, vmaf_cal_index,
+#endif
+ baseline_variance, source, sharpened);
loop_count++;
} while (approx_vmaf > best_vmaf && loop_count < max_loop_count);
@@ -211,7 +222,13 @@
const AV1_COMMON *const cm = &cpi->common;
const int width = source->y_width;
const int height = source->y_height;
-
+#if CONFIG_USE_VMAF_RC
+ VmafContext *vmaf_context;
+ aom_init_vmaf_context_rc(
+ &vmaf_context, cpi->vmaf_info.vmaf_model,
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN);
+ int vmaf_cal_index = 0;
+#endif
YV12_BUFFER_CONFIG sharpened;
memset(&sharpened, 0, sizeof(sharpened));
aom_alloc_frame_buffer(
@@ -222,29 +239,52 @@
double unsharp_amount;
if (unsharp_amount_start <= step_size) {
unsharp_amount = find_best_frame_unsharp_amount_loop(
- cpi, source, blurred, &sharpened, 0.0, baseline_variance, 0.0,
- step_size, max_loop_count, max_filter_amount);
+ cpi,
+#if CONFIG_USE_VMAF_RC
+ vmaf_context, &vmaf_cal_index,
+#endif
+ source, blurred, &sharpened, 0.0, baseline_variance, 0.0, step_size,
+ max_loop_count, max_filter_amount);
} else {
double a0 = unsharp_amount_start - step_size, a1 = unsharp_amount_start;
double v0, v1;
unsharp(cpi, source, blurred, &sharpened, a0);
- v0 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
+ v0 = cal_approx_vmaf(cpi,
+#if CONFIG_USE_VMAF_RC
+ vmaf_context, &vmaf_cal_index,
+#endif
+ baseline_variance, source, &sharpened);
unsharp(cpi, source, blurred, &sharpened, a1);
- v1 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
+ v1 = cal_approx_vmaf(cpi,
+#if CONFIG_USE_VMAF_RC
+ vmaf_context, &vmaf_cal_index,
+#endif
+ baseline_variance, source, &sharpened);
if (fabs(v0 - v1) < 0.01) {
unsharp_amount = a0;
} else if (v0 > v1) {
unsharp_amount = find_best_frame_unsharp_amount_loop(
- cpi, source, blurred, &sharpened, v0, baseline_variance, a0,
- -step_size, max_loop_count, max_filter_amount);
+ cpi,
+#if CONFIG_USE_VMAF_RC
+ vmaf_context, &vmaf_cal_index,
+#endif
+ source, blurred, &sharpened, v0, baseline_variance, a0, -step_size,
+ max_loop_count, max_filter_amount);
} else {
unsharp_amount = find_best_frame_unsharp_amount_loop(
- cpi, source, blurred, &sharpened, v1, baseline_variance, a1,
- step_size, max_loop_count, max_filter_amount);
+ cpi,
+#if CONFIG_USE_VMAF_RC
+ vmaf_context, &vmaf_cal_index,
+#endif
+ source, blurred, &sharpened, v1, baseline_variance, a1, step_size,
+ max_loop_count, max_filter_amount);
}
}
aom_free_frame_buffer(&sharpened);
+#if CONFIG_USE_VMAF_RC
+ aom_close_vmaf_context_rc(vmaf_context);
+#endif
return unsharp_amount;
}
@@ -588,6 +628,11 @@
cpi->oxcf.border_in_pixels,
cm->features.byte_alignment);
aom_yv12_copy_frame(&resized_source, &recon, 1);
+
+ VmafContext *vmaf_context;
+ aom_init_vmaf_context_rc(
+ &vmaf_context, cpi->vmaf_info.vmaf_model,
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN);
#else
double *scores = aom_malloc(sizeof(*scores) * (num_rows * num_cols));
memset(scores, 0, sizeof(*scores) * (num_rows * num_cols));
@@ -639,9 +684,9 @@
}
double vmaf;
- aom_calc_vmaf_rc(
- cpi->vmaf_info.vmaf_model, &resized_source, &recon, bit_depth,
- cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN, &vmaf);
+ aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model,
+ &resized_source, &recon, bit_depth, index,
+ &vmaf);
// Restore recon buf
if (cpi->common.seq_params.use_highbitdepth) {
@@ -678,7 +723,9 @@
aom_free_frame_buffer(&resized_source);
aom_free_frame_buffer(&blurred);
-#if !CONFIG_USE_VMAF_RC
+#if CONFIG_USE_VMAF_RC
+ aom_close_vmaf_context_rc(vmaf_context);
+#else
aom_free(scores);
#endif
aom_clear_system_state();
@@ -879,31 +926,34 @@
#if CONFIG_USE_VMAF_RC
static double cal_approx_score(const AV1_COMP *const cpi,
+ VmafContext *vmaf_context, int vmaf_cal_index,
YV12_BUFFER_CONFIG *const ref,
YV12_BUFFER_CONFIG *const sharpened) {
double score;
const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
- aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, ref, sharpened, bit_depth, 1,
- &score);
+ aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, ref,
+ sharpened, bit_depth, vmaf_cal_index, &score);
return score;
}
static double find_best_frame_unsharp_amount_loop_neg(
- const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const ref,
- YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred,
- YV12_BUFFER_CONFIG *const sharpened, double best_score,
- const double unsharp_amount_start, const double step_size,
- const int max_loop_count, const double max_amount) {
+ const AV1_COMP *const cpi, VmafContext *vmaf_context,
+ YV12_BUFFER_CONFIG *const ref, YV12_BUFFER_CONFIG *const source,
+ YV12_BUFFER_CONFIG *const blurred, YV12_BUFFER_CONFIG *const sharpened,
+ double best_score, const double unsharp_amount_start,
+ const double step_size, const int max_loop_count, const double max_amount) {
const double min_amount = 0.0;
int loop_count = 0;
double approx_score = best_score;
double unsharp_amount = unsharp_amount_start;
+ int vmaf_cal_index = 2;
do {
best_score = approx_score;
unsharp_amount += step_size;
if (unsharp_amount > max_amount || unsharp_amount < min_amount) break;
unsharp(cpi, source, blurred, sharpened, unsharp_amount);
- approx_score = cal_approx_score(cpi, ref, sharpened);
+ approx_score =
+ cal_approx_score(cpi, vmaf_context, vmaf_cal_index++, ref, sharpened);
loop_count++;
} while (approx_score > best_score && loop_count < max_loop_count);
@@ -913,17 +963,18 @@
}
static double find_best_frame_unsharp_amount_neg(
- const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const ref,
- YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred,
- const double unsharp_amount_start, const double step_size,
- const int max_loop_count, const double max_filter_amount) {
+ const AV1_COMP *const cpi, VmafContext *vmaf_context,
+ YV12_BUFFER_CONFIG *const ref, YV12_BUFFER_CONFIG *const source,
+ YV12_BUFFER_CONFIG *const blurred, const double unsharp_amount_start,
+ const double step_size, const int max_loop_count,
+ const double max_filter_amount) {
const AV1_COMMON *const cm = &cpi->common;
const int width = source->y_width;
const int height = source->y_height;
double best_score = 0.0;
- aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, ref, source, cpi->td.mb.e_mbd.bd,
- 1, &best_score);
+ aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, ref,
+ source, cpi->td.mb.e_mbd.bd, 1, &best_score);
YV12_BUFFER_CONFIG sharpened;
memset(&sharpened, 0, sizeof(sharpened));
aom_alloc_frame_buffer(
@@ -931,8 +982,8 @@
cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
const double unsharp_amount = find_best_frame_unsharp_amount_loop_neg(
- cpi, ref, source, blurred, &sharpened, best_score, unsharp_amount_start,
- step_size, max_loop_count, max_filter_amount);
+ cpi, vmaf_context, ref, source, blurred, &sharpened, best_score,
+ unsharp_amount_start, step_size, max_loop_count, max_filter_amount);
aom_free_frame_buffer(&sharpened);
return unsharp_amount;
@@ -943,9 +994,13 @@
YV12_BUFFER_CONFIG *recon) {
const int bit_depth = cpi->td.mb.e_mbd.bd;
#if CONFIG_USE_VMAF_RC
- aom_calc_vmaf_rc(cpi->vmaf_info.vmaf_model, source, recon, bit_depth,
- cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN,
- &cpi->vmaf_info.last_frame_vmaf);
+ VmafContext *vmaf_context;
+ aom_init_vmaf_context_rc(
+ &vmaf_context, cpi->vmaf_info.vmaf_model,
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN);
+ aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, source,
+ recon, bit_depth, 0,
+ &cpi->vmaf_info.last_frame_vmaf);
#else
aom_calc_vmaf(cpi->oxcf.tune_cfg.vmaf_model_path, source, recon, bit_depth,
&cpi->vmaf_info.last_frame_vmaf);
@@ -973,9 +1028,10 @@
gaussian_blur(bit_depth, recon, &blurred);
cpi->vmaf_info.best_unsharp_amount = find_best_frame_unsharp_amount_neg(
- cpi, source, recon, &blurred, 0.0, 0.025, 20, 1.01);
+ cpi, vmaf_context, source, recon, &blurred, 0.0, 0.025, 20, 1.01);
aom_free_frame_buffer(&blurred);
}
+ aom_close_vmaf_context_rc(vmaf_context);
#endif
}