Speed up VMAF pre-processing
by using the previous frame's best unsharp amount (filter strength) as the
initial guess when searching for the current frame's unsharp amount.
VMAF BD-rate changes
--------------------
MODE midres hdres lowres_10bd midres_10bd
VMAF -0.50% 0.06% -1.32% 0.01%
VMAF_W_PP -1.05% -0.35% -0.62% -0.70%
Compression speed of different modes (instruction count relative to PSNR)
-------------------------------------------------------------------------
MODE 480P 1080P
PSNR (baseline) 1.00x 1.00x
VMAF (before) 1.50x 4.14x
VMAF (after) 1.49x 3.87x
VMAF_W_PP (before) 1.21x 1.45x
VMAF_W_PP (after) 1.16x 1.15x
VMAF_WO_PP 1.19x 3.36x
Full speed test results
-----------------------
Performance counter stats for './aomenc red_kayak_480p.y4m --limit=30
-o output_new --tune=psnr --cpu-used=1':
642,437,621,503 instructions:u
84.889688323 seconds time elapsed
84.588830000 seconds user
0.291975000 seconds sys
Performance counter stats for './aomenc red_kayak_480p.y4m --limit=30
-o output_new --tune=vmaf_with_preprocessing --cpu-used=1':
Before:
775,025,691,607 instructions:u
109.458205579 seconds time elapsed
107.989239000 seconds user
4.284554000 seconds sys
After:
747,907,904,154 instructions:u
105.304168262 seconds time elapsed
104.205238000 seconds user
3.659274000 seconds sys
Performance counter stats for './aomenc red_kayak_480p.y4m --limit=30
-o output_new --tune=vmaf --cpu-used=1':
Before:
964,828,402,519 instructions:u
123.807091424 seconds time elapsed
161.892974000 seconds user
42.540103000 seconds sys
After:
958,203,027,249 instructions:u
166.730655440 seconds time elapsed
172.417289000 seconds user
96.809565000 seconds sys
Performance counter stats for './aomenc red_kayak_480p.y4m --limit=30
-o output_new --tune=vmaf_without_preprocessing --cpu-used=1':
762,965,561,826 instructions:u
79.566220976 seconds time elapsed
118.461060000 seconds user
4.023039000 seconds sys
------------------
Performance counter stats for './aomenc basketballdrive_1080p50.y4m
--limit=30 -o output_new --tune=psnr --cpu-used=1':
1,582,885,665,086 instructions:u
204.687949181 seconds time elapsed
203.775777000 seconds user
0.811887000 seconds sys
Performance counter stats for './aomenc basketballdrive_1080p50.y4m
--limit=30 -o output_new --tune=vmaf_with_preprocessing
--cpu-used=1':
Before:
2,298,587,912,062 instructions:u
361.306249556 seconds time elapsed
326.457704000 seconds user
41.029086000 seconds sys
After:
1,821,777,148,459 instructions:u
237.958643869 seconds time elapsed
233.000579000 seconds user
6.904995000 seconds sys
Performance counter stats for './aomenc basketballdrive_1080p50.y4m
--limit=30 -o output_new --tune=vmaf --cpu-used=1':
Before:
6,543,459,539,854 instructions:u
553.099946250 seconds time elapsed
1257.052953000 seconds user
250.151702000 seconds sys
After:
6,118,499,090,110 instructions:u
610.973700082 seconds time elapsed
1254.925749000 seconds user
439.220760000 seconds sys
Performance counter stats for './aomenc basketballdrive_1080p50.y4m
--limit=30 -o output_new --tune=vmaf_without_preprocessing
--cpu-used=1':
5,311,331,047,231 instructions:u
247.754365865 seconds time elapsed
984.609408000 seconds user
12.773107000 seconds sys
Change-Id: Ifdcfbccdd976ba537f23cbbfebe144402fa032c6
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 2aa9a3e..ee104e6 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3194,6 +3194,7 @@
CHECK_MEM_ERROR(cm, cpi->vmaf_rdmult_scaling_factors,
aom_calloc(num_rows * num_cols,
sizeof(*cpi->vmaf_rdmult_scaling_factors)));
+ cpi->last_frame_unsharp_amount = 0.0;
}
#endif
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 29e8f68..63c3b38 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1131,6 +1131,7 @@
double *vmaf_rdmult_scaling_factors;
double last_frame_ysse;
double last_frame_vmaf;
+ double last_frame_unsharp_amount;
#endif
int use_svc;
diff --git a/av1/encoder/tune_vmaf.c b/av1/encoder/tune_vmaf.c
index 6a5ea75..72fd846 100644
--- a/av1/encoder/tune_vmaf.c
+++ b/av1/encoder/tune_vmaf.c
@@ -152,16 +152,51 @@
return var;
}
+static double cal_approx_vmaf(const AV1_COMP *const cpi, double source_variance,
+ YV12_BUFFER_CONFIG *const source,
+ YV12_BUFFER_CONFIG *const sharpened) {  // Returns a variance-scaled VMAF score of `sharpened` measured against `source`.
+ const int bit_depth = cpi->td.mb.e_mbd.bd;
+ double new_vmaf;  // Not initialized here; expected to be written by aom_calc_vmaf() via &new_vmaf.
+ aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, sharpened, bit_depth,
+ &new_vmaf);
+ const double sharpened_var = frame_average_variance(cpi, sharpened);
+ return source_variance / sharpened_var * (new_vmaf - kBaselineVmaf);  // NOTE(review): sharpened_var is used as a divisor without a zero check -- confirm it cannot be 0.
+}
+
+static double find_best_frame_unsharp_amount_loop(
+ const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source,
+ YV12_BUFFER_CONFIG *const blurred, YV12_BUFFER_CONFIG *const sharpened,
+ double best_vmaf, const double baseline_variance,
+ const double unsharp_amount_start, const double step_size,
+ const int max_loop_count, const double max_amount) {  // Steps the unsharp amount by step_size while the approximate VMAF keeps improving; returns the chosen amount clamped to [0, max_amount].
+ const double min_amount = 0.0;  // Lower bound of the search range.
+ int loop_count = 0;
+ double approx_vmaf = best_vmaf;  // Seeded with the caller-supplied score for the starting amount.
+ double unsharp_amount = unsharp_amount_start;
+ do {
+ best_vmaf = approx_vmaf;  // Score to beat is the score of the previous step.
+ unsharp_amount += step_size;  // step_size may be negative; one caller passes -step_size to search downward.
+ if (unsharp_amount > max_amount || unsharp_amount < min_amount) break;  // Out of range: approx_vmaf == best_vmaf here, so this step is undone below.
+ unsharp(cpi, source, blurred, sharpened, unsharp_amount);
+ approx_vmaf = cal_approx_vmaf(cpi, baseline_variance, source, sharpened);
+
+ loop_count++;
+ } while (approx_vmaf > best_vmaf && loop_count < max_loop_count);  // Continue only while strictly improving and under the iteration cap.
+ unsharp_amount =
+ approx_vmaf > best_vmaf ? unsharp_amount : unsharp_amount - step_size;  // Keep the last step if it improved the score; otherwise step back to the previous amount.
+ return AOMMIN(max_amount, AOMMAX(unsharp_amount, min_amount));  // Clamp the result to [min_amount, max_amount].
+}
+
static double find_best_frame_unsharp_amount(const AV1_COMP *const cpi,
YV12_BUFFER_CONFIG *const source,
YV12_BUFFER_CONFIG *const blurred,
const double unsharp_amount_start,
const double step_size,
- const int max_loop_count) {
+ const int max_loop_count,
+ const double max_filter_amount) {
const AV1_COMMON *const cm = &cpi->common;
const int width = source->y_width;
const int height = source->y_height;
- const int bit_depth = cpi->td.mb.e_mbd.bd;
YV12_BUFFER_CONFIG sharpened;
memset(&sharpened, 0, sizeof(sharpened));
@@ -170,27 +205,36 @@
cpi->oxcf.border_in_pixels, cm->byte_alignment);
const double baseline_variance = frame_average_variance(cpi, source);
- int loop_count = 0;
- double approx_vmaf = 0.0;
- double best_vmaf, new_vmaf, unsharp_amount = unsharp_amount_start;
- do {
- best_vmaf = approx_vmaf;
- unsharp_amount += step_size;
- unsharp(cpi, source, blurred, &sharpened, unsharp_amount);
- aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, &sharpened, bit_depth,
- &new_vmaf);
- const double sharpened_var = frame_average_variance(cpi, &sharpened);
- approx_vmaf =
- baseline_variance / sharpened_var * (new_vmaf - kBaselineVmaf);
-
- loop_count++;
- } while (approx_vmaf > best_vmaf && loop_count < max_loop_count);
+ double unsharp_amount;
+ if (unsharp_amount_start <= step_size) {
+ unsharp_amount = find_best_frame_unsharp_amount_loop(
+ cpi, source, blurred, &sharpened, 0.0, baseline_variance, 0.0,
+ step_size, max_loop_count, max_filter_amount);
+ } else {
+ double a0 = unsharp_amount_start - step_size, a1 = unsharp_amount_start;
+ double v0, v1;
+ unsharp(cpi, source, blurred, &sharpened, a0);
+ v0 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
+ unsharp(cpi, source, blurred, &sharpened, a1);
+ v1 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
+ if (fabs(v0 - v1) < 0.01) {
+ unsharp_amount = a0;
+ } else if (v0 > v1) {
+ unsharp_amount = find_best_frame_unsharp_amount_loop(
+ cpi, source, blurred, &sharpened, v0, baseline_variance, a0,
+ -step_size, max_loop_count, max_filter_amount);
+ } else {
+ unsharp_amount = find_best_frame_unsharp_amount_loop(
+ cpi, source, blurred, &sharpened, v1, baseline_variance, a1,
+ step_size, max_loop_count, max_filter_amount);
+ }
+ }
aom_free_frame_buffer(&sharpened);
- return approx_vmaf > best_vmaf ? unsharp_amount : unsharp_amount - step_size;
+ return unsharp_amount;
}
-void av1_vmaf_frame_preprocessing(const AV1_COMP *const cpi,
+void av1_vmaf_frame_preprocessing(AV1_COMP *const cpi,
YV12_BUFFER_CONFIG *const source) {
aom_clear_system_state();
const AV1_COMMON *const cm = &cpi->common;
@@ -212,15 +256,16 @@
gaussian_blur(bit_depth, &source_extended, &blurred);
aom_free_frame_buffer(&source_extended);
- const double best_frame_unsharp_amount =
- find_best_frame_unsharp_amount(cpi, source, &blurred, 0.0, 0.05, 20);
+ const double best_frame_unsharp_amount = find_best_frame_unsharp_amount(
+ cpi, source, &blurred, cpi->last_frame_unsharp_amount, 0.05, 20, 1.01);
+ cpi->last_frame_unsharp_amount = best_frame_unsharp_amount;
unsharp(cpi, source, &blurred, source, best_frame_unsharp_amount);
aom_free_frame_buffer(&blurred);
aom_clear_system_state();
}
-void av1_vmaf_blk_preprocessing(const AV1_COMP *const cpi,
+void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi,
YV12_BUFFER_CONFIG *const source) {
aom_clear_system_state();
const AV1_COMMON *const cm = &cpi->common;
@@ -242,8 +287,9 @@
gaussian_blur(bit_depth, &source_extended, &blurred);
aom_free_frame_buffer(&source_extended);
- const double best_frame_unsharp_amount =
- find_best_frame_unsharp_amount(cpi, source, &blurred, 0.0, 0.05, 20);
+ const double best_frame_unsharp_amount = find_best_frame_unsharp_amount(
+ cpi, source, &blurred, cpi->last_frame_unsharp_amount, 0.05, 20, 1.01);
+ cpi->last_frame_unsharp_amount = best_frame_unsharp_amount;
const int block_size = BLOCK_64X64;
const int block_w = mi_size_wide[block_size] * 4;
@@ -325,9 +371,9 @@
}
}
- const double amount_start = AOMMAX(best_frame_unsharp_amount - 0.2, 0.0);
best_unsharp_amounts[index] = find_best_frame_unsharp_amount(
- cpi, &source_block, &blurred_block, amount_start, 0.1, 5);
+ cpi, &source_block, &blurred_block, best_frame_unsharp_amount, 0.1, 3,
+ 1.5);
}
}
diff --git a/av1/encoder/tune_vmaf.h b/av1/encoder/tune_vmaf.h
index 5ed14b7..c4cf072 100644
--- a/av1/encoder/tune_vmaf.h
+++ b/av1/encoder/tune_vmaf.h
@@ -15,11 +15,9 @@
#include "aom_scale/yv12config.h"
#include "av1/encoder/encoder.h"
-void av1_vmaf_blk_preprocessing(const AV1_COMP *cpi,
- YV12_BUFFER_CONFIG *source);
+void av1_vmaf_blk_preprocessing(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source);
-void av1_vmaf_frame_preprocessing(const AV1_COMP *cpi,
- YV12_BUFFER_CONFIG *source);
+void av1_vmaf_frame_preprocessing(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source);
void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi);