Add block based VMAF preprocessing
Compared to frame-based preprocessing, the additional VMAF gains are:
150f/SP1/VBR
ugc360p -2.38%
midres -1.21%
hdres(95% done) -5.37%
However, due to the slow VMAF computations, this method is not enabled
at the moment.
Change-Id: I281a79f24e2a68afadabae1de46e38d6d522bc7b
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 00da360..ac4cfcf 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -6242,7 +6242,7 @@
#if CONFIG_TUNE_VMAF
if (!is_stat_generation_stage(cpi) &&
cpi->oxcf.tuning == AOM_TUNE_VMAF_WITH_PREPROCESSING) {
- av1_vmaf_preprocessing(cpi, sd);
+ av1_vmaf_preprocessing(cpi, sd, false);
}
#endif
diff --git a/av1/encoder/tune_vmaf.c b/av1/encoder/tune_vmaf.c
index df37884..4bd086b 100644
--- a/av1/encoder/tune_vmaf.c
+++ b/av1/encoder/tune_vmaf.c
@@ -16,22 +16,28 @@
#include "av1/encoder/extend.h"
// TODO(sdeng): Add the SIMD implementation.
+static AOM_INLINE void unsharp_rect(const uint8_t *source, int source_stride,
+ const uint8_t *blurred, int blurred_stride,
+ uint8_t *dst, int dst_stride, int w, int h,
+ double amount) {
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; ++j) {
+ const double val =
+ (double)source[j] + amount * ((double)source[j] - (double)blurred[j]);
+ dst[j] = (uint8_t)clamp((int)(val + 0.5), 0, 255);
+ }
+ source += source_stride;
+ blurred += blurred_stride;
+ dst += dst_stride;
+ }
+}
+
static AOM_INLINE void unsharp(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *blurred,
const YV12_BUFFER_CONFIG *dst, double amount) {
- uint8_t *src = source->y_buffer;
- uint8_t *blur = blurred->y_buffer;
- uint8_t *dstbuf = dst->y_buffer;
- for (int i = 0; i < source->y_height; ++i) {
- for (int j = 0; j < source->y_width; ++j) {
- const double val =
- (double)src[j] + amount * ((double)src[j] - (double)blur[j]);
- dstbuf[j] = (uint8_t)clamp((int)(val + 0.5), 0, 255);
- }
- src += source->y_stride;
- blur += blurred->y_stride;
- dstbuf += dst->y_stride;
- }
+ unsharp_rect(source->y_buffer, source->y_stride, blurred->y_buffer,
+ blurred->y_stride, dst->y_buffer, dst->y_stride, source->y_width,
+ source->y_height, amount);
}
// 8-tap Gaussian convolution filter with sigma = 1.0, sums to 128,
@@ -88,37 +94,19 @@
}
}
-void av1_vmaf_preprocessing(const AV1_COMP *cpi, YV12_BUFFER_CONFIG *source) {
- const int use_hbd = source->flags & YV12_FLAG_HIGHBITDEPTH;
- // TODO(sdeng): Add high bit depth support.
- if (use_hbd) {
- printf(
- "VMAF preprocessing for high bit depth videos is unsupported yet.\n");
- exit(0);
- }
-
- aom_clear_system_state();
+static double find_best_frame_unsharp_amount(
+ const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source,
+ YV12_BUFFER_CONFIG *const blurred) {
const AV1_COMMON *const cm = &cpi->common;
const int width = source->y_width;
const int height = source->y_height;
- YV12_BUFFER_CONFIG source_extended, blurred, sharpened;
- memset(&source_extended, 0, sizeof(source_extended));
- memset(&blurred, 0, sizeof(blurred));
+ YV12_BUFFER_CONFIG sharpened;
memset(&sharpened, 0, sizeof(sharpened));
- aom_alloc_frame_buffer(&source_extended, width, height, 1, 1,
- cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->byte_alignment);
- aom_alloc_frame_buffer(&blurred, width, height, 1, 1,
- cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->byte_alignment);
aom_alloc_frame_buffer(&sharpened, width, height, 1, 1,
cm->seq_params.use_highbitdepth,
cpi->oxcf.border_in_pixels, cm->byte_alignment);
- av1_copy_and_extend_frame(source, &source_extended);
- gaussian_blur(cpi, &source_extended, &blurred);
-
double unsharp_amount = 0.0;
const double step_size = 0.05;
const double max_vmaf_score = 100.0;
@@ -135,7 +123,7 @@
const int max_loop_count = 20;
while (!exit_loop) {
unsharp_amount += step_size;
- unsharp(source, &blurred, &sharpened, unsharp_amount);
+ unsharp(source, blurred, &sharpened, unsharp_amount);
double new_vmaf;
aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, &sharpened, &new_vmaf);
if (new_vmaf < best_vmaf || loop_count == max_loop_count) {
@@ -149,14 +137,146 @@
loop_count++;
}
+ aom_free_frame_buffer(&sharpened);
+
unsharp_amount -= step_size;
if (best_unsharp_amount_begin >= 0.0) {
unsharp_amount = (unsharp_amount + best_unsharp_amount_begin) / 2.0;
}
- unsharp(source, &blurred, source, unsharp_amount);
+ return unsharp_amount;
+}
+
+void av1_vmaf_preprocessing(const AV1_COMP *const cpi,
+ YV12_BUFFER_CONFIG *const source,
+ bool use_block_based_method) {
+ const int use_hbd = source->flags & YV12_FLAG_HIGHBITDEPTH;
+ // TODO(sdeng): Add high bit depth support.
+ if (use_hbd) {
+ printf(
+ "VMAF preprocessing for high bit depth videos is unsupported yet.\n");
+ exit(0);
+ }
+
+ aom_clear_system_state();
+ const AV1_COMMON *const cm = &cpi->common;
+ const int width = source->y_width;
+ const int height = source->y_height;
+ YV12_BUFFER_CONFIG source_extended, blurred, sharpened;
+ memset(&source_extended, 0, sizeof(source_extended));
+ memset(&blurred, 0, sizeof(blurred));
+ memset(&sharpened, 0, sizeof(sharpened));
+ aom_alloc_frame_buffer(&source_extended, width, height, 1, 1,
+ cm->seq_params.use_highbitdepth,
+ cpi->oxcf.border_in_pixels, cm->byte_alignment);
+ aom_alloc_frame_buffer(&blurred, width, height, 1, 1,
+ cm->seq_params.use_highbitdepth,
+ cpi->oxcf.border_in_pixels, cm->byte_alignment);
+ aom_alloc_frame_buffer(&sharpened, width, height, 1, 1,
+ cm->seq_params.use_highbitdepth,
+ cpi->oxcf.border_in_pixels, cm->byte_alignment);
+
+ av1_copy_and_extend_frame(source, &source_extended);
+ av1_copy_and_extend_frame(source, &sharpened);
+
+ gaussian_blur(cpi, &source_extended, &blurred);
aom_free_frame_buffer(&source_extended);
+ const double best_frame_unsharp_amount =
+ find_best_frame_unsharp_amount(cpi, source, &blurred);
+
+ if (!use_block_based_method) {
+ unsharp(source, &blurred, source, best_frame_unsharp_amount);
+ aom_free_frame_buffer(&sharpened);
+ aom_free_frame_buffer(&blurred);
+ aom_clear_system_state();
+ return;
+ }
+
+ const int block_size = BLOCK_128X128;
+ const int num_mi_w = mi_size_wide[block_size];
+ const int num_mi_h = mi_size_high[block_size];
+ const int num_cols = (cm->mi_cols + num_mi_w - 1) / num_mi_w;
+ const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
+ const int block_w = num_mi_w << 2;
+ const int block_h = num_mi_h << 2;
+ double *best_unsharp_amounts =
+ aom_malloc(sizeof(*best_unsharp_amounts) * num_cols * num_rows);
+ memset(best_unsharp_amounts, 0,
+ sizeof(*best_unsharp_amounts) * num_cols * num_rows);
+
+ for (int row = 0; row < num_rows; ++row) {
+ for (int col = 0; col < num_cols; ++col) {
+ const int mi_row = row * num_mi_h;
+ const int mi_col = col * num_mi_w;
+
+ const int row_offset_y = mi_row << 2;
+ const int col_offset_y = mi_col << 2;
+
+ const int block_width = AOMMIN(source->y_width - col_offset_y, block_w);
+ const int block_height = AOMMIN(source->y_height - row_offset_y, block_h);
+
+ uint8_t *src_buf =
+ source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
+ uint8_t *blurred_buf =
+ blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;
+ uint8_t *dst_buf =
+ sharpened.y_buffer + row_offset_y * sharpened.y_stride + col_offset_y;
+
+ const int index = col + row * num_cols;
+ const double step_size = 0.1;
+ double amount = AOMMAX(best_frame_unsharp_amount - 0.2, step_size);
+ unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
+ dst_buf, sharpened.y_stride, block_width, block_height,
+ amount);
+ double best_vmaf;
+ aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, &sharpened, &best_vmaf);
+
+ // Find the best unsharp amount.
+ bool exit_loop = false;
+ while (!exit_loop && amount < best_frame_unsharp_amount + 0.2) {
+ amount += step_size;
+ unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
+ dst_buf, sharpened.y_stride, block_width, block_height,
+ amount);
+
+ double new_vmaf;
+ aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, &sharpened, &new_vmaf);
+ if (new_vmaf <= best_vmaf) {
+ exit_loop = true;
+ amount -= step_size;
+ } else {
+ best_vmaf = new_vmaf;
+ }
+ }
+ best_unsharp_amounts[index] = amount;
+ // Reset this block of the sharpened frame back to the source pixels
+ unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
+ dst_buf, sharpened.y_stride, block_width, block_height, 0.0);
+ }
+ }
+
+ // Apply the best unsharp amounts
+ for (int row = 0; row < num_rows; ++row) {
+ for (int col = 0; col < num_cols; ++col) {
+ const int mi_row = row * num_mi_h;
+ const int mi_col = col * num_mi_w;
+ const int row_offset_y = mi_row << 2;
+ const int col_offset_y = mi_col << 2;
+ const int block_width = AOMMIN(source->y_width - col_offset_y, block_w);
+ const int block_height = AOMMIN(source->y_height - row_offset_y, block_h);
+ const int index = col + row * num_cols;
+ uint8_t *src_buf =
+ source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
+ uint8_t *blurred_buf =
+ blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;
+ unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
+ src_buf, source->y_stride, block_width, block_height,
+ best_unsharp_amounts[index]);
+ }
+ }
+
aom_free_frame_buffer(&sharpened);
aom_free_frame_buffer(&blurred);
+ aom_free(best_unsharp_amounts);
aom_clear_system_state();
}
diff --git a/av1/encoder/tune_vmaf.h b/av1/encoder/tune_vmaf.h
index 0baa588..27955a0 100644
--- a/av1/encoder/tune_vmaf.h
+++ b/av1/encoder/tune_vmaf.h
@@ -15,6 +15,7 @@
#include "aom_scale/yv12config.h"
#include "av1/encoder/encoder.h"
-void av1_vmaf_preprocessing(const AV1_COMP *cpi, YV12_BUFFER_CONFIG *source);
+void av1_vmaf_preprocessing(const AV1_COMP *cpi, YV12_BUFFER_CONFIG *source,
+ bool use_block_based_method);
#endif // AOM_AV1_ENCODER_TUNE_VMAF_H_