| /* |
| * Copyright (c) 2019, Alliance for Open Media. All rights reserved |
| * |
| * This source code is subject to the terms of the BSD 2 Clause License and |
| * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| * was not distributed with this source code in the LICENSE file, you can |
| * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| * Media Patent License 1.0 was not distributed with this source code in the |
| * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| */ |
| |
| #include "av1/encoder/tune_vmaf.h" |
| |
| #include "aom_dsp/psnr.h" |
| #include "aom_dsp/vmaf.h" |
| #include "aom_ports/system_state.h" |
| #include "av1/encoder/extend.h" |
| #include "av1/encoder/rdopt.h" |
| |
| static const double kBaselineVmaf = 97.42773; |
| |
| // TODO(sdeng): Add the SIMD implementation. |
| static AOM_INLINE void highbd_unsharp_rect(const uint16_t *source, |
| int source_stride, |
| const uint16_t *blurred, |
| int blurred_stride, uint16_t *dst, |
| int dst_stride, int w, int h, |
| double amount, int bit_depth) { |
| const int max_value = (1 << bit_depth) - 1; |
| for (int i = 0; i < h; ++i) { |
| for (int j = 0; j < w; ++j) { |
| const double val = |
| (double)source[j] + amount * ((double)source[j] - (double)blurred[j]); |
| dst[j] = (uint16_t)clamp((int)(val + 0.5), 0, max_value); |
| } |
| source += source_stride; |
| blurred += blurred_stride; |
| dst += dst_stride; |
| } |
| } |
| |
| static AOM_INLINE void unsharp_rect(const uint8_t *source, int source_stride, |
| const uint8_t *blurred, int blurred_stride, |
| uint8_t *dst, int dst_stride, int w, int h, |
| double amount) { |
| for (int i = 0; i < h; ++i) { |
| for (int j = 0; j < w; ++j) { |
| const double val = |
| (double)source[j] + amount * ((double)source[j] - (double)blurred[j]); |
| dst[j] = (uint8_t)clamp((int)(val + 0.5), 0, 255); |
| } |
| source += source_stride; |
| blurred += blurred_stride; |
| dst += dst_stride; |
| } |
| } |
| |
| static AOM_INLINE void unsharp(const AV1_COMP *const cpi, |
| const YV12_BUFFER_CONFIG *source, |
| const YV12_BUFFER_CONFIG *blurred, |
| const YV12_BUFFER_CONFIG *dst, double amount) { |
| const int bit_depth = cpi->td.mb.e_mbd.bd; |
| if (bit_depth > 8) { |
| highbd_unsharp_rect(CONVERT_TO_SHORTPTR(source->y_buffer), source->y_stride, |
| CONVERT_TO_SHORTPTR(blurred->y_buffer), |
| blurred->y_stride, CONVERT_TO_SHORTPTR(dst->y_buffer), |
| dst->y_stride, source->y_width, source->y_height, |
| amount, bit_depth); |
| } else { |
| unsharp_rect(source->y_buffer, source->y_stride, blurred->y_buffer, |
| blurred->y_stride, dst->y_buffer, dst->y_stride, |
| source->y_width, source->y_height, amount); |
| } |
| } |
| |
| // 8-tap Gaussian convolution filter with sigma = 1.0, sums to 128, |
| // all co-efficients must be even. |
| DECLARE_ALIGNED(16, static const int16_t, gauss_filter[8]) = { 0, 8, 30, 52, |
| 30, 8, 0, 0 }; |
| static AOM_INLINE void gaussian_blur(const int bit_depth, |
| const YV12_BUFFER_CONFIG *source, |
| const YV12_BUFFER_CONFIG *dst) { |
| const int block_size = BLOCK_128X128; |
| const int block_w = mi_size_wide[block_size] * 4; |
| const int block_h = mi_size_high[block_size] * 4; |
| const int num_cols = (source->y_width + block_w - 1) / block_w; |
| const int num_rows = (source->y_height + block_h - 1) / block_h; |
| int row, col; |
| |
| ConvolveParams conv_params = get_conv_params(0, 0, bit_depth); |
| InterpFilterParams filter = { .filter_ptr = gauss_filter, |
| .taps = 8, |
| .subpel_shifts = 0, |
| .interp_filter = EIGHTTAP_REGULAR }; |
| |
| for (row = 0; row < num_rows; ++row) { |
| for (col = 0; col < num_cols; ++col) { |
| const int row_offset_y = row * block_h; |
| const int col_offset_y = col * block_w; |
| |
| uint8_t *src_buf = |
| source->y_buffer + row_offset_y * source->y_stride + col_offset_y; |
| uint8_t *dst_buf = |
| dst->y_buffer + row_offset_y * dst->y_stride + col_offset_y; |
| |
| if (bit_depth > 8) { |
| av1_highbd_convolve_2d_sr( |
| CONVERT_TO_SHORTPTR(src_buf), source->y_stride, |
| CONVERT_TO_SHORTPTR(dst_buf), dst->y_stride, block_w, block_h, |
| &filter, &filter, 0, 0, &conv_params, bit_depth); |
| } else { |
| av1_convolve_2d_sr(src_buf, source->y_stride, dst_buf, dst->y_stride, |
| block_w, block_h, &filter, &filter, 0, 0, |
| &conv_params); |
| } |
| } |
| } |
| } |
| |
| static double frame_average_variance(const AV1_COMP *const cpi, |
| const YV12_BUFFER_CONFIG *const frame) { |
| const uint8_t *const y_buffer = frame->y_buffer; |
| const int y_stride = frame->y_stride; |
| const BLOCK_SIZE block_size = BLOCK_64X64; |
| |
| const int block_w = mi_size_wide[block_size] * 4; |
| const int block_h = mi_size_high[block_size] * 4; |
| int row, col; |
| const int bit_depth = cpi->td.mb.e_mbd.bd; |
| double var = 0.0, var_count = 0.0; |
| |
| // Loop through each block. |
| for (row = 0; row < frame->y_height / block_h; ++row) { |
| for (col = 0; col < frame->y_width / block_w; ++col) { |
| struct buf_2d buf; |
| const int row_offset_y = row * block_h; |
| const int col_offset_y = col * block_w; |
| |
| buf.buf = (uint8_t *)y_buffer + row_offset_y * y_stride + col_offset_y; |
| buf.stride = y_stride; |
| |
| if (bit_depth > 8) { |
| var += av1_high_get_sby_perpixel_variance(cpi, &buf, block_size, |
| bit_depth); |
| } else { |
| var += av1_get_sby_perpixel_variance(cpi, &buf, block_size); |
| } |
| var_count += 1.0; |
| } |
| } |
| var /= var_count; |
| return var; |
| } |
| |
| static double cal_approx_vmaf(const AV1_COMP *const cpi, double source_variance, |
| YV12_BUFFER_CONFIG *const source, |
| YV12_BUFFER_CONFIG *const sharpened) { |
| const int bit_depth = cpi->td.mb.e_mbd.bd; |
| double new_vmaf; |
| aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, sharpened, bit_depth, |
| &new_vmaf); |
| const double sharpened_var = frame_average_variance(cpi, sharpened); |
| return source_variance / sharpened_var * (new_vmaf - kBaselineVmaf); |
| } |
| |
| static double find_best_frame_unsharp_amount_loop( |
| const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source, |
| YV12_BUFFER_CONFIG *const blurred, YV12_BUFFER_CONFIG *const sharpened, |
| double best_vmaf, const double baseline_variance, |
| const double unsharp_amount_start, const double step_size, |
| const int max_loop_count, const double max_amount) { |
| const double min_amount = 0.0; |
| int loop_count = 0; |
| double approx_vmaf = best_vmaf; |
| double unsharp_amount = unsharp_amount_start; |
| do { |
| best_vmaf = approx_vmaf; |
| unsharp_amount += step_size; |
| if (unsharp_amount > max_amount || unsharp_amount < min_amount) break; |
| unsharp(cpi, source, blurred, sharpened, unsharp_amount); |
| approx_vmaf = cal_approx_vmaf(cpi, baseline_variance, source, sharpened); |
| |
| loop_count++; |
| } while (approx_vmaf > best_vmaf && loop_count < max_loop_count); |
| unsharp_amount = |
| approx_vmaf > best_vmaf ? unsharp_amount : unsharp_amount - step_size; |
| return AOMMIN(max_amount, AOMMAX(unsharp_amount, min_amount)); |
| } |
| |
| static double find_best_frame_unsharp_amount(const AV1_COMP *const cpi, |
| YV12_BUFFER_CONFIG *const source, |
| YV12_BUFFER_CONFIG *const blurred, |
| const double unsharp_amount_start, |
| const double step_size, |
| const int max_loop_count, |
| const double max_filter_amount) { |
| const AV1_COMMON *const cm = &cpi->common; |
| const int width = source->y_width; |
| const int height = source->y_height; |
| |
| YV12_BUFFER_CONFIG sharpened; |
| memset(&sharpened, 0, sizeof(sharpened)); |
| aom_alloc_frame_buffer( |
| &sharpened, width, height, 1, 1, cm->seq_params.use_highbitdepth, |
| cpi->oxcf.border_in_pixels, cm->features.byte_alignment); |
| |
| const double baseline_variance = frame_average_variance(cpi, source); |
| double unsharp_amount; |
| if (unsharp_amount_start <= step_size) { |
| unsharp_amount = find_best_frame_unsharp_amount_loop( |
| cpi, source, blurred, &sharpened, 0.0, baseline_variance, 0.0, |
| step_size, max_loop_count, max_filter_amount); |
| } else { |
| double a0 = unsharp_amount_start - step_size, a1 = unsharp_amount_start; |
| double v0, v1; |
| unsharp(cpi, source, blurred, &sharpened, a0); |
| v0 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened); |
| unsharp(cpi, source, blurred, &sharpened, a1); |
| v1 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened); |
| if (fabs(v0 - v1) < 0.01) { |
| unsharp_amount = a0; |
| } else if (v0 > v1) { |
| unsharp_amount = find_best_frame_unsharp_amount_loop( |
| cpi, source, blurred, &sharpened, v0, baseline_variance, a0, |
| -step_size, max_loop_count, max_filter_amount); |
| } else { |
| unsharp_amount = find_best_frame_unsharp_amount_loop( |
| cpi, source, blurred, &sharpened, v1, baseline_variance, a1, |
| step_size, max_loop_count, max_filter_amount); |
| } |
| } |
| |
| aom_free_frame_buffer(&sharpened); |
| return unsharp_amount; |
| } |
| |
| void av1_vmaf_frame_preprocessing(AV1_COMP *const cpi, |
| YV12_BUFFER_CONFIG *const source) { |
| aom_clear_system_state(); |
| const AV1_COMMON *const cm = &cpi->common; |
| const int bit_depth = cpi->td.mb.e_mbd.bd; |
| const int width = source->y_width; |
| const int height = source->y_height; |
| |
| YV12_BUFFER_CONFIG source_extended, blurred; |
| memset(&source_extended, 0, sizeof(source_extended)); |
| memset(&blurred, 0, sizeof(blurred)); |
| aom_alloc_frame_buffer( |
| &source_extended, width, height, 1, 1, cm->seq_params.use_highbitdepth, |
| cpi->oxcf.border_in_pixels, cm->features.byte_alignment); |
| aom_alloc_frame_buffer( |
| &blurred, width, height, 1, 1, cm->seq_params.use_highbitdepth, |
| cpi->oxcf.border_in_pixels, cm->features.byte_alignment); |
| |
| av1_copy_and_extend_frame(source, &source_extended); |
| gaussian_blur(bit_depth, &source_extended, &blurred); |
| aom_free_frame_buffer(&source_extended); |
| |
| const double best_frame_unsharp_amount = find_best_frame_unsharp_amount( |
| cpi, source, &blurred, cpi->last_frame_unsharp_amount, 0.05, 20, 1.01); |
| cpi->last_frame_unsharp_amount = best_frame_unsharp_amount; |
| |
| unsharp(cpi, source, &blurred, source, best_frame_unsharp_amount); |
| aom_free_frame_buffer(&blurred); |
| aom_clear_system_state(); |
| } |
| |
| void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi, |
| YV12_BUFFER_CONFIG *const source) { |
| aom_clear_system_state(); |
| const AV1_COMMON *const cm = &cpi->common; |
| const int width = source->y_width; |
| const int height = source->y_height; |
| const int bit_depth = cpi->td.mb.e_mbd.bd; |
| |
| YV12_BUFFER_CONFIG source_extended, blurred; |
| memset(&blurred, 0, sizeof(blurred)); |
| memset(&source_extended, 0, sizeof(source_extended)); |
| aom_alloc_frame_buffer( |
| &blurred, width, height, 1, 1, cm->seq_params.use_highbitdepth, |
| cpi->oxcf.border_in_pixels, cm->features.byte_alignment); |
| aom_alloc_frame_buffer( |
| &source_extended, width, height, 1, 1, cm->seq_params.use_highbitdepth, |
| cpi->oxcf.border_in_pixels, cm->features.byte_alignment); |
| |
| av1_copy_and_extend_frame(source, &source_extended); |
| gaussian_blur(bit_depth, &source_extended, &blurred); |
| aom_free_frame_buffer(&source_extended); |
| |
| const double best_frame_unsharp_amount = find_best_frame_unsharp_amount( |
| cpi, source, &blurred, cpi->last_frame_unsharp_amount, 0.05, 20, 1.01); |
| cpi->last_frame_unsharp_amount = best_frame_unsharp_amount; |
| |
| const int block_size = BLOCK_64X64; |
| const int block_w = mi_size_wide[block_size] * 4; |
| const int block_h = mi_size_high[block_size] * 4; |
| const int num_cols = (source->y_width + block_w - 1) / block_w; |
| const int num_rows = (source->y_height + block_h - 1) / block_h; |
| double *best_unsharp_amounts = |
| aom_malloc(sizeof(*best_unsharp_amounts) * num_cols * num_rows); |
| memset(best_unsharp_amounts, 0, |
| sizeof(*best_unsharp_amounts) * num_cols * num_rows); |
| |
| YV12_BUFFER_CONFIG source_block, blurred_block; |
| memset(&source_block, 0, sizeof(source_block)); |
| memset(&blurred_block, 0, sizeof(blurred_block)); |
| aom_alloc_frame_buffer( |
| &source_block, block_w, block_h, 1, 1, cm->seq_params.use_highbitdepth, |
| cpi->oxcf.border_in_pixels, cm->features.byte_alignment); |
| aom_alloc_frame_buffer( |
| &blurred_block, block_w, block_h, 1, 1, cm->seq_params.use_highbitdepth, |
| cpi->oxcf.border_in_pixels, cm->features.byte_alignment); |
| |
| for (int row = 0; row < num_rows; ++row) { |
| for (int col = 0; col < num_cols; ++col) { |
| const int row_offset_y = row * block_h; |
| const int col_offset_y = col * block_w; |
| const int block_width = AOMMIN(width - col_offset_y, block_w); |
| const int block_height = AOMMIN(height - row_offset_y, block_h); |
| const int index = col + row * num_cols; |
| |
| if (bit_depth > 8) { |
| uint16_t *frame_src_buf = CONVERT_TO_SHORTPTR(source->y_buffer) + |
| row_offset_y * source->y_stride + |
| col_offset_y; |
| uint16_t *frame_blurred_buf = CONVERT_TO_SHORTPTR(blurred.y_buffer) + |
| row_offset_y * blurred.y_stride + |
| col_offset_y; |
| uint16_t *blurred_dst = CONVERT_TO_SHORTPTR(blurred_block.y_buffer); |
| uint16_t *src_dst = CONVERT_TO_SHORTPTR(source_block.y_buffer); |
| |
| // Copy block from source frame. |
| for (int i = 0; i < block_h; ++i) { |
| for (int j = 0; j < block_w; ++j) { |
| if (i >= block_height || j >= block_width) { |
| src_dst[j] = 0; |
| blurred_dst[j] = 0; |
| } else { |
| src_dst[j] = frame_src_buf[j]; |
| blurred_dst[j] = frame_blurred_buf[j]; |
| } |
| } |
| frame_src_buf += source->y_stride; |
| frame_blurred_buf += blurred.y_stride; |
| src_dst += source_block.y_stride; |
| blurred_dst += blurred_block.y_stride; |
| } |
| } else { |
| uint8_t *frame_src_buf = |
| source->y_buffer + row_offset_y * source->y_stride + col_offset_y; |
| uint8_t *frame_blurred_buf = |
| blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y; |
| uint8_t *blurred_dst = blurred_block.y_buffer; |
| uint8_t *src_dst = source_block.y_buffer; |
| |
| // Copy block from source frame. |
| for (int i = 0; i < block_h; ++i) { |
| for (int j = 0; j < block_w; ++j) { |
| if (i >= block_height || j >= block_width) { |
| src_dst[j] = 0; |
| blurred_dst[j] = 0; |
| } else { |
| src_dst[j] = frame_src_buf[j]; |
| blurred_dst[j] = frame_blurred_buf[j]; |
| } |
| } |
| frame_src_buf += source->y_stride; |
| frame_blurred_buf += blurred.y_stride; |
| src_dst += source_block.y_stride; |
| blurred_dst += blurred_block.y_stride; |
| } |
| } |
| |
| best_unsharp_amounts[index] = find_best_frame_unsharp_amount( |
| cpi, &source_block, &blurred_block, best_frame_unsharp_amount, 0.1, 3, |
| 1.5); |
| } |
| } |
| |
| // Apply best blur amounts |
| for (int row = 0; row < num_rows; ++row) { |
| for (int col = 0; col < num_cols; ++col) { |
| const int row_offset_y = row * block_h; |
| const int col_offset_y = col * block_w; |
| const int block_width = AOMMIN(source->y_width - col_offset_y, block_w); |
| const int block_height = AOMMIN(source->y_height - row_offset_y, block_h); |
| const int index = col + row * num_cols; |
| |
| if (bit_depth > 8) { |
| uint16_t *src_buf = CONVERT_TO_SHORTPTR(source->y_buffer) + |
| row_offset_y * source->y_stride + col_offset_y; |
| uint16_t *blurred_buf = CONVERT_TO_SHORTPTR(blurred.y_buffer) + |
| row_offset_y * blurred.y_stride + col_offset_y; |
| highbd_unsharp_rect(src_buf, source->y_stride, blurred_buf, |
| blurred.y_stride, src_buf, source->y_stride, |
| block_width, block_height, |
| best_unsharp_amounts[index], bit_depth); |
| } else { |
| uint8_t *src_buf = |
| source->y_buffer + row_offset_y * source->y_stride + col_offset_y; |
| uint8_t *blurred_buf = |
| blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y; |
| unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride, |
| src_buf, source->y_stride, block_width, block_height, |
| best_unsharp_amounts[index]); |
| } |
| } |
| } |
| |
| aom_free_frame_buffer(&source_block); |
| aom_free_frame_buffer(&blurred_block); |
| aom_free_frame_buffer(&blurred); |
| aom_free(best_unsharp_amounts); |
| aom_clear_system_state(); |
| } |
| |
| typedef struct FrameData { |
| const YV12_BUFFER_CONFIG *source, *blurred; |
| int block_w, block_h, num_rows, num_cols, row, col, bit_depth; |
| } FrameData; |
| |
| // A callback function used to pass data to VMAF. |
| // Returns 0 after reading a frame. |
| // Returns 2 when there is no more frame to read. |
| static int update_frame(float *ref_data, float *main_data, float *temp_data, |
| int stride, void *user_data) { |
| FrameData *frames = (FrameData *)user_data; |
| const int width = frames->source->y_width; |
| const int height = frames->source->y_height; |
| const int row = frames->row; |
| const int col = frames->col; |
| const int num_rows = frames->num_rows; |
| const int num_cols = frames->num_cols; |
| const int block_w = frames->block_w; |
| const int block_h = frames->block_h; |
| const YV12_BUFFER_CONFIG *source = frames->source; |
| const YV12_BUFFER_CONFIG *blurred = frames->blurred; |
| const int bit_depth = frames->bit_depth; |
| const float scale_factor = 1.0f / (float)(1 << (bit_depth - 8)); |
| (void)temp_data; |
| stride /= (int)sizeof(*ref_data); |
| |
| for (int i = 0; i < height; ++i) { |
| float *ref, *main; |
| ref = ref_data + i * stride; |
| main = main_data + i * stride; |
| if (bit_depth == 8) { |
| uint8_t *src; |
| src = source->y_buffer + i * source->y_stride; |
| for (int j = 0; j < width; ++j) { |
| ref[j] = main[j] = (float)src[j]; |
| } |
| } else { |
| uint16_t *src; |
| src = CONVERT_TO_SHORTPTR(source->y_buffer) + i * source->y_stride; |
| for (int j = 0; j < width; ++j) { |
| ref[j] = main[j] = scale_factor * (float)src[j]; |
| } |
| } |
| } |
| if (row < num_rows && col < num_cols) { |
| // Set current block |
| const int row_offset = row * block_h; |
| const int col_offset = col * block_w; |
| const int block_width = AOMMIN(width - col_offset, block_w); |
| const int block_height = AOMMIN(height - row_offset, block_h); |
| |
| float *main_buf = main_data + col_offset + row_offset * stride; |
| if (bit_depth == 8) { |
| uint8_t *blurred_buf = |
| blurred->y_buffer + row_offset * blurred->y_stride + col_offset; |
| for (int i = 0; i < block_height; ++i) { |
| for (int j = 0; j < block_width; ++j) { |
| main_buf[j] = (float)blurred_buf[j]; |
| } |
| main_buf += stride; |
| blurred_buf += blurred->y_stride; |
| } |
| } else { |
| uint16_t *blurred_buf = CONVERT_TO_SHORTPTR(blurred->y_buffer) + |
| row_offset * blurred->y_stride + col_offset; |
| for (int i = 0; i < block_height; ++i) { |
| for (int j = 0; j < block_width; ++j) { |
| main_buf[j] = scale_factor * (float)blurred_buf[j]; |
| } |
| main_buf += stride; |
| blurred_buf += blurred->y_stride; |
| } |
| } |
| |
| frames->col++; |
| if (frames->col >= num_cols) { |
| frames->col = 0; |
| frames->row++; |
| } |
| return 0; |
| } else { |
| return 2; |
| } |
| } |
| |
| void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) { |
| AV1_COMMON *cm = &cpi->common; |
| const int y_width = cpi->source->y_width; |
| const int y_height = cpi->source->y_height; |
| const int resized_block_size = BLOCK_32X32; |
| const int resize_factor = 2; |
| const int bit_depth = cpi->td.mb.e_mbd.bd; |
| |
| aom_clear_system_state(); |
| YV12_BUFFER_CONFIG resized_source; |
| memset(&resized_source, 0, sizeof(resized_source)); |
| aom_alloc_frame_buffer( |
| &resized_source, y_width / resize_factor, y_height / resize_factor, 1, 1, |
| cm->seq_params.use_highbitdepth, cpi->oxcf.border_in_pixels, |
| cm->features.byte_alignment); |
| av1_resize_and_extend_frame(cpi->source, &resized_source, bit_depth, |
| av1_num_planes(cm)); |
| |
| const int resized_y_width = resized_source.y_width; |
| const int resized_y_height = resized_source.y_height; |
| const int resized_block_w = mi_size_wide[resized_block_size] * 4; |
| const int resized_block_h = mi_size_high[resized_block_size] * 4; |
| const int num_cols = |
| (resized_y_width + resized_block_w - 1) / resized_block_w; |
| const int num_rows = |
| (resized_y_height + resized_block_h - 1) / resized_block_h; |
| |
| YV12_BUFFER_CONFIG blurred; |
| memset(&blurred, 0, sizeof(blurred)); |
| aom_alloc_frame_buffer(&blurred, resized_y_width, resized_y_height, 1, 1, |
| cm->seq_params.use_highbitdepth, |
| cpi->oxcf.border_in_pixels, |
| cm->features.byte_alignment); |
| gaussian_blur(bit_depth, &resized_source, &blurred); |
| |
| double *scores = aom_malloc(sizeof(*scores) * (num_rows * num_cols)); |
| memset(scores, 0, sizeof(*scores) * (num_rows * num_cols)); |
| FrameData frame_data; |
| frame_data.source = &resized_source; |
| frame_data.blurred = &blurred; |
| frame_data.block_w = resized_block_w; |
| frame_data.block_h = resized_block_h; |
| frame_data.num_rows = num_rows; |
| frame_data.num_cols = num_cols; |
| frame_data.row = 0; |
| frame_data.col = 0; |
| frame_data.bit_depth = bit_depth; |
| aom_calc_vmaf_multi_frame(&frame_data, cpi->oxcf.vmaf_model_path, |
| update_frame, resized_y_width, resized_y_height, |
| bit_depth, scores); |
| |
| // Loop through each 'block_size' block. |
| for (int row = 0; row < num_rows; ++row) { |
| for (int col = 0; col < num_cols; ++col) { |
| const int index = row * num_cols + col; |
| const int row_offset_y = row * resized_block_h; |
| const int col_offset_y = col * resized_block_w; |
| |
| uint8_t *const orig_buf = resized_source.y_buffer + |
| row_offset_y * resized_source.y_stride + |
| col_offset_y; |
| uint8_t *const blurred_buf = |
| blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y; |
| |
| const double vmaf = scores[index]; |
| const double dvmaf = kBaselineVmaf - vmaf; |
| unsigned int sse; |
| cpi->fn_ptr[resized_block_size].vf(orig_buf, resized_source.y_stride, |
| blurred_buf, blurred.y_stride, &sse); |
| |
| const double mse = |
| (double)sse / (double)(resized_y_width * resized_y_height); |
| double weight; |
| const double eps = 0.01 / (num_rows * num_cols); |
| if (dvmaf < eps || mse < eps) { |
| weight = 1.0; |
| } else { |
| weight = mse / dvmaf; |
| } |
| |
| // Normalize it with a data fitted model. |
| weight = 6.0 * (1.0 - exp(-0.05 * weight)) + 0.8; |
| cpi->vmaf_rdmult_scaling_factors[index] = weight; |
| } |
| } |
| |
| aom_free_frame_buffer(&resized_source); |
| aom_free_frame_buffer(&blurred); |
| aom_free(scores); |
| aom_clear_system_state(); |
| } |
| |
| void av1_set_vmaf_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x, |
| const BLOCK_SIZE bsize, const int mi_row, |
| const int mi_col, int *const rdmult) { |
| const AV1_COMMON *const cm = &cpi->common; |
| |
| const int bsize_base = BLOCK_64X64; |
| const int num_mi_w = mi_size_wide[bsize_base]; |
| const int num_mi_h = mi_size_high[bsize_base]; |
| const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w; |
| const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h; |
| const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w; |
| const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h; |
| int row, col; |
| double num_of_mi = 0.0; |
| double geom_mean_of_scale = 0.0; |
| |
| aom_clear_system_state(); |
| for (row = mi_row / num_mi_w; |
| row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { |
| for (col = mi_col / num_mi_h; |
| col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) { |
| const int index = row * num_cols + col; |
| geom_mean_of_scale += log(cpi->vmaf_rdmult_scaling_factors[index]); |
| num_of_mi += 1.0; |
| } |
| } |
| geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi); |
| |
| *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5); |
| *rdmult = AOMMAX(*rdmult, 0); |
| set_error_per_bit(x, *rdmult); |
| aom_clear_system_state(); |
| } |
| |
| // TODO(sdeng): replace them with the SIMD versions. |
| static AOM_INLINE double highbd_image_sad_c(const uint16_t *src, int src_stride, |
| const uint16_t *ref, int ref_stride, |
| int w, int h) { |
| double accum = 0.0; |
| int i, j; |
| |
| for (i = 0; i < h; ++i) { |
| for (j = 0; j < w; ++j) { |
| double img1px = src[i * src_stride + j]; |
| double img2px = ref[i * ref_stride + j]; |
| |
| accum += fabs(img1px - img2px); |
| } |
| } |
| |
| return accum / (double)(h * w); |
| } |
| |
| static AOM_INLINE double image_sad_c(const uint8_t *src, int src_stride, |
| const uint8_t *ref, int ref_stride, int w, |
| int h) { |
| double accum = 0.0; |
| int i, j; |
| |
| for (i = 0; i < h; ++i) { |
| for (j = 0; j < w; ++j) { |
| double img1px = src[i * src_stride + j]; |
| double img2px = ref[i * ref_stride + j]; |
| |
| accum += fabs(img1px - img2px); |
| } |
| } |
| |
| return accum / (double)(h * w); |
| } |
| |
| static AOM_INLINE double calc_vmaf_motion_score( |
| const AV1_COMP *const cpi, const AV1_COMMON *const cm, |
| const YV12_BUFFER_CONFIG *const cur, const YV12_BUFFER_CONFIG *const last, |
| const YV12_BUFFER_CONFIG *const next) { |
| const int y_width = cur->y_width; |
| const int y_height = cur->y_height; |
| YV12_BUFFER_CONFIG blurred_cur, blurred_last, blurred_next; |
| const int bit_depth = cpi->td.mb.e_mbd.bd; |
| |
| memset(&blurred_cur, 0, sizeof(blurred_cur)); |
| memset(&blurred_last, 0, sizeof(blurred_last)); |
| memset(&blurred_next, 0, sizeof(blurred_next)); |
| |
| aom_alloc_frame_buffer( |
| &blurred_cur, y_width, y_height, 1, 1, cm->seq_params.use_highbitdepth, |
| cpi->oxcf.border_in_pixels, cm->features.byte_alignment); |
| aom_alloc_frame_buffer( |
| &blurred_last, y_width, y_height, 1, 1, cm->seq_params.use_highbitdepth, |
| cpi->oxcf.border_in_pixels, cm->features.byte_alignment); |
| aom_alloc_frame_buffer( |
| &blurred_next, y_width, y_height, 1, 1, cm->seq_params.use_highbitdepth, |
| cpi->oxcf.border_in_pixels, cm->features.byte_alignment); |
| |
| gaussian_blur(bit_depth, cur, &blurred_cur); |
| gaussian_blur(bit_depth, last, &blurred_last); |
| if (next) gaussian_blur(bit_depth, next, &blurred_next); |
| |
| double motion1, motion2 = 65536.0; |
| if (bit_depth > 8) { |
| const float scale_factor = 1.0f / (float)(1 << (bit_depth - 8)); |
| motion1 = highbd_image_sad_c(CONVERT_TO_SHORTPTR(blurred_cur.y_buffer), |
| blurred_cur.y_stride, |
| CONVERT_TO_SHORTPTR(blurred_last.y_buffer), |
| blurred_last.y_stride, y_width, y_height) * |
| scale_factor; |
| if (next) { |
| motion2 = highbd_image_sad_c(CONVERT_TO_SHORTPTR(blurred_cur.y_buffer), |
| blurred_cur.y_stride, |
| CONVERT_TO_SHORTPTR(blurred_next.y_buffer), |
| blurred_next.y_stride, y_width, y_height) * |
| scale_factor; |
| } |
| } else { |
| motion1 = image_sad_c(blurred_cur.y_buffer, blurred_cur.y_stride, |
| blurred_last.y_buffer, blurred_last.y_stride, y_width, |
| y_height); |
| if (next) { |
| motion2 = image_sad_c(blurred_cur.y_buffer, blurred_cur.y_stride, |
| blurred_next.y_buffer, blurred_next.y_stride, |
| y_width, y_height); |
| } |
| } |
| |
| aom_free_frame_buffer(&blurred_cur); |
| aom_free_frame_buffer(&blurred_last); |
| aom_free_frame_buffer(&blurred_next); |
| |
| return AOMMIN(motion1, motion2); |
| } |
| |
| // Calculates the new qindex from the VMAF motion score. This is based on the |
| // observation: when the motion score becomes higher, the VMAF score of the |
| // same source and distorted frames would become higher. |
| int av1_get_vmaf_base_qindex(const AV1_COMP *const cpi, int current_qindex) { |
| const AV1_COMMON *const cm = &cpi->common; |
| if (cm->current_frame.frame_number == 0 || cpi->oxcf.pass == 1) { |
| return current_qindex; |
| } |
| const int bit_depth = cpi->td.mb.e_mbd.bd; |
| const double approx_sse = |
| cpi->last_frame_ysse / |
| (double)((1 << (bit_depth - 8)) * (1 << (bit_depth - 8))); |
| const double approx_dvmaf = kBaselineVmaf - cpi->last_frame_vmaf; |
| const double sse_threshold = |
| 0.01 * cpi->source->y_width * cpi->source->y_height; |
| const double vmaf_threshold = 0.01; |
| if (approx_sse < sse_threshold || approx_dvmaf < vmaf_threshold) { |
| return current_qindex; |
| } |
| aom_clear_system_state(); |
| const GF_GROUP *gf_group = &cpi->gf_group; |
| YV12_BUFFER_CONFIG *cur_buf = cpi->source; |
| int src_index = 0; |
| if (cm->show_frame == 0) { |
| src_index = gf_group->arf_src_offset[gf_group->index]; |
| struct lookahead_entry *cur_entry = |
| av1_lookahead_peek(cpi->lookahead, src_index, cpi->compressor_stage); |
| cur_buf = &cur_entry->img; |
| } |
| assert(cur_buf); |
| |
| const struct lookahead_entry *last_entry = |
| av1_lookahead_peek(cpi->lookahead, src_index - 1, cpi->compressor_stage); |
| const struct lookahead_entry *next_entry = |
| av1_lookahead_peek(cpi->lookahead, src_index + 1, cpi->compressor_stage); |
| const YV12_BUFFER_CONFIG *next_buf = &next_entry->img; |
| const YV12_BUFFER_CONFIG *last_buf = |
| cm->show_frame ? cpi->last_source : &last_entry->img; |
| |
| assert(last_buf); |
| |
| const double motion = |
| calc_vmaf_motion_score(cpi, cm, cur_buf, last_buf, next_buf); |
| |
| // Get dVMAF through a data fitted model. |
| const double dvmaf = 26.11 * (1.0 - exp(-0.06 * motion)); |
| const double dsse = dvmaf * approx_sse / approx_dvmaf; |
| |
| const double beta = approx_sse / (dsse + approx_sse); |
| const int offset = av1_get_deltaq_offset(cpi, current_qindex, beta); |
| int qindex = current_qindex + offset; |
| |
| qindex = AOMMIN(qindex, MAXQ); |
| qindex = AOMMAX(qindex, MINQ); |
| |
| aom_clear_system_state(); |
| return qindex; |
| } |
| |
| void av1_update_vmaf_curve(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source, |
| YV12_BUFFER_CONFIG *recon) { |
| const int bit_depth = cpi->td.mb.e_mbd.bd; |
| aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, recon, bit_depth, |
| &cpi->last_frame_vmaf); |
| if (bit_depth > 8) { |
| cpi->last_frame_ysse = (double)aom_highbd_get_y_sse(source, recon); |
| } else { |
| cpi->last_frame_ysse = (double)aom_get_y_sse(source, recon); |
| } |
| } |