av1/encoder/tune_vmaf.c - aom - Git at Google

 /*
  * Copyright (c) 2019, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
  * was not distributed with this source code in the LICENSE file, you can
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */

 #include "av1/encoder/tune_vmaf.h"

 #include "aom_dsp/vmaf.h"
 #include "aom_ports/system_state.h"
 #include "av1/encoder/extend.h"

 // TODO(sdeng): Add the SIMD implementation.
 static AOM_INLINE void unsharp_rect(const uint8_t *source, int source_stride,
                                     const uint8_t *blurred, int blurred_stride,
                                     uint8_t *dst, int dst_stride, int w, int h,
                                     double amount) {
   for (int i = 0; i < h; ++i) {
     for (int j = 0; j < w; ++j) {
       const double val =
           (double)source[j] + amount * ((double)source[j] - (double)blurred[j]);
       dst[j] = (uint8_t)clamp((int)(val + 0.5), 0, 255);
     }
     source += source_stride;
     blurred += blurred_stride;
     dst += dst_stride;
   }
 }

 static AOM_INLINE void unsharp(const YV12_BUFFER_CONFIG *source,
                                const YV12_BUFFER_CONFIG *blurred,
                                const YV12_BUFFER_CONFIG *dst, double amount) {
   unsharp_rect(source->y_buffer, source->y_stride, blurred->y_buffer,
                blurred->y_stride, dst->y_buffer, dst->y_stride, source->y_width,
                source->y_height, amount);
 }

 // 8-tap Gaussian convolution filter with sigma = 1.0, sums to 128,
 // all co-efficients must be even.
 DECLARE_ALIGNED(16, static const int16_t, gauss_filter[8]) = { 0,  8, 30, 52,
                                                                30, 8, 0,  0 };
 static AOM_INLINE void gaussian_blur(const AV1_COMP *const cpi,
                                      const YV12_BUFFER_CONFIG *source,
                                      const YV12_BUFFER_CONFIG *dst) {
   const AV1_COMMON *cm = &cpi->common;
   const ThreadData *td = &cpi->td;
   const MACROBLOCK *x = &td->mb;
   const MACROBLOCKD *xd = &x->e_mbd;

   const int block_size = BLOCK_128X128;

   const int num_mi_w = mi_size_wide[block_size];
   const int num_mi_h = mi_size_high[block_size];
   const int num_cols = (cm->mi_cols + num_mi_w - 1) / num_mi_w;
   const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
   int row, col;
   const int use_hbd = source->flags & YV12_FLAG_HIGHBITDEPTH;

   ConvolveParams conv_params = get_conv_params(0, 0, xd->bd);
   InterpFilterParams filter = { .filter_ptr = gauss_filter,
                                 .taps = 8,
                                 .subpel_shifts = 0,
                                 .interp_filter = EIGHTTAP_REGULAR };

   for (row = 0; row < num_rows; ++row) {
     for (col = 0; col < num_cols; ++col) {
       const int mi_row = row * num_mi_h;
       const int mi_col = col * num_mi_w;

       const int row_offset_y = mi_row << 2;
       const int col_offset_y = mi_col << 2;

       uint8_t *src_buf =
           source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
       uint8_t *dst_buf =
           dst->y_buffer + row_offset_y * dst->y_stride + col_offset_y;

       if (use_hbd) {
         av1_highbd_convolve_2d_sr(
             CONVERT_TO_SHORTPTR(src_buf), source->y_stride,
             CONVERT_TO_SHORTPTR(dst_buf), dst->y_stride, num_mi_w << 2,
             num_mi_h << 2, &filter, &filter, 0, 0, &conv_params, xd->bd);
       } else {
         av1_convolve_2d_sr(src_buf, source->y_stride, dst_buf, dst->y_stride,
                            num_mi_w << 2, num_mi_h << 2, &filter, &filter, 0, 0,
                            &conv_params);
       }
     }
   }
 }

 static double find_best_frame_unsharp_amount(
     const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source,
     YV12_BUFFER_CONFIG *const blurred) {
   const AV1_COMMON *const cm = &cpi->common;
   const int width = source->y_width;
   const int height = source->y_height;

   YV12_BUFFER_CONFIG sharpened;
   memset(&sharpened, 0, sizeof(sharpened));
   aom_alloc_frame_buffer(&sharpened, width, height, 1, 1,
                          cm->seq_params.use_highbitdepth,
                          cpi->oxcf.border_in_pixels, cm->byte_alignment);

   double unsharp_amount = 0.0;
   const double step_size = 0.05;
   const double max_vmaf_score = 100.0;

   double best_vmaf;
   aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, source, &best_vmaf);

   // We may get the same best VMAF scores for different unsharp_amount values.
   // We pick the average value in this case.
   double best_unsharp_amount_begin = best_vmaf == max_vmaf_score ? 0.0 : -1.0;
   bool exit_loop = false;

   int loop_count = 0;
   const int max_loop_count = 20;
   while (!exit_loop) {
     unsharp_amount += step_size;
     unsharp(source, blurred, &sharpened, unsharp_amount);
     double new_vmaf;
     aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, &sharpened, &new_vmaf);
     if (new_vmaf < best_vmaf || loop_count == max_loop_count) {
       exit_loop = true;
     } else {
       if (new_vmaf == max_vmaf_score && best_unsharp_amount_begin < 0.0) {
         best_unsharp_amount_begin = unsharp_amount;
       }
       best_vmaf = new_vmaf;
     }
     loop_count++;
   }

   aom_free_frame_buffer(&sharpened);

   unsharp_amount -= step_size;
   if (best_unsharp_amount_begin >= 0.0) {
     unsharp_amount = (unsharp_amount + best_unsharp_amount_begin) / 2.0;
   }

   return unsharp_amount;
 }

 void av1_vmaf_preprocessing(const AV1_COMP *const cpi,
                             YV12_BUFFER_CONFIG *const source,
                             bool use_block_based_method) {
   const int use_hbd = source->flags & YV12_FLAG_HIGHBITDEPTH;
   // TODO(sdeng): Add high bit depth support.
   if (use_hbd) {
     printf(
         "VMAF preprocessing for high bit depth videos is unsupported yet.\n");
     exit(0);
   }

   aom_clear_system_state();
   const AV1_COMMON *const cm = &cpi->common;
   const int width = source->y_width;
   const int height = source->y_height;
   YV12_BUFFER_CONFIG source_extended, blurred, sharpened;
   memset(&source_extended, 0, sizeof(source_extended));
   memset(&blurred, 0, sizeof(blurred));
   memset(&sharpened, 0, sizeof(sharpened));
   aom_alloc_frame_buffer(&source_extended, width, height, 1, 1,
                          cm->seq_params.use_highbitdepth,
                          cpi->oxcf.border_in_pixels, cm->byte_alignment);
   aom_alloc_frame_buffer(&blurred, width, height, 1, 1,
                          cm->seq_params.use_highbitdepth,
                          cpi->oxcf.border_in_pixels, cm->byte_alignment);
   aom_alloc_frame_buffer(&sharpened, width, height, 1, 1,
                          cm->seq_params.use_highbitdepth,
                          cpi->oxcf.border_in_pixels, cm->byte_alignment);

   av1_copy_and_extend_frame(source, &source_extended);
   av1_copy_and_extend_frame(source, &sharpened);

   gaussian_blur(cpi, &source_extended, &blurred);
   aom_free_frame_buffer(&source_extended);
   const double best_frame_unsharp_amount =
       find_best_frame_unsharp_amount(cpi, source, &blurred);

   if (!use_block_based_method) {
     unsharp(source, &blurred, source, best_frame_unsharp_amount);
     aom_free_frame_buffer(&sharpened);
     aom_free_frame_buffer(&blurred);
     aom_clear_system_state();
     return;
   }

   const int block_size = BLOCK_128X128;
   const int num_mi_w = mi_size_wide[block_size];
   const int num_mi_h = mi_size_high[block_size];
   const int num_cols = (cm->mi_cols + num_mi_w - 1) / num_mi_w;
   const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
   const int block_w = num_mi_w << 2;
   const int block_h = num_mi_h << 2;
   double *best_unsharp_amounts =
       aom_malloc(sizeof(*best_unsharp_amounts) * num_cols * num_rows);
   memset(best_unsharp_amounts, 0,
          sizeof(*best_unsharp_amounts) * num_cols * num_rows);

   for (int row = 0; row < num_rows; ++row) {
     for (int col = 0; col < num_cols; ++col) {
       const int mi_row = row * num_mi_h;
       const int mi_col = col * num_mi_w;

       const int row_offset_y = mi_row << 2;
       const int col_offset_y = mi_col << 2;

       const int block_width = AOMMIN(source->y_width - col_offset_y, block_w);
       const int block_height = AOMMIN(source->y_height - row_offset_y, block_h);

       uint8_t *src_buf =
           source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
       uint8_t *blurred_buf =
           blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;
       uint8_t *dst_buf =
           sharpened.y_buffer + row_offset_y * sharpened.y_stride + col_offset_y;

       const int index = col + row * num_cols;
       const double step_size = 0.1;
       double amount = AOMMAX(best_frame_unsharp_amount - 0.2, step_size);
       unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
                    dst_buf, sharpened.y_stride, block_width, block_height,
                    amount);
       double best_vmaf;
       aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, &sharpened, &best_vmaf);

       // Find the best unsharp amount.
       bool exit_loop = false;
       while (!exit_loop && amount < best_frame_unsharp_amount + 0.2) {
         amount += step_size;
         unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
                      dst_buf, sharpened.y_stride, block_width, block_height,
                      amount);

         double new_vmaf;
         aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, &sharpened, &new_vmaf);
         if (new_vmaf <= best_vmaf) {
           exit_loop = true;
           amount -= step_size;
         } else {
           best_vmaf = new_vmaf;
         }
       }
       best_unsharp_amounts[index] = amount;
       // Reset blurred frame
       unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
                    dst_buf, sharpened.y_stride, block_width, block_height, 0.0);
     }
   }

   // Apply best blur amounts
   for (int row = 0; row < num_rows; ++row) {
     for (int col = 0; col < num_cols; ++col) {
       const int mi_row = row * num_mi_h;
       const int mi_col = col * num_mi_w;
       const int row_offset_y = mi_row << 2;
       const int col_offset_y = mi_col << 2;
       const int block_width = AOMMIN(source->y_width - col_offset_y, block_w);
       const int block_height = AOMMIN(source->y_height - row_offset_y, block_h);
       const int index = col + row * num_cols;
       uint8_t *src_buf =
           source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
       uint8_t *blurred_buf =
           blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;
       unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
                    src_buf, source->y_stride, block_width, block_height,
                    best_unsharp_amounts[index]);
     }
   }

   aom_free_frame_buffer(&sharpened);
   aom_free_frame_buffer(&blurred);
   aom_free(best_unsharp_amounts);
   aom_clear_system_state();
 }

 // TODO(sdeng): replace it with the SIMD version.
 static AOM_INLINE double image_mse_c(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride, int w,
                                      int h) {
   double accum = 0.0;
   int i, j;

   for (i = 0; i < h; ++i) {
     for (j = 0; j < w; ++j) {
       double img1px = src[i * src_stride + j];
       double img2px = ref[i * ref_stride + j];

       accum += (img1px - img2px) * (img1px - img2px);
     }
   }

   return accum / (double)(w * h);
 }

 void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
   AV1_COMMON *cm = &cpi->common;
   ThreadData *td = &cpi->td;
   MACROBLOCK *x = &td->mb;
   MACROBLOCKD *xd = &x->e_mbd;
   uint8_t *const y_buffer = cpi->source->y_buffer;
   const int y_stride = cpi->source->y_stride;
   const int y_width = cpi->source->y_width;
   const int y_height = cpi->source->y_height;
   const int block_size = BLOCK_64X64;

   const int num_mi_w = mi_size_wide[block_size];
   const int num_mi_h = mi_size_high[block_size];
   const int num_cols = (cm->mi_cols + num_mi_w - 1) / num_mi_w;
   const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
   const int block_w = num_mi_w << 2;
   const int block_h = num_mi_h << 2;
   const int use_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH;
   // TODO(sdeng): Add high bit depth support.
   if (use_hbd) {
     printf("VMAF RDO for high bit depth videos is unsupported yet.\n");
     exit(0);
   }

   aom_clear_system_state();
   YV12_BUFFER_CONFIG fake_recon, blurred;
   memset(&fake_recon, 0, sizeof(fake_recon));
   memset(&blurred, 0, sizeof(blurred));
   aom_alloc_frame_buffer(&fake_recon, y_width, y_height, 1, 1,
                          cm->seq_params.use_highbitdepth,
                          cpi->oxcf.border_in_pixels, cm->byte_alignment);
   aom_alloc_frame_buffer(&blurred, y_width, y_height, 1, 1,
                          cm->seq_params.use_highbitdepth,
                          cpi->oxcf.border_in_pixels, cm->byte_alignment);

   gaussian_blur(cpi, cpi->source, &blurred);

   // baseline vmaf
   double baseline_mse = 0.0, baseline_vmaf = 0.0;
   aom_calc_vmaf(cpi->oxcf.vmaf_model_path, cpi->source, cpi->source,
                 &baseline_vmaf);
   av1_copy_and_extend_frame(cpi->source, &fake_recon);

   // Loop through each 'block_size' block.
   for (int row = 0; row < num_rows; ++row) {
     for (int col = 0; col < num_cols; ++col) {
       const int mi_row = row * num_mi_h;
       const int mi_col = col * num_mi_w;
       const int index = row * num_cols + col;
       const int row_offset_y = mi_row << 2;
       const int col_offset_y = mi_col << 2;

       uint8_t *const orig_buf =
           y_buffer + row_offset_y * y_stride + col_offset_y;
       uint8_t *const blurred_buf =
           blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;
       uint8_t *const fybuf = fake_recon.y_buffer +
                              row_offset_y * fake_recon.y_stride + col_offset_y;

       const int block_width = AOMMIN(y_width - col_offset_y, block_w);
       const int block_height = AOMMIN(y_height - row_offset_y, block_h);

       // Set the blurred block.
       unsharp_rect(orig_buf, y_stride, blurred_buf, blurred.y_stride, fybuf,
                    fake_recon.y_stride, block_width, block_height, -1.0);

       double vmaf, mse;
       aom_calc_vmaf(cpi->oxcf.vmaf_model_path, cpi->source, &fake_recon, &vmaf);
       mse = image_mse_c(y_buffer, y_stride, fake_recon.y_buffer,
                         fake_recon.y_stride, y_width, y_height);

       double weight = 0.0;
       const double dvmaf = baseline_vmaf - vmaf;
       const double dmse = mse - baseline_mse;
       const double eps = 0.01 / (num_rows * num_cols);

       if (dvmaf < eps || dmse < eps) {
         weight = 1.0;
       } else {
         weight = dmse / dvmaf;
       }

       // Normalize it with a data fitted model.
       weight = 6.0 * (1.0 - exp(-0.05 * weight)) + 0.8;
       cpi->vmaf_rdmult_scaling_factors[index] = weight;

       // Reset blurred block.
       unsharp_rect(orig_buf, y_stride, blurred_buf, blurred.y_stride, fybuf,
                    fake_recon.y_stride, block_width, block_height, 0.0);
     }
   }

   aom_free_frame_buffer(&fake_recon);
   aom_free_frame_buffer(&blurred);
   aom_clear_system_state();
   (void)xd;
 }

 void av1_set_vmaf_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                          const BLOCK_SIZE bsize, const int mi_row,
                          const int mi_col, int *const rdmult) {
   const AV1_COMMON *const cm = &cpi->common;

   const int bsize_base = BLOCK_64X64;
   const int num_mi_w = mi_size_wide[bsize_base];
   const int num_mi_h = mi_size_high[bsize_base];
   const int num_cols = (cm->mi_cols + num_mi_w - 1) / num_mi_w;
   const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
   const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
   const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;
   int row, col;
   double num_of_mi = 0.0;
   double geom_mean_of_scale = 0.0;

   aom_clear_system_state();
   for (row = mi_row / num_mi_w;
        row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
     for (col = mi_col / num_mi_h;
          col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) {
       const int index = row * num_cols + col;
       geom_mean_of_scale += log(cpi->vmaf_rdmult_scaling_factors[index]);
       num_of_mi += 1.0;
     }
   }
   geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi);

   *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5);
   *rdmult = AOMMAX(*rdmult, 0);
   set_error_per_bit(x, *rdmult);
   aom_clear_system_state();
 }
	/*
	* Copyright (c) 2019, Alliance for Open Media. All rights reserved
	*
	* This source code is subject to the terms of the BSD 2 Clause License and
	* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
	* was not distributed with this source code in the LICENSE file, you can
	* obtain it at www.aomedia.org/license/software. If the Alliance for Open
	* Media Patent License 1.0 was not distributed with this source code in the
	* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
	*/

	#include "av1/encoder/tune_vmaf.h"

	#include "aom_dsp/vmaf.h"
	#include "aom_ports/system_state.h"
	#include "av1/encoder/extend.h"

	// TODO(sdeng): Add the SIMD implementation.
	static AOM_INLINE void unsharp_rect(const uint8_t *source, int source_stride,
	const uint8_t *blurred, int blurred_stride,
	uint8_t *dst, int dst_stride, int w, int h,
	double amount) {
	for (int i = 0; i < h; ++i) {
	for (int j = 0; j < w; ++j) {
	const double val =
	(double)source[j] + amount * ((double)source[j] - (double)blurred[j]);
	dst[j] = (uint8_t)clamp((int)(val + 0.5), 0, 255);
	}
	source += source_stride;
	blurred += blurred_stride;
	dst += dst_stride;
	}
	}

	static AOM_INLINE void unsharp(const YV12_BUFFER_CONFIG *source,
	const YV12_BUFFER_CONFIG *blurred,
	const YV12_BUFFER_CONFIG *dst, double amount) {
	unsharp_rect(source->y_buffer, source->y_stride, blurred->y_buffer,
	blurred->y_stride, dst->y_buffer, dst->y_stride, source->y_width,
	source->y_height, amount);
	}

	// 8-tap Gaussian convolution filter with sigma = 1.0, sums to 128,
	// all co-efficients must be even.
	DECLARE_ALIGNED(16, static const int16_t, gauss_filter[8]) = { 0, 8, 30, 52,
	30, 8, 0, 0 };
	static AOM_INLINE void gaussian_blur(const AV1_COMP *const cpi,
	const YV12_BUFFER_CONFIG *source,
	const YV12_BUFFER_CONFIG *dst) {
	const AV1_COMMON *cm = &cpi->common;
	const ThreadData *td = &cpi->td;
	const MACROBLOCK *x = &td->mb;
	const MACROBLOCKD *xd = &x->e_mbd;

	const int block_size = BLOCK_128X128;

	const int num_mi_w = mi_size_wide[block_size];
	const int num_mi_h = mi_size_high[block_size];
	const int num_cols = (cm->mi_cols + num_mi_w - 1) / num_mi_w;
	const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
	int row, col;
	const int use_hbd = source->flags & YV12_FLAG_HIGHBITDEPTH;

	ConvolveParams conv_params = get_conv_params(0, 0, xd->bd);
	InterpFilterParams filter = { .filter_ptr = gauss_filter,
	.taps = 8,
	.subpel_shifts = 0,
	.interp_filter = EIGHTTAP_REGULAR };

	for (row = 0; row < num_rows; ++row) {
	for (col = 0; col < num_cols; ++col) {
	const int mi_row = row * num_mi_h;
	const int mi_col = col * num_mi_w;

	const int row_offset_y = mi_row << 2;
	const int col_offset_y = mi_col << 2;

	uint8_t *src_buf =
	source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
	uint8_t *dst_buf =
	dst->y_buffer + row_offset_y * dst->y_stride + col_offset_y;

	if (use_hbd) {
	av1_highbd_convolve_2d_sr(
	CONVERT_TO_SHORTPTR(src_buf), source->y_stride,
	CONVERT_TO_SHORTPTR(dst_buf), dst->y_stride, num_mi_w << 2,
	num_mi_h << 2, &filter, &filter, 0, 0, &conv_params, xd->bd);
	} else {
	av1_convolve_2d_sr(src_buf, source->y_stride, dst_buf, dst->y_stride,
	num_mi_w << 2, num_mi_h << 2, &filter, &filter, 0, 0,
	&conv_params);
	}
	}
	}
	}

	static double find_best_frame_unsharp_amount(
	const AV1_COMP const cpi, YV12_BUFFER_CONFIG const source,
	YV12_BUFFER_CONFIG *const blurred) {
	const AV1_COMMON *const cm = &cpi->common;
	const int width = source->y_width;
	const int height = source->y_height;

	YV12_BUFFER_CONFIG sharpened;
	memset(&sharpened, 0, sizeof(sharpened));
	aom_alloc_frame_buffer(&sharpened, width, height, 1, 1,
	cm->seq_params.use_highbitdepth,
	cpi->oxcf.border_in_pixels, cm->byte_alignment);

	double unsharp_amount = 0.0;
	const double step_size = 0.05;
	const double max_vmaf_score = 100.0;

	double best_vmaf;
	aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, source, &best_vmaf);

	// We may get the same best VMAF scores for different unsharp_amount values.
	// We pick the average value in this case.
	double best_unsharp_amount_begin = best_vmaf == max_vmaf_score ? 0.0 : -1.0;
	bool exit_loop = false;

	int loop_count = 0;
	const int max_loop_count = 20;
	while (!exit_loop) {
	unsharp_amount += step_size;
	unsharp(source, blurred, &sharpened, unsharp_amount);
	double new_vmaf;
	aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, &sharpened, &new_vmaf);
	if (new_vmaf < best_vmaf \|\| loop_count == max_loop_count) {
	exit_loop = true;
	} else {
	if (new_vmaf == max_vmaf_score && best_unsharp_amount_begin < 0.0) {
	best_unsharp_amount_begin = unsharp_amount;
	}
	best_vmaf = new_vmaf;
	}
	loop_count++;
	}

	aom_free_frame_buffer(&sharpened);

	unsharp_amount -= step_size;
	if (best_unsharp_amount_begin >= 0.0) {
	unsharp_amount = (unsharp_amount + best_unsharp_amount_begin) / 2.0;
	}

	return unsharp_amount;
	}

	void av1_vmaf_preprocessing(const AV1_COMP *const cpi,
	YV12_BUFFER_CONFIG *const source,
	bool use_block_based_method) {
	const int use_hbd = source->flags & YV12_FLAG_HIGHBITDEPTH;
	// TODO(sdeng): Add high bit depth support.
	if (use_hbd) {
	printf(
	"VMAF preprocessing for high bit depth videos is unsupported yet.\n");
	exit(0);
	}

	aom_clear_system_state();
	const AV1_COMMON *const cm = &cpi->common;
	const int width = source->y_width;
	const int height = source->y_height;
	YV12_BUFFER_CONFIG source_extended, blurred, sharpened;
	memset(&source_extended, 0, sizeof(source_extended));
	memset(&blurred, 0, sizeof(blurred));
	memset(&sharpened, 0, sizeof(sharpened));
	aom_alloc_frame_buffer(&source_extended, width, height, 1, 1,
	cm->seq_params.use_highbitdepth,
	cpi->oxcf.border_in_pixels, cm->byte_alignment);
	aom_alloc_frame_buffer(&blurred, width, height, 1, 1,
	cm->seq_params.use_highbitdepth,
	cpi->oxcf.border_in_pixels, cm->byte_alignment);
	aom_alloc_frame_buffer(&sharpened, width, height, 1, 1,
	cm->seq_params.use_highbitdepth,
	cpi->oxcf.border_in_pixels, cm->byte_alignment);

	av1_copy_and_extend_frame(source, &source_extended);
	av1_copy_and_extend_frame(source, &sharpened);

	gaussian_blur(cpi, &source_extended, &blurred);
	aom_free_frame_buffer(&source_extended);
	const double best_frame_unsharp_amount =
	find_best_frame_unsharp_amount(cpi, source, &blurred);

	if (!use_block_based_method) {
	unsharp(source, &blurred, source, best_frame_unsharp_amount);
	aom_free_frame_buffer(&sharpened);
	aom_free_frame_buffer(&blurred);
	aom_clear_system_state();
	return;
	}

	const int block_size = BLOCK_128X128;
	const int num_mi_w = mi_size_wide[block_size];
	const int num_mi_h = mi_size_high[block_size];
	const int num_cols = (cm->mi_cols + num_mi_w - 1) / num_mi_w;
	const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
	const int block_w = num_mi_w << 2;
	const int block_h = num_mi_h << 2;
	double *best_unsharp_amounts =
	aom_malloc(sizeof(best_unsharp_amounts) num_cols * num_rows);
	memset(best_unsharp_amounts, 0,
	sizeof(best_unsharp_amounts) num_cols * num_rows);

	for (int row = 0; row < num_rows; ++row) {
	for (int col = 0; col < num_cols; ++col) {
	const int mi_row = row * num_mi_h;
	const int mi_col = col * num_mi_w;

	const int row_offset_y = mi_row << 2;
	const int col_offset_y = mi_col << 2;

	const int block_width = AOMMIN(source->y_width - col_offset_y, block_w);
	const int block_height = AOMMIN(source->y_height - row_offset_y, block_h);

	uint8_t *src_buf =
	source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
	uint8_t *blurred_buf =
	blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;
	uint8_t *dst_buf =
	sharpened.y_buffer + row_offset_y * sharpened.y_stride + col_offset_y;

	const int index = col + row * num_cols;
	const double step_size = 0.1;
	double amount = AOMMAX(best_frame_unsharp_amount - 0.2, step_size);
	unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
	dst_buf, sharpened.y_stride, block_width, block_height,
	amount);
	double best_vmaf;
	aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, &sharpened, &best_vmaf);

	// Find the best unsharp amount.
	bool exit_loop = false;
	while (!exit_loop && amount < best_frame_unsharp_amount + 0.2) {
	amount += step_size;
	unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
	dst_buf, sharpened.y_stride, block_width, block_height,
	amount);

	double new_vmaf;
	aom_calc_vmaf(cpi->oxcf.vmaf_model_path, source, &sharpened, &new_vmaf);
	if (new_vmaf <= best_vmaf) {
	exit_loop = true;
	amount -= step_size;
	} else {
	best_vmaf = new_vmaf;
	}
	}
	best_unsharp_amounts[index] = amount;
	// Reset blurred frame
	unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
	dst_buf, sharpened.y_stride, block_width, block_height, 0.0);
	}
	}

	// Apply best blur amounts
	for (int row = 0; row < num_rows; ++row) {
	for (int col = 0; col < num_cols; ++col) {
	const int mi_row = row * num_mi_h;
	const int mi_col = col * num_mi_w;
	const int row_offset_y = mi_row << 2;
	const int col_offset_y = mi_col << 2;
	const int block_width = AOMMIN(source->y_width - col_offset_y, block_w);
	const int block_height = AOMMIN(source->y_height - row_offset_y, block_h);
	const int index = col + row * num_cols;
	uint8_t *src_buf =
	source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
	uint8_t *blurred_buf =
	blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;
	unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
	src_buf, source->y_stride, block_width, block_height,
	best_unsharp_amounts[index]);
	}
	}

	aom_free_frame_buffer(&sharpened);
	aom_free_frame_buffer(&blurred);
	aom_free(best_unsharp_amounts);
	aom_clear_system_state();
	}

	// TODO(sdeng): replace it with the SIMD version.
	static AOM_INLINE double image_mse_c(const uint8_t *src, int src_stride,
	const uint8_t *ref, int ref_stride, int w,
	int h) {
	double accum = 0.0;
	int i, j;

	for (i = 0; i < h; ++i) {
	for (j = 0; j < w; ++j) {
	double img1px = src[i * src_stride + j];
	double img2px = ref[i * ref_stride + j];

	accum += (img1px - img2px) * (img1px - img2px);
	}
	}

	return accum / (double)(w * h);
	}

	void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
	AV1_COMMON *cm = &cpi->common;
	ThreadData *td = &cpi->td;
	MACROBLOCK *x = &td->mb;
	MACROBLOCKD *xd = &x->e_mbd;
	uint8_t *const y_buffer = cpi->source->y_buffer;
	const int y_stride = cpi->source->y_stride;
	const int y_width = cpi->source->y_width;
	const int y_height = cpi->source->y_height;
	const int block_size = BLOCK_64X64;

	const int num_mi_w = mi_size_wide[block_size];
	const int num_mi_h = mi_size_high[block_size];
	const int num_cols = (cm->mi_cols + num_mi_w - 1) / num_mi_w;
	const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
	const int block_w = num_mi_w << 2;
	const int block_h = num_mi_h << 2;
	const int use_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH;
	// TODO(sdeng): Add high bit depth support.
	if (use_hbd) {
	printf("VMAF RDO for high bit depth videos is unsupported yet.\n");
	exit(0);
	}

	aom_clear_system_state();
	YV12_BUFFER_CONFIG fake_recon, blurred;
	memset(&fake_recon, 0, sizeof(fake_recon));
	memset(&blurred, 0, sizeof(blurred));
	aom_alloc_frame_buffer(&fake_recon, y_width, y_height, 1, 1,
	cm->seq_params.use_highbitdepth,
	cpi->oxcf.border_in_pixels, cm->byte_alignment);
	aom_alloc_frame_buffer(&blurred, y_width, y_height, 1, 1,
	cm->seq_params.use_highbitdepth,
	cpi->oxcf.border_in_pixels, cm->byte_alignment);

	gaussian_blur(cpi, cpi->source, &blurred);

	// baseline vmaf
	double baseline_mse = 0.0, baseline_vmaf = 0.0;
	aom_calc_vmaf(cpi->oxcf.vmaf_model_path, cpi->source, cpi->source,
	&baseline_vmaf);
	av1_copy_and_extend_frame(cpi->source, &fake_recon);

	// Loop through each 'block_size' block.
	for (int row = 0; row < num_rows; ++row) {
	for (int col = 0; col < num_cols; ++col) {
	const int mi_row = row * num_mi_h;
	const int mi_col = col * num_mi_w;
	const int index = row * num_cols + col;
	const int row_offset_y = mi_row << 2;
	const int col_offset_y = mi_col << 2;

	uint8_t *const orig_buf =
	y_buffer + row_offset_y * y_stride + col_offset_y;
	uint8_t *const blurred_buf =
	blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;
	uint8_t *const fybuf = fake_recon.y_buffer +
	row_offset_y * fake_recon.y_stride + col_offset_y;

	const int block_width = AOMMIN(y_width - col_offset_y, block_w);
	const int block_height = AOMMIN(y_height - row_offset_y, block_h);

	// Set the blurred block.
	unsharp_rect(orig_buf, y_stride, blurred_buf, blurred.y_stride, fybuf,
	fake_recon.y_stride, block_width, block_height, -1.0);

	double vmaf, mse;
	aom_calc_vmaf(cpi->oxcf.vmaf_model_path, cpi->source, &fake_recon, &vmaf);
	mse = image_mse_c(y_buffer, y_stride, fake_recon.y_buffer,
	fake_recon.y_stride, y_width, y_height);

	double weight = 0.0;
	const double dvmaf = baseline_vmaf - vmaf;
	const double dmse = mse - baseline_mse;
	const double eps = 0.01 / (num_rows * num_cols);

	if (dvmaf < eps \|\| dmse < eps) {
	weight = 1.0;
	} else {
	weight = dmse / dvmaf;
	}

	// Normalize it with a data fitted model.
	weight = 6.0 * (1.0 - exp(-0.05 * weight)) + 0.8;
	cpi->vmaf_rdmult_scaling_factors[index] = weight;

	// Reset blurred block.
	unsharp_rect(orig_buf, y_stride, blurred_buf, blurred.y_stride, fybuf,
	fake_recon.y_stride, block_width, block_height, 0.0);
	}
	}

	aom_free_frame_buffer(&fake_recon);
	aom_free_frame_buffer(&blurred);
	aom_clear_system_state();
	(void)xd;
	}

	void av1_set_vmaf_rdmult(const AV1_COMP const cpi, MACROBLOCK const x,
	const BLOCK_SIZE bsize, const int mi_row,
	const int mi_col, int *const rdmult) {
	const AV1_COMMON *const cm = &cpi->common;

	const int bsize_base = BLOCK_64X64;
	const int num_mi_w = mi_size_wide[bsize_base];
	const int num_mi_h = mi_size_high[bsize_base];
	const int num_cols = (cm->mi_cols + num_mi_w - 1) / num_mi_w;
	const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
	const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
	const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;
	int row, col;
	double num_of_mi = 0.0;
	double geom_mean_of_scale = 0.0;

	aom_clear_system_state();
	for (row = mi_row / num_mi_w;
	row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
	for (col = mi_col / num_mi_h;
	col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) {
	const int index = row * num_cols + col;
	geom_mean_of_scale += log(cpi->vmaf_rdmult_scaling_factors[index]);
	num_of_mi += 1.0;
	}
	}
	geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi);

	rdmult = (int)((double)(rdmult) * geom_mean_of_scale + 0.5);
	rdmult = AOMMAX(rdmult, 0);
	set_error_per_bit(x, *rdmult);
	aom_clear_system_state();
	}