/*
* Copyright (c) 2020, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "av1/common/mfqe.h"
#include "av1/common/resize.h"
#include "av1/encoder/rdopt.h"
#define MFQE_GAUSSIAN_BLOCK 32 // Size of blocks on which to apply Gaussian blur.
// 8-tap Gaussian convolution filter with sigma = 1.3; the taps sum to 128,
// and all coefficients must be even.
DECLARE_ALIGNED(16, static const int16_t, gauss_filter[8]) = { 2, 12, 30, 40,
30, 12, 2, 0 };
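// Note that the final tap is zero, so this is effectively a symmetric 7-tap
// kernel padded out to the 8-tap width expected by the convolve functions.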
// Apply Gaussian blur to the block in src and save the result in dst.
void gaussian_blur(const uint8_t *src, int src_stride, int w, int h,
uint8_t *dst, bool high_bd, int bd) {
ConvolveParams conv_params = get_conv_params(0, 0, bd);
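// The same kernel is used for both the horizontal and vertical passes, so
// the 2D blur is separable.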
InterpFilterParams filter = { .filter_ptr = gauss_filter,
.taps = 8,
.subpel_shifts = 0,
.interp_filter = EIGHTTAP_REGULAR };
// Requirements from the vector-optimized implementations.
assert(h % 4 == 0);
assert(w % 8 == 0);
// Because we use an eight-tap filter, the stride must be at least w + 7.
assert(src_stride >= w + 7);
if (high_bd) {
av1_highbd_convolve_2d_sr(CONVERT_TO_SHORTPTR(src), src_stride,
CONVERT_TO_SHORTPTR(dst), src_stride, w, h,
&filter, &filter, 0, 0, &conv_params, bd);
} else {
av1_convolve_2d_sr(src, src_stride, dst, src_stride, w, h, &filter, &filter,
0, 0, &conv_params);
}
}
// Apply Gaussian blur to a single plane and store the result in the destination.
static void mfqe_gaussian_blur(uint8_t *src, uint8_t *dst, int stride, int h,
int w, int high_bd, int bd) {
for (int col = 0; col < h; col += MFQE_GAUSSIAN_BLOCK) {
for (int row = 0; row < w; row += MFQE_GAUSSIAN_BLOCK) {
int w_size = AOMMIN(MFQE_GAUSSIAN_BLOCK, w - row);
int h_size = AOMMIN(MFQE_GAUSSIAN_BLOCK, h - col);
uint8_t *src_ptr = src + col * stride + row;
uint8_t *dst_ptr = dst + col * stride + row;
// Gaussian blur can only operate on blocks with width % 8 == 0 and
// height % 4 == 0. If the remaining edge block is smaller, skip the rest
// of this block row.
if (w_size % 8 != 0 || h_size % 4 != 0) break;
gaussian_blur(src_ptr, stride, w_size, h_size, dst_ptr, high_bd, bd);
}
}
}
// Resize a single plane with the given resize factor and store in destination.
static void mfqe_resize_plane(const uint8_t *src, uint8_t *dst, int stride,
int h, int w, int resize_factor, int highbd,
int bd) {
int dst_stride = stride * resize_factor;
int dst_height = h * resize_factor;
int dst_width = w * resize_factor;
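// The destination buffer is assumed to have been allocated with
// resize_factor times the source stride and dimensions (see mfqe_alloc_buf).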
if (highbd)
av1_highbd_resize_plane(src, h, w, stride, dst, dst_height, dst_width,
dst_stride, bd);
else
av1_resize_plane(src, h, w, stride, dst, dst_height, dst_width, dst_stride);
}
// Calculate the sum of squared errors between two blocks in the buffers.
static int64_t aom_sse_lowbd(const uint8_t *a, int a_stride, const uint8_t *b,
int b_stride, int width, int height) {
int y, x;
int64_t sse = 0;
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
const int32_t diff = abs(a[x] - b[x]);
sse += diff * diff;
}
a += a_stride;
b += b_stride;
}
return sse;
}
// Calculate the sum of squared errors between two blocks in the buffers.
static int64_t aom_sse_highbd(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, int width,
int height) {
int y, x;
int64_t sse = 0;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
uint16_t *b = CONVERT_TO_SHORTPTR(b8);
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
const int32_t diff = (int32_t)(a[x]) - (int32_t)(b[x]);
sse += diff * diff;
}
a += a_stride;
b += b_stride;
}
return sse;
}
// Return the mean squared error between the given blocks in two buffers. If
// the row and column parameters are not valid indices, return MSE_MAX.
static double get_mse_block(Y_BUFFER_CONFIG buf1, Y_BUFFER_CONFIG buf2,
int16_t mb_row_1, int16_t mb_col_1,
int16_t mb_row_2, int16_t mb_col_2,
BLOCK_SIZE bsize, int highbd) {
int block_w = block_size_wide[bsize];
int block_h = block_size_high[bsize];
// Check that both blocks lie entirely within their buffers; if not, return
// MSE_MAX.
if ((mb_row_1 < 0) || (mb_col_1 < 0) || (mb_row_2 < 0) || (mb_col_2 < 0) ||
(mb_row_1 > buf1.height - block_h) ||
(mb_row_2 > buf2.height - block_h) ||
(mb_col_1 > buf1.width - block_w) || (mb_col_2 > buf2.width - block_w))
return MSE_MAX;
uint8_t *a = buf1.buffer + buf1.stride * mb_row_1 + mb_col_1;
uint8_t *b = buf2.buffer + buf2.stride * mb_row_2 + mb_col_2;
int64_t sse;
if (highbd)
sse = aom_sse_highbd(a, buf1.stride, b, buf2.stride, block_w, block_h);
else
sse = aom_sse_lowbd(a, buf1.stride, b, buf2.stride, block_w, block_h);
// Divide the sum of squared errors by the number of pixels in the block.
double mse = ((double)sse) / (block_w * block_h);
return mse;
}
// Perform initial diamond search to obtain the full-pixel motion vector for
// every block in the current frame, going through the points specified by the
// grid_search_rows and grid_search_cols defined in the header file.
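// For example, with 16x16 blocks the grid point (dr, dc) = (1, -1) tests the
// candidate one block below and one block to the left of the current MV.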
static void full_pix_diamond_search(MV_MFQE *mvr, int16_t mb_row,
int16_t mb_col, Y_BUFFER_CONFIG cur,
Y_BUFFER_CONFIG refs[], BLOCK_SIZE bsize,
int highbd) {
MV_MFQE mvr_best = *mvr;
double this_mse;
double best_mse = MSE_MAX;
int block_w = block_size_wide[bsize];
int block_h = block_size_high[bsize];
for (int ref_index = 0; ref_index < MFQE_NUM_REFS; ++ref_index) {
for (int point = 0; point < MFQE_N_GRID_SEARCH; ++point) {
int16_t dr = grid_search_rows[point];
int16_t dc = grid_search_cols[point];
int16_t mb_col_ref = mb_col + mvr->mv.col + dc * block_w;
int16_t mb_row_ref = mb_row + mvr->mv.row + dr * block_h;
this_mse = get_mse_block(cur, refs[ref_index], mb_row, mb_col, mb_row_ref,
mb_col_ref, bsize, highbd);
// Store the motion vector with lowest mean squared error.
if (this_mse < best_mse) {
best_mse = this_mse;
mvr_best.mv.col = mvr->mv.col + dc * block_w;
mvr_best.mv.row = mvr->mv.row + dr * block_h;
mvr_best.ref_index = ref_index;
}
}
}
*mvr = mvr_best;
}
// Perform full pixel motion vector search in the low frequency version of the
// current frame and reference frames.
static void full_pixel_search(MV_MFQE *mvr, int16_t mb_row, int16_t mb_col,
Y_BUFFER_CONFIG cur, Y_BUFFER_CONFIG refs[],
BLOCK_SIZE bsize, int highbd) {
MV_MFQE mvr_init = *mvr;
// Perform diamond search to obtain the initial motion vector.
full_pix_diamond_search(&mvr_init, mb_row, mb_col, cur, refs, bsize, highbd);
mvr->valid = 0;
mvr->ref_index = mvr_init.ref_index;
int mb_row_ref;
int mb_col_ref;
int search_size_w = block_size_wide[bsize] / 2;
int search_size_h = block_size_high[bsize] / 2;
double this_mse;
double best_mse = MSE_MAX;
for (int dr = -search_size_h; dr <= search_size_h; ++dr) {
for (int dc = -search_size_w; dc <= search_size_w; ++dc) {
mb_row_ref = mb_row + mvr_init.mv.row + dr;
mb_col_ref = mb_col + mvr_init.mv.col + dc;
this_mse = get_mse_block(cur, refs[mvr_init.ref_index], mb_row, mb_col,
mb_row_ref, mb_col_ref, bsize, highbd);
// The motion vector points to the block with lowest mean squared error.
if (this_mse < best_mse && this_mse < MFQE_MSE_THRESHOLD) {
best_mse = this_mse;
mvr->valid = 1;
mvr->mv.row = mvr_init.mv.row + dr;
mvr->mv.col = mvr_init.mv.col + dc;
mvr->alpha = get_alpha_weight(this_mse);
}
}
}
}
static BLOCK_SIZE scale_block_4X4(int scale) {
if (scale == 1) return BLOCK_4X4;
if (scale == 2) return BLOCK_8X8;
if (scale == 4) return BLOCK_16X16;
if (scale == 8) return BLOCK_32X32;
return BLOCK_4X4;
}
static BLOCK_SIZE scale_block_8X8(int scale) {
if (scale == 1) return BLOCK_8X8;
if (scale == 2) return BLOCK_16X16;
if (scale == 4) return BLOCK_32X32;
if (scale == 8) return BLOCK_64X64;
return BLOCK_8X8;
}
static BLOCK_SIZE scale_block_16X16(int scale) {
if (scale == 1) return BLOCK_16X16;
if (scale == 2) return BLOCK_32X32;
if (scale == 4) return BLOCK_64X64;
if (scale == 8) return BLOCK_128X128;
return BLOCK_16X16;
}
// Scale up the given BLOCK_SIZE by the resizing factor.
static BLOCK_SIZE scale_block_size(BLOCK_SIZE bsize, int scale) {
// Currently supports only 4X4, 8X8, and 16X16 blocks.
assert(bsize == BLOCK_4X4 || bsize == BLOCK_8X8 || bsize == BLOCK_16X16);
if (bsize == BLOCK_4X4) return scale_block_4X4(scale);
if (bsize == BLOCK_8X8) return scale_block_8X8(scale);
return scale_block_16X16(scale);
}
// Perform finer-grained motion vector search at subpel level, then save the
// updated motion vector in MV_MFQE.
static void sub_pixel_search(MV_MFQE *mvr, int16_t mb_row, int16_t mb_col,
Y_BUFFER_CONFIG cur, Y_BUFFER_CONFIG refs[],
BLOCK_SIZE bsize, int resize_factor, int highbd) {
mb_row *= resize_factor;
mb_col *= resize_factor;
bsize = scale_block_size(bsize, resize_factor);
int search_size = resize_factor / 2;
int mb_row_ref;
int mb_col_ref;
double this_mse;
double best_mse = MSE_MAX;
int best_subpel_x_qn = 0;
int best_subpel_y_qn = 0;
// Search the neighboring search_size pixels at subpel accuracy, where
// search_size is half the resize factor. For example, if the image is scaled
// by a factor of 8, the algorithm searches the adjacent 4 pixels in the
// resized image.
for (int dr = -search_size; dr <= search_size; ++dr) {
for (int dc = -search_size; dc <= search_size; ++dc) {
mb_row_ref = mb_row + mvr->mv.row * resize_factor + dr;
mb_col_ref = mb_col + mvr->mv.col * resize_factor + dc;
this_mse = get_mse_block(cur, refs[mvr->ref_index], mb_row, mb_col,
mb_row_ref, mb_col_ref, bsize, highbd);
if (this_mse < best_mse) {
best_mse = this_mse;
// dc is the horizontal (x) offset; dr is the vertical (y) offset.
best_subpel_x_qn = dc;
best_subpel_y_qn = dr;
}
}
}
mvr->subpel_x_qn = best_subpel_x_qn;
mvr->subpel_y_qn = best_subpel_y_qn;
}
// Replace the block in current frame using the block from the reference frame,
// using subpel motion vectors and interpolating the reference block. The block
// is replaced via weighted blending, using the alpha value calculated from
// the mean squared error between the current block and reference block.
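// For example, with alpha = 0.75 each output pixel becomes
// round(0.75 * ref_interp + 0.25 * cur).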
static void replace_block_alpha(Y_BUFFER_CONFIG tmp, Y_BUFFER_CONFIG refs_sub[],
MV_MFQE *mvr, int16_t mb_row, int16_t mb_col,
BLOCK_SIZE bsize, int resize_factor,
uint8_t *swap_block) {
int16_t mb_row_ref =
(mb_row + mvr->mv.row) * resize_factor + mvr->subpel_y_qn;
int16_t mb_col_ref =
(mb_col + mvr->mv.col) * resize_factor + mvr->subpel_x_qn;
Y_BUFFER_CONFIG ref = refs_sub[mvr->ref_index];
uint8_t *src = ref.buffer + mb_row_ref * ref.stride + mb_col_ref;
uint8_t *dst = tmp.buffer + mb_row * tmp.stride + mb_col;
int src_stride = ref.stride;
int dst_stride = tmp.stride;
int block_w = block_size_wide[bsize];
int block_h = block_size_high[bsize];
ConvolveParams conv_params = get_conv_params(0, AOM_PLANE_Y, 8);
int_interpfilters interp_filters;
interp_filters.as_filters.x_filter = EIGHTTAP_REGULAR;
interp_filters.as_filters.y_filter = EIGHTTAP_REGULAR;
// Interpolate the reference block into the scratch buffer, then blend it
// into the current frame with the alpha weight, mirroring the high bitdepth
// path below.
av1_convolve_2d_facade(src, src_stride, swap_block, dst_stride, block_w,
block_h, block_w, block_h, interp_filters, mvr->subpel_x_qn, 0,
mvr->subpel_y_qn, 0, 0, &conv_params, 0);
uint8_t *swap_ptr = swap_block;
uint8_t *dst_ptr = dst;
double alpha = mvr->alpha;
double beta = 1 - alpha;
for (int row = 0; row < block_h; ++row) {
for (int col = 0; col < block_w; ++col) {
double p1 = (double)swap_ptr[col];
double p2 = (double)dst_ptr[col];
double p = floor(alpha * p1 + beta * p2 + 0.5);
p = p > 255 ? 255 : (p < 0 ? 0 : p); // Clamp to the valid 8-bit range.
dst_ptr[col] = (uint8_t)p;
}
swap_ptr += dst_stride;
dst_ptr += dst_stride;
}
}
// High bitdepth version of replace_block_alpha, using the same method.
static void replace_block_alpha_highbd(Y_BUFFER_CONFIG tmp,
Y_BUFFER_CONFIG refs_sub[], MV_MFQE *mvr,
int16_t mb_row, int16_t mb_col,
BLOCK_SIZE bsize, int resize_factor,
uint16_t *swap_block, int bd) {
int16_t mb_row_ref =
(mb_row + mvr->mv.row) * resize_factor + mvr->subpel_y_qn;
int16_t mb_col_ref =
(mb_col + mvr->mv.col) * resize_factor + mvr->subpel_x_qn;
Y_BUFFER_CONFIG ref = refs_sub[mvr->ref_index];
uint8_t *src = ref.buffer + mb_row_ref * ref.stride + mb_col_ref;
uint8_t *dst = tmp.buffer + mb_row * tmp.stride + mb_col;
int src_stride = ref.stride;
int dst_stride = tmp.stride;
int block_w = block_size_wide[bsize];
int block_h = block_size_high[bsize];
av1_highbd_resize_plane(src, block_h * resize_factor, block_w * resize_factor,
src_stride, CONVERT_TO_BYTEPTR(swap_block), block_h,
block_w, dst_stride, bd);
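// The (block_h * resize_factor) x (block_w * resize_factor) region of the
// upscaled reference is resized back down to block_h x block_w, which
// realizes the subpel offsets selected in sub_pixel_search.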
uint16_t *swap_ptr = swap_block;
uint16_t *dst_ptr = CONVERT_TO_SHORTPTR(dst);
double alpha = mvr->alpha;
double beta = 1 - alpha;
const double pixel_max = (double)((1 << bd) - 1); // Max value at this bit depth.
for (int row = 0; row < block_h; ++row) {
for (int col = 0; col < block_w; ++col) {
double p1 = (double)swap_ptr[col];
double p2 = (double)dst_ptr[col];
double p = floor(alpha * p1 + beta * p2 + 0.5);
p = p > pixel_max ? pixel_max : (p < 0 ? 0 : p); // Clamp to the valid range.
dst_ptr[col] = (uint16_t)p;
}
swap_ptr += dst_stride;
dst_ptr += dst_stride;
}
}
// Dynamically allocate memory for a single buffer.
static void mfqe_alloc_buf(Y_BUFFER_CONFIG *buf, int stride, int h, int w,
int resize_factor) {
buf->stride = stride * resize_factor;
buf->height = h * resize_factor;
buf->width = w * resize_factor;
// The buffer adds padding to the top and bottom for tap filters to be used
// in Gaussian Blur and resizing. buffer points to the start of the frame and
// buffer_orig points to the originally allocated buffer including padding.
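//
// buffer_orig -> [ padding rows | frame rows (buffer) | padding rows ]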
int buf_bytes = buf->stride * (buf->height + 2 * MFQE_PADDING_SIZE);
buf->buffer_orig = aom_memalign(32, sizeof(uint8_t) * buf_bytes);
memset(buf->buffer_orig, 0, sizeof(uint8_t) * buf_bytes);
buf->buffer = buf->buffer_orig + buf->stride * MFQE_PADDING_SIZE;
}
// Dynamically allocate memory for a single buffer in high bitdepth version.
static void mfqe_alloc_buf_highbd(Y_BUFFER_CONFIG *buf, int stride, int h,
int w, int resize_factor) {
buf->stride = stride * resize_factor;
buf->height = h * resize_factor;
buf->width = w * resize_factor;
// The buffer adds padding to the top and bottom for tap filters to be used
// in Gaussian Blur and resizing. buffer points to the start of the frame and
// buffer_orig points to the originally allocated buffer including padding.
int buf_bytes = buf->stride * (buf->height + 2 * MFQE_PADDING_SIZE);
uint16_t *buffer_orig = aom_memalign(32, sizeof(uint16_t) * buf_bytes);
memset(buffer_orig, 0, sizeof(uint16_t) * buf_bytes);
uint16_t *buffer = buffer_orig + buf->stride * MFQE_PADDING_SIZE;
buf->buffer_orig = CONVERT_TO_BYTEPTR(buffer_orig);
buf->buffer = CONVERT_TO_BYTEPTR(buffer);
}
// Dynamically allocate memory to be used for av1_apply_loop_mfqe.
static void mfqe_mem_alloc(Y_BUFFER_CONFIG *tmp, RefCntBuffer *ref_frames[],
Y_BUFFER_CONFIG *tmp_low, Y_BUFFER_CONFIG *tmp_sub,
Y_BUFFER_CONFIG *refs_low, Y_BUFFER_CONFIG *refs_sub,
int resize_factor) {
mfqe_alloc_buf(tmp_low, tmp->stride, tmp->height, tmp->width, 1);
mfqe_alloc_buf(tmp_sub, tmp->stride, tmp->height, tmp->width, resize_factor);
YV12_BUFFER_CONFIG *ref;
for (int i = 0; i < MFQE_NUM_REFS; i++) {
ref = &ref_frames[i]->buf;
mfqe_alloc_buf(&refs_low[i], ref->y_stride, ref->y_height, ref->y_width, 1);
mfqe_alloc_buf(&refs_sub[i], ref->y_stride, ref->y_height, ref->y_width,
resize_factor);
}
}
// Dynamically allocate memory to be used for av1_apply_loop_mfqe.
static void mfqe_mem_alloc_highbd(
Y_BUFFER_CONFIG *tmp, RefCntBuffer *ref_frames[], Y_BUFFER_CONFIG *tmp_low,
Y_BUFFER_CONFIG *tmp_sub, Y_BUFFER_CONFIG *refs_low,
Y_BUFFER_CONFIG *refs_sub, int resize_factor) {
mfqe_alloc_buf_highbd(tmp_low, tmp->stride, tmp->height, tmp->width, 1);
mfqe_alloc_buf_highbd(tmp_sub, tmp->stride, tmp->height, tmp->width,
resize_factor);
YV12_BUFFER_CONFIG *ref;
for (int i = 0; i < MFQE_NUM_REFS; i++) {
ref = &ref_frames[i]->buf;
mfqe_alloc_buf_highbd(&refs_low[i], ref->y_stride, ref->y_height,
ref->y_width, 1);
mfqe_alloc_buf_highbd(&refs_sub[i], ref->y_stride, ref->y_height,
ref->y_width, resize_factor);
}
}
// Free all of the dynamically allocated memory inside av1_apply_loop_mfqe.
static void mfqe_mem_free(Y_BUFFER_CONFIG *tmp_low, Y_BUFFER_CONFIG *tmp_sub,
Y_BUFFER_CONFIG *refs_low,
Y_BUFFER_CONFIG *refs_sub) {
aom_free(tmp_low->buffer_orig);
aom_free(tmp_sub->buffer_orig);
for (int i = 0; i < MFQE_NUM_REFS; i++) {
aom_free(refs_low[i].buffer_orig);
aom_free(refs_sub[i].buffer_orig);
}
}
// Free all of the dynamically allocated memory inside av1_apply_loop_mfqe.
static void mfqe_mem_free_highbd(Y_BUFFER_CONFIG *tmp_low,
Y_BUFFER_CONFIG *tmp_sub,
Y_BUFFER_CONFIG *refs_low,
Y_BUFFER_CONFIG *refs_sub) {
aom_free(CONVERT_TO_SHORTPTR(tmp_low->buffer_orig));
aom_free(CONVERT_TO_SHORTPTR(tmp_sub->buffer_orig));
for (int i = 0; i < MFQE_NUM_REFS; i++) {
aom_free(CONVERT_TO_SHORTPTR(refs_low[i].buffer_orig));
aom_free(CONVERT_TO_SHORTPTR(refs_sub[i].buffer_orig));
}
}
// Apply In-Loop Multi-Frame Quality Enhancement for low bitdepth version.
static void apply_loop_mfqe_lowbd(Y_BUFFER_CONFIG *tmp,
RefCntBuffer *ref_frames[], BLOCK_SIZE bsize,
int resize_factor, int bd) {
Y_BUFFER_CONFIG tmp_low;
Y_BUFFER_CONFIG tmp_sub;
// Contains blurred versions of reference frames.
Y_BUFFER_CONFIG refs_low[MFQE_NUM_REFS];
// Contains resized versions of reference frames.
Y_BUFFER_CONFIG refs_sub[MFQE_NUM_REFS];
mfqe_mem_alloc(tmp, ref_frames, &tmp_low, &tmp_sub, refs_low, refs_sub,
resize_factor);
mfqe_gaussian_blur(tmp->buffer, tmp_low.buffer, tmp->stride, tmp->height,
tmp->width, 0, bd);
mfqe_resize_plane(tmp->buffer, tmp_sub.buffer, tmp->stride, tmp->height,
tmp->width, resize_factor, 0, bd);
YV12_BUFFER_CONFIG *ref;
for (int i = 0; i < MFQE_NUM_REFS; i++) {
ref = &ref_frames[i]->buf;
mfqe_gaussian_blur(ref->y_buffer, refs_low[i].buffer, ref->y_stride,
ref->y_height, ref->y_width, 0, bd);
mfqe_resize_plane(ref->y_buffer, refs_sub[i].buffer, ref->y_stride,
ref->y_height, ref->y_width, resize_factor, 0, bd);
}
MV_MFQE mvr;
int block_w = block_size_wide[bsize];
int block_h = block_size_high[bsize];
int block_bytes = (block_h + 2 * MFQE_PADDING_SIZE) * tmp->stride;
uint8_t *block_orig = aom_memalign(32, sizeof(uint8_t) * block_bytes);
uint8_t *swap_block = block_orig + MFQE_PADDING_SIZE * tmp->stride;
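// swap_block is scratch space that holds the interpolated reference block
// before it is blended into the current frame.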
for (int16_t mb_row = 0; mb_row < tmp->height; mb_row += block_h) {
for (int16_t mb_col = 0; mb_col < tmp->width; mb_col += block_w) {
mvr = kZeroMvMFQE;
memset(block_orig, 0, sizeof(uint8_t) * block_bytes);
full_pixel_search(&mvr, mb_row, mb_col, tmp_low, refs_low, bsize, 0);
if (!mvr.valid) continue; // Skip the block if MSE exceeds the threshold.
sub_pixel_search(&mvr, mb_row, mb_col, tmp_sub, refs_sub, bsize,
resize_factor, 0);
replace_block_alpha(*tmp, refs_sub, &mvr, mb_row, mb_col, bsize,
resize_factor, swap_block);
}
}
// Free all of the dynamically allocated resources.
aom_free(block_orig);
mfqe_mem_free(&tmp_low, &tmp_sub, refs_low, refs_sub);
}
// Apply In-Loop Multi-Frame Quality Enhancement for high bitdepth version.
static void apply_loop_mfqe_highbd(Y_BUFFER_CONFIG *tmp,
RefCntBuffer *ref_frames[], BLOCK_SIZE bsize,
int resize_factor, int bd) {
Y_BUFFER_CONFIG tmp_low;
Y_BUFFER_CONFIG tmp_sub;
// Contains blurred versions of reference frames.
Y_BUFFER_CONFIG refs_low[MFQE_NUM_REFS];
// Contains resized versions of reference frames.
Y_BUFFER_CONFIG refs_sub[MFQE_NUM_REFS];
mfqe_mem_alloc_highbd(tmp, ref_frames, &tmp_low, &tmp_sub, refs_low, refs_sub,
resize_factor);
mfqe_gaussian_blur(tmp->buffer, tmp_low.buffer, tmp->stride, tmp->height,
tmp->width, 1, bd);
mfqe_resize_plane(tmp->buffer, tmp_sub.buffer, tmp->stride, tmp->height,
tmp->width, resize_factor, 1, bd);
YV12_BUFFER_CONFIG *ref;
for (int i = 0; i < MFQE_NUM_REFS; i++) {
ref = &ref_frames[i]->buf;
mfqe_gaussian_blur(ref->y_buffer, refs_low[i].buffer, ref->y_stride,
ref->y_height, ref->y_width, 1, bd);
mfqe_resize_plane(ref->y_buffer, refs_sub[i].buffer, ref->y_stride,
ref->y_height, ref->y_width, resize_factor, 1, bd);
}
MV_MFQE mvr;
int block_w = block_size_wide[bsize];
int block_h = block_size_high[bsize];
int block_bytes = (block_h + 2 * MFQE_PADDING_SIZE) * tmp->stride;
uint16_t *block_orig = aom_memalign(32, sizeof(uint16_t) * block_bytes);
uint16_t *swap_block = block_orig + MFQE_PADDING_SIZE * tmp->stride;
for (int16_t mb_row = 0; mb_row < tmp->height; mb_row += block_h) {
for (int16_t mb_col = 0; mb_col < tmp->width; mb_col += block_w) {
mvr = kZeroMvMFQE;
memset(block_orig, 0, sizeof(uint16_t) * block_bytes);
full_pixel_search(&mvr, mb_row, mb_col, tmp_low, refs_low, bsize, 1);
if (!mvr.valid) continue; // Skip the block if MSE exceeds the threshold.
sub_pixel_search(&mvr, mb_row, mb_col, tmp_sub, refs_sub, bsize,
resize_factor, 1);
replace_block_alpha_highbd(*tmp, refs_sub, &mvr, mb_row, mb_col, bsize,
resize_factor, swap_block, bd);
}
}
// Free all of the dynamically allocated resources.
aom_free(block_orig);
mfqe_mem_free_highbd(&tmp_low, &tmp_sub, refs_low, refs_sub);
}
void av1_apply_loop_mfqe(Y_BUFFER_CONFIG *tmp, RefCntBuffer *ref_frames[],
BLOCK_SIZE bsize, int resize_factor, int high_bd,
int bd) {
if (high_bd)
apply_loop_mfqe_highbd(tmp, ref_frames, bsize, resize_factor, bd);
else
apply_loop_mfqe_lowbd(tmp, ref_frames, bsize, resize_factor, bd);
}
// Copy the buffer from source to destination for a single plane.
static void copy_single_plane_lowbd(const uint8_t *src_buf, uint8_t *dst_buf,
int src_stride, int dst_stride, int h,
int w) {
for (int row = 0; row < h; row++) {
memcpy(dst_buf, src_buf, w);
src_buf += src_stride;
dst_buf += dst_stride;
}
}
// Copy the buffer from source to destination for a single plane.
static void copy_single_plane_highbd(const uint8_t *src_buf, uint8_t *dst_buf,
int src_stride, int dst_stride, int h,
int w) {
uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_buf);
uint16_t *dst_ptr = CONVERT_TO_SHORTPTR(dst_buf);
for (int row = 0; row < h; row++) {
memcpy(dst_ptr, src_ptr, w * sizeof(*src_ptr));
src_ptr += src_stride;
dst_ptr += dst_stride;
}
}
// Compute the mean squared error between two frames, just for a single plane.
static double get_mse_frame(uint8_t *buf1, uint8_t *buf2, int stride1,
int stride2, int w, int h, int highbd) {
int64_t sse;
if (highbd)
sse = aom_sse_highbd(buf1, stride1, buf2, stride2, w, h);
else
sse = aom_sse_lowbd(buf1, stride1, buf2, stride2, w, h);
double mse = ((double)sse) / (w * h);
return mse;
}
// Apply In-Loop Multi-Frame Quality Enhancement to the y plane of the current
// frame. If MFQE improves the current frame, replace the current y plane with
// the updated buffer and set *use_mfqe to 1.
static void search_rest_mfqe_lowbd(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *cur, AV1_COMMON *cm,
int *use_mfqe) {
int frame_bytes = cur->y_stride * (cur->y_height + 2 * MFQE_PADDING_SIZE);
// Buffer to store temporary copy of current frame for MFQE.
uint8_t *tmpbuf_orig = aom_memalign(32, sizeof(uint8_t) * frame_bytes);
memset(tmpbuf_orig, 0, sizeof(uint8_t) * frame_bytes);
uint8_t *tmpbuf = tmpbuf_orig + cur->y_stride * MFQE_PADDING_SIZE;
copy_single_plane_lowbd(cur->y_buffer, tmpbuf, cur->y_stride, cur->y_stride,
cur->y_height, cur->y_width);
Y_BUFFER_CONFIG tmp = { .buffer = tmpbuf,
.stride = cur->y_stride,
.height = cur->y_height,
.width = cur->y_width };
RefCntBuffer *ref_frames[ALTREF_FRAME - LAST_FRAME + 1];
int num_ref_frames = 0;
MV_REFERENCE_FRAME ref_frame;
for (ref_frame = LAST_FRAME; ref_frame < ALTREF_FRAME; ++ref_frame) {
RefCntBuffer *ref = get_ref_frame_buf(cm, ref_frame);
if (ref) ref_frames[num_ref_frames++] = ref;
}
// Return if fewer than MFQE_NUM_REFS reference frames are available.
if (num_ref_frames < MFQE_NUM_REFS) {
aom_free(tmpbuf_orig);
return;
}
// Assert that pointers to RefCntBuffer are valid, then sort the reference
// frames based on their base_qindex, from lowest to highest.
for (int i = 0; i < num_ref_frames; i++) assert(ref_frames[i] != NULL);
qsort(ref_frames, num_ref_frames, sizeof(ref_frames[0]), cmpref);
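// cmpref is assumed to be the comparator declared in av1/common/mfqe.h that
// orders RefCntBuffer pointers by ascending base_qindex.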
// Perform In-Loop Multi-Frame Quality Enhancement on tmp.
av1_apply_loop_mfqe(&tmp, ref_frames, MFQE_BLOCK_SIZE, MFQE_SCALE_SIZE, 0,
cm->seq_params.bit_depth);
double mse_prev =
get_mse_frame(src->y_buffer, cur->y_buffer, src->y_stride, cur->y_stride,
src->y_width, src->y_height, 0);
double mse_curr = get_mse_frame(src->y_buffer, tmp.buffer, src->y_stride,
tmp.stride, src->y_width, src->y_height, 0);
if (mse_curr < mse_prev) {
*use_mfqe = 1;
copy_single_plane_lowbd(tmpbuf, cur->y_buffer, cur->y_stride, cur->y_stride,
cur->y_height, cur->y_width);
}
aom_free(tmpbuf_orig);
}
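// High bitdepth version of search_rest_mfqe_lowbd, using the same method.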
static void search_rest_mfqe_highbd(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *cur, AV1_COMMON *cm,
int *use_mfqe) {
int frame_bytes = cur->y_stride * (cur->y_height + 2 * MFQE_PADDING_SIZE);
// Buffer to store temporary copy of current frame for MFQE.
uint16_t *tmpbuf_orig = aom_memalign(32, sizeof(uint16_t) * frame_bytes);
memset(tmpbuf_orig, 0, sizeof(uint16_t) * frame_bytes);
uint16_t *tmpbuf = tmpbuf_orig + cur->y_stride * MFQE_PADDING_SIZE;
Y_BUFFER_CONFIG tmp = { .buffer = CONVERT_TO_BYTEPTR(tmpbuf),
.stride = cur->y_stride,
.height = cur->y_height,
.width = cur->y_width };
copy_single_plane_highbd(cur->y_buffer, tmp.buffer, cur->y_stride,
cur->y_stride, cur->y_height, cur->y_width);
RefCntBuffer *ref_frames[ALTREF_FRAME - LAST_FRAME + 1];
int num_ref_frames = 0;
MV_REFERENCE_FRAME ref_frame;
for (ref_frame = LAST_FRAME; ref_frame < ALTREF_FRAME; ++ref_frame) {
RefCntBuffer *ref = get_ref_frame_buf(cm, ref_frame);
if (ref) ref_frames[num_ref_frames++] = ref;
}
// Return if fewer than MFQE_NUM_REFS reference frames are available.
if (num_ref_frames < MFQE_NUM_REFS) {
aom_free(tmpbuf_orig);
return;
}
// Assert that pointers to RefCntBuffer are valid, then sort the reference
// frames based on their base_qindex, from lowest to highest.
for (int i = 0; i < num_ref_frames; i++) assert(ref_frames[i] != NULL);
qsort(ref_frames, num_ref_frames, sizeof(ref_frames[0]), cmpref);
// Perform In-Loop Multi-Frame Quality Enhancement on tmp.
av1_apply_loop_mfqe(&tmp, ref_frames, MFQE_BLOCK_SIZE, MFQE_SCALE_SIZE, 1,
cm->seq_params.bit_depth);
double mse_prev =
get_mse_frame(src->y_buffer, cur->y_buffer, src->y_stride, cur->y_stride,
src->y_width, src->y_height, 1);
double mse_curr = get_mse_frame(src->y_buffer, tmp.buffer, src->y_stride,
tmp.stride, src->y_width, src->y_height, 1);
if (mse_curr < mse_prev) {
*use_mfqe = 1;
copy_single_plane_highbd(tmp.buffer, cur->y_buffer, cur->y_stride,
cur->y_stride, cur->y_height, cur->y_width);
}
aom_free(tmpbuf_orig);
}
// Wrapper function for In-Loop Multi-Frame Quality Enhancement. There are two
// different code paths for low bit depth and high bit depth.
void av1_search_rest_mfqe(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *cur, AV1_COMMON *cm,
int *use_mfqe, int high_bd) {
if (high_bd)
search_rest_mfqe_highbd(src, cur, cm, use_mfqe);
else
search_rest_mfqe_lowbd(src, cur, cm, use_mfqe);
}
// MFQE decoding function, low bitdepth version.
void decode_restore_mfqe_lowbd(AV1_COMMON *cm, BLOCK_SIZE bsize,
int resize_factor) {
YV12_BUFFER_CONFIG *cur = &cm->cur_frame->buf;
int frame_bytes = cur->y_stride * (cur->y_height + 2 * MFQE_PADDING_SIZE);
// Buffer to store temporary copy of current frame for MFQE.
uint8_t *tmpbuf_orig = aom_memalign(32, sizeof(uint8_t) * frame_bytes);
memset(tmpbuf_orig, 0, sizeof(uint8_t) * frame_bytes);
uint8_t *tmpbuf = tmpbuf_orig + cur->y_stride * MFQE_PADDING_SIZE;
copy_single_plane_lowbd(cur->y_buffer, tmpbuf, cur->y_stride, cur->y_stride,
cur->y_height, cur->y_width);
Y_BUFFER_CONFIG cur_frame = { .buffer = tmpbuf,
.stride = cur->y_stride,
.height = cur->y_height,
.width = cur->y_width };
RefCntBuffer *ref_frames[ALTREF_FRAME - LAST_FRAME + 1];
int num_ref_frames = 0;
MV_REFERENCE_FRAME ref_frame;
for (ref_frame = LAST_FRAME; ref_frame < ALTREF_FRAME; ++ref_frame) {
RefCntBuffer *ref = get_ref_frame_buf(cm, ref_frame);
if (ref) ref_frames[num_ref_frames++] = ref;
}
assert(num_ref_frames >= MFQE_NUM_REFS);
// Assert that pointers to RefCntBuffer are valid, then sort the reference
// frames based on their base_qindex, from lowest to highest.
for (int i = 0; i < num_ref_frames; i++) assert(ref_frames[i] != NULL);
qsort(ref_frames, num_ref_frames, sizeof(ref_frames[0]), cmpref);
// Perform In-Loop Multi-Frame Quality Enhancement on cur_frame.
av1_apply_loop_mfqe(&cur_frame, ref_frames, bsize, resize_factor, 0,
cm->seq_params.bit_depth);
copy_single_plane_lowbd(tmpbuf, cur->y_buffer, cur->y_stride, cur->y_stride,
cur->y_height, cur->y_width);
aom_free(tmpbuf_orig);
}
// MFQE decoding function, high bitdepth version.
void decode_restore_mfqe_highbd(AV1_COMMON *cm, BLOCK_SIZE bsize,
int resize_factor) {
YV12_BUFFER_CONFIG *cur = &cm->cur_frame->buf;
int frame_bytes = cur->y_stride * (cur->y_height + 2 * MFQE_PADDING_SIZE);
// Buffer to store temporary copy of current frame for MFQE.
uint16_t *tmpbuf_orig = aom_memalign(32, sizeof(uint16_t) * frame_bytes);
memset(tmpbuf_orig, 0, sizeof(uint16_t) * frame_bytes);
uint16_t *tmpbuf = tmpbuf_orig + cur->y_stride * MFQE_PADDING_SIZE;
Y_BUFFER_CONFIG cur_frame = { .buffer = CONVERT_TO_BYTEPTR(tmpbuf),
.stride = cur->y_stride,
.height = cur->y_height,
.width = cur->y_width };
copy_single_plane_highbd(cur->y_buffer, cur_frame.buffer, cur->y_stride,
cur->y_stride, cur->y_height, cur->y_width);
RefCntBuffer *ref_frames[ALTREF_FRAME - LAST_FRAME + 1];
int num_ref_frames = 0;
MV_REFERENCE_FRAME ref_frame;
for (ref_frame = LAST_FRAME; ref_frame < ALTREF_FRAME; ++ref_frame) {
RefCntBuffer *ref = get_ref_frame_buf(cm, ref_frame);
if (ref) ref_frames[num_ref_frames++] = ref;
}
assert(num_ref_frames >= MFQE_NUM_REFS);
// Assert that pointers to RefCntBuffer are valid, then sort the reference
// frames based on their base_qindex, from lowest to highest.
for (int i = 0; i < num_ref_frames; i++) assert(ref_frames[i] != NULL);
qsort(ref_frames, num_ref_frames, sizeof(ref_frames[0]), cmpref);
// Perform In-Loop Multi-Frame Quality Enhancement on cur_frame.
av1_apply_loop_mfqe(&cur_frame, ref_frames, bsize, resize_factor, 1,
cm->seq_params.bit_depth);
copy_single_plane_highbd(cur_frame.buffer, cur->y_buffer, cur->y_stride,
cur->y_stride, cur->y_height, cur->y_width);
aom_free(tmpbuf_orig);
}
void av1_decode_restore_mfqe(AV1_COMMON *cm, int high_bd) {
if (high_bd)
decode_restore_mfqe_highbd(cm, MFQE_BLOCK_SIZE, MFQE_SCALE_SIZE);
else
decode_restore_mfqe_lowbd(cm, MFQE_BLOCK_SIZE, MFQE_SCALE_SIZE);
}