blob: 60e5b9908c84952f3ba32a2d1fab2a6db8848199 [file] [log] [blame]
/*
* Copyright (c) 2017, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "av1/common/optical_flow_ref.h"
#include <float.h>
#include <math.h>
#include <time.h>
#include "./aom_config.h"
#include "./aom_scale_rtcd.h"
#include "aom_mem/aom_mem.h"
#include "aom_scale/aom_scale.h"
#include "av1/common/alloccommon.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/sparse_linear_solver.h"
#if CONFIG_OPFL
// Global timers for debugging purposes. Each double accumulates elapsed
// seconds measured with clock(). In the code visible in this part of the
// file, timeinit, timemed, timesub and timetotal are only ever added to
// (never reset); timesolve and timeder are not updated here.
// NOTE(review): these have external linkage and are shared mutable state --
// presumably updated by other functions of this file as well; confirm.
double timeinit, timesub, timesolve, timeder, timemed, timetotal;
// startt is stamped at the top of av1_opfl_set_buf() and endt inside
// av1_opfl_free_buf(); their difference is what feeds timetotal.
clock_t startt, endt;
// 16 x 8 table of integer filter coefficients. Every row sums to 128.
// NOTE(review): presumably one row per 1/16 sub-pixel phase of an 8-tap
// interpolation filter used when warping; the table is not referenced in
// this part of the file, so confirm against its users.
static int optical_flow_warp_filter[16][8] = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
{ 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
{ 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
{ 0, 2, -16, 94, 58, -12, 2, 0 }, { 0, 2, -14, 84, 66, -12, 2, 0 },
{ 0, 2, -14, 76, 76, -14, 2, 0 }, { 0, 2, -12, 66, 84, -14, 2, 0 },
{ 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
{ 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
{ 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
};
/*
 * Interpolate the whole optical-flow (opfl) reference frame, block by block.
 * Should only be called by the encoder.
 *
 * The buffers are first prepared with av1_opfl_set_buf(). The frame is then
 * tiled into blocks (one frame-sized block when FRAME_LEVEL_OPFL is set,
 * OPFL_BLOCK_SIZE squares otherwise, clipped at the right/bottom edge) and
 * av1_optical_flow_get_ref() is run on each block.
 *
 * Input:
 *   cm: the av1_common pointer.
 *       cm->opfl_ref_frame should already have been initialized.
 *
 * Output:
 *   1: successfully interpolated
 *   0: reference(s) not available (buf_struct->initialized != 1 after setup)
 */
int av1_get_opfl_ref(AV1_COMMON *cm) {
  OPFL_BUFFER_STRUCT *buf_struct = cm->opfl_buf_struct_ptr;
  OPFL_BLK_INFO blk_info;

  av1_opfl_set_buf(cm, buf_struct);
  if (buf_struct->initialized != 1) return 0;

  const int frame_w = buf_struct->ref0_buf[0]->y_width;
  const int frame_h = buf_struct->ref0_buf[0]->y_height;
#if FRAME_LEVEL_OPFL
  const int step_w = frame_w;
  const int step_h = frame_h;
#else
  const int step_w = OPFL_BLOCK_SIZE;
  const int step_h = OPFL_BLOCK_SIZE;
#endif  // FRAME_LEVEL_OPFL

  // Fractional block counts: comparing the integer index against them makes
  // the loops also visit a trailing partial block.
  const double rows_of_blocks = (double)frame_h / (double)step_h;
  const double cols_of_blocks = (double)frame_w / (double)step_w;

  for (int brow = 0; brow < rows_of_blocks; ++brow) {
    for (int bcol = 0; bcol < cols_of_blocks; ++bcol) {
      const int y0 = brow * step_h;
      const int x0 = bcol * step_w;
      if (y0 >= frame_h || x0 >= frame_w) continue;

      // position and (edge-clipped) size of this block
      blk_info.starth = y0;
      blk_info.startw = x0;
      blk_info.blk_height = (blk_info.starth + step_h > frame_h)
                                ? frame_h - blk_info.starth
                                : step_h;
      blk_info.blk_width = (blk_info.startw + step_w > frame_w)
                               ? frame_w - blk_info.startw
                               : step_w;

      // flag every side of the block that touches the frame boundary
      blk_info.upbound = (blk_info.starth == 0) ? 1 : 0;
      blk_info.lowerbound =
          (blk_info.starth + blk_info.blk_height >= frame_h) ? 1 : 0;
      blk_info.leftbound = (blk_info.startw == 0) ? 1 : 0;
      blk_info.rightbound =
          (blk_info.startw + blk_info.blk_width >= frame_w) ? 1 : 0;

      av1_optical_flow_get_ref(buf_struct, blk_info);
    }
  }
  // The buffers are intentionally not released here (av1_opfl_free_buf() is
  // not called from this function).
  return 1;
}
/*
* Initialize and set the buffers for optical flow estimation.
*
* Steps, in order:
* 1. pick the reference pair with opfl_get_closest_refs() and gather further
* candidate pairs with opfl_select_best_ref_pairs();
* 2. bail out (buf_struct->initialized = 0) if either side has no reference;
* 3. record the references/offsets in buf_struct and allocate the working
* buffers with av1_opfl_alloc_buf();
* 4. compute the derivatives of both references for every pyramid level;
* 5. build the initial motion field for every pyramid level;
* 6. warp both references with that initial motion field and extend the
* borders of the warped frames.
*
* Input:
* cm: the av1_common pointer (frame_offset, opfl_ref_frame, buffer_pool,
* mi_rows / mi_cols and, without OPFL_EXP_INIT, cur_frame->tpl_mvs are read)
* buf_struct: the buffer struct that is filled in
*
* Output (through buf_struct->initialized):
* 1: buffers are ready
* 0: left_idx or right_idx is negative; this call allocated nothing
*
* Side effects: restarts the global stamp startt and adds the time spent in
* this function to the global timeinit.
*/
void av1_opfl_set_buf(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct) {
// startt is the global start stamp later consumed by av1_opfl_free_buf();
// starti only times this function.
startt = clock();
clock_t starti = clock();
int cur_offset = cm->frame_offset;
double dst_pos = -1;
// find the two nearest bi-directional refs
// these are the refs between which we do optical flow
int left_idx = -1, left_offset = -1, right_idx = -1, right_offset = -1;
int left_chosen = NONE_FRAME, right_chosen = NONE_FRAME;
opfl_get_closest_refs(cm, &left_idx, &left_offset, &left_chosen, &right_idx,
&right_offset, &right_chosen);
// check all other available bi-directional ref pairs
// sorted by distance between refs
// NOTE(review): this runs before the "no refs" check below, so it is also
// executed when left_idx / right_idx are still negative.
int left_idxs[MAX_NUM_REF_PAIR], left_offsets[MAX_NUM_REF_PAIR],
left_chosens[MAX_NUM_REF_PAIR], right_idxs[MAX_NUM_REF_PAIR],
right_offsets[MAX_NUM_REF_PAIR], right_chosens[MAX_NUM_REF_PAIR];
opfl_select_best_ref_pairs(cm, left_idxs, left_offsets, left_chosens,
right_idxs, right_offsets, right_chosens, left_idx,
right_idx);
// if no available refs on both sides, don't do optical flow
// TODO(bohanli): this should only happen for key frame and altref (?)
// If we disable it manually, mismatch happens (why?)
if (left_idx < 0 || right_idx < 0) {
buf_struct->initialized = 0;
return;
}
// set buffer ptrs to co-located ref, left ref and right ref
buf_struct->dst_buf = cm->opfl_ref_frame;
YV12_BUFFER_CONFIG *left = &(cm->buffer_pool->frame_bufs[left_idx].buf);
YV12_BUFFER_CONFIG *right = &(cm->buffer_pool->frame_bufs[right_idx].buf);
buf_struct->ref0_buf[0] = left;
buf_struct->ref1_buf[0] = right;
// set params for opfl
buf_struct->opfl_refs[0] = left_chosen;
buf_struct->opfl_refs[1] = right_chosen;
// dst_pos is the fractional position of the current frame between the two
// refs: (cur - left) / (right - left).
// NOTE(review): divides by zero if right_offset == left_offset; presumably
// opfl_get_closest_refs() guarantees left < cur < right -- confirm.
dst_pos = ((double)(cur_offset - left_offset)) /
((double)(right_offset - left_offset));
buf_struct->dst_pos = dst_pos;
buf_struct->left_offset = left_offset;
buf_struct->cur_offset = cur_offset;
buf_struct->right_offset = right_offset;
int width = buf_struct->ref0_buf[0]->y_width,
height = buf_struct->ref0_buf[0]->y_height;
int wid, hgt;
// allocate buffers
av1_opfl_alloc_buf(cm, buf_struct);
// calculate the derivatives
// blk_info describes one block covering the whole frame, with all four
// sides flagged as frame boundaries.
OPFL_BLK_INFO blk_info;
blk_info.starth = 0;
blk_info.startw = 0;
blk_info.blk_height = height;
blk_info.blk_width = width;
blk_info.upbound = 1;
blk_info.lowerbound = 1;
blk_info.leftbound = 1;
blk_info.rightbound = 1;
for (int l = 0; l < MAX_OPFL_LEVEL; l++) {
#if USE_BLK_DERIVATIVE
// level-0 references are passed at every level, with 0 as the second-to-last
// argument
opfl_get_derivatives(buf_struct->ori_Ex0_buf[l], buf_struct->ori_Ey0_buf[l],
buf_struct->ori_Et0_buf[l], buf_struct->ref0_buf[0],
buf_struct->ref0_buf[0], buf_struct->ref0_buf[0],
buf_struct->ref0_buf[0], dst_pos, l, 0, blk_info);
opfl_get_derivatives(buf_struct->ori_Ex1_buf[l], buf_struct->ori_Ey1_buf[l],
buf_struct->ori_Et1_buf[l], buf_struct->ref1_buf[0],
buf_struct->ref1_buf[0], buf_struct->ref1_buf[0],
buf_struct->ref1_buf[0], dst_pos, l, 0, blk_info);
#else
// the per-level (scaled) references are passed, with 1 as the
// second-to-last argument
opfl_get_derivatives(buf_struct->ori_Ex0_buf[l], buf_struct->ori_Ey0_buf[l],
buf_struct->ori_Et0_buf[l], buf_struct->ref0_buf[l],
buf_struct->ref0_buf[l], buf_struct->ref0_buf[l],
buf_struct->ref0_buf[l], dst_pos, l, 1, blk_info);
opfl_get_derivatives(buf_struct->ori_Ex1_buf[l], buf_struct->ori_Ey1_buf[l],
buf_struct->ori_Et1_buf[l], buf_struct->ref1_buf[l],
buf_struct->ref1_buf[l], buf_struct->ref1_buf[l],
buf_struct->ref1_buf[l], dst_pos, l, 1, blk_info);
#endif
}
// set up initial motion field
// one entry per mi unit (mi_rows x mi_cols), every entry starts as INVALID_MV
// NOTE(review): the aom_calloc() results below are used without a NULL check.
int_mv *left_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv));
int_mv *right_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv));
for (int i = 0; i < cm->mi_rows; i++) {
for (int j = 0; j < cm->mi_cols; j++) {
left_mv[i * cm->mi_cols + j].as_int = INVALID_MV;
right_mv[i * cm->mi_cols + j].as_int = INVALID_MV;
}
}
#if OPFL_EXP_INIT
// temp mv buffers
// (allocated and freed in this branch even when OPFL_CHECK_INIT_MV is off)
int_mv *left_most_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv));
int_mv *right_most_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv));
int_mv *left_temp_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv));
int_mv *right_temp_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv));
int *is_first_valid = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int));
// get the motion field between the two selected refs
opfl_set_init_motion(cm, buf_struct, left_idx, right_idx, left_offset,
right_offset, left_mv, right_mv);
#if OPFL_DERIVE_INIT_MV
opfl_derive_init_mv(cm, buf_struct, left_idx, right_idx, left_offset,
right_offset, left_mv, right_mv);
#endif
#if OPFL_CHECK_INIT_MV
// copy the mvs from the nearest refs to a temp buffer
// is_first_valid marks positions where both the left and the right mv are
// valid
for (int i = 0; i < cm->mi_rows; i++) {
for (int j = 0; j < cm->mi_cols; j++) {
left_most_mv[i * cm->mi_cols + j].as_int =
left_mv[i * cm->mi_cols + j].as_int;
right_most_mv[i * cm->mi_cols + j].as_int =
right_mv[i * cm->mi_cols + j].as_int;
if (left_mv[i * cm->mi_cols + j].as_int != INVALID_MV &&
right_mv[i * cm->mi_cols + j].as_int != INVALID_MV) {
is_first_valid[i * cm->mi_cols + j] = 1;
} else {
is_first_valid[i * cm->mi_cols + j] = 0;
}
}
}
// fill the holes of the mvs for comparison later
opfl_fill_mv(left_most_mv, cm->mi_cols, cm->mi_rows);
opfl_fill_mv(right_most_mv, cm->mi_cols, cm->mi_rows);
// now for the other candidate pairs, find their associated motion vectors
// NOTE(review): the bound is the literal 3 while the candidate arrays hold
// MAX_NUM_REF_PAIR entries -- confirm MAX_NUM_REF_PAIR >= 3.
for (int k = 0; k < 3; k++) {
if (left_idxs[k] < 0 || right_idxs[k] < 0) break;
opfl_set_init_motion(cm, buf_struct, left_idxs[k], right_idxs[k],
left_offsets[k], right_offsets[k], left_temp_mv,
right_temp_mv);
// for each block, see if the new temp mv is better
// than what we already have
opfl_update_init_motion(cm, buf_struct, left_most_mv, right_most_mv,
left_offset, right_offset, left_temp_mv,
right_temp_mv, left_offsets[k], right_offsets[k],
is_first_valid, left_mv, right_mv);
}
#endif
aom_free(left_most_mv);
aom_free(right_most_mv);
aom_free(left_temp_mv);
aom_free(right_temp_mv);
aom_free(is_first_valid);
#else
// use all available initialization of motion field
// for each mi position take the first valid entry of the mfmv stack that
// belongs to the chosen left / right reference
// (tpl_mvs is indexed with cm->mi_stride, left_mv / right_mv with
// cm->mi_cols)
TPL_MV_REF *tpl_mvs_base = cm->cur_frame->tpl_mvs;
for (int i = 0; i < cm->mi_rows; i++) {
for (int j = 0; j < cm->mi_cols; j++) {
for (int k = 0; k < MFMV_STACK_SIZE; k++) {
if (tpl_mvs_base[i * cm->mi_stride + j]
.mfmv[left_chosen - LAST_FRAME][k]
.as_int != INVALID_MV) {
left_mv[i * cm->mi_cols + j].as_int =
tpl_mvs_base[i * cm->mi_stride + j]
.mfmv[left_chosen - LAST_FRAME][k]
.as_int;
break;
}
}
for (int k = 0; k < MFMV_STACK_SIZE; k++) {
if (tpl_mvs_base[i * cm->mi_stride + j]
.mfmv[right_chosen - LAST_FRAME][k]
.as_int != INVALID_MV) {
right_mv[i * cm->mi_cols + j].as_int =
tpl_mvs_base[i * cm->mi_stride + j]
.mfmv[right_chosen - LAST_FRAME][k]
.as_int;
break;
}
}
}
}
#endif
// copy the initialized motions to each level
// level l works on a (width >> l) x (height >> l) field whose stride is
// wid + 2 * AVG_MF_BORDER
for (int l = 0; l < MAX_OPFL_LEVEL; l++) {
wid = width >> l;
hgt = height >> l;
create_motion_field(left_mv, right_mv, buf_struct->init_mv_buf[l],
#if OPFL_INIT_WT
buf_struct->init_mv_wts[l],
#endif
width, height, wid, hgt, wid + 2 * AVG_MF_BORDER,
dst_pos);
// fill in possible "holes" in the initialization
fill_create_motion_field(left_mv, right_mv, buf_struct->init_mv_buf[l],
width, height, wid, hgt, wid + 2 * AVG_MF_BORDER);
#if OPFL_INIT_MOTION_SEARCH
// note: blk_info still holds the full-frame size here at every level
opfl_init_motion_search(
left_mv, right_mv, buf_struct->init_mv_buf[l], width, height, wid, hgt,
wid + 2 * AVG_MF_BORDER, dst_pos, &blk_info,
buf_struct->ref0_buf[0]->y_buffer, buf_struct->ref1_buf[0]->y_buffer,
buf_struct->ref0_buf[0]->y_stride);
#endif
#if DUMP_OPFL
// debug only: warp both refs with the level-0 initial field and pass the
// original and warped frames to write_image_opfl() for "init_dump.yuv"
if (l == 0) {
int mvstr = wid + 2 * AVG_MF_BORDER;
DB_MV *mv_start =
buf_struct->init_mv_buf[l] + AVG_MF_BORDER * mvstr + AVG_MF_BORDER;
warp_optical_flow_fwd(buf_struct->ref0_buf[l], buf_struct->ref1_buf[l],
mv_start, mvstr, buf_struct->ref0_warped_buf[l],
dst_pos, l, !USE_BLK_DERIVATIVE, blk_info);
warp_optical_flow_back(buf_struct->ref1_buf[l], buf_struct->ref0_buf[l],
mv_start, mvstr, buf_struct->ref1_warped_buf[l],
1 - dst_pos, l, !USE_BLK_DERIVATIVE, blk_info);
// int srcstr = buf_struct->ref0_warped_buf[0]->y_stride;
// for (int i = 0; i < height; i++) {
// for (int j = 0; j < width; j++) {
// if (left_mv[i/4*wid/4+j/4].as_int == INVALID_MV) {
// buf_struct->ref0_warped_buf[0]->y_buffer[i*srcstr+j] = 0;
// buf_struct->ref1_warped_buf[0]->y_buffer[i*srcstr+j] = 0;
// }
// }
// }
write_image_opfl(buf_struct->ref0_buf[l], "init_dump.yuv");
write_image_opfl(buf_struct->ref0_warped_buf[l], "init_dump.yuv");
write_image_opfl(buf_struct->ref1_warped_buf[l], "init_dump.yuv");
write_image_opfl(buf_struct->ref1_buf[l], "init_dump.yuv");
}
#endif
}
aom_free(left_mv);
aom_free(right_mv);
// warped references according to initialization for future use
// blk_info is reused: origin reset to (0, 0), size set per level below, the
// four boundary flags keep the value 1 assigned earlier
blk_info.starth = 0;
blk_info.startw = 0;
for (int l = 0; l < MAX_OPFL_LEVEL; l++) {
// with USE_BLK_DERIVATIVE only level 0 is warped (av1_opfl_alloc_buf()
// allocates warped buffers for the other levels only when it is off)
if (USE_BLK_DERIVATIVE && l != 0) {
continue;
}
wid = width >> l;
hgt = height >> l;
blk_info.blk_height = hgt;
blk_info.blk_width = wid;
int mvstr = wid + 2 * AVG_MF_BORDER;
// skip the AVG_MF_BORDER rows/columns of padding in the init mv buffer
DB_MV *mv_start =
buf_struct->init_mv_buf[l] + AVG_MF_BORDER * mvstr + AVG_MF_BORDER;
// level 0 (or any level with USE_BLK_DERIVATIVE) uses the regular warp,
// the remaining levels use the bilinear variants
if (l == 0 || USE_BLK_DERIVATIVE) {
warp_optical_flow_fwd(buf_struct->ref0_buf[l], buf_struct->ref1_buf[l],
mv_start, mvstr, buf_struct->ref0_warped_buf[l],
dst_pos, l, !USE_BLK_DERIVATIVE, blk_info);
} else {
warp_optical_flow_fwd_bilinear(buf_struct->ref0_buf[l],
buf_struct->ref1_buf[l], mv_start, mvstr,
buf_struct->ref0_warped_buf[l], dst_pos, l,
!USE_BLK_DERIVATIVE, blk_info);
}
// the backward warp uses 1 - dst_pos as its position argument
if (l == 0 || USE_BLK_DERIVATIVE) {
warp_optical_flow_back(buf_struct->ref1_buf[l], buf_struct->ref0_buf[l],
mv_start, mvstr, buf_struct->ref1_warped_buf[l],
1 - dst_pos, l, !USE_BLK_DERIVATIVE, blk_info);
} else {
warp_optical_flow_back_bilinear(
buf_struct->ref1_buf[l], buf_struct->ref0_buf[l], mv_start, mvstr,
buf_struct->ref1_warped_buf[l], 1 - dst_pos, l, !USE_BLK_DERIVATIVE,
blk_info);
}
aom_yv12_extend_frame_borders_c(buf_struct->ref0_warped_buf[l]);
aom_yv12_extend_frame_borders_c(buf_struct->ref1_warped_buf[l]);
}
buf_struct->initialized = 1;
// account the time spent in this function
clock_t endi = clock();
timeinit += (double)(endi - starti) / CLOCKS_PER_SEC;
}
/*
 * Use the optical flow method to interpolate one block of the reference
 * frame.
 *
 * The block's motion field is seeded at the coarsest pyramid level from
 * buf_struct->init_mv_buf. Then, from the coarsest level down to level 0, it
 * is refined with refine_motion_field(), median filtered over a 5x5 window
 * when USE_MEDIAN_FILTER is set, and upscaled into the next finer level. The
 * level-0 field drives interp_optical_flow(), which writes into
 * buf_struct->dst_buf. Finally the destination planes are extended on the
 * sides flagged in blk_info, and the field is copied into the frame-level
 * motion field buffer.
 *
 * Input:
 *   buf_struct: containing necessary buffers
 *   blk_info: information on the current block (size, location, etc.)
 *
 * Side effects: accumulates into the global timers timemed and timesub.
 */
void av1_optical_flow_get_ref(OPFL_BUFFER_STRUCT *buf_struct,
                              OPFL_BLK_INFO blk_info) {
  const int width = blk_info.blk_width;
  const int height = blk_info.blk_height;
  const int start_h = blk_info.starth;
  const int start_w = blk_info.startw;

  // temporary buffers for MF median filtering (5x5 window, at most 25 samples)
  double mv_r[25], mv_c[25], left[25], right[25];

  DB_MV **mf_last = buf_struct->mf_last;
  DB_MV **mf_new = buf_struct->mf_new;
  DB_MV **mf_med = buf_struct->mf_med;

  // Seed the coarsest level with the init mvs of this block.
  {
    const int top = MAX_OPFL_LEVEL - 1;
    const int wid = width >> top;
    const int hgt = height >> top;
    const int sh = start_h >> top;
    const int sw = start_w >> top;
    // the mv stride for init mv is based on the frame buffer width ...
    const int imvstr =
        (buf_struct->ref0_buf[0]->y_width >> top) + 2 * AVG_MF_BORDER;
    // ... while the block-level mv stride is based on the block width
    const int str = wid + 2 * AVG_MF_BORDER;
    for (int i = 0; i < hgt; i++) {
      for (int j = 0; j < wid; j++) {
        mf_last[top][(i + AVG_MF_BORDER) * str + j + AVG_MF_BORDER] =
            buf_struct->init_mv_buf[top][(i + sh + AVG_MF_BORDER) * imvstr +
                                         j + sw + AVG_MF_BORDER];
      }
    }
  }

  // estimate optical flow at each level, coarse to fine
  for (int l = MAX_OPFL_LEVEL - 1; l >= 0; l--) {
    const int wid = width >> l;
    const int hgt = height >> l;
    const int mvstr = wid + 2 * AVG_MF_BORDER;

    // use optical flow to refine our motion field
#if USE_BLK_DERIVATIVE
    refine_motion_field(buf_struct, mf_last[l], mf_new[l], l,
                        buf_struct->dst_pos, 0, blk_info);
#else
    refine_motion_field(buf_struct, mf_last[l], mf_new[l], l,
                        buf_struct->dst_pos, 1, blk_info);
#endif

    // skip the AVG_MF_BORDER padding of the block-level fields
    DB_MV *mf_start_new = mf_new[l] + AVG_MF_BORDER * mvstr + AVG_MF_BORDER;
    DB_MV *mf_start_med = mf_med[l] + AVG_MF_BORDER * mvstr + AVG_MF_BORDER;

    const clock_t startm = clock();
    for (int i = 0; i < hgt; i++) {
      for (int j = 0; j < wid; j++) {
        if (USE_MEDIAN_FILTER) {
          // gather the neighbours of the 5x5 window that lie inside the
          // block; positions outside the block are simply left out
          int c = 0;
          for (int h = -2; h < 3; h++) {
            for (int w = -2; w < 3; w++) {
              if (i + h < 0 || i + h >= hgt || j + w < 0 || j + w >= wid) {
                continue;
              }
              mv_r[c] = mf_start_new[(i + h) * mvstr + j + w].row;
              mv_c[c] = mf_start_new[(i + h) * mvstr + j + w].col;
              c++;
            }
          }
          mf_start_med[i * mvstr + j].row =
              iter_median_double(mv_r, left, right, c, c / 2);
          mf_start_med[i * mvstr + j].col =
              iter_median_double(mv_c, left, right, c, c / 2);
        } else {
          mf_start_med[i * mvstr + j].row = mf_start_new[i * mvstr + j].row;
          mf_start_med[i * mvstr + j].col = mf_start_new[i * mvstr + j].col;
        }
      }
    }
    const clock_t endm = clock();
    timemed += (double)(endm - startm) / CLOCKS_PER_SEC;

    if (l != 0) {
      // upscale mv to the next lower level
      // The destination stride must be the stride that level l - 1 uses for
      // its block-level fields in this function, i.e. (width >> (l - 1)) +
      // 2 * AVG_MF_BORDER. That equals wid * 2 + 2 * AVG_MF_BORDER only when
      // width >> (l - 1) is even; deriving it from the block width keeps the
      // rows aligned for odd sizes as well.
      // NOTE(review): assumes refine_motion_field() reads mf_last with the
      // same stride as the seeding loop above writes it -- confirm.
      const int mvstr_next = (width >> (l - 1)) + 2 * AVG_MF_BORDER;
      DB_MV *mf_start_next =
          mf_last[l - 1] + AVG_MF_BORDER * mvstr_next + AVG_MF_BORDER;
      upscale_mv_by_2(mf_start_med, wid, hgt, mvstr, mf_start_next,
                      mvstr_next);
    } else {
      pad_motion_field_border(mf_start_med, wid, hgt, mvstr);
    }
  }

  // interpolate to get our reference frame
  const clock_t start = clock();
  interp_optical_flow(buf_struct->ref0_buf[0], buf_struct->ref1_buf[0],
                      mf_med[0], buf_struct->dst_buf, buf_struct->dst_pos,
                      blk_info);
  const clock_t end = clock();
  timesub += (double)(end - start) / CLOCKS_PER_SEC;

  // pad border if at border of frame
  int fstr = buf_struct->dst_buf->y_stride;
  int fstruv = buf_struct->dst_buf->uv_stride;
  int border = buf_struct->dst_buf->border;
  int fheight = buf_struct->dst_buf->y_crop_height;
  int fwidth = buf_struct->dst_buf->y_crop_width;
  uint8_t *ydst = buf_struct->dst_buf->y_buffer;
  uint8_t *udst = buf_struct->dst_buf->u_buffer;
  uint8_t *vdst = buf_struct->dst_buf->v_buffer;
  // Only the sides where the block touches the frame boundary are extended.
  // Bottom/right additionally cover the gap between the cropped and the
  // allocated plane size.
  int topb = 0, bottomb = 0, leftb = 0, rightb = 0;
  if (blk_info.upbound) {
    topb = border;
  }
  if (blk_info.lowerbound) {
    bottomb = border + buf_struct->dst_buf->y_height - fheight;
  }
  if (blk_info.leftbound) {
    leftb = border;
  }
  if (blk_info.rightbound) {
    rightb = border + buf_struct->dst_buf->y_width - fwidth;
  }
  extend_plane_opfl(ydst, fstr, fwidth, fheight, topb, leftb, bottomb, rightb);
  // NOTE(review): the chroma planes are handled at half the luma size and
  // half the luma borders -- presumably 4:2:0 only; confirm.
  extend_plane_opfl(udst, fstruv, fwidth / 2, fheight / 2, topb / 2, leftb / 2,
                    bottomb / 2, rightb / 2);
  extend_plane_opfl(vdst, fstruv, fwidth / 2, fheight / 2, topb / 2, leftb / 2,
                    bottomb / 2, rightb / 2);

  // copy the final motion field to the frame level motion field buffer
  int mvstr = width + 2 * AVG_MF_BORDER;
  DB_MV *mf_start_med = mf_med[0] + AVG_MF_BORDER * mvstr + AVG_MF_BORDER;
  int f_mvstr = buf_struct->mf_frame_stride;
  DB_MV *mf_frame_start = buf_struct->mf_frame_start;
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      mf_frame_start[(i + start_h) * f_mvstr + j + start_w] =
          mf_start_med[i * mvstr + j];
    }
  }
  // if at frame boundary, extend the mf buffer
  opfl_extend_frame_mf(buf_struct, blk_info);
  return;
}
/*
* Allocate buffers in opfl
*/
void av1_opfl_alloc_buf(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct) {
// allocate yuv buffers for opfl use
int width = buf_struct->ref0_buf[0]->y_width,
height = buf_struct->ref0_buf[0]->y_height;
int wid, hgt;
for (int l = 0; l < MAX_OPFL_LEVEL; l++) {
wid = width >> l;
hgt = height >> l;
if (l == 0) {
buf_struct->ref0_warped_buf[l] =
aom_calloc(1, sizeof(YV12_BUFFER_CONFIG));
buf_struct->ref1_warped_buf[l] =
aom_calloc(1, sizeof(YV12_BUFFER_CONFIG));
aom_alloc_frame_buffer(buf_struct->ref0_warped_buf[l], wid, hgt, 1, 1, 0,
AOM_BORDER_IN_PIXELS, 0);
aom_alloc_frame_buffer(buf_struct->ref1_warped_buf[l], wid, hgt, 1, 1, 0,
AOM_BORDER_IN_PIXELS, 0);
}
#if !USE_BLK_DERIVATIVE
if (l != 0) {
buf_struct->ref0_buf[l] = aom_calloc(1, sizeof(YV12_BUFFER_CONFIG));
buf_struct->ref1_buf[l] = aom_calloc(1, sizeof(YV12_BUFFER_CONFIG));
aom_alloc_frame_buffer(buf_struct->ref0_buf[l], wid, hgt, 1, 1, 0,
AOM_BORDER_IN_PIXELS, 1);
aom_alloc_frame_buffer(buf_struct->ref1_buf[l], wid, hgt, 1, 1, 0,
AOM_BORDER_IN_PIXELS, 1);
buf_struct->ref0_warped_buf[l] =
aom_calloc(1, sizeof(YV12_BUFFER_CONFIG));
buf_struct->ref1_warped_buf[l] =
aom_calloc(1, sizeof(YV12_BUFFER_CONFIG));
aom_alloc_frame_buffer(buf_struct->ref0_warped_buf[l], wid, hgt, 1, 1, 0,
AOM_BORDER_IN_PIXELS, 1);
aom_alloc_frame_buffer(buf_struct->ref1_warped_buf[l], wid, hgt, 1, 1, 0,
AOM_BORDER_IN_PIXELS, 1);
}
#endif
}
#if !USE_BLK_DERIVATIVE
// scale the buffers for pyramid structure
// TODO(bohan): find out the necessary space for temp_buffer
uint8_t *temp_buffer = aom_calloc(width * 8, sizeof(uint8_t));
for (int l = 1; l < MAX_OPFL_LEVEL; l++) {
aom_scale_frame(buf_struct->ref0_buf[l - 1], buf_struct->ref0_buf[l],
temp_buffer, 8, 2, 1, 2, 1, 0);
aom_scale_frame(buf_struct->ref1_buf[l - 1], buf_struct->ref1_buf[l],
temp_buffer, 8, 2, 1, 2, 1, 0);
aom_yv12_extend_frame_borders_c(buf_struct->ref0_buf[l]);
aom_yv12_extend_frame_borders_c(buf_struct->ref1_buf[l]);
}
aom_free(temp_buffer);
#endif
// allocate initial motion field buffers
for (int l = 0; l < MAX_OPFL_LEVEL; l++) {
wid = width >> l;
hgt = height >> l;
buf_struct->init_mv_buf[l] = aom_calloc(
(wid + 2 * AVG_MF_BORDER) * (hgt + 2 * AVG_MF_BORDER), sizeof(DB_MV));
#if OPFL_INIT_WT
buf_struct->init_mv_wts[l] = aom_calloc(
(wid + 2 * AVG_MF_BORDER) * (hgt + 2 * AVG_MF_BORDER), sizeof(double));
#endif
}
// allocate frame level motion buffer
buf_struct->mf_frame = aom_calloc(
(width + 2 * OPFL_MF_FRAME_BORDER) * (height + 2 * OPFL_MF_FRAME_BORDER),
sizeof(DB_MV));
buf_struct->mf_frame_stride = width + 2 * OPFL_MF_FRAME_BORDER;
buf_struct->mf_frame_start =
buf_struct->mf_frame +
OPFL_MF_FRAME_BORDER * buf_struct->mf_frame_stride + OPFL_MF_FRAME_BORDER;
// allocate motion field buffer for each pyramid level
int blkwid, blkhgt;
#if FRAME_LEVEL_OPFL
blkwid = width;
blkhgt = height;
#else
blkwid = OPFL_BLOCK_SIZE;
blkhgt = OPFL_BLOCK_SIZE;
#endif
for (int l = 0; l < MAX_OPFL_LEVEL; l++) {
wid = blkwid >> l;
hgt = blkhgt >> l;
buf_struct->mf_last[l] = aom_calloc(
(wid + 2 * AVG_MF_BORDER) * (hgt + 2 * AVG_MF_BORDER), sizeof(DB_MV));
buf_struct->mf_new[l] = aom_calloc(
(wid + 2 * AVG_MF_BORDER) * (hgt + 2 * AVG_MF_BORDER), sizeof(DB_MV));
buf_struct->mf_med[l] = aom_calloc(
(wid + 2 * AVG_MF_BORDER) * (hgt + 2 * AVG_MF_BORDER), sizeof(DB_MV));
// allocate buffers
if (l != 0 && USE_BLK_DERIVATIVE) continue;
buf_struct->buffer0[l] = aom_calloc(1, sizeof(YV12_BUFFER_CONFIG));
buf_struct->buffer1[l] = aom_calloc(1, sizeof(YV12_BUFFER_CONFIG));
aom_alloc_frame_buffer(buf_struct->buffer0[l], wid, hgt, 1, 1, 0,
AOM_BORDER_IN_PIXELS, 0);
aom_alloc_frame_buffer(buf_struct->buffer1[l], wid, hgt, 1, 1, 0,
AOM_BORDER_IN_PIXELS, 0);
}
// allocate done flag buffer for each blockc
int wblk, hblk;
wblk = (width + blkwid - 1) / blkwid;
hblk = (height + blkhgt - 1) / blkhgt;
buf_struct->done_flag = aom_calloc(wblk * hblk, sizeof(int));
// allocate derivative buffers
// temp derivative buffer for each iter
buf_struct->Ex = aom_calloc(width * height, sizeof(double));
buf_struct->Ey = aom_calloc(width * height, sizeof(double));
buf_struct->Et = aom_calloc(width * height, sizeof(double));
// initial derivative in the refs
for (int l = 0; l < MAX_OPFL_LEVEL; l++) {
wid = width >> l;
hgt = height >> l;
buf_struct->ori_Ex0_buf[l] = aom_calloc(wid * hgt, sizeof(double));
buf_struct->ori_Ey0_buf[l] = aom_calloc(wid * hgt, sizeof(double));
buf_struct->ori_Et0_buf[l] = aom_calloc(wid * hgt, sizeof(double));
buf_struct->ori_Ex1_buf[l] = aom_calloc(wid * hgt, sizeof(double));
buf_struct->ori_Ey1_buf[l] = aom_calloc(wid * hgt, sizeof(double));
buf_struct->ori_Et1_buf[l] = aom_calloc(wid * hgt, sizeof(double));
}
}
/*
* Free the allocated buffers in buf_struct.
*/
void av1_opfl_free_buf(OPFL_BUFFER_STRUCT *buf_struct) {
if (buf_struct->initialized != 1) return;
for (int l = 0; l < MAX_OPFL_LEVEL; l++) {
if (l == 0) {
aom_free_frame_buffer(buf_struct->ref0_warped_buf[l]);
aom_free_frame_buffer(buf_struct->ref1_warped_buf[l]);
aom_free(buf_struct->ref0_warped_buf[l]);
aom_free(buf_struct->ref1_warped_buf[l]);
}
#if !USE_BLK_DERIVATIVE
if (l != 0) {
aom_free_frame_buffer(buf_struct->ref0_buf[l]);
aom_free_frame_buffer(buf_struct->ref1_buf[l]);
aom_free(buf_struct->ref0_buf[l]);
aom_free(buf_struct->ref1_buf[l]);
aom_free_frame_buffer(buf_struct->ref0_warped_buf[l]);
aom_free_frame_buffer(buf_struct->ref1_warped_buf[l]);
aom_free(buf_struct->ref0_warped_buf[l]);
aom_free(buf_struct->ref1_warped_buf[l]);
}
#endif
}
for (int l = 0; l < MAX_OPFL_LEVEL; l++) {
aom_free(buf_struct->init_mv_buf[l]);
#if OPFL_INIT_WT
aom_free(buf_struct->init_mv_wts[l]);
#endif
}
aom_free(buf_struct->mf_frame);
buf_struct->mf_frame_start = NULL;
aom_free(buf_struct->Ex);
aom_free(buf_struct->Ey);
aom_free(buf_struct->Et);
for (int l = 0; l < MAX_OPFL_LEVEL; l++) {
aom_free(buf_struct->mf_last[l]);
aom_free(buf_struct->mf_new[l]);
aom_free(buf_struct->mf_med[l]);
aom_free(buf_struct->ori_Ex0_buf[l]);
aom_free(buf_struct->ori_Ey0_buf[l]);
aom_free(buf_struct->ori_Et0_buf[l]);
aom_free(buf_struct->ori_Ex1_buf[l]);
aom_free(buf_struct->ori_Ey1_buf[l]);
aom_free(buf_struct->ori_Et1_buf[l]);
if (l != 0 && USE_BLK_DERIVATIVE) continue;
aom_free_frame_buffer(buf_struct->buffer0[l]);
aom_free_frame_buffer(buf_struct->buffer1[l]);
aom_free(buf_struct->buffer0[l]);
aom_free(buf_struct->buffer1[l]);
}
aom_free(buf_struct->done_flag);
endt = clock();
timetotal += (double)(endt - startt) / CLOCKS_PER_SEC;
#if OPFL_OUTPUT_TIME
// TODO(bohan): output time usage for debug for now.
printf(
"\ninit time: %.4f, der time: %.4f, sub time: %.4f, median time: %.4f, "
"solve time: %.4f, totaltime: %.4f\n",
timeinit, timeder, timesub, timemed, timesolve, timetotal);
fflush(stdout);
#endif
#if DUMP_OPFL
// TODO(bohan): dump the frames for now for debug
char filename[20] = "of_dump.yuv";
write_image_opfl(buf_struct->ref0_buf[0], filename);
write_image_opfl(buf_struct->dst_buf, filename);
write_image_opfl(buf_struct->ref1_buf[0], filename);
char idxfilename[20] = "of_data.txt";
FILE *f_idx = fopen(idxfilename, "a");
fprintf(f_idx, "%d %d %d\n", buf_struct->left_offset, buf_struct->cur_offset,
buf_struct->right_offset);
fclose(f_idx);
#endif
}
/*
 * Helper for get_num_MV_between_refs(): scan the stored per-mi reference
 * info (mvs) of frame buffer `src_idx` and count the reference slots whose
 * referenced frame has offset `target_offset`.
 *
 * Only a subset of reference types is translated into an offset; every other
 * type never matches:
 *   scan_left_ref != 0: ALTREF_FRAME (and BWDREF_FRAME with CONFIG_EXT_REFS)
 *   scan_left_ref == 0: LAST_FRAME, GOLDEN_FRAME (and LAST2_FRAME,
 *                       LAST3_FRAME with CONFIG_EXT_REFS)
 */
static int opfl_count_refs_at_offset(AV1_COMMON *cm, int src_idx,
                                     int target_offset, int scan_left_ref) {
  MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[src_idx].mvs;
  const int lst_offset = cm->buffer_pool->frame_bufs[src_idx].lst_frame_offset;
  const int alt_offset = cm->buffer_pool->frame_bufs[src_idx].alt_frame_offset;
  const int gld_offset = cm->buffer_pool->frame_bufs[src_idx].gld_frame_offset;
#if CONFIG_EXT_REFS
  const int lst2_offset =
      cm->buffer_pool->frame_bufs[src_idx].lst2_frame_offset;
  const int lst3_offset =
      cm->buffer_pool->frame_bufs[src_idx].lst3_frame_offset;
  const int bwd_offset = cm->buffer_pool->frame_bufs[src_idx].bwd_frame_offset;
#endif
  int matches = 0;

  for (int row = 0; row < cm->mi_rows; ++row) {
    for (int col = 0; col < cm->mi_cols; ++col) {
      MV_REF *mv_ref = &mv_ref_base[row * cm->mi_cols + col];
      MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0],
                                          mv_ref->ref_frame[1] };
      // an opfl-only entry is replaced by the pair of refs recorded for it
      if (ref_frame[0] == OPFL_FRAME && ref_frame[1] == NONE_FRAME) {
        ref_frame[0] = mv_ref->opfl_ref_frame[0];
        ref_frame[1] = mv_ref->opfl_ref_frame[1];
      }
      for (int slot = 0; slot < 2; ++slot) {
        const MV_REFERENCE_FRAME rf = ref_frame[slot];
        int ref_offset = -2;  // "not a reference type of interest"
        if (scan_left_ref) {
          if (rf == ALTREF_FRAME) {
            ref_offset = alt_offset;
          }
#if CONFIG_EXT_REFS
          else if (rf == BWDREF_FRAME) {
            ref_offset = bwd_offset;
          }
#endif
        } else {
          if (rf == LAST_FRAME) {
            ref_offset = lst_offset;
          } else if (rf == GOLDEN_FRAME) {
            ref_offset = gld_offset;
          }
#if CONFIG_EXT_REFS
          else if (rf == LAST2_FRAME) {
            ref_offset = lst2_offset;
          } else if (rf == LAST3_FRAME) {
            ref_offset = lst3_offset;
          }
#endif
        }
        // only count references that point exactly at the other frame
        if (ref_offset >= 0 && ref_offset == target_offset) {
          ++matches;
        }
      }
    }
  }
  return matches;
}

/*
 * Get how many motion vector initializations exist between two refs.
 *
 * Returns the number of reference slots stored with the left frame that point
 * at the right frame's offset, plus the number stored with the right frame
 * that point at the left frame's offset.
 */
int get_num_MV_between_refs(AV1_COMMON *cm, int left_idx, int left_offset,
                            int right_idx, int right_offset) {
  // left ref: entries referencing the right offset
  const int from_left =
      opfl_count_refs_at_offset(cm, left_idx, right_offset, 1);
  // right ref: entries referencing the left offset
  const int from_right =
      opfl_count_refs_at_offset(cm, right_idx, left_offset, 0);
  return from_left + from_right;
}
/*
* find and sort the best ref pairs by distance
*/
void opfl_select_best_ref_pairs(AV1_COMMON *cm, int *left_idx, int *left_offset,
int *left_chosen, int *right_idx,
int *right_offset, int *right_chosen,
int left_most_idx, int right_most_idx) {
int cur_offset = cm->frame_offset;
int alt_buf_idx = cm->frame_refs[ALTREF_FRAME - LAST_FRAME].idx;
int lst_buf_idx = cm->frame_refs[LAST_FRAME - LAST_FRAME].idx;
int gld_buf_idx = cm->frame_refs[GOLDEN_FRAME - LAST_FRAME].idx;
#if CONFIG_EXT_REFS
int lst2_buf_idx = cm->frame_refs[LAST2_FRAME - LAST_FRAME].idx;
int lst3_buf_idx = cm->frame_refs[LAST3_FRAME - LAST_FRAME].idx;
int bwd_buf_idx = cm->frame_refs[BWDREF_FRAME - LAST_FRAME].idx;
#endif
int left_cand_idx[INTER_REFS_PER_FRAME],
left_cand_offset[INTER_REFS_PER_FRAME];
int right_cand_idx[INTER_REFS_PER_FRAME],
right_cand_offset[INTER_REFS_PER_FRAME];
for (int k = 0; k < INTER_REFS_PER_FRAME; k++) {
left_cand_idx[k] = -1;
right_cand_idx[k] = -1;
}
int numMV_cand[INTER_REFS_PER_FRAME][INTER_REFS_PER_FRAME];
int this_offset;
if (alt_buf_idx >= 0) {
this_offset = cm->cur_frame->alt_frame_offset;
if (this_offset > cur_offset) {
right_cand_idx[ALTREF_FRAME - LAST_FRAME] = alt_buf_idx;
right_cand_offset[ALTREF_FRAME - LAST_FRAME] = this_offset;
} else if (this_offset < cur_offset) {
left_cand_idx[ALTREF_FRAME - LAST_FRAME] = alt_buf_idx;
left_cand_offset[ALTREF_FRAME - LAST_FRAME] = this_offset;
}
}
if (lst_buf_idx >= 0) {
this_offset = cm->cur_frame->lst_frame_offset;
if (this_offset > cur_offset) {
right_cand_idx[LAST_FRAME - LAST_FRAME] = lst_buf_idx;
right_cand_offset[LAST_FRAME - LAST_FRAME] = this_offset;
} else if (this_offset < cur_offset) {
left_cand_idx[LAST_FRAME - LAST_FRAME] = lst_buf_idx;
left_cand_offset[LAST_FRAME - LAST_FRAME] = this_offset;
}
}
if (gld_buf_idx >= 0) {
this_offset = cm->cur_frame->gld_frame_offset;
if (this_offset > cur_offset) {
right_cand_idx[GOLDEN_FRAME - LAST_FRAME] = gld_buf_idx;
right_cand_offset[GOLDEN_FRAME - LAST_FRAME] = this_offset;
} else if (this_offset < cur_offset) {
left_cand_idx[GOLDEN_FRAME - LAST_FRAME] = gld_buf_idx;
left_cand_offset[GOLDEN_FRAME - LAST_FRAME] = this_offset;
}
}
#if CONFIG_EXT_REFS
if (lst2_buf_idx >= 0) {
this_offset = cm->cur_frame->lst2_frame_offset;
if (this_offset > cur_offset) {
right_cand_idx[LAST2_FRAME - LAST_FRAME] = lst2_buf_idx;
right_cand_offset[LAST2_FRAME - LAST_FRAME] = this_offset;
} else if (this_offset < cur_offset) {
left_cand_idx[LAST2_FRAME - LAST_FRAME] = lst2_buf_idx;
left_cand_offset[LAST2_FRAME - LAST_FRAME] = this_offset;
}
}
if (lst3_buf_idx >= 0) {
this_offset = cm->cur_frame->lst3_frame_offset;
if (this_offset > cur_offset) {
right_cand_idx[LAST3_FRAME - LAST_FRAME] = lst3_buf_idx;
right_cand_offset[LAST3_FRAME - LAST_FRAME] = this_offset;
} else if (this_offset < cur_offset) {
left_cand_idx[LAST3_FRAME - LAST_FRAME] = lst3_buf_idx;
left_cand_offset[LAST3_FRAME - LAST_FRAME] = this_offset;
}
}
if (bwd_buf_idx >= 0) {
this_offset = cm->cur_frame->bwd_frame_offset;
if (this_offset > cur_offset) {
right_cand_idx[BWDREF_FRAME - LAST_FRAME] = bwd_buf_idx;
right_cand_offset[BWDREF_FRAME - LAST_FRAME] = this_offset;
} else if (this_offset < cur_offset) {
left_cand_idx[BWDREF_FRAME - LAST_FRAME] = bwd_buf_idx;
left_cand_offset[BWDREF_FRAME - LAST_FRAME] = this_offset;
}
}
#endif
// got all candidates, now calculate the number of mvs available
// for each pair
for (int ll = LAST_FRAME - LAST_FRAME; ll < OPFL_FRAME - LAST_FRAME; ll++) {
for (int rr = LAST_FRAME - LAST_FRAME; rr < OPFL_FRAME - LAST_FRAME; rr++) {
numMV_cand[ll][rr] = 0;
if (ll == rr) continue;
if (left_cand_idx[ll] < 0 || right_cand_idx[rr] < 0) continue;
if (left_cand_idx[ll] == left_most_idx &&
right_cand_idx[rr] == right_most_idx)
continue;
numMV_cand[ll][rr] =
get_num_MV_between_refs(cm, left_cand_idx[ll], left_cand_offset[ll],
right_cand_idx[rr], right_cand_offset[rr]);
}
}
// find the ones with most available mvs
int max_num_MV[MAX_NUM_REF_PAIR];
for (int k = 0; k < MAX_NUM_REF_PAIR; k++) {
max_num_MV[k] = 0;
}
for (int k = 0; k < MAX_NUM_REF_PAIR; k++) {
left_idx[k] = -1;
right_idx[k] = -1;
for (int ll = LAST_FRAME - LAST_FRAME; ll < OPFL_FRAME - LAST_FRAME; ll++) {
for (int rr = LAST_FRAME - LAST_FRAME; rr < OPFL_FRAME - LAST_FRAME;
rr++) {
if (numMV_cand[ll][rr] == 0) continue;
int skip = 0;
for (int kk = 0; kk < k; kk++) {
if (left_cand_idx[ll] == left_idx[kk] &&
right_cand_idx[rr] == right_idx[kk]) {
skip = 1;
break;
}
}
if (skip > 0) continue;
if (numMV_cand[ll][rr] > max_num_MV[k]) {
max_num_MV[k] = numMV_cand[ll][rr];
left_idx[k] = left_cand_idx[ll];
left_offset[k] = left_cand_offset[ll];
left_chosen[k] = ll + LAST_FRAME;
right_idx[k] = right_cand_idx[rr];
right_offset[k] = right_cand_offset[rr];
right_chosen[k] = rr + LAST_FRAME;
}
}
}
// if (left_idx[k] < 0)
// break;
}
int total_dist[MAX_NUM_REF_PAIR];
for (int k = 0; k < MAX_NUM_REF_PAIR; k++) {
if (left_idx[k] >= 0)
total_dist[k] = -left_offset[k] + right_offset[k];
else
total_dist[k] = -1;
}
// bubble sort by distance to cur_frame (prefer shorter)
for (int i = 1; i < MAX_NUM_REF_PAIR; i++) {
if (total_dist[i] < 0) break;
for (int j = i - 1; j >= 0; j--) {
if (total_dist[j + 1] < total_dist[j]) {
// swap j+1 and j
int temp;
temp = left_idx[j];
left_idx[j] = left_idx[j + 1];
left_idx[j + 1] = temp;
temp = left_offset[j];
left_offset[j] = left_offset[j + 1];
left_offset[j + 1] = temp;
temp = left_chosen[j];
left_chosen[j] = left_chosen[j + 1];
left_chosen[j + 1] = temp;
temp = right_idx[j];
right_idx[j] = right_idx[j + 1];
right_idx[j + 1] = temp;
temp = right_offset[j];
right_offset[j] = right_offset[j + 1];
right_offset[j + 1] = temp;
temp = right_chosen[j];
right_chosen[j] = right_chosen[j + 1];
right_chosen[j + 1] = temp;
temp = total_dist[j];
total_dist[j] = total_dist[j + 1];
total_dist[j + 1] = temp;
temp = max_num_MV[j];
max_num_MV[j] = max_num_MV[j + 1];
max_num_MV[j + 1] = temp;
} else {
break;
}
}
}
// fix when ref_frames are pointing to the same idx
// TODO(bohan): any other possibilities?
for (int i = 0; i < MAX_NUM_REF_PAIR; i++) {
if (left_idx[i] < 0) continue;
if (right_chosen[i] == BWDREF_FRAME && bwd_buf_idx == alt_buf_idx) {
right_chosen[i] = ALTREF_FRAME;
}
// if (left_chosen[i] == LAST_FRAME && lst_buf_idx == gld_buf_idx) {
// left_chosen[i] = GOLDEN_FRAME;
// }
}
}
/*
 * Helper for opfl_get_closest_refs(): offer one reference as a candidate for
 * the closest frame on either side of the current frame.
 *
 * Slot 0 of idx/offset/chosen holds the best reference found so far whose
 * offset is smaller than cur_offset (left side); slot 1 holds the best one
 * whose offset is larger (right side). A candidate replaces the stored entry
 * only when it is strictly closer, so on ties the earlier candidate wins.
 * A candidate whose offset equals cur_offset is ignored.
 */
static void opfl_offer_closest_ref(int cur_offset, int buf_idx, int ref_offset,
                                   int ref_frame, int idx[2], int offset[2],
                                   int chosen[2]) {
  if (ref_offset > cur_offset) {
    // Right side: a negative stored offset means "nothing chosen yet".
    if (offset[1] < 0 || ref_offset < offset[1]) {
      idx[1] = buf_idx;
      offset[1] = ref_offset;
      chosen[1] = ref_frame;
    }
  } else if (ref_offset < cur_offset) {
    // Left side: the initial stored offset of -1 is beaten by any offset >= 0.
    if (ref_offset > offset[0]) {
      idx[0] = buf_idx;
      offset[0] = ref_offset;
      chosen[0] = ref_frame;
    }
  }
}

/*
 * Find the closest two-sided references of the current frame.
 *
 * Every reference with a non-negative buffer index is examined in the order
 * ALTREF, LAST, GOLDEN (then LAST2, LAST3, BWDREF when CONFIG_EXT_REFS is
 * set). Its frame offset is compared against cm->frame_offset.
 *
 * Output (all written unconditionally):
 * left_*_ptr:  buffer index, frame offset and reference type of the closest
 *              reference with a smaller offset than the current frame
 * right_*_ptr: the same for the closest reference with a larger offset
 * A side with no suitable reference reports index -1, offset -1 and
 * NONE_FRAME.
 */
void opfl_get_closest_refs(AV1_COMMON *cm, int *left_idx_ptr,
                           int *left_offset_ptr, int *left_chosen_ptr,
                           int *right_idx_ptr, int *right_offset_ptr,
                           int *right_chosen_ptr) {
  // Slot 0 = left side, slot 1 = right side.
  int best_idx[2] = { -1, -1 };
  int best_offset[2] = { -1, -1 };
  int best_frame[2] = { NONE_FRAME, NONE_FRAME };
  const int cur_offset = cm->frame_offset;
  int buf_idx;

  buf_idx = cm->frame_refs[ALTREF_FRAME - LAST_FRAME].idx;
  if (buf_idx >= 0) {
    opfl_offer_closest_ref(cur_offset, buf_idx,
                           cm->cur_frame->alt_frame_offset, ALTREF_FRAME,
                           best_idx, best_offset, best_frame);
  }
  buf_idx = cm->frame_refs[LAST_FRAME - LAST_FRAME].idx;
  if (buf_idx >= 0) {
    opfl_offer_closest_ref(cur_offset, buf_idx,
                           cm->cur_frame->lst_frame_offset, LAST_FRAME,
                           best_idx, best_offset, best_frame);
  }
  buf_idx = cm->frame_refs[GOLDEN_FRAME - LAST_FRAME].idx;
  if (buf_idx >= 0) {
    opfl_offer_closest_ref(cur_offset, buf_idx,
                           cm->cur_frame->gld_frame_offset, GOLDEN_FRAME,
                           best_idx, best_offset, best_frame);
  }
#if CONFIG_EXT_REFS
  buf_idx = cm->frame_refs[LAST2_FRAME - LAST_FRAME].idx;
  if (buf_idx >= 0) {
    opfl_offer_closest_ref(cur_offset, buf_idx,
                           cm->cur_frame->lst2_frame_offset, LAST2_FRAME,
                           best_idx, best_offset, best_frame);
  }
  buf_idx = cm->frame_refs[LAST3_FRAME - LAST_FRAME].idx;
  if (buf_idx >= 0) {
    opfl_offer_closest_ref(cur_offset, buf_idx,
                           cm->cur_frame->lst3_frame_offset, LAST3_FRAME,
                           best_idx, best_offset, best_frame);
  }
  buf_idx = cm->frame_refs[BWDREF_FRAME - LAST_FRAME].idx;
  if (buf_idx >= 0) {
    opfl_offer_closest_ref(cur_offset, buf_idx,
                           cm->cur_frame->bwd_frame_offset, BWDREF_FRAME,
                           best_idx, best_offset, best_frame);
  }
#endif

  *left_idx_ptr = best_idx[0];
  *left_offset_ptr = best_offset[0];
  *left_chosen_ptr = best_frame[0];
  *right_idx_ptr = best_idx[1];
  *right_offset_ptr = best_offset[1];
  *right_chosen_ptr = best_frame[1];
}
/*
 * Helper for opfl_set_init_motion(): scan the motion vectors stored with
 * frame buffer src_idx and project every one whose reference frame offset
 * equals target_offset onto the current frame.
 *
 * For each matching stored vector mv, two scaled copies are produced:
 *   to_src_scale * mv -> written to mv_to_src
 *   to_dst_scale * mv -> written to mv_to_dst
 * Both are written at the mi cell obtained by displacing the stored cell by
 * the first (to-src) vector; the divisor 8.0 * 4.0 presumably converts
 * 1/8-pel MV units to 4-pixel mi units (TODO confirm). Vectors that project
 * outside the mi grid are dropped. Later matches overwrite earlier ones that
 * land on the same cell.
 */
static void opfl_project_stored_mvs(AV1_COMMON *cm, int src_idx,
                                    int target_offset, double to_src_scale,
                                    double to_dst_scale, int_mv *mv_to_src,
                                    int_mv *mv_to_dst) {
  MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[src_idx].mvs;
  // Frame offsets of the references that frame src_idx itself used.
  int lst_frame_idx = cm->buffer_pool->frame_bufs[src_idx].lst_frame_offset;
  int alt_frame_idx = cm->buffer_pool->frame_bufs[src_idx].alt_frame_offset;
  int gld_frame_idx = cm->buffer_pool->frame_bufs[src_idx].gld_frame_offset;
#if CONFIG_EXT_REFS
  int lst2_frame_idx = cm->buffer_pool->frame_bufs[src_idx].lst2_frame_offset;
  int lst3_frame_idx = cm->buffer_pool->frame_bufs[src_idx].lst3_frame_offset;
  int bwd_frame_idx = cm->buffer_pool->frame_bufs[src_idx].bwd_frame_offset;
#endif
  for (int i = 0; i < cm->mi_rows; i++) {
    for (int j = 0; j < cm->mi_cols; j++) {
      MV_REF *mv_ref = &mv_ref_base[i * cm->mi_cols + j];
      MV this_mvs[2] = { mv_ref->mv[0].as_mv, mv_ref->mv[1].as_mv };
      MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0],
                                          mv_ref->ref_frame[1] };
#if OPFL_TPL
      // A block coded from the optical-flow frame alone carries the pair of
      // MVs / reference frames it was derived from; use those instead.
      if (ref_frame[0] == OPFL_FRAME && ref_frame[1] == NONE_FRAME) {
        this_mvs[0] = mv_ref->opfl_ref_mvs[0].as_mv;
        this_mvs[1] = mv_ref->opfl_ref_mvs[1].as_mv;
        ref_frame[0] = mv_ref->opfl_ref_frame[0];
        ref_frame[1] = mv_ref->opfl_ref_frame[1];
      }
#endif
      for (int r = 0; r < 2; r++) {
        if (ref_frame[r] == NONE_FRAME || ref_frame[r] == OPFL_FRAME) continue;
        int ref_offset;
        switch (ref_frame[r]) {
          case LAST_FRAME: ref_offset = lst_frame_idx; break;
          case ALTREF_FRAME: ref_offset = alt_frame_idx; break;
          case GOLDEN_FRAME: ref_offset = gld_frame_idx; break;
#if CONFIG_EXT_REFS
          case LAST2_FRAME: ref_offset = lst2_frame_idx; break;
          case LAST3_FRAME: ref_offset = lst3_frame_idx; break;
          case BWDREF_FRAME: ref_offset = bwd_frame_idx; break;
#endif
          default: ref_offset = -1;
        }
        // only initialize with the same refs!
        if (ref_offset != target_offset) continue;

        // MV on the side of the frame whose stored motion is being read.
        int_mv temp_mv;
        temp_mv.as_mv.row =
            opfl_round_double_2_int(to_src_scale * this_mvs[r].row);
        temp_mv.as_mv.col =
            opfl_round_double_2_int(to_src_scale * this_mvs[r].col);
        int mi_r =
            i - opfl_round_double_2_int(temp_mv.as_mv.row / (8.0 * 4.0));
        int mi_c =
            j - opfl_round_double_2_int(temp_mv.as_mv.col / (8.0 * 4.0));
        if (mi_r < 0 || mi_r >= cm->mi_rows || mi_c < 0 ||
            mi_c >= cm->mi_cols)
          continue;
        mv_to_src[mi_r * cm->mi_cols + mi_c].as_int = temp_mv.as_int;

        // MV on the opposite side, stored at the same projected cell.
        temp_mv.as_mv.row =
            opfl_round_double_2_int(to_dst_scale * this_mvs[r].row);
        temp_mv.as_mv.col =
            opfl_round_double_2_int(to_dst_scale * this_mvs[r].col);
        mv_to_dst[mi_r * cm->mi_cols + mi_c].as_int = temp_mv.as_int;
      }
    }
  }
}

/*
 * Find the existing initialization motion vectors between the two chosen
 * reference frames and scale them to the position of the current frame.
 *
 * Input:
 * cm: the av1_common pointer
 * buf_struct: supplies cur_offset, the offset of the frame being built
 * left_idx, right_idx: frame buffer indices of the two references
 * left_offset, right_offset: frame offsets of the two references
 *
 * Output:
 * left_mv, right_mv: arrays of mi_rows * mi_cols entries. Every entry is
 *   first set to INVALID_MV. Where a stored MV of the left reference points
 *   to right_offset (or one of the right reference points to left_offset),
 *   the entry at the projected cell receives that MV scaled by -dst_pos
 *   (left_mv) and by 1 - dst_pos (right_mv), with
 *   dst_pos = (cur_offset - left_offset) / (right_offset - left_offset).
 *   The right reference is processed second, so its results win on cells
 *   that both passes reach.
 *
 * If left_offset == right_offset the ratio is undefined; both arrays are
 * then left entirely INVALID_MV.
 */
void opfl_set_init_motion(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct,
                          int left_idx, int right_idx, int left_offset,
                          int right_offset, int_mv *left_mv, int_mv *right_mv) {
  int cur_offset = buf_struct->cur_offset;
  for (int i = 0; i < cm->mi_rows; i++) {
    for (int j = 0; j < cm->mi_cols; j++) {
      left_mv[i * cm->mi_cols + j].as_int = INVALID_MV;
      right_mv[i * cm->mi_cols + j].as_int = INVALID_MV;
    }
  }
  // Guard against a zero-length interval: dst_pos would be inf/NaN and the
  // scaled vectors meaningless.
  if (right_offset == left_offset) return;
  double dst_pos = ((double)(cur_offset - left_offset)) /
                   ((double)(right_offset - left_offset));
  // process left ref: its MVs that point to the right ref
  opfl_project_stored_mvs(cm, left_idx, right_offset, -dst_pos, 1 - dst_pos,
                          left_mv, right_mv);
  // process right ref: its MVs that point to the left ref
  opfl_project_stored_mvs(cm, right_idx, left_offset, -(1 - dst_pos), dst_pos,
                          right_mv, left_mv);
}
/*
 * Collect the motion vectors that already exist between frame buffers
 * left_idx and right_idx, expressed as vectors pointing from left to right.
 * Unlike opfl_set_init_motion, nothing is scaled by dst_pos.
 *
 * Input:
 * cm: the av1_common pointer
 * left_idx, right_idx: frame buffer indices of the two frames
 * left_offset, right_offset: frame offsets of the two frames
 *
 * Output:
 * left_mv: array of mi_rows * mi_cols entries. Each entry is INVALID_MV
 *   unless a vector was found for that cell. Vectors stored with the left
 *   frame are written last and therefore take precedence over vectors
 *   derived from the right frame.
 */
void opfl_find_init_motion(AV1_COMMON *cm, int left_idx, int right_idx,
                           int left_offset, int right_offset, int_mv *left_mv) {
  // Start with every cell marked as "no motion found".
  for (int row = 0; row < cm->mi_rows; ++row) {
    int_mv *out_row = left_mv + row * cm->mi_cols;
    for (int col = 0; col < cm->mi_cols; ++col) {
      out_row[col].as_int = INVALID_MV;
    }
  }

  // Pass 1: MVs stored with the right frame that point at the left frame.
  // They are negated and moved to the cell they hit in the left frame.
  MV_REF *stored_base = cm->buffer_pool->frame_bufs[right_idx].mvs;
  int off_lst = cm->buffer_pool->frame_bufs[right_idx].lst_frame_offset;
  int off_alt = cm->buffer_pool->frame_bufs[right_idx].alt_frame_offset;
  int off_gld = cm->buffer_pool->frame_bufs[right_idx].gld_frame_offset;
#if CONFIG_EXT_REFS
  int off_lst2 = cm->buffer_pool->frame_bufs[right_idx].lst2_frame_offset;
  int off_lst3 = cm->buffer_pool->frame_bufs[right_idx].lst3_frame_offset;
  int off_bwd = cm->buffer_pool->frame_bufs[right_idx].bwd_frame_offset;
#endif
  for (int row = 0; row < cm->mi_rows; ++row) {
    for (int col = 0; col < cm->mi_cols; ++col) {
      MV_REF *stored = stored_base + (row * cm->mi_cols + col);
      MV cand_mv[2] = { stored->mv[0].as_mv, stored->mv[1].as_mv };
      MV_REFERENCE_FRAME cand_ref[2] = { stored->ref_frame[0],
                                         stored->ref_frame[1] };
#if OPFL_TPL
      // A block that used only the optical-flow frame records the MV pair /
      // reference pair it came from; substitute those.
      if (cand_ref[0] == OPFL_FRAME && cand_ref[1] == NONE_FRAME) {
        cand_mv[0] = stored->opfl_ref_mvs[0].as_mv;
        cand_mv[1] = stored->opfl_ref_mvs[1].as_mv;
        cand_ref[0] = stored->opfl_ref_frame[0];
        cand_ref[1] = stored->opfl_ref_frame[1];
      }
#endif
      for (int r = 0; r < 2; ++r) {
        int cand_offset;
        switch (cand_ref[r]) {
          case LAST_FRAME: cand_offset = off_lst; break;
          case ALTREF_FRAME: cand_offset = off_alt; break;
          case GOLDEN_FRAME: cand_offset = off_gld; break;
#if CONFIG_EXT_REFS
          case LAST2_FRAME: cand_offset = off_lst2; break;
          case LAST3_FRAME: cand_offset = off_lst3; break;
          case BWDREF_FRAME: cand_offset = off_bwd; break;
#endif
          default: cand_offset = -1;
        }
        // Only vectors that reference the left frame are usable here.
        if (cand_offset != left_offset) continue;

        // Flip the direction so the vector points from left to right.
        int_mv flipped;
        flipped.as_mv.row = -cand_mv[r].row;
        flipped.as_mv.col = -cand_mv[r].col;
        // Cell in the left frame that this vector starts from.
        const int dst_row = row - (flipped.as_mv.row >> (3 + MI_SIZE_LOG2));
        const int dst_col = col - (flipped.as_mv.col >> (3 + MI_SIZE_LOG2));
        if (dst_row >= 0 && dst_row < cm->mi_rows && dst_col >= 0 &&
            dst_col < cm->mi_cols) {
          left_mv[dst_row * cm->mi_cols + dst_col].as_int = flipped.as_int;
        }
      }
    }
  }

  // Pass 2: MVs stored with the left frame that point at the right frame.
  // These already have the wanted direction and stay at their own cell.
  stored_base = cm->buffer_pool->frame_bufs[left_idx].mvs;
  off_lst = cm->buffer_pool->frame_bufs[left_idx].lst_frame_offset;
  off_alt = cm->buffer_pool->frame_bufs[left_idx].alt_frame_offset;
  off_gld = cm->buffer_pool->frame_bufs[left_idx].gld_frame_offset;
#if CONFIG_EXT_REFS
  off_lst2 = cm->buffer_pool->frame_bufs[left_idx].lst2_frame_offset;
  off_lst3 = cm->buffer_pool->frame_bufs[left_idx].lst3_frame_offset;
  off_bwd = cm->buffer_pool->frame_bufs[left_idx].bwd_frame_offset;
#endif
  for (int row = 0; row < cm->mi_rows; ++row) {
    for (int col = 0; col < cm->mi_cols; ++col) {
      const int cell = row * cm->mi_cols + col;
      MV_REF *stored = stored_base + cell;
      MV cand_mv[2] = { stored->mv[0].as_mv, stored->mv[1].as_mv };
      MV_REFERENCE_FRAME cand_ref[2] = { stored->ref_frame[0],
                                         stored->ref_frame[1] };
#if OPFL_TPL
      if (cand_ref[0] == OPFL_FRAME && cand_ref[1] == NONE_FRAME) {
        cand_mv[0] = stored->opfl_ref_mvs[0].as_mv;
        cand_mv[1] = stored->opfl_ref_mvs[1].as_mv;
        cand_ref[0] = stored->opfl_ref_frame[0];
        cand_ref[1] = stored->opfl_ref_frame[1];
      }
#endif
      for (int r = 0; r < 2; ++r) {
        int cand_offset;
        switch (cand_ref[r]) {
          case LAST_FRAME: cand_offset = off_lst; break;
          case ALTREF_FRAME: cand_offset = off_alt; break;
          case GOLDEN_FRAME: cand_offset = off_gld; break;
#if CONFIG_EXT_REFS
          case LAST2_FRAME: cand_offset = off_lst2; break;
          case LAST3_FRAME: cand_offset = off_lst3; break;
          case BWDREF_FRAME: cand_offset = off_bwd; break;
#endif
          default: cand_offset = -1;
        }
        if (cand_offset != right_offset) continue;

        int_mv direct;
        direct.as_mv.row = cand_mv[r].row;
        direct.as_mv.col = cand_mv[r].col;
        left_mv[cell].as_int = direct.as_int;
      }
    }
  }
}
/*
 * Helper for opfl_derive_init_mv(): decide whether buf_idx should be added
 * to the candidate list. Returns 1 when buf_idx is a valid (non-negative)
 * buffer index, differs from both left_idx and right_idx, and is not
 * already among the first num_cands entries of cand_idx; returns 0
 * otherwise.
 */
static int opfl_is_new_derive_cand(const int *cand_idx, int num_cands,
                                   int buf_idx, int left_idx, int right_idx) {
  if (buf_idx < 0 || buf_idx == left_idx || buf_idx == right_idx) return 0;
  for (int k = 0; k < num_cands; k++) {
    if (cand_idx[k] == buf_idx) return 0;
  }
  return 1;
}

/*
 * This function derives initialization motion vectors through a third
 * reference frame. For example: assume we want mvs between ref1 and ref2,
 * but there exist mv1 pointing from ref1 to ref3 and mv2 pointing from ref3
 * to ref2; then the derived mv = mv1 + mv2.
 *
 * Input:
 * cm: the av1_common pointer
 * buf_struct: supplies dst_pos, the relative position of the current frame
 * left_idx, right_idx: frame buffer indices of the two chosen references
 * left_offset, right_offset: frame offsets of the two chosen references
 *
 * In/Output:
 * left_mv, right_mv: arrays of mi_rows * mi_cols entries. Only cells where
 *   at least one of the two entries is INVALID_MV are filled in; cells
 *   that already hold a valid pair are left untouched.
 *
 * Every other distinct reference of the current frame is tried as the
 * intermediate frame, in the order ALTREF, LAST, GOLDEN (then LAST2, LAST3,
 * BWDREF when CONFIG_EXT_REFS is set). If the scratch buffers cannot be
 * allocated the function returns without modifying left_mv / right_mv.
 */
void opfl_derive_init_mv(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct,
                         int left_idx, int right_idx, int left_offset,
                         int right_offset, int_mv *left_mv, int_mv *right_mv) {
  int alt_buf_idx = cm->frame_refs[ALTREF_FRAME - LAST_FRAME].idx;
  int lst_buf_idx = cm->frame_refs[LAST_FRAME - LAST_FRAME].idx;
  int gld_buf_idx = cm->frame_refs[GOLDEN_FRAME - LAST_FRAME].idx;
#if CONFIG_EXT_REFS
  int lst2_buf_idx = cm->frame_refs[LAST2_FRAME - LAST_FRAME].idx;
  int lst3_buf_idx = cm->frame_refs[LAST3_FRAME - LAST_FRAME].idx;
  int bwd_buf_idx = cm->frame_refs[BWDREF_FRAME - LAST_FRAME].idx;
#endif
  double dstpos = buf_struct->dst_pos;

  // find candidate refs to try
  int cand_idx[6];
  int cand_offset[6];
  int num_cands = 0;
  if (opfl_is_new_derive_cand(cand_idx, num_cands, alt_buf_idx, left_idx,
                              right_idx)) {
    cand_idx[num_cands] = alt_buf_idx;
    cand_offset[num_cands] = cm->cur_frame->alt_frame_offset;
    num_cands++;
  }
  if (opfl_is_new_derive_cand(cand_idx, num_cands, lst_buf_idx, left_idx,
                              right_idx)) {
    cand_idx[num_cands] = lst_buf_idx;
    cand_offset[num_cands] = cm->cur_frame->lst_frame_offset;
    num_cands++;
  }
  if (opfl_is_new_derive_cand(cand_idx, num_cands, gld_buf_idx, left_idx,
                              right_idx)) {
    cand_idx[num_cands] = gld_buf_idx;
    cand_offset[num_cands] = cm->cur_frame->gld_frame_offset;
    num_cands++;
  }
#if CONFIG_EXT_REFS
  if (opfl_is_new_derive_cand(cand_idx, num_cands, lst2_buf_idx, left_idx,
                              right_idx)) {
    cand_idx[num_cands] = lst2_buf_idx;
    cand_offset[num_cands] = cm->cur_frame->lst2_frame_offset;
    num_cands++;
  }
  if (opfl_is_new_derive_cand(cand_idx, num_cands, lst3_buf_idx, left_idx,
                              right_idx)) {
    cand_idx[num_cands] = lst3_buf_idx;
    cand_offset[num_cands] = cm->cur_frame->lst3_frame_offset;
    num_cands++;
  }
  if (opfl_is_new_derive_cand(cand_idx, num_cands, bwd_buf_idx, left_idx,
                              right_idx)) {
    cand_idx[num_cands] = bwd_buf_idx;
    cand_offset[num_cands] = cm->cur_frame->bwd_frame_offset;
    num_cands++;
  }
#endif
  // Nothing to derive from: skip the scratch allocations entirely.
  if (num_cands == 0) return;

  int_mv *mv_to_left = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv));
  int_mv *mv_to_right = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv));
  // opfl_find_init_motion() writes through these pointers, so a failed
  // allocation must not be passed on. Derivation is best-effort: give up.
  if (!mv_to_left || !mv_to_right) {
    aom_free(mv_to_left);
    aom_free(mv_to_right);
    return;
  }

  int_mv cur_mv, temp_mv;
  for (int c = 0; c < num_cands; c++) {
    // Motion from the candidate frame to each of the two chosen references,
    // indexed by cell position in the candidate frame.
    opfl_find_init_motion(cm, cand_idx[c], left_idx, cand_offset[c],
                          left_offset, mv_to_left);
    opfl_find_init_motion(cm, cand_idx[c], right_idx, cand_offset[c],
                          right_offset, mv_to_right);
    for (int i = 0; i < cm->mi_rows; i++) {
      for (int j = 0; j < cm->mi_cols; j++) {
        const int src = i * cm->mi_cols + j;
        if (mv_to_left[src].as_int == INVALID_MV ||
            mv_to_right[src].as_int == INVALID_MV)
          continue;
        // cur_mv points from left to right
        cur_mv.as_mv.row = mv_to_right[src].as_mv.row - mv_to_left[src].as_mv.row;
        cur_mv.as_mv.col = mv_to_right[src].as_mv.col - mv_to_left[src].as_mv.col;
        // mv from the current frame towards the left reference
        temp_mv.as_mv.row = opfl_round_double_2_int(-dstpos * cur_mv.as_mv.row);
        temp_mv.as_mv.col = opfl_round_double_2_int(-dstpos * cur_mv.as_mv.col);
        // Cell of the current frame this motion passes through: follow the
        // candidate->left vector, then step back along the current->left
        // vector.
        int mi_r = i + opfl_round_double_2_int(
                           (mv_to_left[src].as_mv.row - temp_mv.as_mv.row) /
                           (8.0 * 4.0));
        int mi_c = j + opfl_round_double_2_int(
                           (mv_to_left[src].as_mv.col - temp_mv.as_mv.col) /
                           (8.0 * 4.0));
        if (mi_r < 0 || mi_r >= cm->mi_rows || mi_c < 0 || mi_c >= cm->mi_cols)
          continue;
        const int dst = mi_r * cm->mi_cols + mi_c;
        // Never overwrite a cell that already has a complete MV pair.
        if (left_mv[dst].as_int == INVALID_MV ||
            right_mv[dst].as_int == INVALID_MV) {
          left_mv[dst].as_int = temp_mv.as_int;
          // calculate mv to right
          temp_mv.as_mv.row =
              opfl_round_double_2_int((1 - dstpos) * cur_mv.as_mv.row);
          temp_mv.as_mv.col =
              opfl_round_double_2_int((1 - dstpos) * cur_mv.as_mv.col);
          right_mv[dst].as_int = temp_mv.as_int;
        }
      }
    }
  }
  aom_free(mv_to_left);
  aom_free(mv_to_right);
}
/*
 * Helper for opfl_get_4x4_warp_dist(): limit an integer displacement so
 * that start + displacement is not negative and not beyond limit.
 * A position below 0 is moved to 0; a position at or above limit is moved
 * to exactly limit.
 */
static int opfl_limit_warp_disp(int start, int disp, int limit) {
  if (start + disp < 0) return -start;
  if (start + disp >= limit) return limit - start;
  return disp;
}

/*
 * Sum of squared differences over a 4x4 luma block between the two level-0
 * reference buffers, each sampled at its own displaced position.
 *
 * Input:
 * cm: unused here
 * buf_struct: supplies ref0_buf[0] and ref1_buf[0]
 * starth, startw: top-left luma position of the 4x4 block
 * left_mv: displacement applied to ref0, divided by 8 to get pixels
 * right_mv: displacement applied to ref1, divided by 8 to get pixels
 *
 * The integer part of each displacement is limited against the luma size of
 * ref0; the fractional part is taken from the unlimited value and handed to
 * get_sub_pel_y() (presumably a sub-pixel interpolator - TODO confirm).
 * The stride of ref0 is used for both buffers.
 *
 * Output:
 * the accumulated squared difference
 */
int opfl_get_4x4_warp_dist(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct,
                           int starth, int startw, int_mv left_mv,
                           int_mv right_mv) {
  YV12_BUFFER_CONFIG *ref_l = buf_struct->ref0_buf[0];
  YV12_BUFFER_CONFIG *ref_r = buf_struct->ref1_buf[0];
  uint8_t *luma_l = ref_l->y_buffer;
  uint8_t *luma_r = ref_r->y_buffer;
  int stride = ref_l->y_stride;
  int frame_w = ref_l->y_width;
  int frame_h = ref_l->y_height;

  // Split each displacement into integer and fractional pixel parts.
  int int_row_l = opfl_floor_double_2_int((double)left_mv.as_mv.row / 8.0);
  int int_col_l = opfl_floor_double_2_int((double)left_mv.as_mv.col / 8.0);
  int int_row_r = opfl_floor_double_2_int((double)right_mv.as_mv.row / 8.0);
  int int_col_r = opfl_floor_double_2_int((double)right_mv.as_mv.col / 8.0);
  double frac_row_l = (double)left_mv.as_mv.row / 8.0 - int_row_l;
  double frac_col_l = (double)left_mv.as_mv.col / 8.0 - int_col_l;
  double frac_row_r = (double)right_mv.as_mv.row / 8.0 - int_row_r;
  double frac_col_r = (double)right_mv.as_mv.col / 8.0 - int_col_r;

  // Limit the integer parts only; the fractions above stay as computed.
  int_row_l = opfl_limit_warp_disp(starth, int_row_l, frame_h);
  int_col_l = opfl_limit_warp_disp(startw, int_col_l, frame_w);
  int_row_r = opfl_limit_warp_disp(starth, int_row_r, frame_h);
  int_col_r = opfl_limit_warp_disp(startw, int_col_r, frame_w);

  int ssd = 0;
  for (int dy = 0; dy < 4; dy++) {
    for (int dx = 0; dx < 4; dx++) {
      uint8_t *sample;
      sample = luma_l + (starth + dy + int_row_l) * stride +
               (startw + dx + int_col_l);
      int pix_l = get_sub_pel_y(sample, stride, frac_row_l, frac_col_l);
      sample = luma_r + (starth + dy + int_row_r) * stride +
               (startw + dx + int_col_r);
      int pix_r = get_sub_pel_y(sample, stride, frac_row_r, frac_col_r);
      ssd += (pix_l - pix_r) * (pix_l - pix_r);
    }
  }
  return ssd;
}
/*
 * Try to improve the initial motion of each mi cell with a candidate MV
 * pair that was found between a different pair of reference frames.
 *
 * Input:
 * cm: the av1_common pointer
 * buf_struct: supplies cur_offset and the reference buffers used for the
 *             distortion measurement
 * left_offset, right_offset: frame offsets of the references that
 *             left_mv / right_mv refer to
 * left_cand_mv, right_cand_mv: candidate MVs per mi cell
 * left_cand_offset, right_cand_offset: frame offsets the candidates refer to
 * is_first_valid: per-cell flag; cells with a value > 0 are skipped
 *
 * In/Output:
 * left_mv, right_mv: current MV pair per cell, replaced when the rescaled
 *             candidate pair gives a strictly smaller 4x4 warp distortion
 * left_final_mv, right_final_mv: receive the same replacement values
 *
 * Candidates are rescaled by the ratio of frame distances to the current
 * frame before being evaluated. Cells whose candidate pair contains
 * INVALID_MV are skipped.
 */
void opfl_update_init_motion(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct,
                             int_mv *left_mv, int_mv *right_mv, int left_offset,
                             int right_offset, int_mv *left_cand_mv,
                             int_mv *right_cand_mv, int left_cand_offset,
                             int right_cand_offset, int *is_first_valid,
                             int_mv *left_final_mv, int_mv *right_final_mv) {
  const int cur_offset = buf_struct->cur_offset;
  // Distance ratios that map a candidate MV onto the chosen reference pair.
  const double left_ratio = ((double)(cur_offset - left_offset)) /
                            ((double)(cur_offset - left_cand_offset));
  const double right_ratio = ((double)(-cur_offset + right_offset)) /
                             ((double)(-cur_offset + right_cand_offset));

  for (int mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
    for (int mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
      const int cell = mi_row * cm->mi_cols + mi_col;
      if (is_first_valid[cell] > 0) continue;
      if (left_cand_mv[cell].as_int == INVALID_MV ||
          right_cand_mv[cell].as_int == INVALID_MV)
        continue;

      // Distortion of the MV pair currently assigned to this cell.
      const int dist_cur = opfl_get_4x4_warp_dist(
          cm, buf_struct, mi_row * 4, mi_col * 4, left_mv[cell], right_mv[cell]);

      // Rescale the candidate pair and measure it the same way.
      int_mv scaled_left, scaled_right;
      scaled_left.as_mv.row =
          opfl_round_double_2_int(left_cand_mv[cell].as_mv.row * left_ratio);
      scaled_left.as_mv.col =
          opfl_round_double_2_int(left_cand_mv[cell].as_mv.col * left_ratio);
      scaled_right.as_mv.row =
          opfl_round_double_2_int(right_cand_mv[cell].as_mv.row * right_ratio);
      scaled_right.as_mv.col =
          opfl_round_double_2_int(right_cand_mv[cell].as_mv.col * right_ratio);
      const int dist_cand = opfl_get_4x4_warp_dist(
          cm, buf_struct, mi_row * 4, mi_col * 4, scaled_left, scaled_right);

      if (dist_cand < dist_cur) {
        left_mv[cell].as_int = scaled_left.as_int;
        right_mv[cell].as_int = scaled_right.as_int;
        left_final_mv[cell].as_int = scaled_left.as_int;
        right_final_mv[cell].as_int = scaled_right.as_int;
      }
    }
  }
}
/*
 * Use the optical flow method to calculate the motion field of one level.
 *
 * Runs MAX_ITER_OPTICAL_FLOW update rounds. Each round computes an update in
 * mf_new (via iterate_update_mv_fast when FAST_OPTICAL_FLOW is set,
 * otherwise iterate_update_mv), adds it to mf_last, clamps every component
 * to [-MAX_MV_LENGTH_1D, MAX_MV_LENGTH_1D], and copies the clamped result
 * back into mf_new. The annealing factor starts at 1 << level and is
 * multiplied by OPFL_ANNEAL_FACTOR after every round.
 *
 * Input:
 * buf_struct: containing buffers of the reference frames
 * mf_last: initial motion field (updated in place)
 * level: current scale level, 0 = original, 1 = 0.5, 2 = 0.25, etc.
 * dstpos: dst frame position
 * usescale: 0->do not scale the original, 1->do scaling of images
 * blk_info: information on the current block
 *
 * Output:
 * mf_new: pointer to calculated motion field; on return it holds the same
 *         values as mf_last inside the block area
 *
 * Both fields are laid out with a border of AVG_MF_BORDER entries on every
 * side and a row stride of (block width >> level) + 2 * AVG_MF_BORDER.
 */
void refine_motion_field(OPFL_BUFFER_STRUCT *buf_struct, DB_MV *mf_last,
                         DB_MV *mf_new, int level, double dstpos, int usescale,
                         OPFL_BLK_INFO blk_info) {
  int lvl_width = blk_info.blk_width;
  int lvl_height = blk_info.blk_height;
  lvl_width >>= level;
  lvl_height >>= level;
  const int mf_stride = lvl_width + 2 * AVG_MF_BORDER;
  // First entry inside the border of each field.
  DB_MV *const total_mf = mf_last + AVG_MF_BORDER * mf_stride + AVG_MF_BORDER;
  DB_MV *const step_mf = mf_new + AVG_MF_BORDER * mf_stride + AVG_MF_BORDER;
  // annealing factor for laplacian multiplier
  double anneal = 1 << level;

  // iteratively warp and estimate motion field
  for (int iter = 0; iter < MAX_ITER_OPTICAL_FLOW; iter++) {
    // TODO(bohan): combinations of fast and direct methods might help
    // The cost returned by the update routine is not used here.
#if FAST_OPTICAL_FLOW
    (void)iterate_update_mv_fast(buf_struct, mf_last, mf_new, level, dstpos,
                                 anneal, usescale, blk_info);
#else
    (void)iterate_update_mv(buf_struct, mf_last, mf_new, level, dstpos, anneal,
                            usescale, blk_info, iter);
#endif
    // Accumulate the update and prepare both fields for the next round.
    for (int y = 0; y < lvl_height; y++) {
      for (int x = 0; x < lvl_width; x++) {
        DB_MV *const total = &total_mf[y * mf_stride + x];
        DB_MV *const step = &step_mf[y * mf_stride + x];
        total->row += step->row;
        total->col += step->col;
        if (total->row > MAX_MV_LENGTH_1D) {
          total->row = MAX_MV_LENGTH_1D;
        } else if (total->row < -MAX_MV_LENGTH_1D) {
          total->row = -MAX_MV_LENGTH_1D;
        }
        if (total->col > MAX_MV_LENGTH_1D) {
          total->col = MAX_MV_LENGTH_1D;
        } else if (total->col < -MAX_MV_LENGTH_1D) {
          total->col = -MAX_MV_LENGTH_1D;
        }
        step->row = total->row;
        step->col = total->col;
      }
    }
    anneal *= OPFL_ANNEAL_FACTOR;
  }
}
/*
 * Optical flow cost of one position of a motion field.
 *
 * The cost is the sum of
 *   (Ex * u + Ey * v + Et)^2, with u = column and v = row component of the
 *   motion vector at (i, j), and
 *   OF_A_SQUARED / 4 * (d_col^2 + d_row^2), where d_* is four times the
 *   component at (i, j) minus the components of its four diagonal
 *   neighbours.
 * Neighbour coordinates are clamped to [0, height - 1] x [0, width - 1].
 *
 * Input:
 * Ex, Ey, Et: derivative arrays, indexed with row stride `width`
 * mv_start: motion field, indexed with row stride `mfstr`
 * width, height: size of the area
 * i, j: row and column of the position to evaluate
 *
 * Output:
 * the cost value
 */
double get_opfl_cost(double *Ex, double *Ey, double *Et, DB_MV *mv_start,
                     int width, int height, int mfstr, int i, int j) {
  // Row / column steps to the four diagonal neighbours.
  static const int diag_dy[4] = { -1, -1, 1, 1 };
  static const int diag_dx[4] = { -1, 1, -1, 1 };
  const DB_MV *const center = &mv_start[i * mfstr + j];
  const int pix = i * width + j;
  double u = center->col;
  double v = center->row;

  // Data term.
  double cost = Ex[pix] * u + Ey[pix] * v + Et[pix];
  cost = cost * cost;

  // Smoothness term: both components are accumulated in the same pass, each
  // in its own accumulator.
  double diff_col = 0;
  double diff_row = 0;
  diff_col += 4 * center->col;
  diff_row += 4 * center->row;
  for (int k = 0; k < 4; k++) {
    int ny = i + diag_dy[k];
    int nx = j + diag_dx[k];
    if (ny < 0) {
      ny = 0;
    } else if (ny >= height) {
      ny = height - 1;
    }
    if (nx < 0) {
      nx = 0;
    } else if (nx >= width) {
      nx = width - 1;
    }
    const DB_MV *const nb = &mv_start[ny * mfstr + nx];
    diff_col -= nb->col;
    diff_row -= nb->row;
  }
  cost += diff_col * diff_col * OF_A_SQUARED / 4.0;
  cost += diff_row * diff_row * OF_A_SQUARED / 4.0;
  return cost;
}
/*
* Update motion field at each iteration by solving linear equations directly.
*
* Input:
* buf_struct: containing buffers of the reference frames
* mf_last: initial motion field
* level: current scale level, 0 = original, 1 = 0.5, 2 = 0.25, etc.
* dstpos: dst frame position
* as_scale: scale the laplacian multiplier to perform "annealing"
* usescale: 0->do not scale the original, 1->do scaling of images
* blk_info: information on the current block
*
* Output:
* mf_new: pointer to calculated motion field
*/
double iterate_update_mv(OPFL_BUFFER_STRUCT *buf_struct, DB_MV *mf_last,
DB_MV *mf_new, int level, double dstpos,
double as_scale, int usescale, OPFL_BLK_INFO blk_info,
int numWarpedRounds) {
double *Ex, *Ey, *Et;
double a_squared = OF_A_SQUARED * as_scale;
double cost = 0;
YV12_BUFFER_CONFIG *ref0, *ref1, *buf_init0, *buf_init1;
int l = level;
if (!usescale) l = 0;
ref0 = buf_struct->ref0_buf[l];
ref1 = buf_struct->ref1_buf[l];
buf_init0 = buf_struct->ref0_warped_buf[l];
buf_init1 = buf_struct->ref1_warped_buf[l];
int y_width = blk_info.blk_width, y_height = blk_info.blk_height;
int starth = blk_info.starth, startw = blk_info.startw;
int sh = starth >> level, sw = startw >> level;
int width = y_width, height = y_height;
width = width >> level;
height = height >> level;
if (usescale) {
y_width = y_width >> level;
y_height = y_height >> level;
starth = starth >> level;
startw = startw >> level;
}
int mvstr = width + 2 * AVG_MF_BORDER;
DB_MV *mv_start = mf_last + AVG_MF_BORDER * mvstr + AVG_MF_BORDER;
DB_MV *mv_start_new = mf_new + AVG_MF_BORDER * mvstr + AVG_MF_BORDER;
int i, j;
// allocate temp buffers
Ex = buf_struct->Ex;
Ey = buf_struct->Ey;
Et = buf_struct->Et;
YV12_BUFFER_CONFIG *buffer0 = buf_struct->buffer0[usescale ? level : 0];
YV12_BUFFER_CONFIG *buffer1 = buf_struct->buffer1[usescale ? level : 0];
// TODO(bohan): these buffers can also be moved to the buf_struct
int *row_pos = aom_calloc(width * height * 28, sizeof(int));
int *col_pos = aom_calloc(width * height * 28, sizeof(int));
double *values = aom_calloc(width * height * 28, sizeof(double));
double *mv_vec = aom_calloc(width * height * 2, sizeof(double));
double *b = aom_calloc(width * height * 2, sizeof(double));
double *b_temp = aom_calloc(width * height * 2, sizeof(double));
double *pixel_weight = aom_calloc(width * height, sizeof(double));
double *mv_weight = aom_calloc(width * height, sizeof(double));
double *mv_center_wt = aom_calloc(width * height, sizeof(double));
int imvstr = buf_struct->ref0_buf[0]->y_width;
imvstr = (imvstr >> level) + 2 * AVG_MF_BORDER;
DB_MV *initmv = buf_struct->init_mv_buf[level];
initmv = initmv + (sh + AVG_MF_BORDER) * imvstr + sw + AVG_MF_BORDER;
#if OPFL_INIT_WT
double *init_wts = buf_struct->init_mv_wts[level];
init_wts = init_wts + (sh + AVG_MF_BORDER) * imvstr + sw + AVG_MF_BORDER;
#endif
int fheight = buf_struct->ref0_buf[0]->y_height;
int fwidth = buf_struct->ref0_buf[0]->y_width;
fheight = fheight >> level;
fwidth = fwidth >> level;
clock_t starts = clock();
if (level == 0 || !usescale)
warp_optical_flow_fwd(ref0, ref1, mv_start, mvstr, buffer0, dstpos, level,
usescale, blk_info);
else
warp_optical_flow_fwd_bilinear(ref0, ref1, mv_start, mvstr, buffer0, dstpos,
level, usescale, blk_info);
if (level == 0 || !usescale)
warp_optical_flow_back(ref1, ref0, mv_start, mvstr, buffer1, 1 - dstpos,
level, usescale, blk_info);
else
warp_optical_flow_back_bilinear(ref1, ref0, mv_start, mvstr, buffer1,
1 - dstpos, level, usescale, blk_info);
clock_t ends = clock();
timesub += (double)(ends - starts) / CLOCKS_PER_SEC;
clock_t startd = clock();
// Calculate partial derivatives
opfl_get_derivatives(Ex, Ey, Et, buffer0, buffer1, buf_init0, buf_init1,
dstpos, level, usescale, blk_info);
#if OPFL_EXP_DERV
// if (numWarpedRounds == 0)
opfl_get_derivatives_nowarp(Ex, Ey, buf_struct, mf_last, dstpos, level,
blk_info);
#endif
clock_t endd = clock();
timeder += (double)(endd - startd) / CLOCKS_PER_SEC;
clock_t starti = clock(), endi;
// construct and solve A*mv_vec = b
SPARSE_MTX A, L, LtL;
// check if pointing out of bound
double boundFactor = 0.01;
double pixExpFactor = 0.001;
double mvExpFactor = 0.001;
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
pixel_weight[i * width + j] = 1;
mv_weight[i * width + j] = 1;
}
}
double i0, i1, j0, j1;
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
i0 = (double)(i + sh) -
(buf_struct->dst_pos) * mv_start[i * mvstr + j].row;
i1 = (double)(i + sh) +
(1 - buf_struct->dst_pos) * mv_start[i * mvstr + j].row;
j0 = (double)(j + sw) -
(buf_struct->dst_pos) * mv_start[i * mvstr + j].col;
j1 = (double)(j + sw) +
(1 - buf_struct->dst_pos) * mv_start[i * mvstr + j].col;
int is_out = (i0 < 0 || i0 > fheight - 1 || i1 < 0 || i1 > fheight - 1 ||
j0 < 0 || j0 > fwidth - 1 || j1 < 0 || j1 > fwidth - 1);
if (is_out && OPFL_BOUND_HANDLING) {
pixel_weight[i * width + j] = boundFactor * boundFactor;
mv_weight[i * width + j] = boundFactor;
} else if (level >= 0 && numWarpedRounds >= 0 && OPFL_CONF_WTS) {
double avg = 0;
double minDiff = 1000;
double curWt;
double count = 0;
for (int ii = -2; ii <= 2; ii++) {
for (int jj = -2; jj <= 2; jj++) {
if (i + ii < 0 || i + ii >= height || j + jj < 0 || j + jj >= width)
continue;
count += 1;
curWt =
Et[(i + ii) * width + j + jj] * Et[(i + ii) * width + j + jj];
avg = avg * (count - 1) / count + curWt / count;
if (curWt > minDiff) minDiff = curWt;
}
}
mv_weight[i * width + j] = exp(-mvExpFactor * avg);
mv_weight[i * width + j] = (mv_weight[i * width + j] > boundFactor)
? mv_weight[i * width + j]
: boundFactor;
pixel_weight[i * width + j] =
mv_weight[i * width + j] * mv_weight[i * width + j];
}
}
}
int c;
// Build A and b in linear equation Ax = b
int offset = height * width;
int checki, checkj;
double blkbound_wt = 0.0;
// the location idx for neighboring pixels, k < 4 are the 4 direct neighbors
int check_locs_y[12] = { 0, 0, -1, 1, -1, -1, 1, 1, 0, 0, -2, 2 };
int check_locs_x[12] = { -1, 1, 0, 0, -1, 1, -1, 1, -2, 2, 0, 0 };
int useModifyLap = numWarpedRounds >= 0;
int useThres1 = 1;
double wtThres = 0.2;
// calculate center weight for laplacian (spatial term), and also the b term
// part for it
for (j = 0; j < width; j++) {
for (i = 0; i < height; i++) {
int curidx = j * height + i;
b_temp[curidx] = 0;
b_temp[curidx + offset] = 0;
mv_center_wt[i * width + j] = 0;
for (int k = 0; k < 4; k++) {
checki = i + check_locs_y[k];
checkj = j + check_locs_x[k];
if (checki < 0 || checki >= height || checkj < 0 || checkj >= width) {
// if at block bound, but not frame bound, use init mv with
// blkbound_wt
if (checki + sh >= 0 && checki + sh < fheight && checkj + sw >= 0 &&
checkj + sw < fwidth) {
mv_center_wt[i * width + j] += blkbound_wt;
b_temp[curidx] +=
blkbound_wt * initmv[(checki)*imvstr + checkj].col;
b_temp[curidx + offset] +=
blkbound_wt * initmv[(checki)*imvstr + checkj].row;
}
} else {
double cur_wt = mv_weight[checki * width + checkj];
if (cur_wt > mv_weight[i * width + j] && useModifyLap) cur_wt = 1;
if (cur_wt > wtThres && useThres1) cur_wt = 1;
mv_center_wt[i * width + j] += cur_wt;
}
}
#if OPFL_INIT_WT
mv_center_wt[i * width + j] += init_wts[(i)*imvstr + j];
b_temp[curidx] += init_wts[(i)*imvstr + j] * initmv[(i)*imvstr + j].col;
b_temp[curidx + offset] +=
init_wts[(i)*imvstr + j] * initmv[(i)*imvstr + j].row;
#endif
mv_center_wt[i * width + j] *= -1;
}
}
// get Laplacian filter matrix L
c = 0;
for (j = 0; j < width; j++) {
for (i = 0; i < height; i++) {
int curidx = j * height + i;
// center
row_pos[c] = curidx;
col_pos[c] = curidx;
values[c] = mv_center_wt[i * width + j];
c++;
row_pos[c] = curidx + offset;
col_pos[c] = curidx + offset;
values[c] = mv_center_wt[i * width + j];
c++;
// 4 direct neighbors
for (int k = 0; k < 4; k++) {
double cur_wt;
checki = i + check_locs_y[k];
checkj = j + check_locs_x[k];
if (checki < 0 || checki >= height || checkj < 0 || checkj >= width)
continue;
cur_wt = mv_weight[checki * width + checkj];
if (cur_wt > mv_weight[i * width + j] && useModifyLap) cur_wt = 1;
if (cur_wt > wtThres && useThres1) cur_wt = 1;
int nextidx = checkj * height + checki;
row_pos[c] = curidx;
col_pos[c] = nextidx;
values[c] = cur_wt;
c++;
row_pos[c] = curidx + offset;
col_pos[c] = nextidx + offset;
values[c] = cur_wt;
c++;
}
}
}
init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height,
2 * width * height, &L);
// get part of b from init_mv and boundary
mtx_vect_multi_left(&L, b_temp, b, 2 * width * height);
free_sparse_mtx_elems(&L);
// build LtL and vectorize mv_last
c = 0;
// use the mv_vec buffer temporarily
double *mv_last_vec = mv_vec;
for (j = 0; j < width; j++) {
for (i = 0; i < height; i++) {
int curidx = j * height + i;
double center_wt =
mv_center_wt[i * width + j] * mv_center_wt[i * width + j];
mv_last_vec[curidx] = mv_start[i * mvstr + j].col;
mv_last_vec[curidx + offset] = mv_start[i * mvstr + j].row;
// 4 direct neighbors
for (int k = 0; k < 4; k++) {
double cur_wt, reverse_wt;
checki = i + check_locs_y[k];
checkj = j + check_locs_x[k];
if (checki < 0 || checki >= height || checkj < 0 || checkj >= width)
continue;
cur_wt = mv_weight[checki * width + checkj];
if (cur_wt > mv_weight[i * width + j] && useModifyLap) cur_wt = 1;
if (cur_wt > wtThres && useThres1) cur_wt = 1;
reverse_wt = mv_weight[i * width + j];
if (reverse_wt > mv_weight[checki * width + checkj] && useModifyLap)
reverse_wt = 1;
if (reverse_wt > wtThres && useThres1) reverse_wt = 1;
int nextidx = checkj * height + checki;
row_pos[c] = curidx;
col_pos[c] = nextidx;
values[c] = cur_wt * mv_center_wt[i * width + j] +
reverse_wt * mv_center_wt[checki * width + checkj];
c++;
row_pos[c] = curidx + offset;
col_pos[c] = nextidx + offset;
values[c] = cur_wt * mv_center_wt[i * width + j] +
reverse_wt * mv_center_wt[checki * width + checkj];
c++;
center_wt += reverse_wt * reverse_wt;
}
// center
row_pos[c] = curidx;
col_pos[c] = curidx;
values[c] = center_wt;
c++;
row_pos[c] = curidx + offset;
col_pos[c] = curidx + offset;
values[c] = center_wt;
c++;
// diag corners
for (int k = 4; k < 8; k++) {
checki = i + check_locs_y[k];
checkj = j + check_locs_x[k];
if (checki < 0 || checki >= height || checkj < 0 || checkj >= width)
continue;
double cur_wt1 = mv_weight[checki * width + checkj],
cur_wt2 = mv_weight[checki * width + checkj],
reverse_wt1 = mv_weight[i * width + j],
reverse_wt2 = mv_weight[i * width + j];
if (cur_wt1 > mv_weight[i * width + checkj] && useModifyLap)
cur_wt1 = 1;
if (cur_wt1 > wtThres && useThres1) cur_wt1 = 1;
if (reverse_wt1 > mv_weight[i * width + checkj] && useModifyLap)
reverse_wt1 = 1;
if (reverse_wt1 > wtThres && useThres1) reverse_wt1 = 1;
if (cur_wt2 > mv_weight[checki * width + j] && useModifyLap)
cur_wt2 = 1;
if (cur_wt2 > wtThres && useThres1) cur_wt2 = 1;
if (reverse_wt2 > mv_weight[checki * width + j] && useModifyLap)
reverse_wt2 = 1;
if (reverse_wt2 > wtThres && useThres1) reverse_wt2 = 1;
int nextidx = checkj * height + checki;
row_pos[c] = curidx;
col_pos[c] = nextidx;
values[c] = cur_wt1 * reverse_wt1 + cur_wt2 * reverse_wt2;
c++;
row_pos[c] = curidx + offset;
col_pos[c] = nextidx + offset;
values[c] = cur_wt1 * reverse_wt1 + cur_wt2 * reverse_wt2;
c++;
}
// 4 direction with dist of 2
for (int k = 8; k < 12; k++) {
checki = i + check_locs_y[k];
checkj = j + check_locs_x[k];
int midi = i + check_locs_y[k] / 2;
int midj = j + check_locs_x[k] / 2;
if (checki < 0 || checki >= height || checkj < 0 || checkj >= width)
continue;
double cur_wt = mv_weight[checki * width + checkj],
reverse_wt = mv_weight[i * width + j];
if (cur_wt > mv_weight[midi * width + midj] && useModifyLap) cur_wt = 1;
if (cur_wt > wtThres && useThres1) cur_wt = 1;
if (reverse_wt > mv_weight[midi * width + midj] && useModifyLap)
reverse_wt = 1;
if (reverse_wt > wtThres && useThres1) reverse_wt = 1;
int nextidx = checkj * height + checki;
row_pos[c] = curidx;
col_pos[c] = nextidx;
values[c] = cur_wt * reverse_wt;
c++;
row_pos[c] = curidx + offset;
col_pos[c] = nextidx + offset;
values[c] = cur_wt * reverse_wt;
c++;
}
}
}
init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height,
2 * width * height, &LtL);
// get part of b from last iteration
mtx_vect_multi_right(&LtL, mv_last_vec, b_temp, 2 * width * height);
free_sparse_mtx_elems(&LtL);
// build the final A on top of LtL
// modify diagonal elements
for (int k = 0; k < c; k++) {
values[k] *= a_squared;
if (row_pos[k] != col_pos[k]) continue;
int curidx = row_pos[k];
if (curidx < offset) {
i = curidx % height;
j = curidx / height;
values[k] +=
pixel_weight[i * width + j] * Ex[i * width + j] * Ex[i * width + j] +
0.00001;
} else {
curidx -= offset;
i = curidx % height;
j = curidx / height;
values[k] +=
pixel_weight[i * width + j] * Ey[i * width + j] * Ey[i * width + j] +
0.00001;
}
}
// add cross terms to A and modify b with ExEt / EyEt
for (j = 0; j < width; j++) {
for (i = 0; i < height; i++) {
int curidx = j * height + i;
// modify b
b[curidx] = -a_squared * (b[curidx] + b_temp[curidx]);
b[curidx] -=
pixel_weight[i * width + j] * Ex[i * width + j] * Et[i * width + j];
b[curidx + offset] =
-a_squared * (b[curidx + offset] + b_temp[curidx + offset]);
b[curidx + offset] -=
pixel_weight[i * width + j] * Ey[i * width + j] * Et[i * width + j];
// add cross terms to A
row_pos[c] = curidx;
col_pos[c] = curidx + offset;
values[c] =
pixel_weight[i * width + j] * Ex[i * width + j] * Ey[i * width + j];
c++;
row_pos[c] = curidx + offset;
col_pos[c] = curidx;
values[c] =
pixel_weight[i * width + j] * Ex[i * width + j] * Ey[i * width + j];
c++;
}
}
// construct sparse mtx A
init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height,
2 * width * height, &A);
endi = clock();
timeinit += (double)(endi - starti) / CLOCKS_PER_SEC;
starts = clock();
// solve Ax = b
conjugate_gradient_sparse(&A, b, 2 * width * height, mv_vec);
ends = clock();
timesolve += (double)(ends - starts) / CLOCKS_PER_SEC;
// reshape motion field to 2D
cost = 0;
for (j = 0; j < width; j++) {
for (i = 0; i < height; i++) {
mv_start_new[i * mvstr + j].col = mv_vec[j * height + i];
cost += mv_vec[j * height + i] * mv_vec[j * height + i];
}
}
for (j = 0; j < width; j++) {
for (i = 0; i < height; i++) {
mv_start_new[i * mvstr + j].row = mv_vec[width * height + j * height + i];
cost += mv_vec[width * height + j * height + i] *
mv_vec[width * height + j * height + i];
}
}
// free buffers
aom_free(row_pos);
aom_free(col_pos);
aom_free(values);
free_sparse_mtx_elems(&A);
aom_free(mv_vec);
aom_free(b);
aom_free(b_temp);
aom_free(pixel_weight);
aom_free(mv_weight);
aom_free(mv_center_wt);
cost = sqrt(cost); // 2 norm
return cost;
}
/*
 * Update motion field at each iteration by a fast iterative method.
 *
 * Both references are warped with the current motion field, the derivatives
 * Ex/Ey/Et are taken from the warped blocks, and MAX_ITER_FAST_OPFL sweeps of
 * a 4-neighbour averaging update are run, starting from an all-zero field.
 *
 * Input:
 * buf_struct: containing buffers of the reference frames
 * mf_last: initial motion field (pad_motion_field_border is called on it)
 * level: current scale level, 0 = original, 1 = 0.5, 2 = 0.25, etc.
 * dstpos: dst frame position
 * as_scale: scale the laplacian multiplier to perform "annealing"
 * usescale: 0->do not scale the original, 1->do scaling of images
 * blk_info: information on the current block
 *
 * Output:
 * mf_new: pointer to calculated motion field
 *
 * Return: the 2-norm of the block interior of the field stored in mf_new.
 */
double iterate_update_mv_fast(OPFL_BUFFER_STRUCT *buf_struct, DB_MV *mf_last,
                              DB_MV *mf_new, int level, double dstpos,
                              double as_scale, int usescale,
                              OPFL_BLK_INFO blk_info) {
  double a_squared = OF_A_SQUARED * as_scale;
  double cost = 0;
  // Image buffers are taken from pyramid level 0 when scaling is disabled.
  int l = usescale ? level : 0;
  YV12_BUFFER_CONFIG *ref0 = buf_struct->ref0_buf[l];
  YV12_BUFFER_CONFIG *ref1 = buf_struct->ref1_buf[l];
  YV12_BUFFER_CONFIG *buf_init0 = buf_struct->ref0_warped_buf[l];
  YV12_BUFFER_CONFIG *buf_init1 = buf_struct->ref1_warped_buf[l];
  YV12_BUFFER_CONFIG *buffer0 = buf_struct->buffer0[l];
  YV12_BUFFER_CONFIG *buffer1 = buf_struct->buffer1[l];
  // The motion field is always stored at the resolution of `level`.
  int width = blk_info.blk_width >> level;
  int height = blk_info.blk_height >> level;
  int mvstr = width + 2 * AVG_MF_BORDER;
  DB_MV *mv_start = mf_last + AVG_MF_BORDER * mvstr + AVG_MF_BORDER;
  // First interior element of the caller's output buffer.
  DB_MV *mv_out = mf_new + AVG_MF_BORDER * mvstr + AVG_MF_BORDER;
  DB_MV *mv_start_new = mv_out;
  int i, j;
  // derivative buffers are owned by buf_struct
  double *Ex = buf_struct->Ex;
  double *Ey = buf_struct->Ey;
  double *Et = buf_struct->Et;

  // Warp both references with the current motion field.
  clock_t starts = clock();
  if (level == 0 || !usescale)
    warp_optical_flow_fwd(ref0, ref1, mv_start, mvstr, buffer0, dstpos, level,
                          usescale, blk_info);
  else
    warp_optical_flow_fwd_bilinear(ref0, ref1, mv_start, mvstr, buffer0, dstpos,
                                   level, usescale, blk_info);
  if (level == 0 || !usescale)
    warp_optical_flow_back(ref1, ref0, mv_start, mvstr, buffer1, 1 - dstpos,
                           level, usescale, blk_info);
  else
    warp_optical_flow_back_bilinear(ref1, ref0, mv_start, mvstr, buffer1,
                                    1 - dstpos, level, usescale, blk_info);
  clock_t ends = clock();
  timesub += (double)(ends - starts) / CLOCKS_PER_SEC;

  clock_t startd = clock();
  // Calculate partial derivatives
  opfl_get_derivatives(Ex, Ey, Et, buffer0, buffer1, buf_init0, buf_init1,
                       dstpos, level, usescale, blk_info);
  clock_t endd = clock();
  timeder += (double)(endd - startd) / CLOCKS_PER_SEC;

  // iterative solver
  starts = clock();
  DB_MV *tempmv;
  // Scratch field with the same bordered layout as mf_last / mf_new.
  DB_MV *bufmv_b =
      aom_calloc((height + 2 * AVG_MF_BORDER) * (width + 2 * AVG_MF_BORDER),
                 sizeof(DB_MV));
  DB_MV *bufmv = bufmv_b + AVG_MF_BORDER * mvstr + AVG_MF_BORDER;
  DB_MV *lp_last = aom_calloc(height * mvstr, sizeof(DB_MV));
  double *denorm = aom_calloc(height * width, sizeof(double));
  DB_MV avg;
  // get the laplacian of initial motion field
  pad_motion_field_border(mv_start, width, height, mvstr);
  int i0, i1, j0, j1;
  for (i = 0; i < height; i++) {
    i0 = i - 1;
    i1 = i + 1;
    for (j = 0; j < width; j++) {
      j0 = j - 1;
      j1 = j + 1;
      bufmv[i * mvstr + j].row = 0;
      bufmv[i * mvstr + j].col = 0;
      lp_last[i * mvstr + j].row = 0.25 * mv_start[i0 * mvstr + j].row +
                                   0.25 * mv_start[i1 * mvstr + j].row +
                                   0.25 * mv_start[i * mvstr + j0].row +
                                   0.25 * mv_start[i * mvstr + j1].row;
      lp_last[i * mvstr + j].row -= mv_start[i * mvstr + j].row;
      lp_last[i * mvstr + j].col = 0.25 * mv_start[i0 * mvstr + j].col +
                                   0.25 * mv_start[i1 * mvstr + j].col +
                                   0.25 * mv_start[i * mvstr + j0].col +
                                   0.25 * mv_start[i * mvstr + j1].col;
      lp_last[i * mvstr + j].col -= mv_start[i * mvstr + j].col;
      denorm[i * width + j] = 16 * a_squared +
                              Ex[i * width + j] * Ex[i * width + j] +
                              Ey[i * width + j] * Ey[i * width + j];
    }
  }
  // calculate the motion field
  for (int k = 0; k < MAX_ITER_FAST_OPFL; k++) {
    pad_motion_field_border(bufmv, width, height, mvstr);
    for (i = 0; i < height; i++) {
      i0 = i - 1;
      i1 = i + 1;
      for (j = 0; j < width; j++) {
        j0 = j - 1;
        j1 = j + 1;
        avg.row = 0.25 * bufmv[i0 * mvstr + j].row +
                  0.25 * bufmv[i1 * mvstr + j].row +
                  0.25 * bufmv[i * mvstr + j0].row +
                  0.25 * bufmv[i * mvstr + j1].row;
        avg.row += lp_last[i * mvstr + j].row;
        avg.col = 0.25 * bufmv[i0 * mvstr + j].col +
                  0.25 * bufmv[i1 * mvstr + j].col +
                  0.25 * bufmv[i * mvstr + j0].col +
                  0.25 * bufmv[i * mvstr + j1].col;
        avg.col += lp_last[i * mvstr + j].col;
        mv_start_new[i * mvstr + j].col =
            avg.col - Ex[i * width + j] *
                          (Ex[i * width + j] * avg.col +
                           Ey[i * width + j] * avg.row + Et[i * width + j]) /
                          denorm[i * width + j];
        mv_start_new[i * mvstr + j].row =
            avg.row - Ey[i * width + j] *
                          (Ex[i * width + j] * avg.col +
                           Ey[i * width + j] * avg.row + Et[i * width + j]) /
                          denorm[i * width + j];
      }
    }
    // Ping-pong: the field just written becomes the input of the next sweep.
    if (k < MAX_ITER_FAST_OPFL - 1) {
      tempmv = bufmv;
      bufmv = mv_start_new;
      mv_start_new = tempmv;
    }
  }
  ends = clock();
  timesolve += (double)(ends - starts) / CLOCKS_PER_SEC;

  // After an even number of sweeps the final result sits in the scratch
  // buffer instead of the caller's buffer. Copy it out so that mf_new always
  // holds the last sweep and nothing below reads memory that has been freed.
  if (mv_start_new != mv_out) {
    for (i = 0; i < height; i++) {
      for (j = 0; j < width; j++) {
        mv_out[i * mvstr + j] = mv_start_new[i * mvstr + j];
      }
    }
    mv_start_new = mv_out;
  }

  // accumulate the squared magnitude of the result (rows first, then cols)
  cost = 0;
  for (j = 0; j < width; j++) {
    for (i = 0; i < height; i++) {
      cost += mv_start_new[i * mvstr + j].row * mv_start_new[i * mvstr + j].row;
    }
  }
  for (j = 0; j < width; j++) {
    for (i = 0; i < height; i++) {
      cost += mv_start_new[i * mvstr + j].col * mv_start_new[i * mvstr + j].col;
    }
  }

  // Scratch memory is released only once nothing points into it any more.
  aom_free(bufmv_b);
  aom_free(lp_last);
  aom_free(denorm);

  cost = sqrt(cost);  // 2 norm
  return cost;
}
/*
 * Compute the horizontal (Ex), vertical (Ey) and temporal (Et) derivatives
 * of one block from the two warped references.
 *
 * Input:
 * buffer0, buffer1: warped references; read block-relative (no start offset)
 * buffer_init0, buffer_init1: buffers read at the block's frame position for
 *   filter taps that fall outside the block
 * dstpos: blend weight; buffer0 is weighted (1 - dstpos), buffer1 dstpos
 * level, usescale: with usescale the block geometry is shifted right by
 *   level; without it (and level != 0) derivatives are computed at full size
 *   and then pooled over (1 << level) x (1 << level) cells
 * blk_info: block geometry and bound flags
 *
 * Output:
 * Ex, Ey, Et: derivative planes, row-major with the output block width
 */
void opfl_get_derivatives(double *Ex, double *Ey, double *Et,
                          YV12_BUFFER_CONFIG *buffer0,
                          YV12_BUFFER_CONFIG *buffer1,
                          YV12_BUFFER_CONFIG *buffer_init0,
                          YV12_BUFFER_CONFIG *buffer_init1, double dstpos,
                          int level, int usescale, OPFL_BLK_INFO blk_info) {
  const int taps = DERIVATIVE_FILTER_LENGTH;
  const int half = (taps - 1) / 2;
  const double filter[DERIVATIVE_FILTER_LENGTH] = { -1.0 / 60, 9.0 / 60,
                                                    -45.0 / 60, 0,
                                                    45.0 / 60,  -9.0 / 60,
                                                    1.0 / 60 };
  const int stride = buffer0->y_stride;
  const int istride = buffer_init0->y_stride;
  // Pooling mode: derivatives are taken at full size and reduced afterwards.
  const int pooled = (!usescale && level != 0);
  int cols = blk_info.blk_width;
  int rows = blk_info.blk_height;
  int row0 = blk_info.starth;
  int col0 = blk_info.startw;
  if (usescale) {
    cols >>= level;
    rows >>= level;
    row0 >>= level;
    col0 >>= level;
  }
  // Both init buffers are addressed with buffer_init0's stride.
  uint8_t *init0 = buffer_init0->y_buffer + row0 * istride + col0;
  uint8_t *init1 = buffer_init1->y_buffer + row0 * istride + col0;

  double *tempEx = NULL, *tempEy = NULL, *tempEt = NULL;
  if (pooled) {
    tempEx = aom_calloc(cols * rows, sizeof(double));
    tempEy = aom_calloc(cols * rows, sizeof(double));
    tempEt = aom_calloc(cols * rows, sizeof(double));
  }
  // Planes the filters write into: scratch when pooling, outputs otherwise.
  double *dx = pooled ? tempEx : Ex;
  double *dy = pooled ? tempEy : Ey;
  double *dt = pooled ? tempEt : Et;

  // horizontal derivative filter
  for (int r = 0; r < rows; r++) {
    for (int c = 0; c < cols; c++) {
      double *out = &dx[r * cols + c];
      *out = 0;
      for (int t = 0; t < taps; t++) {
        int x = c + (t - half);
        const int past_left = x < 0;
        const int past_right = x > cols - 1;
        if ((past_left && blk_info.leftbound != 1) ||
            (past_right && blk_info.rightbound != 1)) {
          // Tap lies outside the block and the bound flag is not set on that
          // side: read the init buffers instead.
          *out += filter[t] * (double)(init0[r * istride + x]) * (1 - dstpos) +
                  filter[t] * (double)(init1[r * istride + x]) * dstpos;
        } else {
          // Inside the block, or bound flag set: clamp to the block columns.
          if (past_left)
            x = 0;
          else if (past_right)
            x = cols - 1;
          *out += filter[t] * (double)(buffer0->y_buffer[r * stride + x]) *
                      (1 - dstpos) +
                  filter[t] * (double)(buffer1->y_buffer[r * stride + x]) *
                      dstpos;
        }
      }
    }
  }

  // vertical derivative filter
  for (int r = 0; r < rows; r++) {
    for (int c = 0; c < cols; c++) {
      double *out = &dy[r * cols + c];
      *out = 0;
      for (int t = 0; t < taps; t++) {
        int y = r + (t - half);
        const int past_top = y < 0;
        const int past_bottom = y > rows - 1;
        if ((past_top && blk_info.upbound != 1) ||
            (past_bottom && blk_info.lowerbound != 1)) {
          *out += filter[t] * (double)(init0[y * istride + c]) * (1 - dstpos) +
                  filter[t] * (double)(init1[y * istride + c]) * dstpos;
        } else {
          if (past_top)
            y = 0;
          else if (past_bottom)
            y = rows - 1;
          *out += filter[t] * (double)(buffer0->y_buffer[y * stride + c]) *
                      (1 - dstpos) +
                  filter[t] * (double)(buffer1->y_buffer[y * stride + c]) *
                      dstpos;
        }
      }
    }
  }

  // time derivative: plain difference of the two warped references
  for (int r = 0; r < rows; r++) {
    for (int c = 0; c < cols; c++) {
      dt[r * cols + c] = (double)(buffer1->y_buffer[r * stride + c]) -
                         (double)(buffer0->y_buffer[r * stride + c]);
    }
  }

  // rescale the derivatives
  if (pooled) {
    const int out_cols = cols >> level;
    const int out_rows = rows >> level;
    const int cell = 1 << level;
    for (int r = 0; r < out_rows; r++) {
      for (int c = 0; c < out_cols; c++) {
        double sum_x = 0, sum_y = 0, sum_t = 0;
        for (int h = 0; h < cell; h++) {
          for (int w = 0; w < cell; w++) {
            const int src = (r * cell + h) * cols + c * cell + w;
            sum_x += tempEx[src];
            sum_y += tempEy[src];
            sum_t += tempEt[src];
          }
        }
        // Spatial sums are divided by the cell side, the temporal sum by the
        // cell area.
        Ex[r * out_cols + c] = sum_x / cell;
        Ey[r * out_cols + c] = sum_y / cell;
        Et[r * out_cols + c] = sum_t / (cell * cell);
      }
    }
    if (tempEx) aom_free(tempEx);
    if (tempEy) aom_free(tempEy);
    if (tempEt) aom_free(tempEt);
  }
}
void opfl_get_derivatives_nowarp(double *Ex, double *Ey,
OPFL_BUFFER_STRUCT *buf_struct, DB_MV *mf,
double dstpos, int level,
OPFL_BLK_INFO blk_info) {
int i, j, h, w;
int width = blk_info.blk_width;
int height = blk_info.blk_height;
int starth = blk_info.starth;
int startw = blk_info.startw;
width = width >> level;
height = height >> level;
starth = starth >> level;
startw = startw >> level;
int f_width = buf_struct->ref0_buf[0]->y_width;
int f_height = buf_struct->ref0_buf[0]->y_height;
f_width = f_width >> level;
f_height = f_height >> level;
int mf_stride = f_width + 2 * AVG_MF_BORDER;
DB_MV *mf_start = mf + AVG_MF_BORDER * mf_stride + AVG_MF_BORDER;
mf_start = mf_start + starth * mf_stride + startw;
double *ori_Ex0_start =
buf_struct->ori_Ex0_buf[level] + starth * f_width + startw;
double *ori_Ex1_start =
buf_struct->ori_Ex1_buf[level] + starth * f_width + startw;
double *ori_Ey0_start =
buf_struct->ori_Ey0_buf[level] + starth * f_width + startw;
double *ori_Ey1_start =
buf_struct->ori_Ey1_buf[level] + starth * f_width + startw;
int mv_r, mv_c, y_loc, x_loc;
double di, dj;
// calculate Ex, Ey
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
// printf("%.2f ", Ex[i*width+j]);
Ex[i * width + j] = 0;
Ey[i * width + j] = 0;
// from left ref
mv_r = opfl_floor_double_2_int(-dstpos * mf_start[i * mf_stride + j].row);
mv_c = opfl_floor_double_2_int(-dstpos * mf_start[i * mf_stride + j].col);
di = -dstpos * mf_start[i * mf_stride + j].row - mv_r;
dj = -dstpos * mf_start[i * mf_stride + j].col - mv_c;
int yidx = opfl_round_double_2_int(di * 8);
int xidx = opfl_round_double_2_int(dj * 8);
yidx *= 2;
xidx *= 2;
if (yidx == 16) {
yidx = 0;
mv_c += 1;
}
if (xidx == 16) {
xidx = 0;
mv_r += 1;
}
assert(xidx <= 14 && xidx >= 0);
assert(yidx <= 14 && yidx >= 0);
// Ex0
double x;
double y[8];
for (h = -3; h < 5; h++) {
y[h + 3] = 0;
for (w = -3; w < 5; w++) {
y_loc = i + mv_r + h;
x_loc = j + mv_c + w;
if (y_loc + starth < 0) {
y_loc = -starth;
} else if (y_loc + starth >= f_height) {
y_loc = f_height - starth - 1;
}
if (x_loc + startw < 0) {
x_loc = -startw;
} else if (x_loc + startw >= f_width) {
x_loc = f_width - startw - 1;
}
y[h + 3] += ori_Ex0_start[y_loc * f_width + x_loc] *
optical_flow_warp_filter[xidx][w + 3];
}
y[h + 3] /= (double)(1 << 7);
}
x = 0;
for (h = 0; h < 8; h++) {
x += y[h] * optical_flow_warp_filter[yidx][h];
}
x /= (double)(1 << 7);
Ex[i * width + j] += (1 - dstpos) * x;
// Ey0
for (h = -3; h < 5; h++) {
y[h + 3] = 0;
for (w = -3; w < 5; w++) {
y_loc = i + mv_r + h;
x_loc = j + mv_c + w;
if (y_loc + starth < 0) {
y_loc = -starth;
} else if (y_loc + starth >= f_height) {
y_loc = f_height - starth - 1;
}
if (x_loc + startw < 0) {
x_loc = -startw;
} else if (x_loc + startw >= f_width) {
x_loc = f_width - startw - 1;
}
y[h + 3] += ori_Ey0_start[y_loc * f_width + x_loc] *
optical_flow_warp_filter[xidx][w + 3];
}
y[h + 3] /= (double)(1 << 7);
}
x = 0;
for (h = 0; h < 8; h++) {
x += y[h] * optical_flow_warp_filter[yidx][h];
}
x /= (double)(1 << 7);
Ey[i * width + j] += (1 - dstpos) * x;
// from right ref
mv_r = opfl_floor_double_2_int((1 - dstpos) *
mf_start[i * mf_stride + j].row);
mv_c = opfl_floor_double_2_int((1 - dstpos) *
mf_start[i * mf_stride + j].col);
di = (1 - dstpos) * mf_start[i * mf_stride + j].row - mv_r;
dj = (1 - dstpos) * mf_start[i * mf_stride + j].col - mv_c;
yidx = opfl_round_double_2_int(di * 8);
xidx = opfl_round_double_2_int(dj * 8);
yidx *= 2;
xidx *= 2;
if (yidx == 16) {
yidx = 0;
mv_c += 1;
}
if (xidx == 16) {
xidx = 0;
mv_r += 1;
}
assert(xidx <= 14 && xidx >= 0);
assert(yidx <= 14 && yidx >= 0);
// Ex1
for (h = -3; h < 5; h++) {
y[h + 3] = 0;
for (w = -3; w < 5; w++) {
y_loc = i + mv_r + h;
x_loc = j + mv_c + w;
if (y_loc + starth < 0) {
y_loc = -starth;
} else if (y_loc + starth >= f_height) {
y_loc = f_height - starth - 1;
}
if (x_loc + startw < 0) {
x_loc = -startw;
} else if (x_loc + startw >= f_width) {
x_loc = f_width - startw - 1;
}
y[h + 3] += ori_Ex1_start[y_loc * f_width + x_loc] *
optical_flow_warp_filter[xidx][w + 3];
}
y[h + 3] /= (double)(1 << 7);
}
x = 0;
for (h = 0; h < 8; h++) {
x += y[h] * optical_flow_warp_filter[yidx][h];
}
x /= (double)(1 << 7);
Ex[i * width + j] += dstpos * x;
// Ey1
for (h = -3; h < 5; h++) {
y[h + 3] = 0;
for (w = -3; w < 5; w++) {
y_loc = i + mv_r + h;
x_loc = j + mv_c + w;
if (y_loc + starth < 0) {
y_loc = -starth;
} else if (y_loc + starth >= f_height) {
y_loc = f_height - starth - 1;
}
if (x_loc + startw < 0) {
x_loc = -startw;
} else if (x_loc + startw >= f_width) {
x_loc = f_width - startw - 1;
}
y[h + 3] += ori_Ey1_start[y_loc * f_width + x_loc] *
optical_flow_warp_filter[xidx][w + 3];
}
y[h + 3] /= (double)(1 << 7);
}
x = 0;
for (h = 0; h < 8; h++) {
x += y[h] * optical_flow_warp_filter[yidx][h];
}
x /= (double)(1 << 7);
Ey[i * width + j] += dstpos * x;
// printf("%.2f \n", Ex[i*width+j]);
}
}
}
/*
 * Warp the Y component of src to dst according to the motion field.
 * Motion field points back from dst to src: each output pixel samples src at
 * its own frame position plus mv * dstpos, using get_sub_pel_y.
 *
 * With usescale the block geometry is shifted right by level; without it one
 * motion vector covers a (1 << level) square and is scaled up by that factor.
 * Pixels whose integer source position is outside the frame are copied from
 * ref at the co-located position (addressed with src's stride).
 * dst is written block-relative, i.e. without the block start offset.
 */
void warp_optical_flow_back(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
                            DB_MV *mf_start, int mvstr, YV12_BUFFER_CONFIG *dst,
                            double dstpos, int level, int usescale,
                            OPFL_BLK_INFO blk_info) {
  const int frame_w = src->y_width;
  const int frame_h = src->y_height;
  const int src_stride = src->y_stride;
  const int dst_stride = dst->y_stride;
  uint8_t *const src_luma = src->y_buffer;
  uint8_t *const dst_luma = dst->y_buffer;
  uint8_t *const ref_luma = ref->y_buffer;
  int cols = blk_info.blk_width;
  int rows = blk_info.blk_height;
  int row0 = blk_info.starth;
  int col0 = blk_info.startw;
  // Number of pixels (per axis) sharing one motion vector.
  int mv_span = 1;
  if (usescale) {
    cols >>= level;
    rows >>= level;
    row0 >>= level;
    col0 >>= level;
  } else {
    mv_span <<= level;
  }
  for (int r = 0; r < rows; r++) {
    uint8_t *const out_row = dst_luma + r * dst_stride;
    for (int c = 0; c < cols; c++) {
      const DB_MV *mv = &mf_start[(r / mv_span) * mvstr + c / mv_span];
      const double y = r + row0 + mv->row * mv_span * dstpos;
      const double x = c + col0 + mv->col * mv_span * dstpos;
      const int yi = opfl_floor_double_2_int(y);
      const int xi = opfl_floor_double_2_int(x);
      const double fy = y - yi;
      const double fx = x - xi;
      const int in_frame =
          yi >= 0 && yi <= frame_h - 1 && xi >= 0 && xi <= frame_w - 1;
      if (in_frame) {
        out_row[c] =
            get_sub_pel_y(src_luma + yi * src_stride + xi, src_stride, fy, fx);
      } else {
        // source position not available: fall back to the other reference
        out_row[c] = ref_luma[(r + row0) * src_stride + c + col0];
      }
    }
  }
}
/*
 * Warp the Y component of src to dst using bilinear method.
 * Motion field points back from dst to src: each output pixel samples src at
 * its own frame position plus mv * dstpos and blends the four surrounding
 * pixels, rounding to nearest.
 *
 * With usescale the block geometry is shifted right by level; without it one
 * motion vector covers a (1 << level) square and is scaled up by that factor.
 * Pixels whose integer source position is outside the frame are copied from
 * ref at the co-located position (addressed with src's stride).
 * dst is written block-relative, i.e. without the block start offset.
 */
void warp_optical_flow_back_bilinear(YV12_BUFFER_CONFIG *src,
                                     YV12_BUFFER_CONFIG *ref, DB_MV *mf_start,
                                     int mvstr, YV12_BUFFER_CONFIG *dst,
                                     double dstpos, int level, int usescale,
                                     OPFL_BLK_INFO blk_info) {
  const int frame_w = src->y_width;
  const int frame_h = src->y_height;
  const int src_stride = src->y_stride;
  const int dst_stride = dst->y_stride;
  uint8_t *const src_luma = src->y_buffer;
  uint8_t *const dst_luma = dst->y_buffer;
  uint8_t *const ref_luma = ref->y_buffer;
  int cols = blk_info.blk_width;
  int rows = blk_info.blk_height;
  int row0 = blk_info.starth;
  int col0 = blk_info.startw;
  // Number of pixels (per axis) sharing one motion vector.
  int mv_span = 1;
  if (usescale) {
    cols >>= level;
    rows >>= level;
    row0 >>= level;
    col0 >>= level;
  } else {
    mv_span <<= level;
  }
  for (int r = 0; r < rows; r++) {
    uint8_t *const out_row = dst_luma + r * dst_stride;
    for (int c = 0; c < cols; c++) {
      const DB_MV *mv = &mf_start[(r / mv_span) * mvstr + c / mv_span];
      const double y = r + row0 + mv->row * mv_span * dstpos;
      const double x = c + col0 + mv->col * mv_span * dstpos;
      const int yi = opfl_floor_double_2_int(y);
      const int xi = opfl_floor_double_2_int(x);
      // Weights of the top row / left column (1 minus the fractional part).
      const double wy = 1 - y + yi;
      const double wx = 1 - x + xi;
      const int in_frame =
          yi >= 0 && yi <= frame_h - 1 && xi >= 0 && xi <= frame_w - 1;
      if (!in_frame) {
        // source position not available: fall back to the other reference
        out_row[c] = ref_luma[(r + row0) * src_stride + c + col0];
        continue;
      }
      const uint8_t *const top = src_luma + yi * src_stride;
      const uint8_t *const bottom = src_luma + (yi + 1) * src_stride;
      const double blend = wy * wx * (double)top[xi] +
                           wy * (1 - wx) * (double)top[xi + 1] +
                           (1 - wy) * wx * (double)bottom[xi] +
                           (1 - wy) * (1 - wx) * (double)bottom[xi + 1];
      out_row[c] = (uint8_t)(blend + 0.5);
    }
  }
}
/*
 * Warp the Y component of src to dst.
 * Motion field points forward from src to dst: each output pixel samples src
 * at its own frame position minus mv * dstpos, using get_sub_pel_y.
 *
 * With usescale the block geometry is shifted right by level; without it one
 * motion vector covers a (1 << level) square and is scaled up by that factor.
 * Pixels whose integer source position is outside the frame are copied from
 * ref at the co-located position (addressed with src's stride).
 * dst is written block-relative, i.e. without the block start offset.
 */
void warp_optical_flow_fwd(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
                           DB_MV *mf_start, int mvstr, YV12_BUFFER_CONFIG *dst,
                           double dstpos, int level, int usescale,
                           OPFL_BLK_INFO blk_info) {
  const int frame_w = src->y_width;
  const int frame_h = src->y_height;
  const int src_stride = src->y_stride;
  const int dst_stride = dst->y_stride;
  uint8_t *const src_luma = src->y_buffer;
  uint8_t *const dst_luma = dst->y_buffer;
  uint8_t *const ref_luma = ref->y_buffer;
  int cols = blk_info.blk_width;
  int rows = blk_info.blk_height;
  int row0 = blk_info.starth;
  int col0 = blk_info.startw;
  // Number of pixels (per axis) sharing one motion vector.
  int mv_span = 1;
  if (usescale) {
    cols >>= level;
    rows >>= level;
    row0 >>= level;
    col0 >>= level;
  } else {
    mv_span <<= level;
  }
  for (int r = 0; r < rows; r++) {
    uint8_t *const out_row = dst_luma + r * dst_stride;
    for (int c = 0; c < cols; c++) {
      const DB_MV *mv = &mf_start[(r / mv_span) * mvstr + c / mv_span];
      const double y = r + row0 - mv->row * mv_span * dstpos;
      const double x = c + col0 - mv->col * mv_span * dstpos;
      const int yi = opfl_floor_double_2_int(y);
      const int xi = opfl_floor_double_2_int(x);
      const double fy = y - yi;
      const double fx = x - xi;
      const int in_frame =
          yi >= 0 && yi <= frame_h - 1 && xi >= 0 && xi <= frame_w - 1;
      if (in_frame) {
        out_row[c] =
            get_sub_pel_y(src_luma + yi * src_stride + xi, src_stride, fy, fx);
      } else {
        // source position not available: fall back to the other reference
        out_row[c] = ref_luma[(r + row0) * src_stride + c + col0];
      }
    }
  }
}
/*
 * Warp the Y component of src to dst using bilinear method.
 * Motion field points forward from src to dst: each output pixel samples src
 * at its own frame position minus mv * dstpos and blends the four surrounding
 * pixels, rounding to nearest.
 *
 * With usescale the block geometry is shifted right by level; without it one
 * motion vector covers a (1 << level) square and is scaled up by that factor.
 * Pixels whose integer source position is outside the frame are copied from
 * ref at the co-located position (addressed with src's stride).
 * dst is written block-relative, i.e. without the block start offset.
 */
void warp_optical_flow_fwd_bilinear(YV12_BUFFER_CONFIG *src,
                                    YV12_BUFFER_CONFIG *ref, DB_MV *mf_start,
                                    int mvstr, YV12_BUFFER_CONFIG *dst,
                                    double dstpos, int level, int usescale,
                                    OPFL_BLK_INFO blk_info) {
  const int frame_w = src->y_width;
  const int frame_h = src->y_height;
  const int src_stride = src->y_stride;
  const int dst_stride = dst->y_stride;
  uint8_t *const src_luma = src->y_buffer;
  uint8_t *const dst_luma = dst->y_buffer;
  uint8_t *const ref_luma = ref->y_buffer;
  int cols = blk_info.blk_width;
  int rows = blk_info.blk_height;
  int row0 = blk_info.starth;
  int col0 = blk_info.startw;
  // Number of pixels (per axis) sharing one motion vector.
  int mv_span = 1;
  if (usescale) {
    cols >>= level;
    rows >>= level;
    row0 >>= level;
    col0 >>= level;
  } else {
    mv_span <<= level;
  }
  for (int r = 0; r < rows; r++) {
    uint8_t *const out_row = dst_luma + r * dst_stride;
    for (int c = 0; c < cols; c++) {
      const DB_MV *mv = &mf_start[(r / mv_span) * mvstr + c / mv_span];
      const double y = r + row0 - mv->row * mv_span * dstpos;
      const double x = c + col0 - mv->col * mv_span * dstpos;
      const int yi = opfl_floor_double_2_int(y);
      const int xi = opfl_floor_double_2_int(x);
      // Weights of the top row / left column (1 minus the fractional part).
      const double wy = 1 - y + yi;
      const double wx = 1 - x + xi;
      const int in_frame =
          yi >= 0 && yi <= frame_h - 1 && xi >= 0 && xi <= frame_w - 1;
      if (!in_frame) {
        // source position not available: fall back to the other reference
        out_row[c] = ref_luma[(r + row0) * src_stride + c + col0];
        continue;
      }
      const uint8_t *const top = src_luma + yi * src_stride;
      const uint8_t *const bottom = src_luma + (yi + 1) * src_stride;
      const double blend = wy * wx * (double)top[xi] +
                           wy * (1 - wx) * (double)top[xi + 1] +
                           (1 - wy) * wx * (double)bottom[xi] +
                           (1 - wy) * (1 - wx) * (double)bottom[xi + 1];
      out_row[c] = (uint8_t)(blend + 0.5);
    }
  }
}
/*
* Interpolate references according to the motion field
*
* For every pixel of the block, one position is fetched from each reference
* along the pixel's motion vector and the two samples are blended.
* When method is OPFL_DIFF_SELECT the work is delegated to
* warp_optical_flow_bilateral() and nothing else in this function runs.
*
* Input:
* src0, src1: the reference frames
* mf_start: the motion field start pointer
* mvstr: motion field stride
* dstpos: position of the interpolated frame; it is the blend weight given
*         to src1 (src0 receives 1 - dstpos) and it scales the motion vector
* method: the blend method to be used
* blk_info: block information (blk_width, blk_height, starth, startw are read)
*
* Output:
* dst: pointer to the interpolated frame
*/
void warp_optical_flow(YV12_BUFFER_CONFIG *src0, YV12_BUFFER_CONFIG *src1,
DB_MV *mf_start, int mvstr, YV12_BUFFER_CONFIG *dst,
double dstpos, OPFL_BLEND_METHOD method,
OPFL_BLK_INFO blk_info) {
if (method == OPFL_DIFF_SELECT) {
// warp_optical_flow_diff_select(src0, src1, mf_start, mvstr, dst, dstpos);
warp_optical_flow_bilateral(src0, src1, mf_start, mvstr, dst, dstpos,
blk_info);
return;
}
// Frame size (for bounds checks) versus block size (for the loops).
int fwidth = dst->y_width, fheight = dst->y_height;
int width = blk_info.blk_width;
int height = blk_info.blk_height;
int starth = blk_info.starth;
int startw = blk_info.startw;
// Chroma block origin is half the luma origin (4:2:0 only, see TODO below).
int starthuv = starth >> 1;
int startwuv = startw >> 1;
// NOTE(review): src0's strides are applied to src1 and dst as well, so all
// three buffers are presumably allocated with identical strides - confirm.
int stride = src0->y_stride;
int uvstride = src0->uv_stride;
// All plane pointers below are moved to the top-left pixel of the block, so
// i/j and i0/j0/i1/j1 in the loops are block-relative coordinates.
uint8_t *src0y = src0->y_buffer + starth * stride + startw;
uint8_t *src1y = src1->y_buffer + starth * stride + startw;
uint8_t *dsty = dst->y_buffer + starth * stride + startw;
uint8_t *src0u = src0->u_buffer + starthuv * uvstride + startwuv;
uint8_t *src0v = src0->v_buffer + starthuv * uvstride + startwuv;
uint8_t *src1u = src1->u_buffer + starthuv * uvstride + startwuv;
uint8_t *src1v = src1->v_buffer + starthuv * uvstride + startwuv;
uint8_t *dstu = dst->u_buffer + starthuv * uvstride + startwuv;
uint8_t *dstv = dst->v_buffer + starthuv * uvstride + startwuv;
// ii*/jj*: real-valued reference positions; i*/j*: their integer parts;
// di*/dj*: their fractional parts; d*uv: fractional parts on the chroma grid.
double ii0, jj0, di0, dj0, di0uv = 0, dj0uv = 0;
double ii1, jj1, di1, dj1, di1uv = 0, dj1uv = 0;
int i0, j0;
int i1, j1;
double dstpel_y, dstpel_u, dstpel_v;
double dstpel_y0 = 0, dstpel_y1 = 0;
double pos;
int do_uv;
int use0, use1, inside0, inside1;
// Index of the reference preferred by the single-reference methods:
// ref0 when dstpos <= 0.5, ref1 otherwise.
int nearest = ((dstpos <= 0.5) ? 0 : 1);
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
pos = dstpos;
// Step back along the motion vector by dstpos to reach ref0 and forward
// by (1 - dstpos) to reach ref1.
ii0 = i - mf_start[i * mvstr + j].row * dstpos;
jj0 = j - mf_start[i * mvstr + j].col * dstpos;
ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos);
jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos);
// Split each coordinate into integer pixel and fractional offset.
i0 = opfl_floor_double_2_int(ii0);
di0 = ii0 - i0;
j0 = opfl_floor_double_2_int(jj0);
dj0 = jj0 - j0;
i1 = opfl_floor_double_2_int(ii1);
di1 = ii1 - i1;
j1 = opfl_floor_double_2_int(jj1);
dj1 = jj1 - j1;
// Chroma is produced once per 2x2 luma pixels, on even rows and columns.
do_uv = i % 2 == 0 && j % 2 == 0; // TODO(bohan) only considering 420 now
if (do_uv) {
// Halve the luma fraction and add 0.5 when the integer luma coordinate is
// odd; (x % 2 + 2) % 2 yields 0 or 1 for negative x as well.
di0uv = di0 / 2 + 0.5 * ((i0 % 2 + 2) % 2);
dj0uv = dj0 / 2 + 0.5 * ((j0 % 2 + 2) % 2);
di1uv = di1 / 2 + 0.5 * ((i1 % 2 + 2) % 2);
dj1uv = dj1 / 2 + 0.5 * ((j1 % 2 + 2) % 2);
}
// Check availability of the references.
// If one ref is outside then do not use it.
// "Inside" is tested in frame coordinates and excludes the last row/column.
inside0 = (i0 + starth >= 0 && i0 + starth < fheight - 1 &&
j0 + startw >= 0 && j0 + startw < fwidth - 1);
inside1 = (i1 + starth >= 0 && i1 + starth < fheight - 1 &&
j1 + startw >= 0 && j1 + startw < fwidth - 1);
// Both refs are used when both are inside or both are outside; otherwise
// only the inside one is used.
use0 = inside0 == inside1 || inside0;
use1 = inside0 == inside1 || inside1;
// If use nearest single method, then use only one reference
if (method == OPFL_NEAREST_SINGLE) {
if (use0 && use1) {
use0 = (nearest == 0);
use1 = (nearest == 1);
}
}
// calculate subpel Y refs
if (use0) {
// Clamp a position that fell outside the frame onto the frame edge and drop
// its fractional parts (luma and chroma) in that direction.
if (i0 + starth < 0) {
i0 = -starth;
di0 = 0;
ii0 = i0;
di0uv = 0;
} else if (i0 + starth > fheight - 1) {
i0 = fheight - 1 - starth;
di0 = 0;
ii0 = i0;
di0uv = 0;
}
if (j0 + startw < 0) {
j0 = -startw;
dj0 = 0;
jj0 = j0;
dj0uv = 0;
} else if (j0 + startw > fwidth - 1) {
j0 = fwidth - 1 - startw;
dj0 = 0;
jj0 = j0;
dj0uv = 0;
}
dstpel_y0 =
(double)get_sub_pel_y(src0y + i0 * stride + j0, stride, di0, dj0);
}
if (use1) {
// Same clamping for the ref1 position.
if (i1 + starth < 0) {
i1 = -starth;
di1 = 0;
ii1 = i1;
di1uv = 0;
} else if (i1 + starth > fheight - 1) {
i1 = fheight - 1 - starth;
di1 = 0;
ii1 = i1;
di1uv = 0;
}
if (j1 + startw < 0) {
j1 = -startw;
dj1 = 0;
jj1 = j1;
dj1uv = 0;
} else if (j1 + startw > fwidth - 1) {
j1 = fwidth - 1 - startw;
dj1 = 0;
jj1 = j1;
dj1uv = 0;
}
dstpel_y1 =
(double)get_sub_pel_y(src1y + i1 * stride + j1, stride, di1, dj1);
}
// If use diff single method, check the pixels when the refs do not agree
if (method == OPFL_DIFF_SINGLE) {
if (use0 && use1) {
if (fabs(dstpel_y0 - dstpel_y1) > OPTICAL_FLOW_DIFF_THRES) {
use0 = (nearest == 0);
use1 = (nearest == 1);
}
}
}
// With a single reference left, give it the full weight.
if (use0 && !use1)
pos = 0;
else if (!use0 && use1)
pos = 1;
// blend
// ref0 contributes with weight (1 - pos), ref1 with weight pos.
dstpel_y = 0;
dstpel_u = 0;
dstpel_v = 0;
if (use0) {
dstpel_y += dstpel_y0 * (1 - pos);
if (do_uv) {
// Chroma integer position is floor(luma position / 2).
dstpel_u +=
(double)get_sub_pel_uv(
src0u + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2),
uvstride, di0uv, dj0uv) *
(1 - pos);
dstpel_v +=
(double)get_sub_pel_uv(
src0v + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2),
uvstride, di0uv, dj0uv) *
(1 - pos);
}
}
if (use1) {
dstpel_y += dstpel_y1 * pos;
if (do_uv) {
dstpel_u +=
(double)get_sub_pel_uv(
src1u + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2),
uvstride, di1uv, dj1uv) *
pos;
dstpel_v +=
(double)get_sub_pel_uv(
src1v + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2),
uvstride, di1uv, dj1uv) *
pos;
}
}
// Store the rounded result; chroma is written at half resolution.
dsty[i * stride + j] = opfl_round_double_2_int(dstpel_y);
if (do_uv) {
dstu[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_u);
dstv[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_v);
}
}
}
}
/*
 * Interpolate references according to the motion field
 * when the refs do not agree, use more advanced selection
 *
 * The function works on the whole frame in four passes:
 *   1. predict luma from both references, count the pixels whose two
 *      predictions disagree, and record how much every reference pixel is
 *      used by the agreeing pixels;
 *   2. for the disagreeing pixels, pick a reference based on how heavily each
 *      candidate reference location is already used;
 *   3. smooth the per-pixel reference choice with a 3x3 mode filter;
 *   4. blend luma and chroma with the selected reference(s).
 *
 * Input:
 * src0, src1: the reference frames
 * mf_start: the motion field start pointer
 * mvstr: motion field stride
 * dstpos: position of the interpolated frame; blend weight of src1
 *         (src0 receives 1 - dstpos)
 *
 * Output:
 * dst: pointer to the interpolated frame
 */
// TODO(bohan): this function needs to be updated to be used with
// block-based methods
void warp_optical_flow_diff_select(YV12_BUFFER_CONFIG *src0,
                                   YV12_BUFFER_CONFIG *src1, DB_MV *mf_start,
                                   int mvstr, YV12_BUFFER_CONFIG *dst,
                                   double dstpos) {
  int width = src0->y_width;
  int height = src0->y_height;
  // NOTE(review): src0's strides are applied to src1 and dst as well, so all
  // three buffers are presumably allocated with identical strides - confirm.
  int stride = src0->y_stride;
  int uvstride = src0->uv_stride;
  uint8_t *src0y = src0->y_buffer;
  uint8_t *src1y = src1->y_buffer;
  uint8_t *dsty = dst->y_buffer;
  uint8_t *src0u = src0->u_buffer;
  uint8_t *src0v = src0->v_buffer;
  uint8_t *src1u = src1->u_buffer;
  uint8_t *src1v = src1->v_buffer;
  uint8_t *dstu = dst->u_buffer;
  uint8_t *dstv = dst->v_buffer;
  double ii0, jj0, di0, dj0, di0uv = 0, dj0uv = 0;
  double ii1, jj1, di1, dj1, di1uv = 0, dj1uv = 0;
  int i0, j0;
  int i1, j1;
  double dstpel_y, dstpel_u, dstpel_v;
  double pos;
  int do_uv;
  // used0/used1: per reference pixel, accumulated bilinear weight with which
  // it is referenced by agreeing pixels.
  double *used0 = aom_calloc(height * width, sizeof(double));
  double *used1 = aom_calloc(height * width, sizeof(double));
  // refid: -1=unset; 0=ref0; 1=ref1; 2=both
  int *refid = aom_calloc(height * width, sizeof(int));
  int *refid_mode = aom_calloc(height * width, sizeof(int));
  double *dstpel_y0 = aom_calloc(height * width, sizeof(double));
  double *dstpel_y1 = aom_calloc(height * width, sizeof(double));
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      used0[i * width + j] = 0;
      used1[i * width + j] = 0;
      refid[i * width + j] = -1;
    }
  }
  // first check all pixels to see if the refs agree
  // also note down which pixels in refs are used for reference
  int disagree_cnt = 0;
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      ii0 = i - mf_start[i * mvstr + j].row * dstpos;
      jj0 = j - mf_start[i * mvstr + j].col * dstpos;
      ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos);
      jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos);
      i0 = opfl_floor_double_2_int(ii0);
      di0 = ii0 - i0;
      j0 = opfl_floor_double_2_int(jj0);
      dj0 = jj0 - j0;
      i1 = opfl_floor_double_2_int(ii1);
      di1 = ii1 - i1;
      j1 = opfl_floor_double_2_int(jj1);
      dj1 = jj1 - j1;
      int inside0 = (i0 >= 0 && i0 < height - 1 && j0 >= 0 && j0 < width - 1);
      int inside1 = (i1 >= 0 && i1 < height - 1 && j1 >= 0 && j1 < width - 1);
      // Both refs are used when both are inside or both are outside;
      // otherwise only the inside one is used.
      int use0 = inside0 == inside1 || inside0;
      int use1 = inside0 == inside1 || inside1;
      // NOTE(review): the predictions are fetched even when the position is
      // outside the frame; this presumably relies on the frame border being
      // large enough - confirm.
      dstpel_y0[i * width + j] =
          (double)get_sub_pel_y(src0y + i0 * stride + j0, stride, di0, dj0);
      dstpel_y1[i * width + j] =
          (double)get_sub_pel_y(src1y + i1 * stride + j1, stride, di1, dj1);
      // check if the refs are similar
      if (inside0 && inside1) {
        if (fabs(dstpel_y0[i * width + j] - dstpel_y1[i * width + j]) >
            OPTICAL_FLOW_DIFF_THRES) {
          disagree_cnt++;
          continue;
        }
      }
      // Record the bilinear footprint in each reference that is inside the
      // frame. Each array is only touched when its own position is inside:
      // indexing it with an outside position (even to add zero) would access
      // memory outside the height * width allocation.
      if (inside0) {
        used0[(i0)*width + j0] += (1 - di0) * (1 - dj0);
        used0[(i0 + 1) * width + j0] += (di0) * (1 - dj0);
        used0[(i0)*width + j0 + 1] += (1 - di0) * (dj0);
        used0[(i0 + 1) * width + j0 + 1] += (di0) * (dj0);
      }
      if (inside1) {
        used1[(i1)*width + j1] += (1 - di1) * (1 - dj1);
        used1[(i1 + 1) * width + j1] += (di1) * (1 - dj1);
        used1[(i1)*width + j1 + 1] += (1 - di1) * (dj1);
        used1[(i1 + 1) * width + j1 + 1] += (di1) * (dj1);
      }  // ignore when both out of bound
      if (use0 && use1)
        refid[i * width + j] = 2;
      else if (use0 && !use1)
        refid[i * width + j] = 0;
      else if (!use0 && use1)
        refid[i * width + j] = 1;
      else
        assert(0);
    }
  }
  // Determine if we want to trust the motion field
  int dis_ref_id;
  if (disagree_cnt > width * height * OPTICAL_FLOW_TRUST_MV_THRES) {
    // Do not trust
    dis_ref_id = ((dstpos <= 0.5) ? 0 : 1);
  } else {
    dis_ref_id = 2;
  }
  // calculate how many times each pixel is referenced
  double totalused0, totalused1;
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      if (dis_ref_id != 2) {
        // since we do not trust the motion field, no need to select
        refid[i * width + j] = dis_ref_id;
        continue;
      } else if (refid[i * width + j] >= 0)
        continue;
      // Only pixels left unset by the first pass get here; those had both
      // reference positions inside the frame, so the used0/used1 lookups
      // below stay within the arrays.
      totalused0 = 0;
      totalused1 = 0;
      ii0 = i - mf_start[i * mvstr + j].row * dstpos;
      jj0 = j - mf_start[i * mvstr + j].col * dstpos;
      ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos);
      jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos);
      i0 = opfl_floor_double_2_int(ii0);
      di0 = ii0 - i0;
      j0 = opfl_floor_double_2_int(jj0);
      dj0 = jj0 - j0;
      i1 = opfl_floor_double_2_int(ii1);
      di1 = ii1 - i1;
      j1 = opfl_floor_double_2_int(jj1);
      dj1 = jj1 - j1;
      totalused0 += used0[(i0)*width + j0] * (1 - di0) * (1 - dj0);
      totalused0 += used0[(i0 + 1) * width + j0] * (di0) * (1 - dj0);
      totalused0 += used0[(i0)*width + j0 + 1] * (1 - di0) * (dj0);
      totalused0 += used0[(i0 + 1) * width + j0 + 1] * (di0) * (dj0);
      totalused1 += used1[(i1)*width + j1] * (1 - di1) * (1 - dj1);
      totalused1 += used1[(i1 + 1) * width + j1] * (di1) * (1 - dj1);
      totalused1 += used1[(i1)*width + j1 + 1] * (1 - di1) * (dj1);
      totalused1 += used1[(i1 + 1) * width + j1 + 1] * (di1) * (dj1);
      // if one ref pixel has been referenced by other pixels more than the
      // other ref by a threshold, then decide there is occlusion
      if (totalused0 < (totalused0 + totalused1) * OPTICAL_FLOW_REF_THRES) {
        refid[i * width + j] = 0;
      } else if (totalused1 <
                 (totalused0 + totalused1) * OPTICAL_FLOW_REF_THRES) {
        refid[i * width + j] = 1;
      } else {
        refid[i * width + j] = dis_ref_id;
      }
    }
  }
  // All refs for each pixel have been decided now.
  // Do mode filter to get rid of outliers
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      // Frame-edge pixels have no full 3x3 neighbourhood; copy them through.
      if (i == 0 || i == height - 1 || j == 0 || j == width - 1) {
        refid_mode[i * width + j] = refid[i * width + j];
        continue;
      }
      refid_mode[i * width + j] =
          ref_mode_filter_3x3(refid + i * width + j, width, dstpos);
    }
  }
  // blend according to the refs selected
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      int use0 = -1, use1 = -1;
      pos = dstpos;
      if (refid_mode[i * width + j] == 0) {
        use0 = 1;
        use1 = 0;
        pos = 0;
      } else if (refid_mode[i * width + j] == 1) {
        use0 = 0;
        use1 = 1;
        pos = 1;
      } else if (refid_mode[i * width + j] == 2) {
        use0 = 1;
        use1 = 1;
      } else {
        assert(0);
      }
      ii0 = i - mf_start[i * mvstr + j].row * dstpos;
      jj0 = j - mf_start[i * mvstr + j].col * dstpos;
      ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos);
      jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos);
      i0 = opfl_floor_double_2_int(ii0);
      di0 = ii0 - i0;
      j0 = opfl_floor_double_2_int(jj0);
      dj0 = jj0 - j0;
      i1 = opfl_floor_double_2_int(ii1);
      di1 = ii1 - i1;
      j1 = opfl_floor_double_2_int(jj1);
      dj1 = jj1 - j1;
      // Chroma is produced once per 2x2 luma pixels.
      do_uv =
          (i % 2 == 0) && (j % 2 == 0);  // TODO(bohan) only considering 420 now
      if (do_uv) {
        // Halve the luma fraction and add 0.5 when the integer luma
        // coordinate is odd; (x % 2 + 2) % 2 also handles negative x.
        di0uv = di0 / 2 + 0.5 * ((i0 % 2 + 2) % 2);
        dj0uv = dj0 / 2 + 0.5 * ((j0 % 2 + 2) % 2);
        di1uv = di1 / 2 + 0.5 * ((i1 % 2 + 2) % 2);
        dj1uv = dj1 / 2 + 0.5 * ((j1 % 2 + 2) % 2);
      }
      dstpel_y = 0;
      dstpel_u = 0;
      dstpel_v = 0;
      if (use0) {
        dstpel_y += dstpel_y0[i * width + j] * (1 - pos);
        if (do_uv) {
          dstpel_u +=
              (double)get_sub_pel_uv(
                  src0u + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2),
                  uvstride, di0uv, dj0uv) *
              (1 - pos);
          dstpel_v +=
              (double)get_sub_pel_uv(
                  src0v + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2),
                  uvstride, di0uv, dj0uv) *
              (1 - pos);
        }
      }
      if (use1) {
        dstpel_y += dstpel_y1[i * width + j] * pos;
        if (do_uv) {
          dstpel_u +=
              (double)get_sub_pel_uv(
                  src1u + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2),
                  uvstride, di1uv, dj1uv) *
              pos;
          dstpel_v +=
              (double)get_sub_pel_uv(
                  src1v + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2),
                  uvstride, di1uv, dj1uv) *
              pos;
        }
      }
      dsty[i * stride + j] = opfl_round_double_2_int(dstpel_y);
      if (do_uv) {
        dstu[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_u);
        dstv[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_v);
      }
    }
  }
  aom_free(refid);
  aom_free(refid_mode);
  aom_free(used0);
  aom_free(used1);
  aom_free(dstpel_y0);
  aom_free(dstpel_y1);
}
/*
* Interpolate references according to the motion field
* Use bilateral filters based on the difference of the
* pixels as well as confidence of initial mf
*
* The block is processed in three passes: (1) predict luma from both
* references and record how heavily each reference pixel is used, (2) turn
* usage and prediction difference into a pair of normalized blend weights per
* pixel, (3) blend luma and chroma with those weights.
*
* Input:
* src0, src1: the reference frames
* mf_start: the motion field start pointer
* mvstr: motion field stride
* dstpos: position of the interpolated frame
* blk_info: block information (blk_width, blk_height, starth, startw are read)
*
* Output:
* dst: pointer to the interpolated frame
*/
void warp_optical_flow_bilateral(YV12_BUFFER_CONFIG *src0,
YV12_BUFFER_CONFIG *src1, DB_MV *mf_start,
int mvstr, YV12_BUFFER_CONFIG *dst,
double dstpos, OPFL_BLK_INFO blk_info) {
// Frame size (for clamping) versus block size (for loops and scratch arrays).
int fwidth = dst->y_width, fheight = dst->y_height;
int width = blk_info.blk_width;
int height = blk_info.blk_height;
int starth = blk_info.starth;
int startw = blk_info.startw;
// Chroma block origin is half the luma origin (4:2:0 only, see TODO below).
int starthuv = starth >> 1;
int startwuv = startw >> 1;
// NOTE(review): src0's strides are applied to src1 and dst as well, so all
// three buffers are presumably allocated with identical strides - confirm.
int stride = src0->y_stride;
int uvstride = src0->uv_stride;
// Plane pointers are moved to the top-left pixel of the block, so the
// coordinates used below are block-relative.
uint8_t *src0y = src0->y_buffer + starth * stride + startw;
uint8_t *src1y = src1->y_buffer + starth * stride + startw;
uint8_t *dsty = dst->y_buffer + starth * stride + startw;
uint8_t *src0u = src0->u_buffer + starthuv * uvstride + startwuv;
uint8_t *src0v = src0->v_buffer + starthuv * uvstride + startwuv;
uint8_t *src1u = src1->u_buffer + starthuv * uvstride + startwuv;
uint8_t *src1v = src1->v_buffer + starthuv * uvstride + startwuv;
uint8_t *dstu = dst->u_buffer + starthuv * uvstride + startwuv;
uint8_t *dstv = dst->v_buffer + starthuv * uvstride + startwuv;
double ii0, jj0, di0, dj0, di0uv, dj0uv;
double ii1, jj1, di1, dj1, di1uv, dj1uv;
int i0, j0;
int i1, j1;
double dstpel_y, dstpel_u, dstpel_v;
double pos;
int do_uv;
// used0/used1: per pixel of the block area in each reference, the accumulated
// bilinear weight with which that pixel is referenced.
double *used0 = aom_calloc(height * width, sizeof(double));
double *used1 = aom_calloc(height * width, sizeof(double));
// ref0wts/ref1wts: per-pixel blend weights of ref0 and ref1; after the second
// pass each pair has been divided by its sum.
double *ref0wts = aom_calloc(height * width, sizeof(double));
double *ref1wts = aom_calloc(height * width, sizeof(double));
// dstpel_y0/dstpel_y1: per-pixel sub-pel luma prediction from ref0 / ref1.
double *dstpel_y0 = aom_calloc(height * width, sizeof(double));
double *dstpel_y1 = aom_calloc(height * width, sizeof(double));
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
used0[i * width + j] = 0;
used1[i * width + j] = 0;
}
}
// first note down which pixels in refs are used for reference
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
// Step back along the motion vector by dstpos to reach ref0 and forward
// by (1 - dstpos) to reach ref1.
ii0 = i - mf_start[i * mvstr + j].row * dstpos;
jj0 = j - mf_start[i * mvstr + j].col * dstpos;
ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos);
jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos);
// Clamp each position into the frame (compared in frame coordinates, stored
// block-relative).
if (ii0 + starth < 0)
ii0 = -starth;
else if (ii0 + starth >= fheight)
ii0 = fheight - starth - 1;
if (jj0 + startw < 0)
jj0 = -startw;
else if (jj0 + startw >= fwidth)
jj0 = fwidth - startw - 1;
if (ii1 + starth < 0)
ii1 = -starth;
else if (ii1 + starth >= fheight)
ii1 = fheight - starth - 1;
if (jj1 + startw < 0)
jj1 = -startw;
else if (jj1 + startw >= fwidth)
jj1 = fwidth - startw - 1;
// Split each coordinate into integer pixel and fractional offset.
i0 = opfl_floor_double_2_int(ii0);
di0 = ii0 - i0;
j0 = opfl_floor_double_2_int(jj0);
dj0 = jj0 - j0;
i1 = opfl_floor_double_2_int(ii1);
di1 = ii1 - i1;
j1 = opfl_floor_double_2_int(jj1);
dj1 = jj1 - j1;
// "Inside" here means inside the current block (not the frame), leaving
// room for the +1 neighbours, so the used0/used1 indices stay in range.
int inside0 = (i0 >= 0 && i0 < height - 1 && j0 >= 0 && j0 < width - 1);
int inside1 = (i1 >= 0 && i1 < height - 1 && j1 >= 0 && j1 < width - 1);
if (inside0) {
// Spread a unit of usage over the four surrounding pixels (bilinear).
used0[(i0)*width + j0] += (1 - di0) * (1 - dj0);
used0[(i0 + 1) * width + j0] += (di0) * (1 - dj0);
used0[(i0)*width + j0 + 1] += (1 - di0) * (dj0);
used0[(i0 + 1) * width + j0 + 1] += (di0) * (dj0);
}
if (inside1) {
used1[(i1)*width + j1] += (1 - di1) * (1 - dj1);
used1[(i1 + 1) * width + j1] += (di1) * (1 - dj1);
used1[(i1)*width + j1 + 1] += (1 - di1) * (dj1);
used1[(i1 + 1) * width + j1 + 1] += (di1) * (dj1);
} // ignore when both out of bound
// The luma predictions are stored for every pixel, inside the block or not.
dstpel_y0[i * width + j] =
(double)get_sub_pel_y(src0y + i0 * stride + j0, stride, di0, dj0);
dstpel_y1[i * width + j] =
(double)get_sub_pel_y(src1y + i1 * stride + j1, stride, di1, dj1);
}
}
// calculate how many times each pixel is referenced
double totalused0, totalused1;
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
totalused0 = 0;
totalused1 = 0;
pos = dstpos;
// Same position computation and clamping as in the first pass.
ii0 = i - mf_start[i * mvstr + j].row * dstpos;
jj0 = j - mf_start[i * mvstr + j].col * dstpos;
ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos);
jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos);
if (ii0 + starth < 0)
ii0 = -starth;
else if (ii0 + starth >= fheight)
ii0 = fheight - starth - 1;
if (jj0 + startw < 0)
jj0 = -startw;
else if (jj0 + startw >= fwidth)
jj0 = fwidth - startw - 1;
if (ii1 + starth < 0)
ii1 = -starth;
else if (ii1 + starth >= fheight)
ii1 = fheight - starth - 1;
if (jj1 + startw < 0)
jj1 = -startw;
else if (jj1 + startw >= fwidth)
jj1 = fwidth - startw - 1;
i0 = opfl_floor_double_2_int(ii0);
di0 = ii0 - i0;
j0 = opfl_floor_double_2_int(jj0);
dj0 = jj0 - j0;
i1 = opfl_floor_double_2_int(ii1);
di1 = ii1 - i1;
j1 = opfl_floor_double_2_int(jj1);
dj1 = jj1 - j1;
int inside0 = (i0 >= 0 && i0 < height - 1 && j0 >= 0 && j0 < width - 1);
int inside1 = (i1 >= 0 && i1 < height - 1 && j1 >= 0 && j1 < width - 1);
if (inside0) {
// Bilinear read-back of the usage map at this pixel's ref0 position.
totalused0 += used0[(i0)*width + j0] * (1 - di0) * (1 - dj0);
totalused0 += used0[(i0 + 1) * width + j0] * (di0) * (1 - dj0);
totalused0 += used0[(i0)*width + j0 + 1] * (1 - di0) * (dj0);
totalused0 += used0[(i0 + 1) * width + j0 + 1] * (di0) * (dj0);
}
if (inside1) {
totalused1 += used1[(i1)*width + j1] * (1 - di1) * (1 - dj1);
totalused1 += used1[(i1 + 1) * width + j1] * (di1) * (1 - dj1);
totalused1 += used1[(i1)*width + j1 + 1] * (1 - di1) * (dj1);
totalused1 += used1[(i1 + 1) * width + j1 + 1] * (di1) * (dj1);
}
// if referenced more, weight should be lower
// if refs agrees better, weights should be closer to each other
// diffWts is 0.1 * (luma difference)^2, capped at 50 and forced to 0 when
// below 10, so small differences leave the weights at (1 - dstpos) : dstpos.
double diffWts = (dstpel_y0[i * width + j] - dstpel_y1[i * width + j]);
diffWts = 0.1 * diffWts * diffWts;
if (diffWts > 50) diffWts = 50;
if (diffWts < 10) diffWts = 0;
if (inside0 == 0 && inside1 == 0) {
// Neither position lies inside the block: plain temporal weights.
ref0wts[i * width + j] = 1 - pos;
ref1wts[i * width + j] = pos;
} else {
// A reference outside the block gets weight 0; one inside gets its temporal
// weight attenuated exponentially by difference times usage.
if (inside0)
ref0wts[i * width + j] =
exp(-0.01 * diffWts * totalused0) * (1 - dstpos);
else
ref0wts[i * width + j] = 0;
if (inside1)
ref1wts[i * width + j] = exp(-0.01 * diffWts * totalused1) * dstpos;
else
ref1wts[i * width + j] = 0;
}
// Normalize the pair so the two weights sum to 1.
// NOTE(review): tempSum is 0 when the only inside reference has a zero
// temporal weight (e.g. dstpos == 1 with only ref0 inside), which would
// produce NaN weights - confirm dstpos is always strictly between 0 and 1.
double tempSum = ref0wts[i * width + j] + ref1wts[i * width + j];
ref0wts[i * width + j] /= tempSum;
ref1wts[i * width + j] /= tempSum;
}
}
// blend according to the refs selected
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
pos = dstpos;
// Same position computation and clamping as in the first pass; needed here
// for the chroma fetches (luma predictions are reused from dstpel_y0/1).
ii0 = i - mf_start[i * mvstr + j].row * dstpos;
jj0 = j - mf_start[i * mvstr + j].col * dstpos;
ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos);
jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos);
if (ii0 + starth < 0)
ii0 = -starth;
else if (ii0 + starth >= fheight)
ii0 = fheight - starth - 1;
if (jj0 + startw < 0)
jj0 = -startw;
else if (jj0 + startw >= fwidth)
jj0 = fwidth - startw - 1;
if (ii1 + starth < 0)
ii1 = -starth;
else if (ii1 + starth >= fheight)
ii1 = fheight - starth - 1;
if (jj1 + startw < 0)
jj1 = -startw;
else if (jj1 + startw >= fwidth)
jj1 = fwidth - startw - 1;
i0 = opfl_floor_double_2_int(ii0);
di0 = ii0 - i0;
j0 = opfl_floor_double_2_int(jj0);
dj0 = jj0 - j0;
i1 = opfl_floor_double_2_int(ii1);
di1 = ii1 - i1;
j1 = opfl_floor_double_2_int(jj1);
dj1 = jj1 - j1;
// Chroma is produced once per 2x2 luma pixels, on even rows and columns.
do_uv =
(i % 2 == 0) && (j % 2 == 0); // TODO(bohan) only considering 420 now
if (do_uv) {
// Halve the luma fraction and add 0.5 when the integer luma coordinate is
// odd; (x % 2 + 2) % 2 yields 0 or 1 for negative x as well.
di0uv = di0 / 2 + 0.5 * ((i0 % 2 + 2) % 2);
dj0uv = dj0 / 2 + 0.5 * ((j0 % 2 + 2) % 2);
di1uv = di1 / 2 + 0.5 * ((i1 % 2 + 2) % 2);
dj1uv = dj1 / 2 + 0.5 * ((j1 % 2 + 2) % 2);
}
dstpel_y = 0;
dstpel_u = 0;
dstpel_v = 0;
// ref0 contribution; the luma weight is also applied to both chroma planes.
dstpel_y += dstpel_y0[i * width + j] * ref0wts[i * width + j];
if (do_uv) {
dstpel_u +=
(double)get_sub_pel_uv(
src0u + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2),
uvstride, di0uv, dj0uv) *
ref0wts[i * width + j];
dstpel_v +=
(double)get_sub_pel_uv(
src0v + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2),
uvstride, di0uv, dj0uv) *
ref0wts[i * width + j];
}
// ref1 contribution.
dstpel_y += dstpel_y1[i * width + j] * ref1wts[i * width + j];
if (do_uv) {
dstpel_u +=
(double)get_sub_pel_uv(
src1u + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2),
uvstride, di1uv, dj1uv) *
ref1wts[i * width + j];
dstpel_v +=
(double)get_sub_pel_uv(
src1v + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2),
uvstride, di1uv, dj1uv) *
ref1wts[i * width + j];
}
// Store the rounded result; chroma is written at half resolution.
dsty[i * stride + j] = opfl_round_double_2_int(dstpel_y);
if (do_uv) {
dstu[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_u);
dstv[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_v);
}
}
}
aom_free(ref0wts);
aom_free(ref1wts);
aom_free(used0);
aom_free(used1);
aom_free(dstpel_y0);
aom_free(dstpel_y1);
}
/*
 * Subpel filter for y pixels. The fractional offsets are rounded to 1/8
 * precision and an 8-tap filter from optical_flow_warp_filter is applied
 * along the rows first, then along the columns.
 *
 * Input:
 * src: the source pixel at the integer location. Pixels from 3 before to
 *      4 after it are read in both directions (one more after it when a
 *      fraction rounds up to a whole pixel).
 * stride: source stride
 * di: subpel location in height
 * dj: subpel location in width
 *
 * Output:
 * interpolated pixel, clamped to [0, 255]
 */
uint8_t get_sub_pel_y(uint8_t *src, int stride, double di, double dj) {
  // The table holds 16 phases, so a 1/8-pel phase selects every other entry.
  int vphase = 2 * opfl_round_double_2_int(di * 8);
  int hphase = 2 * opfl_round_double_2_int(dj * 8);
  // A fraction that rounds up to a whole pixel advances the anchor instead.
  if (vphase == 16) {
    vphase = 0;
    src += stride;
  }
  if (hphase == 16) {
    hphase = 0;
    src += 1;
  }
  assert(hphase <= 14 && hphase >= 0);
  assert(vphase <= 14 && vphase >= 0);

  // Horizontal pass over the 8 rows needed by the vertical pass.
  int filtered_rows[8];
  for (int r = 0; r < 8; r++) {
    int acc = 0;
    for (int t = 0; t < 8; t++) {
      acc += src[(r - 3) * stride + (t - 3)] *
             optical_flow_warp_filter[hphase][t];
    }
    filtered_rows[r] = (acc + (1 << 6)) >> 7;
  }

  // Vertical pass.
  int pel = 0;
  for (int r = 0; r < 8; r++) {
    pel += filtered_rows[r] * optical_flow_warp_filter[vphase][r];
  }
  pel = (pel + (1 << 6)) >> 7;

  if (pel > 255) return (uint8_t)255;
  if (pel < 0) return (uint8_t)0;
  return (uint8_t)pel;
}
/*
 * Subpel filter for u/v pixels. The fractional offsets are rounded to 1/16
 * precision and an 8-tap filter from optical_flow_warp_filter is applied
 * along the rows first, then along the columns.
 *
 * Input:
 * src: the source pixel at the integer location. Pixels from 3 before to
 *      4 after it are read in both directions (one more after it when a
 *      fraction rounds up to a whole pixel).
 * stride: source stride
 * di: subpel location in height
 * dj: subpel location in width
 *
 * Output:
 * interpolated pixel, clamped to [0, 255]
 */
uint8_t get_sub_pel_uv(uint8_t *src, int stride, double di, double dj) {
  // TODO(bohan) now only care about YUV 420
  int vphase = opfl_round_double_2_int(di * 16);
  int hphase = opfl_round_double_2_int(dj * 16);
  // A fraction that rounds up to a whole pixel advances the anchor instead.
  if (vphase == 16) {
    vphase = 0;
    src += stride;
  }
  if (hphase == 16) {
    hphase = 0;
    src += 1;
  }
  assert(hphase <= 15 && hphase >= 0);
  assert(vphase <= 15 && vphase >= 0);

  // Horizontal pass over the 8 rows needed by the vertical pass.
  int filtered_rows[8];
  for (int r = 0; r < 8; r++) {
    int acc = 0;
    for (int t = 0; t < 8; t++) {
      acc += src[(r - 3) * stride + (t - 3)] *
             optical_flow_warp_filter[hphase][t];
    }
    filtered_rows[r] = (acc + (1 << 6)) >> 7;
  }

  // Vertical pass.
  int pel = 0;
  for (int r = 0; r < 8; r++) {
    pel += filtered_rows[r] * optical_flow_warp_filter[vphase][r];
  }
  pel = (pel + (1 << 6)) >> 7;

  if (pel > 255) return (uint8_t)255;
  if (pel < 0) return (uint8_t)0;
  return (uint8_t)pel;
}
/*
 * Blend entry point: forwards to warp_optical_flow() with the blend method
 * selected by OPFL_BLEND_METHOD_USED.
 * Kept as a separate caller so that high level changes or pre-processing
 * can be added here.
 *
 * mf points at the start of a motion field buffer whose rows are
 * blk_width + 2 * AVG_MF_BORDER entries long; the warp is given a pointer
 * AVG_MF_BORDER rows and AVG_MF_BORDER columns into that buffer.
 */
void interp_optical_flow(YV12_BUFFER_CONFIG *ref0, YV12_BUFFER_CONFIG *ref1,
                         DB_MV *mf, YV12_BUFFER_CONFIG *dst, double dst_pos,
                         OPFL_BLK_INFO blk_info) {
  const int padded_stride = blk_info.blk_width + 2 * AVG_MF_BORDER;
  const int border_skip = AVG_MF_BORDER * padded_stride + AVG_MF_BORDER;
  warp_optical_flow(ref0, ref1, mf + border_skip, padded_stride, dst, dst_pos,
                    OPFL_BLEND_METHOD_USED, blk_info);
}
/*
 * Use the initial motion vectors to create the initial motion field.
 *
 * Every 4x4 luma block has one mv_left/mv_right pair. The pair is converted
 * into a single motion vector (in pixels at the scale of the motion field)
 * and replicated over the blksize x blksize motion field entries that cover
 * that block, where blksize = mvwid / (width / 4).
 *
 * Input:
 * mv_left: motion vector points from the current frame to ref0
 * mv_right: motion vector points from the current frame to ref1
 * mv_wts: (only when OPFL_INIT_WT) weight buffer laid out like mf; written
 *         alongside the motion field
 * width, height: frame/block width and height
 * mvwid, mvhgt: width and height of the mv at some pyramid scale
 * mfstr: stride of the motion field buffer
 * dstpos: relative location of the current block
 *
 * Output:
 * mf: pointer to the created motion field
 */
void create_motion_field(int_mv *mv_left, int_mv *mv_right, DB_MV *mf,
#if OPFL_INIT_WT
                         double *mv_wts,
#endif
                         int width, int height, int mvwid, int mvhgt, int mfstr,
                         double dstpos) {
  // since the motion field is just used as initialization for now,
  // just simply use the summation of the two
  // TODO(bohan): need to change the function to work for MAX_OPFL_LEVEL > 3
  DB_MV *mf_start = mf + AVG_MF_BORDER * mfstr + AVG_MF_BORDER;
  int idx;
  int blksize = mvwid / (width / 4);
  assert(blksize > 0);
  assert(blksize == mvhgt / (height / 4));
  double tempr, tempc;
  // Ratio between the 4x4 block grid and the motion field grid. This must be
  // a floating-point division: an integer "4 / blksize" truncates whenever
  // blksize does not divide 4 (e.g. it becomes 0 for blksize 8, which would
  // then be used as a divisor below).
  double mvscale = 4.0 / blksize;
#if OPFL_INIT_WT
  double *wts_start = mv_wts + AVG_MF_BORDER * mfstr + AVG_MF_BORDER;
  double tempWts;
#endif
  for (int h = 0; h < height / 4; h++) {
    for (int w = 0; w < width / 4; w++) {
      // mv_left, mv_right are based on 4x4 block
      idx = h * width / 4 + w;
      // The stored vectors are divided by 8.0 and by mvscale to obtain pixels
      // at the motion field scale. A single available vector is additionally
      // divided by its temporal distance.
      if (mv_left[idx].as_int == INVALID_MV &&
          mv_right[idx].as_int == INVALID_MV) {
        tempr = 0;
        tempc = 0;
#if OPFL_INIT_WT
        tempWts = 0;
#endif
      } else if (mv_left[idx].as_int == INVALID_MV) {
        tempr =
            (double)(mv_right[idx].as_mv.row) / 8.0 / mvscale / (1 - dstpos);
        tempc =
            (double)(mv_right[idx].as_mv.col) / 8.0 / mvscale / (1 - dstpos);
#if OPFL_INIT_WT
        tempWts = 0.0;
#endif
      } else if (mv_right[idx].as_int == INVALID_MV) {
        tempr = (double)(-mv_left[idx].as_mv.row) / 8.0 / mvscale / dstpos;
        tempc = (double)(-mv_left[idx].as_mv.col) / 8.0 / mvscale / dstpos;
#if OPFL_INIT_WT
        tempWts = 0.0;
#endif
      } else {
        tempr = (double)(-mv_left[idx].as_mv.row + mv_right[idx].as_mv.row) /
                8.0 / mvscale;
        tempc = (double)(-mv_left[idx].as_mv.col + mv_right[idx].as_mv.col) /
                8.0 / mvscale;
#if OPFL_INIT_WT
        tempWts = 0.0;
#endif
      }
      // Replicate the vector over every motion field entry of this block.
      for (int i = 0; i < blksize; i++) {
        for (int j = 0; j < blksize; j++) {
          const int mfidx = (h * blksize + i) * mfstr + w * blksize + j;
          mf_start[mfidx].row = tempr;
          mf_start[mfidx].col = tempc;
#if OPFL_INIT_WT
          // The weight is forced to 0 when either warped position falls
          // outside [0, height - 1] x [0, width - 1].
          double i0, i1, j0, j1;
          i0 = (double)(i + h * blksize) - (dstpos)*tempr;
          i1 = (double)(i + h * blksize) + (1 - dstpos) * tempr;
          j0 = (double)(j + w * blksize) - (dstpos)*tempc;
          j1 = (double)(j + w * blksize) + (1 - dstpos) * tempc;
          int is_out =
              (i0 < 0 || i0 > height - 1 || i1 < 0 || i1 > height - 1 ||
               j0 < 0 || j0 > width - 1 || j1 < 0 || j1 > width - 1);
          if (is_out) {
            wts_start[mfidx] = 0;
          } else {
            wts_start[mfidx] = tempWts;
          }
#endif
        }
      }
    }
  }
  // Pad the motion field border
  // pad_motion_field_border(mf_start, mvwid, mvhgt, mfstr);
  return;
}
/*
 * Fill the initial motion field if there are holes
 *
 * A 4x4 block is a hole when both of its motion vectors are INVALID_MV.
 * Holes are filled iteratively: in every round each hole that has at least
 * one valid 8-neighbour takes the average of its valid neighbours' motion,
 * and the newly filled blocks only become valid once the round is over.
 * Nothing is filled when every block is a hole. The motion field border is
 * padded at the end in all cases.
 *
 * Input:
 * mv_left: motion vector points from the current frame to ref0
 * mv_right: motion vector points from the current frame to ref1
 * width, height: frame/block width and height
 * mvwid, mvhgt: width and height of the mv at some pyramid scale
 * mfstr: stride of the motion field buffer
 *
 * Output:
 * mf: pointer to the created motion field
 */
void fill_create_motion_field(int_mv *mv_left, int_mv *mv_right, DB_MV *mf,
                              int width, int height, int mvwid, int mvhgt,
                              int mfstr) {
  DB_MV *field = mf + AVG_MF_BORDER * mfstr + AVG_MF_BORDER;
  const int blk = mvwid / (width / 4);
  assert(blk == mvhgt / (height / 4));
  const int rows4 = height / 4;
  const int cols4 = width / 4;

  // Scratch copy of the field: fills of one round are staged here so that
  // they do not influence other holes filled in the same round.
  DB_MV *scratch = aom_calloc(mvwid * mvhgt, sizeof(DB_MV));
  // state: 0 = still a hole; 1 = valid; -1 = filled this round, not committed
  int *state = aom_calloc(height * width / 4 / 4, sizeof(int));

  for (int r = 0; r < mvhgt; r++) {
    for (int c = 0; c < mvwid; c++) {
      scratch[r * mvwid + c].col = field[r * mfstr + c].col;
      scratch[r * mvwid + c].row = field[r * mfstr + c].row;
    }
  }

  // Classify every 4x4 block and count the holes.
  int holes = 0;
  for (int h = 0; h < rows4; h++) {
    for (int w = 0; w < cols4; w++) {
      const int idx = h * width / 4 + w;
      const int is_hole = mv_left[idx].as_int == INVALID_MV &&
                          mv_right[idx].as_int == INVALID_MV;
      state[idx] = is_hole ? 0 : 1;
      if (is_hole) holes++;
    }
  }

  while (holes > 0 && holes != width * height / 4 / 4) {
    // Stage: average the valid neighbours of every remaining hole.
    for (int h = 0; h < rows4; h++) {
      for (int w = 0; w < cols4; w++) {
        const int idx = h * width / 4 + w;
        if (state[idx] != 0) continue;

        DB_MV sum;
        sum.col = 0;
        sum.row = 0;
        int nvalid = 0;
        for (int dh = -1; dh <= 1; dh++) {
          if (dh + h < 0 || dh + h >= rows4) continue;
          for (int dw = -1; dw <= 1; dw++) {
            if (dw + w < 0 || dw + w >= cols4) continue;
            if (state[idx + dh * width / 4 + dw] <= 0) continue;
            // The top-left entry represents the neighbouring block.
            const DB_MV *nb = &field[(h + dh) * blk * mfstr + (dw + w) * blk];
            sum.col += nb->col;
            sum.row += nb->row;
            nvalid++;
          }
        }
        if (nvalid == 0) continue;

        for (int i = 0; i < blk; i++) {
          for (int j = 0; j < blk; j++) {
            DB_MV *out = &scratch[(h * blk + i) * mvwid + w * blk + j];
            out->row = sum.row / nvalid;
            out->col = sum.col / nvalid;
          }
        }
        state[idx] = -1;
      }
    }

    // Commit: copy staged fills into the field and recount the holes.
    holes = 0;
    for (int h = 0; h < rows4; h++) {
      for (int w = 0; w < cols4; w++) {
        const int idx = h * width / 4 + w;
        if (state[idx] == 0) {
          holes++;
          continue;
        }
        if (state[idx] > 0) continue;
        state[idx] = 1;
        for (int i = 0; i < blk; i++) {
          for (int j = 0; j < blk; j++) {
            const DB_MV *in = &scratch[(h * blk + i) * mvwid + w * blk + j];
            DB_MV *out = &field[(h * blk + i) * mfstr + w * blk + j];
            out->row = in->row;
            out->col = in->col;
          }
        }
      }
    }
  }

  aom_free(scratch);
  aom_free(state);
  // Pad the motion field border
  pad_motion_field_border(field, mvwid, mvhgt, mfstr);
}
/*
 * Sum of absolute differences between the two motion compensated luma
 * predictions of a blksize x blksize block.
 *
 * Input:
 * y0, y1: luma planes of the two references
 * srcstride: stride of y0 and y1
 * blksize: side length of the block
 * starth, startw: top-left position of the block relative to y0/y1
 * mv: motion vector of the block; ref0 is sampled at -mv * dstpos and
 *     ref1 at mv * (1 - dstpos)
 * dstpos: position of the interpolated frame
 * width, height: currently unused
 *
 * Output:
 * the accumulated absolute difference
 */
int opfl_get_blk_warp_SAD(uint8_t *y0, uint8_t *y1, int srcstride, int blksize,
                          int starth, int startw, DB_MV mv, double dstpos,
                          int width, int height) {
  // NOTE(review): the sampled positions are not clamped to the frame, so the
  // reads presumably rely on the frame border being large enough - confirm.
  (void)width;
  (void)height;

  // Displacement toward each reference, split into integer and fraction.
  const double off_row_l = -mv.row * dstpos;
  const double off_col_l = -mv.col * dstpos;
  const double off_row_r = mv.row * (1 - dstpos);
  const double off_col_r = mv.col * (1 - dstpos);
  const int yl = opfl_floor_double_2_int(off_row_l);
  const int xl = opfl_floor_double_2_int(off_col_l);
  const int yr = opfl_floor_double_2_int(off_row_r);
  const int xr = opfl_floor_double_2_int(off_col_r);
  const double dil = off_row_l - yl;
  const double djl = off_col_l - xl;
  const double dir = off_row_r - yr;
  const double djr = off_col_r - xr;

  int sad = 0;
  for (int r = 0; r < blksize; r++) {
    const int row_off_l = (starth + r + yl) * srcstride;
    const int row_off_r = (starth + r + yr) * srcstride;
    for (int c = 0; c < blksize; c++) {
      const int lpix = get_sub_pel_y(y0 + (row_off_l + (startw + c + xl)),
                                     srcstride, dil, djl);
      const int rpix = get_sub_pel_y(y1 + (row_off_r + (startw + c + xr)),
                                     srcstride, dir, djr);
      sad += abs(lpix - rpix);
    }
  }
  return sad;
}
/*
 * Refine a motion vector for one 16x16 block by a greedy cross search that
 * minimizes opfl_get_blk_warp_SAD().
 *
 * The search runs 25 rounds. Each round probes the four neighbours of the
 * current best vector (up, down, left, right, in that order) at the current
 * step size and moves to the best strictly improving one, if any. The step
 * size starts at 8 and is halved after every 4th round.
 *
 * Input:
 * y0, y1: luma planes of the two references
 * srcstr: stride of y0 and y1
 * starth, startw: top-left position of the block relative to y0/y1
 * init_mv: starting motion vector
 * dstpos: position of the interpolated frame
 * width, height: forwarded to opfl_get_blk_warp_SAD()
 *
 * Output:
 * the refined motion vector
 */
DB_MV bi_direction_motion_search(uint8_t *y0, uint8_t *y1, int srcstr,
                                 int starth, int startw, DB_MV init_mv,
                                 double dstpos, int width, int height) {
  DB_MV best_mv = init_mv;
  int best_sad = opfl_get_blk_warp_SAD(y0, y1, srcstr, 16, starth, startw,
                                       best_mv, dstpos, width, height);
  double stepsize = 8;

  for (int round = 1; round <= 25; round++) {
    int round_sad = best_sad;
    DB_MV round_mv = best_mv;

    // 0: upper, 1: lower, 2: left, 3: right
    for (int dir = 0; dir < 4; dir++) {
      DB_MV probe;
      probe.row = best_mv.row;
      probe.col = best_mv.col;
      switch (dir) {
        case 0: probe.row = best_mv.row - stepsize; break;
        case 1: probe.row = best_mv.row + stepsize; break;
        case 2: probe.col = best_mv.col - stepsize; break;
        default: probe.col = best_mv.col + stepsize; break;
      }
      const int probe_sad = opfl_get_blk_warp_SAD(
          y0, y1, srcstr, 16, starth, startw, probe, dstpos, width, height);
      if (probe_sad < round_sad) {
        round_sad = probe_sad;
        round_mv = probe;
      }
    }

    if (best_sad > round_sad) {
      best_sad = round_sad;
      best_mv = round_mv;
    }
    if (round % 4 == 0) stepsize /= 2;
  }
  return best_mv;
}
/*
* Do a motion search to update the motion field
*
* Input:
* mv_left: motion vector points from the current frame to ref0
* mv_right: motion vector points from the current frame to ref1
* width, height: frame/block width and height
* mvwid, mvhgt: width and height of the mv at some pyramid scale
* mfstr: stride of the motion field buffer
* dstpos: position of the interpolated frame (passed on to the motion search)
* blk_info: block information; its starth/startw are added to the origin of
*           each 16x16 block that is searched
* y0, y1: two reference frames at level 0 pointed to the start
* of current block/frame
* srcstr: the stride of y0 and y1
*
* Output:
* mf: pointer to the created motion field
*/
void opfl_init_motion_search(int_mv *mv_left, int_mv *mv_right, DB_MV *mf,
int width, int height, int mvwid, int mvhgt,
int mfstr, double dstpos, OPFL_BLK_INFO *blk_info,
uint8_t *y0, uint8_t *y1, int srcstr) {
int stride = mfstr;
DB_MV *mf_start = mf + AVG_MF_BORDER * stride + AVG_MF_BORDER;
int idx;
int invalid_cnt = 0;
// isValid = 0: not valid; 1: valid.
int *isValid = aom_calloc(height * width / 4 / 4, sizeof(int));
for (int h = 0; h < height / 4; h++) {
for (int w = 0; w < width / 4; w++) {
idx = h * width / 4 + w;
if (mv_left[idx].as_int == INVALID_MV &&
mv_right[idx].as_int == INVALID_MV) {
isValid[idx] = 0;
} else {
isValid[idx] = 1;
}
}
}
DB_MV avg_mv, search_mv;
int starth, startw;
// Do motion search for every 16x16 blocks if too many invalid mvs
for (int hb = 0; hb < height / 16; hb++) {
for (int wb = 0; wb < width / 16; wb++) {
// get how many invalid mvs are there in this 16x16 block
invalid_cnt = 0;
for (int h = 0; h < 16 / 4; h++) {
for (int w = 0; w < 16 / 4; w++) {
if (isValid[(hb * 4 + h) * width / 4 + wb * 4 + w]) continue;
invalid_cnt++;
}
}
if (invalid_cnt < 16) continue;
// first get the avg mv as the initial mv
avg_mv.row = 0;
avg_mv.col = 0;
int mvwid_16 = 16 * mvwid / width;
int mvhgt_16 = 16 * mvhgt / height;
for (int h = 0; h < mvhgt_16; h++) {
for (int w = 0; w < mvwid_16; w++) {
avg_mv.row +=
mf_start[(hb * mvhgt_16 + h) * mfstr + wb * mvwid_16 + w].row;
avg_mv.col +=
mf_start[(hb * mvhgt_16 + h) * mfstr + wb * mvwid_16 + w].col;
}
}
avg_mv.row /= (mvwid_16 * mvhgt_16);
avg_mv.col /= (mvwid_16 * mvhgt_16);
avg_mv.row *= (height / mvhgt);
avg_mv.col *= (width / mvwid);
// printf("\n(%.2f, %.2f): ", avg_mv.row, avg_mv.col);
starth = blk_info->starth + hb * 16;
startw = blk_info->startw + wb * 16;
search_mv = bi_direction_motion_search(y0, y1, srcstr, starth, startw,
avg_mv, dstpos, width, height);
// printf(" %d %d ", starth, startw);
// printf(" (%.2f, %.2f): ", search_mv.row, search_mv.col);
for (int h = 0; h < mvhgt_16; h++) {
for (int w = 0; w < mvwid_16; w++) {
mf_start[(hb * mvhgt_16 + h) * mfstr + wb * mvwid_16 + w].row =
search_mv.row / (height / mvhgt);
mf_start[(hb * mvhgt_16 + h) * mfstr + wb * mvwid_16 + w].col =
search_mv.col / (width / mvwid);
}
}
}
}
aom_free(isValid);
// Pad the motion field border
pad_motion_field_border(mf_start, mvwid, mvhgt, mfstr);
}
/*
 * Fill the INVALID_MV entries of a motion vector array from their valid
 * neighbours.
 *
 * Each pass computes, for every invalid entry, the rounded average of the
 * valid entries among its 3x3 neighbourhood (clipped to the array). Results
 * are staged in a temporary array and committed after the pass, so entries
 * filled in a pass do not influence other entries of the same pass. Passes
 * repeat until no invalid entry is left.
 *
 * Nothing is changed when the array has no invalid entry, when every entry is
 * invalid (there is nothing to propagate), or when the temporary array cannot
 * be allocated.
 *
 * Input:
 * pmv: width x height motion vectors, row pitch width; updated in place
 * width, height: dimensions of pmv
 */
void opfl_fill_mv(int_mv *pmv, int width, int height) {
  if (width <= 0 || height <= 0) return;
  const int total = width * height;

  int invalid_cnt = 0;
  for (int pos = 0; pos < total; pos++) {
    if (pmv[pos].as_int == INVALID_MV) invalid_cnt++;
  }
  // Decide before allocating: nothing to fill, or nothing to fill from.
  if (invalid_cnt == 0 || invalid_cnt == total) return;

  int_mv *tempmv = aom_calloc(total, sizeof(*tempmv));
  if (tempmv == NULL) return;
  for (int pos = 0; pos < total; pos++) tempmv[pos].as_int = INVALID_MV;

  while (invalid_cnt > 0 && invalid_cnt != total) {
    // Stage the neighbourhood average of every invalid entry.
    for (int i = 0; i < height; i++) {
      for (int j = 0; j < width; j++) {
        if (pmv[i * width + j].as_int != INVALID_MV) continue;
        // Sums are kept in int rather than in an int_mv's own components.
        // NOTE(review): as_mv.row/col are presumably 16-bit, in which case
        // adding up to 8 neighbours in place could wrap - confirm.
        int sum_row = 0, sum_col = 0, avgcnt = 0;
        for (int h = -1; h < 2; h++) {
          for (int w = -1; w < 2; w++) {
            const int ni = i + h, nj = j + w;
            if (ni < 0 || ni >= height || nj < 0 || nj >= width) continue;
            const int_mv *nb = &pmv[ni * width + nj];
            if (nb->as_int == INVALID_MV) continue;
            sum_col += nb->as_mv.col;
            sum_row += nb->as_mv.row;
            avgcnt++;
          }
        }
        if (avgcnt != 0) {
          tempmv[i * width + j].as_mv.col =
              opfl_round_double_2_int((double)sum_col / avgcnt);
          tempmv[i * width + j].as_mv.row =
              opfl_round_double_2_int((double)sum_row / avgcnt);
        }
      }
    }
    // Commit the staged values and recount what is still missing.
    invalid_cnt = 0;
    for (int pos = 0; pos < total; pos++) {
      if (pmv[pos].as_int != INVALID_MV) continue;
      if (tempmv[pos].as_int != INVALID_MV) {
        pmv[pos].as_int = tempmv[pos].as_int;
      } else {
        invalid_cnt++;
      }
    }
  }
  aom_free(tempmv);
}
/*
 * Upscale the motion field by 2 in each dimension.
 * Every source vector is doubled and replicated into the matching 2x2 group
 * of destination entries (nearest-neighbour copy), then the destination
 * border is padded.
 *
 * Input:
 * src: the source motion field
 * srcw, srch, srcs: source width, height, stride
 * dsts: destination mf stride
 *
 * Output:
 * dst: pointer to the upscaled mf buffer (2 * srcw by 2 * srch entries)
 */
void upscale_mv_by_2(DB_MV *src, int srcw, int srch, int srcs, DB_MV *dst,
                     int dsts) {
  for (int sy = 0; sy < srch; sy++) {
    for (int sx = 0; sx < srcw; sx++) {
      // k walks the 2x2 destination group in raster order.
      for (int k = 0; k < 4; k++) {
        const int dy = sy * 2 + (k >> 1);
        const int dx = sx * 2 + (k & 1);
        dst[dy * dsts + dx].row = src[sy * srcs + sx].row * 2;
        dst[dy * dsts + dx].col = src[sy * srcs + sx].col * 2;
      }
    }
  }
  pad_motion_field_border(dst, srcw * 2, srch * 2, dsts);
}
/*
 * Pad the motion field border to prepare for median filter.
 *
 * Replicates the outermost rows and columns of a width x height motion field
 * into the AVG_MF_BORDER entries surrounding it. Rows are padded first, so
 * the column passes (which also cover the border rows) fill the corners.
 *
 * Input:
 * mf_start: first entry inside the border
 * width, height: motion field size without the border
 * stride: row pitch; asserted to be width + 2 * AVG_MF_BORDER
 */
void pad_motion_field_border(DB_MV *mf_start, int width, int height,
                             int stride) {
  assert(stride == width + 2 * AVG_MF_BORDER);
  const int first_row = -AVG_MF_BORDER;
  const int end_row = height + AVG_MF_BORDER;

  // upper: copies of row 0
  for (int r = first_row; r < 0; r++) {
    DB_MV *dst_row = mf_start + r * stride;
    memcpy(dst_row, mf_start, sizeof(DB_MV) * width);
  }
  // lower: copies of row height - 1
  for (int r = height; r < end_row; r++) {
    DB_MV *dst_row = mf_start + r * stride;
    memcpy(dst_row, mf_start + (height - 1) * stride, sizeof(DB_MV) * width);
  }
  // left: copies of column 0, border rows included
  for (int r = first_row; r < end_row; r++) {
    DB_MV *line = mf_start + r * stride;
    for (int k = AVG_MF_BORDER; k >= 1; k--) {
      line[-k] = line[0];
    }
  }
  // right: copies of column width - 1, border rows included
  for (int r = first_row; r < end_row; r++) {
    DB_MV *line = mf_start + r * stride;
    for (int k = 0; k < AVG_MF_BORDER; k++) {
      line[width + k] = line[width - 1];
    }
  }
}
/*
 * Select the element that would sit at index mididx if x were sorted in
 * ascending order (mididx = length / 2 gives the median).
 *
 * Works by repeated partitioning around the middle element of the current
 * candidates: values <= pivot go to one scratch buffer, larger values to the
 * other, and the search continues in the side that contains the wanted rank.
 *
 * Input:
 * x: the values; used as scratch space afterwards, so its contents are not
 * preserved
 * left, right: scratch buffers, each able to hold length - 1 values
 * length: number of values in x
 * mididx: zero-based rank to select
 *
 * Output:
 * the selected value
 */
double iter_median_double(double *x, double *left, double *right, int length,
                          int mididx) {
  double *cur = x;        // current candidates
  double *lo_buf = left;  // receives values <= pivot
  double *hi_buf = right; // receives values > pivot
  int count = length;
  int rank = mididx;

  for (;;) {
    const int pivot = count / 2;
    int n_lo = 0, n_hi = 0;
    for (int k = 0; k < count; k++) {
      if (k == pivot) continue;
      if (cur[k] <= cur[pivot]) {
        lo_buf[n_lo++] = cur[k];
      } else {
        hi_buf[n_hi++] = cur[k];
      }
    }
    if (rank == n_lo) return cur[pivot];

    // Continue in the side holding the wanted rank; the buffer that held the
    // old candidates is free again and takes that side's place as scratch.
    double *spent = cur;
    if (rank < n_lo) {
      cur = lo_buf;
      lo_buf = spent;
      count = n_lo;
    } else {
      cur = hi_buf;
      hi_buf = spent;
      count = n_hi;
      rank -= n_lo + 1;  // skip the low side and the pivot itself
    }
  }
}
/*
 * Do mode filter to the reference selections.
 *
 * Counts how often each reference id (0, 1 or 2) occurs in the 3x3
 * neighbourhood around center and returns the winner:
 * - 2 whenever it occurs at least as often as both 0 and 1;
 * - otherwise the more frequent of 0 and 1;
 * - if 0 and 1 tie, 0 when dstpos <= 0.5 and 1 otherwise.
 *
 * Input:
 * center: middle entry of the 3x3 window; all nine entries must be readable
 * stride: row pitch of the reference id buffer
 * dstpos: tie breaker between reference 0 and reference 1
 */
int ref_mode_filter_3x3(int *center, int stride, double dstpos) {
  int votes[3] = { 0 };
  for (int dy = -1; dy <= 1; dy++) {
    const int *row = center + dy * stride;
    for (int dx = -1; dx <= 1; dx++) {
      const int ref_id = row[dx];
      assert(ref_id >= 0);
      votes[ref_id]++;
    }
  }

  if (votes[2] >= votes[1] && votes[2] >= votes[0]) return 2;
  if (votes[1] == votes[0]) {
    if (dstpos <= 0.5) return 0;
    return 1;
  }
  if (votes[0] > votes[1]) return 0;
  return 1;
}
/*
 * Append `height` rows of `width` bytes each, taken from a plane with the
 * given row stride, to f. Returns 1 on success and 0 as soon as a row is
 * written short.
 */
static int opfl_write_plane(FILE *f, const uint8_t *plane, int stride,
                            int width, int height) {
  for (int h = 0; h < height; ++h) {
    if (fwrite(plane + h * stride, 1, width, f) != (size_t)width) return 0;
  }
  return 1;
}

/*
 * Write YUV for debug purpose.
 *
 * Appends the Y, U and V planes of ref_buf (in that order, without row
 * padding) to file_name, which is opened in binary append mode.
 *
 * Input:
 * ref_buf: frame buffer to dump
 * file_name: path of the output file
 *
 * Output:
 * AOM_CODEC_OK on success. AOM_CODEC_MEM_ERROR when ref_buf is NULL, the file
 * cannot be opened, or the data cannot be written completely; the same code
 * is used for all failures because it is the one this function has always
 * reported.
 */
int write_image_opfl(const YV12_BUFFER_CONFIG *const ref_buf, char *file_name) {
  if (ref_buf == NULL) {
    printf("Frame data buffer is NULL.\n");
    return AOM_CODEC_MEM_ERROR;
  }
  FILE *f_ref = fopen(file_name, "ab");
  if (f_ref == NULL) {
    printf("Unable to open file %s to write.\n", file_name);
    return AOM_CODEC_MEM_ERROR;
  }

  // Planes are written in Y, U, V order; stop at the first short write.
  int ok = opfl_write_plane(f_ref, ref_buf->y_buffer, ref_buf->y_stride,
                            ref_buf->y_width, ref_buf->y_height) &&
           opfl_write_plane(f_ref, ref_buf->u_buffer, ref_buf->uv_stride,
                            ref_buf->uv_width, ref_buf->uv_height) &&
           opfl_write_plane(f_ref, ref_buf->v_buffer, ref_buf->uv_stride,
                            ref_buf->uv_width, ref_buf->uv_height);

  // fclose() flushes buffered data, so its result matters for a writer too.
  // The stream is closed on every path to avoid leaking the handle.
  if (fclose(f_ref) != 0) ok = 0;

  if (!ok) {
    printf("Unable to write to file %s.\n", file_name);
    return AOM_CODEC_MEM_ERROR;
  }
  return AOM_CODEC_OK;
}
/*
 * Extend pixel planes. Behaves exactly like the original extend_plane
 * function.
 *
 * The plane is extended in place by edge replication: each row is widened
 * with copies of its first and last pixel, then the widened first and last
 * rows are copied into the top and bottom borders (which fills the corners).
 *
 * Input:
 * src: top-left pixel of the width x height plane; the surrounding border
 * memory must be writable
 * src_stride: row pitch of the plane
 * width, height: plane size without the border
 * extend_top, extend_left, extend_bottom, extend_right: border sizes
 */
void extend_plane_opfl(uint8_t *const src, int src_stride, int width,
                       int height, int extend_top, int extend_left,
                       int extend_bottom, int extend_right) {
  const int linesize = extend_left + extend_right + width;

  /* widen every row to the left and to the right */
  for (int row = 0; row < height; ++row) {
    uint8_t *const line = src + row * src_stride;
    memset(line - extend_left, line[0], extend_left);
    memset(line + width, line[width - 1], extend_right);
  }

  /* replicate the widened first row upwards */
  uint8_t *const first_line = src - extend_left;
  uint8_t *top_dst = src + src_stride * -extend_top - extend_left;
  for (int k = 0; k < extend_top; ++k, top_dst += src_stride) {
    memcpy(top_dst, first_line, linesize);
  }

  /* replicate the widened last row downwards */
  uint8_t *const last_line = src + src_stride * (height - 1) - extend_left;
  uint8_t *bottom_dst = src + src_stride * height - extend_left;
  for (int k = 0; k < extend_bottom; ++k, bottom_dst += src_stride) {
    memcpy(bottom_dst, last_line, linesize);
  }
}
/*
 * Round a double to the nearest int, with halfway cases rounded away from
 * zero (2.5 -> 3, -2.5 -> -3). Implemented by adding or subtracting 0.5 and
 * truncating.
 */
int opfl_round_double_2_int(double x) {
  return (x >= 0) ? (int)(x + 0.5) : (int)(x - 0.5);
}
/*
 * Return the largest int that is not greater than x (mathematical floor).
 *
 * The cast truncates toward zero, which already is the floor for x >= 0 and
 * for negative values that are exact integers; only negative values with a
 * fractional part need the extra -1. (Subtracting 1 for every negative input
 * would turn -2.0 into -3.)
 */
int opfl_floor_double_2_int(double x) {
  const int truncated = (int)x;
  return (x < truncated) ? truncated - 1 : truncated;
}
/*
 * Return the smallest int that is not less than x (mathematical ceiling).
 *
 * The cast truncates toward zero, which already is the ceiling for x <= 0 and
 * for positive values that are exact integers; only positive values with a
 * fractional part need the extra +1. Computed directly from the truncated
 * value so that exact integers such as 2.0 map to themselves.
 */
int opfl_ceil_double_2_int(double x) {
  const int truncated = (int)x;
  return (x > truncated) ? truncated + 1 : truncated;
}
/*
 * Extend the frame-level motion field into its border for one block.
 *
 * For every side on which the block is flagged as a bound (upbound,
 * lowerbound, leftbound, rightbound) and also touches the corresponding frame
 * edge, the block's outermost row or column of motion vectors is replicated
 * OPFL_MF_FRAME_BORDER entries outwards. When two adjacent sides qualify, the
 * corner area between them is filled with the block's corner vector.
 *
 * Input:
 * buf_struct: provides the motion field (mf_frame_start, mf_frame_stride)
 * and the frame size (y_height / y_width of ref0_buf[0])
 * blk_info: block position (starth, startw), size (blk_height, blk_width)
 * and the four bound flags
 */
void opfl_extend_frame_mf(OPFL_BUFFER_STRUCT *buf_struct,
                          OPFL_BLK_INFO blk_info) {
  DB_MV *const mf = buf_struct->mf_frame_start;
  const int stride = buf_struct->mf_frame_stride;
  const int row0 = blk_info.starth;
  const int col0 = blk_info.startw;
  const int row_end = row0 + blk_info.blk_height;
  const int col_end = col0 + blk_info.blk_width;
  const int frame_h = buf_struct->ref0_buf[0]->y_height;
  const int frame_w = buf_struct->ref0_buf[0]->y_width;

  // A side is padded only if it is flagged and lies on the frame edge.
  const int pad_top = blk_info.upbound && row0 == 0;
  const int pad_bottom = blk_info.lowerbound && row_end == frame_h;
  const int pad_left = blk_info.leftbound && col0 == 0;
  const int pad_right = blk_info.rightbound && col_end == frame_w;

  if (pad_top) {
    for (int b = 1; b <= OPFL_MF_FRAME_BORDER; b++) {
      DB_MV *dst = mf - b * stride;
      // upper bound
      for (int c = col0; c < col_end; c++) dst[c] = mf[c];
      // upper-left and upper-right corners
      if (pad_left) {
        for (int k = 1; k <= OPFL_MF_FRAME_BORDER; k++) dst[-k] = mf[0];
      }
      if (pad_right) {
        for (int k = 0; k < OPFL_MF_FRAME_BORDER; k++) {
          dst[col_end + k] = mf[col_end - 1];
        }
      }
    }
  }

  if (pad_bottom) {
    const DB_MV *last = mf + (row_end - 1) * stride;
    for (int b = 0; b < OPFL_MF_FRAME_BORDER; b++) {
      DB_MV *dst = mf + (row_end + b) * stride;
      // lower bound
      for (int c = col0; c < col_end; c++) dst[c] = last[c];
      // lower-left and lower-right corners
      if (pad_left) {
        for (int k = 1; k <= OPFL_MF_FRAME_BORDER; k++) dst[-k] = last[0];
      }
      if (pad_right) {
        for (int k = 0; k < OPFL_MF_FRAME_BORDER; k++) {
          dst[col_end + k] = last[col_end - 1];
        }
      }
    }
  }

  // left bound
  if (pad_left) {
    for (int r = row0; r < row_end; r++) {
      DB_MV *line = mf + r * stride;
      for (int k = 1; k <= OPFL_MF_FRAME_BORDER; k++) line[-k] = line[0];
    }
  }

  // right bound
  if (pad_right) {
    for (int r = row0; r < row_end; r++) {
      DB_MV *line = mf + r * stride;
      for (int k = 0; k < OPFL_MF_FRAME_BORDER; k++) {
        line[col_end + k] = line[col_end - 1];
      }
    }
  }
}
#endif // CONFIG_OPFL