| /* |
| * Copyright (c) 2017, Alliance for Open Media. All rights reserved |
| * |
| * This source code is subject to the terms of the BSD 2 Clause License and |
| * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| * was not distributed with this source code in the LICENSE file, you can |
| * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| * Media Patent License 1.0 was not distributed with this source code in the |
| * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| */ |
| |
| #include "av1/common/optical_flow_ref.h" |
| #include <float.h> |
| #include <math.h> |
| #include <time.h> |
| #include "./aom_config.h" |
| #include "./aom_scale_rtcd.h" |
| #include "aom_mem/aom_mem.h" |
| #include "aom_scale/aom_scale.h" |
| #include "av1/common/alloccommon.h" |
| #include "av1/common/onyxc_int.h" |
| #include "av1/common/sparse_linear_solver.h" |
| |
| #if CONFIG_OPFL |
| // global timer for debug purpose |
| double timeinit, timesub, timesolve, timeder, timemed, timetotal; |
| clock_t startt, endt; |
| static int optical_flow_warp_filter[16][8] = { |
| { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 }, |
| { 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 }, |
| { 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 }, |
| { 0, 2, -16, 94, 58, -12, 2, 0 }, { 0, 2, -14, 84, 66, -12, 2, 0 }, |
| { 0, 2, -14, 76, 76, -14, 2, 0 }, { 0, 2, -12, 66, 84, -14, 2, 0 }, |
| { 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 }, |
| { 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 }, |
| { 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 } |
| }; |
| |
| /* |
| * Interpolate the whole opfl reference frame. |
| * Should only be called by the encoder. |
| * |
| * Input: |
| * cm: the av1_common pointer. |
| * cm->opfl_ref_frame should already been initialized |
| * |
| * Output: |
| * 1: successfully interpolated |
| * 0: reference(s) not available |
| */ |
| int av1_get_opfl_ref(AV1_COMMON *cm) { |
| OPFL_BUFFER_STRUCT *buf_struct = cm->opfl_buf_struct_ptr; |
| OPFL_BLK_INFO blk_info; |
| |
| av1_opfl_set_buf(cm, buf_struct); |
| |
| if (buf_struct->initialized == 1) { |
| int width = buf_struct->ref0_buf[0]->y_width; |
| int height = buf_struct->ref0_buf[0]->y_height; |
| |
| #if FRAME_LEVEL_OPFL |
| int blkwidth = width; |
| int blkheight = height; |
| #else |
| int blkwidth = OPFL_BLOCK_SIZE; |
| int blkheight = OPFL_BLOCK_SIZE; |
| #endif // FRAME_LEVEL_OPFL |
| |
| double numbh = (double)height / (double)blkheight; |
| double numbw = (double)width / (double)blkwidth; |
| |
| // for every block in the frame |
| for (int i = 0; i < numbh; i++) { |
| for (int j = 0; j < numbw; j++) { |
| if (i * blkheight >= height) continue; |
| if (j * blkwidth >= width) continue; |
| |
| blk_info.starth = i * blkheight; |
| blk_info.startw = j * blkwidth; |
| if (blk_info.starth + blkheight > height) { |
| blk_info.blk_height = height - blk_info.starth; |
| } else { |
| blk_info.blk_height = blkheight; |
| } |
| if (blk_info.startw + blkwidth > width) { |
| blk_info.blk_width = width - blk_info.startw; |
| } else { |
| blk_info.blk_width = blkwidth; |
| } |
| |
| blk_info.upbound = 0; |
| blk_info.lowerbound = 0; |
| blk_info.leftbound = 0; |
| blk_info.rightbound = 0; |
| |
| if (blk_info.starth == 0) { |
| blk_info.upbound = 1; |
| } |
| if (blk_info.starth + blk_info.blk_height >= height) { |
| blk_info.lowerbound = 1; |
| } |
| if (blk_info.startw == 0) { |
| blk_info.leftbound = 1; |
| } |
| if (blk_info.startw + blk_info.blk_width >= width) { |
| blk_info.rightbound = 1; |
| } |
| av1_optical_flow_get_ref(buf_struct, blk_info); |
| } |
| } |
| |
| // av1_opfl_free_buf(buf_struct); |
| return 1; |
| } else { |
| return 0; |
| } |
| } |
| |
| /* |
| * Initialize and set the buffer for optical flow estimation |
| */ |
| void av1_opfl_set_buf(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct) { |
| startt = clock(); |
| clock_t starti = clock(); |
| |
| int cur_offset = cm->frame_offset; |
| double dst_pos = -1; |
| |
| // find the two nearest bi-directional refs |
| // these are the refs between which we do optical flow |
| int left_idx = -1, left_offset = -1, right_idx = -1, right_offset = -1; |
| int left_chosen = NONE_FRAME, right_chosen = NONE_FRAME; |
| opfl_get_closest_refs(cm, &left_idx, &left_offset, &left_chosen, &right_idx, |
| &right_offset, &right_chosen); |
| |
| // check all other available bi-direcional ref pairs |
| // sorted by distance between refs |
| int left_idxs[MAX_NUM_REF_PAIR], left_offsets[MAX_NUM_REF_PAIR], |
| left_chosens[MAX_NUM_REF_PAIR], right_idxs[MAX_NUM_REF_PAIR], |
| right_offsets[MAX_NUM_REF_PAIR], right_chosens[MAX_NUM_REF_PAIR]; |
| opfl_select_best_ref_pairs(cm, left_idxs, left_offsets, left_chosens, |
| right_idxs, right_offsets, right_chosens, left_idx, |
| right_idx); |
| |
| // if no available refs on both sides, don't do optical flow |
| // TODO(bohanli): this should only happen for key frame and altref (?) |
| // If we disable it manually, mismatch happens (why?) |
| if (left_idx < 0 || right_idx < 0) { |
| buf_struct->initialized = 0; |
| return; |
| } |
| // set buffer ptrs to co-located ref, left ref and right ref |
| buf_struct->dst_buf = cm->opfl_ref_frame; |
| YV12_BUFFER_CONFIG *left = &(cm->buffer_pool->frame_bufs[left_idx].buf); |
| YV12_BUFFER_CONFIG *right = &(cm->buffer_pool->frame_bufs[right_idx].buf); |
| buf_struct->ref0_buf[0] = left; |
| buf_struct->ref1_buf[0] = right; |
| |
| // set params for opfl |
| buf_struct->opfl_refs[0] = left_chosen; |
| buf_struct->opfl_refs[1] = right_chosen; |
| dst_pos = ((double)(cur_offset - left_offset)) / |
| ((double)(right_offset - left_offset)); |
| buf_struct->dst_pos = dst_pos; |
| buf_struct->left_offset = left_offset; |
| buf_struct->cur_offset = cur_offset; |
| buf_struct->right_offset = right_offset; |
| |
| int width = buf_struct->ref0_buf[0]->y_width, |
| height = buf_struct->ref0_buf[0]->y_height; |
| int wid, hgt; |
| |
| // allocate buffers |
| av1_opfl_alloc_buf(cm, buf_struct); |
| |
| // calculate the derivatives |
| OPFL_BLK_INFO blk_info; |
| blk_info.starth = 0; |
| blk_info.startw = 0; |
| blk_info.blk_height = height; |
| blk_info.blk_width = width; |
| blk_info.upbound = 1; |
| blk_info.lowerbound = 1; |
| blk_info.leftbound = 1; |
| blk_info.rightbound = 1; |
| for (int l = 0; l < MAX_OPFL_LEVEL; l++) { |
| #if USE_BLK_DERIVATIVE |
| opfl_get_derivatives(buf_struct->ori_Ex0_buf[l], buf_struct->ori_Ey0_buf[l], |
| buf_struct->ori_Et0_buf[l], buf_struct->ref0_buf[0], |
| buf_struct->ref0_buf[0], buf_struct->ref0_buf[0], |
| buf_struct->ref0_buf[0], dst_pos, l, 0, blk_info); |
| opfl_get_derivatives(buf_struct->ori_Ex1_buf[l], buf_struct->ori_Ey1_buf[l], |
| buf_struct->ori_Et1_buf[l], buf_struct->ref1_buf[0], |
| buf_struct->ref1_buf[0], buf_struct->ref1_buf[0], |
| buf_struct->ref1_buf[0], dst_pos, l, 0, blk_info); |
| #else |
| opfl_get_derivatives(buf_struct->ori_Ex0_buf[l], buf_struct->ori_Ey0_buf[l], |
| buf_struct->ori_Et0_buf[l], buf_struct->ref0_buf[l], |
| buf_struct->ref0_buf[l], buf_struct->ref0_buf[l], |
| buf_struct->ref0_buf[l], dst_pos, l, 1, blk_info); |
| opfl_get_derivatives(buf_struct->ori_Ex1_buf[l], buf_struct->ori_Ey1_buf[l], |
| buf_struct->ori_Et1_buf[l], buf_struct->ref1_buf[l], |
| buf_struct->ref1_buf[l], buf_struct->ref1_buf[l], |
| buf_struct->ref1_buf[l], dst_pos, l, 1, blk_info); |
| #endif |
| } |
| |
| // set up initial motion filed |
| int_mv *left_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv)); |
| int_mv *right_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv)); |
| |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| left_mv[i * cm->mi_cols + j].as_int = INVALID_MV; |
| right_mv[i * cm->mi_cols + j].as_int = INVALID_MV; |
| } |
| } |
| |
| #if OPFL_EXP_INIT |
| // temp mv buffers |
| int_mv *left_most_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv)); |
| int_mv *right_most_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv)); |
| int_mv *left_temp_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv)); |
| int_mv *right_temp_mv = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv)); |
| int *is_first_valid = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int)); |
| |
| // get the motion field between the two selected refs |
| opfl_set_init_motion(cm, buf_struct, left_idx, right_idx, left_offset, |
| right_offset, left_mv, right_mv); |
| #if OPFL_DERIVE_INIT_MV |
| opfl_derive_init_mv(cm, buf_struct, left_idx, right_idx, left_offset, |
| right_offset, left_mv, right_mv); |
| #endif |
| #if OPFL_CHECK_INIT_MV |
| // copy the mvs from the nearest refs to a temp buffer |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| left_most_mv[i * cm->mi_cols + j].as_int = |
| left_mv[i * cm->mi_cols + j].as_int; |
| right_most_mv[i * cm->mi_cols + j].as_int = |
| right_mv[i * cm->mi_cols + j].as_int; |
| if (left_mv[i * cm->mi_cols + j].as_int != INVALID_MV && |
| right_mv[i * cm->mi_cols + j].as_int != INVALID_MV) { |
| is_first_valid[i * cm->mi_cols + j] = 1; |
| } else { |
| is_first_valid[i * cm->mi_cols + j] = 0; |
| } |
| } |
| } |
| // fill the holes of the mvs for comparison later |
| opfl_fill_mv(left_most_mv, cm->mi_cols, cm->mi_rows); |
| opfl_fill_mv(right_most_mv, cm->mi_cols, cm->mi_rows); |
| |
| // now for the other candidate pairs, find there associated motion vectors |
| for (int k = 0; k < 3; k++) { |
| if (left_idxs[k] < 0 || right_idxs[k] < 0) break; |
| opfl_set_init_motion(cm, buf_struct, left_idxs[k], right_idxs[k], |
| left_offsets[k], right_offsets[k], left_temp_mv, |
| right_temp_mv); |
| // for each block, see if the new temp mv is better |
| // than what we already have |
| opfl_update_init_motion(cm, buf_struct, left_most_mv, right_most_mv, |
| left_offset, right_offset, left_temp_mv, |
| right_temp_mv, left_offsets[k], right_offsets[k], |
| is_first_valid, left_mv, right_mv); |
| } |
| #endif |
| |
| aom_free(left_most_mv); |
| aom_free(right_most_mv); |
| aom_free(left_temp_mv); |
| aom_free(right_temp_mv); |
| aom_free(is_first_valid); |
| #else |
| // use all available initialization of motion field |
| TPL_MV_REF *tpl_mvs_base = cm->cur_frame->tpl_mvs; |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| for (int k = 0; k < MFMV_STACK_SIZE; k++) { |
| if (tpl_mvs_base[i * cm->mi_stride + j] |
| .mfmv[left_chosen - LAST_FRAME][k] |
| .as_int != INVALID_MV) { |
| left_mv[i * cm->mi_cols + j].as_int = |
| tpl_mvs_base[i * cm->mi_stride + j] |
| .mfmv[left_chosen - LAST_FRAME][k] |
| .as_int; |
| break; |
| } |
| } |
| for (int k = 0; k < MFMV_STACK_SIZE; k++) { |
| if (tpl_mvs_base[i * cm->mi_stride + j] |
| .mfmv[right_chosen - LAST_FRAME][k] |
| .as_int != INVALID_MV) { |
| right_mv[i * cm->mi_cols + j].as_int = |
| tpl_mvs_base[i * cm->mi_stride + j] |
| .mfmv[right_chosen - LAST_FRAME][k] |
| .as_int; |
| break; |
| } |
| } |
| } |
| } |
| #endif |
| |
| // copy the initialized motions to each level |
| for (int l = 0; l < MAX_OPFL_LEVEL; l++) { |
| wid = width >> l; |
| hgt = height >> l; |
| |
| create_motion_field(left_mv, right_mv, buf_struct->init_mv_buf[l], |
| #if OPFL_INIT_WT |
| buf_struct->init_mv_wts[l], |
| #endif |
| width, height, wid, hgt, wid + 2 * AVG_MF_BORDER, |
| dst_pos); |
| |
| // fill in possible "holes" in the initialization |
| fill_create_motion_field(left_mv, right_mv, buf_struct->init_mv_buf[l], |
| width, height, wid, hgt, wid + 2 * AVG_MF_BORDER); |
| #if OPFL_INIT_MOTION_SEARCH |
| opfl_init_motion_search( |
| left_mv, right_mv, buf_struct->init_mv_buf[l], width, height, wid, hgt, |
| wid + 2 * AVG_MF_BORDER, dst_pos, &blk_info, |
| buf_struct->ref0_buf[0]->y_buffer, buf_struct->ref1_buf[0]->y_buffer, |
| buf_struct->ref0_buf[0]->y_stride); |
| #endif |
| |
| #if DUMP_OPFL |
| if (l == 0) { |
| int mvstr = wid + 2 * AVG_MF_BORDER; |
| DB_MV *mv_start = |
| buf_struct->init_mv_buf[l] + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| |
| warp_optical_flow_fwd(buf_struct->ref0_buf[l], buf_struct->ref1_buf[l], |
| mv_start, mvstr, buf_struct->ref0_warped_buf[l], |
| dst_pos, l, !USE_BLK_DERIVATIVE, blk_info); |
| |
| warp_optical_flow_back(buf_struct->ref1_buf[l], buf_struct->ref0_buf[l], |
| mv_start, mvstr, buf_struct->ref1_warped_buf[l], |
| 1 - dst_pos, l, !USE_BLK_DERIVATIVE, blk_info); |
| // int srcstr = buf_struct->ref0_warped_buf[0]->y_stride; |
| // for (int i = 0; i < height; i++) { |
| // for (int j = 0; j < width; j++) { |
| // if (left_mv[i/4*wid/4+j/4].as_int == INVALID_MV) { |
| // buf_struct->ref0_warped_buf[0]->y_buffer[i*srcstr+j] = 0; |
| // buf_struct->ref1_warped_buf[0]->y_buffer[i*srcstr+j] = 0; |
| // } |
| // } |
| // } |
| |
| write_image_opfl(buf_struct->ref0_buf[l], "init_dump.yuv"); |
| write_image_opfl(buf_struct->ref0_warped_buf[l], "init_dump.yuv"); |
| write_image_opfl(buf_struct->ref1_warped_buf[l], "init_dump.yuv"); |
| write_image_opfl(buf_struct->ref1_buf[l], "init_dump.yuv"); |
| } |
| #endif |
| } |
| |
| aom_free(left_mv); |
| aom_free(right_mv); |
| |
| // warped references according to initialization for future use |
| blk_info.starth = 0; |
| blk_info.startw = 0; |
| for (int l = 0; l < MAX_OPFL_LEVEL; l++) { |
| if (USE_BLK_DERIVATIVE && l != 0) { |
| continue; |
| } |
| wid = width >> l; |
| hgt = height >> l; |
| blk_info.blk_height = hgt; |
| blk_info.blk_width = wid; |
| |
| int mvstr = wid + 2 * AVG_MF_BORDER; |
| DB_MV *mv_start = |
| buf_struct->init_mv_buf[l] + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| |
| if (l == 0 || USE_BLK_DERIVATIVE) { |
| warp_optical_flow_fwd(buf_struct->ref0_buf[l], buf_struct->ref1_buf[l], |
| mv_start, mvstr, buf_struct->ref0_warped_buf[l], |
| dst_pos, l, !USE_BLK_DERIVATIVE, blk_info); |
| } else { |
| warp_optical_flow_fwd_bilinear(buf_struct->ref0_buf[l], |
| buf_struct->ref1_buf[l], mv_start, mvstr, |
| buf_struct->ref0_warped_buf[l], dst_pos, l, |
| !USE_BLK_DERIVATIVE, blk_info); |
| } |
| if (l == 0 || USE_BLK_DERIVATIVE) { |
| warp_optical_flow_back(buf_struct->ref1_buf[l], buf_struct->ref0_buf[l], |
| mv_start, mvstr, buf_struct->ref1_warped_buf[l], |
| 1 - dst_pos, l, !USE_BLK_DERIVATIVE, blk_info); |
| } else { |
| warp_optical_flow_back_bilinear( |
| buf_struct->ref1_buf[l], buf_struct->ref0_buf[l], mv_start, mvstr, |
| buf_struct->ref1_warped_buf[l], 1 - dst_pos, l, !USE_BLK_DERIVATIVE, |
| blk_info); |
| } |
| aom_yv12_extend_frame_borders_c(buf_struct->ref0_warped_buf[l]); |
| aom_yv12_extend_frame_borders_c(buf_struct->ref1_warped_buf[l]); |
| } |
| buf_struct->initialized = 1; |
| |
| clock_t endi = clock(); |
| timeinit += (double)(endi - starti) / CLOCKS_PER_SEC; |
| } |
| |
| /* |
| * Use optical flow method to interpolate a reference frame. |
| * |
| * Input: |
| * buf_struct: containing necessary buffers |
| * blk_info: information on the current block (size, location, etc.) |
| */ |
| void av1_optical_flow_get_ref(OPFL_BUFFER_STRUCT *buf_struct, |
| OPFL_BLK_INFO blk_info) { |
| int width, height, start_h, start_w, wid, hgt, sh, sw; |
| width = blk_info.blk_width; |
| height = blk_info.blk_height; |
| start_h = blk_info.starth; |
| start_w = blk_info.startw; |
| |
| // temporary buffers for MF median filtering |
| double mv_r[25], mv_c[25], left[25], right[25]; |
| |
| // initialize buffers |
| DB_MV **mf_last = buf_struct->mf_last; |
| DB_MV **mf_new = buf_struct->mf_new; |
| DB_MV **mf_med = buf_struct->mf_med; |
| int l = MAX_OPFL_LEVEL - 1; |
| wid = width >> l; |
| hgt = height >> l; |
| // the mv stride for init mv is based on the frame buffer width |
| int imvstr = buf_struct->ref0_buf[0]->y_width; |
| imvstr = (imvstr >> l) + 2 * AVG_MF_BORDER; |
| sh = start_h >> l; |
| sw = start_w >> l; |
| // mv stride here is based on block width at each level |
| int str = wid + 2 * AVG_MF_BORDER; |
| |
| // initialize with init mvs |
| for (int i = 0; i < hgt; i++) { |
| for (int j = 0; j < wid; j++) { |
| mf_last[l][(i + AVG_MF_BORDER) * str + j + AVG_MF_BORDER] = |
| buf_struct->init_mv_buf[l][(i + sh + AVG_MF_BORDER) * imvstr + j + |
| sw + AVG_MF_BORDER]; |
| } |
| } |
| |
| // estimate optical flow at each level |
| for (l = MAX_OPFL_LEVEL - 1; l >= 0; l--) { |
| wid = width >> l; |
| hgt = height >> l; |
| sh = start_h >> l; |
| sw = start_w >> l; |
| int mvstr = wid + 2 * AVG_MF_BORDER; |
| imvstr = buf_struct->ref0_buf[0]->y_width; |
| imvstr = (imvstr >> l) + 2 * AVG_MF_BORDER; |
| // use optical flow to refine our motion field |
| #if USE_BLK_DERIVATIVE |
| refine_motion_field(buf_struct, mf_last[l], mf_new[l], l, |
| buf_struct->dst_pos, 0, blk_info); |
| #else |
| refine_motion_field(buf_struct, mf_last[l], mf_new[l], l, |
| buf_struct->dst_pos, 1, blk_info); |
| #endif |
| DB_MV *mf_start_new = mf_new[l] + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| DB_MV *mf_start_med = mf_med[l] + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| |
| clock_t startm, endm; |
| startm = clock(); |
| DB_MV *initmv = buf_struct->init_mv_buf[l] + (sh + AVG_MF_BORDER) * imvstr + |
| sw + AVG_MF_BORDER; |
| for (int i = 0; i < hgt; i++) { |
| for (int j = 0; j < wid; j++) { |
| if (USE_MEDIAN_FILTER) { |
| int c = 0; |
| for (int h = -2; h < 3; h++) { |
| for (int w = -2; w < 3; w++) { |
| if (i + h < 0 || i + h >= hgt || j + w < 0 || j + w >= wid) { |
| // mv_r[c] = initmv[(i + h) * imvstr + j + w].row; |
| // mv_c[c] = initmv[(i + h) * imvstr + j + w].col; |
| continue; |
| } else { |
| mv_r[c] = mf_start_new[(i + h) * mvstr + j + w].row; |
| mv_c[c] = mf_start_new[(i + h) * mvstr + j + w].col; |
| } |
| c++; |
| } |
| } |
| mf_start_med[i * mvstr + j].row = |
| iter_median_double(mv_r, left, right, c, c / 2); |
| mf_start_med[i * mvstr + j].col = |
| iter_median_double(mv_c, left, right, c, c / 2); |
| } else { |
| mf_start_med[i * mvstr + j].row = mf_start_new[i * mvstr + j].row; |
| mf_start_med[i * mvstr + j].col = mf_start_new[i * mvstr + j].col; |
| } |
| } |
| } |
| endm = clock(); |
| timemed += (double)(endm - startm) / CLOCKS_PER_SEC; |
| |
| if (l != 0) { |
| // upscale mv to the next lower level |
| int mvstr_next = wid * 2 + 2 * AVG_MF_BORDER; |
| DB_MV *mf_start_next = |
| mf_last[l - 1] + AVG_MF_BORDER * mvstr_next + AVG_MF_BORDER; |
| upscale_mv_by_2(mf_start_med, wid, hgt, mvstr, mf_start_next, mvstr_next); |
| } else { |
| pad_motion_field_border(mf_start_med, wid, hgt, mvstr); |
| } |
| } |
| |
| // interpolate to get our reference frame |
| clock_t start, end; |
| start = clock(); |
| interp_optical_flow(buf_struct->ref0_buf[0], buf_struct->ref1_buf[0], |
| mf_med[0], buf_struct->dst_buf, buf_struct->dst_pos, |
| blk_info); |
| end = clock(); |
| timesub += (double)(end - start) / CLOCKS_PER_SEC; |
| |
| // pad border if at border of frame |
| int fstr = buf_struct->dst_buf->y_stride; |
| int fstruv = buf_struct->dst_buf->uv_stride; |
| int border = buf_struct->dst_buf->border; |
| int fheight = buf_struct->dst_buf->y_crop_height; |
| int fwidth = buf_struct->dst_buf->y_crop_width; |
| uint8_t *ydst = buf_struct->dst_buf->y_buffer; |
| uint8_t *udst = buf_struct->dst_buf->u_buffer; |
| uint8_t *vdst = buf_struct->dst_buf->v_buffer; |
| |
| int topb = 0, bottomb = 0, leftb = 0, rightb = 0; |
| if (blk_info.upbound) { |
| topb = border; |
| } |
| if (blk_info.lowerbound) { |
| bottomb = border + buf_struct->dst_buf->y_height - fheight; |
| } |
| if (blk_info.leftbound) { |
| leftb = border; |
| } |
| if (blk_info.rightbound) { |
| rightb = border + buf_struct->dst_buf->y_width - fwidth; |
| } |
| |
| extend_plane_opfl(ydst, fstr, fwidth, fheight, topb, leftb, bottomb, rightb); |
| extend_plane_opfl(udst, fstruv, fwidth / 2, fheight / 2, topb / 2, leftb / 2, |
| bottomb / 2, rightb / 2); |
| extend_plane_opfl(vdst, fstruv, fwidth / 2, fheight / 2, topb / 2, leftb / 2, |
| bottomb / 2, rightb / 2); |
| |
| // copy the final motion field to the frame level motion field buffer |
| int mvstr = width + 2 * AVG_MF_BORDER; |
| DB_MV *mf_start_med = mf_med[0] + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| int f_mvstr = buf_struct->mf_frame_stride; |
| DB_MV *mf_frame_start = buf_struct->mf_frame_start; |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| mf_frame_start[(i + start_h) * f_mvstr + j + start_w] = |
| mf_start_med[i * mvstr + j]; |
| } |
| } |
| // if at frame boundary, extend the mf buffer |
| opfl_extend_frame_mf(buf_struct, blk_info); |
| return; |
| } |
| |
| /* |
| * Allocate buffers in opfl |
| */ |
| void av1_opfl_alloc_buf(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct) { |
| // allocate yuv buffers for opfl use |
| int width = buf_struct->ref0_buf[0]->y_width, |
| height = buf_struct->ref0_buf[0]->y_height; |
| int wid, hgt; |
| for (int l = 0; l < MAX_OPFL_LEVEL; l++) { |
| wid = width >> l; |
| hgt = height >> l; |
| if (l == 0) { |
| buf_struct->ref0_warped_buf[l] = |
| aom_calloc(1, sizeof(YV12_BUFFER_CONFIG)); |
| buf_struct->ref1_warped_buf[l] = |
| aom_calloc(1, sizeof(YV12_BUFFER_CONFIG)); |
| aom_alloc_frame_buffer(buf_struct->ref0_warped_buf[l], wid, hgt, 1, 1, 0, |
| AOM_BORDER_IN_PIXELS, 0); |
| aom_alloc_frame_buffer(buf_struct->ref1_warped_buf[l], wid, hgt, 1, 1, 0, |
| AOM_BORDER_IN_PIXELS, 0); |
| } |
| #if !USE_BLK_DERIVATIVE |
| if (l != 0) { |
| buf_struct->ref0_buf[l] = aom_calloc(1, sizeof(YV12_BUFFER_CONFIG)); |
| buf_struct->ref1_buf[l] = aom_calloc(1, sizeof(YV12_BUFFER_CONFIG)); |
| aom_alloc_frame_buffer(buf_struct->ref0_buf[l], wid, hgt, 1, 1, 0, |
| AOM_BORDER_IN_PIXELS, 1); |
| aom_alloc_frame_buffer(buf_struct->ref1_buf[l], wid, hgt, 1, 1, 0, |
| AOM_BORDER_IN_PIXELS, 1); |
| buf_struct->ref0_warped_buf[l] = |
| aom_calloc(1, sizeof(YV12_BUFFER_CONFIG)); |
| buf_struct->ref1_warped_buf[l] = |
| aom_calloc(1, sizeof(YV12_BUFFER_CONFIG)); |
| aom_alloc_frame_buffer(buf_struct->ref0_warped_buf[l], wid, hgt, 1, 1, 0, |
| AOM_BORDER_IN_PIXELS, 1); |
| aom_alloc_frame_buffer(buf_struct->ref1_warped_buf[l], wid, hgt, 1, 1, 0, |
| AOM_BORDER_IN_PIXELS, 1); |
| } |
| #endif |
| } |
| #if !USE_BLK_DERIVATIVE |
| // scale the buffers for pyramid structure |
| // TODO(bohan): find out the necessary space for temp_buffer |
| uint8_t *temp_buffer = aom_calloc(width * 8, sizeof(uint8_t)); |
| for (int l = 1; l < MAX_OPFL_LEVEL; l++) { |
| aom_scale_frame(buf_struct->ref0_buf[l - 1], buf_struct->ref0_buf[l], |
| temp_buffer, 8, 2, 1, 2, 1, 0); |
| aom_scale_frame(buf_struct->ref1_buf[l - 1], buf_struct->ref1_buf[l], |
| temp_buffer, 8, 2, 1, 2, 1, 0); |
| aom_yv12_extend_frame_borders_c(buf_struct->ref0_buf[l]); |
| aom_yv12_extend_frame_borders_c(buf_struct->ref1_buf[l]); |
| } |
| aom_free(temp_buffer); |
| #endif |
| |
| // allocate initial motion field buffers |
| for (int l = 0; l < MAX_OPFL_LEVEL; l++) { |
| wid = width >> l; |
| hgt = height >> l; |
| buf_struct->init_mv_buf[l] = aom_calloc( |
| (wid + 2 * AVG_MF_BORDER) * (hgt + 2 * AVG_MF_BORDER), sizeof(DB_MV)); |
| #if OPFL_INIT_WT |
| buf_struct->init_mv_wts[l] = aom_calloc( |
| (wid + 2 * AVG_MF_BORDER) * (hgt + 2 * AVG_MF_BORDER), sizeof(double)); |
| #endif |
| } |
| |
| // allocate frame level motion buffer |
| buf_struct->mf_frame = aom_calloc( |
| (width + 2 * OPFL_MF_FRAME_BORDER) * (height + 2 * OPFL_MF_FRAME_BORDER), |
| sizeof(DB_MV)); |
| buf_struct->mf_frame_stride = width + 2 * OPFL_MF_FRAME_BORDER; |
| buf_struct->mf_frame_start = |
| buf_struct->mf_frame + |
| OPFL_MF_FRAME_BORDER * buf_struct->mf_frame_stride + OPFL_MF_FRAME_BORDER; |
| |
| // allocate motion field buffer for each pyramid level |
| int blkwid, blkhgt; |
| #if FRAME_LEVEL_OPFL |
| blkwid = width; |
| blkhgt = height; |
| #else |
| blkwid = OPFL_BLOCK_SIZE; |
| blkhgt = OPFL_BLOCK_SIZE; |
| #endif |
| for (int l = 0; l < MAX_OPFL_LEVEL; l++) { |
| wid = blkwid >> l; |
| hgt = blkhgt >> l; |
| buf_struct->mf_last[l] = aom_calloc( |
| (wid + 2 * AVG_MF_BORDER) * (hgt + 2 * AVG_MF_BORDER), sizeof(DB_MV)); |
| buf_struct->mf_new[l] = aom_calloc( |
| (wid + 2 * AVG_MF_BORDER) * (hgt + 2 * AVG_MF_BORDER), sizeof(DB_MV)); |
| buf_struct->mf_med[l] = aom_calloc( |
| (wid + 2 * AVG_MF_BORDER) * (hgt + 2 * AVG_MF_BORDER), sizeof(DB_MV)); |
| |
| // allocate buffers |
| if (l != 0 && USE_BLK_DERIVATIVE) continue; |
| buf_struct->buffer0[l] = aom_calloc(1, sizeof(YV12_BUFFER_CONFIG)); |
| buf_struct->buffer1[l] = aom_calloc(1, sizeof(YV12_BUFFER_CONFIG)); |
| aom_alloc_frame_buffer(buf_struct->buffer0[l], wid, hgt, 1, 1, 0, |
| AOM_BORDER_IN_PIXELS, 0); |
| aom_alloc_frame_buffer(buf_struct->buffer1[l], wid, hgt, 1, 1, 0, |
| AOM_BORDER_IN_PIXELS, 0); |
| } |
| |
| // allocate done flag buffer for each blockc |
| int wblk, hblk; |
| wblk = (width + blkwid - 1) / blkwid; |
| hblk = (height + blkhgt - 1) / blkhgt; |
| buf_struct->done_flag = aom_calloc(wblk * hblk, sizeof(int)); |
| |
| // allocate derivative buffers |
| // temp derivative buffer for each iter |
| buf_struct->Ex = aom_calloc(width * height, sizeof(double)); |
| buf_struct->Ey = aom_calloc(width * height, sizeof(double)); |
| buf_struct->Et = aom_calloc(width * height, sizeof(double)); |
| // initial derivative in the refs |
| for (int l = 0; l < MAX_OPFL_LEVEL; l++) { |
| wid = width >> l; |
| hgt = height >> l; |
| |
| buf_struct->ori_Ex0_buf[l] = aom_calloc(wid * hgt, sizeof(double)); |
| buf_struct->ori_Ey0_buf[l] = aom_calloc(wid * hgt, sizeof(double)); |
| buf_struct->ori_Et0_buf[l] = aom_calloc(wid * hgt, sizeof(double)); |
| |
| buf_struct->ori_Ex1_buf[l] = aom_calloc(wid * hgt, sizeof(double)); |
| buf_struct->ori_Ey1_buf[l] = aom_calloc(wid * hgt, sizeof(double)); |
| buf_struct->ori_Et1_buf[l] = aom_calloc(wid * hgt, sizeof(double)); |
| } |
| } |
| |
| /* |
| * Free the allocated buffers in buf_struct. |
| */ |
| void av1_opfl_free_buf(OPFL_BUFFER_STRUCT *buf_struct) { |
| if (buf_struct->initialized != 1) return; |
| |
| for (int l = 0; l < MAX_OPFL_LEVEL; l++) { |
| if (l == 0) { |
| aom_free_frame_buffer(buf_struct->ref0_warped_buf[l]); |
| aom_free_frame_buffer(buf_struct->ref1_warped_buf[l]); |
| aom_free(buf_struct->ref0_warped_buf[l]); |
| aom_free(buf_struct->ref1_warped_buf[l]); |
| } |
| #if !USE_BLK_DERIVATIVE |
| if (l != 0) { |
| aom_free_frame_buffer(buf_struct->ref0_buf[l]); |
| aom_free_frame_buffer(buf_struct->ref1_buf[l]); |
| aom_free(buf_struct->ref0_buf[l]); |
| aom_free(buf_struct->ref1_buf[l]); |
| aom_free_frame_buffer(buf_struct->ref0_warped_buf[l]); |
| aom_free_frame_buffer(buf_struct->ref1_warped_buf[l]); |
| aom_free(buf_struct->ref0_warped_buf[l]); |
| aom_free(buf_struct->ref1_warped_buf[l]); |
| } |
| #endif |
| } |
| for (int l = 0; l < MAX_OPFL_LEVEL; l++) { |
| aom_free(buf_struct->init_mv_buf[l]); |
| #if OPFL_INIT_WT |
| aom_free(buf_struct->init_mv_wts[l]); |
| #endif |
| } |
| aom_free(buf_struct->mf_frame); |
| buf_struct->mf_frame_start = NULL; |
| aom_free(buf_struct->Ex); |
| aom_free(buf_struct->Ey); |
| aom_free(buf_struct->Et); |
| for (int l = 0; l < MAX_OPFL_LEVEL; l++) { |
| aom_free(buf_struct->mf_last[l]); |
| aom_free(buf_struct->mf_new[l]); |
| aom_free(buf_struct->mf_med[l]); |
| |
| aom_free(buf_struct->ori_Ex0_buf[l]); |
| aom_free(buf_struct->ori_Ey0_buf[l]); |
| aom_free(buf_struct->ori_Et0_buf[l]); |
| |
| aom_free(buf_struct->ori_Ex1_buf[l]); |
| aom_free(buf_struct->ori_Ey1_buf[l]); |
| aom_free(buf_struct->ori_Et1_buf[l]); |
| |
| if (l != 0 && USE_BLK_DERIVATIVE) continue; |
| aom_free_frame_buffer(buf_struct->buffer0[l]); |
| aom_free_frame_buffer(buf_struct->buffer1[l]); |
| aom_free(buf_struct->buffer0[l]); |
| aom_free(buf_struct->buffer1[l]); |
| } |
| aom_free(buf_struct->done_flag); |
| endt = clock(); |
| timetotal += (double)(endt - startt) / CLOCKS_PER_SEC; |
| #if OPFL_OUTPUT_TIME |
| // TODO(bohan): output time usage for debug for now. |
| printf( |
| "\ninit time: %.4f, der time: %.4f, sub time: %.4f, median time: %.4f, " |
| "solve time: %.4f, totaltime: %.4f\n", |
| timeinit, timeder, timesub, timemed, timesolve, timetotal); |
| fflush(stdout); |
| #endif |
| #if DUMP_OPFL |
| // TODO(bohan): dump the frames for now for debug |
| char filename[20] = "of_dump.yuv"; |
| write_image_opfl(buf_struct->ref0_buf[0], filename); |
| write_image_opfl(buf_struct->dst_buf, filename); |
| write_image_opfl(buf_struct->ref1_buf[0], filename); |
| char idxfilename[20] = "of_data.txt"; |
| FILE *f_idx = fopen(idxfilename, "a"); |
| fprintf(f_idx, "%d %d %d\n", buf_struct->left_offset, buf_struct->cur_offset, |
| buf_struct->right_offset); |
| fclose(f_idx); |
| #endif |
| } |
| |
| /* |
| * get how many motion vector initializations exist between two refs |
| */ |
| int get_num_MV_between_refs(AV1_COMMON *cm, int left_idx, int left_offset, |
| int right_idx, int right_offset) { |
| int totalNum = 0; |
| // process left ref |
| MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[left_idx].mvs; |
| int lst_frame_idx = cm->buffer_pool->frame_bufs[left_idx].lst_frame_offset; |
| int alt_frame_idx = cm->buffer_pool->frame_bufs[left_idx].alt_frame_offset; |
| int gld_frame_idx = cm->buffer_pool->frame_bufs[left_idx].gld_frame_offset; |
| #if CONFIG_EXT_REFS |
| int lst2_frame_idx = cm->buffer_pool->frame_bufs[left_idx].lst2_frame_offset; |
| int lst3_frame_idx = cm->buffer_pool->frame_bufs[left_idx].lst3_frame_offset; |
| int bwd_frame_idx = cm->buffer_pool->frame_bufs[left_idx].bwd_frame_offset; |
| #endif |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| MV_REF *mv_ref = &mv_ref_base[i * cm->mi_cols + j]; |
| MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0], |
| mv_ref->ref_frame[1] }; |
| if (ref_frame[0] == OPFL_FRAME && ref_frame[1] == NONE_FRAME) { |
| ref_frame[0] = mv_ref->opfl_ref_frame[0]; |
| ref_frame[1] = mv_ref->opfl_ref_frame[1]; |
| } |
| |
| for (int r = 0; r < 2; r++) { |
| int ref_offset; |
| switch (ref_frame[r]) { |
| // case LAST_FRAME: ref_offset = lst_frame_idx; break; |
| case ALTREF_FRAME: |
| ref_offset = alt_frame_idx; |
| break; |
| // case GOLDEN_FRAME: ref_offset = gld_frame_idx; break; |
| #if CONFIG_EXT_REFS |
| // case LAST2_FRAME: ref_offset = lst2_frame_idx; break; |
| // case LAST3_FRAME: ref_offset = lst3_frame_idx; break; |
| case BWDREF_FRAME: ref_offset = bwd_frame_idx; break; |
| #endif |
| default: ref_offset = -2; |
| } |
| if (ref_offset == right_offset && ref_offset >= 0) { |
| totalNum++; |
| } |
| } |
| } |
| } |
| // process right ref |
| mv_ref_base = cm->buffer_pool->frame_bufs[right_idx].mvs; |
| lst_frame_idx = cm->buffer_pool->frame_bufs[right_idx].lst_frame_offset; |
| alt_frame_idx = cm->buffer_pool->frame_bufs[right_idx].alt_frame_offset; |
| gld_frame_idx = cm->buffer_pool->frame_bufs[right_idx].gld_frame_offset; |
| #if CONFIG_EXT_REFS |
| lst2_frame_idx = cm->buffer_pool->frame_bufs[right_idx].lst2_frame_offset; |
| lst3_frame_idx = cm->buffer_pool->frame_bufs[right_idx].lst3_frame_offset; |
| bwd_frame_idx = cm->buffer_pool->frame_bufs[right_idx].bwd_frame_offset; |
| #endif |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| MV_REF *mv_ref = &mv_ref_base[i * cm->mi_cols + j]; |
| MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0], |
| mv_ref->ref_frame[1] }; |
| |
| if (ref_frame[0] == OPFL_FRAME && ref_frame[1] == NONE_FRAME) { |
| ref_frame[0] = mv_ref->opfl_ref_frame[0]; |
| ref_frame[1] = mv_ref->opfl_ref_frame[1]; |
| } |
| |
| for (int r = 0; r < 2; r++) { |
| int ref_offset; |
| switch (ref_frame[r]) { |
| case LAST_FRAME: ref_offset = lst_frame_idx; break; |
| // case ALTREF_FRAME: ref_offset = alt_frame_idx; break; |
| case GOLDEN_FRAME: ref_offset = gld_frame_idx; break; |
| #if CONFIG_EXT_REFS |
| case LAST2_FRAME: ref_offset = lst2_frame_idx; break; |
| case LAST3_FRAME: |
| ref_offset = lst3_frame_idx; |
| break; |
| // case BWDREF_FRAME: ref_offset = bwd_frame_idx; break; |
| #endif |
| default: ref_offset = -2; |
| } |
| // only initialize with the same refs! |
| if (ref_offset == left_offset && ref_offset >= 0) { |
| // calculate mv to right |
| totalNum++; |
| } |
| } |
| } |
| } |
| return totalNum; |
| } |
| /* |
| * find and sort the best ref pairs by distance |
| */ |
| void opfl_select_best_ref_pairs(AV1_COMMON *cm, int *left_idx, int *left_offset, |
| int *left_chosen, int *right_idx, |
| int *right_offset, int *right_chosen, |
| int left_most_idx, int right_most_idx) { |
| int cur_offset = cm->frame_offset; |
| |
| int alt_buf_idx = cm->frame_refs[ALTREF_FRAME - LAST_FRAME].idx; |
| int lst_buf_idx = cm->frame_refs[LAST_FRAME - LAST_FRAME].idx; |
| int gld_buf_idx = cm->frame_refs[GOLDEN_FRAME - LAST_FRAME].idx; |
| |
| #if CONFIG_EXT_REFS |
| int lst2_buf_idx = cm->frame_refs[LAST2_FRAME - LAST_FRAME].idx; |
| int lst3_buf_idx = cm->frame_refs[LAST3_FRAME - LAST_FRAME].idx; |
| int bwd_buf_idx = cm->frame_refs[BWDREF_FRAME - LAST_FRAME].idx; |
| #endif |
| |
| int left_cand_idx[INTER_REFS_PER_FRAME], |
| left_cand_offset[INTER_REFS_PER_FRAME]; |
| int right_cand_idx[INTER_REFS_PER_FRAME], |
| right_cand_offset[INTER_REFS_PER_FRAME]; |
| for (int k = 0; k < INTER_REFS_PER_FRAME; k++) { |
| left_cand_idx[k] = -1; |
| right_cand_idx[k] = -1; |
| } |
| int numMV_cand[INTER_REFS_PER_FRAME][INTER_REFS_PER_FRAME]; |
| |
| int this_offset; |
| if (alt_buf_idx >= 0) { |
| this_offset = cm->cur_frame->alt_frame_offset; |
| if (this_offset > cur_offset) { |
| right_cand_idx[ALTREF_FRAME - LAST_FRAME] = alt_buf_idx; |
| right_cand_offset[ALTREF_FRAME - LAST_FRAME] = this_offset; |
| } else if (this_offset < cur_offset) { |
| left_cand_idx[ALTREF_FRAME - LAST_FRAME] = alt_buf_idx; |
| left_cand_offset[ALTREF_FRAME - LAST_FRAME] = this_offset; |
| } |
| } |
| if (lst_buf_idx >= 0) { |
| this_offset = cm->cur_frame->lst_frame_offset; |
| if (this_offset > cur_offset) { |
| right_cand_idx[LAST_FRAME - LAST_FRAME] = lst_buf_idx; |
| right_cand_offset[LAST_FRAME - LAST_FRAME] = this_offset; |
| } else if (this_offset < cur_offset) { |
| left_cand_idx[LAST_FRAME - LAST_FRAME] = lst_buf_idx; |
| left_cand_offset[LAST_FRAME - LAST_FRAME] = this_offset; |
| } |
| } |
| if (gld_buf_idx >= 0) { |
| this_offset = cm->cur_frame->gld_frame_offset; |
| if (this_offset > cur_offset) { |
| right_cand_idx[GOLDEN_FRAME - LAST_FRAME] = gld_buf_idx; |
| right_cand_offset[GOLDEN_FRAME - LAST_FRAME] = this_offset; |
| } else if (this_offset < cur_offset) { |
| left_cand_idx[GOLDEN_FRAME - LAST_FRAME] = gld_buf_idx; |
| left_cand_offset[GOLDEN_FRAME - LAST_FRAME] = this_offset; |
| } |
| } |
| #if CONFIG_EXT_REFS |
| if (lst2_buf_idx >= 0) { |
| this_offset = cm->cur_frame->lst2_frame_offset; |
| if (this_offset > cur_offset) { |
| right_cand_idx[LAST2_FRAME - LAST_FRAME] = lst2_buf_idx; |
| right_cand_offset[LAST2_FRAME - LAST_FRAME] = this_offset; |
| } else if (this_offset < cur_offset) { |
| left_cand_idx[LAST2_FRAME - LAST_FRAME] = lst2_buf_idx; |
| left_cand_offset[LAST2_FRAME - LAST_FRAME] = this_offset; |
| } |
| } |
| if (lst3_buf_idx >= 0) { |
| this_offset = cm->cur_frame->lst3_frame_offset; |
| if (this_offset > cur_offset) { |
| right_cand_idx[LAST3_FRAME - LAST_FRAME] = lst3_buf_idx; |
| right_cand_offset[LAST3_FRAME - LAST_FRAME] = this_offset; |
| } else if (this_offset < cur_offset) { |
| left_cand_idx[LAST3_FRAME - LAST_FRAME] = lst3_buf_idx; |
| left_cand_offset[LAST3_FRAME - LAST_FRAME] = this_offset; |
| } |
| } |
| if (bwd_buf_idx >= 0) { |
| this_offset = cm->cur_frame->bwd_frame_offset; |
| if (this_offset > cur_offset) { |
| right_cand_idx[BWDREF_FRAME - LAST_FRAME] = bwd_buf_idx; |
| right_cand_offset[BWDREF_FRAME - LAST_FRAME] = this_offset; |
| } else if (this_offset < cur_offset) { |
| left_cand_idx[BWDREF_FRAME - LAST_FRAME] = bwd_buf_idx; |
| left_cand_offset[BWDREF_FRAME - LAST_FRAME] = this_offset; |
| } |
| } |
| #endif |
| // got all candidates, now calculate the number of mvs available |
| // for each pair |
| for (int ll = LAST_FRAME - LAST_FRAME; ll < OPFL_FRAME - LAST_FRAME; ll++) { |
| for (int rr = LAST_FRAME - LAST_FRAME; rr < OPFL_FRAME - LAST_FRAME; rr++) { |
| numMV_cand[ll][rr] = 0; |
| if (ll == rr) continue; |
| if (left_cand_idx[ll] < 0 || right_cand_idx[rr] < 0) continue; |
| if (left_cand_idx[ll] == left_most_idx && |
| right_cand_idx[rr] == right_most_idx) |
| continue; |
| numMV_cand[ll][rr] = |
| get_num_MV_between_refs(cm, left_cand_idx[ll], left_cand_offset[ll], |
| right_cand_idx[rr], right_cand_offset[rr]); |
| } |
| } |
| // find the ones with most available mvs |
| int max_num_MV[MAX_NUM_REF_PAIR]; |
| for (int k = 0; k < MAX_NUM_REF_PAIR; k++) { |
| max_num_MV[k] = 0; |
| } |
| for (int k = 0; k < MAX_NUM_REF_PAIR; k++) { |
| left_idx[k] = -1; |
| right_idx[k] = -1; |
| for (int ll = LAST_FRAME - LAST_FRAME; ll < OPFL_FRAME - LAST_FRAME; ll++) { |
| for (int rr = LAST_FRAME - LAST_FRAME; rr < OPFL_FRAME - LAST_FRAME; |
| rr++) { |
| if (numMV_cand[ll][rr] == 0) continue; |
| int skip = 0; |
| for (int kk = 0; kk < k; kk++) { |
| if (left_cand_idx[ll] == left_idx[kk] && |
| right_cand_idx[rr] == right_idx[kk]) { |
| skip = 1; |
| break; |
| } |
| } |
| if (skip > 0) continue; |
| if (numMV_cand[ll][rr] > max_num_MV[k]) { |
| max_num_MV[k] = numMV_cand[ll][rr]; |
| left_idx[k] = left_cand_idx[ll]; |
| left_offset[k] = left_cand_offset[ll]; |
| left_chosen[k] = ll + LAST_FRAME; |
| right_idx[k] = right_cand_idx[rr]; |
| right_offset[k] = right_cand_offset[rr]; |
| right_chosen[k] = rr + LAST_FRAME; |
| } |
| } |
| } |
| // if (left_idx[k] < 0) |
| // break; |
| } |
| int total_dist[MAX_NUM_REF_PAIR]; |
| for (int k = 0; k < MAX_NUM_REF_PAIR; k++) { |
| if (left_idx[k] >= 0) |
| total_dist[k] = -left_offset[k] + right_offset[k]; |
| else |
| total_dist[k] = -1; |
| } |
| // bubble sort by distance to cur_frame (prefer shorter) |
| for (int i = 1; i < MAX_NUM_REF_PAIR; i++) { |
| if (total_dist[i] < 0) break; |
| for (int j = i - 1; j >= 0; j--) { |
| if (total_dist[j + 1] < total_dist[j]) { |
| // swap j+1 and j |
| int temp; |
| temp = left_idx[j]; |
| left_idx[j] = left_idx[j + 1]; |
| left_idx[j + 1] = temp; |
| temp = left_offset[j]; |
| left_offset[j] = left_offset[j + 1]; |
| left_offset[j + 1] = temp; |
| temp = left_chosen[j]; |
| left_chosen[j] = left_chosen[j + 1]; |
| left_chosen[j + 1] = temp; |
| temp = right_idx[j]; |
| right_idx[j] = right_idx[j + 1]; |
| right_idx[j + 1] = temp; |
| temp = right_offset[j]; |
| right_offset[j] = right_offset[j + 1]; |
| right_offset[j + 1] = temp; |
| temp = right_chosen[j]; |
| right_chosen[j] = right_chosen[j + 1]; |
| right_chosen[j + 1] = temp; |
| temp = total_dist[j]; |
| total_dist[j] = total_dist[j + 1]; |
| total_dist[j + 1] = temp; |
| temp = max_num_MV[j]; |
| max_num_MV[j] = max_num_MV[j + 1]; |
| max_num_MV[j + 1] = temp; |
| } else { |
| break; |
| } |
| } |
| } |
| // fix when ref_frames are pointing to the same idx |
| // TODO(bohan): any other possiblities? |
| for (int i = 0; i < MAX_NUM_REF_PAIR; i++) { |
| if (left_idx[i] < 0) continue; |
| if (right_chosen[i] == BWDREF_FRAME && bwd_buf_idx == alt_buf_idx) { |
| right_chosen[i] = ALTREF_FRAME; |
| } |
| // if (left_chosen[i] == LAST_FRAME && lst_buf_idx == gld_buf_idx) { |
| // left_chosen[i] = GOLDEN_FRAME; |
| // } |
| } |
| } |
| /* |
| * find the closest two-sided refs |
| */ |
| void opfl_get_closest_refs(AV1_COMMON *cm, int *left_idx_ptr, |
| int *left_offset_ptr, int *left_chosen_ptr, |
| int *right_idx_ptr, int *right_offset_ptr, |
| int *right_chosen_ptr) { |
| int left_idx = -1, left_offset = -1, right_idx = -1, right_offset = -1; |
| int left_chosen = NONE_FRAME, right_chosen = NONE_FRAME; |
| |
| int alt_buf_idx = cm->frame_refs[ALTREF_FRAME - LAST_FRAME].idx; |
| int lst_buf_idx = cm->frame_refs[LAST_FRAME - LAST_FRAME].idx; |
| int gld_buf_idx = cm->frame_refs[GOLDEN_FRAME - LAST_FRAME].idx; |
| |
| #if CONFIG_EXT_REFS |
| int lst2_buf_idx = cm->frame_refs[LAST2_FRAME - LAST_FRAME].idx; |
| int lst3_buf_idx = cm->frame_refs[LAST3_FRAME - LAST_FRAME].idx; |
| int bwd_buf_idx = cm->frame_refs[BWDREF_FRAME - LAST_FRAME].idx; |
| #endif |
| |
| int cur_offset = cm->frame_offset; |
| int this_offset; |
| if (alt_buf_idx >= 0) { |
| this_offset = cm->cur_frame->alt_frame_offset; |
| if (this_offset > cur_offset && |
| (right_offset < 0 || this_offset < right_offset)) { |
| right_idx = alt_buf_idx; |
| right_offset = this_offset; |
| right_chosen = ALTREF_FRAME; |
| } else if (this_offset < cur_offset && this_offset > left_offset) { |
| left_idx = alt_buf_idx; |
| left_offset = this_offset; |
| left_chosen = ALTREF_FRAME; |
| } |
| } |
| if (lst_buf_idx >= 0) { |
| this_offset = cm->cur_frame->lst_frame_offset; |
| if (this_offset > cur_offset && |
| (right_offset < 0 || this_offset < right_offset)) { |
| right_idx = lst_buf_idx; |
| right_offset = this_offset; |
| right_chosen = LAST_FRAME; |
| } else if (this_offset < cur_offset && this_offset > left_offset) { |
| left_idx = lst_buf_idx; |
| left_offset = this_offset; |
| left_chosen = LAST_FRAME; |
| } |
| } |
| if (gld_buf_idx >= 0) { |
| this_offset = cm->cur_frame->gld_frame_offset; |
| if (this_offset > cur_offset && |
| (right_offset < 0 || this_offset < right_offset)) { |
| right_idx = gld_buf_idx; |
| right_offset = this_offset; |
| right_chosen = GOLDEN_FRAME; |
| } else if (this_offset < cur_offset && this_offset > left_offset) { |
| left_idx = gld_buf_idx; |
| left_offset = this_offset; |
| left_chosen = GOLDEN_FRAME; |
| } |
| } |
| #if CONFIG_EXT_REFS |
| if (lst2_buf_idx >= 0) { |
| this_offset = cm->cur_frame->lst2_frame_offset; |
| if (this_offset > cur_offset && |
| (right_offset < 0 || this_offset < right_offset)) { |
| right_idx = lst2_buf_idx; |
| right_offset = this_offset; |
| right_chosen = LAST2_FRAME; |
| } else if (this_offset < cur_offset && this_offset > left_offset) { |
| left_idx = lst2_buf_idx; |
| left_offset = this_offset; |
| left_chosen = LAST2_FRAME; |
| } |
| } |
| if (lst3_buf_idx >= 0) { |
| this_offset = cm->cur_frame->lst3_frame_offset; |
| if (this_offset > cur_offset && |
| (right_offset < 0 || this_offset < right_offset)) { |
| right_idx = lst3_buf_idx; |
| right_offset = this_offset; |
| right_chosen = LAST3_FRAME; |
| } else if (this_offset < cur_offset && this_offset > left_offset) { |
| left_idx = lst3_buf_idx; |
| left_offset = this_offset; |
| left_chosen = LAST3_FRAME; |
| } |
| } |
| if (bwd_buf_idx >= 0) { |
| this_offset = cm->cur_frame->bwd_frame_offset; |
| if (this_offset > cur_offset && |
| (right_offset < 0 || this_offset < right_offset)) { |
| right_idx = bwd_buf_idx; |
| right_offset = this_offset; |
| right_chosen = BWDREF_FRAME; |
| } else if (this_offset < cur_offset && this_offset > left_offset) { |
| left_idx = bwd_buf_idx; |
| left_offset = this_offset; |
| left_chosen = BWDREF_FRAME; |
| } |
| } |
| #endif |
| *left_offset_ptr = left_offset; |
| *left_chosen_ptr = left_chosen; |
| *left_idx_ptr = left_idx; |
| *right_offset_ptr = right_offset; |
| *right_chosen_ptr = right_chosen; |
| *right_idx_ptr = right_idx; |
| } |
| /* |
| * find the existing initialization motion vectors between any two reference |
| * frames |
| */ |
| void opfl_set_init_motion(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct, |
| int left_idx, int right_idx, int left_offset, |
| int right_offset, int_mv *left_mv, int_mv *right_mv) { |
| int cur_offset = buf_struct->cur_offset; |
| double dst_pos = ((double)(cur_offset - left_offset)) / |
| ((double)(right_offset - left_offset)); |
| |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| left_mv[i * cm->mi_cols + j].as_int = INVALID_MV; |
| right_mv[i * cm->mi_cols + j].as_int = INVALID_MV; |
| } |
| } |
| // process left ref |
| MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[left_idx].mvs; |
| int lst_frame_idx = cm->buffer_pool->frame_bufs[left_idx].lst_frame_offset; |
| int alt_frame_idx = cm->buffer_pool->frame_bufs[left_idx].alt_frame_offset; |
| int gld_frame_idx = cm->buffer_pool->frame_bufs[left_idx].gld_frame_offset; |
| #if CONFIG_EXT_REFS |
| int lst2_frame_idx = cm->buffer_pool->frame_bufs[left_idx].lst2_frame_offset; |
| int lst3_frame_idx = cm->buffer_pool->frame_bufs[left_idx].lst3_frame_offset; |
| int bwd_frame_idx = cm->buffer_pool->frame_bufs[left_idx].bwd_frame_offset; |
| #endif |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| MV_REF *mv_ref = &mv_ref_base[i * cm->mi_cols + j]; |
| |
| MV this_mvs[2] = { mv_ref->mv[0].as_mv, mv_ref->mv[1].as_mv }; |
| MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0], |
| mv_ref->ref_frame[1] }; |
| #if OPFL_TPL |
| if (ref_frame[0] == OPFL_FRAME && ref_frame[1] == NONE_FRAME) { |
| this_mvs[0] = mv_ref->opfl_ref_mvs[0].as_mv; |
| this_mvs[1] = mv_ref->opfl_ref_mvs[1].as_mv; |
| ref_frame[0] = mv_ref->opfl_ref_frame[0]; |
| ref_frame[1] = mv_ref->opfl_ref_frame[1]; |
| } |
| #endif |
| |
| for (int r = 0; r < 2; r++) { |
| if (ref_frame[r] == NONE_FRAME || ref_frame[r] == OPFL_FRAME) continue; |
| int ref_offset; |
| switch (ref_frame[r]) { |
| case LAST_FRAME: ref_offset = lst_frame_idx; break; |
| case ALTREF_FRAME: ref_offset = alt_frame_idx; break; |
| case GOLDEN_FRAME: ref_offset = gld_frame_idx; break; |
| #if CONFIG_EXT_REFS |
| case LAST2_FRAME: ref_offset = lst2_frame_idx; break; |
| case LAST3_FRAME: ref_offset = lst3_frame_idx; break; |
| case BWDREF_FRAME: ref_offset = bwd_frame_idx; break; |
| #endif |
| default: ref_offset = -1; |
| } |
| // only initialize with the same refs! |
| if (ref_offset == right_offset) { |
| // calculate mv to left |
| int_mv temp_mv; |
| temp_mv.as_mv.row = |
| opfl_round_double_2_int(-dst_pos * this_mvs[r].row); |
| temp_mv.as_mv.col = |
| opfl_round_double_2_int(-dst_pos * this_mvs[r].col); |
| int mi_r = |
| i - opfl_round_double_2_int(temp_mv.as_mv.row / (8.0 * 4.0)); |
| int mi_c = |
| j - opfl_round_double_2_int(temp_mv.as_mv.col / (8.0 * 4.0)); |
| if (mi_r < 0 || mi_r >= cm->mi_rows || mi_c < 0 || |
| mi_c >= cm->mi_cols) |
| continue; |
| left_mv[mi_r * cm->mi_cols + mi_c].as_int = temp_mv.as_int; |
| // calculate mv to right |
| temp_mv.as_mv.row = |
| opfl_round_double_2_int((1 - dst_pos) * this_mvs[r].row); |
| temp_mv.as_mv.col = |
| opfl_round_double_2_int((1 - dst_pos) * this_mvs[r].col); |
| right_mv[mi_r * cm->mi_cols + mi_c].as_int = temp_mv.as_int; |
| } |
| } |
| } |
| } |
| // process right ref |
| mv_ref_base = cm->buffer_pool->frame_bufs[right_idx].mvs; |
| lst_frame_idx = cm->buffer_pool->frame_bufs[right_idx].lst_frame_offset; |
| alt_frame_idx = cm->buffer_pool->frame_bufs[right_idx].alt_frame_offset; |
| gld_frame_idx = cm->buffer_pool->frame_bufs[right_idx].gld_frame_offset; |
| #if CONFIG_EXT_REFS |
| lst2_frame_idx = cm->buffer_pool->frame_bufs[right_idx].lst2_frame_offset; |
| lst3_frame_idx = cm->buffer_pool->frame_bufs[right_idx].lst3_frame_offset; |
| bwd_frame_idx = cm->buffer_pool->frame_bufs[right_idx].bwd_frame_offset; |
| #endif |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| MV_REF *mv_ref = &mv_ref_base[i * cm->mi_cols + j]; |
| MV this_mvs[2] = { mv_ref->mv[0].as_mv, mv_ref->mv[1].as_mv }; |
| MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0], |
| mv_ref->ref_frame[1] }; |
| |
| if (ref_frame[0] == OPFL_FRAME && ref_frame[1] == NONE_FRAME) { |
| this_mvs[0] = mv_ref->opfl_ref_mvs[0].as_mv; |
| this_mvs[1] = mv_ref->opfl_ref_mvs[1].as_mv; |
| ref_frame[0] = mv_ref->opfl_ref_frame[0]; |
| ref_frame[1] = mv_ref->opfl_ref_frame[1]; |
| } |
| |
| for (int r = 0; r < 2; r++) { |
| if (ref_frame[r] == NONE_FRAME || ref_frame[r] == OPFL_FRAME) continue; |
| int ref_offset; |
| switch (ref_frame[r]) { |
| case LAST_FRAME: ref_offset = lst_frame_idx; break; |
| case ALTREF_FRAME: ref_offset = alt_frame_idx; break; |
| case GOLDEN_FRAME: ref_offset = gld_frame_idx; break; |
| #if CONFIG_EXT_REFS |
| case LAST2_FRAME: ref_offset = lst2_frame_idx; break; |
| case LAST3_FRAME: ref_offset = lst3_frame_idx; break; |
| case BWDREF_FRAME: ref_offset = bwd_frame_idx; break; |
| #endif |
| default: ref_offset = -1; |
| } |
| // only initialize with the same refs! |
| if (ref_offset == left_offset) { |
| // calculate mv to right |
| int_mv temp_mv; |
| temp_mv.as_mv.row = |
| opfl_round_double_2_int(-(1 - dst_pos) * this_mvs[r].row); |
| temp_mv.as_mv.col = |
| opfl_round_double_2_int(-(1 - dst_pos) * this_mvs[r].col); |
| int mi_r = |
| i - opfl_round_double_2_int(temp_mv.as_mv.row / (8.0 * 4.0)); |
| int mi_c = |
| j - opfl_round_double_2_int(temp_mv.as_mv.col / (8.0 * 4.0)); |
| if (mi_r < 0 || mi_r >= cm->mi_rows || mi_c < 0 || |
| mi_c >= cm->mi_cols) |
| continue; |
| right_mv[mi_r * cm->mi_cols + mi_c].as_int = temp_mv.as_int; |
| // calculate mv to left |
| temp_mv.as_mv.row = |
| opfl_round_double_2_int(dst_pos * this_mvs[r].row); |
| temp_mv.as_mv.col = |
| opfl_round_double_2_int(dst_pos * this_mvs[r].col); |
| left_mv[mi_r * cm->mi_cols + mi_c].as_int = temp_mv.as_int; |
| } |
| } |
| } |
| } |
| } |
| |
| /* |
| * This function finds the motion vectors between left_idx and right_idx, |
| * and forms the motions pointing from left to right |
| * unlike opfl_set_init_motion, this function does not scale and set |
| * motion vectors according to dst_pos. |
| */ |
| void opfl_find_init_motion(AV1_COMMON *cm, int left_idx, int right_idx, |
| int left_offset, int right_offset, int_mv *left_mv) { |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| left_mv[i * cm->mi_cols + j].as_int = INVALID_MV; |
| } |
| } |
| // process right to left mv |
| MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[right_idx].mvs; |
| int lst_frame_idx = cm->buffer_pool->frame_bufs[right_idx].lst_frame_offset; |
| int alt_frame_idx = cm->buffer_pool->frame_bufs[right_idx].alt_frame_offset; |
| int gld_frame_idx = cm->buffer_pool->frame_bufs[right_idx].gld_frame_offset; |
| #if CONFIG_EXT_REFS |
| int lst2_frame_idx = cm->buffer_pool->frame_bufs[right_idx].lst2_frame_offset; |
| int lst3_frame_idx = cm->buffer_pool->frame_bufs[right_idx].lst3_frame_offset; |
| int bwd_frame_idx = cm->buffer_pool->frame_bufs[right_idx].bwd_frame_offset; |
| #endif |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| MV_REF *mv_ref = &mv_ref_base[i * cm->mi_cols + j]; |
| MV this_mvs[2] = { mv_ref->mv[0].as_mv, mv_ref->mv[1].as_mv }; |
| MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0], |
| mv_ref->ref_frame[1] }; |
| #if OPFL_TPL |
| if (ref_frame[0] == OPFL_FRAME && ref_frame[1] == NONE_FRAME) { |
| this_mvs[0] = mv_ref->opfl_ref_mvs[0].as_mv; |
| this_mvs[1] = mv_ref->opfl_ref_mvs[1].as_mv; |
| ref_frame[0] = mv_ref->opfl_ref_frame[0]; |
| ref_frame[1] = mv_ref->opfl_ref_frame[1]; |
| } |
| #endif |
| for (int r = 0; r < 2; r++) { |
| int ref_offset; |
| switch (ref_frame[r]) { |
| case LAST_FRAME: ref_offset = lst_frame_idx; break; |
| case ALTREF_FRAME: ref_offset = alt_frame_idx; break; |
| case GOLDEN_FRAME: ref_offset = gld_frame_idx; break; |
| #if CONFIG_EXT_REFS |
| case LAST2_FRAME: ref_offset = lst2_frame_idx; break; |
| case LAST3_FRAME: ref_offset = lst3_frame_idx; break; |
| case BWDREF_FRAME: ref_offset = bwd_frame_idx; break; |
| #endif |
| default: ref_offset = -1; |
| } |
| // only initialize with the same refs! |
| if (ref_offset == left_offset) { |
| // calculate mv to right |
| int_mv temp_mv; |
| temp_mv.as_mv.row = -this_mvs[r].row; |
| temp_mv.as_mv.col = -this_mvs[r].col; |
| int mi_r = i - (temp_mv.as_mv.row >> (3 + MI_SIZE_LOG2)); |
| int mi_c = j - (temp_mv.as_mv.col >> (3 + MI_SIZE_LOG2)); |
| if (mi_r < 0 || mi_r >= cm->mi_rows || mi_c < 0 || |
| mi_c >= cm->mi_cols) |
| continue; |
| left_mv[mi_r * cm->mi_cols + mi_c].as_int = temp_mv.as_int; |
| } |
| } |
| } |
| } |
| // process left to right mv |
| mv_ref_base = cm->buffer_pool->frame_bufs[left_idx].mvs; |
| lst_frame_idx = cm->buffer_pool->frame_bufs[left_idx].lst_frame_offset; |
| alt_frame_idx = cm->buffer_pool->frame_bufs[left_idx].alt_frame_offset; |
| gld_frame_idx = cm->buffer_pool->frame_bufs[left_idx].gld_frame_offset; |
| #if CONFIG_EXT_REFS |
| lst2_frame_idx = cm->buffer_pool->frame_bufs[left_idx].lst2_frame_offset; |
| lst3_frame_idx = cm->buffer_pool->frame_bufs[left_idx].lst3_frame_offset; |
| bwd_frame_idx = cm->buffer_pool->frame_bufs[left_idx].bwd_frame_offset; |
| #endif |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| MV_REF *mv_ref = &mv_ref_base[i * cm->mi_cols + j]; |
| |
| MV this_mvs[2] = { mv_ref->mv[0].as_mv, mv_ref->mv[1].as_mv }; |
| MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0], |
| mv_ref->ref_frame[1] }; |
| #if OPFL_TPL |
| if (ref_frame[0] == OPFL_FRAME && ref_frame[1] == NONE_FRAME) { |
| this_mvs[0] = mv_ref->opfl_ref_mvs[0].as_mv; |
| this_mvs[1] = mv_ref->opfl_ref_mvs[1].as_mv; |
| ref_frame[0] = mv_ref->opfl_ref_frame[0]; |
| ref_frame[1] = mv_ref->opfl_ref_frame[1]; |
| } |
| #endif |
| for (int r = 0; r < 2; r++) { |
| int ref_offset; |
| switch (ref_frame[r]) { |
| case LAST_FRAME: ref_offset = lst_frame_idx; break; |
| case ALTREF_FRAME: ref_offset = alt_frame_idx; break; |
| case GOLDEN_FRAME: ref_offset = gld_frame_idx; break; |
| #if CONFIG_EXT_REFS |
| case LAST2_FRAME: ref_offset = lst2_frame_idx; break; |
| case LAST3_FRAME: ref_offset = lst3_frame_idx; break; |
| case BWDREF_FRAME: ref_offset = bwd_frame_idx; break; |
| #endif |
| default: ref_offset = -1; |
| } |
| if (ref_offset == right_offset) { |
| // calculate mv to right |
| int_mv temp_mv; |
| temp_mv.as_mv.row = this_mvs[r].row; |
| temp_mv.as_mv.col = this_mvs[r].col; |
| left_mv[i * cm->mi_cols + j].as_int = temp_mv.as_int; |
| } |
| } |
| } |
| } |
| } |
| /* |
| * This function derives initialization motion vector, for example: |
| * assume we want mvs between ref1 and ref2, but there exist mvscale |
| * mv1 pointing from ref1 to ref3, and mv2 pointing from ref3 to ref2, |
| * then the derived mv = mv1 + mv2 |
| */ |
| void opfl_derive_init_mv(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct, |
| int left_idx, int right_idx, int left_offset, |
| int right_offset, int_mv *left_mv, int_mv *right_mv) { |
| int alt_buf_idx = cm->frame_refs[ALTREF_FRAME - LAST_FRAME].idx; |
| int lst_buf_idx = cm->frame_refs[LAST_FRAME - LAST_FRAME].idx; |
| int gld_buf_idx = cm->frame_refs[GOLDEN_FRAME - LAST_FRAME].idx; |
| |
| #if CONFIG_EXT_REFS |
| int lst2_buf_idx = cm->frame_refs[LAST2_FRAME - LAST_FRAME].idx; |
| int lst3_buf_idx = cm->frame_refs[LAST3_FRAME - LAST_FRAME].idx; |
| int bwd_buf_idx = cm->frame_refs[BWDREF_FRAME - LAST_FRAME].idx; |
| #endif |
| |
| double dstpos = buf_struct->dst_pos; |
| |
| // find candidate refs to try |
| int cand_idx[6]; |
| int cand_offset[6]; |
| int c = 0; |
| if (alt_buf_idx != left_idx && alt_buf_idx != right_idx && alt_buf_idx >= 0) { |
| int skip = 0; |
| for (int k = 0; k < c; k++) { |
| if (cand_idx[k] == alt_buf_idx) skip = 1; |
| } |
| if (skip == 0) { |
| cand_idx[c] = alt_buf_idx; |
| cand_offset[c] = cm->cur_frame->alt_frame_offset; |
| c++; |
| } |
| } |
| if (lst_buf_idx != left_idx && lst_buf_idx != right_idx && lst_buf_idx >= 0) { |
| int skip = 0; |
| for (int k = 0; k < c; k++) { |
| if (cand_idx[k] == lst_buf_idx) skip = 1; |
| } |
| if (skip == 0) { |
| cand_idx[c] = lst_buf_idx; |
| cand_offset[c] = cm->cur_frame->lst_frame_offset; |
| c++; |
| } |
| } |
| if (gld_buf_idx != left_idx && gld_buf_idx != right_idx && gld_buf_idx >= 0) { |
| int skip = 0; |
| for (int k = 0; k < c; k++) { |
| if (cand_idx[k] == gld_buf_idx) skip = 1; |
| } |
| if (skip == 0) { |
| cand_idx[c] = gld_buf_idx; |
| cand_offset[c] = cm->cur_frame->gld_frame_offset; |
| c++; |
| } |
| } |
| #if CONFIG_EXT_REFS |
| if (lst2_buf_idx != left_idx && lst2_buf_idx != right_idx && |
| lst2_buf_idx >= 0) { |
| int skip = 0; |
| for (int k = 0; k < c; k++) { |
| if (cand_idx[k] == lst2_buf_idx) skip = 1; |
| } |
| if (skip == 0) { |
| cand_idx[c] = lst2_buf_idx; |
| cand_offset[c] = cm->cur_frame->lst2_frame_offset; |
| c++; |
| } |
| } |
| if (lst3_buf_idx != left_idx && lst3_buf_idx != right_idx && |
| lst3_buf_idx >= 0) { |
| int skip = 0; |
| for (int k = 0; k < c; k++) { |
| if (cand_idx[k] == lst3_buf_idx) skip = 1; |
| } |
| if (skip == 0) { |
| cand_idx[c] = lst3_buf_idx; |
| cand_offset[c] = cm->cur_frame->lst3_frame_offset; |
| c++; |
| } |
| } |
| if (bwd_buf_idx != left_idx && bwd_buf_idx != right_idx && bwd_buf_idx >= 0) { |
| int skip = 0; |
| for (int k = 0; k < c; k++) { |
| if (cand_idx[k] == bwd_buf_idx) skip = 1; |
| } |
| if (skip == 0) { |
| cand_idx[c] = bwd_buf_idx; |
| cand_offset[c] = cm->cur_frame->bwd_frame_offset; |
| c++; |
| } |
| } |
| #endif |
| for (; c < 6; c++) { |
| cand_idx[c] = -1; |
| } |
| |
| int_mv *mv_to_left = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv)); |
| int_mv *mv_to_right = aom_calloc(cm->mi_cols * cm->mi_rows, sizeof(int_mv)); |
| int_mv cur_mv, temp_mv; |
| for (c = 0; c < 6; c++) { |
| if (cand_idx[c] < 0) break; |
| opfl_find_init_motion(cm, cand_idx[c], left_idx, cand_offset[c], |
| left_offset, mv_to_left); |
| opfl_find_init_motion(cm, cand_idx[c], right_idx, cand_offset[c], |
| right_offset, mv_to_right); |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| if (mv_to_left[i * cm->mi_cols + j].as_int == INVALID_MV || |
| mv_to_right[i * cm->mi_cols + j].as_int == INVALID_MV) |
| continue; |
| // cur_mv points from left to right |
| cur_mv.as_mv.row = mv_to_right[i * cm->mi_cols + j].as_mv.row - |
| mv_to_left[i * cm->mi_cols + j].as_mv.row; |
| cur_mv.as_mv.col = mv_to_right[i * cm->mi_cols + j].as_mv.col - |
| mv_to_left[i * cm->mi_cols + j].as_mv.col; |
| temp_mv.as_mv.row = opfl_round_double_2_int(-dstpos * cur_mv.as_mv.row); |
| temp_mv.as_mv.col = opfl_round_double_2_int(-dstpos * cur_mv.as_mv.col); |
| int mi_r = i + opfl_round_double_2_int( |
| (mv_to_left[i * cm->mi_cols + j].as_mv.row - |
| temp_mv.as_mv.row) / |
| (8.0 * 4.0)); |
| int mi_c = j + opfl_round_double_2_int( |
| (mv_to_left[i * cm->mi_cols + j].as_mv.col - |
| temp_mv.as_mv.col) / |
| (8.0 * 4.0)); |
| if (mi_r < 0 || mi_r >= cm->mi_rows || mi_c < 0 || mi_c >= cm->mi_cols) |
| continue; |
| if (left_mv[mi_r * cm->mi_cols + mi_c].as_int == INVALID_MV || |
| right_mv[mi_r * cm->mi_cols + mi_c].as_int == INVALID_MV) { |
| left_mv[mi_r * cm->mi_cols + mi_c].as_int = temp_mv.as_int; |
| // calculate mv to right |
| temp_mv.as_mv.row = |
| opfl_round_double_2_int((1 - dstpos) * cur_mv.as_mv.row); |
| temp_mv.as_mv.col = |
| opfl_round_double_2_int((1 - dstpos) * cur_mv.as_mv.col); |
| right_mv[mi_r * cm->mi_cols + mi_c].as_int = temp_mv.as_int; |
| } |
| } |
| } |
| } |
| aom_free(mv_to_left); |
| aom_free(mv_to_right); |
| } |
| |
| int opfl_get_4x4_warp_dist(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct, |
| int starth, int startw, int_mv left_mv, |
| int_mv right_mv) { |
| int dist = 0; |
| YV12_BUFFER_CONFIG *r0 = buf_struct->ref0_buf[0]; |
| YV12_BUFFER_CONFIG *r1 = buf_struct->ref1_buf[0]; |
| uint8_t *src_y0 = r0->y_buffer; |
| uint8_t *src_y1 = r1->y_buffer; |
| int srcstride = r0->y_stride; |
| int lpix, rpix; |
| int yl, xl, yr, xr; |
| double dil, djl, dir, djr; |
| |
| int width = r0->y_width; |
| int height = r0->y_height; |
| |
| yl = opfl_floor_double_2_int((double)left_mv.as_mv.row / 8.0); |
| xl = opfl_floor_double_2_int((double)left_mv.as_mv.col / 8.0); |
| yr = opfl_floor_double_2_int((double)right_mv.as_mv.row / 8.0); |
| xr = opfl_floor_double_2_int((double)right_mv.as_mv.col / 8.0); |
| |
| dil = (double)left_mv.as_mv.row / 8.0 - yl; |
| djl = (double)left_mv.as_mv.col / 8.0 - xl; |
| dir = (double)right_mv.as_mv.row / 8.0 - yr; |
| djr = (double)right_mv.as_mv.col / 8.0 - xr; |
| |
| if (starth + yl < 0) { |
| yl = -starth; |
| } else if (starth + yl >= height) { |
| yl = height - starth; |
| } |
| if (startw + xl < 0) { |
| xl = -startw; |
| } else if (startw + xl >= width) { |
| xl = width - startw; |
| } |
| if (starth + yr < 0) { |
| yr = -starth; |
| } else if (starth + yr >= height) { |
| yr = height - starth; |
| } |
| if (startw + xr < 0) { |
| xr = -startw; |
| } else if (startw + xr >= width) { |
| xr = width - startw; |
| } |
| |
| for (int i = 0; i < 4; i++) { |
| for (int j = 0; j < 4; j++) { |
| uint8_t *tmpsrc; |
| tmpsrc = src_y0 + (starth + i + yl) * srcstride + (startw + j + xl); |
| lpix = get_sub_pel_y(tmpsrc, srcstride, dil, djl); |
| tmpsrc = src_y1 + (starth + i + yr) * srcstride + (startw + j + xr); |
| rpix = get_sub_pel_y(tmpsrc, srcstride, dir, djr); |
| dist += (lpix - rpix) * (lpix - rpix); |
| } |
| } |
| return dist; |
| } |
| |
| void opfl_update_init_motion(AV1_COMMON *cm, OPFL_BUFFER_STRUCT *buf_struct, |
| int_mv *left_mv, int_mv *right_mv, int left_offset, |
| int right_offset, int_mv *left_cand_mv, |
| int_mv *right_cand_mv, int left_cand_offset, |
| int right_cand_offset, int *is_first_valid, |
| int_mv *left_final_mv, int_mv *right_final_mv) { |
| int cur_offset = buf_struct->cur_offset; |
| int_mv temp_left_mv, temp_right_mv; |
| int dist_ori, dist_new; |
| double left_ratio, right_ratio; |
| left_ratio = ((double)(cur_offset - left_offset)) / |
| ((double)(cur_offset - left_cand_offset)); |
| right_ratio = ((double)(-cur_offset + right_offset)) / |
| ((double)(-cur_offset + right_cand_offset)); |
| for (int i = 0; i < cm->mi_rows; i++) { |
| for (int j = 0; j < cm->mi_cols; j++) { |
| if (is_first_valid[i * cm->mi_cols + j] > 0) continue; |
| if (left_cand_mv[i * cm->mi_cols + j].as_int == INVALID_MV || |
| right_cand_mv[i * cm->mi_cols + j].as_int == INVALID_MV) { |
| continue; |
| } |
| dist_ori = opfl_get_4x4_warp_dist(cm, buf_struct, i * 4, j * 4, |
| left_mv[i * cm->mi_cols + j], |
| right_mv[i * cm->mi_cols + j]); |
| |
| temp_left_mv.as_mv.row = opfl_round_double_2_int( |
| left_cand_mv[i * cm->mi_cols + j].as_mv.row * left_ratio); |
| temp_left_mv.as_mv.col = opfl_round_double_2_int( |
| left_cand_mv[i * cm->mi_cols + j].as_mv.col * left_ratio); |
| temp_right_mv.as_mv.row = opfl_round_double_2_int( |
| right_cand_mv[i * cm->mi_cols + j].as_mv.row * right_ratio); |
| temp_right_mv.as_mv.col = opfl_round_double_2_int( |
| right_cand_mv[i * cm->mi_cols + j].as_mv.col * right_ratio); |
| dist_new = opfl_get_4x4_warp_dist(cm, buf_struct, i * 4, j * 4, |
| temp_left_mv, temp_right_mv); |
| if (dist_ori > dist_new) { |
| left_mv[i * cm->mi_cols + j].as_int = temp_left_mv.as_int; |
| right_mv[i * cm->mi_cols + j].as_int = temp_right_mv.as_int; |
| left_final_mv[i * cm->mi_cols + j].as_int = temp_left_mv.as_int; |
| right_final_mv[i * cm->mi_cols + j].as_int = temp_right_mv.as_int; |
| } |
| } |
| } |
| } |
| /* |
| * use optical flow method to calculate motion field of a specific level. |
| * |
| * Input: |
| * buf_struct: containing buffers of the reference frames |
| * mf_last: initial motion field |
| * level: current scale level, 0 = original, 1 = 0.5, 2 = 0.25, etc. |
| * dstpos: dst frame position |
| * usescale: 0->do not scale the original, 1->do scaling of images |
| * blk_info: information on the current block |
| * |
| * Output: |
| * mf_new: pointer to calculated motion field |
| */ |
| void refine_motion_field(OPFL_BUFFER_STRUCT *buf_struct, DB_MV *mf_last, |
| DB_MV *mf_new, int level, double dstpos, int usescale, |
| OPFL_BLK_INFO blk_info) { |
| int count = 0; |
| double last_cost = DBL_MAX; |
| double new_cost = last_cost; |
| int width = blk_info.blk_width, height = blk_info.blk_height; |
| width = width >> level; |
| height = height >> level; |
| int mvstr = width + 2 * AVG_MF_BORDER; |
| // annealing factor for laplacian multiplier |
| double as_scale_factor = 1 << level; |
| // iteratively warp and estimate motion field |
| while (count < MAX_ITER_OPTICAL_FLOW) { |
| // TODO(bohan): combinations of fast and direct methods might help |
| #if FAST_OPTICAL_FLOW |
| new_cost = |
| iterate_update_mv_fast(buf_struct, mf_last, mf_new, level, dstpos, |
| as_scale_factor, usescale, blk_info); |
| #else |
| new_cost = iterate_update_mv(buf_struct, mf_last, mf_new, level, dstpos, |
| as_scale_factor, usescale, blk_info, count); |
| #endif |
| // prepare for the next iteration |
| DB_MV *mv_start = mf_last + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| DB_MV *mv_start_new = mf_new + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| mv_start[i * mvstr + j].row += mv_start_new[i * mvstr + j].row; |
| mv_start[i * mvstr + j].col += mv_start_new[i * mvstr + j].col; |
| if (mv_start[i * mvstr + j].row > MAX_MV_LENGTH_1D) |
| mv_start[i * mvstr + j].row = MAX_MV_LENGTH_1D; |
| else if (mv_start[i * mvstr + j].row < -MAX_MV_LENGTH_1D) |
| mv_start[i * mvstr + j].row = -MAX_MV_LENGTH_1D; |
| if (mv_start[i * mvstr + j].col > MAX_MV_LENGTH_1D) |
| mv_start[i * mvstr + j].col = MAX_MV_LENGTH_1D; |
| else if (mv_start[i * mvstr + j].col < -MAX_MV_LENGTH_1D) |
| mv_start[i * mvstr + j].col = -MAX_MV_LENGTH_1D; |
| mv_start_new[i * mvstr + j].row = mv_start[i * mvstr + j].row; |
| mv_start_new[i * mvstr + j].col = mv_start[i * mvstr + j].col; |
| } |
| } |
| last_cost = new_cost; |
| count++; |
| as_scale_factor *= OPFL_ANNEAL_FACTOR; |
| } |
| return; |
| } |
| |
| double get_opfl_cost(double *Ex, double *Ey, double *Et, DB_MV *mv_start, |
| int width, int height, int mfstr, int i, int j) { |
| double u = mv_start[i * mfstr + j].col; |
| double v = mv_start[i * mfstr + j].row; |
| double cost, temp; |
| int check_y[4] = { -1, -1, 1, 1 }; |
| int check_x[4] = { -1, 1, -1, 1 }; |
| cost = Ex[i * width + j] * u + Ey[i * width + j] * v + Et[i * width + j]; |
| cost = cost * cost; |
| temp = 0; |
| temp += 4 * mv_start[i * mfstr + j].col; |
| for (int k = 0; k < 4; k++) { |
| int curi = i + check_y[k]; |
| int curj = j + check_x[k]; |
| if (curi < 0) |
| curi = 0; |
| else if (curi >= height) |
| curi = height - 1; |
| if (curj < 0) |
| curj = 0; |
| else if (curj >= width) |
| curj = width - 1; |
| temp -= mv_start[curi * mfstr + curj].col; |
| } |
| cost += temp * temp * OF_A_SQUARED / 4.0; |
| temp = 0; |
| temp += 4 * mv_start[i * mfstr + j].row; |
| for (int k = 0; k < 4; k++) { |
| int curi = i + check_y[k]; |
| int curj = j + check_x[k]; |
| if (curi < 0) |
| curi = 0; |
| else if (curi >= height) |
| curi = height - 1; |
| if (curj < 0) |
| curj = 0; |
| else if (curj >= width) |
| curj = width - 1; |
| temp -= mv_start[curi * mfstr + curj].row; |
| } |
| cost += temp * temp * OF_A_SQUARED / 4.0; |
| |
| return cost; |
| } |
| |
| /* |
| * Update motion field at each iteration by solving linear equations directly. |
| * |
| * Input: |
| * buf_struct: containing buffers of the reference frames |
| * mf_last: initial motion field |
| * level: current scale level, 0 = original, 1 = 0.5, 2 = 0.25, etc. |
| * dstpos: dst frame position |
| * as_scale: scale the laplacian multiplier to perform "annealing" |
| * usescale: 0->do not scale the original, 1->do scaling of images |
| * blk_info: information on the current block |
| * |
| * Output: |
| * mf_new: pointer to calculated motion field |
| */ |
| double iterate_update_mv(OPFL_BUFFER_STRUCT *buf_struct, DB_MV *mf_last, |
| DB_MV *mf_new, int level, double dstpos, |
| double as_scale, int usescale, OPFL_BLK_INFO blk_info, |
| int numWarpedRounds) { |
| double *Ex, *Ey, *Et; |
| double a_squared = OF_A_SQUARED * as_scale; |
| double cost = 0; |
| |
| YV12_BUFFER_CONFIG *ref0, *ref1, *buf_init0, *buf_init1; |
| int l = level; |
| if (!usescale) l = 0; |
| ref0 = buf_struct->ref0_buf[l]; |
| ref1 = buf_struct->ref1_buf[l]; |
| buf_init0 = buf_struct->ref0_warped_buf[l]; |
| buf_init1 = buf_struct->ref1_warped_buf[l]; |
| |
| int y_width = blk_info.blk_width, y_height = blk_info.blk_height; |
| int starth = blk_info.starth, startw = blk_info.startw; |
| int sh = starth >> level, sw = startw >> level; |
| int width = y_width, height = y_height; |
| width = width >> level; |
| height = height >> level; |
| if (usescale) { |
| y_width = y_width >> level; |
| y_height = y_height >> level; |
| starth = starth >> level; |
| startw = startw >> level; |
| } |
| int mvstr = width + 2 * AVG_MF_BORDER; |
| DB_MV *mv_start = mf_last + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| DB_MV *mv_start_new = mf_new + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| int i, j; |
| |
| // allocate temp buffers |
| Ex = buf_struct->Ex; |
| Ey = buf_struct->Ey; |
| Et = buf_struct->Et; |
| YV12_BUFFER_CONFIG *buffer0 = buf_struct->buffer0[usescale ? level : 0]; |
| YV12_BUFFER_CONFIG *buffer1 = buf_struct->buffer1[usescale ? level : 0]; |
| |
| // TODO(bohan): these buffers can also be moved to the buf_struct |
| int *row_pos = aom_calloc(width * height * 28, sizeof(int)); |
| int *col_pos = aom_calloc(width * height * 28, sizeof(int)); |
| double *values = aom_calloc(width * height * 28, sizeof(double)); |
| double *mv_vec = aom_calloc(width * height * 2, sizeof(double)); |
| double *b = aom_calloc(width * height * 2, sizeof(double)); |
| double *b_temp = aom_calloc(width * height * 2, sizeof(double)); |
| double *pixel_weight = aom_calloc(width * height, sizeof(double)); |
| double *mv_weight = aom_calloc(width * height, sizeof(double)); |
| double *mv_center_wt = aom_calloc(width * height, sizeof(double)); |
| |
| int imvstr = buf_struct->ref0_buf[0]->y_width; |
| imvstr = (imvstr >> level) + 2 * AVG_MF_BORDER; |
| DB_MV *initmv = buf_struct->init_mv_buf[level]; |
| initmv = initmv + (sh + AVG_MF_BORDER) * imvstr + sw + AVG_MF_BORDER; |
| #if OPFL_INIT_WT |
| double *init_wts = buf_struct->init_mv_wts[level]; |
| init_wts = init_wts + (sh + AVG_MF_BORDER) * imvstr + sw + AVG_MF_BORDER; |
| #endif |
| int fheight = buf_struct->ref0_buf[0]->y_height; |
| int fwidth = buf_struct->ref0_buf[0]->y_width; |
| fheight = fheight >> level; |
| fwidth = fwidth >> level; |
| |
| clock_t starts = clock(); |
| if (level == 0 || !usescale) |
| warp_optical_flow_fwd(ref0, ref1, mv_start, mvstr, buffer0, dstpos, level, |
| usescale, blk_info); |
| else |
| warp_optical_flow_fwd_bilinear(ref0, ref1, mv_start, mvstr, buffer0, dstpos, |
| level, usescale, blk_info); |
| if (level == 0 || !usescale) |
| warp_optical_flow_back(ref1, ref0, mv_start, mvstr, buffer1, 1 - dstpos, |
| level, usescale, blk_info); |
| else |
| warp_optical_flow_back_bilinear(ref1, ref0, mv_start, mvstr, buffer1, |
| 1 - dstpos, level, usescale, blk_info); |
| clock_t ends = clock(); |
| timesub += (double)(ends - starts) / CLOCKS_PER_SEC; |
| |
| clock_t startd = clock(); |
| // Calculate partial derivatives |
| opfl_get_derivatives(Ex, Ey, Et, buffer0, buffer1, buf_init0, buf_init1, |
| dstpos, level, usescale, blk_info); |
| #if OPFL_EXP_DERV |
| // if (numWarpedRounds == 0) |
| opfl_get_derivatives_nowarp(Ex, Ey, buf_struct, mf_last, dstpos, level, |
| blk_info); |
| #endif |
| |
| clock_t endd = clock(); |
| timeder += (double)(endd - startd) / CLOCKS_PER_SEC; |
| |
| clock_t starti = clock(), endi; |
| // construct and solve A*mv_vec = b |
| SPARSE_MTX A, L, LtL; |
| // check if pointing out of bound |
| double boundFactor = 0.01; |
| double pixExpFactor = 0.001; |
| double mvExpFactor = 0.001; |
| for (i = 0; i < height; i++) { |
| for (j = 0; j < width; j++) { |
| pixel_weight[i * width + j] = 1; |
| mv_weight[i * width + j] = 1; |
| } |
| } |
| double i0, i1, j0, j1; |
| for (i = 0; i < height; i++) { |
| for (j = 0; j < width; j++) { |
| i0 = (double)(i + sh) - |
| (buf_struct->dst_pos) * mv_start[i * mvstr + j].row; |
| i1 = (double)(i + sh) + |
| (1 - buf_struct->dst_pos) * mv_start[i * mvstr + j].row; |
| j0 = (double)(j + sw) - |
| (buf_struct->dst_pos) * mv_start[i * mvstr + j].col; |
| j1 = (double)(j + sw) + |
| (1 - buf_struct->dst_pos) * mv_start[i * mvstr + j].col; |
| int is_out = (i0 < 0 || i0 > fheight - 1 || i1 < 0 || i1 > fheight - 1 || |
| j0 < 0 || j0 > fwidth - 1 || j1 < 0 || j1 > fwidth - 1); |
| if (is_out && OPFL_BOUND_HANDLING) { |
| pixel_weight[i * width + j] = boundFactor * boundFactor; |
| mv_weight[i * width + j] = boundFactor; |
| } else if (level >= 0 && numWarpedRounds >= 0 && OPFL_CONF_WTS) { |
| double avg = 0; |
| double minDiff = 1000; |
| double curWt; |
| double count = 0; |
| for (int ii = -2; ii <= 2; ii++) { |
| for (int jj = -2; jj <= 2; jj++) { |
| if (i + ii < 0 || i + ii >= height || j + jj < 0 || j + jj >= width) |
| continue; |
| count += 1; |
| curWt = |
| Et[(i + ii) * width + j + jj] * Et[(i + ii) * width + j + jj]; |
| avg = avg * (count - 1) / count + curWt / count; |
| if (curWt > minDiff) minDiff = curWt; |
| } |
| } |
| mv_weight[i * width + j] = exp(-mvExpFactor * avg); |
| mv_weight[i * width + j] = (mv_weight[i * width + j] > boundFactor) |
| ? mv_weight[i * width + j] |
| : boundFactor; |
| pixel_weight[i * width + j] = |
| mv_weight[i * width + j] * mv_weight[i * width + j]; |
| } |
| } |
| } |
| |
| int c; |
| // Build A and b in linear equation Ax = b |
| int offset = height * width; |
| int checki, checkj; |
| double blkbound_wt = 0.0; |
| // the location idx for neighboring pixels, k < 4 are the 4 direct neighbors |
| int check_locs_y[12] = { 0, 0, -1, 1, -1, -1, 1, 1, 0, 0, -2, 2 }; |
| int check_locs_x[12] = { -1, 1, 0, 0, -1, 1, -1, 1, -2, 2, 0, 0 }; |
| int useModifyLap = numWarpedRounds >= 0; |
| int useThres1 = 1; |
| double wtThres = 0.2; |
| |
| // calculate center weight for laplacian (spatial term), and also the b term |
| // part for it |
| for (j = 0; j < width; j++) { |
| for (i = 0; i < height; i++) { |
| int curidx = j * height + i; |
| b_temp[curidx] = 0; |
| b_temp[curidx + offset] = 0; |
| mv_center_wt[i * width + j] = 0; |
| for (int k = 0; k < 4; k++) { |
| checki = i + check_locs_y[k]; |
| checkj = j + check_locs_x[k]; |
| if (checki < 0 || checki >= height || checkj < 0 || checkj >= width) { |
| // if at block bound, but not frame bound, use init mv with |
| // blkbound_wt |
| if (checki + sh >= 0 && checki + sh < fheight && checkj + sw >= 0 && |
| checkj + sw < fwidth) { |
| mv_center_wt[i * width + j] += blkbound_wt; |
| b_temp[curidx] += |
| blkbound_wt * initmv[(checki)*imvstr + checkj].col; |
| b_temp[curidx + offset] += |
| blkbound_wt * initmv[(checki)*imvstr + checkj].row; |
| } |
| } else { |
| double cur_wt = mv_weight[checki * width + checkj]; |
| if (cur_wt > mv_weight[i * width + j] && useModifyLap) cur_wt = 1; |
| if (cur_wt > wtThres && useThres1) cur_wt = 1; |
| mv_center_wt[i * width + j] += cur_wt; |
| } |
| } |
| #if OPFL_INIT_WT |
| mv_center_wt[i * width + j] += init_wts[(i)*imvstr + j]; |
| b_temp[curidx] += init_wts[(i)*imvstr + j] * initmv[(i)*imvstr + j].col; |
| b_temp[curidx + offset] += |
| init_wts[(i)*imvstr + j] * initmv[(i)*imvstr + j].row; |
| #endif |
| mv_center_wt[i * width + j] *= -1; |
| } |
| } |
| // get Laplacian filter matrix L |
| c = 0; |
| for (j = 0; j < width; j++) { |
| for (i = 0; i < height; i++) { |
| int curidx = j * height + i; |
| // center |
| row_pos[c] = curidx; |
| col_pos[c] = curidx; |
| values[c] = mv_center_wt[i * width + j]; |
| c++; |
| row_pos[c] = curidx + offset; |
| col_pos[c] = curidx + offset; |
| values[c] = mv_center_wt[i * width + j]; |
| c++; |
| // 4 direct neighbors |
| for (int k = 0; k < 4; k++) { |
| double cur_wt; |
| checki = i + check_locs_y[k]; |
| checkj = j + check_locs_x[k]; |
| if (checki < 0 || checki >= height || checkj < 0 || checkj >= width) |
| continue; |
| |
| cur_wt = mv_weight[checki * width + checkj]; |
| if (cur_wt > mv_weight[i * width + j] && useModifyLap) cur_wt = 1; |
| if (cur_wt > wtThres && useThres1) cur_wt = 1; |
| |
| int nextidx = checkj * height + checki; |
| row_pos[c] = curidx; |
| col_pos[c] = nextidx; |
| values[c] = cur_wt; |
| c++; |
| row_pos[c] = curidx + offset; |
| col_pos[c] = nextidx + offset; |
| values[c] = cur_wt; |
| c++; |
| } |
| } |
| } |
| init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height, |
| 2 * width * height, &L); |
| // get part of b from init_mv and boundary |
| mtx_vect_multi_left(&L, b_temp, b, 2 * width * height); |
| free_sparse_mtx_elems(&L); |
| // build LtL and vectorize mv_last |
| c = 0; |
| // use the mv_vec buffer temporarily |
| double *mv_last_vec = mv_vec; |
| for (j = 0; j < width; j++) { |
| for (i = 0; i < height; i++) { |
| int curidx = j * height + i; |
| double center_wt = |
| mv_center_wt[i * width + j] * mv_center_wt[i * width + j]; |
| mv_last_vec[curidx] = mv_start[i * mvstr + j].col; |
| mv_last_vec[curidx + offset] = mv_start[i * mvstr + j].row; |
| // 4 direct neighbors |
| for (int k = 0; k < 4; k++) { |
| double cur_wt, reverse_wt; |
| checki = i + check_locs_y[k]; |
| checkj = j + check_locs_x[k]; |
| if (checki < 0 || checki >= height || checkj < 0 || checkj >= width) |
| continue; |
| |
| cur_wt = mv_weight[checki * width + checkj]; |
| if (cur_wt > mv_weight[i * width + j] && useModifyLap) cur_wt = 1; |
| if (cur_wt > wtThres && useThres1) cur_wt = 1; |
| |
| reverse_wt = mv_weight[i * width + j]; |
| if (reverse_wt > mv_weight[checki * width + checkj] && useModifyLap) |
| reverse_wt = 1; |
| if (reverse_wt > wtThres && useThres1) reverse_wt = 1; |
| |
| int nextidx = checkj * height + checki; |
| row_pos[c] = curidx; |
| col_pos[c] = nextidx; |
| values[c] = cur_wt * mv_center_wt[i * width + j] + |
| reverse_wt * mv_center_wt[checki * width + checkj]; |
| c++; |
| row_pos[c] = curidx + offset; |
| col_pos[c] = nextidx + offset; |
| values[c] = cur_wt * mv_center_wt[i * width + j] + |
| reverse_wt * mv_center_wt[checki * width + checkj]; |
| c++; |
| center_wt += reverse_wt * reverse_wt; |
| } |
| // center |
| row_pos[c] = curidx; |
| col_pos[c] = curidx; |
| values[c] = center_wt; |
| c++; |
| row_pos[c] = curidx + offset; |
| col_pos[c] = curidx + offset; |
| values[c] = center_wt; |
| c++; |
| // diag corners |
| for (int k = 4; k < 8; k++) { |
| checki = i + check_locs_y[k]; |
| checkj = j + check_locs_x[k]; |
| if (checki < 0 || checki >= height || checkj < 0 || checkj >= width) |
| continue; |
| |
| double cur_wt1 = mv_weight[checki * width + checkj], |
| cur_wt2 = mv_weight[checki * width + checkj], |
| reverse_wt1 = mv_weight[i * width + j], |
| reverse_wt2 = mv_weight[i * width + j]; |
| |
| if (cur_wt1 > mv_weight[i * width + checkj] && useModifyLap) |
| cur_wt1 = 1; |
| if (cur_wt1 > wtThres && useThres1) cur_wt1 = 1; |
| if (reverse_wt1 > mv_weight[i * width + checkj] && useModifyLap) |
| reverse_wt1 = 1; |
| if (reverse_wt1 > wtThres && useThres1) reverse_wt1 = 1; |
| if (cur_wt2 > mv_weight[checki * width + j] && useModifyLap) |
| cur_wt2 = 1; |
| if (cur_wt2 > wtThres && useThres1) cur_wt2 = 1; |
| if (reverse_wt2 > mv_weight[checki * width + j] && useModifyLap) |
| reverse_wt2 = 1; |
| if (reverse_wt2 > wtThres && useThres1) reverse_wt2 = 1; |
| |
| int nextidx = checkj * height + checki; |
| row_pos[c] = curidx; |
| col_pos[c] = nextidx; |
| values[c] = cur_wt1 * reverse_wt1 + cur_wt2 * reverse_wt2; |
| c++; |
| row_pos[c] = curidx + offset; |
| col_pos[c] = nextidx + offset; |
| values[c] = cur_wt1 * reverse_wt1 + cur_wt2 * reverse_wt2; |
| c++; |
| } |
| // 4 direction with dist of 2 |
| for (int k = 8; k < 12; k++) { |
| checki = i + check_locs_y[k]; |
| checkj = j + check_locs_x[k]; |
| int midi = i + check_locs_y[k] / 2; |
| int midj = j + check_locs_x[k] / 2; |
| if (checki < 0 || checki >= height || checkj < 0 || checkj >= width) |
| continue; |
| |
| double cur_wt = mv_weight[checki * width + checkj], |
| reverse_wt = mv_weight[i * width + j]; |
| |
| if (cur_wt > mv_weight[midi * width + midj] && useModifyLap) cur_wt = 1; |
| if (cur_wt > wtThres && useThres1) cur_wt = 1; |
| if (reverse_wt > mv_weight[midi * width + midj] && useModifyLap) |
| reverse_wt = 1; |
| if (reverse_wt > wtThres && useThres1) reverse_wt = 1; |
| |
| int nextidx = checkj * height + checki; |
| row_pos[c] = curidx; |
| col_pos[c] = nextidx; |
| values[c] = cur_wt * reverse_wt; |
| c++; |
| row_pos[c] = curidx + offset; |
| col_pos[c] = nextidx + offset; |
| values[c] = cur_wt * reverse_wt; |
| c++; |
| } |
| } |
| } |
| init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height, |
| 2 * width * height, &LtL); |
| // get part of b from last iteration |
| mtx_vect_multi_right(&LtL, mv_last_vec, b_temp, 2 * width * height); |
| free_sparse_mtx_elems(&LtL); |
| |
| // build the final A on top of LtL |
| // modify diagonal elements |
| for (int k = 0; k < c; k++) { |
| values[k] *= a_squared; |
| if (row_pos[k] != col_pos[k]) continue; |
| int curidx = row_pos[k]; |
| if (curidx < offset) { |
| i = curidx % height; |
| j = curidx / height; |
| values[k] += |
| pixel_weight[i * width + j] * Ex[i * width + j] * Ex[i * width + j] + |
| 0.00001; |
| } else { |
| curidx -= offset; |
| i = curidx % height; |
| j = curidx / height; |
| values[k] += |
| pixel_weight[i * width + j] * Ey[i * width + j] * Ey[i * width + j] + |
| 0.00001; |
| } |
| } |
| // add cross terms to A and modify b with ExEt / EyEt |
| for (j = 0; j < width; j++) { |
| for (i = 0; i < height; i++) { |
| int curidx = j * height + i; |
| // modify b |
| b[curidx] = -a_squared * (b[curidx] + b_temp[curidx]); |
| b[curidx] -= |
| pixel_weight[i * width + j] * Ex[i * width + j] * Et[i * width + j]; |
| b[curidx + offset] = |
| -a_squared * (b[curidx + offset] + b_temp[curidx + offset]); |
| b[curidx + offset] -= |
| pixel_weight[i * width + j] * Ey[i * width + j] * Et[i * width + j]; |
| // add cross terms to A |
| row_pos[c] = curidx; |
| col_pos[c] = curidx + offset; |
| values[c] = |
| pixel_weight[i * width + j] * Ex[i * width + j] * Ey[i * width + j]; |
| c++; |
| row_pos[c] = curidx + offset; |
| col_pos[c] = curidx; |
| values[c] = |
| pixel_weight[i * width + j] * Ex[i * width + j] * Ey[i * width + j]; |
| c++; |
| } |
| } |
| // construct sparse mtx A |
| init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height, |
| 2 * width * height, &A); |
| endi = clock(); |
| timeinit += (double)(endi - starti) / CLOCKS_PER_SEC; |
| |
| starts = clock(); |
| // solve Ax = b |
| conjugate_gradient_sparse(&A, b, 2 * width * height, mv_vec); |
| ends = clock(); |
| timesolve += (double)(ends - starts) / CLOCKS_PER_SEC; |
| |
| // reshape motion field to 2D |
| cost = 0; |
| for (j = 0; j < width; j++) { |
| for (i = 0; i < height; i++) { |
| mv_start_new[i * mvstr + j].col = mv_vec[j * height + i]; |
| cost += mv_vec[j * height + i] * mv_vec[j * height + i]; |
| } |
| } |
| for (j = 0; j < width; j++) { |
| for (i = 0; i < height; i++) { |
| mv_start_new[i * mvstr + j].row = mv_vec[width * height + j * height + i]; |
| cost += mv_vec[width * height + j * height + i] * |
| mv_vec[width * height + j * height + i]; |
| } |
| } |
| |
| // free buffers |
| aom_free(row_pos); |
| aom_free(col_pos); |
| aom_free(values); |
| free_sparse_mtx_elems(&A); |
| aom_free(mv_vec); |
| aom_free(b); |
| aom_free(b_temp); |
| aom_free(pixel_weight); |
| aom_free(mv_weight); |
| aom_free(mv_center_wt); |
| |
| cost = sqrt(cost); // 2 norm |
| return cost; |
| } |
| |
| /* |
| * Update motion field at each iteration by a fast iterative method. |
| * |
| * Input: |
| * buf_struct: containing buffers of the reference frames |
| * mf_last: initial motion field |
| * level: current scale level, 0 = original, 1 = 0.5, 2 = 0.25, etc. |
| * dstpos: dst frame position |
| * as_scale: scale the laplacian multiplier to perform "annealing" |
| * usescale: 0->do not scale the original, 1->do scaling of images |
| * blk_info: information on the current block |
| * |
| * Output: |
| * mf_new: pointer to calculated motion field |
| */ |
| double iterate_update_mv_fast(OPFL_BUFFER_STRUCT *buf_struct, DB_MV *mf_last, |
| DB_MV *mf_new, int level, double dstpos, |
| double as_scale, int usescale, |
| OPFL_BLK_INFO blk_info) { |
| double *Ex, *Ey, *Et; |
| double a_squared = OF_A_SQUARED * as_scale; |
| double cost = 0; |
| |
| YV12_BUFFER_CONFIG *ref0, *ref1, *buf_init0, *buf_init1; |
| int l = level; |
| if (!usescale) l = 0; |
| ref0 = buf_struct->ref0_buf[l]; |
| ref1 = buf_struct->ref1_buf[l]; |
| buf_init0 = buf_struct->ref0_warped_buf[l]; |
| buf_init1 = buf_struct->ref1_warped_buf[l]; |
| |
| int y_width = blk_info.blk_width, y_height = blk_info.blk_height; |
| int starth = blk_info.starth, startw = blk_info.startw; |
| int sh = starth >> level, sw = startw >> level; |
| int width = y_width, height = y_height; |
| width = width >> level; |
| height = height >> level; |
| if (usescale) { |
| y_width = y_width >> level; |
| y_height = y_height >> level; |
| starth = starth >> level; |
| startw = startw >> level; |
| } |
| int mvstr = width + 2 * AVG_MF_BORDER; |
| DB_MV *mv_start = mf_last + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| DB_MV *mv_start_new = mf_new + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| int i, j; |
| |
| // allocate buffers |
| Ex = buf_struct->Ex; |
| Ey = buf_struct->Ey; |
| Et = buf_struct->Et; |
| YV12_BUFFER_CONFIG *buffer0 = buf_struct->buffer0[usescale ? level : 0]; |
| YV12_BUFFER_CONFIG *buffer1 = buf_struct->buffer1[usescale ? level : 0]; |
| |
| int imvstr = buf_struct->ref0_buf[0]->y_width; |
| imvstr = (imvstr >> level) + 2 * AVG_MF_BORDER; |
| DB_MV *initmv = buf_struct->init_mv_buf[level]; |
| initmv = initmv + (sh + AVG_MF_BORDER) * imvstr + sw + AVG_MF_BORDER; |
| int fheight = buf_struct->ref0_buf[0]->y_height; |
| int fwidth = buf_struct->ref0_buf[0]->y_width; |
| fheight = fheight >> level; |
| fwidth = fwidth >> level; |
| |
| clock_t starts = clock(); |
| if (level == 0 || !usescale) |
| warp_optical_flow_fwd(ref0, ref1, mv_start, mvstr, buffer0, dstpos, level, |
| usescale, blk_info); |
| else |
| warp_optical_flow_fwd_bilinear(ref0, ref1, mv_start, mvstr, buffer0, dstpos, |
| level, usescale, blk_info); |
| if (level == 0 || !usescale) |
| warp_optical_flow_back(ref1, ref0, mv_start, mvstr, buffer1, 1 - dstpos, |
| level, usescale, blk_info); |
| else |
| warp_optical_flow_back_bilinear(ref1, ref0, mv_start, mvstr, buffer1, |
| 1 - dstpos, level, usescale, blk_info); |
| clock_t ends = clock(); |
| timesub += (double)(ends - starts) / CLOCKS_PER_SEC; |
| |
| clock_t startd = clock(); |
| // Calculate partial derivatives |
| opfl_get_derivatives(Ex, Ey, Et, buffer0, buffer1, buf_init0, buf_init1, |
| dstpos, level, usescale, blk_info); |
| clock_t endd = clock(); |
| timeder += (double)(endd - startd) / CLOCKS_PER_SEC; |
| |
| // iterative solver |
| starts = clock(); |
| DB_MV *tempmv; |
| DB_MV *bufmv_b = |
| aom_calloc((height + 2 * AVG_MF_BORDER) * (width + 2 * AVG_MF_BORDER), |
| sizeof(DB_MV)); |
| DB_MV *bufmv = bufmv_b + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| DB_MV *lp_last = aom_calloc(height * mvstr, sizeof(DB_MV)); |
| double *denorm = aom_calloc(height * width, sizeof(double)); |
| DB_MV avg; |
| // get the laplacian of initial motion field |
| pad_motion_field_border(mv_start, width, height, mvstr); |
| int i0, i1, j0, j1; |
| for (i = 0; i < height; i++) { |
| i0 = i - 1; |
| i1 = i + 1; |
| for (j = 0; j < width; j++) { |
| j0 = j - 1; |
| j1 = j + 1; |
| bufmv[i * mvstr + j].row = 0; |
| bufmv[i * mvstr + j].col = 0; |
| |
| lp_last[i * mvstr + j].row = 0.25 * mv_start[i0 * mvstr + j].row + |
| 0.25 * mv_start[i1 * mvstr + j].row + |
| 0.25 * mv_start[i * mvstr + j0].row + |
| 0.25 * mv_start[i * mvstr + j1].row; |
| lp_last[i * mvstr + j].row -= mv_start[i * mvstr + j].row; |
| lp_last[i * mvstr + j].col = 0.25 * mv_start[i0 * mvstr + j].col + |
| 0.25 * mv_start[i1 * mvstr + j].col + |
| 0.25 * mv_start[i * mvstr + j0].col + |
| 0.25 * mv_start[i * mvstr + j1].col; |
| lp_last[i * mvstr + j].col -= mv_start[i * mvstr + j].col; |
| denorm[i * width + j] = 16 * a_squared + |
| Ex[i * width + j] * Ex[i * width + j] + |
| Ey[i * width + j] * Ey[i * width + j]; |
| } |
| } |
| // calculate the motion field |
| for (int k = 0; k < MAX_ITER_FAST_OPFL; k++) { |
| pad_motion_field_border(bufmv, width, height, mvstr); |
| for (i = 0; i < height; i++) { |
| i0 = i - 1; |
| i1 = i + 1; |
| for (j = 0; j < width; j++) { |
| j0 = j - 1; |
| j1 = j + 1; |
| avg.row = 0.25 * bufmv[i0 * mvstr + j].row + |
| 0.25 * bufmv[i1 * mvstr + j].row + |
| 0.25 * bufmv[i * mvstr + j0].row + |
| 0.25 * bufmv[i * mvstr + j1].row; |
| avg.row += lp_last[i * mvstr + j].row; |
| avg.col = 0.25 * bufmv[i0 * mvstr + j].col + |
| 0.25 * bufmv[i1 * mvstr + j].col + |
| 0.25 * bufmv[i * mvstr + j0].col + |
| 0.25 * bufmv[i * mvstr + j1].col; |
| avg.col += lp_last[i * mvstr + j].col; |
| mv_start_new[i * mvstr + j].col = |
| avg.col - Ex[i * width + j] * |
| (Ex[i * width + j] * avg.col + |
| Ey[i * width + j] * avg.row + Et[i * width + j]) / |
| denorm[i * width + j]; |
| mv_start_new[i * mvstr + j].row = |
| avg.row - Ey[i * width + j] * |
| (Ex[i * width + j] * avg.col + |
| Ey[i * width + j] * avg.row + Et[i * width + j]) / |
| denorm[i * width + j]; |
| } |
| } |
| if (k < MAX_ITER_FAST_OPFL - 1) { |
| tempmv = bufmv; |
| bufmv = mv_start_new; |
| mv_start_new = tempmv; |
| } |
| } |
| |
| aom_free(bufmv_b); |
| aom_free(lp_last); |
| aom_free(denorm); |
| ends = clock(); |
| timesolve += (double)(ends - starts) / CLOCKS_PER_SEC; |
| |
| // reshape motion field to 2D |
| cost = 0; |
| for (j = 0; j < width; j++) { |
| for (i = 0; i < height; i++) { |
| cost += mv_start_new[i * mvstr + j].row * mv_start_new[i * mvstr + j].row; |
| } |
| } |
| for (j = 0; j < width; j++) { |
| for (i = 0; i < height; i++) { |
| cost += mv_start_new[i * mvstr + j].col * mv_start_new[i * mvstr + j].col; |
| } |
| } |
| |
| cost = sqrt(cost); // 2 norm |
| return cost; |
| } |
| |
| void opfl_get_derivatives(double *Ex, double *Ey, double *Et, |
| YV12_BUFFER_CONFIG *buffer0, |
| YV12_BUFFER_CONFIG *buffer1, |
| YV12_BUFFER_CONFIG *buffer_init0, |
| YV12_BUFFER_CONFIG *buffer_init1, double dstpos, |
| int level, int usescale, OPFL_BLK_INFO blk_info) { |
| int lh = DERIVATIVE_FILTER_LENGTH; |
| int hleft = (lh - 1) / 2; |
| double filter[DERIVATIVE_FILTER_LENGTH] = { -1.0 / 60, 9.0 / 60, -45.0 / 60, |
| 0, 45.0 / 60, -9.0 / 60, |
| 1.0 / 60 }; |
| int idx, i, j; |
| int width = blk_info.blk_width; |
| int height = blk_info.blk_height; |
| int starth = blk_info.starth; |
| int startw = blk_info.startw; |
| int stride = buffer0->y_stride; |
| int istride = buffer_init0->y_stride; |
| |
| if (usescale) { |
| width = width >> level; |
| height = height >> level; |
| starth = starth >> level; |
| startw = startw >> level; |
| } |
| |
| uint8_t *buf0i = buffer_init0->y_buffer + starth * istride + startw; |
| uint8_t *buf1i = buffer_init1->y_buffer + starth * istride + startw; |
| |
| double *tempEx = NULL, *tempEy = NULL, *tempEt = NULL; |
| double *oriEx = Ex, *oriEy = Ey, *oriEt = Et; |
| if (!usescale && level != 0) { |
| tempEx = aom_calloc(width * height, sizeof(double)); |
| tempEy = aom_calloc(width * height, sizeof(double)); |
| tempEt = aom_calloc(width * height, sizeof(double)); |
| Ex = tempEx; |
| Ey = tempEy; |
| Et = tempEt; |
| } |
| |
| // horizontal derivative filter |
| for (i = 0; i < height; i++) { |
| for (j = 0; j < width; j++) { |
| Ex[i * width + j] = 0; |
| for (int k = 0; k < lh; k++) { |
| idx = j + (k - hleft); |
| if ((idx < 0 && blk_info.leftbound != 1) || |
| (idx > width - 1 && blk_info.rightbound != 1)) { |
| Ex[i * width + j] += |
| filter[k] * (double)(buf0i[i * istride + idx]) * (1 - dstpos) + |
| filter[k] * (double)(buf1i[i * istride + idx]) * dstpos; |
| } else { |
| if (idx < 0) |
| idx = 0; |
| else if (idx > width - 1) |
| idx = width - 1; |
| Ex[i * width + j] += |
| filter[k] * (double)(buffer0->y_buffer[i * stride + idx]) * |
| (1 - dstpos) + |
| filter[k] * (double)(buffer1->y_buffer[i * stride + idx]) * |
| dstpos; |
| } |
| } |
| } |
| } |
| // vertical derivative filter |
| for (i = 0; i < height; i++) { |
| for (j = 0; j < width; j++) { |
| Ey[i * width + j] = 0; |
| for (int k = 0; k < lh; k++) { |
| idx = i + (k - hleft); |
| if ((idx < 0 && blk_info.upbound != 1) || |
| (idx > height - 1 && blk_info.lowerbound != 1)) { |
| Ey[i * width + j] += |
| filter[k] * (double)(buf0i[idx * istride + j]) * (1 - dstpos) + |
| filter[k] * (double)(buf1i[idx * istride + j]) * dstpos; |
| } else { |
| if (idx < 0) |
| idx = 0; |
| else if (idx > height - 1) |
| idx = height - 1; |
| Ey[i * width + j] += |
| filter[k] * (double)(buffer0->y_buffer[idx * stride + j]) * |
| (1 - dstpos) + |
| filter[k] * (double)(buffer1->y_buffer[idx * stride + j]) * |
| dstpos; |
| } |
| } |
| } |
| } |
| // time derivative |
| for (i = 0; i < height; i++) { |
| for (j = 0; j < width; j++) { |
| Et[i * width + j] = (double)(buffer1->y_buffer[i * stride + j]) - |
| (double)(buffer0->y_buffer[i * stride + j]); |
| } |
| } |
| // rescale the derivatives |
| if (!usescale && level != 0) { |
| Ex = oriEx; |
| Ey = oriEy; |
| Et = oriEt; |
| int s_width = width >> level, s_height = height >> level, |
| blk_w = 1 << level; |
| for (i = 0; i < s_height; i++) { |
| for (j = 0; j < s_width; j++) { |
| Ex[i * s_width + j] = 0; |
| Ey[i * s_width + j] = 0; |
| Et[i * s_width + j] = 0; |
| for (int h = 0; h < blk_w; h++) { |
| for (int w = 0; w < blk_w; w++) { |
| Ex[i * s_width + j] += |
| tempEx[(i * blk_w + h) * width + j * blk_w + w]; |
| Ey[i * s_width + j] += |
| tempEy[(i * blk_w + h) * width + j * blk_w + w]; |
| Et[i * s_width + j] += |
| tempEt[(i * blk_w + h) * width + j * blk_w + w]; |
| } |
| } |
| Ex[i * s_width + j] /= blk_w; |
| Ey[i * s_width + j] /= blk_w; |
| Et[i * s_width + j] /= (blk_w * blk_w); |
| } |
| } |
| if (tempEx) aom_free(tempEx); |
| if (tempEy) aom_free(tempEy); |
| if (tempEt) aom_free(tempEt); |
| } |
| } |
| |
| void opfl_get_derivatives_nowarp(double *Ex, double *Ey, |
| OPFL_BUFFER_STRUCT *buf_struct, DB_MV *mf, |
| double dstpos, int level, |
| OPFL_BLK_INFO blk_info) { |
| int i, j, h, w; |
| int width = blk_info.blk_width; |
| int height = blk_info.blk_height; |
| int starth = blk_info.starth; |
| int startw = blk_info.startw; |
| |
| width = width >> level; |
| height = height >> level; |
| starth = starth >> level; |
| startw = startw >> level; |
| |
| int f_width = buf_struct->ref0_buf[0]->y_width; |
| int f_height = buf_struct->ref0_buf[0]->y_height; |
| f_width = f_width >> level; |
| f_height = f_height >> level; |
| int mf_stride = f_width + 2 * AVG_MF_BORDER; |
| |
| DB_MV *mf_start = mf + AVG_MF_BORDER * mf_stride + AVG_MF_BORDER; |
| mf_start = mf_start + starth * mf_stride + startw; |
| |
| double *ori_Ex0_start = |
| buf_struct->ori_Ex0_buf[level] + starth * f_width + startw; |
| double *ori_Ex1_start = |
| buf_struct->ori_Ex1_buf[level] + starth * f_width + startw; |
| double *ori_Ey0_start = |
| buf_struct->ori_Ey0_buf[level] + starth * f_width + startw; |
| double *ori_Ey1_start = |
| buf_struct->ori_Ey1_buf[level] + starth * f_width + startw; |
| |
| int mv_r, mv_c, y_loc, x_loc; |
| double di, dj; |
| // calculate Ex, Ey |
| for (i = 0; i < height; i++) { |
| for (j = 0; j < width; j++) { |
| // printf("%.2f ", Ex[i*width+j]); |
| Ex[i * width + j] = 0; |
| Ey[i * width + j] = 0; |
| // from left ref |
| mv_r = opfl_floor_double_2_int(-dstpos * mf_start[i * mf_stride + j].row); |
| mv_c = opfl_floor_double_2_int(-dstpos * mf_start[i * mf_stride + j].col); |
| di = -dstpos * mf_start[i * mf_stride + j].row - mv_r; |
| dj = -dstpos * mf_start[i * mf_stride + j].col - mv_c; |
| |
| int yidx = opfl_round_double_2_int(di * 8); |
| int xidx = opfl_round_double_2_int(dj * 8); |
| yidx *= 2; |
| xidx *= 2; |
| if (yidx == 16) { |
| yidx = 0; |
| mv_c += 1; |
| } |
| if (xidx == 16) { |
| xidx = 0; |
| mv_r += 1; |
| } |
| assert(xidx <= 14 && xidx >= 0); |
| assert(yidx <= 14 && yidx >= 0); |
| |
| // Ex0 |
| double x; |
| double y[8]; |
| for (h = -3; h < 5; h++) { |
| y[h + 3] = 0; |
| for (w = -3; w < 5; w++) { |
| y_loc = i + mv_r + h; |
| x_loc = j + mv_c + w; |
| if (y_loc + starth < 0) { |
| y_loc = -starth; |
| } else if (y_loc + starth >= f_height) { |
| y_loc = f_height - starth - 1; |
| } |
| if (x_loc + startw < 0) { |
| x_loc = -startw; |
| } else if (x_loc + startw >= f_width) { |
| x_loc = f_width - startw - 1; |
| } |
| y[h + 3] += ori_Ex0_start[y_loc * f_width + x_loc] * |
| optical_flow_warp_filter[xidx][w + 3]; |
| } |
| y[h + 3] /= (double)(1 << 7); |
| } |
| x = 0; |
| for (h = 0; h < 8; h++) { |
| x += y[h] * optical_flow_warp_filter[yidx][h]; |
| } |
| x /= (double)(1 << 7); |
| Ex[i * width + j] += (1 - dstpos) * x; |
| // Ey0 |
| for (h = -3; h < 5; h++) { |
| y[h + 3] = 0; |
| for (w = -3; w < 5; w++) { |
| y_loc = i + mv_r + h; |
| x_loc = j + mv_c + w; |
| if (y_loc + starth < 0) { |
| y_loc = -starth; |
| } else if (y_loc + starth >= f_height) { |
| y_loc = f_height - starth - 1; |
| } |
| if (x_loc + startw < 0) { |
| x_loc = -startw; |
| } else if (x_loc + startw >= f_width) { |
| x_loc = f_width - startw - 1; |
| } |
| y[h + 3] += ori_Ey0_start[y_loc * f_width + x_loc] * |
| optical_flow_warp_filter[xidx][w + 3]; |
| } |
| y[h + 3] /= (double)(1 << 7); |
| } |
| x = 0; |
| for (h = 0; h < 8; h++) { |
| x += y[h] * optical_flow_warp_filter[yidx][h]; |
| } |
| x /= (double)(1 << 7); |
| Ey[i * width + j] += (1 - dstpos) * x; |
| |
| // from right ref |
| mv_r = opfl_floor_double_2_int((1 - dstpos) * |
| mf_start[i * mf_stride + j].row); |
| mv_c = opfl_floor_double_2_int((1 - dstpos) * |
| mf_start[i * mf_stride + j].col); |
| di = (1 - dstpos) * mf_start[i * mf_stride + j].row - mv_r; |
| dj = (1 - dstpos) * mf_start[i * mf_stride + j].col - mv_c; |
| yidx = opfl_round_double_2_int(di * 8); |
| xidx = opfl_round_double_2_int(dj * 8); |
| yidx *= 2; |
| xidx *= 2; |
| if (yidx == 16) { |
| yidx = 0; |
| mv_c += 1; |
| } |
| if (xidx == 16) { |
| xidx = 0; |
| mv_r += 1; |
| } |
| assert(xidx <= 14 && xidx >= 0); |
| assert(yidx <= 14 && yidx >= 0); |
| |
| // Ex1 |
| for (h = -3; h < 5; h++) { |
| y[h + 3] = 0; |
| for (w = -3; w < 5; w++) { |
| y_loc = i + mv_r + h; |
| x_loc = j + mv_c + w; |
| if (y_loc + starth < 0) { |
| y_loc = -starth; |
| } else if (y_loc + starth >= f_height) { |
| y_loc = f_height - starth - 1; |
| } |
| if (x_loc + startw < 0) { |
| x_loc = -startw; |
| } else if (x_loc + startw >= f_width) { |
| x_loc = f_width - startw - 1; |
| } |
| y[h + 3] += ori_Ex1_start[y_loc * f_width + x_loc] * |
| optical_flow_warp_filter[xidx][w + 3]; |
| } |
| y[h + 3] /= (double)(1 << 7); |
| } |
| x = 0; |
| for (h = 0; h < 8; h++) { |
| x += y[h] * optical_flow_warp_filter[yidx][h]; |
| } |
| x /= (double)(1 << 7); |
| Ex[i * width + j] += dstpos * x; |
| // Ey1 |
| for (h = -3; h < 5; h++) { |
| y[h + 3] = 0; |
| for (w = -3; w < 5; w++) { |
| y_loc = i + mv_r + h; |
| x_loc = j + mv_c + w; |
| if (y_loc + starth < 0) { |
| y_loc = -starth; |
| } else if (y_loc + starth >= f_height) { |
| y_loc = f_height - starth - 1; |
| } |
| if (x_loc + startw < 0) { |
| x_loc = -startw; |
| } else if (x_loc + startw >= f_width) { |
| x_loc = f_width - startw - 1; |
| } |
| y[h + 3] += ori_Ey1_start[y_loc * f_width + x_loc] * |
| optical_flow_warp_filter[xidx][w + 3]; |
| } |
| y[h + 3] /= (double)(1 << 7); |
| } |
| x = 0; |
| for (h = 0; h < 8; h++) { |
| x += y[h] * optical_flow_warp_filter[yidx][h]; |
| } |
| x /= (double)(1 << 7); |
| Ey[i * width + j] += dstpos * x; |
| |
| // printf("%.2f \n", Ex[i*width+j]); |
| } |
| } |
| } |
| |
| /* |
| * Warp the Y component of src to dst according to the motion field |
| * Motion field points back from dst to src |
| */ |
| void warp_optical_flow_back(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref, |
| DB_MV *mf_start, int mvstr, YV12_BUFFER_CONFIG *dst, |
| double dstpos, int level, int usescale, |
| OPFL_BLK_INFO blk_info) { |
| int fwidth = src->y_width, fheight = src->y_height; |
| int width = blk_info.blk_width; |
| int height = blk_info.blk_height; |
| int starth = blk_info.starth; |
| int startw = blk_info.startw; |
| int srcstride = src->y_stride; |
| int dststride = dst->y_stride; |
| uint8_t *srcy = src->y_buffer; |
| uint8_t *dsty = dst->y_buffer; |
| uint8_t *refy = ref->y_buffer; |
| |
| double ii, jj, di, dj; |
| int i0, j0; |
| |
| int blk_w = 1; |
| if (!usescale) { |
| blk_w = blk_w << level; |
| } else { |
| width = width >> level; |
| height = height >> level; |
| starth = starth >> level; |
| startw = startw >> level; |
| } |
| |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| ii = i + starth + |
| mf_start[(i / blk_w) * mvstr + j / blk_w].row * blk_w * dstpos; |
| jj = j + startw + |
| mf_start[(i / blk_w) * mvstr + j / blk_w].col * blk_w * dstpos; |
| i0 = opfl_floor_double_2_int(ii); |
| di = ii - i0; |
| j0 = opfl_floor_double_2_int(jj); |
| dj = jj - j0; |
| if (i0 < 0 || i0 > fheight - 1 || j0 < 0 || j0 > fwidth - 1) { |
| dsty[i * dststride + j] = refy[(i + starth) * srcstride + j + startw]; |
| continue; |
| } |
| dsty[i * dststride + j] = |
| get_sub_pel_y(srcy + i0 * srcstride + j0, srcstride, di, dj); |
| } |
| } |
| } |
| |
| /* |
| * Warp the Y component of src to dst using bilinear method |
| * Motion field points back from dst to src |
| */ |
| void warp_optical_flow_back_bilinear(YV12_BUFFER_CONFIG *src, |
| YV12_BUFFER_CONFIG *ref, DB_MV *mf_start, |
| int mvstr, YV12_BUFFER_CONFIG *dst, |
| double dstpos, int level, int usescale, |
| OPFL_BLK_INFO blk_info) { |
| int fwidth = src->y_width, fheight = src->y_height; |
| int width = blk_info.blk_width; |
| int height = blk_info.blk_height; |
| int starth = blk_info.starth; |
| int startw = blk_info.startw; |
| int srcstride = src->y_stride; |
| int dststride = dst->y_stride; |
| uint8_t *srcy = src->y_buffer; |
| uint8_t *dsty = dst->y_buffer; |
| uint8_t *refy = ref->y_buffer; |
| |
| double ii, jj, di, dj, temp; |
| int i0, j0, i1, j1; |
| |
| int blk_w = 1; |
| if (!usescale) { |
| blk_w = blk_w << level; |
| } else { |
| width = width >> level; |
| height = height >> level; |
| starth = starth >> level; |
| startw = startw >> level; |
| } |
| |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| ii = i + starth + |
| mf_start[(i / blk_w) * mvstr + j / blk_w].row * blk_w * dstpos; |
| jj = j + startw + |
| mf_start[(i / blk_w) * mvstr + j / blk_w].col * blk_w * dstpos; |
| i0 = opfl_floor_double_2_int(ii); |
| di = 1 - ii + i0; |
| i1 = i0 + 1; |
| j0 = opfl_floor_double_2_int(jj); |
| dj = 1 - jj + j0; |
| j1 = j0 + 1; |
| if (i0 < 0 || i0 > fheight - 1 || j0 < 0 || j0 > fwidth - 1) { |
| dsty[i * dststride + j] = refy[(i + starth) * srcstride + j + startw]; |
| continue; |
| } |
| temp = di * dj * (double)srcy[i0 * srcstride + j0] + |
| di * (1 - dj) * (double)srcy[i0 * srcstride + j1] + |
| (1 - di) * dj * (double)srcy[i1 * srcstride + j0] + |
| (1 - di) * (1 - dj) * (double)srcy[i1 * srcstride + j1]; |
| dsty[i * dststride + j] = (uint8_t)(temp + 0.5); |
| } |
| } |
| } |
| |
| /* |
| * Warp the Y component of src to dst |
| * Motion field points forward from src to dst |
| */ |
| void warp_optical_flow_fwd(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref, |
| DB_MV *mf_start, int mvstr, YV12_BUFFER_CONFIG *dst, |
| double dstpos, int level, int usescale, |
| OPFL_BLK_INFO blk_info) { |
| int fwidth = src->y_width, fheight = src->y_height; |
| int width = blk_info.blk_width; |
| int height = blk_info.blk_height; |
| int starth = blk_info.starth; |
| int startw = blk_info.startw; |
| int srcstride = src->y_stride; |
| int dststride = dst->y_stride; |
| uint8_t *srcy = src->y_buffer; |
| uint8_t *dsty = dst->y_buffer; |
| uint8_t *refy = ref->y_buffer; |
| |
| double ii, jj, di, dj; |
| int i0, j0; |
| |
| int blk_w = 1; |
| if (!usescale) { |
| blk_w = blk_w << level; |
| } else { |
| width = width >> level; |
| height = height >> level; |
| starth = starth >> level; |
| startw = startw >> level; |
| } |
| |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| ii = i + starth - |
| mf_start[(i / blk_w) * mvstr + j / blk_w].row * blk_w * dstpos; |
| jj = j + startw - |
| mf_start[(i / blk_w) * mvstr + j / blk_w].col * blk_w * dstpos; |
| i0 = opfl_floor_double_2_int(ii); |
| di = ii - i0; |
| j0 = opfl_floor_double_2_int(jj); |
| dj = jj - j0; |
| if (i0 < 0 || i0 > fheight - 1 || j0 < 0 || j0 > fwidth - 1) { |
| dsty[i * dststride + j] = refy[(i + starth) * srcstride + j + startw]; |
| continue; |
| } |
| // printf("%d, %d, %d, %d, %d, %d, %d, %d, %d\n",i, j, dststride, |
| // width, height, fwidth, fheight, startw, starth); |
| dsty[i * dststride + j] = |
| get_sub_pel_y(srcy + i0 * srcstride + j0, srcstride, di, dj); |
| } |
| } |
| } |
| |
| /* |
| * Warp the Y component of src to dst using bilinear method |
| * Motion field points forward from src to dst |
| */ |
| void warp_optical_flow_fwd_bilinear(YV12_BUFFER_CONFIG *src, |
| YV12_BUFFER_CONFIG *ref, DB_MV *mf_start, |
| int mvstr, YV12_BUFFER_CONFIG *dst, |
| double dstpos, int level, int usescale, |
| OPFL_BLK_INFO blk_info) { |
| int fwidth = src->y_width, fheight = src->y_height; |
| int width = blk_info.blk_width; |
| int height = blk_info.blk_height; |
| int starth = blk_info.starth; |
| int startw = blk_info.startw; |
| int srcstride = src->y_stride; |
| int dststride = dst->y_stride; |
| uint8_t *srcy = src->y_buffer; |
| uint8_t *dsty = dst->y_buffer; |
| uint8_t *refy = ref->y_buffer; |
| |
| double ii, jj, di, dj, temp; |
| int i0, j0, i1, j1; |
| |
| int blk_w = 1; |
| if (!usescale) { |
| blk_w = blk_w << level; |
| } else { |
| width = width >> level; |
| height = height >> level; |
| starth = starth >> level; |
| startw = startw >> level; |
| } |
| |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| ii = i + starth - |
| mf_start[(i / blk_w) * mvstr + j / blk_w].row * blk_w * dstpos; |
| jj = j + startw - |
| mf_start[(i / blk_w) * mvstr + j / blk_w].col * blk_w * dstpos; |
| i0 = opfl_floor_double_2_int(ii); |
| di = 1 - ii + i0; |
| i1 = i0 + 1; |
| j0 = opfl_floor_double_2_int(jj); |
| dj = 1 - jj + j0; |
| j1 = j0 + 1; |
| if (i0 < 0 || i0 > fheight - 1 || j0 < 0 || j0 > fwidth - 1) { |
| dsty[i * dststride + j] = refy[(i + starth) * srcstride + j + startw]; |
| continue; |
| } |
| temp = di * dj * (double)srcy[i0 * srcstride + j0] + |
| di * (1 - dj) * (double)srcy[i0 * srcstride + j1] + |
| (1 - di) * dj * (double)srcy[i1 * srcstride + j0] + |
| (1 - di) * (1 - dj) * (double)srcy[i1 * srcstride + j1]; |
| dsty[i * dststride + j] = (uint8_t)(temp + 0.5); |
| } |
| } |
| } |
| |
| /* |
| * Interpolate references according to the motion field |
| * |
| * Input: |
| * src0, src1: the reference frames |
| * mf_start: the motion field start pointer |
| * mvstr: motion field stride |
| * dstpos: position of the interpolated frame |
| * method: the blend method to be used |
| * blk_info: block information |
| * |
| * Output: |
| * dst: pointer to the interpolated frame |
| */ |
| void warp_optical_flow(YV12_BUFFER_CONFIG *src0, YV12_BUFFER_CONFIG *src1, |
| DB_MV *mf_start, int mvstr, YV12_BUFFER_CONFIG *dst, |
| double dstpos, OPFL_BLEND_METHOD method, |
| OPFL_BLK_INFO blk_info) { |
| if (method == OPFL_DIFF_SELECT) { |
| // warp_optical_flow_diff_select(src0, src1, mf_start, mvstr, dst, dstpos); |
| warp_optical_flow_bilateral(src0, src1, mf_start, mvstr, dst, dstpos, |
| blk_info); |
| return; |
| } |
| int fwidth = dst->y_width, fheight = dst->y_height; |
| int width = blk_info.blk_width; |
| int height = blk_info.blk_height; |
| int starth = blk_info.starth; |
| int startw = blk_info.startw; |
| int starthuv = starth >> 1; |
| int startwuv = startw >> 1; |
| int stride = src0->y_stride; |
| int uvstride = src0->uv_stride; |
| |
| uint8_t *src0y = src0->y_buffer + starth * stride + startw; |
| uint8_t *src1y = src1->y_buffer + starth * stride + startw; |
| uint8_t *dsty = dst->y_buffer + starth * stride + startw; |
| uint8_t *src0u = src0->u_buffer + starthuv * uvstride + startwuv; |
| uint8_t *src0v = src0->v_buffer + starthuv * uvstride + startwuv; |
| uint8_t *src1u = src1->u_buffer + starthuv * uvstride + startwuv; |
| uint8_t *src1v = src1->v_buffer + starthuv * uvstride + startwuv; |
| uint8_t *dstu = dst->u_buffer + starthuv * uvstride + startwuv; |
| uint8_t *dstv = dst->v_buffer + starthuv * uvstride + startwuv; |
| |
| double ii0, jj0, di0, dj0, di0uv = 0, dj0uv = 0; |
| double ii1, jj1, di1, dj1, di1uv = 0, dj1uv = 0; |
| int i0, j0; |
| int i1, j1; |
| double dstpel_y, dstpel_u, dstpel_v; |
| double dstpel_y0 = 0, dstpel_y1 = 0; |
| double pos; |
| int do_uv; |
| int use0, use1, inside0, inside1; |
| int nearest = ((dstpos <= 0.5) ? 0 : 1); |
| |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| pos = dstpos; |
| ii0 = i - mf_start[i * mvstr + j].row * dstpos; |
| jj0 = j - mf_start[i * mvstr + j].col * dstpos; |
| ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos); |
| jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos); |
| i0 = opfl_floor_double_2_int(ii0); |
| di0 = ii0 - i0; |
| j0 = opfl_floor_double_2_int(jj0); |
| dj0 = jj0 - j0; |
| i1 = opfl_floor_double_2_int(ii1); |
| di1 = ii1 - i1; |
| j1 = opfl_floor_double_2_int(jj1); |
| dj1 = jj1 - j1; |
| |
| do_uv = i % 2 == 0 && j % 2 == 0; // TODO(bohan) only considering 420 now |
| if (do_uv) { |
| di0uv = di0 / 2 + 0.5 * ((i0 % 2 + 2) % 2); |
| dj0uv = dj0 / 2 + 0.5 * ((j0 % 2 + 2) % 2); |
| di1uv = di1 / 2 + 0.5 * ((i1 % 2 + 2) % 2); |
| dj1uv = dj1 / 2 + 0.5 * ((j1 % 2 + 2) % 2); |
| } |
| |
| // Check availability of the references. |
| // If one ref is outside then do not use it. |
| inside0 = (i0 + starth >= 0 && i0 + starth < fheight - 1 && |
| j0 + startw >= 0 && j0 + startw < fwidth - 1); |
| inside1 = (i1 + starth >= 0 && i1 + starth < fheight - 1 && |
| j1 + startw >= 0 && j1 + startw < fwidth - 1); |
| use0 = inside0 == inside1 || inside0; |
| use1 = inside0 == inside1 || inside1; |
| |
| // If use nearest single method, then use only one reference |
| if (method == OPFL_NEAREST_SINGLE) { |
| if (use0 && use1) { |
| use0 = (nearest == 0); |
| use1 = (nearest == 1); |
| } |
| } |
| |
| // calculate subpel Y refs |
| if (use0) { |
| if (i0 + starth < 0) { |
| i0 = -starth; |
| di0 = 0; |
| ii0 = i0; |
| di0uv = 0; |
| } else if (i0 + starth > fheight - 1) { |
| i0 = fheight - 1 - starth; |
| di0 = 0; |
| ii0 = i0; |
| di0uv = 0; |
| } |
| if (j0 + startw < 0) { |
| j0 = -startw; |
| dj0 = 0; |
| jj0 = j0; |
| dj0uv = 0; |
| } else if (j0 + startw > fwidth - 1) { |
| j0 = fwidth - 1 - startw; |
| dj0 = 0; |
| jj0 = j0; |
| dj0uv = 0; |
| } |
| dstpel_y0 = |
| (double)get_sub_pel_y(src0y + i0 * stride + j0, stride, di0, dj0); |
| } |
| if (use1) { |
| if (i1 + starth < 0) { |
| i1 = -starth; |
| di1 = 0; |
| ii1 = i1; |
| di1uv = 0; |
| } else if (i1 + starth > fheight - 1) { |
| i1 = fheight - 1 - starth; |
| di1 = 0; |
| ii1 = i1; |
| di1uv = 0; |
| } |
| if (j1 + startw < 0) { |
| j1 = -startw; |
| dj1 = 0; |
| jj1 = j1; |
| dj1uv = 0; |
| } else if (j1 + startw > fwidth - 1) { |
| j1 = fwidth - 1 - startw; |
| dj1 = 0; |
| jj1 = j1; |
| dj1uv = 0; |
| } |
| dstpel_y1 = |
| (double)get_sub_pel_y(src1y + i1 * stride + j1, stride, di1, dj1); |
| } |
| |
| // If use diff single method, check the pixels when the refs do not agree |
| if (method == OPFL_DIFF_SINGLE) { |
| if (use0 && use1) { |
| if (fabs(dstpel_y0 - dstpel_y1) > OPTICAL_FLOW_DIFF_THRES) { |
| use0 = (nearest == 0); |
| use1 = (nearest == 1); |
| } |
| } |
| } |
| |
| if (use0 && !use1) |
| pos = 0; |
| else if (!use0 && use1) |
| pos = 1; |
| |
| // blend |
| dstpel_y = 0; |
| dstpel_u = 0; |
| dstpel_v = 0; |
| if (use0) { |
| dstpel_y += dstpel_y0 * (1 - pos); |
| if (do_uv) { |
| dstpel_u += |
| (double)get_sub_pel_uv( |
| src0u + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2), |
| uvstride, di0uv, dj0uv) * |
| (1 - pos); |
| dstpel_v += |
| (double)get_sub_pel_uv( |
| src0v + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2), |
| uvstride, di0uv, dj0uv) * |
| (1 - pos); |
| } |
| } |
| if (use1) { |
| dstpel_y += dstpel_y1 * pos; |
| if (do_uv) { |
| dstpel_u += |
| (double)get_sub_pel_uv( |
| src1u + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2), |
| uvstride, di1uv, dj1uv) * |
| pos; |
| dstpel_v += |
| (double)get_sub_pel_uv( |
| src1v + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2), |
| uvstride, di1uv, dj1uv) * |
| pos; |
| } |
| } |
| dsty[i * stride + j] = opfl_round_double_2_int(dstpel_y); |
| if (do_uv) { |
| dstu[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_u); |
| dstv[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_v); |
| } |
| } |
| } |
| } |
| |
| /* |
| * Interpolate references according to the motion field |
| * when the refs do not agree, use more advanced selection |
| * |
| * Input: |
| * src0, src1: the reference frames |
| * mf_start: the motion field start pointer |
| * mvstr: motion field stride |
| * dstpos: position of the interpolated frame |
| * |
| * Output: |
| * dst: pointer to the interpolated frame |
| */ |
| // TODO(bohan): this function needs to be updated to be used with |
| // block-based methods |
| void warp_optical_flow_diff_select(YV12_BUFFER_CONFIG *src0, |
| YV12_BUFFER_CONFIG *src1, DB_MV *mf_start, |
| int mvstr, YV12_BUFFER_CONFIG *dst, |
| double dstpos) { |
| int width = src0->y_width; |
| int height = src0->y_height; |
| int stride = src0->y_stride; |
| int uvstride = src0->uv_stride; |
| |
| uint8_t *src0y = src0->y_buffer; |
| uint8_t *src1y = src1->y_buffer; |
| uint8_t *dsty = dst->y_buffer; |
| uint8_t *src0u = src0->u_buffer; |
| uint8_t *src0v = src0->v_buffer; |
| uint8_t *src1u = src1->u_buffer; |
| uint8_t *src1v = src1->v_buffer; |
| uint8_t *dstu = dst->u_buffer; |
| uint8_t *dstv = dst->v_buffer; |
| |
| double ii0, jj0, di0, dj0, di0uv, dj0uv; |
| double ii1, jj1, di1, dj1, di1uv, dj1uv; |
| int i0, j0; |
| int i1, j1; |
| double dstpel_y, dstpel_u, dstpel_v; |
| double pos; |
| int do_uv; |
| |
| double *used0 = aom_calloc(height * width, sizeof(double)); |
| double *used1 = aom_calloc(height * width, sizeof(double)); |
| // refid: -1=unset; 0=ref0; 1=ref1; 2=both |
| int *refid = aom_calloc(height * width, sizeof(int)); |
| int *refid_mode = aom_calloc(height * width, sizeof(int)); |
| double *dstpel_y0 = aom_calloc(height * width, sizeof(double)); |
| double *dstpel_y1 = aom_calloc(height * width, sizeof(double)); |
| |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| used0[i * width + j] = 0; |
| used1[i * width + j] = 0; |
| refid[i * width + j] = -1; |
| } |
| } |
| // first check all pixels to see if the refs agree |
| // also note down which pixels in refs are used for reference |
| int disagree_cnt = 0; |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| pos = dstpos; |
| ii0 = i - mf_start[i * mvstr + j].row * dstpos; |
| jj0 = j - mf_start[i * mvstr + j].col * dstpos; |
| ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos); |
| jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos); |
| i0 = opfl_floor_double_2_int(ii0); |
| di0 = ii0 - i0; |
| j0 = opfl_floor_double_2_int(jj0); |
| dj0 = jj0 - j0; |
| i1 = opfl_floor_double_2_int(ii1); |
| di1 = ii1 - i1; |
| j1 = opfl_floor_double_2_int(jj1); |
| dj1 = jj1 - j1; |
| |
| int inside0 = (i0 >= 0 && i0 < height - 1 && j0 >= 0 && j0 < width - 1); |
| int inside1 = (i1 >= 0 && i1 < height - 1 && j1 >= 0 && j1 < width - 1); |
| int use0 = inside0 == inside1 || inside0; |
| int use1 = inside0 == inside1 || inside1; |
| if (inside0 && !inside1) |
| pos = 0; |
| else if (inside1 && !inside0) |
| pos = 1; |
| |
| dstpel_y0[i * width + j] = |
| (double)get_sub_pel_y(src0y + i0 * stride + j0, stride, di0, dj0); |
| dstpel_y1[i * width + j] = |
| (double)get_sub_pel_y(src1y + i1 * stride + j1, stride, di1, dj1); |
| // check if the refs are similar |
| if (inside0 && inside1) { |
| if (fabs(dstpel_y0[i * width + j] - dstpel_y1[i * width + j]) > |
| OPTICAL_FLOW_DIFF_THRES) { |
| disagree_cnt++; |
| continue; |
| } |
| } |
| if (inside0 || inside1) { |
| used0[(i0)*width + j0] += use0 * (1 - di0) * (1 - dj0); |
| used0[(i0 + 1) * width + j0] += use0 * (di0) * (1 - dj0); |
| used0[(i0)*width + j0 + 1] += use0 * (1 - di0) * (dj0); |
| used0[(i0 + 1) * width + j0 + 1] += use0 * (di0) * (dj0); |
| used1[(i1)*width + j1] += use1 * (1 - di1) * (1 - dj1); |
| used1[(i1 + 1) * width + j1] += use1 * (di1) * (1 - dj1); |
| used1[(i1)*width + j1 + 1] += use1 * (1 - di1) * (dj1); |
| used1[(i1 + 1) * width + j1 + 1] += use1 * (di1) * (dj1); |
| } // ignore when both out of bound |
| if (use0 && use1) |
| refid[i * width + j] = 2; |
| else if (use0 && !use1) |
| refid[i * width + j] = 0; |
| else if (!use0 && use1) |
| refid[i * width + j] = 1; |
| else |
| assert(0); |
| } |
| } |
| |
| // Determine if we want to trust the motion field |
| int dis_ref_id; |
| if (disagree_cnt > width * height * OPTICAL_FLOW_TRUST_MV_THRES) { |
| // Do not trust |
| dis_ref_id = ((dstpos <= 0.5) ? 0 : 1); |
| } else { |
| dis_ref_id = 2; |
| } |
| |
| // calculate how many time each pixel is referenced |
| double totalused0, totalused1; |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| if (dis_ref_id != 2) { |
| // since we do not trust the motion field, no need to select |
| refid[i * width + j] = dis_ref_id; |
| continue; |
| } else if (refid[i * width + j] >= 0) |
| continue; |
| |
| totalused0 = 0; |
| totalused1 = 0; |
| pos = dstpos; |
| ii0 = i - mf_start[i * mvstr + j].row * dstpos; |
| jj0 = j - mf_start[i * mvstr + j].col * dstpos; |
| ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos); |
| jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos); |
| i0 = opfl_floor_double_2_int(ii0); |
| di0 = ii0 - i0; |
| j0 = opfl_floor_double_2_int(jj0); |
| dj0 = jj0 - j0; |
| i1 = opfl_floor_double_2_int(ii1); |
| di1 = ii1 - i1; |
| j1 = opfl_floor_double_2_int(jj1); |
| dj1 = jj1 - j1; |
| |
| totalused0 += used0[(i0)*width + j0] * (1 - di0) * (1 - dj0); |
| totalused0 += used0[(i0 + 1) * width + j0] * (di0) * (1 - dj0); |
| totalused0 += used0[(i0)*width + j0 + 1] * (1 - di0) * (dj0); |
| totalused0 += used0[(i0 + 1) * width + j0 + 1] * (di0) * (dj0); |
| totalused1 += used1[(i1)*width + j1] * (1 - di1) * (1 - dj1); |
| totalused1 += used1[(i1 + 1) * width + j1] * (di1) * (1 - dj1); |
| totalused1 += used1[(i1)*width + j1 + 1] * (1 - di1) * (dj1); |
| totalused1 += used1[(i1 + 1) * width + j1 + 1] * (di1) * (dj1); |
| |
| // if one ref pixel has been referenced by other pixels more than the |
| // other ref by a threshold, then decide there is occlusion |
| if (totalused0 < (totalused0 + totalused1) * OPTICAL_FLOW_REF_THRES) { |
| refid[i * width + j] = 0; |
| } else if (totalused1 < |
| (totalused0 + totalused1) * OPTICAL_FLOW_REF_THRES) { |
| refid[i * width + j] = 1; |
| } else { |
| refid[i * width + j] = dis_ref_id; |
| } |
| } |
| } |
| |
| // All refs for each pixel have been decided now. |
| // Do mode filter to get rid of outliers |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| if (i == 0 || i == height - 1 || j == 0 || j == width - 1) { |
| refid_mode[i * width + j] = refid[i * width + j]; |
| continue; |
| } |
| refid_mode[i * width + j] = |
| ref_mode_filter_3x3(refid + i * width + j, width, dstpos); |
| } |
| } |
| |
| // blend according to the refs selected |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| int use0 = -1, use1 = -1; |
| pos = dstpos; |
| if (refid_mode[i * width + j] == 0) { |
| use0 = 1; |
| use1 = 0; |
| pos = 0; |
| } else if (refid_mode[i * width + j] == 1) { |
| use0 = 0; |
| use1 = 1; |
| pos = 1; |
| } else if (refid_mode[i * width + j] == 2) { |
| use0 = 1; |
| use1 = 1; |
| } else { |
| assert(0); |
| } |
| ii0 = i - mf_start[i * mvstr + j].row * dstpos; |
| jj0 = j - mf_start[i * mvstr + j].col * dstpos; |
| ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos); |
| jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos); |
| i0 = opfl_floor_double_2_int(ii0); |
| di0 = ii0 - i0; |
| j0 = opfl_floor_double_2_int(jj0); |
| dj0 = jj0 - j0; |
| i1 = opfl_floor_double_2_int(ii1); |
| di1 = ii1 - i1; |
| j1 = opfl_floor_double_2_int(jj1); |
| dj1 = jj1 - j1; |
| |
| do_uv = |
| (i % 2 == 0) && (j % 2 == 0); // TODO(bohan) only considering 420 now |
| if (do_uv) { |
| di0uv = di0 / 2 + 0.5 * ((i0 % 2 + 2) % 2); |
| dj0uv = dj0 / 2 + 0.5 * ((j0 % 2 + 2) % 2); |
| di1uv = di1 / 2 + 0.5 * ((i1 % 2 + 2) % 2); |
| dj1uv = dj1 / 2 + 0.5 * ((j1 % 2 + 2) % 2); |
| } |
| |
| dstpel_y = 0; |
| dstpel_u = 0; |
| dstpel_v = 0; |
| if (use0) { |
| dstpel_y += dstpel_y0[i * width + j] * (1 - pos); |
| if (do_uv) { |
| dstpel_u += |
| (double)get_sub_pel_uv( |
| src0u + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2), |
| uvstride, di0uv, dj0uv) * |
| (1 - pos); |
| dstpel_v += |
| (double)get_sub_pel_uv( |
| src0v + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2), |
| uvstride, di0uv, dj0uv) * |
| (1 - pos); |
| } |
| } |
| if (use1) { |
| dstpel_y += dstpel_y1[i * width + j] * pos; |
| if (do_uv) { |
| dstpel_u += |
| (double)get_sub_pel_uv( |
| src1u + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2), |
| uvstride, di1uv, dj1uv) * |
| pos; |
| dstpel_v += |
| (double)get_sub_pel_uv( |
| src1v + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2), |
| uvstride, di1uv, dj1uv) * |
| pos; |
| } |
| } |
| dsty[i * stride + j] = opfl_round_double_2_int(dstpel_y); |
| if (do_uv) { |
| dstu[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_u); |
| dstv[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_v); |
| } |
| } |
| } |
| |
| aom_free(refid); |
| aom_free(refid_mode); |
| aom_free(used0); |
| aom_free(used1); |
| aom_free(dstpel_y0); |
| aom_free(dstpel_y1); |
| } |
| |
| /* |
| * Interpolate references according to the motion field |
| * Use bilateral filters based on the difference of the |
| * pixels as well as confidence of initial mf |
| * |
| * Input: |
| * src0, src1: the reference frames |
| * mf_start: the motion field start pointer |
| * mvstr: motion field stride |
| * dstpos: position of the interpolated frame |
| * |
| * Output: |
| * dst: pointer to the interpolated frame |
| */ |
| void warp_optical_flow_bilateral(YV12_BUFFER_CONFIG *src0, |
| YV12_BUFFER_CONFIG *src1, DB_MV *mf_start, |
| int mvstr, YV12_BUFFER_CONFIG *dst, |
| double dstpos, OPFL_BLK_INFO blk_info) { |
| int fwidth = dst->y_width, fheight = dst->y_height; |
| int width = blk_info.blk_width; |
| int height = blk_info.blk_height; |
| int starth = blk_info.starth; |
| int startw = blk_info.startw; |
| int starthuv = starth >> 1; |
| int startwuv = startw >> 1; |
| int stride = src0->y_stride; |
| int uvstride = src0->uv_stride; |
| |
| uint8_t *src0y = src0->y_buffer + starth * stride + startw; |
| uint8_t *src1y = src1->y_buffer + starth * stride + startw; |
| uint8_t *dsty = dst->y_buffer + starth * stride + startw; |
| uint8_t *src0u = src0->u_buffer + starthuv * uvstride + startwuv; |
| uint8_t *src0v = src0->v_buffer + starthuv * uvstride + startwuv; |
| uint8_t *src1u = src1->u_buffer + starthuv * uvstride + startwuv; |
| uint8_t *src1v = src1->v_buffer + starthuv * uvstride + startwuv; |
| uint8_t *dstu = dst->u_buffer + starthuv * uvstride + startwuv; |
| uint8_t *dstv = dst->v_buffer + starthuv * uvstride + startwuv; |
| |
| double ii0, jj0, di0, dj0, di0uv, dj0uv; |
| double ii1, jj1, di1, dj1, di1uv, dj1uv; |
| int i0, j0; |
| int i1, j1; |
| double dstpel_y, dstpel_u, dstpel_v; |
| double pos; |
| int do_uv; |
| |
| double *used0 = aom_calloc(height * width, sizeof(double)); |
| double *used1 = aom_calloc(height * width, sizeof(double)); |
| // refid: -1=unset; 0=ref0; 1=ref1; 2=both |
| double *ref0wts = aom_calloc(height * width, sizeof(double)); |
| double *ref1wts = aom_calloc(height * width, sizeof(double)); |
| double *dstpel_y0 = aom_calloc(height * width, sizeof(double)); |
| double *dstpel_y1 = aom_calloc(height * width, sizeof(double)); |
| |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| used0[i * width + j] = 0; |
| used1[i * width + j] = 0; |
| } |
| } |
| // first note down which pixels in refs are used for reference |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| ii0 = i - mf_start[i * mvstr + j].row * dstpos; |
| jj0 = j - mf_start[i * mvstr + j].col * dstpos; |
| ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos); |
| jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos); |
| |
| if (ii0 + starth < 0) |
| ii0 = -starth; |
| else if (ii0 + starth >= fheight) |
| ii0 = fheight - starth - 1; |
| if (jj0 + startw < 0) |
| jj0 = -startw; |
| else if (jj0 + startw >= fwidth) |
| jj0 = fwidth - startw - 1; |
| if (ii1 + starth < 0) |
| ii1 = -starth; |
| else if (ii1 + starth >= fheight) |
| ii1 = fheight - starth - 1; |
| if (jj1 + startw < 0) |
| jj1 = -startw; |
| else if (jj1 + startw >= fwidth) |
| jj1 = fwidth - startw - 1; |
| |
| i0 = opfl_floor_double_2_int(ii0); |
| di0 = ii0 - i0; |
| j0 = opfl_floor_double_2_int(jj0); |
| dj0 = jj0 - j0; |
| i1 = opfl_floor_double_2_int(ii1); |
| di1 = ii1 - i1; |
| j1 = opfl_floor_double_2_int(jj1); |
| dj1 = jj1 - j1; |
| |
| int inside0 = (i0 >= 0 && i0 < height - 1 && j0 >= 0 && j0 < width - 1); |
| int inside1 = (i1 >= 0 && i1 < height - 1 && j1 >= 0 && j1 < width - 1); |
| |
| if (inside0) { |
| used0[(i0)*width + j0] += (1 - di0) * (1 - dj0); |
| used0[(i0 + 1) * width + j0] += (di0) * (1 - dj0); |
| used0[(i0)*width + j0 + 1] += (1 - di0) * (dj0); |
| used0[(i0 + 1) * width + j0 + 1] += (di0) * (dj0); |
| } |
| if (inside1) { |
| used1[(i1)*width + j1] += (1 - di1) * (1 - dj1); |
| used1[(i1 + 1) * width + j1] += (di1) * (1 - dj1); |
| used1[(i1)*width + j1 + 1] += (1 - di1) * (dj1); |
| used1[(i1 + 1) * width + j1 + 1] += (di1) * (dj1); |
| } // ignore when both out of bound |
| |
| dstpel_y0[i * width + j] = |
| (double)get_sub_pel_y(src0y + i0 * stride + j0, stride, di0, dj0); |
| dstpel_y1[i * width + j] = |
| (double)get_sub_pel_y(src1y + i1 * stride + j1, stride, di1, dj1); |
| } |
| } |
| |
| // calculate how many time each pixel is referenced |
| double totalused0, totalused1; |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| totalused0 = 0; |
| totalused1 = 0; |
| pos = dstpos; |
| ii0 = i - mf_start[i * mvstr + j].row * dstpos; |
| jj0 = j - mf_start[i * mvstr + j].col * dstpos; |
| ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos); |
| jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos); |
| |
| if (ii0 + starth < 0) |
| ii0 = -starth; |
| else if (ii0 + starth >= fheight) |
| ii0 = fheight - starth - 1; |
| if (jj0 + startw < 0) |
| jj0 = -startw; |
| else if (jj0 + startw >= fwidth) |
| jj0 = fwidth - startw - 1; |
| if (ii1 + starth < 0) |
| ii1 = -starth; |
| else if (ii1 + starth >= fheight) |
| ii1 = fheight - starth - 1; |
| if (jj1 + startw < 0) |
| jj1 = -startw; |
| else if (jj1 + startw >= fwidth) |
| jj1 = fwidth - startw - 1; |
| |
| i0 = opfl_floor_double_2_int(ii0); |
| di0 = ii0 - i0; |
| j0 = opfl_floor_double_2_int(jj0); |
| dj0 = jj0 - j0; |
| i1 = opfl_floor_double_2_int(ii1); |
| di1 = ii1 - i1; |
| j1 = opfl_floor_double_2_int(jj1); |
| dj1 = jj1 - j1; |
| |
| int inside0 = (i0 >= 0 && i0 < height - 1 && j0 >= 0 && j0 < width - 1); |
| int inside1 = (i1 >= 0 && i1 < height - 1 && j1 >= 0 && j1 < width - 1); |
| |
| if (inside0) { |
| totalused0 += used0[(i0)*width + j0] * (1 - di0) * (1 - dj0); |
| totalused0 += used0[(i0 + 1) * width + j0] * (di0) * (1 - dj0); |
| totalused0 += used0[(i0)*width + j0 + 1] * (1 - di0) * (dj0); |
| totalused0 += used0[(i0 + 1) * width + j0 + 1] * (di0) * (dj0); |
| } |
| if (inside1) { |
| totalused1 += used1[(i1)*width + j1] * (1 - di1) * (1 - dj1); |
| totalused1 += used1[(i1 + 1) * width + j1] * (di1) * (1 - dj1); |
| totalused1 += used1[(i1)*width + j1 + 1] * (1 - di1) * (dj1); |
| totalused1 += used1[(i1 + 1) * width + j1 + 1] * (di1) * (dj1); |
| } |
| // if referenced more, weight should be lower |
| // if refs agrees better, weights should be closer to each other |
| double diffWts = (dstpel_y0[i * width + j] - dstpel_y1[i * width + j]); |
| diffWts = 0.1 * diffWts * diffWts; |
| if (diffWts > 50) diffWts = 50; |
| if (diffWts < 10) diffWts = 0; |
| if (inside0 == 0 && inside1 == 0) { |
| ref0wts[i * width + j] = 1 - pos; |
| ref1wts[i * width + j] = pos; |
| } else { |
| if (inside0) |
| ref0wts[i * width + j] = |
| exp(-0.01 * diffWts * totalused0) * (1 - dstpos); |
| else |
| ref0wts[i * width + j] = 0; |
| if (inside1) |
| ref1wts[i * width + j] = exp(-0.01 * diffWts * totalused1) * dstpos; |
| else |
| ref1wts[i * width + j] = 0; |
| } |
| double tempSum = ref0wts[i * width + j] + ref1wts[i * width + j]; |
| ref0wts[i * width + j] /= tempSum; |
| ref1wts[i * width + j] /= tempSum; |
| } |
| } |
| |
| // blend according to the refs selected |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| pos = dstpos; |
| |
| ii0 = i - mf_start[i * mvstr + j].row * dstpos; |
| jj0 = j - mf_start[i * mvstr + j].col * dstpos; |
| ii1 = i + mf_start[i * mvstr + j].row * (1 - dstpos); |
| jj1 = j + mf_start[i * mvstr + j].col * (1 - dstpos); |
| |
| if (ii0 + starth < 0) |
| ii0 = -starth; |
| else if (ii0 + starth >= fheight) |
| ii0 = fheight - starth - 1; |
| if (jj0 + startw < 0) |
| jj0 = -startw; |
| else if (jj0 + startw >= fwidth) |
| jj0 = fwidth - startw - 1; |
| if (ii1 + starth < 0) |
| ii1 = -starth; |
| else if (ii1 + starth >= fheight) |
| ii1 = fheight - starth - 1; |
| if (jj1 + startw < 0) |
| jj1 = -startw; |
| else if (jj1 + startw >= fwidth) |
| jj1 = fwidth - startw - 1; |
| |
| i0 = opfl_floor_double_2_int(ii0); |
| di0 = ii0 - i0; |
| j0 = opfl_floor_double_2_int(jj0); |
| dj0 = jj0 - j0; |
| i1 = opfl_floor_double_2_int(ii1); |
| di1 = ii1 - i1; |
| j1 = opfl_floor_double_2_int(jj1); |
| dj1 = jj1 - j1; |
| |
| do_uv = |
| (i % 2 == 0) && (j % 2 == 0); // TODO(bohan) only considering 420 now |
| if (do_uv) { |
| di0uv = di0 / 2 + 0.5 * ((i0 % 2 + 2) % 2); |
| dj0uv = dj0 / 2 + 0.5 * ((j0 % 2 + 2) % 2); |
| di1uv = di1 / 2 + 0.5 * ((i1 % 2 + 2) % 2); |
| dj1uv = dj1 / 2 + 0.5 * ((j1 % 2 + 2) % 2); |
| } |
| |
| dstpel_y = 0; |
| dstpel_u = 0; |
| dstpel_v = 0; |
| |
| dstpel_y += dstpel_y0[i * width + j] * ref0wts[i * width + j]; |
| if (do_uv) { |
| dstpel_u += |
| (double)get_sub_pel_uv( |
| src0u + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2), |
| uvstride, di0uv, dj0uv) * |
| ref0wts[i * width + j]; |
| dstpel_v += |
| (double)get_sub_pel_uv( |
| src0v + (int)floor(ii0 / 2) * uvstride + (int)floor(jj0 / 2), |
| uvstride, di0uv, dj0uv) * |
| ref0wts[i * width + j]; |
| } |
| |
| dstpel_y += dstpel_y1[i * width + j] * ref1wts[i * width + j]; |
| if (do_uv) { |
| dstpel_u += |
| (double)get_sub_pel_uv( |
| src1u + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2), |
| uvstride, di1uv, dj1uv) * |
| ref1wts[i * width + j]; |
| dstpel_v += |
| (double)get_sub_pel_uv( |
| src1v + (int)floor(ii1 / 2) * uvstride + (int)floor(jj1 / 2), |
| uvstride, di1uv, dj1uv) * |
| ref1wts[i * width + j]; |
| } |
| |
| dsty[i * stride + j] = opfl_round_double_2_int(dstpel_y); |
| if (do_uv) { |
| dstu[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_u); |
| dstv[i / 2 * uvstride + j / 2] = opfl_round_double_2_int(dstpel_v); |
| } |
| } |
| } |
| aom_free(ref0wts); |
| aom_free(ref1wts); |
| aom_free(used0); |
| aom_free(used1); |
| aom_free(dstpel_y0); |
| aom_free(dstpel_y1); |
| } |
| |
| /* |
| * Subpel filter for y pixels. Round the motion field to 1/8 precision. |
| * |
| * Input: |
| * src: the source pixel at the integer location. |
| * stride: source stride |
| * di: subpel location in height |
| * dj: subpel location in width |
| * |
| * Output: |
| * interpolated pixel |
| */ |
| uint8_t get_sub_pel_y(uint8_t *src, int stride, double di, double dj) { |
| int yidx = opfl_round_double_2_int(di * 8); |
| int xidx = opfl_round_double_2_int(dj * 8); |
| yidx *= 2; |
| xidx *= 2; |
| if (yidx == 16) { |
| yidx = 0; |
| src += stride; |
| } |
| if (xidx == 16) { |
| xidx = 0; |
| src += 1; |
| } |
| assert(xidx <= 14 && xidx >= 0); |
| assert(yidx <= 14 && yidx >= 0); |
| int y[8]; |
| for (int i = -3; i < 5; i++) { |
| y[i + 3] = 0; |
| for (int j = -3; j < 5; j++) { |
| y[i + 3] += src[i * stride + j] * optical_flow_warp_filter[xidx][j + 3]; |
| } |
| y[i + 3] = (y[i + 3] + (1 << 6)) >> 7; |
| } |
| int x = 0; |
| for (int i = 0; i < 8; i++) { |
| x += y[i] * optical_flow_warp_filter[yidx][i]; |
| } |
| x = (x + (1 << 6)) >> 7; |
| if (x > 255) |
| x = 255; |
| else if (x < 0) |
| x = 0; |
| return (uint8_t)x; |
| } |
| |
| /* |
| * Subpel filter for u/v pixels. Round the motion field to 1/16 precision. |
| * |
| * Input: |
| * src: the source pixel at the integer location. |
| * stride: source stride |
| * di: subpel location in height |
| * dj: subpel location in width |
| * |
| * Output: |
| * interpolated pixel |
| */ |
| uint8_t get_sub_pel_uv(uint8_t *src, int stride, double di, double dj) { |
| // TODO(bohan) now only care about YUV 420 |
| int yidx = opfl_round_double_2_int(di * 16); |
| int xidx = opfl_round_double_2_int(dj * 16); |
| if (yidx == 16) { |
| yidx = 0; |
| src += stride; |
| } |
| if (xidx == 16) { |
| xidx = 0; |
| src += 1; |
| } |
| assert(xidx <= 15 && xidx >= 0); |
| assert(yidx <= 15 && yidx >= 0); |
| |
| int y[8]; |
| for (int i = -3; i < 5; i++) { |
| y[i + 3] = 0; |
| for (int j = -3; j < 5; j++) { |
| y[i + 3] += src[i * stride + j] * optical_flow_warp_filter[xidx][j + 3]; |
| } |
| y[i + 3] = (y[i + 3] + (1 << 6)) >> 7; |
| } |
| int x = 0; |
| for (int i = 0; i < 8; i++) { |
| x += y[i] * optical_flow_warp_filter[yidx][i]; |
| } |
| x = (x + (1 << 6)) >> 7; |
| if (x > 255) |
| x = 255; |
| else if (x < 0) |
| x = 0; |
| return (uint8_t)x; |
| } |
| |
| /* |
| * Blend function which calls the real blend methods. |
| * Kept as caller where we may make high level changes or pre-process |
| */ |
| void interp_optical_flow(YV12_BUFFER_CONFIG *ref0, YV12_BUFFER_CONFIG *ref1, |
| DB_MV *mf, YV12_BUFFER_CONFIG *dst, double dst_pos, |
| OPFL_BLK_INFO blk_info) { |
| // blend here |
| int mvstr = blk_info.blk_width + 2 * AVG_MF_BORDER; |
| DB_MV *mf_start = mf + AVG_MF_BORDER * mvstr + AVG_MF_BORDER; |
| |
| warp_optical_flow(ref0, ref1, mf_start, mvstr, dst, dst_pos, |
| OPFL_BLEND_METHOD_USED, blk_info); |
| return; |
| } |
| |
| /* |
| * Use the initial motion vectors to create the initial motion field. |
| * |
| * Input: |
| * mv_left: motion vector points from the current frame to ref0 |
| * mv_right: motion vector points from the current frame to ref1 |
| * width, height: frame/block width and height |
| * mvwid, mvhgt: width and height of the mv at some pyramid scale |
| * mfstr: stride of the motion field buffer |
| * dstpos: relative location of the current block |
| * |
| * Output: |
| * mf: pointer to the created motion field |
| */ |
| void create_motion_field(int_mv *mv_left, int_mv *mv_right, DB_MV *mf, |
| #if OPFL_INIT_WT |
| double *mv_wts, |
| #endif |
| int width, int height, int mvwid, int mvhgt, int mfstr, |
| double dstpos) { |
| // since the motion field is just used as initialization for now, |
| // just simply use the summation of the two |
| // TODO(bohan): need to change the function to work for MAX_OPFL_LEVEL > 3 |
| int stride = mfstr; |
| DB_MV *mf_start = mf + AVG_MF_BORDER * stride + AVG_MF_BORDER; |
| int idx; |
| int blksize = mvwid / (width / 4); |
| assert(blksize == mvhgt / (height / 4)); |
| double tempr, tempc; |
| double mvscale = 4 / blksize; |
| #if OPFL_INIT_WT |
| double *wts_start = mv_wts + AVG_MF_BORDER * stride + AVG_MF_BORDER; |
| double tempWts; |
| #endif |
| for (int h = 0; h < height / 4; h++) { |
| for (int w = 0; w < width / 4; w++) { |
| // mv_left, mv_right are based on 4x4 block |
| idx = h * width / 4 + w; |
| if (mv_left[idx].as_int == INVALID_MV && |
| mv_right[idx].as_int == INVALID_MV) { |
| tempr = 0; |
| tempc = 0; |
| #if OPFL_INIT_WT |
| tempWts = 0; |
| #endif |
| } else if (mv_left[idx].as_int == INVALID_MV) { |
| tempr = |
| (double)(mv_right[idx].as_mv.row) / 8.0 / mvscale / (1 - dstpos); |
| tempc = |
| (double)(mv_right[idx].as_mv.col) / 8.0 / mvscale / (1 - dstpos); |
| #if OPFL_INIT_WT |
| tempWts = 0.0; |
| #endif |
| } else if (mv_right[idx].as_int == INVALID_MV) { |
| tempr = (double)(-mv_left[idx].as_mv.row) / 8.0 / mvscale / dstpos; |
| tempc = (double)(-mv_left[idx].as_mv.col) / 8.0 / mvscale / dstpos; |
| #if OPFL_INIT_WT |
| tempWts = 0.0; |
| #endif |
| } else { |
| tempr = (double)(-mv_left[idx].as_mv.row + mv_right[idx].as_mv.row) / |
| 8.0 / mvscale; |
| tempc = (double)(-mv_left[idx].as_mv.col + mv_right[idx].as_mv.col) / |
| 8.0 / mvscale; |
| #if OPFL_INIT_WT |
| tempWts = 0.0; |
| #endif |
| } |
| for (int i = 0; i < blksize; i++) { |
| for (int j = 0; j < blksize; j++) { |
| mf_start[(h * blksize + i) * mfstr + w * blksize + j].row = tempr; |
| mf_start[(h * blksize + i) * mfstr + w * blksize + j].col = tempc; |
| #if OPFL_INIT_WT |
| double i0, i1, j0, j1; |
| i0 = (double)(i + h * blksize) - (dstpos)*tempr; |
| i1 = (double)(i + h * blksize) + (1 - dstpos) * tempr; |
| j0 = (double)(j + w * blksize) - (dstpos)*tempc; |
| j1 = (double)(j + w * blksize) + (1 - dstpos) * tempc; |
| int is_out = |
| (i0 < 0 || i0 > height - 1 || i1 < 0 || i1 > height - 1 || |
| j0 < 0 || j0 > width - 1 || j1 < 0 || j1 > width - 1); |
| if (is_out) { |
| wts_start[(h * blksize + i) * mfstr + w * blksize + j] = 0; |
| } else { |
| wts_start[(h * blksize + i) * mfstr + w * blksize + j] = tempWts; |
| } |
| #endif |
| } |
| } |
| } |
| } |
| // Pad the motion field border |
| // pad_motion_field_border(mf_start, mvwid, mvhgt, mfstr); |
| return; |
| } |
| |
| /* |
| * Fill the initial motion field if there are holes |
| * |
| * Input: |
| * mv_left: motion vector points from the current frame to ref0 |
| * mv_right: motion vector points from the current frame to ref1 |
| * width, height: frame/block width and height |
| * mvwid, mvhgt: width and height of the mv at some pyramid scale |
| * mfstr: stride of the motion field buffer |
| * dstpos: relative location of the current block |
| * |
| * Output: |
| * mf: pointer to the created motion field |
| */ |
| void fill_create_motion_field(int_mv *mv_left, int_mv *mv_right, DB_MV *mf, |
| int width, int height, int mvwid, int mvhgt, |
| int mfstr) { |
| int stride = mfstr; |
| DB_MV *mf_start = mf + AVG_MF_BORDER * stride + AVG_MF_BORDER; |
| int idx; |
| int blksize = mvwid / (width / 4); |
| assert(blksize == mvhgt / (height / 4)); |
| |
| int invalid_cnt = 0; |
| DB_MV *tempmv = aom_calloc(mvwid * mvhgt, sizeof(DB_MV)); |
| // isValid: 0: not valid; 1: already valid; -1:ready for next round |
| int *isValid = aom_calloc(height * width / 4 / 4, sizeof(int)); |
| for (int i = 0; i < mvhgt; i++) { |
| for (int j = 0; j < mvwid; j++) { |
| tempmv[i * mvwid + j].col = mf_start[i * mfstr + j].col; |
| tempmv[i * mvwid + j].row = mf_start[i * mfstr + j].row; |
| } |
| } |
| |
| for (int h = 0; h < height / 4; h++) { |
| for (int w = 0; w < width / 4; w++) { |
| idx = h * width / 4 + w; |
| if (mv_left[idx].as_int == INVALID_MV && |
| mv_right[idx].as_int == INVALID_MV) { |
| invalid_cnt++; |
| isValid[idx] = 0; |
| } else { |
| isValid[idx] = 1; |
| } |
| } |
| } |
| DB_MV avg; |
| int avgcnt; |
| while (invalid_cnt > 0 && invalid_cnt != width * height / 4 / 4) { |
| for (int h = 0; h < height / 4; h++) { |
| for (int w = 0; w < width / 4; w++) { |
| idx = h * width / 4 + w; |
| if (isValid[idx] == 0) { |
| avgcnt = 0; |
| avg.col = 0; |
| avg.row = 0; |
| for (int i = -1; i < 2; i++) { |
| for (int j = -1; j < 2; j++) { |
| if (i + h >= 0 && i + h < height / 4 && j + w >= 0 && |
| j + w < width / 4) { |
| if (isValid[idx + i * width / 4 + j] > 0) { |
| avg.col += |
| mf_start[(h + i) * blksize * mfstr + (j + w) * blksize] |
| .col; |
| avg.row += |
| mf_start[(h + i) * blksize * mfstr + (j + w) * blksize] |
| .row; |
| avgcnt++; |
| } |
| } |
| } |
| } |
| if (avgcnt != 0) { |
| for (int i = 0; i < blksize; i++) { |
| for (int j = 0; j < blksize; j++) { |
| tempmv[(h * blksize + i) * mvwid + w * blksize + j].row = |
| avg.row / avgcnt; |
| tempmv[(h * blksize + i) * mvwid + w * blksize + j].col = |
| avg.col / avgcnt; |
| } |
| } |
| isValid[idx] = -1; |
| } |
| } |
| } |
| } |
| invalid_cnt = 0; |
| for (int h = 0; h < height / 4; h++) { |
| for (int w = 0; w < width / 4; w++) { |
| idx = h * width / 4 + w; |
| if (isValid[idx] == 0) { |
| invalid_cnt++; |
| } else if (isValid[idx] < 0) { |
| isValid[idx] = 1; |
| for (int i = 0; i < blksize; i++) { |
| for (int j = 0; j < blksize; j++) { |
| mf_start[(h * blksize + i) * mfstr + w * blksize + j].row = |
| tempmv[(h * blksize + i) * mvwid + w * blksize + j].row; |
| mf_start[(h * blksize + i) * mfstr + w * blksize + j].col = |
| tempmv[(h * blksize + i) * mvwid + w * blksize + j].col; |
| } |
| } |
| } |
| } |
| } |
| } |
| aom_free(tempmv); |
| aom_free(isValid); |
| // Pad the motion field border |
| pad_motion_field_border(mf_start, mvwid, mvhgt, mfstr); |
| } |
| |
| int opfl_get_blk_warp_SAD(uint8_t *y0, uint8_t *y1, int srcstride, int blksize, |
| int starth, int startw, DB_MV mv, double dstpos, |
| int width, int height) { |
| int dist = 0; |
| int lpix, rpix; |
| int yl, xl, yr, xr; |
| double dil, djl, dir, djr; |
| |
| yl = opfl_floor_double_2_int(-mv.row * dstpos); |
| xl = opfl_floor_double_2_int(-mv.col * dstpos); |
| yr = opfl_floor_double_2_int(mv.row * (1 - dstpos)); |
| xr = opfl_floor_double_2_int(mv.col * (1 - dstpos)); |
| |
| dil = -mv.row * dstpos - yl; |
| djl = -mv.col * dstpos - xl; |
| dir = mv.row * (1 - dstpos) - yr; |
| djr = mv.col * (1 - dstpos) - xr; |
| |
| // if (starth + yl < 0) { |
| // yl = -starth; |
| // } else if (starth + yl >= height) { |
| // yl = height - starth; |
| // } |
| // if (startw + xl < 0) { |
| // xl = -startw; |
| // } else if (startw + xl >= width) { |
| // xl = width - startw; |
| // } |
| // if (starth + yr < 0) { |
| // yr = -starth; |
| // } else if (starth + yr >= height) { |
| // yr = height - starth; |
| // } |
| // if (startw + xr < 0) { |
| // xr = -startw; |
| // } else if (startw + xr >= width) { |
| // xr = width - startw; |
| // } |
| |
| for (int i = 0; i < blksize; i++) { |
| for (int j = 0; j < blksize; j++) { |
| uint8_t *tmpsrc; |
| tmpsrc = y0 + (starth + i + yl) * srcstride + (startw + j + xl); |
| lpix = get_sub_pel_y(tmpsrc, srcstride, dil, djl); |
| tmpsrc = y1 + (starth + i + yr) * srcstride + (startw + j + xr); |
| rpix = get_sub_pel_y(tmpsrc, srcstride, dir, djr); |
| dist += abs(lpix - rpix); |
| } |
| } |
| return dist; |
| } |
| |
| DB_MV bi_direction_motion_search(uint8_t *y0, uint8_t *y1, int srcstr, |
| int starth, int startw, DB_MV init_mv, |
| double dstpos, int width, int height) { |
| DB_MV mv_res = init_mv; |
| int step = 0; |
| int curDist = opfl_get_blk_warp_SAD(y0, y1, srcstr, 16, starth, startw, |
| mv_res, dstpos, width, height); |
| int nextDist, tempDist; |
| DB_MV next_mv, temp_mv; |
| double stepsize = 8; |
| while (step <= 24) { |
| nextDist = curDist; |
| next_mv = mv_res; |
| // try upper |
| temp_mv.row = mv_res.row - stepsize; |
| temp_mv.col = mv_res.col; |
| tempDist = opfl_get_blk_warp_SAD(y0, y1, srcstr, 16, starth, startw, |
| temp_mv, dstpos, width, height); |
| if (tempDist < nextDist) { |
| nextDist = tempDist; |
| next_mv = temp_mv; |
| } |
| // try lower |
| temp_mv.row = mv_res.row + stepsize; |
| temp_mv.col = mv_res.col; |
| tempDist = opfl_get_blk_warp_SAD(y0, y1, srcstr, 16, starth, startw, |
| temp_mv, dstpos, width, height); |
| if (tempDist < nextDist) { |
| nextDist = tempDist; |
| next_mv = temp_mv; |
| } |
| // try left |
| temp_mv.row = mv_res.row; |
| temp_mv.col = mv_res.col - stepsize; |
| tempDist = opfl_get_blk_warp_SAD(y0, y1, srcstr, 16, starth, startw, |
| temp_mv, dstpos, width, height); |
| if (tempDist < nextDist) { |
| nextDist = tempDist; |
| next_mv = temp_mv; |
| } |
| // try right |
| temp_mv.row = mv_res.row; |
| temp_mv.col = mv_res.col + stepsize; |
| tempDist = opfl_get_blk_warp_SAD(y0, y1, srcstr, 16, starth, startw, |
| temp_mv, dstpos, width, height); |
| if (tempDist < nextDist) { |
| nextDist = tempDist; |
| next_mv = temp_mv; |
| } |
| if (curDist > nextDist) { |
| curDist = nextDist; |
| mv_res = next_mv; |
| } |
| step++; |
| if (step % 4 == 0) stepsize /= 2; |
| } |
| |
| return mv_res; |
| } |
| |
| /* |
| * Do a motion search to update the motions |
| * |
| * Input: |
| * mv_left: motion vector points from the current frame to ref0 |
| * mv_right: motion vector points from the current frame to ref1 |
| * width, height: frame/block width and height |
| * mvwid, mvhgt: width and height of the mv at some pyramid scale |
| * mfstr: stride of the motion field buffer |
| * y0, y1: two reference frames at level 0 pointed to the start |
| * of current block/frame |
| * srcstr: the stride of y0 and y1 |
| * |
| * Output: |
| * mf: pointer to the created motion field |
| */ |
| void opfl_init_motion_search(int_mv *mv_left, int_mv *mv_right, DB_MV *mf, |
| int width, int height, int mvwid, int mvhgt, |
| int mfstr, double dstpos, OPFL_BLK_INFO *blk_info, |
| uint8_t *y0, uint8_t *y1, int srcstr) { |
| int stride = mfstr; |
| DB_MV *mf_start = mf + AVG_MF_BORDER * stride + AVG_MF_BORDER; |
| int idx; |
| |
| int invalid_cnt = 0; |
| // isValid = 0: not valid; 1: valid. |
| int *isValid = aom_calloc(height * width / 4 / 4, sizeof(int)); |
| |
| for (int h = 0; h < height / 4; h++) { |
| for (int w = 0; w < width / 4; w++) { |
| idx = h * width / 4 + w; |
| if (mv_left[idx].as_int == INVALID_MV && |
| mv_right[idx].as_int == INVALID_MV) { |
| isValid[idx] = 0; |
| } else { |
| isValid[idx] = 1; |
| } |
| } |
| } |
| DB_MV avg_mv, search_mv; |
| int starth, startw; |
| // Do motion search for every 16x16 blocks if too many invalid mvs |
| for (int hb = 0; hb < height / 16; hb++) { |
| for (int wb = 0; wb < width / 16; wb++) { |
| // get how many invalid mvs are there in this 16x16 block |
| invalid_cnt = 0; |
| for (int h = 0; h < 16 / 4; h++) { |
| for (int w = 0; w < 16 / 4; w++) { |
| if (isValid[(hb * 4 + h) * width / 4 + wb * 4 + w]) continue; |
| invalid_cnt++; |
| } |
| } |
| if (invalid_cnt < 16) continue; |
| |
| // first get the avg mv as the initial mv |
| avg_mv.row = 0; |
| avg_mv.col = 0; |
| int mvwid_16 = 16 * mvwid / width; |
| int mvhgt_16 = 16 * mvhgt / height; |
| for (int h = 0; h < mvhgt_16; h++) { |
| for (int w = 0; w < mvwid_16; w++) { |
| avg_mv.row += |
| mf_start[(hb * mvhgt_16 + h) * mfstr + wb * mvwid_16 + w].row; |
| avg_mv.col += |
| mf_start[(hb * mvhgt_16 + h) * mfstr + wb * mvwid_16 + w].col; |
| } |
| } |
| avg_mv.row /= (mvwid_16 * mvhgt_16); |
| avg_mv.col /= (mvwid_16 * mvhgt_16); |
| avg_mv.row *= (height / mvhgt); |
| avg_mv.col *= (width / mvwid); |
| |
| // printf("\n(%.2f, %.2f): ", avg_mv.row, avg_mv.col); |
| |
| starth = blk_info->starth + hb * 16; |
| startw = blk_info->startw + wb * 16; |
| search_mv = bi_direction_motion_search(y0, y1, srcstr, starth, startw, |
| avg_mv, dstpos, width, height); |
| // printf(" %d %d ", starth, startw); |
| // printf(" (%.2f, %.2f): ", search_mv.row, search_mv.col); |
| for (int h = 0; h < mvhgt_16; h++) { |
| for (int w = 0; w < mvwid_16; w++) { |
| mf_start[(hb * mvhgt_16 + h) * mfstr + wb * mvwid_16 + w].row = |
| search_mv.row / (height / mvhgt); |
| mf_start[(hb * mvhgt_16 + h) * mfstr + wb * mvwid_16 + w].col = |
| search_mv.col / (width / mvwid); |
| } |
| } |
| } |
| } |
| |
| aom_free(isValid); |
| // Pad the motion field border |
| pad_motion_field_border(mf_start, mvwid, mvhgt, mfstr); |
| } |
| |
| void opfl_fill_mv(int_mv *pmv, int width, int height) { |
| int invalid_cnt = 0; |
| int_mv *tempmv = aom_calloc(width * height, sizeof(int_mv)); |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| if (pmv[i * width + j].as_int == INVALID_MV) { |
| invalid_cnt++; |
| } |
| tempmv[i * width + j].as_int = INVALID_MV; |
| } |
| } |
| int_mv avg; |
| int avgcnt; |
| while (invalid_cnt > 0 && invalid_cnt != width * height) { |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| if (pmv[i * width + j].as_int == INVALID_MV) { |
| avgcnt = 0; |
| avg.as_int = 0; |
| for (int h = -1; h < 2; h++) { |
| for (int w = -1; w < 2; w++) { |
| if (i + h >= 0 && i + h < height && j + w >= 0 && j + w < width) { |
| if (pmv[(i + h) * width + j + w].as_int != INVALID_MV) { |
| avg.as_mv.col += pmv[(i + h) * width + j + w].as_mv.col; |
| avg.as_mv.row += pmv[(i + h) * width + j + w].as_mv.row; |
| avgcnt++; |
| } |
| } |
| } |
| } |
| if (avgcnt != 0) { |
| tempmv[i * width + j].as_mv.col = |
| opfl_round_double_2_int((double)avg.as_mv.col / avgcnt); |
| tempmv[i * width + j].as_mv.row = |
| opfl_round_double_2_int((double)avg.as_mv.row / avgcnt); |
| } |
| } |
| } |
| } // for every mv |
| invalid_cnt = 0; |
| for (int i = 0; i < height; i++) { |
| for (int j = 0; j < width; j++) { |
| if (pmv[i * width + j].as_int == INVALID_MV && |
| tempmv[i * width + j].as_int != INVALID_MV) { |
| pmv[i * width + j].as_int = tempmv[i * width + j].as_int; |
| } else if (pmv[i * width + j].as_int == INVALID_MV) { |
| invalid_cnt++; |
| } |
| } |
| } |
| } |
| aom_free(tempmv); |
| } |
| /* |
| * Upscale the motion field by 2. |
| * Currently simply copy the nearest mv |
| * |
| * Input: |
| * src: the source motion field |
| * srcw, srch, srcs: source width, height, stride |
| * dsts: destination mf stride |
| * |
| * Output; |
| * dst: pointer to the upscaled mf buffer |
| */ |
| void upscale_mv_by_2(DB_MV *src, int srcw, int srch, int srcs, DB_MV *dst, |
| int dsts) { |
| for (int i = 0; i < srch; i++) { |
| for (int j = 0; j < srcw; j++) { |
| for (int y = 0; y < 2; y++) { |
| for (int x = 0; x < 2; x++) { |
| dst[(i * 2 + y) * dsts + j * 2 + x].row = src[i * srcs + j].row * 2; |
| dst[(i * 2 + y) * dsts + j * 2 + x].col = src[i * srcs + j].col * 2; |
| } |
| } |
| } |
| } |
| pad_motion_field_border(dst, srcw * 2, srch * 2, dsts); |
| } |
| |
| /* |
| * Pad the motion field border to prepare for median filter |
| */ |
| void pad_motion_field_border(DB_MV *mf_start, int width, int height, |
| int stride) { |
| assert(stride == width + 2 * AVG_MF_BORDER); |
| // upper |
| for (int i = -AVG_MF_BORDER; i < 0; i++) { |
| memcpy(mf_start + i * stride, mf_start, sizeof(DB_MV) * width); |
| } |
| // lower |
| for (int i = height; i < height + AVG_MF_BORDER; i++) { |
| memcpy(mf_start + i * stride, mf_start + (height - 1) * stride, |
| sizeof(DB_MV) * width); |
| } |
| // left |
| for (int i = -AVG_MF_BORDER; i < height + AVG_MF_BORDER; i++) { |
| for (int j = -AVG_MF_BORDER; j < 0; j++) { |
| mf_start[i * stride + j] = mf_start[i * stride]; |
| } |
| } |
| // right |
| for (int i = -AVG_MF_BORDER; i < height + AVG_MF_BORDER; i++) { |
| for (int j = width; j < width + AVG_MF_BORDER; j++) { |
| mf_start[i * stride + j] = mf_start[i * stride + width - 1]; |
| } |
| } |
| } |
| |
| /* |
| * Median filter double arrays iteratively |
| */ |
| double iter_median_double(double *x, double *left, double *right, int length, |
| int mididx) { |
| int pivot = length / 2; |
| int ll = 0, rl = 0; |
| for (int i = 0; i < length; i++) { |
| if (i == pivot) continue; |
| if (x[i] <= x[pivot]) { |
| left[ll] = x[i]; |
| ll++; |
| } else { |
| right[rl] = x[i]; |
| rl++; |
| } |
| } |
| if (mididx == ll) |
| return x[pivot]; |
| else if (mididx < ll) |
| return iter_median_double(left, x, right, ll, mididx); |
| else |
| return iter_median_double(right, left, x, rl, mididx - ll - 1); |
| } |
| |
| /* |
| * Do mode filter to the reference selections |
| */ |
| int ref_mode_filter_3x3(int *center, int stride, double dstpos) { |
| int ref_id_count[3] = { 0 }; |
| for (int i = -1; i < 2; i++) { |
| for (int j = -1; j < 2; j++) { |
| assert(center[i * stride + j] >= 0); |
| ref_id_count[center[i * stride + j]]++; |
| } |
| } |
| if (ref_id_count[2] >= ref_id_count[1] && |
| ref_id_count[2] >= ref_id_count[0]) { |
| return 2; |
| } else if (ref_id_count[1] == ref_id_count[0]) { |
| return ((dstpos <= 0.5) ? 0 : 1); |
| } else { |
| return ((ref_id_count[0] > ref_id_count[1]) ? 0 : 1); |
| } |
| } |
| |
| /* |
| * Write YUV for debug purpose |
| */ |
| int write_image_opfl(const YV12_BUFFER_CONFIG *const ref_buf, char *file_name) { |
| int h; |
| FILE *f_ref = NULL; |
| |
| if (ref_buf == NULL) { |
| printf("Frame data buffer is NULL.\n"); |
| return AOM_CODEC_MEM_ERROR; |
| } |
| if ((f_ref = fopen(file_name, "ab")) == NULL) { |
| printf("Unable to open file %s to write.\n", file_name); |
| return AOM_CODEC_MEM_ERROR; |
| } |
| // --- Y --- |
| for (h = 0; h < ref_buf->y_height; ++h) { |
| fwrite(&ref_buf->y_buffer[h * ref_buf->y_stride], 1, ref_buf->y_width, |
| f_ref); |
| } |
| // --- U --- |
| for (h = 0; h < (ref_buf->uv_height); ++h) { |
| fwrite(&ref_buf->u_buffer[h * ref_buf->uv_stride], 1, ref_buf->uv_width, |
| f_ref); |
| } |
| // --- V --- |
| for (h = 0; h < (ref_buf->uv_height); ++h) { |
| fwrite(&ref_buf->v_buffer[h * ref_buf->uv_stride], 1, ref_buf->uv_width, |
| f_ref); |
| } |
| fclose(f_ref); |
| return AOM_CODEC_OK; |
| } |
| |
| /* |
| * Extend pixel planes. This is an exact copy of the original extend_plane |
| * function. |
| */ |
| void extend_plane_opfl(uint8_t *const src, int src_stride, int width, |
| int height, int extend_top, int extend_left, |
| int extend_bottom, int extend_right) { |
| int i; |
| const int linesize = extend_left + extend_right + width; |
| |
| /* copy the left and right most columns out */ |
| uint8_t *src_ptr1 = src; |
| uint8_t *src_ptr2 = src + width - 1; |
| uint8_t *dst_ptr1 = src - extend_left; |
| uint8_t *dst_ptr2 = src + width; |
| |
| for (i = 0; i < height; ++i) { |
| memset(dst_ptr1, src_ptr1[0], extend_left); |
| memset(dst_ptr2, src_ptr2[0], extend_right); |
| src_ptr1 += src_stride; |
| src_ptr2 += src_stride; |
| dst_ptr1 += src_stride; |
| dst_ptr2 += src_stride; |
| } |
| |
| /* Now copy the top and bottom lines into each line of the respective |
| * borders |
| */ |
| src_ptr1 = src - extend_left; |
| src_ptr2 = src + src_stride * (height - 1) - extend_left; |
| dst_ptr1 = src + src_stride * -extend_top - extend_left; |
| dst_ptr2 = src + src_stride * height - extend_left; |
| |
| for (i = 0; i < extend_top; ++i) { |
| memcpy(dst_ptr1, src_ptr1, linesize); |
| dst_ptr1 += src_stride; |
| } |
| |
| for (i = 0; i < extend_bottom; ++i) { |
| memcpy(dst_ptr2, src_ptr2, linesize); |
| dst_ptr2 += src_stride; |
| } |
| } |
| |
| int opfl_round_double_2_int(double x) { |
| if (x >= 0) { |
| return (int)(x + 0.5); |
| } else { |
| return (int)(x - 0.5); |
| } |
| } |
| int opfl_floor_double_2_int(double x) { |
| if (x >= 0) { |
| return (int)x; |
| } else { |
| return (int)x - 1; |
| } |
| } |
| int opfl_ceil_double_2_int(double x) { |
| if (x >= 0) { |
| return -opfl_floor_double_2_int(-x); |
| } else { |
| return (int)x; |
| } |
| } |
| |
| void opfl_extend_frame_mf(OPFL_BUFFER_STRUCT *buf_struct, |
| OPFL_BLK_INFO blk_info) { |
| DB_MV *mf_start = buf_struct->mf_frame_start; |
| int str = buf_struct->mf_frame_stride; |
| int starth = blk_info.starth, startw = blk_info.startw; |
| int height = blk_info.blk_height, width = blk_info.blk_width; |
| int endh = starth + height, endw = startw + width; |
| int f_height = buf_struct->ref0_buf[0]->y_height; |
| int f_width = buf_struct->ref0_buf[0]->y_width; |
| // upper bound |
| if (blk_info.upbound && starth == 0) { |
| for (int i = -OPFL_MF_FRAME_BORDER; i < 0; i++) { |
| for (int j = startw; j < endw; j++) { |
| mf_start[i * str + j] = mf_start[j]; |
| } |
| } |
| } |
| // lower bound |
| if (blk_info.lowerbound && endh == f_height) { |
| for (int i = endh; i < endh + OPFL_MF_FRAME_BORDER; i++) { |
| for (int j = startw; j < endw; j++) { |
| mf_start[i * str + j] = mf_start[(endh - 1) * str + j]; |
| } |
| } |
| } |
| // left bound |
| if (blk_info.leftbound && startw == 0) { |
| for (int i = starth; i < endh; i++) { |
| for (int j = -OPFL_MF_FRAME_BORDER; j < 0; j++) { |
| mf_start[i * str + j] = mf_start[i * str]; |
| } |
| } |
| } |
| // right bound |
| if (blk_info.rightbound && endw == f_width) { |
| for (int i = starth; i < endh; i++) { |
| for (int j = endw; j < endw + OPFL_MF_FRAME_BORDER; j++) { |
| mf_start[i * str + j] = mf_start[i * str + endw - 1]; |
| } |
| } |
| } |
| // check corners |
| if (blk_info.upbound && blk_info.leftbound && starth == 0 && startw == 0) { |
| for (int i = -OPFL_MF_FRAME_BORDER; i < 0; i++) { |
| for (int j = -OPFL_MF_FRAME_BORDER; j < 0; j++) { |
| mf_start[i * str + j] = mf_start[0]; |
| } |
| } |
| } |
| if (blk_info.upbound && blk_info.rightbound && starth == 0 && |
| endw == f_width) { |
| for (int i = -OPFL_MF_FRAME_BORDER; i < 0; i++) { |
| for (int j = endw; j < endw + OPFL_MF_FRAME_BORDER; j++) { |
| mf_start[i * str + j] = mf_start[endw - 1]; |
| } |
| } |
| } |
| if (blk_info.lowerbound && blk_info.leftbound && endh == f_height && |
| startw == 0) { |
| for (int i = endh; i < endh + OPFL_MF_FRAME_BORDER; i++) { |
| for (int j = -OPFL_MF_FRAME_BORDER; j < 0; j++) { |
| mf_start[i * str + j] = mf_start[(endh - 1) * str]; |
| } |
| } |
| } |
| if (blk_info.lowerbound && blk_info.rightbound && endh == f_height && |
| endw == f_width) { |
| for (int i = endh; i < endh + OPFL_MF_FRAME_BORDER; i++) { |
| for (int j = endw; j < endw + OPFL_MF_FRAME_BORDER; j++) { |
| mf_start[i * str + j] = mf_start[(endh - 1) * str + endw - 1]; |
| } |
| } |
| } |
| } |
| #endif // CONFIG_OPFL |