Reorder functions in mcomp.[c|h] Change-Id: I5a27005d9b96af2f6bb9de874fb8523e93c45b7c
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index a46d228..c447834 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -56,6 +56,7 @@ #include "av1/encoder/ethread.h" #include "av1/encoder/extend.h" #include "av1/encoder/ml.h" +#include "av1/encoder/motion_search_facade.h" #include "av1/encoder/partition_strategy.h" #if !CONFIG_REALTIME_ONLY #include "av1/encoder/partition_model_weights.h"
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c index b18f7a4..84a21b7 100644 --- a/av1/encoder/mcomp.c +++ b/av1/encoder/mcomp.c
@@ -19,7 +19,6 @@ #include "aom_dsp/aom_dsp_common.h" #include "aom_mem/aom_mem.h" #include "aom_ports/mem.h" -#include "aom_ports/system_state.h" #include "av1/common/common.h" #include "av1/common/mvref_common.h" @@ -29,7 +28,6 @@ #include "av1/encoder/encoder.h" #include "av1/encoder/encodemv.h" #include "av1/encoder/mcomp.h" -#include "av1/encoder/partition_strategy.h" #include "av1/encoder/rdopt.h" #include "av1/encoder/reconinter_enc.h" @@ -124,6 +122,10 @@ return sr; } +// ============================================================================ +// Cost of motion vectors +// ============================================================================ + // Returns the rate of encoding the current motion vector based on the // joint_cost and comp_cost. joint_costs covers the cost of transmitting // JOINT_MV, and comp_cost covers the cost of transmitting the actual motion @@ -209,6 +211,13 @@ } } +// ============================================================================= +// Fullpixel Motion Search: Translational +// ============================================================================= +#define MAX_PATTERN_SCALES 11 +#define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale +#define PATTERN_CANDIDATES_REF 3 // number of refinement candidates + void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) { int ss_count = 0; int stage_index = MAX_MVSEARCH_STEPS - 1; @@ -326,1033 +335,7 @@ cfg->ss_count = ss_count; } -/* - * To avoid the penalty for crossing cache-line read, preload the reference - * area in a small buffer, which is aligned to make sure there won't be crossing - * cache-line read while reading from this buffer. This reduced the cpu - * cycles spent on reading ref data in sub-pixel filter functions. - * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x - * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we - * could reduce the area. 
- */ - -// convert motion vector component to offset for sv[a]f calc -static INLINE int sp(int x) { return x & 7; } - -static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { - const int offset = (r >> 3) * stride + (c >> 3); - return buf + offset; -} - -#define UNPACK_VAR_PARAMS(var_params) \ - const aom_variance_fn_ptr_t *vfp = (var_params)->vfp; \ - const SUBPEL_SEARCH_TYPE subpel_search_type = \ - (var_params)->subpel_search_type; \ - const uint8_t *second_pred = (var_params)->second_pred; \ - const uint8_t *mask = (var_params)->mask; \ - const int mask_stride = (var_params)->mask_stride; \ - const int invert_mask = (var_params)->invert_mask; \ - const int w = (var_params)->w; \ - const int h = (var_params)->h; - -static INLINE int estimated_pref_error( - const MV *this_mv, const uint8_t *src, const int src_stride, - const uint8_t *ref, int ref_stride, - const SUBPEL_SEARCH_VAR_PARAMS *var_params, unsigned int *sse) { - UNPACK_VAR_PARAMS(var_params); - (void)subpel_search_type; - (void)w; - (void)h; - const int r = this_mv->row; - const int c = this_mv->col; - if (second_pred == NULL) { - return vfp->svf(pre(ref, ref_stride, r, c), ref_stride, sp(c), sp(r), src, - src_stride, sse); - } else if (mask) { - return vfp->msvf(pre(ref, ref_stride, r, c), ref_stride, sp(c), sp(r), src, - src_stride, second_pred, mask, mask_stride, invert_mask, - sse); - } else { - return vfp->svaf(pre(ref, ref_stride, r, c), ref_stride, sp(c), sp(r), src, - src_stride, sse, second_pred); - } -} - -static int upsampled_pref_error(MACROBLOCKD *xd, const AV1_COMMON *cm, - const MV *this_mv, const uint8_t *src, - int src_stride, const uint8_t *ref, - int ref_stride, - const SUBPEL_SEARCH_VAR_PARAMS *var_params, - unsigned int *sse) { - UNPACK_VAR_PARAMS(var_params); - const int mi_row = xd->mi_row; - const int mi_col = xd->mi_col; - ref = pre(ref, ref_stride, this_mv->row, this_mv->col); - const int subpel_x_q3 = sp(this_mv->col); - const int subpel_y_q3 = 
sp(this_mv->row); - unsigned int besterr; -#if CONFIG_AV1_HIGHBITDEPTH - if (is_cur_buf_hbd(xd)) { - DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]); - uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred16); - if (second_pred != NULL) { - if (mask) { - aom_highbd_comp_mask_upsampled_pred( - xd, cm, mi_row, mi_col, this_mv, pred8, second_pred, w, h, - subpel_x_q3, subpel_y_q3, ref, ref_stride, mask, mask_stride, - invert_mask, xd->bd, subpel_search_type); - } else { - aom_highbd_comp_avg_upsampled_pred( - xd, cm, mi_row, mi_col, this_mv, pred8, second_pred, w, h, - subpel_x_q3, subpel_y_q3, ref, ref_stride, xd->bd, - subpel_search_type); - } - } else { - aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred8, w, h, - subpel_x_q3, subpel_y_q3, ref, ref_stride, - xd->bd, subpel_search_type); - } - besterr = vfp->vf(pred8, w, src, src_stride, sse); - } else { - DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); - if (second_pred != NULL) { - if (mask) { - aom_comp_mask_upsampled_pred( - xd, cm, mi_row, mi_col, this_mv, pred, second_pred, w, h, - subpel_x_q3, subpel_y_q3, ref, ref_stride, mask, mask_stride, - invert_mask, subpel_search_type); - } else { - aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, - second_pred, w, h, subpel_x_q3, subpel_y_q3, - ref, ref_stride, subpel_search_type); - } - } else { - aom_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h, - subpel_x_q3, subpel_y_q3, ref, ref_stride, - subpel_search_type); - } - - besterr = vfp->vf(pred, w, src, src_stride, sse); - } -#else - DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); - if (second_pred != NULL) { - if (mask) { - aom_comp_mask_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, - second_pred, w, h, subpel_x_q3, subpel_y_q3, - ref, ref_stride, mask, mask_stride, - invert_mask, subpel_search_type); - } else { - aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, - second_pred, w, h, subpel_x_q3, subpel_y_q3, - ref, ref_stride, subpel_search_type); 
- } - } else { - aom_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h, subpel_x_q3, - subpel_y_q3, ref, ref_stride, subpel_search_type); - } - - besterr = vfp->vf(pred, w, src, src_stride, sse); -#endif - return besterr; -} - -static INLINE unsigned int check_better_fast( - const MV *this_mv, MV *best_mv, const SubpelMvLimits *mv_limits, - const uint8_t *const src, const int src_stride, const uint8_t *const ref, - int ref_stride, const SUBPEL_SEARCH_VAR_PARAMS *var_params, - const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, - unsigned int *sse1, int *distortion) { - unsigned int cost; - if (av1_is_subpelmv_in_range(mv_limits, *this_mv)) { - unsigned int sse; - int thismse = estimated_pref_error(this_mv, src, src_stride, ref, - ref_stride, var_params, &sse); - cost = mv_err_cost_(this_mv, mv_cost_params); - cost += thismse; - - if (cost < *besterr) { - *besterr = cost; - *best_mv = *this_mv; - *distortion = thismse; - *sse1 = sse; - } - } else { - cost = INT_MAX; - } - return cost; -} - -static AOM_FORCE_INLINE unsigned int check_better( - MACROBLOCKD *xd, const AV1_COMMON *cm, const MV *this_mv, MV *best_mv, - const SubpelMvLimits *mv_limits, const uint8_t *const src, - const int src_stride, const uint8_t *const ref, int ref_stride, - const SUBPEL_SEARCH_VAR_PARAMS *var_params, - const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, - unsigned int *sse1, int *distortion, int *is_better) { - unsigned int cost; - if (av1_is_subpelmv_in_range(mv_limits, *this_mv)) { - unsigned int sse; - int thismse; - if (var_params->subpel_search_type != USE_2_TAPS_ORIG) { - thismse = upsampled_pref_error(xd, cm, this_mv, src, src_stride, ref, - ref_stride, var_params, &sse); - } else { - thismse = estimated_pref_error(this_mv, src, src_stride, ref, ref_stride, - var_params, &sse); - } - cost = mv_err_cost_(this_mv, mv_cost_params); - cost += thismse; - if (cost < *besterr) { - *besterr = cost; - *best_mv = *this_mv; - *distortion = thismse; - *sse1 = 
sse; - *is_better |= 1; - } - } else { - cost = INT_MAX; - } - return cost; -} - -static AOM_FORCE_INLINE int first_level_check_fast( - const MV *this_mv, MV *best_mv, int hstep, const SubpelMvLimits *mv_limits, - const uint8_t *const src, const int src_stride, const uint8_t *const ref, - int ref_stride, const SUBPEL_SEARCH_VAR_PARAMS *var_params, - const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, - unsigned int *sse1, int *distortion) { - // Check the four cardinal directions - const MV left_mv = { this_mv->row, this_mv->col - hstep }; - const unsigned int left = check_better_fast( - &left_mv, best_mv, mv_limits, src, src_stride, ref, ref_stride, - var_params, mv_cost_params, besterr, sse1, distortion); - - const MV right_mv = { this_mv->row, this_mv->col + hstep }; - const unsigned int right = check_better_fast( - &right_mv, best_mv, mv_limits, src, src_stride, ref, ref_stride, - var_params, mv_cost_params, besterr, sse1, distortion); - - const MV top_mv = { this_mv->row - hstep, this_mv->col }; - const unsigned int up = check_better_fast( - &top_mv, best_mv, mv_limits, src, src_stride, ref, ref_stride, var_params, - mv_cost_params, besterr, sse1, distortion); - - const MV bottom_mv = { this_mv->row + hstep, this_mv->col }; - const unsigned int down = check_better_fast( - &bottom_mv, best_mv, mv_limits, src, src_stride, ref, ref_stride, - var_params, mv_cost_params, besterr, sse1, distortion); - - // Check the diagonal direction with the best mv - const int whichdir = (left < right ? 0 : 1) + (up < down ? 
0 : 2); - switch (whichdir) { - case 0: { - const MV top_left_mv = { this_mv->row - hstep, this_mv->col - hstep }; - check_better_fast(&top_left_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion); - break; - } - case 1: { - const MV top_right_mv = { this_mv->row - hstep, this_mv->col + hstep }; - check_better_fast(&top_right_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion); - break; - } - case 2: { - const MV bottom_left_mv = { this_mv->row + hstep, this_mv->col - hstep }; - check_better_fast(&bottom_left_mv, best_mv, mv_limits, src, src_stride, - ref, ref_stride, var_params, mv_cost_params, besterr, - sse1, distortion); - break; - } - case 3: { - const MV bottom_right_mv = { this_mv->row + hstep, this_mv->col + hstep }; - check_better_fast(&bottom_right_mv, best_mv, mv_limits, src, src_stride, - ref, ref_stride, var_params, mv_cost_params, besterr, - sse1, distortion); - break; - } - } - return whichdir; -} - -static AOM_FORCE_INLINE void second_level_check_fast( - const MV *this_mv, MV *best_mv, int hstep, const SubpelMvLimits *mv_limits, - const uint8_t *const src, const int src_stride, const uint8_t *const ref, - int ref_stride, const SUBPEL_SEARCH_VAR_PARAMS *var_params, - const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, - unsigned int *sse1, int *distortion, int whichdir) { - const int tr = this_mv->row; - const int tc = this_mv->col; - const int br = best_mv->row; - const int bc = best_mv->col; - if (tr != br && tc != bc) { - const int kr = br - tr; - const int kc = bc - tc; - - const MV chess_mv_1 = { tr + kr, tc + 2 * kc }; - check_better_fast(&chess_mv_1, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion); - - const MV chess_mv_2 = { tr + 2 * kr, tc + kc }; - check_better_fast(&chess_mv_2, best_mv, mv_limits, src, src_stride, ref, - ref_stride, 
var_params, mv_cost_params, besterr, sse1, - distortion); - } else if (tr == br && tc != bc) { - const int kc = bc - tc; - const MV bottom_long_mv = { tr + hstep, tc + 2 * kc }; - check_better_fast(&bottom_long_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion); - const MV top_long_mv = { tr - hstep, tc + 2 * kc }; - check_better_fast(&top_long_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion); - - switch (whichdir) { - case 0: - case 1: { - const MV bottom_mv = { tr + hstep, tc + kc }; - check_better_fast(&bottom_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion); - break; - } - case 2: - case 3: { - const MV top_mv = { tr - hstep, tc + kc }; - check_better_fast(&top_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion); - break; - } - } - } else if (tr != br && tc == bc) { - const int kr = br - tr; - const MV right_long_mv = { tr + 2 * kr, tc + hstep }; - check_better_fast(&right_long_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion); - const MV left_long_mv = { tr + 2 * kr, tc - hstep }; - check_better_fast(&left_long_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion); - - switch (whichdir) { - case 0: - case 2: { - const MV right_mv = { tr + kr, tc + hstep }; - check_better_fast(&right_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion); - break; - } - case 1: - case 3: { - const MV left_mv = { tr + kr, tc - hstep }; - check_better_fast(&left_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion); - } - } - } -} - -static AOM_FORCE_INLINE void 
two_level_checks_fast( - const MV *this_mv, MV *best_mv, int hstep, const SubpelMvLimits *mv_limits, - const uint8_t *const src, const int src_stride, const uint8_t *const ref, - int ref_stride, const SUBPEL_SEARCH_VAR_PARAMS *var_params, - const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, - unsigned int *sse1, int *distortion, int iters) { - unsigned int whichdir = first_level_check_fast( - this_mv, best_mv, hstep, mv_limits, src, src_stride, ref, ref_stride, - var_params, mv_cost_params, besterr, sse1, distortion); - if (iters > 1) { - second_level_check_fast(this_mv, best_mv, hstep, mv_limits, src, src_stride, - ref, ref_stride, var_params, mv_cost_params, - besterr, sse1, distortion, whichdir); - } -} - -#define CHECK_BETTER(v, r, c) \ - { \ - const MV this_mv = { (r), (c) }; \ - (v) = check_better_fast(&this_mv, bestmv, &mv_limits, src_address, \ - src_stride, y, y_stride, var_params, \ - mv_cost_params, &besterr, sse1, distortion); \ - } - -#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c) - -/* checks if (r, c) has better score than previous best */ -#define CHECK_BETTER1(v, r, c) \ - (v) = check_better(xd, cm, (r), (c), &br, &bc, &mv_limits, src_address, \ - src_stride, y, y_stride, var_params, mv_cost_params, \ - &besterr, sse1, distortion); - -// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of -// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten -// later in the same way. 
-#define SECOND_LEVEL_CHECKS_BEST(k) \ - { \ - unsigned int second; \ - int br0 = br; \ - int bc0 = bc; \ - assert(tr == br || tc == bc); \ - if (tr == br && tc != bc) { \ - kc = bc - tc; \ - } else if (tr != br && tc == bc) { \ - kr = br - tr; \ - } \ - CHECK_BETTER##k(second, br0 + kr, bc0); \ - CHECK_BETTER##k(second, br0, bc0 + kc); \ - if (br0 != br || bc0 != bc) { \ - CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \ - } \ - (void)second; \ - } - -static unsigned int setup_center_error( - const MACROBLOCKD *xd, const MV *bestmv, const uint8_t *const src, - const int src_stride, const uint8_t *y, int y_stride, - const SUBPEL_SEARCH_VAR_PARAMS *var_params, - const MV_COST_PARAMS *mv_cost_params, unsigned int *sse1, int *distortion) { - UNPACK_VAR_PARAMS(var_params); - (void)subpel_search_type; - unsigned int besterr; - y = pre(y, y_stride, bestmv->row, bestmv->col); - - if (second_pred != NULL) { -#if CONFIG_AV1_HIGHBITDEPTH - if (is_cur_buf_hbd(xd)) { - DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]); - uint8_t *comp_pred = CONVERT_TO_BYTEPTR(comp_pred16); - if (mask) { - aom_highbd_comp_mask_pred(comp_pred, second_pred, w, h, y, y_stride, - mask, mask_stride, invert_mask); - } else { - aom_highbd_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); - } - besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); - } else { - DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]); - if (mask) { - aom_comp_mask_pred(comp_pred, second_pred, w, h, y, y_stride, mask, - mask_stride, invert_mask); - } else { - aom_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); - } - besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); - } -#else - (void)xd; - DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]); - if (mask) { - aom_comp_mask_pred(comp_pred, second_pred, w, h, y, y_stride, mask, - mask_stride, invert_mask); - } else { - aom_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); - } - besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 
-#endif - } else { - besterr = vfp->vf(y, y_stride, src, src_stride, sse1); - } - *distortion = besterr; - besterr += mv_err_cost_(bestmv, mv_cost_params); - return besterr; -} - -static INLINE int divide_and_round(int n, int d) { - return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d); -} - -static INLINE int is_cost_list_wellbehaved(const int *cost_list) { - return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] && - cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4]; -} - -// Returns surface minima estimate at given precision in 1/2^n bits. -// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C -// For a given set of costs S0, S1, S2, S3, S4 at points -// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively, -// the solution for the location of the minima (x0, y0) is given by: -// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0), -// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0). -// The code below is an integerized version of that. -static AOM_INLINE void get_cost_surf_min(const int *cost_list, int *ir, int *ic, - int bits) { - *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)), - (cost_list[1] - 2 * cost_list[0] + cost_list[3])); - *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)), - (cost_list[4] - 2 * cost_list[0] + cost_list[2])); -} - -int av1_find_best_sub_pixel_tree_pruned_evenmore( - MACROBLOCK *x, const AV1_COMMON *const cm, - const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, int *distortion, - unsigned int *sse1) { - const int allow_hp = ms_params->allow_hp; - const int forced_stop = ms_params->forced_stop; - const int iters_per_step = ms_params->iters_per_step; - const int do_reset_fractional_mv = ms_params->do_reset_fractional_mv; - const int *cost_list = ms_params->cost_list; - const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; - const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params; - const MV *ref_mv = mv_cost_params->ref_mv; - const 
SUBPEL_SEARCH_TYPE subpel_search_type = - ms_params->var_params.subpel_search_type; - - const uint8_t *const src_address = x->plane[0].src.buf; - const int src_stride = x->plane[0].src.stride; - const MACROBLOCKD *xd = &x->e_mbd; - unsigned int besterr = INT_MAX; - const unsigned int halfiters = iters_per_step; - const unsigned int quarteriters = iters_per_step; - const unsigned int eighthiters = iters_per_step; - const uint8_t *const y = xd->plane[0].pre[0].buf; - const int y_stride = xd->plane[0].pre[0].stride; - - convert_fullmv_to_mv(&x->best_mv); - MV *bestmv = &x->best_mv.as_mv; - MV start_mv = *bestmv; - - int hstep = 4; - - SubpelMvLimits mv_limits; - av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, - mv_cost_params->ref_mv); - - besterr = setup_center_error(xd, bestmv, src_address, src_stride, y, y_stride, - var_params, mv_cost_params, sse1, distortion); - (void)halfiters; - (void)quarteriters; - (void)eighthiters; - (void)allow_hp; - (void)forced_stop; - (void)hstep; - (void)cm; - (void)do_reset_fractional_mv; - (void)ref_mv; - (void)subpel_search_type; - - if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && - cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && - cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { - int ir, ic; - get_cost_surf_min(cost_list, &ir, &ic, 2); - if (ir != 0 || ic != 0) { - const MV this_mv = { start_mv.row + 2 * ir, start_mv.col + 2 * ic }; - check_better_fast(&this_mv, bestmv, &mv_limits, src_address, src_stride, - y, y_stride, var_params, mv_cost_params, &besterr, sse1, - distortion); - } - } else { - two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion, halfiters); - - // Each subsequent iteration checks at least one point in common with - // the last iteration could be 2 ( if diag selected) 1/4 pel - if (forced_stop != HALF_PEL) { - hstep >>= 1; - start_mv = *bestmv; - 
two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion, quarteriters); - } - } - - if (allow_hp && forced_stop == EIGHTH_PEL) { - hstep >>= 1; - start_mv = *bestmv; - two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion, eighthiters); - } - - return besterr; -} - -int av1_find_best_sub_pixel_tree_pruned_more( - MACROBLOCK *x, const AV1_COMMON *const cm, - const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, int *distortion, - unsigned int *sse1) { - const int allow_hp = ms_params->allow_hp; - const int forced_stop = ms_params->forced_stop; - const int iters_per_step = ms_params->iters_per_step; - const int *cost_list = ms_params->cost_list; - const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; - const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params; - - const uint8_t *const src_address = x->plane[0].src.buf; - const int src_stride = x->plane[0].src.stride; - const MACROBLOCKD *xd = &x->e_mbd; - unsigned int besterr = INT_MAX; - const unsigned int halfiters = iters_per_step; - const unsigned int quarteriters = iters_per_step; - const unsigned int eighthiters = iters_per_step; - const uint8_t *const y = xd->plane[0].pre[0].buf; - const int y_stride = xd->plane[0].pre[0].stride; - - convert_fullmv_to_mv(&x->best_mv); - MV *bestmv = &x->best_mv.as_mv; - MV start_mv = *bestmv; - - int hstep = 4; - - SubpelMvLimits mv_limits; - av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, - mv_cost_params->ref_mv); - - (void)cm; - - besterr = setup_center_error(xd, bestmv, src_address, src_stride, y, y_stride, - var_params, mv_cost_params, sse1, distortion); - if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && - cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && - cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { 
- int ir, ic; - get_cost_surf_min(cost_list, &ir, &ic, 1); - if (ir != 0 || ic != 0) { - const MV this_mv = { start_mv.row + ir * hstep, - start_mv.col + ic * hstep }; - check_better_fast(&this_mv, bestmv, &mv_limits, src_address, src_stride, - y, y_stride, var_params, mv_cost_params, &besterr, sse1, - distortion); - } - } else { - two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion, halfiters); - } - - // Each subsequent iteration checks at least one point in common with - // the last iteration could be 2 ( if diag selected) 1/4 pel - if (forced_stop != HALF_PEL) { - hstep >>= 1; - start_mv = *bestmv; - two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion, quarteriters); - } - - if (allow_hp && forced_stop == EIGHTH_PEL) { - hstep >>= 1; - start_mv = *bestmv; - two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion, eighthiters); - } - - return besterr; -} - -int av1_find_best_sub_pixel_tree_pruned( - MACROBLOCK *x, const AV1_COMMON *const cm, - const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, int *distortion, - unsigned int *sse1) { - const int allow_hp = ms_params->allow_hp; - const int forced_stop = ms_params->forced_stop; - const int iters_per_step = ms_params->iters_per_step; - const int *cost_list = ms_params->cost_list; - const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; - const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params; - - const uint8_t *const src_address = x->plane[0].src.buf; - const int src_stride = x->plane[0].src.stride; - const MACROBLOCKD *xd = &x->e_mbd; - unsigned int besterr = INT_MAX; - const unsigned int halfiters = iters_per_step; - const unsigned int quarteriters = iters_per_step; - const unsigned int 
eighthiters = iters_per_step; - const uint8_t *const y = xd->plane[0].pre[0].buf; - const int y_stride = xd->plane[0].pre[0].stride; - - convert_fullmv_to_mv(&x->best_mv); - MV *bestmv = &x->best_mv.as_mv; - MV start_mv = *bestmv; - - int hstep = 4; - - SubpelMvLimits mv_limits; - av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, - mv_cost_params->ref_mv); - (void)cm; - - besterr = setup_center_error(xd, bestmv, src_address, src_stride, y, y_stride, - var_params, mv_cost_params, sse1, distortion); - if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && - cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && - cost_list[4] != INT_MAX) { - const unsigned int whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) + - (cost_list[2] < cost_list[4] ? 0 : 2); - - const MV left_mv = { start_mv.row, start_mv.col - hstep }; - const MV right_mv = { start_mv.row, start_mv.col + hstep }; - const MV bottom_mv = { start_mv.row + hstep, start_mv.col }; - const MV top_mv = { start_mv.row - hstep, start_mv.col }; - - const MV bottom_left_mv = { start_mv.row + hstep, start_mv.col - hstep }; - const MV bottom_right_mv = { start_mv.row + hstep, start_mv.col + hstep }; - const MV top_left_mv = { start_mv.row - hstep, start_mv.col - hstep }; - const MV top_right_mv = { start_mv.row - hstep, start_mv.col + hstep }; - - switch (whichdir) { - case 0: // bottom left quadrant - check_better_fast(&left_mv, bestmv, &mv_limits, src_address, src_stride, - y, y_stride, var_params, mv_cost_params, &besterr, - sse1, distortion); - check_better_fast(&bottom_mv, bestmv, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion); - check_better_fast(&bottom_left_mv, bestmv, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion); - break; - case 1: // bottom right quadrant - check_better_fast(&right_mv, bestmv, &mv_limits, src_address, - src_stride, y, y_stride, var_params, 
mv_cost_params, - &besterr, sse1, distortion); - check_better_fast(&bottom_mv, bestmv, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion); - check_better_fast(&bottom_right_mv, bestmv, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion); - break; - case 2: // top left quadrant - check_better_fast(&left_mv, bestmv, &mv_limits, src_address, src_stride, - y, y_stride, var_params, mv_cost_params, &besterr, - sse1, distortion); - check_better_fast(&top_mv, bestmv, &mv_limits, src_address, src_stride, - y, y_stride, var_params, mv_cost_params, &besterr, - sse1, distortion); - check_better_fast(&top_left_mv, bestmv, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion); - break; - case 3: // top right quadrant - check_better_fast(&right_mv, bestmv, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion); - check_better_fast(&top_mv, bestmv, &mv_limits, src_address, src_stride, - y, y_stride, var_params, mv_cost_params, &besterr, - sse1, distortion); - check_better_fast(&top_right_mv, bestmv, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion); - break; - } - } else { - two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion, halfiters); - } - - // Each subsequent iteration checks at least one point in common with - // the last iteration could be 2 ( if diag selected) 1/4 pel - if (forced_stop != HALF_PEL) { - hstep >>= 1; - start_mv = *bestmv; - two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion, quarteriters); - } - - if (allow_hp && forced_stop == EIGHTH_PEL) { - hstep >>= 1; - start_mv 
= *bestmv; - two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion, eighthiters); - } - - return besterr; -} - -/* clang-format off */ -static const MV search_step_table[12] = { - // left, right, up, down - { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 }, - { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 }, - { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 } -}; -/* clang-format on */ - -static unsigned int upsampled_setup_center_error( - MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV *bestmv, - const uint8_t *const src, const int src_stride, const uint8_t *const y, - int y_stride, const SUBPEL_SEARCH_VAR_PARAMS *var_params, - const MV_COST_PARAMS *mv_cost_params, unsigned int *sse1, int *distortion) { - unsigned int besterr = upsampled_pref_error(xd, cm, bestmv, src, src_stride, - y, y_stride, var_params, sse1); - *distortion = besterr; - besterr += mv_err_cost_(bestmv, mv_cost_params); - return besterr; -} - -static AOM_FORCE_INLINE void second_level_check_v2( - MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV *diag_mv, MV *best_mv, - int kr, int kc, const SubpelMvLimits *mv_limits, const uint8_t *const src, - const int src_stride, const uint8_t *const ref, int ref_stride, - const SUBPEL_SEARCH_VAR_PARAMS *var_params, - const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, - unsigned int *sse1, int *distortion) { - const MV center_mv = *best_mv; - - assert(diag_mv->row == best_mv->row || diag_mv->col == best_mv->col); - if (best_mv->row == diag_mv->row && best_mv->col != diag_mv->col) { - kc = best_mv->col - diag_mv->col; - } else if (best_mv->row != diag_mv->row && best_mv->col == diag_mv->col) { - kr = best_mv->row - diag_mv->row; - } - - const MV row_bias_mv = { center_mv.row + kr, center_mv.col }; - const MV col_bias_mv = { center_mv.row, center_mv.col + kc }; - const MV diag_bias_mv = { center_mv.row + kr, center_mv.col + kc }; - int has_better_mv = 0; - - 
check_better(xd, cm, &row_bias_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion, &has_better_mv); - check_better(xd, cm, &col_bias_mv, best_mv, mv_limits, src, src_stride, ref, - ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion, &has_better_mv); - - // Do an additional search if the second iteration gives a better mv - if (has_better_mv) { - int dummy = 0; - check_better(xd, cm, &diag_bias_mv, best_mv, mv_limits, src, src_stride, - ref, ref_stride, var_params, mv_cost_params, besterr, sse1, - distortion, &dummy); - } -} - -int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const AV1_COMMON *const cm, - const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, - int *distortion, unsigned int *sse1) { - const int allow_hp = ms_params->allow_hp; - const int forced_stop = ms_params->forced_stop; - const int iters_per_step = ms_params->iters_per_step; - const int do_reset_fractional_mv = ms_params->do_reset_fractional_mv; - const int *cost_list = ms_params->cost_list; - const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; - const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params; - const MV *ref_mv = mv_cost_params->ref_mv; - const SUBPEL_SEARCH_TYPE subpel_search_type = - ms_params->var_params.subpel_search_type; - - const uint8_t *const src_address = x->plane[0].src.buf; - const int src_stride = x->plane[0].src.stride; - MACROBLOCKD *xd = &x->e_mbd; - unsigned int besterr = INT_MAX; - const int y_stride = xd->plane[0].pre[0].stride; - - const uint8_t *const y = xd->plane[0].pre[0].buf; - convert_fullmv_to_mv(&x->best_mv); - MV *bestmv = &x->best_mv.as_mv; - - int hstep = 4; - int iter, round = FULL_PEL - forced_stop; - const MV *search_step = search_step_table; - int best_idx = -1; - unsigned int cost_array[5]; - int kr, kc; - SubpelMvLimits mv_limits; - - av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, ref_mv); - - if (!allow_hp) - if (round == 3) round = 2; - - 
if (subpel_search_type != USE_2_TAPS_ORIG) - besterr = upsampled_setup_center_error(xd, cm, bestmv, src_address, - src_stride, y, y_stride, var_params, - mv_cost_params, sse1, distortion); - else - besterr = - setup_center_error(xd, bestmv, src_address, src_stride, y, y_stride, - var_params, mv_cost_params, sse1, distortion); - - (void)cost_list; // to silence compiler warning - - if (do_reset_fractional_mv) { - av1_set_fractional_mv(x->fractional_best_mv); - } - - MV iter_center_mv = *bestmv; - for (iter = 0; iter < round; ++iter) { - if (x->fractional_best_mv[iter].as_mv.row == iter_center_mv.row && - x->fractional_best_mv[iter].as_mv.col == iter_center_mv.col) - return INT_MAX; - - x->fractional_best_mv[iter].as_mv = iter_center_mv; - - MV best_iter_mv = { INT16_MAX, INT16_MAX }; - - // Check vertical and horizontal sub-pixel positions. - for (int idx = 0; idx < 4; ++idx) { - const MV this_mv = { iter_center_mv.row + search_step[idx].row, - iter_center_mv.col + search_step[idx].col }; - - int is_better = 0; - cost_array[idx] = - check_better(xd, cm, &this_mv, &best_iter_mv, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion, &is_better); - if (is_better) { - best_idx = idx; - } - } - - // Check diagonal sub-pixel position - kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); - kr = (cost_array[2] <= cost_array[3] ? 
-hstep : hstep); - - const MV diag_mv = { iter_center_mv.row + kr, iter_center_mv.col + kc }; - int is_better = 0; - - cost_array[4] = - check_better(xd, cm, &diag_mv, &best_iter_mv, &mv_limits, src_address, - src_stride, y, y_stride, var_params, mv_cost_params, - &besterr, sse1, distortion, &is_better); - if (is_better) { - best_idx = 4; - } - - if (best_idx != -1) { - iter_center_mv = best_iter_mv; - - if (iters_per_step > 1) { - second_level_check_v2(xd, cm, &diag_mv, &iter_center_mv, kr, kc, - &mv_limits, src_address, src_stride, y, y_stride, - var_params, mv_cost_params, &besterr, sse1, - distortion); - } - } - - search_step += 4; - hstep >>= 1; - best_idx = -1; - } - - *bestmv = iter_center_mv; - - return besterr; -} - -#undef PRE -#undef CHECK_BETTER - -unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x, - BLOCK_SIZE bsize, const MV *this_mv) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - const uint8_t *const src = x->plane[0].src.buf; - const int src_stride = x->plane[0].src.stride; - uint8_t *const dst = xd->plane[0].dst.buf; - const int dst_stride = xd->plane[0].dst.stride; - const aom_variance_fn_ptr_t *vfp = &cpi->fn_ptr[bsize]; - const int_mv ref_mv = av1_get_ref_mv(x, 0); - unsigned int mse; - unsigned int sse; - const int mi_row = xd->mi_row; - const int mi_col = xd->mi_col; - const MV_COST_TYPE mv_cost_type = x->mv_cost_type; - - av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, - AOM_PLANE_Y, AOM_PLANE_Y); - mse = vfp->vf(dst, dst_stride, src, src_stride, &sse); - mse += mv_err_cost(this_mv, &ref_mv.as_mv, x->nmv_vec_cost, - CONVERT_TO_CONST_MVCOST(x->mv_cost_stack), x->errorperbit, - mv_cost_type); - return mse; -} - -// Refine MV in a small range -unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x, - BLOCK_SIZE bsize, int *pts0, int *pts_inref0, - int total_samples) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - 
MB_MODE_INFO *mbmi = xd->mi[0]; - const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 }, - { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } }; - const int_mv ref_mv = av1_get_ref_mv(x, 0); - int16_t br = mbmi->mv[0].as_mv.row; - int16_t bc = mbmi->mv[0].as_mv.col; - int16_t *tr = &mbmi->mv[0].as_mv.row; - int16_t *tc = &mbmi->mv[0].as_mv.col; - WarpedMotionParams best_wm_params = mbmi->wm_params; - int best_num_proj_ref = mbmi->num_proj_ref; - unsigned int bestmse; - SubpelMvLimits mv_limits; - - const int start = cm->allow_high_precision_mv ? 0 : 4; - int ite; - - av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv); - - // Calculate the center position's error - assert(av1_is_subpelmv_in_range(&mv_limits, mbmi->mv[0].as_mv)); - bestmse = av1_compute_motion_cost(cpi, x, bsize, &mbmi->mv[0].as_mv); - - // MV search - const int mi_row = xd->mi_row; - const int mi_col = xd->mi_col; - for (ite = 0; ite < 2; ++ite) { - int best_idx = -1; - int idx; - - for (idx = start; idx < start + 4; ++idx) { - unsigned int thismse; - - *tr = br + neighbors[idx].row; - *tc = bc + neighbors[idx].col; - - MV this_mv = { *tr, *tc }; - if (av1_is_subpelmv_in_range(&mv_limits, this_mv)) { - int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; - - memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); - memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0)); - if (total_samples > 1) - mbmi->num_proj_ref = - av1_selectSamples(&this_mv, pts, pts_inref, total_samples, bsize); - - if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, *tr, - *tc, &mbmi->wm_params, mi_row, mi_col)) { - thismse = av1_compute_motion_cost(cpi, x, bsize, &this_mv); - - if (thismse < bestmse) { - best_idx = idx; - best_wm_params = mbmi->wm_params; - best_num_proj_ref = mbmi->num_proj_ref; - bestmse = thismse; - } - } - } - } - - if (best_idx == -1) break; - - if (best_idx >= 0) { - br += neighbors[best_idx].row; - bc += neighbors[best_idx].col; - } - } - - 
*tr = br; - *tc = bc; - mbmi->wm_params = best_wm_params; - mbmi->num_proj_ref = best_num_proj_ref; - return bestmse; -} - +// Checks whether the mv is within range of the mv_limits static INLINE int check_bounds(const FullMvLimits *mv_limits, int row, int col, int range) { return ((row - range) >= mv_limits->row_min) & @@ -1361,23 +344,9 @@ ((col + range) <= mv_limits->col_max); } -#define CHECK_BETTER \ - { \ - if (thissad < bestsad) { \ - if (use_mvcost) \ - thissad += mvsad_err_cost(x, &this_mv, &full_ref_mv, sad_per_bit); \ - if (thissad < bestsad) { \ - bestsad = thissad; \ - best_site = i; \ - } \ - } \ - } - -#define MAX_PATTERN_SCALES 11 -#define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale -#define PATTERN_CANDIDATES_REF 3 // number of refinement candidates - -// Calculate and return a sad+mvcost list around an integer best pel. +// Calculates and returns a sad+mvcost list around an integer best pel during +// fullpixel motion search. The resulting list can be used to speed up subpel +// motion search later. static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *const ref_mv, int sadpb, const aom_variance_fn_ptr_t *fn_ptr, @@ -1475,6 +444,16 @@ } } +#define CHECK_BETTER \ + if (thissad < bestsad) { \ + if (use_mvcost) \ + thissad += mvsad_err_cost(x, &this_mv, &full_ref_mv, sad_per_bit); \ + if (thissad < bestsad) { \ + bestsad = thissad; \ + best_site = i; \ + } \ + } + // Generic pattern search function that searches over multiple scales. 
// Each scale can have a different number of candidates and shape of // candidates as indicated in the num_candidates and candidates arrays @@ -1732,78 +711,7 @@ x->best_mv.as_mv.col = bc; return bestsad; } - -int av1_get_mvpred_sse(const MACROBLOCK *x, const FULLPEL_MV *best_mv, - const MV *ref_mv, const aom_variance_fn_ptr_t *vfp) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const MV mv = get_mv_from_fullmv(best_mv); - const MV_COST_TYPE mv_cost_type = x->mv_cost_type; - unsigned int sse, var; - - var = vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), - in_what->stride, &sse); - (void)var; - - return sse + mv_err_cost(&mv, ref_mv, x->nmv_vec_cost, - CONVERT_TO_CONST_MVCOST(x->mv_cost_stack), - x->errorperbit, mv_cost_type); -} - -int av1_get_mvpred_var(const MACROBLOCK *x, const FULLPEL_MV *best_mv, - const MV *ref_mv, const aom_variance_fn_ptr_t *vfp) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const MV mv = get_mv_from_fullmv(best_mv); - const MV_COST_TYPE mv_cost_type = x->mv_cost_type; - unsigned int sse, var; - - var = vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), - in_what->stride, &sse); - - return var + mv_err_cost(&mv, ref_mv, x->nmv_vec_cost, - CONVERT_TO_CONST_MVCOST(x->mv_cost_stack), - x->errorperbit, mv_cost_type); -} - -int av1_get_mvpred_av_var(const MACROBLOCK *x, const FULLPEL_MV *best_mv, - const MV *ref_mv, const uint8_t *second_pred, - const aom_variance_fn_ptr_t *vfp, - const struct buf_2d *src, const struct buf_2d *pre) { - const struct buf_2d *const what = src; - const struct buf_2d *const in_what = pre; - const MV mv = get_mv_from_fullmv(best_mv); - const MV_COST_TYPE mv_cost_type = x->mv_cost_type; - unsigned int unused; - - return vfp->svaf(get_buf_from_mv(in_what, 
best_mv), in_what->stride, 0, 0, - what->buf, what->stride, &unused, second_pred) + - mv_err_cost(&mv, ref_mv, x->nmv_vec_cost, - CONVERT_TO_CONST_MVCOST(x->mv_cost_stack), x->errorperbit, - mv_cost_type); -} - -int av1_get_mvpred_mask_var(const MACROBLOCK *x, const FULLPEL_MV *best_mv, - const MV *ref_mv, const uint8_t *second_pred, - const uint8_t *mask, int mask_stride, - int invert_mask, const aom_variance_fn_ptr_t *vfp, - const struct buf_2d *src, - const struct buf_2d *pre) { - const struct buf_2d *const what = src; - const struct buf_2d *const in_what = pre; - const MV mv = get_mv_from_fullmv(best_mv); - const MV_COST_TYPE mv_cost_type = x->mv_cost_type; - unsigned int unused; - - return vfp->msvf(what->buf, what->stride, 0, 0, - get_buf_from_mv(in_what, best_mv), in_what->stride, - second_pred, mask, mask_stride, invert_mask, &unused) + - mv_err_cost(&mv, ref_mv, x->nmv_vec_cost, - CONVERT_TO_CONST_MVCOST(x->mv_cost_stack), x->errorperbit, - mv_cost_type); -} +#undef CHECK_BETTER // For the following foo_search, the input arguments are: // x: The struct used to hold a bunch of random configs. @@ -1818,7 +726,7 @@ // speed up subpel search later. // vfp: a function pointer to the simd function so we can compute the cost // efficiently -// ref_mf: the reference mv used to compute the mv cost +// ref_mv: the reference mv used to compute the mv cost int av1_hex_search(MACROBLOCK *x, FULLPEL_MV *start_mv, int search_param, int sad_per_bit, int do_init_search, int *cost_list, const aom_variance_fn_ptr_t *vfp, const MV *ref_mv) { @@ -1953,8 +861,6 @@ sad_per_bit, do_init_search, cost_list, vfp, ref_mv); } -#undef CHECK_BETTER - // Exhaustive motion search around a given centre position with a given // step size. 
static int exhuastive_mesh_search(MACROBLOCK *x, FULLPEL_MV *ref_mv, @@ -2400,212 +1306,6 @@ return best_sad; } -static int vector_match(int16_t *ref, int16_t *src, int bwl) { - int best_sad = INT_MAX; - int this_sad; - int d; - int center, offset = 0; - int bw = 4 << bwl; // redundant variable, to be changed in the experiments. - for (d = 0; d <= bw; d += 16) { - this_sad = aom_vector_var(&ref[d], src, bwl); - if (this_sad < best_sad) { - best_sad = this_sad; - offset = d; - } - } - center = offset; - - for (d = -8; d <= 8; d += 16) { - int this_pos = offset + d; - // check limit - if (this_pos < 0 || this_pos > bw) continue; - this_sad = aom_vector_var(&ref[this_pos], src, bwl); - if (this_sad < best_sad) { - best_sad = this_sad; - center = this_pos; - } - } - offset = center; - - for (d = -4; d <= 4; d += 8) { - int this_pos = offset + d; - // check limit - if (this_pos < 0 || this_pos > bw) continue; - this_sad = aom_vector_var(&ref[this_pos], src, bwl); - if (this_sad < best_sad) { - best_sad = this_sad; - center = this_pos; - } - } - offset = center; - - for (d = -2; d <= 2; d += 4) { - int this_pos = offset + d; - // check limit - if (this_pos < 0 || this_pos > bw) continue; - this_sad = aom_vector_var(&ref[this_pos], src, bwl); - if (this_sad < best_sad) { - best_sad = this_sad; - center = this_pos; - } - } - offset = center; - - for (d = -1; d <= 1; d += 2) { - int this_pos = offset + d; - // check limit - if (this_pos < 0 || this_pos > bw) continue; - this_sad = aom_vector_var(&ref[this_pos], src, bwl); - if (this_sad < best_sad) { - best_sad = this_sad; - center = this_pos; - } - } - - return (center - (bw >> 1)); -} - -static const MV search_pos[4] = { - { -1, 0 }, - { 0, -1 }, - { 0, 1 }, - { 1, 0 }, -}; - -unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int mi_row, - int mi_col, const MV *ref_mv) { - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mi = xd->mi[0]; - struct buf_2d backup_yv12[MAX_MB_PLANE] 
= { { 0, 0, 0, 0, 0 } }; - DECLARE_ALIGNED(16, int16_t, hbuf[256]); - DECLARE_ALIGNED(16, int16_t, vbuf[256]); - DECLARE_ALIGNED(16, int16_t, src_hbuf[128]); - DECLARE_ALIGNED(16, int16_t, src_vbuf[128]); - int idx; - const int bw = 4 << mi_size_wide_log2[bsize]; - const int bh = 4 << mi_size_high_log2[bsize]; - const int search_width = bw << 1; - const int search_height = bh << 1; - const int src_stride = x->plane[0].src.stride; - const int ref_stride = xd->plane[0].pre[0].stride; - uint8_t const *ref_buf, *src_buf; - int_mv *best_int_mv = &xd->mi[0]->mv[0]; - unsigned int best_sad, tmp_sad, this_sad[4]; - const int norm_factor = 3 + (bw >> 5); - const YV12_BUFFER_CONFIG *scaled_ref_frame = - av1_get_scaled_ref_frame(cpi, mi->ref_frame[0]); - - if (scaled_ref_frame) { - int i; - // Swap out the reference frame for a version that's been scaled to - // match the resolution of the current frame, allowing the existing - // motion search code to be used without additional modifications. - for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; - av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL, - MAX_MB_PLANE); - } - - if (xd->bd != 8) { - unsigned int sad; - best_int_mv->as_fullmv = kZeroFullMv; - sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, - xd->plane[0].pre[0].buf, ref_stride); - - if (scaled_ref_frame) { - int i; - for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; - } - return sad; - } - - // Set up prediction 1-D reference set - ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); - for (idx = 0; idx < search_width; idx += 16) { - aom_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); - ref_buf += 16; - } - - ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride; - for (idx = 0; idx < search_height; ++idx) { - vbuf[idx] = aom_int_pro_col(ref_buf, bw) >> norm_factor; - ref_buf += ref_stride; - } - - // Set up src 1-D reference set - for (idx = 0; idx < bw; idx += 16) { - src_buf = 
x->plane[0].src.buf + idx; - aom_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); - } - - src_buf = x->plane[0].src.buf; - for (idx = 0; idx < bh; ++idx) { - src_vbuf[idx] = aom_int_pro_col(src_buf, bw) >> norm_factor; - src_buf += src_stride; - } - - // Find the best match per 1-D search - best_int_mv->as_fullmv.col = - vector_match(hbuf, src_hbuf, mi_size_wide_log2[bsize]); - best_int_mv->as_fullmv.row = - vector_match(vbuf, src_vbuf, mi_size_high_log2[bsize]); - - FULLPEL_MV this_mv = best_int_mv->as_fullmv; - src_buf = x->plane[0].src.buf; - ref_buf = get_buf_from_mv(&xd->plane[0].pre[0], &this_mv); - best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); - - { - const uint8_t *const pos[4] = { - ref_buf - ref_stride, - ref_buf - 1, - ref_buf + 1, - ref_buf + ref_stride, - }; - - cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad); - } - - for (idx = 0; idx < 4; ++idx) { - if (this_sad[idx] < best_sad) { - best_sad = this_sad[idx]; - best_int_mv->as_fullmv.row = search_pos[idx].row + this_mv.row; - best_int_mv->as_fullmv.col = search_pos[idx].col + this_mv.col; - } - } - - if (this_sad[0] < this_sad[3]) - this_mv.row -= 1; - else - this_mv.row += 1; - - if (this_sad[1] < this_sad[2]) - this_mv.col -= 1; - else - this_mv.col += 1; - - ref_buf = get_buf_from_mv(&xd->plane[0].pre[0], &this_mv); - - tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); - if (best_sad > tmp_sad) { - best_int_mv->as_fullmv = this_mv; - best_sad = tmp_sad; - } - - convert_fullmv_to_mv(best_int_mv); - - SubpelMvLimits subpel_mv_limits; - av1_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); - clamp_mv(&best_int_mv->as_mv, &subpel_mv_limits); - - if (scaled_ref_frame) { - int i; - for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; - } - - return best_sad; -} - int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, FULLPEL_MV *start_mv, int step_param, 
int method, int run_mesh_search, int error_per_bit, @@ -2763,291 +1463,9 @@ return var; } -/* returns subpixel variance error function */ -#define DIST(r, c) \ - vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), src_address, mask, \ - &sse) - -/* checks if (r, c) has better score than previous best */ -#define MVC(diff_mv) \ - (unsigned int)(mvcost \ - ? (mv_cost((diff_mv), mvjcost, mvcost) * error_per_bit + \ - 4096) >> \ - 13 \ - : 0) - -#define CHECK_BETTER(v, r, c) \ - { \ - const MV this_mv = { r, c }; \ - if (av1_is_subpelmv_in_range(&mv_limits, this_mv)) { \ - const MV diff_mv = { r - ref_mv->row, c - ref_mv->col }; \ - thismse = (DIST(r, c)); \ - if ((v = MVC(&diff_mv) + thismse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } \ - } - -#undef CHECK_BETTER0 -#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c) - -#undef CHECK_BETTER1 -#define CHECK_BETTER1(v, r, c) \ - { \ - const MV this_mv = { r, c }; \ - if (av1_is_subpelmv_in_range(&mv_limits, this_mv)) { \ - thismse = upsampled_obmc_pref_error( \ - xd, cm, &this_mv, mask, vfp, src_address, pre(y, y_stride, r, c), \ - y_stride, sp(c), sp(r), w, h, &sse, subpel_search_type); \ - v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, \ - mv_cost_type); \ - if ((v + thismse) < besterr) { \ - besterr = v + thismse; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } \ - } - -static unsigned int setup_obmc_center_error( - const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc, - const uint8_t *const y, int y_stride, const int *mvjcost, - const int *const mvcost[2], unsigned int *sse1, int *distortion, - MV_COST_TYPE mv_cost_type) { - unsigned int besterr; - besterr = vfp->ovf(y, y_stride, wsrc, mask, sse1); - *distortion = besterr; - besterr += - 
mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, mv_cost_type); - return besterr; -} - -static int upsampled_obmc_pref_error( - MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV *const mv, - const int32_t *mask, const aom_variance_fn_ptr_t *vfp, - const int32_t *const wsrc, const uint8_t *const y, int y_stride, - int subpel_x_q3, int subpel_y_q3, int w, int h, unsigned int *sse, - int subpel_search) { - unsigned int besterr; - - const int mi_row = xd->mi_row; - const int mi_col = xd->mi_col; - DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]); -#if CONFIG_AV1_HIGHBITDEPTH - if (is_cur_buf_hbd(xd)) { - uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred); - aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h, - subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd, - subpel_search); - besterr = vfp->ovf(pred8, w, wsrc, mask, sse); - } else { - aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3, - subpel_y_q3, y, y_stride, subpel_search); - - besterr = vfp->ovf(pred, w, wsrc, mask, sse); - } -#else - aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3, - subpel_y_q3, y, y_stride, subpel_search); - - besterr = vfp->ovf(pred, w, wsrc, mask, sse); -#endif - return besterr; -} - -static unsigned int upsampled_setup_obmc_center_error( - MACROBLOCKD *xd, const AV1_COMMON *const cm, const int32_t *mask, - const MV *bestmv, const MV *ref_mv, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc, - const uint8_t *const y, int y_stride, int w, int h, const int *mvjcost, - const int *const mvcost[2], unsigned int *sse1, int *distortion, - int subpel_search, MV_COST_TYPE mv_cost_type) { - unsigned int besterr = - upsampled_obmc_pref_error(xd, cm, bestmv, mask, vfp, wsrc, y, y_stride, 0, - 0, w, h, sse1, subpel_search); - *distortion = besterr; - besterr += - mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, mv_cost_type); - return besterr; -} - -#define UNPACK_OBMC_MS_PARAMS \ - const int 
allow_hp = ms_params->allow_hp; \ - const int forced_stop = ms_params->forced_stop; \ - const int iters_per_step = ms_params->iters_per_step; \ - const MV *ref_mv = ms_params->mv_cost_params.ref_mv; \ - const int *mvjcost = ms_params->mv_cost_params.mvjcost; \ - const int *const *mvcost = ms_params->mv_cost_params.mvcost; \ - const int error_per_bit = ms_params->mv_cost_params.error_per_bit; \ - const MV_COST_TYPE mv_cost_type = ms_params->mv_cost_params.mv_cost_type; \ - const aom_variance_fn_ptr_t *vfp = ms_params->var_params.vfp; \ - const SUBPEL_SEARCH_TYPE subpel_search_type = \ - ms_params->var_params.subpel_search_type; \ - const int w = ms_params->var_params.w; \ - const int h = ms_params->var_params.h; - -int av1_find_best_obmc_sub_pixel_tree_up( - MACROBLOCK *x, const AV1_COMMON *const cm, - const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, int *distortion, - unsigned int *sse1) { - UNPACK_OBMC_MS_PARAMS; - const int32_t *wsrc = x->wsrc_buf; - const int32_t *mask = x->mask_buf; - - const int32_t *const src_address = wsrc; - MACROBLOCKD *xd = &x->e_mbd; - struct macroblockd_plane *const pd = &xd->plane[0]; - unsigned int besterr = INT_MAX; - unsigned int sse; - unsigned int thismse; - const int y_stride = pd->pre[0].stride; - const int offset = get_offset_from_mv(&x->best_mv.as_fullmv, y_stride); - const uint8_t *y = pd->pre[0].buf; - convert_fullmv_to_mv(&x->best_mv); - MV *bestmv = &x->best_mv.as_mv; - - int br = bestmv->row; - int bc = bestmv->col; - int hstep = 4; - int iter, round = FULL_PEL - forced_stop; - int tr = br; - int tc = bc; - const MV *search_step = search_step_table; - int idx, best_idx = -1; - unsigned int cost_array[5]; - int kr, kc; - - SubpelMvLimits mv_limits; - - av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, ref_mv); - - if (!allow_hp) - if (round == 3) round = 2; - - if (subpel_search_type != USE_2_TAPS_ORIG) - besterr = upsampled_setup_obmc_center_error( - xd, cm, mask, bestmv, ref_mv, error_per_bit, vfp, src_address, - y 
+ offset, y_stride, w, h, mvjcost, mvcost, sse1, distortion, - subpel_search_type, mv_cost_type); - else - besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp, - src_address, y, y_stride, mvjcost, mvcost, - sse1, distortion, mv_cost_type); - - for (iter = 0; iter < round; ++iter) { - // Check vertical and horizontal sub-pixel positions. - for (idx = 0; idx < 4; ++idx) { - tr = br + search_step[idx].row; - tc = bc + search_step[idx].col; - MV this_mv = { tr, tc }; - if (av1_is_subpelmv_in_range(&mv_limits, this_mv)) { - if (subpel_search_type != USE_2_TAPS_ORIG) { - thismse = upsampled_obmc_pref_error( - xd, cm, &this_mv, mask, vfp, src_address, - pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse, - subpel_search_type); - } else { - thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), - sp(tr), src_address, mask, &sse); - } - - cost_array[idx] = - thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit, mv_cost_type); - if (cost_array[idx] < besterr) { - best_idx = idx; - besterr = cost_array[idx]; - *distortion = thismse; - *sse1 = sse; - } - } else { - cost_array[idx] = INT_MAX; - } - } - - // Check diagonal sub-pixel position - kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); - kr = (cost_array[2] <= cost_array[3] ? 
-hstep : hstep); - - tc = bc + kc; - tr = br + kr; - { - MV this_mv = { tr, tc }; - if (av1_is_subpelmv_in_range(&mv_limits, this_mv)) { - if (subpel_search_type != USE_2_TAPS_ORIG) { - thismse = upsampled_obmc_pref_error( - xd, cm, &this_mv, mask, vfp, src_address, - pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse, - subpel_search_type); - } else { - thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), - sp(tr), src_address, mask, &sse); - } - - cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit, mv_cost_type); - - if (cost_array[4] < besterr) { - best_idx = 4; - besterr = cost_array[4]; - *distortion = thismse; - *sse1 = sse; - } - } else { - cost_array[idx] = INT_MAX; - } - } - - if (best_idx < 4 && best_idx >= 0) { - br += search_step[best_idx].row; - bc += search_step[best_idx].col; - } else if (best_idx == 4) { - br = tr; - bc = tc; - } - - if (iters_per_step > 1 && best_idx != -1) { - if (subpel_search_type != USE_2_TAPS_ORIG) { - SECOND_LEVEL_CHECKS_BEST(1); - } else { - SECOND_LEVEL_CHECKS_BEST(0); - } - } - - tr = br; - tc = bc; - - search_step += 4; - hstep >>= 1; - best_idx = -1; - } - - // These lines insure static analysis doesn't warn that - // tr and tc aren't used after the above point. 
- (void)tr; - (void)tc; - - bestmv->row = br; - bestmv->col = bc; - - return besterr; -} - -#undef DIST -#undef MVC -#undef CHECK_BETTER - +// ============================================================================= +// Fullpixel Motion Search: OBMC +// ============================================================================= static int get_obmc_mvpred_var(const MACROBLOCK *x, const int32_t *wsrc, const int32_t *mask, const FULLPEL_MV *best_mv, const MV *ref_mv, @@ -3252,10 +1670,1152 @@ } } +static int vector_match(int16_t *ref, int16_t *src, int bwl) { + int best_sad = INT_MAX; + int this_sad; + int d; + int center, offset = 0; + int bw = 4 << bwl; // redundant variable, to be changed in the experiments. + for (d = 0; d <= bw; d += 16) { + this_sad = aom_vector_var(&ref[d], src, bwl); + if (this_sad < best_sad) { + best_sad = this_sad; + offset = d; + } + } + center = offset; + + for (d = -8; d <= 8; d += 16) { + int this_pos = offset + d; + // check limit + if (this_pos < 0 || this_pos > bw) continue; + this_sad = aom_vector_var(&ref[this_pos], src, bwl); + if (this_sad < best_sad) { + best_sad = this_sad; + center = this_pos; + } + } + offset = center; + + for (d = -4; d <= 4; d += 8) { + int this_pos = offset + d; + // check limit + if (this_pos < 0 || this_pos > bw) continue; + this_sad = aom_vector_var(&ref[this_pos], src, bwl); + if (this_sad < best_sad) { + best_sad = this_sad; + center = this_pos; + } + } + offset = center; + + for (d = -2; d <= 2; d += 4) { + int this_pos = offset + d; + // check limit + if (this_pos < 0 || this_pos > bw) continue; + this_sad = aom_vector_var(&ref[this_pos], src, bwl); + if (this_sad < best_sad) { + best_sad = this_sad; + center = this_pos; + } + } + offset = center; + + for (d = -1; d <= 1; d += 2) { + int this_pos = offset + d; + // check limit + if (this_pos < 0 || this_pos > bw) continue; + this_sad = aom_vector_var(&ref[this_pos], src, bwl); + if (this_sad < best_sad) { + best_sad = this_sad; + center = 
this_pos; + } + } + + return (center - (bw >> 1)); +} + +// A special fast version of motion search used in rt mode +unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, int mi_row, + int mi_col, const MV *ref_mv) { + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mi = xd->mi[0]; + struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } }; + DECLARE_ALIGNED(16, int16_t, hbuf[256]); + DECLARE_ALIGNED(16, int16_t, vbuf[256]); + DECLARE_ALIGNED(16, int16_t, src_hbuf[128]); + DECLARE_ALIGNED(16, int16_t, src_vbuf[128]); + int idx; + const int bw = 4 << mi_size_wide_log2[bsize]; + const int bh = 4 << mi_size_high_log2[bsize]; + const int search_width = bw << 1; + const int search_height = bh << 1; + const int src_stride = x->plane[0].src.stride; + const int ref_stride = xd->plane[0].pre[0].stride; + uint8_t const *ref_buf, *src_buf; + int_mv *best_int_mv = &xd->mi[0]->mv[0]; + unsigned int best_sad, tmp_sad, this_sad[4]; + const int norm_factor = 3 + (bw >> 5); + const YV12_BUFFER_CONFIG *scaled_ref_frame = + av1_get_scaled_ref_frame(cpi, mi->ref_frame[0]); + static const MV search_pos[4] = { + { -1, 0 }, + { 0, -1 }, + { 0, 1 }, + { 1, 0 }, + }; + + if (scaled_ref_frame) { + int i; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. 
+ for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; + av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL, + MAX_MB_PLANE); + } + + if (xd->bd != 8) { + unsigned int sad; + best_int_mv->as_fullmv = kZeroFullMv; + sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, + xd->plane[0].pre[0].buf, ref_stride); + + if (scaled_ref_frame) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; + } + return sad; + } + + // Set up prediction 1-D reference set + ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); + for (idx = 0; idx < search_width; idx += 16) { + aom_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); + ref_buf += 16; + } + + ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride; + for (idx = 0; idx < search_height; ++idx) { + vbuf[idx] = aom_int_pro_col(ref_buf, bw) >> norm_factor; + ref_buf += ref_stride; + } + + // Set up src 1-D reference set + for (idx = 0; idx < bw; idx += 16) { + src_buf = x->plane[0].src.buf + idx; + aom_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); + } + + src_buf = x->plane[0].src.buf; + for (idx = 0; idx < bh; ++idx) { + src_vbuf[idx] = aom_int_pro_col(src_buf, bw) >> norm_factor; + src_buf += src_stride; + } + + // Find the best match per 1-D search + best_int_mv->as_fullmv.col = + vector_match(hbuf, src_hbuf, mi_size_wide_log2[bsize]); + best_int_mv->as_fullmv.row = + vector_match(vbuf, src_vbuf, mi_size_high_log2[bsize]); + + FULLPEL_MV this_mv = best_int_mv->as_fullmv; + src_buf = x->plane[0].src.buf; + ref_buf = get_buf_from_mv(&xd->plane[0].pre[0], &this_mv); + best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); + + { + const uint8_t *const pos[4] = { + ref_buf - ref_stride, + ref_buf - 1, + ref_buf + 1, + ref_buf + ref_stride, + }; + + cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad); + } + + for (idx = 0; idx < 4; ++idx) { + if (this_sad[idx] < best_sad) { + best_sad = this_sad[idx]; + 
best_int_mv->as_fullmv.row = search_pos[idx].row + this_mv.row; + best_int_mv->as_fullmv.col = search_pos[idx].col + this_mv.col; + } + } + + if (this_sad[0] < this_sad[3]) + this_mv.row -= 1; + else + this_mv.row += 1; + + if (this_sad[1] < this_sad[2]) + this_mv.col -= 1; + else + this_mv.col += 1; + + ref_buf = get_buf_from_mv(&xd->plane[0].pre[0], &this_mv); + + tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); + if (best_sad > tmp_sad) { + best_int_mv->as_fullmv = this_mv; + best_sad = tmp_sad; + } + + convert_fullmv_to_mv(best_int_mv); + + SubpelMvLimits subpel_mv_limits; + av1_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); + clamp_mv(&best_int_mv->as_mv, &subpel_mv_limits); + + if (scaled_ref_frame) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; + } + + return best_sad; +} + +// ============================================================================= +// Subpixel Motion Search: Translational +// ============================================================================= +#define INIT_SUBPEL_STEP_SIZE (4) +/* + * To avoid the penalty for crossing cache-line read, preload the reference + * area in a small buffer, which is aligned to make sure there won't be crossing + * cache-line read while reading from this buffer. This reduced the cpu + * cycles spent on reading ref data in sub-pixel filter functions. + * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x + * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we + * could reduce the area. 
+ */
+
+// Subpel offset handed to the subpel variance functions [m]sv[a]f: the low
+// three bits of the coordinate.
+static INLINE int sp(int x) { return x & 7; }
+
+// Address in the ref buffer for subpel location (r, c), rounded to the nearest
+// fullpel precision toward - \infty.
+static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
+  const int fullpel_offset = (r >> 3) * stride + (c >> 3);
+  return buf + fullpel_offset;
+}
+
+// Estimates the variance of prediction residue using bilinear filter for fast
+// search. The variance function is chosen from var_params: svf when there is no
+// second predictor, msvf when a mask is set, svaf otherwise. sse is forwarded
+// to the selected function.
+static INLINE int estimated_pref_error(
+    const MV *this_mv, const uint8_t *src, const int src_stride,
+    const uint8_t *ref, int ref_stride,
+    const SUBPEL_SEARCH_VAR_PARAMS *var_params, unsigned int *sse) {
+  const aom_variance_fn_ptr_t *const fns = var_params->vfp;
+  const uint8_t *const second_pred = var_params->second_pred;
+  const uint8_t *const mask = var_params->mask;
+  const uint8_t *const ref_ptr =
+      pre(ref, ref_stride, this_mv->row, this_mv->col);
+  const int x_off = sp(this_mv->col);
+  const int y_off = sp(this_mv->row);
+
+  if (second_pred == NULL) {
+    return fns->svf(ref_ptr, ref_stride, x_off, y_off, src, src_stride, sse);
+  }
+  if (mask) {
+    return fns->msvf(ref_ptr, ref_stride, x_off, y_off, src, src_stride,
+                     second_pred, mask, var_params->mask_stride,
+                     var_params->invert_mask, sse);
+  }
+  return fns->svaf(ref_ptr, ref_stride, x_off, y_off, src, src_stride, sse,
+                   second_pred);
+}
+
+// Calculates the variance of prediction residue.
+static int upsampled_pref_error(MACROBLOCKD *xd, const AV1_COMMON *cm,
+                                const MV *this_mv, const uint8_t *src,
+                                int src_stride, const uint8_t *ref,
+                                int ref_stride,
+                                const SUBPEL_SEARCH_VAR_PARAMS *var_params,
+                                unsigned int *sse) {
+  const aom_variance_fn_ptr_t *const fns = var_params->vfp;
+  const SUBPEL_SEARCH_TYPE filter_type = var_params->subpel_search_type;
+  const uint8_t *const second_pred = var_params->second_pred;
+  const uint8_t *const mask = var_params->mask;
+  const int mask_stride = var_params->mask_stride;
+  const int invert_mask = var_params->invert_mask;
+  const int w = var_params->w;
+  const int h = var_params->h;
+
+  const int mi_row = xd->mi_row;
+  const int mi_col = xd->mi_col;
+  const int x_q3 = sp(this_mv->col);
+  const int y_q3 = sp(this_mv->row);
+  unsigned int err;
+
+  // Move to the fullpel sample that anchors this subpel position.
+  ref = pre(ref, ref_stride, this_mv->row, this_mv->col);
+
+  // Each path below builds the prediction (single, averaged with second_pred,
+  // or masked with second_pred) into a local buffer and measures it with
+  // vfp->vf against src.
+#if CONFIG_AV1_HIGHBITDEPTH
+  if (is_cur_buf_hbd(xd)) {
+    DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
+    uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred16);
+    if (second_pred == NULL) {
+      aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred8, w, h,
+                                x_q3, y_q3, ref, ref_stride, xd->bd,
+                                filter_type);
+    } else if (mask) {
+      aom_highbd_comp_mask_upsampled_pred(
+          xd, cm, mi_row, mi_col, this_mv, pred8, second_pred, w, h, x_q3,
+          y_q3, ref, ref_stride, mask, mask_stride, invert_mask, xd->bd,
+          filter_type);
+    } else {
+      aom_highbd_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, this_mv,
+                                         pred8, second_pred, w, h, x_q3, y_q3,
+                                         ref, ref_stride, xd->bd, filter_type);
+    }
+    err = fns->vf(pred8, w, src, src_stride, sse);
+  } else {
+    DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
+    if (second_pred == NULL) {
+      aom_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h, x_q3,
+                         y_q3, ref, ref_stride, filter_type);
+    } else if (mask) {
+      aom_comp_mask_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred,
+                                   second_pred, w, h, x_q3, y_q3, ref,
+                                   ref_stride, mask, mask_stride, invert_mask,
+                                   filter_type);
+    } else {
+      aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred,
+                                  second_pred, w, h, x_q3, y_q3, ref,
+                                  ref_stride, filter_type);
+    }
+
+    err = fns->vf(pred, w, src, src_stride, sse);
+  }
+#else
+  DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
+  if (second_pred == NULL) {
+    aom_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h, x_q3, y_q3,
+                       ref, ref_stride, filter_type);
+  } else if (mask) {
+    aom_comp_mask_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred,
+                                 second_pred, w, h, x_q3, y_q3, ref, ref_stride,
+                                 mask, mask_stride, invert_mask, filter_type);
+  } else {
+    aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred,
+                                second_pred, w, h, x_q3, y_q3, ref, ref_stride,
+                                filter_type);
+  }
+
+  err = fns->vf(pred, w, src, src_stride, sse);
+#endif
+  return err;
+}
+
+// Estimates whether this_mv is better than best_mv. This function incorporates
+// both prediction error and residue into account. It is suffixed "fast" because
+// it uses bilinear filter to estimate the prediction.
+static INLINE unsigned int check_better_fast(
+    const MV *this_mv, MV *best_mv, const SubpelMvLimits *mv_limits,
+    const uint8_t *const src, const int src_stride, const uint8_t *const ref,
+    int ref_stride, const SUBPEL_SEARCH_VAR_PARAMS *var_params,
+    const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr,
+    unsigned int *sse1, int *distortion, int *has_better_mv) {
+  // Candidates outside the subpel limits are never evaluated.
+  if (!av1_is_subpelmv_in_range(mv_limits, *this_mv)) return INT_MAX;
+
+  unsigned int this_sse;
+  const int this_var = estimated_pref_error(this_mv, src, src_stride, ref,
+                                            ref_stride, var_params, &this_sse);
+  unsigned int total = mv_err_cost_(this_mv, mv_cost_params);
+  total += this_var;
+
+  // Strictly better only: ties keep the current best.
+  if (total < *besterr) {
+    *besterr = total;
+    *best_mv = *this_mv;
+    *distortion = this_var;
+    *sse1 = this_sse;
+    *has_better_mv |= 1;
+  }
+  return total;
+}
+
+// Checks whether this_mv is better than best_mv, measuring the prediction with
+// upsampled_pref_error and adding the mv cost. Returns the combined cost, or
+// INT_MAX when this_mv is outside mv_limits. On a strict improvement the best
+// mv, *besterr, *distortion and *sse1 are updated and *is_better is set.
+static AOM_FORCE_INLINE unsigned int check_better(
+    MACROBLOCKD *xd, const AV1_COMMON *cm, const MV *this_mv, MV *best_mv,
+    const SubpelMvLimits *mv_limits, const uint8_t *const src,
+    const int src_stride, const uint8_t *const ref, int ref_stride,
+    const SUBPEL_SEARCH_VAR_PARAMS *var_params,
+    const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr,
+    unsigned int *sse1, int *distortion, int *is_better) {
+  if (!av1_is_subpelmv_in_range(mv_limits, *this_mv)) return INT_MAX;
+
+  unsigned int this_sse;
+  const int this_var = upsampled_pref_error(
+      xd, cm, this_mv, src, src_stride, ref, ref_stride, var_params, &this_sse);
+  unsigned int total = mv_err_cost_(this_mv, mv_cost_params);
+  total += this_var;
+
+  if (total < *besterr) {
+    *besterr = total;
+    *best_mv = *this_mv;
+    *distortion = this_var;
+    *sse1 = this_sse;
+    *is_better |= 1;
+  }
+  return total;
+}
+
+// Searches the four cardinal direction for a better mv, then follows up with a
+// search in the best quadrant.
This uses bilinear filter to speed up the
+// calculation.
+// Returns whichdir: bit 0 is set unless left cost < right cost, bit 1 is set
+// unless up cost < down cost.
+static AOM_FORCE_INLINE int first_level_check_fast(
+    const MV *this_mv, MV *best_mv, int hstep, const SubpelMvLimits *mv_limits,
+    const uint8_t *const src, const int src_stride, const uint8_t *const ref,
+    int ref_stride, const SUBPEL_SEARCH_VAR_PARAMS *var_params,
+    const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr,
+    unsigned int *sse1, int *distortion) {
+  int unused = 0;
+
+  // Cardinal neighbours, evaluated in the order left, right, up, down.
+  const MV left_mv = { this_mv->row, this_mv->col - hstep };
+  const unsigned int left_cost = check_better_fast(
+      &left_mv, best_mv, mv_limits, src, src_stride, ref, ref_stride,
+      var_params, mv_cost_params, besterr, sse1, distortion, &unused);
+
+  const MV right_mv = { this_mv->row, this_mv->col + hstep };
+  const unsigned int right_cost = check_better_fast(
+      &right_mv, best_mv, mv_limits, src, src_stride, ref, ref_stride,
+      var_params, mv_cost_params, besterr, sse1, distortion, &unused);
+
+  const MV top_mv = { this_mv->row - hstep, this_mv->col };
+  const unsigned int up_cost = check_better_fast(
+      &top_mv, best_mv, mv_limits, src, src_stride, ref, ref_stride, var_params,
+      mv_cost_params, besterr, sse1, distortion, &unused);
+
+  const MV bottom_mv = { this_mv->row + hstep, this_mv->col };
+  const unsigned int down_cost = check_better_fast(
+      &bottom_mv, best_mv, mv_limits, src, src_stride, ref, ref_stride,
+      var_params, mv_cost_params, besterr, sse1, distortion, &unused);
+
+  // One diagonal, in the quadrant of the cheaper neighbour on each axis:
+  // bit 0 of whichdir picks +hstep for the column, bit 1 picks +hstep for the
+  // row; a clear bit picks -hstep.
+  const int whichdir =
+      (left_cost < right_cost ? 0 : 1) + (up_cost < down_cost ? 0 : 2);
+  const MV diag_mv = { this_mv->row + ((whichdir & 2) ? hstep : -hstep),
+                       this_mv->col + ((whichdir & 1) ? hstep : -hstep) };
+  check_better_fast(&diag_mv, best_mv, mv_limits, src, src_stride, ref,
+                    ref_stride, var_params, mv_cost_params, besterr, sse1,
+                    distortion, &unused);
+  return whichdir;
+}
+
+// Performs a following up search after first_level_check_fast is called. This
+// performs two extra chess pattern searches in the best quadrant.
+static AOM_FORCE_INLINE void second_level_check_fast(
+    const MV *this_mv, MV *best_mv, int hstep, const SubpelMvLimits *mv_limits,
+    const uint8_t *const src, const int src_stride, const uint8_t *const ref,
+    int ref_stride, const SUBPEL_SEARCH_VAR_PARAMS *var_params,
+    const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr,
+    unsigned int *sse1, int *distortion, int whichdir) {
+  // Snapshot the centre and the displacement of the current best from it; the
+  // checks below may overwrite *best_mv.
+  const int tr = this_mv->row;
+  const int tc = this_mv->col;
+  const int kr = best_mv->row - tr;
+  const int kc = best_mv->col - tc;
+  int unused = 0;
+
+  if (kr != 0 && kc != 0) {
+    // Best is a diagonal: try the two chess moves beyond it.
+    const MV chess_mv_1 = { tr + kr, tc + 2 * kc };
+    check_better_fast(&chess_mv_1, best_mv, mv_limits, src, src_stride, ref,
+                      ref_stride, var_params, mv_cost_params, besterr, sse1,
+                      distortion, &unused);
+
+    const MV chess_mv_2 = { tr + 2 * kr, tc + kc };
+    check_better_fast(&chess_mv_2, best_mv, mv_limits, src, src_stride, ref,
+                      ref_stride, var_params, mv_cost_params, besterr, sse1,
+                      distortion, &unused);
+  } else if (kc != 0) {
+    // Best moved along the column axis only (kr == 0).
+    const MV bottom_long_mv = { tr + hstep, tc + 2 * kc };
+    check_better_fast(&bottom_long_mv, best_mv, mv_limits, src, src_stride, ref,
+                      ref_stride, var_params, mv_cost_params, besterr, sse1,
+                      distortion, &unused);
+    const MV top_long_mv = { tr - hstep, tc + 2 * kc };
+    check_better_fast(&top_long_mv, best_mv, mv_limits, src, src_stride, ref,
+                      ref_stride, var_params, mv_cost_params, besterr, sse1,
+                      distortion, &unused);
+
+    if (whichdir == 0 || whichdir == 1) {
+      const MV bottom_mv = { tr + hstep, tc + kc };
+      check_better_fast(&bottom_mv, best_mv, mv_limits, src, src_stride, ref,
+                        ref_stride, var_params, mv_cost_params, besterr, sse1,
+                        distortion, &unused);
+    } else if (whichdir == 2 || whichdir == 3) {
+      const MV top_mv = { tr - hstep, tc + kc };
+      check_better_fast(&top_mv, best_mv, mv_limits, src, src_stride, ref,
+                        ref_stride, var_params, mv_cost_params, besterr, sse1,
+                        distortion, &unused);
+    }
+  } else if (kr != 0) {
+    // Best moved along the row axis only (kc == 0).
+    const MV right_long_mv = { tr + 2 * kr, tc + hstep };
+    check_better_fast(&right_long_mv, best_mv, mv_limits, src, src_stride, ref,
+                      ref_stride, var_params, mv_cost_params, besterr, sse1,
+                      distortion, &unused);
+    const MV left_long_mv = { tr + 2 * kr, tc - hstep };
+    check_better_fast(&left_long_mv, best_mv, mv_limits, src, src_stride, ref,
+                      ref_stride, var_params, mv_cost_params, besterr, sse1,
+                      distortion, &unused);
+
+    if (whichdir == 0 || whichdir == 2) {
+      const MV right_mv = { tr + kr, tc + hstep };
+      check_better_fast(&right_mv, best_mv, mv_limits, src, src_stride, ref,
+                        ref_stride, var_params, mv_cost_params, besterr, sse1,
+                        distortion, &unused);
+    } else if (whichdir == 1 || whichdir == 3) {
+      const MV left_mv = { tr + kr, tc - hstep };
+      check_better_fast(&left_mv, best_mv, mv_limits, src, src_stride, ref,
+                        ref_stride, var_params, mv_cost_params, besterr, sse1,
+                        distortion, &unused);
+    }
+  }
+}
+
+// Combines first level check and second level check when applicable. This first
+// searches the four cardinal directions, and perform several
+// diagonal/chess-pattern searches in the best quadrant.
+static AOM_FORCE_INLINE void two_level_checks_fast(
+    const MV *this_mv, MV *best_mv, int hstep, const SubpelMvLimits *mv_limits,
+    const uint8_t *const src, const int src_stride, const uint8_t *const ref,
+    int ref_stride, const SUBPEL_SEARCH_VAR_PARAMS *var_params,
+    const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr,
+    unsigned int *sse1, int *distortion, int iters) {
+  unsigned int whichdir = first_level_check_fast(
+      this_mv, best_mv, hstep, mv_limits, src, src_stride, ref, ref_stride,
+      var_params, mv_cost_params, besterr, sse1, distortion);
+  // The second level is skipped unless more than one iteration was requested.
+  if (iters > 1) {
+    second_level_check_fast(this_mv, best_mv, hstep, mv_limits, src, src_stride,
+                            ref, ref_stride, var_params, mv_cost_params,
+                            besterr, sse1, distortion, whichdir);
+  }
+}
+
+// A newer version of second level check that gives better quality.
+// TODO(chiyotsai@google.com): evaluate this on subpel_search_types different
+// from av1_find_best_sub_pixel_tree
+// Starting from the current best_mv it tries a row-biased and a column-biased
+// neighbour (offsets kr / kc), and the combined diagonal only if one of those
+// improved. check_better is used unless the search type is USE_2_TAPS_ORIG, in
+// which case check_better_fast is used.
+static AOM_FORCE_INLINE void second_level_check_v2(
+    MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV *diag_mv, MV *best_mv,
+    int kr, int kc, const SubpelMvLimits *mv_limits, const uint8_t *const src,
+    const int src_stride, const uint8_t *const ref, int ref_stride,
+    const SUBPEL_SEARCH_VAR_PARAMS *var_params,
+    const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr,
+    unsigned int *sse1, int *distortion) {
+  const MV center_mv = *best_mv;
+
+  assert(diag_mv->row == best_mv->row || diag_mv->col == best_mv->col);
+  // When best_mv differs from diag_mv along exactly one axis, the step on that
+  // axis is replaced by the displacement from diag_mv to best_mv.
+  if (best_mv->row == diag_mv->row && best_mv->col != diag_mv->col) {
+    kc = best_mv->col - diag_mv->col;
+  } else if (best_mv->row != diag_mv->row && best_mv->col == diag_mv->col) {
+    kr = best_mv->row - diag_mv->row;
+  }
+
+  const MV row_bias_mv = { center_mv.row + kr, center_mv.col };
+  const MV col_bias_mv = { center_mv.row, center_mv.col + kc };
+  const MV diag_bias_mv = { center_mv.row + kr, center_mv.col + kc };
+  int has_better_mv = 0;
+
+  if (var_params->subpel_search_type != USE_2_TAPS_ORIG) {
+    check_better(xd, cm, &row_bias_mv, best_mv, mv_limits, src, src_stride, ref,
+                 ref_stride, var_params, mv_cost_params, besterr, sse1,
+                 distortion, &has_better_mv);
+    check_better(xd, cm, &col_bias_mv, best_mv, mv_limits, src, src_stride, ref,
+                 ref_stride, var_params, mv_cost_params, besterr, sse1,
+                 distortion, &has_better_mv);
+
+    // Do an additional search if the second iteration gives a better mv
+    if (has_better_mv) {
+      int dummy = 0;
+      check_better(xd, cm, &diag_bias_mv, best_mv, mv_limits, src, src_stride,
+                   ref, ref_stride, var_params, mv_cost_params, besterr, sse1,
+                   distortion, &dummy);
+    }
+  } else {
+    check_better_fast(&row_bias_mv, best_mv, mv_limits, src, src_stride, ref,
+                      ref_stride, var_params, mv_cost_params, besterr, sse1,
+                      distortion, &has_better_mv);
+    check_better_fast(&col_bias_mv, best_mv, mv_limits, src, src_stride, ref,
+                      ref_stride, var_params, mv_cost_params, besterr, sse1,
+                      distortion, &has_better_mv);
+
+    // Do an additional search if the second iteration gives a better mv
+    if (has_better_mv) {
+      int dummy = 0;
+      check_better_fast(&diag_bias_mv, best_mv, mv_limits, src, src_stride, ref,
+                        ref_stride, var_params, mv_cost_params, besterr, sse1,
+                        distortion, &dummy);
+    }
+  }
+}
+
+// Gets the error at the beginning when the mv has fullpel precision
+// With a second predictor, the compound (masked or averaged) prediction is
+// built into a local buffer first. *distortion receives the variance alone;
+// the return value additionally includes the mv cost.
+static unsigned int setup_center_error(
+    const MACROBLOCKD *xd, const MV *bestmv, const uint8_t *const src,
+    const int src_stride, const uint8_t *y, int y_stride,
+    const SUBPEL_SEARCH_VAR_PARAMS *var_params,
+    const MV_COST_PARAMS *mv_cost_params, unsigned int *sse1, int *distortion) {
+  const aom_variance_fn_ptr_t *vfp = var_params->vfp;
+  const uint8_t *second_pred = var_params->second_pred;
+  const uint8_t *mask = var_params->mask;
+  const int mask_stride = var_params->mask_stride;
+  const int invert_mask = var_params->invert_mask;
+  const int w = var_params->w;
+  const int h = var_params->h;
+
+  unsigned int besterr;
+  y = pre(y, y_stride, bestmv->row, bestmv->col);
+
+  if (second_pred != NULL) {
+#if CONFIG_AV1_HIGHBITDEPTH
+    if (is_cur_buf_hbd(xd)) {
+      DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
+      uint8_t *comp_pred = CONVERT_TO_BYTEPTR(comp_pred16);
+      if (mask) {
+        aom_highbd_comp_mask_pred(comp_pred, second_pred, w, h, y, y_stride,
+                                  mask, mask_stride, invert_mask);
+      } else {
+        aom_highbd_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
+      }
+      besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
+    } else {
+      DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
+      if (mask) {
+        aom_comp_mask_pred(comp_pred, second_pred, w, h, y, y_stride, mask,
+                           mask_stride, invert_mask);
+      } else {
+        aom_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
+      }
+      besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
+    }
+#else
+    (void)xd;
+    DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
+    if (mask) {
+      aom_comp_mask_pred(comp_pred, second_pred, w, h, y, y_stride, mask,
+                         mask_stride, invert_mask);
+    } else {
+      aom_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
+    }
+    besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
+#endif
+  } else {
+    besterr = vfp->vf(y, y_stride, src, src_stride, sse1);
+  }
+  *distortion = besterr;
+  besterr += mv_err_cost_(bestmv, mv_cost_params);
+  return besterr;
+}
+
+// Gets the error at the beginning when the mv has fullpel precision
+// Same contract as setup_center_error, but the variance comes from
+// upsampled_pref_error.
+static unsigned int upsampled_setup_center_error(
+    MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV *bestmv,
+    const uint8_t *const src, const int src_stride, const uint8_t *const y,
+    int y_stride, const SUBPEL_SEARCH_VAR_PARAMS *var_params,
+    const MV_COST_PARAMS *mv_cost_params, unsigned int *sse1, int *distortion) {
+  unsigned int besterr = upsampled_pref_error(xd, cm, bestmv, src, src_stride,
+                                              y, y_stride, var_params, sse1);
+  *distortion = besterr;
+  besterr += mv_err_cost_(bestmv, mv_cost_params);
+  return besterr;
+}
+
+// Integer division n / d rounded to the nearest integer: half of d is added to
+// n when n and d have the same sign and subtracted when the signs differ.
+static INLINE int divide_and_round(int n, int d) {
+  return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
+}
+
+// Returns nonzero when cost_list[0] is strictly smaller than each of
+// cost_list[1..4].
+static INLINE int is_cost_list_wellbehaved(const int *cost_list) {
+  return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
+         cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
+}
+
+// Returns surface minima estimate at given precision in 1/2^n bits.
+// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
+// For a given set of costs S0, S1, S2, S3, S4 at points
+// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
+// the solution for the location of the minima (x0, y0) is given by:
+// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
+// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
+// The code below is an integerized version of that.
+// NOTE(review): the divisors are nonzero only if S0 is below its neighbours;
+// the callers in this file check is_cost_list_wellbehaved() first.
+static AOM_INLINE void get_cost_surf_min(const int *cost_list, int *ir, int *ic,
+                                         int bits) {
+  *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
+                         (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
+  *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
+                         (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
+}
+
+// Subpel search starting from x->best_mv (converted from fullpel here), using
+// check_better_fast throughout. With a usable, well-behaved cost_list a single
+// candidate from get_cost_surf_min is tried; otherwise two_level_checks_fast
+// runs at the initial step and, unless forced_stop is HALF_PEL, at half that
+// step. A final halved step runs when allow_hp is set and forced_stop is
+// EIGHTH_PEL. The result is left in x->best_mv; returns the best cost found.
+int av1_find_best_sub_pixel_tree_pruned_evenmore(
+    MACROBLOCK *x, const AV1_COMMON *const cm,
+    const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, int *distortion,
+    unsigned int *sse1) {
+  const int allow_hp = ms_params->allow_hp;
+  const int forced_stop = ms_params->forced_stop;
+  const int iters_per_step = ms_params->iters_per_step;
+  const int *cost_list = ms_params->cost_list;
+  const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params;
+  const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params;
+
+  const uint8_t *const src_address = x->plane[0].src.buf;
+  const int src_stride = x->plane[0].src.stride;
+  const MACROBLOCKD *xd = &x->e_mbd;
+  unsigned int besterr = INT_MAX;
+  const uint8_t *const ref_address = xd->plane[0].pre[0].buf;
+  const int ref_stride = xd->plane[0].pre[0].stride;
+
+  convert_fullmv_to_mv(&x->best_mv);
+  MV *bestmv = &x->best_mv.as_mv;
+  MV start_mv = *bestmv;
+
+  int hstep = INIT_SUBPEL_STEP_SIZE;
+
+  SubpelMvLimits mv_limits;
+  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits,
+                                 mv_cost_params->ref_mv);
+
+  (void)cm;
+
+  besterr = setup_center_error(xd, bestmv, src_address, src_stride, ref_address,
+                               ref_stride, var_params, mv_cost_params, sse1,
+                               distortion);
+
+  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
+      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
+      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
+    int ir, ic;
+    int dummy = 0;
+    get_cost_surf_min(cost_list, &ir, &ic, 2);
+    if (ir != 0 || ic != 0) {
+      const MV this_mv = { start_mv.row + 2 * ir, start_mv.col + 2 * ic };
+      check_better_fast(&this_mv, bestmv, &mv_limits, src_address, src_stride,
+                        ref_address, ref_stride, var_params, mv_cost_params,
+                        &besterr, sse1, distortion, &dummy);
+    }
+  } else {
+    two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion,
+                          iters_per_step);
+
+    // Each subsequent iteration checks at least one point in common with
+    // the last iteration could be 2 ( if diag selected) 1/4 pel
+    if (forced_stop != HALF_PEL) {
+      hstep >>= 1;
+      start_mv = *bestmv;
+      two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address,
+                            src_stride, ref_address, ref_stride, var_params,
+                            mv_cost_params, &besterr, sse1, distortion,
+                            iters_per_step);
+    }
+  }
+
+  if (allow_hp && forced_stop == EIGHTH_PEL) {
+    hstep >>= 1;
+    start_mv = *bestmv;
+    two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion,
+                          iters_per_step);
+  }
+
+  return besterr;
+}
+
+// Like av1_find_best_sub_pixel_tree_pruned_evenmore, except that the cost
+// surface estimate is taken with bits == 1 and scaled by hstep, and the
+// half-step pass (unless forced_stop is HALF_PEL) runs after either branch of
+// the first stage rather than only after the two-level search.
+int av1_find_best_sub_pixel_tree_pruned_more(
+    MACROBLOCK *x, const AV1_COMMON *const cm,
+    const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, int *distortion,
+    unsigned int *sse1) {
+  const int allow_hp = ms_params->allow_hp;
+  const int forced_stop = ms_params->forced_stop;
+  const int iters_per_step = ms_params->iters_per_step;
+  const int *cost_list = ms_params->cost_list;
+  const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params;
+  const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params;
+
+  const uint8_t *const src_address = x->plane[0].src.buf;
+  const int src_stride = x->plane[0].src.stride;
+  const MACROBLOCKD *xd = &x->e_mbd;
+  unsigned int besterr = INT_MAX;
+  const uint8_t *const ref_address = xd->plane[0].pre[0].buf;
+  const int ref_stride = xd->plane[0].pre[0].stride;
+
+  convert_fullmv_to_mv(&x->best_mv);
+  MV *bestmv = &x->best_mv.as_mv;
+  MV start_mv = *bestmv;
+
+  int hstep = INIT_SUBPEL_STEP_SIZE;
+
+  SubpelMvLimits mv_limits;
+  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits,
+                                 mv_cost_params->ref_mv);
+
+  (void)cm;
+
+  besterr = setup_center_error(xd, bestmv, src_address, src_stride, ref_address,
+                               ref_stride, var_params, mv_cost_params, sse1,
+                               distortion);
+  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
+      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
+      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
+    int ir, ic;
+    get_cost_surf_min(cost_list, &ir, &ic, 1);
+    if (ir != 0 || ic != 0) {
+      const MV this_mv = { start_mv.row + ir * hstep,
+                           start_mv.col + ic * hstep };
+      int dummy = 0;
+      check_better_fast(&this_mv, bestmv, &mv_limits, src_address, src_stride,
+                        ref_address, ref_stride, var_params, mv_cost_params,
+                        &besterr, sse1, distortion, &dummy);
+    }
+  } else {
+    two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion,
+                          iters_per_step);
+  }
+
+  // Each subsequent iteration checks at least one point in common with
+  // the last iteration could be 2 ( if diag selected) 1/4 pel
+  if (forced_stop != HALF_PEL) {
+    hstep >>= 1;
+    start_mv = *bestmv;
+    two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion,
+                          iters_per_step);
+  }
+
+  if (allow_hp && forced_stop == EIGHTH_PEL) {
+    hstep >>= 1;
+    start_mv = *bestmv;
+    two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion,
+                          iters_per_step);
+  }
+
+  return besterr;
+}
+
+// Subpel search that, when all five cost_list entries are usable, picks one
+// quadrant from the cost_list comparisons and checks only its two cardinal
+// neighbours and its diagonal at the initial step; otherwise it falls back to
+// two_level_checks_fast. The later, finer passes match
+// av1_find_best_sub_pixel_tree_pruned_more.
+int av1_find_best_sub_pixel_tree_pruned(
+    MACROBLOCK *x, const AV1_COMMON *const cm,
+    const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, int *distortion,
+    unsigned int *sse1) {
+  const int allow_hp = ms_params->allow_hp;
+  const int forced_stop = ms_params->forced_stop;
+  const int iters_per_step = ms_params->iters_per_step;
+  const int *cost_list = ms_params->cost_list;
+  const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params;
+  const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params;
+
+  const uint8_t *const src_address = x->plane[0].src.buf;
+  const int src_stride = x->plane[0].src.stride;
+  const MACROBLOCKD *xd = &x->e_mbd;
+  unsigned int besterr = INT_MAX;
+  const uint8_t *const ref_address = xd->plane[0].pre[0].buf;
+  const int ref_stride = xd->plane[0].pre[0].stride;
+
+  convert_fullmv_to_mv(&x->best_mv);
+  MV *bestmv = &x->best_mv.as_mv;
+  MV start_mv = *bestmv;
+
+  int hstep = INIT_SUBPEL_STEP_SIZE;
+
+  SubpelMvLimits mv_limits;
+  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits,
+                                 mv_cost_params->ref_mv);
+  (void)cm;
+
+  besterr = setup_center_error(xd, bestmv, src_address, src_stride, ref_address,
+                               ref_stride, var_params, mv_cost_params, sse1,
+                               distortion);
+  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
+      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
+      cost_list[4] != INT_MAX) {
+    // Bit 0: cost_list[1] vs cost_list[3]; bit 1: cost_list[2] vs cost_list[4].
+    const unsigned int whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
+                                  (cost_list[2] < cost_list[4] ? 0 : 2);
+
+    const MV left_mv = { start_mv.row, start_mv.col - hstep };
+    const MV right_mv = { start_mv.row, start_mv.col + hstep };
+    const MV bottom_mv = { start_mv.row + hstep, start_mv.col };
+    const MV top_mv = { start_mv.row - hstep, start_mv.col };
+
+    const MV bottom_left_mv = { start_mv.row + hstep, start_mv.col - hstep };
+    const MV bottom_right_mv = { start_mv.row + hstep, start_mv.col + hstep };
+    const MV top_left_mv = { start_mv.row - hstep, start_mv.col - hstep };
+    const MV top_right_mv = { start_mv.row - hstep, start_mv.col + hstep };
+
+    int dummy = 0;
+
+    switch (whichdir) {
+      case 0:  // bottom left quadrant
+        check_better_fast(&left_mv, bestmv, &mv_limits, src_address, src_stride,
+                          ref_address, ref_stride, var_params, mv_cost_params,
+                          &besterr, sse1, distortion, &dummy);
+        check_better_fast(&bottom_mv, bestmv, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion, &dummy);
+        check_better_fast(&bottom_left_mv, bestmv, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion, &dummy);
+        break;
+      case 1:  // bottom right quadrant
+        check_better_fast(&right_mv, bestmv, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion, &dummy);
+        check_better_fast(&bottom_mv, bestmv, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion, &dummy);
+        check_better_fast(&bottom_right_mv, bestmv, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion, &dummy);
+        break;
+      case 2:  // top left quadrant
+        check_better_fast(&left_mv, bestmv, &mv_limits, src_address, src_stride,
+                          ref_address, ref_stride, var_params, mv_cost_params,
+                          &besterr, sse1, distortion, &dummy);
+        check_better_fast(&top_mv, bestmv, &mv_limits, src_address, src_stride,
+                          ref_address, ref_stride, var_params, mv_cost_params,
+                          &besterr, sse1, distortion, &dummy);
+        check_better_fast(&top_left_mv, bestmv, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion, &dummy);
+        break;
+      case 3:  // top right quadrant
+        check_better_fast(&right_mv, bestmv, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion, &dummy);
+        check_better_fast(&top_mv, bestmv, &mv_limits, src_address, src_stride,
+                          ref_address, ref_stride, var_params, mv_cost_params,
+                          &besterr, sse1, distortion, &dummy);
+        check_better_fast(&top_right_mv, bestmv, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion, &dummy);
+        break;
+    }
+  } else {
+    two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion,
+                          iters_per_step);
+  }
+
+  // Each subsequent iteration checks at least one point in common with
+  // the last iteration could be 2 ( if diag selected) 1/4 pel
+  if (forced_stop != HALF_PEL) {
+    hstep >>= 1;
+    start_mv = *bestmv;
+    two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion,
+                          iters_per_step);
+  }
+
+  if (allow_hp && forced_stop == EIGHTH_PEL) {
+    hstep >>= 1;
+    start_mv = *bestmv;
+    two_level_checks_fast(&start_mv, bestmv, hstep, &mv_limits, src_address,
+                          src_stride, ref_address, ref_stride, var_params,
+                          mv_cost_params, &besterr, sse1, distortion,
+                          iters_per_step);
+  }
+
+  return besterr;
+}
+
+// Cardinal step offsets in groups of four, one group per search round; each
+// group uses a step half the size of the previous one.
+/* clang-format off */
+static const MV search_step_table[12] = {
+  // left, right, up, down
+  { 0, -INIT_SUBPEL_STEP_SIZE }, { 0, INIT_SUBPEL_STEP_SIZE },
+  { -INIT_SUBPEL_STEP_SIZE, 0 }, { INIT_SUBPEL_STEP_SIZE, 0 },
+  { 0, -(INIT_SUBPEL_STEP_SIZE >> 1) }, { 0, (INIT_SUBPEL_STEP_SIZE >> 1) },
+  { -(INIT_SUBPEL_STEP_SIZE >> 1), 0 }, { (INIT_SUBPEL_STEP_SIZE >> 1), 0 },
+  { 0, -(INIT_SUBPEL_STEP_SIZE >> 2) }, { 0, (INIT_SUBPEL_STEP_SIZE >> 2) },
+  { -(INIT_SUBPEL_STEP_SIZE >> 2), 0 }, { (INIT_SUBPEL_STEP_SIZE >> 2), 0 }
+};
+/* clang-format on */
+
+// Subpel search over up to (FULL_PEL - forced_stop) rounds, capped at 2 when
+// allow_hp is off. Each round checks the four cardinal neighbours of the
+// current centre, then one diagonal, then optionally second_level_check_v2,
+// and halves the step. check_better is used unless the search type is
+// USE_2_TAPS_ORIG, in which case check_better_fast is used. Returns the best
+// cost, or INT_MAX when a round's centre equals the mv already stored for that
+// round in x->fractional_best_mv (in which case x->best_mv is not written
+// back).
+int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const AV1_COMMON *const cm,
+                                 const SUBPEL_MOTION_SEARCH_PARAMS *ms_params,
+                                 int *distortion, unsigned int *sse1) {
+  const int allow_hp = ms_params->allow_hp;
+  const int forced_stop = ms_params->forced_stop;
+  const int iters_per_step = ms_params->iters_per_step;
+  const int do_reset_fractional_mv = ms_params->do_reset_fractional_mv;
+  const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params;
+  const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params;
+  const MV *ref_mv = mv_cost_params->ref_mv;
+  const SUBPEL_SEARCH_TYPE subpel_search_type =
+      ms_params->var_params.subpel_search_type;
+
+  MACROBLOCKD *xd = &x->e_mbd;
+  const uint8_t *const src_address = x->plane[0].src.buf;
+  const int src_stride = x->plane[0].src.stride;
+  const int ref_stride = xd->plane[0].pre[0].stride;
+  const uint8_t *const ref_address = xd->plane[0].pre[0].buf;
+
+  convert_fullmv_to_mv(&x->best_mv);
+  MV *bestmv = &x->best_mv.as_mv;
+
+  SubpelMvLimits mv_limits;
+  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, ref_mv);
+
+  int hstep = INIT_SUBPEL_STEP_SIZE;
+  int iter, round = FULL_PEL - forced_stop;
+  const MV *search_step = search_step_table;
+  unsigned int cost_array[5];
+  unsigned int besterr = INT_MAX;
+
+  if (!allow_hp)
+    if (round == 3) round = 2;
+
+  if (subpel_search_type != USE_2_TAPS_ORIG) {
+    besterr = upsampled_setup_center_error(
+        xd, cm, bestmv, src_address, src_stride, ref_address, ref_stride,
+        var_params, mv_cost_params, sse1, distortion);
+  } else {
+    besterr = setup_center_error(xd, bestmv, src_address, src_stride,
+                                 ref_address, ref_stride, var_params,
+                                 mv_cost_params, sse1, distortion);
+  }
+
+  if (do_reset_fractional_mv) {
+    av1_set_fractional_mv(x->fractional_best_mv);
+  }
+
+  MV iter_center_mv = *bestmv;
+  for (iter = 0; iter < round; ++iter) {
+    // Bail out if this round's centre was already recorded for this round.
+    if (x->fractional_best_mv[iter].as_mv.row == iter_center_mv.row &&
+        x->fractional_best_mv[iter].as_mv.col == iter_center_mv.col)
+      return INT_MAX;
+
+    x->fractional_best_mv[iter].as_mv = iter_center_mv;
+
+    MV best_iter_mv = iter_center_mv;
+    int iter_best_idx = -1;
+
+    // Check vertical and horizontal sub-pixel positions.
+    for (int idx = 0; idx < 4; ++idx) {
+      const MV this_mv = { iter_center_mv.row + search_step[idx].row,
+                           iter_center_mv.col + search_step[idx].col };
+
+      int has_better_mv = 0;
+      if (subpel_search_type != USE_2_TAPS_ORIG) {
+        cost_array[idx] = check_better(
+            xd, cm, &this_mv, &best_iter_mv, &mv_limits, src_address,
+            src_stride, ref_address, ref_stride, var_params, mv_cost_params,
+            &besterr, sse1, distortion, &has_better_mv);
+      } else {
+        cost_array[idx] = check_better_fast(
+            &this_mv, &best_iter_mv, &mv_limits, src_address, src_stride,
+            ref_address, ref_stride, var_params, mv_cost_params, &besterr, sse1,
+            distortion, &has_better_mv);
+      }
+      if (has_better_mv) {
+        iter_best_idx = idx;
+      }
+    }
+
+    // Check diagonal sub-pixel position
+    const MV diag_step = { (cost_array[2] <= cost_array[3] ? -hstep : hstep),
+                           (cost_array[0] <= cost_array[1] ? -hstep : hstep) };
+    const MV diag_mv = { iter_center_mv.row + diag_step.row,
+                         iter_center_mv.col + diag_step.col };
+    int has_better_mv = 0;
+    if (subpel_search_type != USE_2_TAPS_ORIG) {
+      cost_array[4] = check_better(xd, cm, &diag_mv, &best_iter_mv, &mv_limits,
+                                   src_address, src_stride, ref_address,
+                                   ref_stride, var_params, mv_cost_params,
+                                   &besterr, sse1, distortion, &has_better_mv);
+    } else {
+      cost_array[4] = check_better_fast(
+          &diag_mv, &best_iter_mv, &mv_limits, src_address, src_stride,
+          ref_address, ref_stride, var_params, mv_cost_params, &besterr, sse1,
+          distortion, &has_better_mv);
+    }
+    if (has_better_mv) {
+      iter_best_idx = 4;
+    }
+
+    // Re-centre (and optionally refine) only when this round found something
+    // better than the centre.
+    if (iter_best_idx != -1) {
+      iter_center_mv = best_iter_mv;
+
+      if (iters_per_step > 1) {
+        second_level_check_v2(xd, cm, &diag_mv, &iter_center_mv, diag_step.row,
+                              diag_step.col, &mv_limits, src_address,
+                              src_stride, ref_address, ref_stride, var_params,
+                              mv_cost_params, &besterr, sse1, distortion);
+      }
+    }
+
+    // Advance to the next group of four steps and halve the diagonal step.
+    search_step += 4;
+    hstep >>= 1;
+  }
+
+  *bestmv = iter_center_mv;
+
+  return besterr;
+}
+
 // Note(yunqingwang): The following 2 functions are only used in the motion
 // vector unit test, which return extreme motion vectors allowed by the MV
 // limits.
-// Return the maximum MV.
+
+// Returns the maximum MV.
 int av1_return_max_sub_pixel_mv(MACROBLOCK *x, const AV1_COMMON *const cm,
                                 const SUBPEL_MOTION_SEARCH_PARAMS *ms_params,
                                 int *distortion, unsigned int *sse1) {
@@ -3282,6 +2842,7 @@
   lower_mv_precision(bestmv, allow_hp, 0);
   return besterr;
 }
+
 // Return the minimum MV.
int av1_return_min_sub_pixel_mv(MACROBLOCK *x, const AV1_COMMON *const cm, const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, @@ -3309,113 +2870,491 @@ return besterr; } -void av1_simple_motion_search(AV1_COMP *const cpi, MACROBLOCK *x, int mi_row, - int mi_col, BLOCK_SIZE bsize, int ref, - FULLPEL_MV start_mv, int num_planes, - int use_subpixel) { - assert(num_planes == 1 && - "Currently simple_motion_search only supports luma plane"); - assert(!frame_is_intra_only(&cpi->common) && - "Simple motion search only enabled for non-key frames"); - AV1_COMMON *const cm = &cpi->common; +// Refine MV in a small range +unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x, + BLOCK_SIZE bsize, int *pts0, int *pts_inref0, + int total_samples) { + const AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - - set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize); - MB_MODE_INFO *mbmi = xd->mi[0]; - mbmi->sb_type = bsize; - mbmi->ref_frame[0] = ref; - mbmi->ref_frame[1] = NONE_FRAME; - mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR); + const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 }, + { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } }; + const int_mv ref_mv = av1_get_ref_mv(x, 0); + int16_t br = mbmi->mv[0].as_mv.row; + int16_t bc = mbmi->mv[0].as_mv.col; + int16_t *tr = &mbmi->mv[0].as_mv.row; + int16_t *tc = &mbmi->mv[0].as_mv.col; + WarpedMotionParams best_wm_params = mbmi->wm_params; + int best_num_proj_ref = mbmi->num_proj_ref; + unsigned int bestmse; + SubpelMvLimits mv_limits; - const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref); - const YV12_BUFFER_CONFIG *scaled_ref_frame = - av1_get_scaled_ref_frame(cpi, ref); - struct buf_2d backup_yv12; - // ref_mv is used to calculate the cost of the motion vector - const MV ref_mv = kZeroMv; - const int step_param = cpi->mv_step_param; - const FullMvLimits tmp_mv_limits = x->mv_limits; - const SEARCH_METHODS 
search_methods = cpi->sf.mv_sf.search_method; - const int do_mesh_search = 0; - const int sadpb = x->sadperbit16; - int cost_list[5]; - const int ref_idx = 0; - int var; + const int start = cm->allow_high_precision_mv ? 0 : 4; + int ite; - av1_setup_pre_planes(xd, ref_idx, yv12, mi_row, mi_col, - get_ref_scale_factors(cm, ref), num_planes); - set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - if (scaled_ref_frame) { - backup_yv12 = xd->plane[AOM_PLANE_Y].pre[ref_idx]; - av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL, - num_planes); + av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv); + + // Calculate the center position's error + assert(av1_is_subpelmv_in_range(&mv_limits, mbmi->mv[0].as_mv)); + bestmse = av1_compute_motion_cost(cpi, x, bsize, &mbmi->mv[0].as_mv); + + // MV search + const int mi_row = xd->mi_row; + const int mi_col = xd->mi_col; + for (ite = 0; ite < 2; ++ite) { + int best_idx = -1; + int idx; + + for (idx = start; idx < start + 4; ++idx) { + unsigned int thismse; + + *tr = br + neighbors[idx].row; + *tc = bc + neighbors[idx].col; + + MV this_mv = { *tr, *tc }; + if (av1_is_subpelmv_in_range(&mv_limits, this_mv)) { + int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; + + memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); + memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0)); + if (total_samples > 1) + mbmi->num_proj_ref = + av1_selectSamples(&this_mv, pts, pts_inref, total_samples, bsize); + + if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, *tr, + *tc, &mbmi->wm_params, mi_row, mi_col)) { + thismse = av1_compute_motion_cost(cpi, x, bsize, &this_mv); + + if (thismse < bestmse) { + best_idx = idx; + best_wm_params = mbmi->wm_params; + best_num_proj_ref = mbmi->num_proj_ref; + bestmse = thismse; + } + } + } + } + + if (best_idx == -1) break; + + if (best_idx >= 0) { + br += neighbors[best_idx].row; + bc += neighbors[best_idx].col; + } } - 
// This overwrites the mv_limits so we will need to restore it later. - av1_set_mv_search_range(&x->mv_limits, &ref_mv); - var = av1_full_pixel_search( - cpi, x, bsize, &start_mv, step_param, search_methods, do_mesh_search, - sadpb, cond_cost_list(cpi, cost_list), &ref_mv, INT_MAX, 1, - mi_col * MI_SIZE, mi_row * MI_SIZE, 0, &cpi->ss_cfg[SS_CFG_SRC], 0); - // Restore - x->mv_limits = tmp_mv_limits; - - const int use_subpel_search = - var < INT_MAX && !cpi->common.cur_frame_force_integer_mv && use_subpixel; - if (scaled_ref_frame) { - xd->plane[AOM_PLANE_Y].pre[ref_idx] = backup_yv12; - } - if (use_subpel_search) { - int not_used = 0; - - const uint8_t *second_pred = NULL; - const uint8_t *mask = NULL; - const int mask_stride = 0; - const int invert_mask = 0; - const int reset_fractional_mv = 1; - SUBPEL_MOTION_SEARCH_PARAMS ms_params; - av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv, - cost_list, second_pred, mask, mask_stride, - invert_mask, reset_fractional_mv); - - cpi->find_fractional_mv_step(x, cm, &ms_params, &not_used, - &x->pred_sse[ref]); - } else { - // Manually convert from units of pixel to 1/8-pixels if we are not doing - // subpel search - x->best_mv.as_mv = get_mv_from_fullmv(&x->best_mv.as_fullmv); - } - - mbmi->mv[0] = x->best_mv; - - // Get a copy of the prediction output - av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, - AOM_PLANE_Y, AOM_PLANE_Y); - - aom_clear_system_state(); - - if (scaled_ref_frame) { - xd->plane[AOM_PLANE_Y].pre[ref_idx] = backup_yv12; - } + *tr = br; + *tc = bc; + mbmi->wm_params = best_wm_params; + mbmi->num_proj_ref = best_num_proj_ref; + return bestmse; } -void av1_simple_motion_sse_var(AV1_COMP *cpi, MACROBLOCK *x, int mi_row, - int mi_col, BLOCK_SIZE bsize, - const FULLPEL_MV start_mv, int use_subpixel, - unsigned int *sse, unsigned int *var) { +// ============================================================================= +// Subpixel Motion Search: OBMC +// 
============================================================================= +/* returns subpixel variance error function */ +#define DIST(r, c) \ + vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), src_address, mask, \ + &sse) + +/* checks if (r, c) has better score than previous best */ +#define MVC(diff_mv) \ + (unsigned int)(mvcost \ + ? (mv_cost((diff_mv), mvjcost, mvcost) * error_per_bit + \ + 4096) >> \ + 13 \ + : 0) + +#define CHECK_BETTER(v, r, c) \ + { \ + const MV this_mv = { r, c }; \ + if (av1_is_subpelmv_in_range(&mv_limits, this_mv)) { \ + const MV diff_mv = { r - ref_mv->row, c - ref_mv->col }; \ + thismse = (DIST(r, c)); \ + if ((v = MVC(&diff_mv) + thismse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } \ + } + +#undef CHECK_BETTER0 +#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c) + +#undef CHECK_BETTER1 +#define CHECK_BETTER1(v, r, c) \ + { \ + const MV this_mv = { r, c }; \ + if (av1_is_subpelmv_in_range(&mv_limits, this_mv)) { \ + thismse = upsampled_obmc_pref_error( \ + xd, cm, &this_mv, mask, vfp, src_address, pre(y, y_stride, r, c), \ + y_stride, sp(c), sp(r), w, h, &sse, subpel_search_type); \ + v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit, \ + mv_cost_type); \ + if ((v + thismse) < besterr) { \ + besterr = v + thismse; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } \ + } + +static unsigned int setup_obmc_center_error( + const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit, + const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc, + const uint8_t *const y, int y_stride, const int *mvjcost, + const int *const mvcost[2], unsigned int *sse1, int *distortion, + MV_COST_TYPE mv_cost_type) { + unsigned int besterr; + besterr = vfp->ovf(y, y_stride, wsrc, mask, sse1); + *distortion = besterr; + besterr += + mv_err_cost(bestmv, ref_mv, 
mvjcost, mvcost, error_per_bit, mv_cost_type); + return besterr; +} + +static int upsampled_obmc_pref_error( + MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV *const mv, + const int32_t *mask, const aom_variance_fn_ptr_t *vfp, + const int32_t *const wsrc, const uint8_t *const y, int y_stride, + int subpel_x_q3, int subpel_y_q3, int w, int h, unsigned int *sse, + int subpel_search) { + unsigned int besterr; + + const int mi_row = xd->mi_row; + const int mi_col = xd->mi_col; + DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]); +#if CONFIG_AV1_HIGHBITDEPTH + if (is_cur_buf_hbd(xd)) { + uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred); + aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h, + subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd, + subpel_search); + besterr = vfp->ovf(pred8, w, wsrc, mask, sse); + } else { + aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3, + subpel_y_q3, y, y_stride, subpel_search); + + besterr = vfp->ovf(pred, w, wsrc, mask, sse); + } +#else + aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3, + subpel_y_q3, y, y_stride, subpel_search); + + besterr = vfp->ovf(pred, w, wsrc, mask, sse); +#endif + return besterr; +} +// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrite of +// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten +// later in the same way. 
+#define SECOND_LEVEL_CHECKS_BEST(k) \ + { \ + unsigned int second; \ + int br0 = br; \ + int bc0 = bc; \ + assert(tr == br || tc == bc); \ + if (tr == br && tc != bc) { \ + kc = bc - tc; \ + } else if (tr != br && tc == bc) { \ + kr = br - tr; \ + } \ + CHECK_BETTER##k(second, br0 + kr, bc0); \ + CHECK_BETTER##k(second, br0, bc0 + kc); \ + if (br0 != br || bc0 != bc) { \ + CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \ + } \ + (void)second; \ + } + +#define UNPACK_OBMC_MS_PARAMS \ + const int allow_hp = ms_params->allow_hp; \ + const int forced_stop = ms_params->forced_stop; \ + const int iters_per_step = ms_params->iters_per_step; \ + const MV *ref_mv = ms_params->mv_cost_params.ref_mv; \ + const int *mvjcost = ms_params->mv_cost_params.mvjcost; \ + const int *const *mvcost = ms_params->mv_cost_params.mvcost; \ + const int error_per_bit = ms_params->mv_cost_params.error_per_bit; \ + const MV_COST_TYPE mv_cost_type = ms_params->mv_cost_params.mv_cost_type; \ + const aom_variance_fn_ptr_t *vfp = ms_params->var_params.vfp; \ + const SUBPEL_SEARCH_TYPE subpel_search_type = \ + ms_params->var_params.subpel_search_type; \ + const int w = ms_params->var_params.w; \ + const int h = ms_params->var_params.h; + +static unsigned int upsampled_setup_obmc_center_error( + MACROBLOCKD *xd, const AV1_COMMON *const cm, const int32_t *mask, + const MV *bestmv, const MV *ref_mv, int error_per_bit, + const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc, + const uint8_t *const y, int y_stride, int w, int h, const int *mvjcost, + const int *const mvcost[2], unsigned int *sse1, int *distortion, + int subpel_search, MV_COST_TYPE mv_cost_type) { + unsigned int besterr = + upsampled_obmc_pref_error(xd, cm, bestmv, mask, vfp, wsrc, y, y_stride, 0, + 0, w, h, sse1, subpel_search); + *distortion = besterr; + besterr += + mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit, mv_cost_type); + return besterr; +} + +int av1_find_best_obmc_sub_pixel_tree_up( + MACROBLOCK *x, const 
AV1_COMMON *const cm, + const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, int *distortion, + unsigned int *sse1) { + UNPACK_OBMC_MS_PARAMS; + const int32_t *wsrc = x->wsrc_buf; + const int32_t *mask = x->mask_buf; + + const int32_t *const src_address = wsrc; MACROBLOCKD *xd = &x->e_mbd; - const MV_REFERENCE_FRAME ref = - cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; + struct macroblockd_plane *const pd = &xd->plane[0]; + unsigned int besterr = INT_MAX; + unsigned int sse; + unsigned int thismse; + const int y_stride = pd->pre[0].stride; + const int offset = get_offset_from_mv(&x->best_mv.as_fullmv, y_stride); + const uint8_t *y = pd->pre[0].buf; + convert_fullmv_to_mv(&x->best_mv); + MV *bestmv = &x->best_mv.as_mv; - av1_simple_motion_search(cpi, x, mi_row, mi_col, bsize, ref, start_mv, 1, - use_subpixel); + int br = bestmv->row; + int bc = bestmv->col; + int hstep = INIT_SUBPEL_STEP_SIZE; + int iter, round = FULL_PEL - forced_stop; + int tr = br; + int tc = bc; + const MV *search_step = search_step_table; + int idx, best_idx = -1; + unsigned int cost_array[5]; + int kr, kc; - const uint8_t *src = x->plane[0].src.buf; + SubpelMvLimits mv_limits; + + av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, ref_mv); + + if (!allow_hp) + if (round == 3) round = 2; + + if (subpel_search_type != USE_2_TAPS_ORIG) + besterr = upsampled_setup_obmc_center_error( + xd, cm, mask, bestmv, ref_mv, error_per_bit, vfp, src_address, + y + offset, y_stride, w, h, mvjcost, mvcost, sse1, distortion, + subpel_search_type, mv_cost_type); + else + besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp, + src_address, y, y_stride, mvjcost, mvcost, + sse1, distortion, mv_cost_type); + + for (iter = 0; iter < round; ++iter) { + // Check vertical and horizontal sub-pixel positions. 
+ for (idx = 0; idx < 4; ++idx) { + tr = br + search_step[idx].row; + tc = bc + search_step[idx].col; + MV this_mv = { tr, tc }; + if (av1_is_subpelmv_in_range(&mv_limits, this_mv)) { + if (subpel_search_type != USE_2_TAPS_ORIG) { + thismse = upsampled_obmc_pref_error( + xd, cm, &this_mv, mask, vfp, src_address, + pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse, + subpel_search_type); + } else { + thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), + sp(tr), src_address, mask, &sse); + } + + cost_array[idx] = + thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit, mv_cost_type); + if (cost_array[idx] < besterr) { + best_idx = idx; + besterr = cost_array[idx]; + *distortion = thismse; + *sse1 = sse; + } + } else { + cost_array[idx] = INT_MAX; + } + } + + // Check diagonal sub-pixel position + kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); + kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep); + + tc = bc + kc; + tr = br + kr; + { + MV this_mv = { tr, tc }; + if (av1_is_subpelmv_in_range(&mv_limits, this_mv)) { + if (subpel_search_type != USE_2_TAPS_ORIG) { + thismse = upsampled_obmc_pref_error( + xd, cm, &this_mv, mask, vfp, src_address, + pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse, + subpel_search_type); + } else { + thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), + sp(tr), src_address, mask, &sse); + } + + cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, + error_per_bit, mv_cost_type); + + if (cost_array[4] < besterr) { + best_idx = 4; + besterr = cost_array[4]; + *distortion = thismse; + *sse1 = sse; + } + } else { + cost_array[idx] = INT_MAX; + } + } + + if (best_idx < 4 && best_idx >= 0) { + br += search_step[best_idx].row; + bc += search_step[best_idx].col; + } else if (best_idx == 4) { + br = tr; + bc = tc; + } + + if (iters_per_step > 1 && best_idx != -1) { + if (subpel_search_type != USE_2_TAPS_ORIG) { + SECOND_LEVEL_CHECKS_BEST(1); + } 
else { + SECOND_LEVEL_CHECKS_BEST(0); + } + } + + tr = br; + tc = bc; + + search_step += 4; + hstep >>= 1; + best_idx = -1; + } + + // These lines ensure static analysis doesn't warn that + // tr and tc aren't used after the above point. + (void)tr; + (void)tc; + + bestmv->row = br; + bestmv->col = bc; + + return besterr; +} + +#undef DIST +#undef MVC +#undef CHECK_BETTER + +// ============================================================================= +// Public cost function: mv_cost + pred error +// ============================================================================= +int av1_get_mvpred_sse(const MACROBLOCK *x, const FULLPEL_MV *best_mv, + const MV *ref_mv, const aom_variance_fn_ptr_t *vfp) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const MV mv = get_mv_from_fullmv(best_mv); + const MV_COST_TYPE mv_cost_type = x->mv_cost_type; + unsigned int sse, var; + + var = vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), + in_what->stride, &sse); + (void)var; + + return sse + mv_err_cost(&mv, ref_mv, x->nmv_vec_cost, + CONVERT_TO_CONST_MVCOST(x->mv_cost_stack), + x->errorperbit, mv_cost_type); +} + +int av1_get_mvpred_var(const MACROBLOCK *x, const FULLPEL_MV *best_mv, + const MV *ref_mv, const aom_variance_fn_ptr_t *vfp) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const MV mv = get_mv_from_fullmv(best_mv); + const MV_COST_TYPE mv_cost_type = x->mv_cost_type; + unsigned int sse, var; + + var = vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), + in_what->stride, &sse); + + return var + mv_err_cost(&mv, ref_mv, x->nmv_vec_cost, + CONVERT_TO_CONST_MVCOST(x->mv_cost_stack), + x->errorperbit, mv_cost_type); +} + +int av1_get_mvpred_av_var(const MACROBLOCK *x, const FULLPEL_MV *best_mv, + const MV 
*ref_mv, const uint8_t *second_pred, + const aom_variance_fn_ptr_t *vfp, + const struct buf_2d *src, const struct buf_2d *pre) { + const struct buf_2d *const what = src; + const struct buf_2d *const in_what = pre; + const MV mv = get_mv_from_fullmv(best_mv); + const MV_COST_TYPE mv_cost_type = x->mv_cost_type; + unsigned int unused; + + return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, + what->buf, what->stride, &unused, second_pred) + + mv_err_cost(&mv, ref_mv, x->nmv_vec_cost, + CONVERT_TO_CONST_MVCOST(x->mv_cost_stack), x->errorperbit, + mv_cost_type); +} + +int av1_get_mvpred_mask_var(const MACROBLOCK *x, const FULLPEL_MV *best_mv, + const MV *ref_mv, const uint8_t *second_pred, + const uint8_t *mask, int mask_stride, + int invert_mask, const aom_variance_fn_ptr_t *vfp, + const struct buf_2d *src, + const struct buf_2d *pre) { + const struct buf_2d *const what = src; + const struct buf_2d *const in_what = pre; + const MV mv = get_mv_from_fullmv(best_mv); + const MV_COST_TYPE mv_cost_type = x->mv_cost_type; + unsigned int unused; + + return vfp->msvf(what->buf, what->stride, 0, 0, + get_buf_from_mv(in_what, best_mv), in_what->stride, + second_pred, mask, mask_stride, invert_mask, &unused) + + mv_err_cost(&mv, ref_mv, x->nmv_vec_cost, + CONVERT_TO_CONST_MVCOST(x->mv_cost_stack), x->errorperbit, + mv_cost_type); +} + +unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x, + BLOCK_SIZE bsize, const MV *this_mv) { + const AV1_COMMON *const cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + const uint8_t *const src = x->plane[0].src.buf; const int src_stride = x->plane[0].src.stride; - const uint8_t *dst = xd->plane[0].dst.buf; + uint8_t *const dst = xd->plane[0].dst.buf; const int dst_stride = xd->plane[0].dst.stride; + const aom_variance_fn_ptr_t *vfp = &cpi->fn_ptr[bsize]; + const int_mv ref_mv = av1_get_ref_mv(x, 0); + unsigned int mse; + unsigned int sse; + const int mi_row = xd->mi_row; + const int mi_col = 
xd->mi_col; + const MV_COST_TYPE mv_cost_type = x->mv_cost_type; - *var = cpi->fn_ptr[bsize].vf(src, src_stride, dst, dst_stride, sse); + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, + AOM_PLANE_Y, AOM_PLANE_Y); + mse = vfp->vf(dst, dst_stride, src, src_stride, &sse); + mse += mv_err_cost(this_mv, &ref_mv.as_mv, x->nmv_vec_cost, + CONVERT_TO_CONST_MVCOST(x->mv_cost_stack), x->errorperbit, + mv_cost_type); + return mse; }
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h index dd88e11..79450bf 100644 --- a/av1/encoder/mcomp.h +++ b/av1/encoder/mcomp.h
@@ -21,16 +21,6 @@ extern "C" { #endif -// In this file, the following variables always have the same meaning: -// start_mv: the motion vector where we start the motion search -// ref_mv: the motion vector with respect to which we calculate the mv_cost -// best_mv: when it is not const, it is the destination where to store the -// best motion vector -// full_*: a prefix of full indicates that the mv is a FULLPEL_MV -// -// When a mv needs to both act as a fullpel_mv and subpel_mv, it is stored as an -// int_mv, which is a union of int, FULLPEL_MV, and MV - // The maximum number of steps in a step search given the largest // allowed initial step #define MAX_MVSEARCH_STEPS 11 @@ -63,20 +53,27 @@ } search_site_config; typedef struct { - MV coord; + FULLPEL_MV coord; int coord_offset; } search_neighbors; -void av1_init_dsmotion_compensation(search_site_config *cfg, int stride); -void av1_init_motion_fpf(search_site_config *cfg, int stride); -void av1_init3smotion_compensation(search_site_config *cfg, int stride); +struct AV1_COMP; +struct SPEED_FEATURES; -void av1_set_mv_search_range(FullMvLimits *mv_limits, const MV *mv); +// ============================================================================= +// Cost functions +// ============================================================================= +typedef struct { + const MV *ref_mv; + const int *mvjcost; + const int *mvcost[2]; + int error_per_bit; + MV_COST_TYPE mv_cost_type; +} MV_COST_PARAMS; int av1_mv_bit_cost(const MV *mv, const MV *ref_mv, const int *mvjcost, int *mvcost[2], int weight); -// Utility to compute variance + MV rate cost for a given MV int av1_get_mvpred_sse(const MACROBLOCK *x, const FULLPEL_MV *best_mv, const MV *ref_mv, const aom_variance_fn_ptr_t *vfp); int av1_get_mvpred_var(const MACROBLOCK *x, const FULLPEL_MV *best_mv, @@ -91,8 +88,21 @@ int invert_mask, const aom_variance_fn_ptr_t *vfp, const struct buf_2d *src, const struct buf_2d *pre); -struct AV1_COMP; -struct SPEED_FEATURES; 
+unsigned int av1_compute_motion_cost(const struct AV1_COMP *cpi, + MACROBLOCK *const x, BLOCK_SIZE bsize, + const MV *this_mv); + +// ============================================================================= +// Fullpixel Motion Search +// ============================================================================= +// Sets up configs for fullpixel diamond search +void av1_init_dsmotion_compensation(search_site_config *cfg, int stride); +// Sets up configs for firstpass motion search +void av1_init_motion_fpf(search_site_config *cfg, int stride); +// Sets up configs for all other types of motion search +void av1_init3smotion_compensation(search_site_config *cfg, int stride); + +void av1_set_mv_search_range(FullMvLimits *mv_limits, const MV *mv); int av1_init_search_range(int size); @@ -100,12 +110,54 @@ MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, const MV *ref_mv); - // Runs sequence of diamond searches in smaller steps for RD. int av1_hex_search(MACROBLOCK *x, FULLPEL_MV *start_mv, int search_param, int sad_per_bit, int do_init_search, int *cost_list, const aom_variance_fn_ptr_t *vfp, const MV *ref_mv); +int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range, + const aom_variance_fn_ptr_t *fn_ptr, + const uint8_t *mask, int mask_stride, + int invert_mask, const MV *ref_mv, + const uint8_t *second_pred, + const struct buf_2d *src, + const struct buf_2d *pre); + +int av1_diamond_search_sad_c(MACROBLOCK *x, const search_site_config *cfg, + FULLPEL_MV *start_mv, FULLPEL_MV *best_mv, + int search_param, int sad_per_bit, int *num00, + const aom_variance_fn_ptr_t *fn_ptr, + const MV *ref_mv, uint8_t *second_pred, + uint8_t *mask, int mask_stride, int inv_mask); + +int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, FULLPEL_MV *start_mv, + int step_param, int method, int run_mesh_search, + int error_per_bit, int *cost_list, const MV *ref_mv, + int var_max, int rd, int x_pos, int y_pos, int 
intra, + const search_site_config *cfg, + int use_intrabc_mesh_pattern); + +int av1_obmc_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x, + FULLPEL_MV *start_mv, int step_param, int sadpb, + int further_steps, int do_refine, + const aom_variance_fn_ptr_t *fn_ptr, + const MV *ref_mv, FULLPEL_MV *dst_mv, + const search_site_config *cfg); + +unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi, + MACROBLOCK *const x, BLOCK_SIZE bsize, + int *pts0, int *pts_inref0, + int total_samples); + +static INLINE int av1_is_fullmv_in_range(const FullMvLimits *mv_limits, + FULLPEL_MV mv) { + return (mv.col >= mv_limits->col_min) && (mv.col <= mv_limits->col_max) && + (mv.row >= mv_limits->row_min) && (mv.row <= mv_limits->row_max); +} +// ============================================================================= +// Subpixel Motion Search +// ============================================================================= enum { EIGHTH_PEL, QUARTER_PEL, @@ -114,14 +166,6 @@ } UENUM1BYTE(SUBPEL_FORCE_STOP); typedef struct { - const MV *ref_mv; - const int *mvjcost; - const int *mvcost[2]; - int error_per_bit; - MV_COST_TYPE mv_cost_type; -} MV_COST_PARAMS; - -typedef struct { const aom_variance_fn_ptr_t *vfp; SUBPEL_SEARCH_TYPE subpel_search_type; const uint8_t *second_pred; @@ -164,61 +208,8 @@ extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned_evenmore; extern fractional_mv_step_fp av1_return_max_sub_pixel_mv; extern fractional_mv_step_fp av1_return_min_sub_pixel_mv; - -int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range, - const aom_variance_fn_ptr_t *fn_ptr, - const uint8_t *mask, int mask_stride, - int invert_mask, const MV *ref_mv, - const uint8_t *second_pred, - const struct buf_2d *src, - const struct buf_2d *pre); - -int av1_diamond_search_sad_c(MACROBLOCK *x, const search_site_config *cfg, - FULLPEL_MV *start_mv, FULLPEL_MV *best_mv, - int search_param, int sad_per_bit, int *num00, - const 
aom_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv, uint8_t *second_pred, - uint8_t *mask, int mask_stride, int inv_mask); - -int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, FULLPEL_MV *start_mv, - int step_param, int method, int run_mesh_search, - int error_per_bit, int *cost_list, const MV *ref_mv, - int var_max, int rd, int x_pos, int y_pos, int intra, - const search_site_config *cfg, - int use_intrabc_mesh_pattern); - -int av1_obmc_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x, - FULLPEL_MV *start_mv, int step_param, int sadpb, - int further_steps, int do_refine, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv, FULLPEL_MV *dst_mv, - const search_site_config *cfg); - extern fractional_mv_step_fp av1_find_best_obmc_sub_pixel_tree_up; -unsigned int av1_compute_motion_cost(const struct AV1_COMP *cpi, - MACROBLOCK *const x, BLOCK_SIZE bsize, - const MV *this_mv); -unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi, - MACROBLOCK *const x, BLOCK_SIZE bsize, - int *pts0, int *pts_inref0, - int total_samples); - -// Performs a motion search in SIMPLE_TRANSLATION mode using reference frame -// ref. Note that this sets the offset of mbmi, so we will need to reset it -// after calling this function. 
-void av1_simple_motion_search(struct AV1_COMP *const cpi, MACROBLOCK *x, - int mi_row, int mi_col, BLOCK_SIZE bsize, int ref, - FULLPEL_MV start_mv, int num_planes, - int use_subpixel); - -// Performs a simple motion search to calculate the sse and var of the residue -void av1_simple_motion_sse_var(struct AV1_COMP *cpi, MACROBLOCK *x, int mi_row, - int mi_col, BLOCK_SIZE bsize, - const FULLPEL_MV start_mv, int use_subpixel, - unsigned int *sse, unsigned int *var); - static INLINE void av1_set_fractional_mv(int_mv *fractional_best_mv) { for (int z = 0; z < 3; z++) { fractional_best_mv[z].as_int = INVALID_MV; @@ -244,12 +235,6 @@ subpel_limits->row_max = AOMMIN(MV_UPP - 1, maxr); } -static INLINE int av1_is_fullmv_in_range(const FullMvLimits *mv_limits, - FULLPEL_MV mv) { - return (mv.col >= mv_limits->col_min) && (mv.col <= mv_limits->col_max) && - (mv.row >= mv_limits->row_min) && (mv.row <= mv_limits->row_max); -} - static INLINE int av1_is_subpelmv_in_range(const SubpelMvLimits *mv_limits, MV mv) { return (mv.col >= mv_limits->col_min) && (mv.col <= mv_limits->col_max) &&
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c index a43d4cd..ba4ce6f 100644 --- a/av1/encoder/motion_search_facade.c +++ b/av1/encoder/motion_search_facade.c
@@ -9,9 +9,13 @@ * PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ +#include "aom_ports/system_state.h" + #include "av1/common/reconinter.h" + #include "av1/encoder/encodemv.h" #include "av1/encoder/motion_search_facade.h" +#include "av1/encoder/partition_strategy.h" #include "av1/encoder/reconinter_enc.h" void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, @@ -615,3 +619,114 @@ } return tmp_rate_mv; } + +void av1_simple_motion_search(AV1_COMP *const cpi, MACROBLOCK *x, int mi_row, + int mi_col, BLOCK_SIZE bsize, int ref, + FULLPEL_MV start_mv, int num_planes, + int use_subpixel) { + assert(num_planes == 1 && + "Currently simple_motion_search only supports luma plane"); + assert(!frame_is_intra_only(&cpi->common) && + "Simple motion search only enabled for non-key frames"); + AV1_COMMON *const cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + + set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize); + + MB_MODE_INFO *mbmi = xd->mi[0]; + mbmi->sb_type = bsize; + mbmi->ref_frame[0] = ref; + mbmi->ref_frame[1] = NONE_FRAME; + mbmi->motion_mode = SIMPLE_TRANSLATION; + mbmi->interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR); + + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref); + const YV12_BUFFER_CONFIG *scaled_ref_frame = + av1_get_scaled_ref_frame(cpi, ref); + struct buf_2d backup_yv12; + // ref_mv is used to calculate the cost of the motion vector + const MV ref_mv = kZeroMv; + const int step_param = cpi->mv_step_param; + const FullMvLimits tmp_mv_limits = x->mv_limits; + const SEARCH_METHODS search_methods = cpi->sf.mv_sf.search_method; + const int do_mesh_search = 0; + const int sadpb = x->sadperbit16; + int cost_list[5]; + const int ref_idx = 0; + int var; + + av1_setup_pre_planes(xd, ref_idx, yv12, mi_row, mi_col, + get_ref_scale_factors(cm, ref), num_planes); + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + if (scaled_ref_frame) { + backup_yv12 = 
xd->plane[AOM_PLANE_Y].pre[ref_idx]; + av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL, + num_planes); + } + + // This overwrites the mv_limits so we will need to restore it later. + av1_set_mv_search_range(&x->mv_limits, &ref_mv); + var = av1_full_pixel_search( + cpi, x, bsize, &start_mv, step_param, search_methods, do_mesh_search, + sadpb, cond_cost_list(cpi, cost_list), &ref_mv, INT_MAX, 1, + mi_col * MI_SIZE, mi_row * MI_SIZE, 0, &cpi->ss_cfg[SS_CFG_SRC], 0); + // Restore + x->mv_limits = tmp_mv_limits; + + const int use_subpel_search = + var < INT_MAX && !cpi->common.cur_frame_force_integer_mv && use_subpixel; + if (scaled_ref_frame) { + xd->plane[AOM_PLANE_Y].pre[ref_idx] = backup_yv12; + } + if (use_subpel_search) { + int not_used = 0; + + const uint8_t *second_pred = NULL; + const uint8_t *mask = NULL; + const int mask_stride = 0; + const int invert_mask = 0; + const int reset_fractional_mv = 1; + SUBPEL_MOTION_SEARCH_PARAMS ms_params; + av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv, + cost_list, second_pred, mask, mask_stride, + invert_mask, reset_fractional_mv); + + cpi->find_fractional_mv_step(x, cm, &ms_params, &not_used, + &x->pred_sse[ref]); + } else { + // Manually convert from units of pixel to 1/8-pixels if we are not doing + // subpel search + x->best_mv.as_mv = get_mv_from_fullmv(&x->best_mv.as_fullmv); + } + + mbmi->mv[0] = x->best_mv; + + // Get a copy of the prediction output + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, + AOM_PLANE_Y, AOM_PLANE_Y); + + aom_clear_system_state(); + + if (scaled_ref_frame) { + xd->plane[AOM_PLANE_Y].pre[ref_idx] = backup_yv12; + } +} + +void av1_simple_motion_sse_var(AV1_COMP *cpi, MACROBLOCK *x, int mi_row, + int mi_col, BLOCK_SIZE bsize, + const FULLPEL_MV start_mv, int use_subpixel, + unsigned int *sse, unsigned int *var) { + MACROBLOCKD *xd = &x->e_mbd; + const MV_REFERENCE_FRAME ref = + cpi->rc.is_src_frame_alt_ref ? 
ALTREF_FRAME : LAST_FRAME; + + av1_simple_motion_search(cpi, x, mi_row, mi_col, bsize, ref, start_mv, 1, + use_subpixel); + + const uint8_t *src = x->plane[0].src.buf; + const int src_stride = x->plane[0].src.stride; + const uint8_t *dst = xd->plane[0].dst.buf; + const int dst_stride = xd->plane[0].dst.stride; + + *var = cpi->fn_ptr[bsize].vf(src, src_stride, dst, dst_stride, sse); +}
diff --git a/av1/encoder/motion_search_facade.h b/av1/encoder/motion_search_facade.h index 4bfe06d..960df34 100644 --- a/av1/encoder/motion_search_facade.h +++ b/av1/encoder/motion_search_facade.h
@@ -43,6 +43,20 @@ const uint8_t *mask, int mask_stride, int *rate_mv, int ref_idx); +// Performs a motion search in SIMPLE_TRANSLATION mode using reference frame +// ref. Note that this sets the offset of mbmi, so we will need to reset it +// after calling this function. +void av1_simple_motion_search(struct AV1_COMP *const cpi, MACROBLOCK *x, + int mi_row, int mi_col, BLOCK_SIZE bsize, int ref, + FULLPEL_MV start_mv, int num_planes, + int use_subpixel); + +// Performs a simple motion search to calculate the sse and var of the residue +void av1_simple_motion_sse_var(struct AV1_COMP *cpi, MACROBLOCK *x, int mi_row, + int mi_col, BLOCK_SIZE bsize, + const FULLPEL_MV start_mv, int use_subpixel, + unsigned int *sse, unsigned int *var); + #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c index 8832944..883208b 100644 --- a/av1/encoder/partition_strategy.c +++ b/av1/encoder/partition_strategy.c
@@ -25,6 +25,7 @@ #endif #include "av1/encoder/encoder.h" +#include "av1/encoder/motion_search_facade.h" #include "av1/encoder/partition_strategy.h" #include "av1/encoder/rdopt.h"