/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"

#include "av1/common/common.h"
#include "av1/common/mvref_common.h"
#include "av1/common/reconinter.h"

#include "av1/encoder/encoder.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/rdopt.h"

// #define NEW_DIAMOND_SEARCH

static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
                                             const MV *mv) {
  return &buf->buf[mv->row * buf->stride + mv->col];
}

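// Units: the incoming center mv is in 1/8-pel, while mv_limits is in full
// pels, so mv->col >> 3 extracts the full-pel component and the
// (mv->col & 7 ? 1 : 0) term tightens the lower bound by one position when
// the center has a fractional part.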
void av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
  int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;

  col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1);
  row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1);
  col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1);
  row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1);

  // Get intersection of UMV window and valid MV window to reduce # of checks
  // in diamond search.
  if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
  if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
  if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
  if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
}

static void av1_set_subpel_mv_search_range(const MvLimits *mv_limits,
                                           int *col_min, int *col_max,
                                           int *row_min, int *row_max,
                                           const MV *ref_mv) {
  const int max_mv = MAX_FULL_PEL_VAL * 8;
  const int minc = AOMMAX(mv_limits->col_min * 8, ref_mv->col - max_mv);
  const int maxc = AOMMIN(mv_limits->col_max * 8, ref_mv->col + max_mv);
  const int minr = AOMMAX(mv_limits->row_min * 8, ref_mv->row - max_mv);
  const int maxr = AOMMIN(mv_limits->row_max * 8, ref_mv->row + max_mv);

  *col_min = AOMMAX(MV_LOW + 1, minc);
  *col_max = AOMMIN(MV_UPP - 1, maxc);
  *row_min = AOMMAX(MV_LOW + 1, minr);
  *row_max = AOMMIN(MV_UPP - 1, maxr);
}

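// Example (illustrative; assumes MAX_FULL_PEL_VAL is on the order of 1 << 10):
// av1_init_search_range(64) keeps doubling a 64-pel span until it covers the
// full-pel search range, so it returns roughly log2(MAX_FULL_PEL_VAL / 64)
// steps, capped at MAX_MVSEARCH_STEPS - 2.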
int av1_init_search_range(int size) {
  int sr = 0;
  // Minimum search size no matter what the passed-in value is.
  size = AOMMAX(16, size);

  while ((size << sr) < MAX_FULL_PEL_VAL) sr++;

  sr = AOMMIN(sr, MAX_MVSEARCH_STEPS - 2);
  return sr;
}

static INLINE int mv_cost(const MV *mv, const int *joint_cost,
                          int *const comp_cost[2]) {
  return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] +
         comp_cost[1][mv->col];
}

int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
                    int *mvcost[2], int weight) {
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
  return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
}

#define PIXEL_TRANSFORM_ERROR_SCALE 4
static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
                       int *mvcost[2], int error_per_bit) {
  if (mvcost) {
    const MV diff = { mv->row - ref->row, mv->col - ref->col };
    return (int)ROUND_POWER_OF_TWO_64(
        (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
        RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT +
            PIXEL_TRANSFORM_ERROR_SCALE);
  }
  return 0;
}

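// The SAD path works on full-pel MVs, so the difference is scaled up by 8
// here to index the same 1/8-pel cost tables used by mv_err_cost() above.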
static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
                          int sad_per_bit) {
  const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
  return ROUND_POWER_OF_TWO(
      (unsigned)mv_cost(&diff, x->nmvjointcost, x->mvcost) * sad_per_bit,
      AV1_PROB_COST_SHIFT);
}

void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) {
  int len, ss_count = 1;

  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
  cfg->ss[0].offset = 0;

  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    // Generate offsets for 4 search sites per step.
    const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
    int i;
    for (i = 0; i < 4; ++i) {
      search_site *const ss = &cfg->ss[ss_count++];
      ss->mv = ss_mvs[i];
      ss->offset = ss->mv.row * stride + ss->mv.col;
    }
  }

  cfg->ss_count = ss_count;
  cfg->searches_per_step = 4;
}

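// Same site-table setup as above, but with 8 candidates per step: the four
// edge midpoints plus the four corners of a square of side 2 * len, i.e.
//   (-len,-len) (-len,0) (-len,len)
//   (   0,-len)          (   0,len)
//   ( len,-len) ( len,0) ( len,len)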
void av1_init3smotion_compensation(search_site_config *cfg, int stride) {
  int len, ss_count = 1;

  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
  cfg->ss[0].offset = 0;

  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    // Generate offsets for 8 search sites per step.
    const MV ss_mvs[8] = { { -len, 0 },   { len, 0 },     { 0, -len },
                           { 0, len },    { -len, -len }, { -len, len },
                           { len, -len }, { len, len } };
    int i;
    for (i = 0; i < 8; ++i) {
      search_site *const ss = &cfg->ss[ss_count++];
      ss->mv = ss_mvs[i];
      ss->offset = ss->mv.row * stride + ss->mv.col;
    }
  }

  cfg->ss_count = ss_count;
  cfg->searches_per_step = 8;
}

/*
 * To avoid the penalty of cache-line-crossing reads, preload the reference
 * area into a small aligned buffer so that reads from that buffer never cross
 * a cache line. This reduces the CPU cycles spent reading reference data in
 * the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a
 * 22-row x 32-col area is copied, which is enough for a 16x16 macroblock.
 * Later, for SPLITMV, the area could be reduced.
 */

// convert motion vector component to offset for sv[a]f calc
static INLINE int sp(int x) { return x & 7; }

static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  return &buf[(r >> 3) * stride + (c >> 3)];
}

/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c)                                             \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                 \
    MV this_mv = { r, c };                                                \
    v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);    \
    if (second_pred == NULL)                                              \
      thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r),  \
                         src_address, src_stride, &sse);                  \
    else if (mask)                                                        \
      thismse = vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
                          src_address, src_stride, second_pred, mask,     \
                          mask_stride, invert_mask, &sse);                \
    else                                                                  \
      thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
                          src_address, src_stride, &sse, second_pred);    \
    v += thismse;                                                         \
    if (v < besterr) {                                                    \
      besterr = v;                                                        \
      br = r;                                                             \
      bc = c;                                                             \
      *distortion = thismse;                                              \
      *sse1 = sse;                                                        \
    }                                                                     \
  } else {                                                                \
    v = INT_MAX;                                                          \
  }

#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)

/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER1(v, r, c)                                               \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                    \
    MV this_mv = { r, c };                                                   \
    thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,         \
                                   pre(y, y_stride, r, c), y_stride, sp(c),  \
                                   sp(r), second_pred, mask, mask_stride,    \
                                   invert_mask, w, h, &sse);                 \
    v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);       \
    v += thismse;                                                            \
    if (v < besterr) {                                                       \
      besterr = v;                                                           \
      br = r;                                                                \
      bc = c;                                                                \
      *distortion = thismse;                                                 \
      *sse1 = sse;                                                           \
    }                                                                        \
  } else {                                                                   \
    v = INT_MAX;                                                             \
  }

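// FIRST_LEVEL_CHECKS probes the four cardinal neighbors at the current step
// size (hstep) around (tr, tc), then probes the one diagonal neighbor in the
// quadrant formed by the cheaper horizontal and vertical candidates.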
#define FIRST_LEVEL_CHECKS                                       \
  {                                                              \
    unsigned int left, right, up, down, diag;                    \
    CHECK_BETTER(left, tr, tc - hstep);                          \
    CHECK_BETTER(right, tr, tc + hstep);                         \
    CHECK_BETTER(up, tr - hstep, tc);                            \
    CHECK_BETTER(down, tr + hstep, tc);                          \
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);     \
    switch (whichdir) {                                          \
      case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
      case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
      case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
      case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
    }                                                            \
  }

#define SECOND_LEVEL_CHECKS                                       \
  {                                                               \
    int kr, kc;                                                   \
    unsigned int second;                                          \
    if (tr != br && tc != bc) {                                   \
      kr = br - tr;                                               \
      kc = bc - tc;                                               \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);                 \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);                 \
    } else if (tr == br && tc != bc) {                            \
      kc = bc - tc;                                               \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);              \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);              \
      switch (whichdir) {                                         \
        case 0:                                                   \
        case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
        case 2:                                                   \
        case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
      }                                                           \
    } else if (tr != br && tc == bc) {                            \
      kr = br - tr;                                               \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);              \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);              \
      switch (whichdir) {                                         \
        case 0:                                                   \
        case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
        case 1:                                                   \
        case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
      }                                                           \
    }                                                             \
  }

// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST is a rewrite of
// SECOND_LEVEL_CHECKS; SECOND_LEVEL_CHECKS should be rewritten
// later in the same way.
#define SECOND_LEVEL_CHECKS_BEST(k)                \
  {                                                \
    unsigned int second;                           \
    int br0 = br;                                  \
    int bc0 = bc;                                  \
    assert(tr == br || tc == bc);                  \
    if (tr == br && tc != bc) {                    \
      kc = bc - tc;                                \
    } else if (tr != br && tc == bc) {             \
      kr = br - tr;                                \
    }                                              \
    CHECK_BETTER##k(second, br0 + kr, bc0);        \
    CHECK_BETTER##k(second, br0, bc0 + kc);        \
    if (br0 != br || bc0 != bc) {                  \
      CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \
    }                                              \
  }

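// In the sub-pixel searches below, br/bc and tr/tc are in 1/8-pel units
// (hence the "* 8" conversions), and hstep starts at 4, i.e. a half-pel step
// that is halved once per refinement round.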
#define SETUP_SUBPEL_SEARCH                                                   \
  const uint8_t *const src_address = x->plane[0].src.buf;                    \
  const int src_stride = x->plane[0].src.stride;                             \
  const MACROBLOCKD *xd = &x->e_mbd;                                         \
  unsigned int besterr = INT_MAX;                                            \
  unsigned int sse;                                                          \
  unsigned int whichdir;                                                     \
  int thismse;                                                               \
  MV *bestmv = &x->best_mv.as_mv;                                            \
  const unsigned int halfiters = iters_per_step;                             \
  const unsigned int quarteriters = iters_per_step;                          \
  const unsigned int eighthiters = iters_per_step;                           \
  const int y_stride = xd->plane[0].pre[0].stride;                           \
  const int offset = bestmv->row * y_stride + bestmv->col;                   \
  const uint8_t *const y = xd->plane[0].pre[0].buf;                          \
                                                                              \
  int br = bestmv->row * 8;                                                  \
  int bc = bestmv->col * 8;                                                  \
  int hstep = 4;                                                             \
  int minc, maxc, minr, maxr;                                                \
  int tr = br;                                                               \
  int tc = bc;                                                               \
                                                                              \
  av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,  \
                                 ref_mv);                                    \
                                                                              \
  bestmv->row *= 8;                                                          \
  bestmv->col *= 8;

static unsigned int setup_center_error(
    const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
    int error_per_bit, const aom_variance_fn_ptr_t *vfp,
    const uint8_t *const src, const int src_stride, const uint8_t *const y,
    int y_stride, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int offset, int *mvjcost,
    int *mvcost[2], unsigned int *sse1, int *distortion) {
  unsigned int besterr;
#if CONFIG_HIGHBITDEPTH
  if (second_pred != NULL) {
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
      if (mask)
        aom_highbd_comp_mask_pred(comp_pred16, second_pred, w, h, y + offset,
                                  y_stride, mask, mask_stride, invert_mask);
      else
        aom_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
                                 y_stride);
      besterr =
          vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
    } else {
      DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
      if (mask)
        aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
                           mask, mask_stride, invert_mask);
      else
#if CONFIG_JNT_COMP
        aom_comp_avg_pred_c(comp_pred, second_pred, w, h, y + offset, y_stride);
#else
        aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
#endif  // CONFIG_JNT_COMP
      besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
    }
  } else {
    besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  }
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
#else
  (void)xd;
  if (second_pred != NULL) {
    DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
    if (mask)
      aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
                         mask, mask_stride, invert_mask);
    else
#if CONFIG_JNT_COMP
      aom_comp_avg_pred_c(comp_pred, second_pred, w, h, y + offset, y_stride);
#else
      aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
#endif  // CONFIG_JNT_COMP
    besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  } else {
    besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  }
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
#endif  // CONFIG_HIGHBITDEPTH
  return besterr;
}

static INLINE int divide_and_round(int n, int d) {
  return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
}

static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
         cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
}

// Returns surface minima estimate at given precision in 1/2^n bits.
// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
// For a given set of costs S0, S1, S2, S3, S4 at points
// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
// the solution for the location of the minima (x0, y0) is given by:
// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
// The code below is an integerized version of that.
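// Worked example (illustrative): with bits = 3 (1/8-pel output) and costs
// S0..S4 = 100, 130, 120, 110, 140, the column offset is
// divide_and_round((130 - 110) * 4, 130 - 200 + 110) = divide_and_round(80, 40)
// = 2, i.e. the estimated minimum lies 2/8 pel toward the cheaper right-hand
// point; the row offset is computed the same way from S4, S0, S2.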
static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
  *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
                         (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
  *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
                         (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
}

int av1_find_best_sub_pixel_tree_pruned_evenmore(
    MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
  SETUP_SUBPEL_SEARCH;
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);
  (void)halfiters;
  (void)quarteriters;
  (void)eighthiters;
  (void)whichdir;
  (void)allow_hp;
  (void)forced_stop;
  (void)hstep;
  (void)use_upsampled_ref;

  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
    int ir, ic;
    unsigned int minpt;
    get_cost_surf_min(cost_list, &ir, &ic, 2);
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }

    tr = br;
    tc = bc;

    // Each subsequent iteration checks at least one point in common with
    // the last iteration (two points if the diagonal was selected), at
    // 1/4-pel precision.
    // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    if (forced_stop != 2) {
      hstep >>= 1;
      FIRST_LEVEL_CHECKS;
      if (quarteriters > 1) {
        SECOND_LEVEL_CHECKS;
      }
    }
  }

  tr = br;
  tc = bc;

  if (allow_hp && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

int av1_find_best_sub_pixel_tree_pruned_more(
    MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
  SETUP_SUBPEL_SEARCH;
  (void)use_upsampled_ref;

  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
    unsigned int minpt;
    int ir, ic;
    get_cost_surf_min(cost_list, &ir, &ic, 1);
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  // Each subsequent iteration checks at least one point in common with
  // the last iteration (two points if the diagonal was selected), at
  // 1/4-pel precision.

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  if (allow_hp && forced_stop == 0) {
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }
  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

int av1_find_best_sub_pixel_tree_pruned(
    MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
  SETUP_SUBPEL_SEARCH;
  (void)use_upsampled_ref;

  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX) {
    unsigned int left, right, up, down, diag;
    whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
               (cost_list[2] < cost_list[4] ? 0 : 2);
    switch (whichdir) {
      case 0:
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc - hstep);
        break;
      case 1:
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc + hstep);
        break;
      case 2:
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc - hstep);
        break;
      case 3:
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc + hstep);
        break;
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration (two points if the diagonal was selected), at
  // 1/4-pel precision.

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  if (allow_hp && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

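// Step table for av1_find_best_sub_pixel_tree(): each refinement round uses
// one row of four offsets (left, right, up, down) in 1/8-pel units, starting
// at half-pel and halving the step each round.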
/* clang-format off */
static const MV search_step_table[12] = {
  // left, right, up, down
  { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
  { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
  { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
};
/* clang-format on */

static int upsampled_pref_error(const MACROBLOCKD *xd,
                                const aom_variance_fn_ptr_t *vfp,
                                const uint8_t *const src, const int src_stride,
                                const uint8_t *const y, int y_stride,
                                int subpel_x_q3, int subpel_y_q3,
                                const uint8_t *second_pred, const uint8_t *mask,
                                int mask_stride, int invert_mask, int w, int h,
                                unsigned int *sse) {
  unsigned int besterr;
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
    if (second_pred != NULL) {
      if (mask)
        aom_highbd_comp_mask_upsampled_pred(
            pred16, second_pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride,
            mask, mask_stride, invert_mask, xd->bd);
      else
        aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h,
                                           subpel_x_q3, subpel_y_q3, y,
                                           y_stride, xd->bd);
    } else {
      aom_highbd_upsampled_pred(pred16, w, h, subpel_x_q3, subpel_y_q3, y,
                                y_stride, xd->bd);
    }

    besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, sse);
  } else {
    DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
#else
  DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
  (void)xd;
#endif  // CONFIG_HIGHBITDEPTH
    if (second_pred != NULL) {
      if (mask)
        aom_comp_mask_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
                                     subpel_y_q3, y, y_stride, mask,
                                     mask_stride, invert_mask);
      else
#if CONFIG_JNT_COMP
        aom_comp_avg_upsampled_pred_c(pred, second_pred, w, h, subpel_x_q3,
                                      subpel_y_q3, y, y_stride);
#else
        aom_comp_avg_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
                                    subpel_y_q3, y, y_stride);
#endif  // CONFIG_JNT_COMP
    } else {
      aom_upsampled_pred(pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride);
    }

    besterr = vfp->vf(pred, w, src, src_stride, sse);
#if CONFIG_HIGHBITDEPTH
  }
#endif
  return besterr;
}

static unsigned int upsampled_setup_center_error(
    const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
    int error_per_bit, const aom_variance_fn_ptr_t *vfp,
    const uint8_t *const src, const int src_stride, const uint8_t *const y,
    int y_stride, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int offset, int *mvjcost,
    int *mvcost[2], unsigned int *sse1, int *distortion) {
  unsigned int besterr = upsampled_pref_error(
      xd, vfp, src, src_stride, y + offset, y_stride, 0, 0, second_pred, mask,
      mask_stride, invert_mask, w, h, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  return besterr;
}

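// Full sub-pixel tree search: each of up to three rounds (half-, quarter-,
// eighth-pel, depending on forced_stop/allow_hp) evaluates the four cardinal
// neighbors at the current step, then the diagonal in the cheaper quadrant,
// and optionally a second level of points around the new best.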
int av1_find_best_sub_pixel_tree(
    MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
  const uint8_t *const src_address = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  const MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int thismse;
  const int y_stride = xd->plane[0].pre[0].stride;
  MV *bestmv = &x->best_mv.as_mv;
  const int offset = bestmv->row * y_stride + bestmv->col;
  const uint8_t *const y = xd->plane[0].pre[0].buf;

  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
  int hstep = 4;
  int iter, round = 3 - forced_stop;
  int tr = br;
  int tc = bc;
  const MV *search_step = search_step_table;
  int idx, best_idx = -1;
  unsigned int cost_array[5];
  int kr, kc;
  int minc, maxc, minr, maxr;

  av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
                                 ref_mv);

  if (!allow_hp)
    if (round == 3) round = 2;

  bestmv->row *= 8;
  bestmv->col *= 8;

  // use_upsampled_ref can be 0 or 1
  if (use_upsampled_ref)
    besterr = upsampled_setup_center_error(
        xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
        y_stride, second_pred, mask, mask_stride, invert_mask, w, h, offset,
        mvjcost, mvcost, sse1, distortion);
  else
    besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                                 src_address, src_stride, y, y_stride,
                                 second_pred, mask, mask_stride, invert_mask, w,
                                 h, offset, mvjcost, mvcost, sse1, distortion);

  (void)cost_list;  // to silence compiler warning

  for (iter = 0; iter < round; ++iter) {
    // Check vertical and horizontal sub-pixel positions.
    for (idx = 0; idx < 4; ++idx) {
      tr = br + search_step[idx].row;
      tc = bc + search_step[idx].col;
      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
        MV this_mv = { tr, tc };

        if (use_upsampled_ref) {
          thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
                                         pre(y, y_stride, tr, tc), y_stride,
                                         sp(tc), sp(tr), second_pred, mask,
                                         mask_stride, invert_mask, w, h, &sse);
        } else {
          const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
          if (second_pred == NULL)
            thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                               src_address, src_stride, &sse);
          else if (mask)
            thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
                                src_address, src_stride, second_pred, mask,
                                mask_stride, invert_mask, &sse);
          else
            thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                                src_address, src_stride, &sse, second_pred);
        }

        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
                                                mvcost, error_per_bit);

        if (cost_array[idx] < besterr) {
          best_idx = idx;
          besterr = cost_array[idx];
          *distortion = thismse;
          *sse1 = sse;
        }
      } else {
        cost_array[idx] = INT_MAX;
      }
    }

    // Check diagonal sub-pixel position
    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);

    tc = bc + kc;
    tr = br + kr;
    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
      MV this_mv = { tr, tc };

      if (use_upsampled_ref) {
        thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
                                       pre(y, y_stride, tr, tc), y_stride,
                                       sp(tc), sp(tr), second_pred, mask,
                                       mask_stride, invert_mask, w, h, &sse);
      } else {
        const uint8_t *const pre_address = pre(y, y_stride, tr, tc);

        if (second_pred == NULL)
          thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
                             src_stride, &sse);
        else if (mask)
          thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, src_stride, second_pred, mask,
                              mask_stride, invert_mask, &sse);
        else
          thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, src_stride, &sse, second_pred);
      }

      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
                                            error_per_bit);

      if (cost_array[4] < besterr) {
        best_idx = 4;
        besterr = cost_array[4];
        *distortion = thismse;
        *sse1 = sse;
      }
    } else {
      cost_array[idx] = INT_MAX;
    }

    if (best_idx < 4 && best_idx >= 0) {
      br += search_step[best_idx].row;
      bc += search_step[best_idx].col;
    } else if (best_idx == 4) {
      br = tr;
      bc = tc;
    }

    if (iters_per_step > 1 && best_idx != -1) {
      if (use_upsampled_ref) {
        SECOND_LEVEL_CHECKS_BEST(1);
      } else {
        SECOND_LEVEL_CHECKS_BEST(0);
      }
    }

    search_step += 4;
    hstep >>= 1;
    best_idx = -1;
  }

  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

#undef PRE
#undef CHECK_BETTER

#if CONFIG_WARPED_MOTION
unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
                                     BLOCK_SIZE bsize, int mi_row, int mi_col,
                                     const MV *this_mv) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  const uint8_t *const src = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  uint8_t *const dst = xd->plane[0].dst.buf;
  const int dst_stride = xd->plane[0].dst.stride;
  const aom_variance_fn_ptr_t *vfp = &cpi->fn_ptr[bsize];
  const MV ref_mv = x->mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
  unsigned int mse;
  unsigned int sse;

  av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
  mse = vfp->vf(dst, dst_stride, src, src_stride, &sse);
  mse +=
      mv_err_cost(this_mv, &ref_mv, x->nmvjointcost, x->mvcost, x->errorperbit);
  return mse;
}

// Refine MV in a small range
#if CONFIG_EXT_WARPED_MOTION
unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int *pts0, int *pts_inref0, int *pts_mv0,
                                  int total_samples) {
#else
unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int *pts, int *pts_inref) {
#endif  // CONFIG_EXT_WARPED_MOTION
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 },
                            { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } };
  const MV ref_mv = x->mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
  int16_t br = mbmi->mv[0].as_mv.row;
  int16_t bc = mbmi->mv[0].as_mv.col;
  int16_t *tr = &mbmi->mv[0].as_mv.row;
  int16_t *tc = &mbmi->mv[0].as_mv.col;
  WarpedMotionParams best_wm_params = mbmi->wm_params[0];
#if CONFIG_EXT_WARPED_MOTION
  int best_num_proj_ref = mbmi->num_proj_ref[0];
#endif  // CONFIG_EXT_WARPED_MOTION
  unsigned int bestmse;
  int minc, maxc, minr, maxr;
  const int start = cm->allow_high_precision_mv ? 0 : 4;
  int ite;

  av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
                                 &ref_mv);

  // Calculate the center position's error
  assert(bc >= minc && bc <= maxc && br >= minr && br <= maxr);
  bestmse = av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col,
                                    &mbmi->mv[0].as_mv);

  // MV search
  for (ite = 0; ite < 2; ++ite) {
    int best_idx = -1;
    int idx;

    for (idx = start; idx < start + 4; ++idx) {
      unsigned int thismse;

      *tr = br + neighbors[idx].row;
      *tc = bc + neighbors[idx].col;

      if (*tc >= minc && *tc <= maxc && *tr >= minr && *tr <= maxr) {
        MV this_mv = { *tr, *tc };
#if CONFIG_EXT_WARPED_MOTION
        int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];

        memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
        memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
        if (total_samples > 1)
          mbmi->num_proj_ref[0] =
              sortSamples(pts_mv0, &this_mv, pts, pts_inref, total_samples);
#endif  // CONFIG_EXT_WARPED_MOTION

        if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, *tr,
                             *tc, &mbmi->wm_params[0], mi_row, mi_col)) {
          thismse =
              av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, &this_mv);

          if (thismse < bestmse) {
            best_idx = idx;
            best_wm_params = mbmi->wm_params[0];
#if CONFIG_EXT_WARPED_MOTION
            best_num_proj_ref = mbmi->num_proj_ref[0];
#endif  // CONFIG_EXT_WARPED_MOTION
            bestmse = thismse;
          }
        }
      }
    }

    if (best_idx == -1) break;

    if (best_idx >= 0) {
      br += neighbors[best_idx].row;
      bc += neighbors[best_idx].col;
    }
  }

  *tr = br;
  *tc = bc;
  mbmi->wm_params[0] = best_wm_params;
#if CONFIG_EXT_WARPED_MOTION
  mbmi->num_proj_ref[0] = best_num_proj_ref;
#endif  // CONFIG_EXT_WARPED_MOTION
  return bestmse;
}
#endif  // CONFIG_WARPED_MOTION

static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
                               int range) {
  return ((row - range) >= mv_limits->row_min) &
         ((row + range) <= mv_limits->row_max) &
         ((col - range) >= mv_limits->col_min) &
         ((col + range) <= mv_limits->col_max);
}

static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
  return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
         (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
}

#define CHECK_BETTER                                                      \
  {                                                                       \
    if (thissad < bestsad) {                                              \
      if (use_mvcost)                                                     \
        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
      if (thissad < bestsad) {                                            \
        bestsad = thissad;                                                \
        best_site = i;                                                    \
      }                                                                   \
    }                                                                     \
  }

#define MAX_PATTERN_SCALES 11
#define MAX_PATTERN_CANDIDATES 8  // max number of candidates per scale
#define PATTERN_CANDIDATES_REF 3  // number of refinement candidates

// Calculate and return a sad+mvcost list around an integer best pel.
static INLINE void calc_int_cost_list(const MACROBLOCK *x,
                                      const MV *const ref_mv, int sadpb,
                                      const aom_variance_fn_ptr_t *fn_ptr,
                                      const MV *best_mv, int *cost_list) {
  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
  const int br = best_mv->row;
  const int bc = best_mv->col;
  int i;
  unsigned int sse;
  const MV this_mv = { br, bc };

  cost_list[0] =
      fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
                 in_what->stride, &sse) +
      mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
  if (check_bounds(&x->mv_limits, br, bc, 1)) {
    for (i = 0; i < 4; i++) {
      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
      cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
                                    get_buf_from_mv(in_what, &neighbor_mv),
                                    in_what->stride, &sse) +
                         mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost,
                                     x->mvcost, x->errorperbit);
    }
  } else {
    for (i = 0; i < 4; i++) {
      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
      if (!is_mv_in(&x->mv_limits, &neighbor_mv))
        cost_list[i + 1] = INT_MAX;
      else
        cost_list[i + 1] =
            fn_ptr->vf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &neighbor_mv), in_what->stride,
                       &sse) +
            mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
                        x->errorperbit);
    }
  }
}

static INLINE void calc_int_sad_list(const MACROBLOCK *x,
                                     const MV *const ref_mv, int sadpb,
                                     const aom_variance_fn_ptr_t *fn_ptr,
                                     const MV *best_mv, int *cost_list,
                                     const int use_mvcost, const int bestsad) {
  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
  int i;
  const int br = best_mv->row;
  const int bc = best_mv->col;

  if (cost_list[0] == INT_MAX) {
    cost_list[0] = bestsad;
    if (check_bounds(&x->mv_limits, br, bc, 1)) {
      for (i = 0; i < 4; i++) {
        const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
        cost_list[i + 1] =
            fn_ptr->sdf(what->buf, what->stride,
                        get_buf_from_mv(in_what, &this_mv), in_what->stride);
      }
    } else {
      for (i = 0; i < 4; i++) {
        const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
        if (!is_mv_in(&x->mv_limits, &this_mv))
          cost_list[i + 1] = INT_MAX;
        else
          cost_list[i + 1] =
              fn_ptr->sdf(what->buf, what->stride,
                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
      }
    }
  } else {
    if (use_mvcost) {
      for (i = 0; i < 4; i++) {
        const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
        if (cost_list[i + 1] != INT_MAX) {
          cost_list[i + 1] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
        }
      }
    }
  }
}

// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function.
//
static int pattern_search(
    MACROBLOCK *x, MV *start_mv, int search_param, int sad_per_bit,
    int do_init_search, int *cost_list, const aom_variance_fn_ptr_t *vfp,
    int use_mvcost, const MV *center_mv,
    const int num_candidates[MAX_PATTERN_SCALES],
    const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  int i, s, t;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const int last_is_4 = num_candidates[0] == 4;
  int br, bc;
  int bestsad = INT_MAX;
  int thissad;
  int k = -1;
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  assert(search_param < MAX_MVSEARCH_STEPS);
  int best_init_s = search_param_to_steps[search_param];
  // adjust ref_mv to make sure it is within MV range
  clamp_mv(start_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);
  br = start_mv->row;
  bc = start_mv->col;
  if (cost_list != NULL) {
    cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
        INT_MAX;
  }

  // Work out the start point for the search
  bestsad = vfp->sdf(what->buf, what->stride,
                     get_buf_from_mv(in_what, start_mv), in_what->stride) +
            mvsad_err_cost(x, start_mv, &fcenter_mv, sad_per_bit);

  // Search all possible scales up to the search param around the center point
  // and pick the scale of the best point as the starting scale for
  // further steps around it.
1160 if (do_init_search) {
1161 s = best_init_s;
1162 best_init_s = -1;
1163 for (t = 0; t <= s; ++t) {
1164 int best_site = -1;
Alex Converse0fa0f422017-04-24 12:51:14 -07001165 if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001166 for (i = 0; i < num_candidates[t]; i++) {
1167 const MV this_mv = { br + candidates[t][i].row,
1168 bc + candidates[t][i].col };
1169 thissad =
1170 vfp->sdf(what->buf, what->stride,
1171 get_buf_from_mv(in_what, &this_mv), in_what->stride);
1172 CHECK_BETTER
1173 }
1174 } else {
1175 for (i = 0; i < num_candidates[t]; i++) {
1176 const MV this_mv = { br + candidates[t][i].row,
1177 bc + candidates[t][i].col };
Alex Converse0fa0f422017-04-24 12:51:14 -07001178 if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001179 thissad =
1180 vfp->sdf(what->buf, what->stride,
1181 get_buf_from_mv(in_what, &this_mv), in_what->stride);
1182 CHECK_BETTER
1183 }
1184 }
1185 if (best_site == -1) {
1186 continue;
1187 } else {
1188 best_init_s = t;
1189 k = best_site;
1190 }
1191 }
1192 if (best_init_s != -1) {
1193 br += candidates[best_init_s][k].row;
1194 bc += candidates[best_init_s][k].col;
1195 }
1196 }
1197
1198 // If the center point is still the best, just skip this and move to
1199 // the refinement step.
1200 if (best_init_s != -1) {
1201 const int last_s = (last_is_4 && cost_list != NULL);
1202 int best_site = -1;
1203 s = best_init_s;
1204
1205 for (; s >= last_s; s--) {
1206 // No need to search all points the 1st time if initial search was used
1207 if (!do_init_search || s != best_init_s) {
Alex Converse0fa0f422017-04-24 12:51:14 -07001208 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001209 for (i = 0; i < num_candidates[s]; i++) {
1210 const MV this_mv = { br + candidates[s][i].row,
1211 bc + candidates[s][i].col };
1212 thissad =
1213 vfp->sdf(what->buf, what->stride,
1214 get_buf_from_mv(in_what, &this_mv), in_what->stride);
1215 CHECK_BETTER
1216 }
1217 } else {
1218 for (i = 0; i < num_candidates[s]; i++) {
1219 const MV this_mv = { br + candidates[s][i].row,
1220 bc + candidates[s][i].col };
Alex Converse0fa0f422017-04-24 12:51:14 -07001221 if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001222 thissad =
1223 vfp->sdf(what->buf, what->stride,
1224 get_buf_from_mv(in_what, &this_mv), in_what->stride);
1225 CHECK_BETTER
1226 }
1227 }
1228
1229 if (best_site == -1) {
1230 continue;
1231 } else {
1232 br += candidates[s][best_site].row;
1233 bc += candidates[s][best_site].col;
1234 k = best_site;
1235 }
1236 }
1237
1238 do {
1239 int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1240 best_site = -1;
1241 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1242 next_chkpts_indices[1] = k;
1243 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1244
Alex Converse0fa0f422017-04-24 12:51:14 -07001245 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001246 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1247 const MV this_mv = {
1248 br + candidates[s][next_chkpts_indices[i]].row,
1249 bc + candidates[s][next_chkpts_indices[i]].col
1250 };
1251 thissad =
1252 vfp->sdf(what->buf, what->stride,
1253 get_buf_from_mv(in_what, &this_mv), in_what->stride);
1254 CHECK_BETTER
1255 }
1256 } else {
1257 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1258 const MV this_mv = {
1259 br + candidates[s][next_chkpts_indices[i]].row,
1260 bc + candidates[s][next_chkpts_indices[i]].col
1261 };
Alex Converse0fa0f422017-04-24 12:51:14 -07001262 if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001263 thissad =
1264 vfp->sdf(what->buf, what->stride,
1265 get_buf_from_mv(in_what, &this_mv), in_what->stride);
1266 CHECK_BETTER
1267 }
1268 }
1269
1270 if (best_site != -1) {
1271 k = next_chkpts_indices[best_site];
1272 br += candidates[s][k].row;
1273 bc += candidates[s][k].col;
1274 }
1275 } while (best_site != -1);
1276 }
1277
1278 // Note: If we enter the if below, then cost_list must be non-NULL.
1279 if (s == 0) {
1280 cost_list[0] = bestsad;
1281 if (!do_init_search || s != best_init_s) {
Alex Converse0fa0f422017-04-24 12:51:14 -07001282 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001283 for (i = 0; i < num_candidates[s]; i++) {
1284 const MV this_mv = { br + candidates[s][i].row,
1285 bc + candidates[s][i].col };
1286 cost_list[i + 1] = thissad =
1287 vfp->sdf(what->buf, what->stride,
1288 get_buf_from_mv(in_what, &this_mv), in_what->stride);
1289 CHECK_BETTER
1290 }
1291 } else {
1292 for (i = 0; i < num_candidates[s]; i++) {
1293 const MV this_mv = { br + candidates[s][i].row,
1294 bc + candidates[s][i].col };
Alex Converse0fa0f422017-04-24 12:51:14 -07001295 if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001296 cost_list[i + 1] = thissad =
1297 vfp->sdf(what->buf, what->stride,
1298 get_buf_from_mv(in_what, &this_mv), in_what->stride);
1299 CHECK_BETTER
1300 }
1301 }
1302
1303 if (best_site != -1) {
1304 br += candidates[s][best_site].row;
1305 bc += candidates[s][best_site].col;
1306 k = best_site;
1307 }
1308 }
1309 while (best_site != -1) {
1310 int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1311 best_site = -1;
1312 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1313 next_chkpts_indices[1] = k;
1314 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1315 cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
1316 cost_list[((k + 2) % 4) + 1] = cost_list[0];
1317 cost_list[0] = bestsad;
1318
Alex Converse0fa0f422017-04-24 12:51:14 -07001319 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001320 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1321 const MV this_mv = {
1322 br + candidates[s][next_chkpts_indices[i]].row,
1323 bc + candidates[s][next_chkpts_indices[i]].col
1324 };
1325 cost_list[next_chkpts_indices[i] + 1] = thissad =
1326 vfp->sdf(what->buf, what->stride,
1327 get_buf_from_mv(in_what, &this_mv), in_what->stride);
1328 CHECK_BETTER
1329 }
1330 } else {
1331 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1332 const MV this_mv = {
1333 br + candidates[s][next_chkpts_indices[i]].row,
1334 bc + candidates[s][next_chkpts_indices[i]].col
1335 };
Alex Converse0fa0f422017-04-24 12:51:14 -07001336 if (!is_mv_in(&x->mv_limits, &this_mv)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001337 cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
1338 continue;
1339 }
1340 cost_list[next_chkpts_indices[i] + 1] = thissad =
1341 vfp->sdf(what->buf, what->stride,
1342 get_buf_from_mv(in_what, &this_mv), in_what->stride);
1343 CHECK_BETTER
1344 }
1345 }
1346
1347 if (best_site != -1) {
1348 k = next_chkpts_indices[best_site];
1349 br += candidates[s][k].row;
1350 bc += candidates[s][k].col;
1351 }
1352 }
1353 }
1354 }
1355
1356 // Returns the one-away integer pel cost/sad around the best as follows:
1357 // cost_list[0]: cost/sad at the best integer pel
1358 // cost_list[1]: cost/sad at delta {0, -1} (left) from the best integer pel
1359 // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel
1360 // cost_list[3]: cost/sad at delta { 0, 1} (right) from the best integer pel
1361 // cost_list[4]: cost/sad at delta {-1, 0} (top) from the best integer pel
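  // A sketch of that layout, with the best integer-pel MV at centre position
  // C (row/col deltas as listed above):
  //               [4]
  //         [1]    C    [3]
  //               [2]
  // where [i] denotes cost_list[i] and cost_list[0] is the cost at C itself.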
1362 if (cost_list) {
Urvang Joshi454280d2016-10-14 16:51:44 -07001363 const MV best_int_mv = { br, bc };
Yaowu Xuc27fc142016-08-22 16:08:15 -07001364 if (last_is_4) {
Urvang Joshi454280d2016-10-14 16:51:44 -07001365 calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, cost_list,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001366 use_mvcost, bestsad);
1367 } else {
Urvang Joshi454280d2016-10-14 16:51:44 -07001368 calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_int_mv,
1369 cost_list);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001370 }
1371 }
1372 x->best_mv.as_mv.row = br;
1373 x->best_mv.as_mv.col = bc;
1374 return bestsad;
1375}
1376
Yaowu Xuf883b422016-08-30 14:01:10 -07001377int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
1378 const MV *center_mv, const aom_variance_fn_ptr_t *vfp,
1379 int use_mvcost) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001380 const MACROBLOCKD *const xd = &x->e_mbd;
1381 const struct buf_2d *const what = &x->plane[0].src;
1382 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1383 const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1384 unsigned int unused;
1385
1386 return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
1387 in_what->stride, &unused) +
1388 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1389 x->errorperbit)
1390 : 0);
1391}
1392
Yaowu Xuf883b422016-08-30 14:01:10 -07001393int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
1394 const MV *center_mv, const uint8_t *second_pred,
1395 const aom_variance_fn_ptr_t *vfp, int use_mvcost) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001396 const MACROBLOCKD *const xd = &x->e_mbd;
1397 const struct buf_2d *const what = &x->plane[0].src;
1398 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1399 const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1400 unsigned int unused;
1401
1402 return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
1403 what->buf, what->stride, &unused, second_pred) +
1404 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1405 x->errorperbit)
1406 : 0);
1407}
1408
David Barkerc155e012017-05-11 13:54:54 +01001409int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
1410 const MV *center_mv, const uint8_t *second_pred,
1411 const uint8_t *mask, int mask_stride,
1412 int invert_mask, const aom_variance_fn_ptr_t *vfp,
1413 int use_mvcost) {
1414 const MACROBLOCKD *const xd = &x->e_mbd;
1415 const struct buf_2d *const what = &x->plane[0].src;
1416 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1417 const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1418 unsigned int unused;
1419
David Barkerf19f35f2017-05-22 16:33:22 +01001420 return vfp->msvf(what->buf, what->stride, 0, 0,
1421 get_buf_from_mv(in_what, best_mv), in_what->stride,
1422 second_pred, mask, mask_stride, invert_mask, &unused) +
David Barkerc155e012017-05-11 13:54:54 +01001423 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1424 x->errorperbit)
1425 : 0);
1426}
David Barkerc155e012017-05-11 13:54:54 +01001427
Yaowu Xuf883b422016-08-30 14:01:10 -07001428int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
1429 int sad_per_bit, int do_init_search, int *cost_list,
1430 const aom_variance_fn_ptr_t *vfp, int use_mvcost,
1431 const MV *center_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001432  // The first scale has the 8 closest points; the remaining scales have 6
1433  // points in a hex shape at increasing scales.
1434 static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
1435 6, 6, 6, 6, 6 };
1436 // Note that the largest candidate step at each scale is 2^scale
1437 /* clang-format off */
1438 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
1439 { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
1440 { -1, 0 } },
1441 { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
1442 { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
1443 { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
1444 { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
1445 { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
1446 { -32, 0 } },
1447 { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
1448 { -64, 0 } },
1449 { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
1450 { -128, 0 } },
1451 { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
1452 { -256, 0 } },
1453 { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
1454 { -512, 0 } },
1455 { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
1456 { -512, 1024 }, { -1024, 0 } },
1457 };
1458 /* clang-format on */
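  // For example, at scale 3 the candidates above reach at most 8 full pels
  // (2^3) from the current centre; each higher scale doubles that reach.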
1459 return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
1460 cost_list, vfp, use_mvcost, center_mv,
1461 hex_num_candidates, hex_candidates);
1462}
1463
1464static int bigdia_search(MACROBLOCK *x, MV *start_mv, int search_param,
1465 int sad_per_bit, int do_init_search, int *cost_list,
Yaowu Xuf883b422016-08-30 14:01:10 -07001466 const aom_variance_fn_ptr_t *vfp, int use_mvcost,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001467 const MV *center_mv) {
1468  // The first scale has the 4 closest points; the remaining scales have 8
1469  // points in a diamond shape at increasing scales.
1470 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
1471 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1472 };
1473 // Note that the largest candidate step at each scale is 2^scale
1474 /* clang-format off */
1475 static const MV
1476 bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
1477 { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
1478 { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
1479 { -1, 1 }, { -2, 0 } },
1480 { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
1481 { -2, 2 }, { -4, 0 } },
1482 { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
1483 { -4, 4 }, { -8, 0 } },
1484 { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
1485 { -8, 8 }, { -16, 0 } },
1486 { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
1487 { 0, 32 }, { -16, 16 }, { -32, 0 } },
1488 { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
1489 { 0, 64 }, { -32, 32 }, { -64, 0 } },
1490 { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
1491 { 0, 128 }, { -64, 64 }, { -128, 0 } },
1492 { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
1493 { 0, 256 }, { -128, 128 }, { -256, 0 } },
1494 { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
1495 { 0, 512 }, { -256, 256 }, { -512, 0 } },
1496 { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
1497 { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } },
1498 };
1499 /* clang-format on */
1500 return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
1501 cost_list, vfp, use_mvcost, center_mv,
1502 bigdia_num_candidates, bigdia_candidates);
1503}
1504
1505static int square_search(MACROBLOCK *x, MV *start_mv, int search_param,
1506 int sad_per_bit, int do_init_search, int *cost_list,
Yaowu Xuf883b422016-08-30 14:01:10 -07001507 const aom_variance_fn_ptr_t *vfp, int use_mvcost,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001508 const MV *center_mv) {
1509 // All scales have 8 closest points in square shape
1510 static const int square_num_candidates[MAX_PATTERN_SCALES] = {
1511 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1512 };
1513 // Note that the largest candidate step at each scale is 2^scale
1514 /* clang-format off */
1515 static const MV
1516 square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
1517 { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
1518 { -1, 1 }, { -1, 0 } },
1519 { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
1520 { -2, 2 }, { -2, 0 } },
1521 { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
1522 { -4, 4 }, { -4, 0 } },
1523 { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
1524 { -8, 8 }, { -8, 0 } },
1525 { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
1526 { 0, 16 }, { -16, 16 }, { -16, 0 } },
1527 { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
1528 { 0, 32 }, { -32, 32 }, { -32, 0 } },
1529 { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
1530 { 0, 64 }, { -64, 64 }, { -64, 0 } },
1531 { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
1532 { 0, 128 }, { -128, 128 }, { -128, 0 } },
1533 { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
1534 { 0, 256 }, { -256, 256 }, { -256, 0 } },
1535 { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
1536 { 0, 512 }, { -512, 512 }, { -512, 0 } },
1537 { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
1538 { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } },
1539 };
1540 /* clang-format on */
1541 return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
1542 cost_list, vfp, use_mvcost, center_mv,
1543 square_num_candidates, square_candidates);
1544}
1545
1546static int fast_hex_search(MACROBLOCK *x, MV *ref_mv, int search_param,
1547 int sad_per_bit,
1548 int do_init_search, // must be zero for fast_hex
Yaowu Xuf883b422016-08-30 14:01:10 -07001549 int *cost_list, const aom_variance_fn_ptr_t *vfp,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001550 int use_mvcost, const MV *center_mv) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001551 return av1_hex_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param),
1552 sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
1553 center_mv);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001554}
1555
1556static int fast_dia_search(MACROBLOCK *x, MV *ref_mv, int search_param,
1557 int sad_per_bit, int do_init_search, int *cost_list,
Yaowu Xuf883b422016-08-30 14:01:10 -07001558 const aom_variance_fn_ptr_t *vfp, int use_mvcost,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001559 const MV *center_mv) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001560 return bigdia_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param),
Yaowu Xuc27fc142016-08-22 16:08:15 -07001561 sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
1562 center_mv);
1563}
1564
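// Note on the two "fast" wrappers above: clamping search_param up to at least
// MAX_MVSEARCH_STEPS - 2 effectively restricts the pattern search to its
// smallest scales, trading search range for speed.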
1565#undef CHECK_BETTER
1566
1567// Exhaustive motion search around a given centre position with a given
1568// step size.
1569static int exhuastive_mesh_search(MACROBLOCK *x, MV *ref_mv, MV *best_mv,
1570 int range, int step, int sad_per_bit,
Yaowu Xuf883b422016-08-30 14:01:10 -07001571 const aom_variance_fn_ptr_t *fn_ptr,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001572 const MV *center_mv) {
1573 const MACROBLOCKD *const xd = &x->e_mbd;
1574 const struct buf_2d *const what = &x->plane[0].src;
1575 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1576 MV fcenter_mv = { center_mv->row, center_mv->col };
1577 unsigned int best_sad = INT_MAX;
1578 int r, c, i;
1579 int start_col, end_col, start_row, end_row;
1580 int col_step = (step > 1) ? step : 4;
1581
1582 assert(step >= 1);
1583
Alex Converse0fa0f422017-04-24 12:51:14 -07001584 clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1585 x->mv_limits.row_min, x->mv_limits.row_max);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001586 *best_mv = fcenter_mv;
1587 best_sad =
1588 fn_ptr->sdf(what->buf, what->stride,
1589 get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
1590 mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
Alex Converse0fa0f422017-04-24 12:51:14 -07001591 start_row = AOMMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
1592 start_col = AOMMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
1593 end_row = AOMMIN(range, x->mv_limits.row_max - fcenter_mv.row);
1594 end_col = AOMMIN(range, x->mv_limits.col_max - fcenter_mv.col);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001595
1596 for (r = start_row; r <= end_row; r += step) {
1597 for (c = start_col; c <= end_col; c += col_step) {
1598 // Step > 1 means we are not checking every location in this pass.
1599 if (step > 1) {
1600 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
1601 unsigned int sad =
1602 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
1603 in_what->stride);
1604 if (sad < best_sad) {
1605 sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1606 if (sad < best_sad) {
1607 best_sad = sad;
1608 x->second_best_mv.as_mv = *best_mv;
1609 *best_mv = mv;
1610 }
1611 }
1612 } else {
1613 // 4 sads in a single call if we are checking every location
1614 if (c + 3 <= end_col) {
1615 unsigned int sads[4];
1616 const uint8_t *addrs[4];
1617 for (i = 0; i < 4; ++i) {
1618 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1619 addrs[i] = get_buf_from_mv(in_what, &mv);
1620 }
1621 fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
1622
1623 for (i = 0; i < 4; ++i) {
1624 if (sads[i] < best_sad) {
1625 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1626 const unsigned int sad =
1627 sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1628 if (sad < best_sad) {
1629 best_sad = sad;
1630 x->second_best_mv.as_mv = *best_mv;
1631 *best_mv = mv;
1632 }
1633 }
1634 }
1635 } else {
1636 for (i = 0; i < end_col - c; ++i) {
1637 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1638 unsigned int sad =
1639 fn_ptr->sdf(what->buf, what->stride,
1640 get_buf_from_mv(in_what, &mv), in_what->stride);
1641 if (sad < best_sad) {
1642 sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1643 if (sad < best_sad) {
1644 best_sad = sad;
1645 x->second_best_mv.as_mv = *best_mv;
1646 *best_mv = mv;
1647 }
1648 }
1649 }
1650 }
1651 }
1652 }
1653 }
1654
1655 return best_sad;
1656}
1657
Yaowu Xuf883b422016-08-30 14:01:10 -07001658int av1_diamond_search_sad_c(MACROBLOCK *x, const search_site_config *cfg,
1659 MV *ref_mv, MV *best_mv, int search_param,
1660 int sad_per_bit, int *num00,
1661 const aom_variance_fn_ptr_t *fn_ptr,
1662 const MV *center_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001663 int i, j, step;
1664
1665 const MACROBLOCKD *const xd = &x->e_mbd;
1666 uint8_t *what = x->plane[0].src.buf;
1667 const int what_stride = x->plane[0].src.stride;
1668 const uint8_t *in_what;
1669 const int in_what_stride = xd->plane[0].pre[0].stride;
1670 const uint8_t *best_address;
1671
1672 unsigned int bestsad = INT_MAX;
1673 int best_site = 0;
1674 int last_site = 0;
1675
1676 int ref_row;
1677 int ref_col;
1678
1679 // search_param determines the length of the initial step and hence the number
1680 // of iterations.
1681 // 0 = initial step (MAX_FIRST_STEP) pel
1682 // 1 = (MAX_FIRST_STEP/2) pel,
1683 // 2 = (MAX_FIRST_STEP/4) pel...
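  // In other words, each increment of search_param halves the first step and
  // skips one ring of the precomputed search-site table, which is why both ss
  // and tot_steps below are offset by search_param.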
1684 const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
1685 const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
1686
1687 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
Alex Converse0fa0f422017-04-24 12:51:14 -07001688 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1689 x->mv_limits.row_min, x->mv_limits.row_max);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001690 ref_row = ref_mv->row;
1691 ref_col = ref_mv->col;
1692 *num00 = 0;
1693 best_mv->row = ref_row;
1694 best_mv->col = ref_col;
1695
1696 // Work out the start point for the search
1697 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
1698 best_address = in_what;
1699
1700 // Check the starting position
1701 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1702 mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
1703
1704 i = 1;
1705
1706 for (step = 0; step < tot_steps; step++) {
1707 int all_in = 1, t;
1708
1709    // All_in is true if every one of the points we are checking is within
1710 // the bounds of the image.
Alex Converse0fa0f422017-04-24 12:51:14 -07001711 all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_limits.row_min);
1712 all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_limits.row_max);
1713 all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_limits.col_min);
1714 all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_limits.col_max);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001715
1716 // If all the pixels are within the bounds we don't check whether the
1717 // search point is valid in this loop, otherwise we check each point
1718    // for validity.
1719 if (all_in) {
1720 unsigned int sad_array[4];
1721
1722 for (j = 0; j < cfg->searches_per_step; j += 4) {
1723 unsigned char const *block_offset[4];
1724
1725 for (t = 0; t < 4; t++)
1726 block_offset[t] = ss[i + t].offset + best_address;
1727
1728 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1729 sad_array);
1730
1731 for (t = 0; t < 4; t++, i++) {
1732 if (sad_array[t] < bestsad) {
1733 const MV this_mv = { best_mv->row + ss[i].mv.row,
1734 best_mv->col + ss[i].mv.col };
1735 sad_array[t] +=
1736 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1737 if (sad_array[t] < bestsad) {
1738 bestsad = sad_array[t];
1739 best_site = i;
1740 }
1741 }
1742 }
1743 }
1744 } else {
1745 for (j = 0; j < cfg->searches_per_step; j++) {
1746 // Trap illegal vectors
1747 const MV this_mv = { best_mv->row + ss[i].mv.row,
1748 best_mv->col + ss[i].mv.col };
1749
Alex Converse0fa0f422017-04-24 12:51:14 -07001750 if (is_mv_in(&x->mv_limits, &this_mv)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001751 const uint8_t *const check_here = ss[i].offset + best_address;
1752 unsigned int thissad =
1753 fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1754
1755 if (thissad < bestsad) {
1756 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1757 if (thissad < bestsad) {
1758 bestsad = thissad;
1759 best_site = i;
1760 }
1761 }
1762 }
1763 i++;
1764 }
1765 }
1766 if (best_site != last_site) {
1767 x->second_best_mv.as_mv = *best_mv;
1768 best_mv->row += ss[best_site].mv.row;
1769 best_mv->col += ss[best_site].mv.col;
1770 best_address += ss[best_site].offset;
1771 last_site = best_site;
1772#if defined(NEW_DIAMOND_SEARCH)
1773 while (1) {
1774 const MV this_mv = { best_mv->row + ss[best_site].mv.row,
1775 best_mv->col + ss[best_site].mv.col };
Alex Converse0fa0f422017-04-24 12:51:14 -07001776 if (is_mv_in(&x->mv_limits, &this_mv)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001777 const uint8_t *const check_here = ss[best_site].offset + best_address;
1778 unsigned int thissad =
1779 fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1780 if (thissad < bestsad) {
1781 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1782 if (thissad < bestsad) {
1783 bestsad = thissad;
1784 best_mv->row += ss[best_site].mv.row;
1785 best_mv->col += ss[best_site].mv.col;
1786 best_address += ss[best_site].offset;
1787 continue;
1788 }
1789 }
1790 }
1791 break;
1792 }
1793#endif
1794 } else if (best_address == in_what) {
1795 (*num00)++;
1796 }
1797 }
1798 return bestsad;
1799}
1800
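// A rough outline of vector_match() below: it slides the 1-D source
// projection over the wider reference projection, first sampling the
// projection SAD every 16 positions and then refining the best offset at
// +/-8, +/-4, +/-2 and +/-1, returning the displacement relative to the
// block centre (bw >> 1).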
1801static int vector_match(int16_t *ref, int16_t *src, int bwl) {
1802 int best_sad = INT_MAX;
1803 int this_sad;
1804 int d;
1805 int center, offset = 0;
1806 int bw = 4 << bwl; // redundant variable, to be changed in the experiments.
1807 for (d = 0; d <= bw; d += 16) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001808 this_sad = aom_vector_var(&ref[d], src, bwl);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001809 if (this_sad < best_sad) {
1810 best_sad = this_sad;
1811 offset = d;
1812 }
1813 }
1814 center = offset;
1815
1816 for (d = -8; d <= 8; d += 16) {
1817 int this_pos = offset + d;
1818 // check limit
1819 if (this_pos < 0 || this_pos > bw) continue;
Yaowu Xuf883b422016-08-30 14:01:10 -07001820 this_sad = aom_vector_var(&ref[this_pos], src, bwl);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001821 if (this_sad < best_sad) {
1822 best_sad = this_sad;
1823 center = this_pos;
1824 }
1825 }
1826 offset = center;
1827
1828 for (d = -4; d <= 4; d += 8) {
1829 int this_pos = offset + d;
1830 // check limit
1831 if (this_pos < 0 || this_pos > bw) continue;
Yaowu Xuf883b422016-08-30 14:01:10 -07001832 this_sad = aom_vector_var(&ref[this_pos], src, bwl);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001833 if (this_sad < best_sad) {
1834 best_sad = this_sad;
1835 center = this_pos;
1836 }
1837 }
1838 offset = center;
1839
1840 for (d = -2; d <= 2; d += 4) {
1841 int this_pos = offset + d;
1842 // check limit
1843 if (this_pos < 0 || this_pos > bw) continue;
Yaowu Xuf883b422016-08-30 14:01:10 -07001844 this_sad = aom_vector_var(&ref[this_pos], src, bwl);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001845 if (this_sad < best_sad) {
1846 best_sad = this_sad;
1847 center = this_pos;
1848 }
1849 }
1850 offset = center;
1851
1852 for (d = -1; d <= 1; d += 2) {
1853 int this_pos = offset + d;
1854 // check limit
1855 if (this_pos < 0 || this_pos > bw) continue;
Yaowu Xuf883b422016-08-30 14:01:10 -07001856 this_sad = aom_vector_var(&ref[this_pos], src, bwl);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001857 if (this_sad < best_sad) {
1858 best_sad = this_sad;
1859 center = this_pos;
1860 }
1861 }
1862
1863 return (center - (bw >> 1));
1864}
1865
1866static const MV search_pos[4] = {
1867 { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
1868};
1869
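// A sketch of the approach used by av1_int_pro_motion_estimation() below: the
// source block and a double-sized reference window are each reduced to 1-D
// integral projections (row and column sums), the best column and row offsets
// are found independently with vector_match(), and the resulting full-pel MV
// is then refined by checking its four immediate neighbours with SAD.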
Yaowu Xuf883b422016-08-30 14:01:10 -07001870unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
1871 BLOCK_SIZE bsize, int mi_row,
1872 int mi_col) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001873 MACROBLOCKD *xd = &x->e_mbd;
1874 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1875 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
1876 DECLARE_ALIGNED(16, int16_t, hbuf[2 * MAX_SB_SIZE]);
1877 DECLARE_ALIGNED(16, int16_t, vbuf[2 * MAX_SB_SIZE]);
1878 DECLARE_ALIGNED(16, int16_t, src_hbuf[MAX_SB_SQUARE]);
1879 DECLARE_ALIGNED(16, int16_t, src_vbuf[MAX_SB_SQUARE]);
1880 int idx;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001881 const int src_stride = x->plane[0].src.stride;
1882 const int ref_stride = xd->plane[0].pre[0].stride;
1883 uint8_t const *ref_buf, *src_buf;
1884 MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
Urvang Joshi454280d2016-10-14 16:51:44 -07001885 unsigned int best_sad, tmp_sad, sad_arr[4];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001886 MV this_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001887 const YV12_BUFFER_CONFIG *scaled_ref_frame =
Yaowu Xuf883b422016-08-30 14:01:10 -07001888 av1_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001889
1890 if (scaled_ref_frame) {
1891 int i;
1892 // Swap out the reference frame for a version that's been scaled to
1893 // match the resolution of the current frame, allowing the existing
1894 // motion search code to be used without additional modifications.
1895 for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
Yaowu Xuf883b422016-08-30 14:01:10 -07001896 av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001897 }
1898
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001899#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001900 {
1901 unsigned int this_sad;
1902 tmp_mv->row = 0;
1903 tmp_mv->col = 0;
1904 this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
1905 xd->plane[0].pre[0].buf, ref_stride);
1906
1907 if (scaled_ref_frame) {
1908 int i;
1909 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
1910 }
1911 return this_sad;
1912 }
1913#endif
1914
Sebastien Alaiwanb507bf12017-05-09 17:21:17 +02001915 const int bw = 4 << b_width_log2_lookup[bsize];
1916 const int bh = 4 << b_height_log2_lookup[bsize];
1917 const int search_width = bw << 1;
1918 const int search_height = bh << 1;
1919 const int norm_factor = 3 + (bw >> 5);
1920
Yaowu Xuc27fc142016-08-22 16:08:15 -07001921 // Set up prediction 1-D reference set
1922 ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
1923 for (idx = 0; idx < search_width; idx += 16) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001924 aom_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001925 ref_buf += 16;
1926 }
1927
1928 ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
1929 for (idx = 0; idx < search_height; ++idx) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001930 vbuf[idx] = aom_int_pro_col(ref_buf, bw) >> norm_factor;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001931 ref_buf += ref_stride;
1932 }
1933
1934 // Set up src 1-D reference set
1935 for (idx = 0; idx < bw; idx += 16) {
1936 src_buf = x->plane[0].src.buf + idx;
Yaowu Xuf883b422016-08-30 14:01:10 -07001937 aom_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001938 }
1939
1940 src_buf = x->plane[0].src.buf;
1941 for (idx = 0; idx < bh; ++idx) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001942 src_vbuf[idx] = aom_int_pro_col(src_buf, bw) >> norm_factor;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001943 src_buf += src_stride;
1944 }
1945
1946 // Find the best match per 1-D search
1947 tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
1948 tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
1949
1950 this_mv = *tmp_mv;
1951 src_buf = x->plane[0].src.buf;
1952 ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
1953 best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
1954
1955 {
1956 const uint8_t *const pos[4] = {
1957 ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
1958 };
1959
Urvang Joshi454280d2016-10-14 16:51:44 -07001960 cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, sad_arr);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001961 }
1962
1963 for (idx = 0; idx < 4; ++idx) {
Urvang Joshi454280d2016-10-14 16:51:44 -07001964 if (sad_arr[idx] < best_sad) {
1965 best_sad = sad_arr[idx];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001966 tmp_mv->row = search_pos[idx].row + this_mv.row;
1967 tmp_mv->col = search_pos[idx].col + this_mv.col;
1968 }
1969 }
1970
Urvang Joshi454280d2016-10-14 16:51:44 -07001971 if (sad_arr[0] < sad_arr[3])
Yaowu Xuc27fc142016-08-22 16:08:15 -07001972 this_mv.row -= 1;
1973 else
1974 this_mv.row += 1;
1975
Urvang Joshi454280d2016-10-14 16:51:44 -07001976 if (sad_arr[1] < sad_arr[2])
Yaowu Xuc27fc142016-08-22 16:08:15 -07001977 this_mv.col -= 1;
1978 else
1979 this_mv.col += 1;
1980
1981 ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
1982
1983 tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
1984 if (best_sad > tmp_sad) {
1985 *tmp_mv = this_mv;
1986 best_sad = tmp_sad;
1987 }
1988
1989 tmp_mv->row *= 8;
1990 tmp_mv->col *= 8;
1991
1992 if (scaled_ref_frame) {
1993 int i;
1994 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
1995 }
1996
1997 return best_sad;
1998}
1999
2000/* do_refine: If the last step (1-away) of the n-step search doesn't pick the center
2001 point as the best match, we will do a final 1-away diamond
2002 refining search */
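// Rough flow of full_pixel_diamond(): run the diamond search at step_param,
// re-run it with progressively finer steps up to further_steps more times
// (skipping passes where the previous search never left its centre), keep
// whichever result has the lower av1_get_mvpred_var() cost, and optionally
// finish with a small refining search around the winner.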
Urvang Joshi52648442016-10-13 17:27:51 -07002003static int full_pixel_diamond(const AV1_COMP *const cpi, MACROBLOCK *x,
2004 MV *mvp_full, int step_param, int sadpb,
2005 int further_steps, int do_refine, int *cost_list,
Yaowu Xuf883b422016-08-30 14:01:10 -07002006 const aom_variance_fn_ptr_t *fn_ptr,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002007 const MV *ref_mv) {
2008 MV temp_mv;
2009 int thissme, n, num00 = 0;
2010 int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
2011 step_param, sadpb, &n, fn_ptr, ref_mv);
2012 if (bestsme < INT_MAX)
Yaowu Xuf883b422016-08-30 14:01:10 -07002013 bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002014 x->best_mv.as_mv = temp_mv;
2015
2016 // If there won't be more n-step search, check to see if refining search is
2017 // needed.
2018 if (n > further_steps) do_refine = 0;
2019
2020 while (n < further_steps) {
2021 ++n;
2022
2023 if (num00) {
2024 num00--;
2025 } else {
2026 thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
2027 step_param + n, sadpb, &num00, fn_ptr,
2028 ref_mv);
2029 if (thissme < INT_MAX)
Yaowu Xuf883b422016-08-30 14:01:10 -07002030 thissme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002031
2032 // check to see if refining search is needed.
2033 if (num00 > further_steps - n) do_refine = 0;
2034
2035 if (thissme < bestsme) {
2036 bestsme = thissme;
2037 x->best_mv.as_mv = temp_mv;
2038 }
2039 }
2040 }
2041
2042 // final 1-away diamond refining search
2043 if (do_refine) {
2044 const int search_range = 8;
2045 MV best_mv = x->best_mv.as_mv;
Yaowu Xuf883b422016-08-30 14:01:10 -07002046 thissme = av1_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
2047 ref_mv);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002048 if (thissme < INT_MAX)
Yaowu Xuf883b422016-08-30 14:01:10 -07002049 thissme = av1_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002050 if (thissme < bestsme) {
2051 bestsme = thissme;
2052 x->best_mv.as_mv = best_mv;
2053 }
2054 }
2055
2056 // Return cost list.
2057 if (cost_list) {
2058 calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list);
2059 }
2060 return bestsme;
2061}
2062
2063#define MIN_RANGE 7
2064#define MAX_RANGE 256
2065#define MIN_INTERVAL 1
2066// Runs a limited-range exhaustive mesh search using a pattern set
2067// according to the encode speed profile.
Urvang Joshi52648442016-10-13 17:27:51 -07002068static int full_pixel_exhaustive(const AV1_COMP *const cpi, MACROBLOCK *x,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002069 const MV *centre_mv_full, int sadpb,
2070 int *cost_list,
Yaowu Xuf883b422016-08-30 14:01:10 -07002071 const aom_variance_fn_ptr_t *fn_ptr,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002072 const MV *ref_mv, MV *dst_mv) {
2073 const SPEED_FEATURES *const sf = &cpi->sf;
2074 MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
2075 MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
2076 int bestsme;
2077 int i;
2078 int interval = sf->mesh_patterns[0].interval;
2079 int range = sf->mesh_patterns[0].range;
2080 int baseline_interval_divisor;
2081
2082 // Keep track of number of exhaustive calls (this frame in this thread).
2083 ++(*x->ex_search_count_ptr);
2084
2085 // Trap illegal values for interval and range for this function.
2086 if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
2087 (interval > range))
2088 return INT_MAX;
2089
2090 baseline_interval_divisor = range / interval;
2091
2092 // Check size of proposed first range against magnitude of the centre
2093 // value used as a starting point.
Yaowu Xuf883b422016-08-30 14:01:10 -07002094 range = AOMMAX(range, (5 * AOMMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
2095 range = AOMMIN(range, MAX_RANGE);
2096 interval = AOMMAX(interval, range / baseline_interval_divisor);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002097
2098 // initial search
2099 bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
2100 sadpb, fn_ptr, &temp_mv);
2101
2102 if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
2103 // Progressive searches with range and step size decreasing each time
2104 // till we reach a step size of 1. Then break out.
2105 for (i = 1; i < MAX_MESH_STEP; ++i) {
2106      // Each pass uses the next (progressively finer, shorter-range) mesh pattern.
2107 bestsme = exhuastive_mesh_search(
2108 x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
2109 sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
2110
2111 if (sf->mesh_patterns[i].interval == 1) break;
2112 }
2113 }
2114
2115 if (bestsme < INT_MAX)
Yaowu Xuf883b422016-08-30 14:01:10 -07002116 bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002117 *dst_mv = temp_mv;
2118
2119 // Return cost list.
2120 if (cost_list) {
2121 calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
2122 }
2123 return bestsme;
2124}
2125
Yaowu Xuf883b422016-08-30 14:01:10 -07002126int av1_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
2127 int sad_per_bit, int distance,
2128 const aom_variance_fn_ptr_t *fn_ptr,
2129 const MV *center_mv, MV *best_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002130 int r, c;
2131 const MACROBLOCKD *const xd = &x->e_mbd;
2132 const struct buf_2d *const what = &x->plane[0].src;
2133 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
Alex Converse0fa0f422017-04-24 12:51:14 -07002134 const int row_min = AOMMAX(ref_mv->row - distance, x->mv_limits.row_min);
2135 const int row_max = AOMMIN(ref_mv->row + distance, x->mv_limits.row_max);
2136 const int col_min = AOMMAX(ref_mv->col - distance, x->mv_limits.col_min);
2137 const int col_max = AOMMIN(ref_mv->col + distance, x->mv_limits.col_max);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002138 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2139 int best_sad =
2140 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2141 in_what->stride) +
2142 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2143 *best_mv = *ref_mv;
2144
2145 for (r = row_min; r < row_max; ++r) {
2146 for (c = col_min; c < col_max; ++c) {
2147 const MV mv = { r, c };
2148 const int sad =
2149 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
2150 in_what->stride) +
2151 mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2152 if (sad < best_sad) {
2153 best_sad = sad;
2154 *best_mv = mv;
2155 }
2156 }
2157 }
2158 return best_sad;
2159}
2160
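// The sadx3/sadx8 variants below are behaviourally equivalent to
// av1_full_search_sad_c() but, where the sdx3f/sdx8f function pointers are
// available, batch the SADs of 3 or 8 horizontally adjacent positions per
// call, falling back to single-position sdf() calls for the rest of each row.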
Yaowu Xuf883b422016-08-30 14:01:10 -07002161int av1_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
2162 int sad_per_bit, int distance,
2163 const aom_variance_fn_ptr_t *fn_ptr,
2164 const MV *center_mv, MV *best_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002165 int r;
2166 const MACROBLOCKD *const xd = &x->e_mbd;
2167 const struct buf_2d *const what = &x->plane[0].src;
2168 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
Alex Converse0fa0f422017-04-24 12:51:14 -07002169 const int row_min = AOMMAX(ref_mv->row - distance, x->mv_limits.row_min);
2170 const int row_max = AOMMIN(ref_mv->row + distance, x->mv_limits.row_max);
2171 const int col_min = AOMMAX(ref_mv->col - distance, x->mv_limits.col_min);
2172 const int col_max = AOMMIN(ref_mv->col + distance, x->mv_limits.col_max);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002173 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2174 unsigned int best_sad =
2175 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2176 in_what->stride) +
2177 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2178 *best_mv = *ref_mv;
2179
2180 for (r = row_min; r < row_max; ++r) {
2181 int c = col_min;
2182 const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
2183
2184 if (fn_ptr->sdx3f != NULL) {
2185 while ((c + 2) < col_max) {
2186 int i;
2187 DECLARE_ALIGNED(16, uint32_t, sads[3]);
2188
2189 fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
2190 sads);
2191
2192 for (i = 0; i < 3; ++i) {
2193 unsigned int sad = sads[i];
2194 if (sad < best_sad) {
2195 const MV mv = { r, c };
2196 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2197 if (sad < best_sad) {
2198 best_sad = sad;
2199 *best_mv = mv;
2200 }
2201 }
2202 ++check_here;
2203 ++c;
2204 }
2205 }
2206 }
2207
2208 while (c < col_max) {
2209 unsigned int sad =
2210 fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride);
2211 if (sad < best_sad) {
2212 const MV mv = { r, c };
2213 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2214 if (sad < best_sad) {
2215 best_sad = sad;
2216 *best_mv = mv;
2217 }
2218 }
2219 ++check_here;
2220 ++c;
2221 }
2222 }
2223
2224 return best_sad;
2225}
2226
Yaowu Xuf883b422016-08-30 14:01:10 -07002227int av1_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
2228 int sad_per_bit, int distance,
2229 const aom_variance_fn_ptr_t *fn_ptr,
2230 const MV *center_mv, MV *best_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002231 int r;
2232 const MACROBLOCKD *const xd = &x->e_mbd;
2233 const struct buf_2d *const what = &x->plane[0].src;
2234 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
Alex Converse0fa0f422017-04-24 12:51:14 -07002235 const int row_min = AOMMAX(ref_mv->row - distance, x->mv_limits.row_min);
2236 const int row_max = AOMMIN(ref_mv->row + distance, x->mv_limits.row_max);
2237 const int col_min = AOMMAX(ref_mv->col - distance, x->mv_limits.col_min);
2238 const int col_max = AOMMIN(ref_mv->col + distance, x->mv_limits.col_max);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002239 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2240 unsigned int best_sad =
2241 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2242 in_what->stride) +
2243 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2244 *best_mv = *ref_mv;
2245
2246 for (r = row_min; r < row_max; ++r) {
2247 int c = col_min;
2248 const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
2249
2250 if (fn_ptr->sdx8f != NULL) {
2251 while ((c + 7) < col_max) {
2252 int i;
2253 DECLARE_ALIGNED(16, uint32_t, sads[8]);
2254
2255 fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
2256 sads);
2257
2258 for (i = 0; i < 8; ++i) {
2259 unsigned int sad = sads[i];
2260 if (sad < best_sad) {
2261 const MV mv = { r, c };
2262 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2263 if (sad < best_sad) {
2264 best_sad = sad;
2265 *best_mv = mv;
2266 }
2267 }
2268 ++check_here;
2269 ++c;
2270 }
2271 }
2272 }
2273
2274 if (fn_ptr->sdx3f != NULL) {
2275 while ((c + 2) < col_max) {
2276 int i;
2277 DECLARE_ALIGNED(16, uint32_t, sads[3]);
2278
2279 fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
2280 sads);
2281
2282 for (i = 0; i < 3; ++i) {
2283 unsigned int sad = sads[i];
2284 if (sad < best_sad) {
2285 const MV mv = { r, c };
2286 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2287 if (sad < best_sad) {
2288 best_sad = sad;
2289 *best_mv = mv;
2290 }
2291 }
2292 ++check_here;
2293 ++c;
2294 }
2295 }
2296 }
2297
2298 while (c < col_max) {
2299 unsigned int sad =
2300 fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride);
2301 if (sad < best_sad) {
2302 const MV mv = { r, c };
2303 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2304 if (sad < best_sad) {
2305 best_sad = sad;
2306 *best_mv = mv;
2307 }
2308 }
2309 ++check_here;
2310 ++c;
2311 }
2312 }
2313
2314 return best_sad;
2315}
2316
Yaowu Xuf883b422016-08-30 14:01:10 -07002317int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit,
2318 int search_range,
2319 const aom_variance_fn_ptr_t *fn_ptr,
2320 const MV *center_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002321 const MACROBLOCKD *const xd = &x->e_mbd;
2322 const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
2323 const struct buf_2d *const what = &x->plane[0].src;
2324 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2325 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2326 const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
2327 unsigned int best_sad =
2328 fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
2329 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2330 int i, j;
2331
2332 for (i = 0; i < search_range; i++) {
2333 int best_site = -1;
Alex Converse0fa0f422017-04-24 12:51:14 -07002334 const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
2335 ((ref_mv->row + 1) < x->mv_limits.row_max) &
2336 ((ref_mv->col - 1) > x->mv_limits.col_min) &
2337 ((ref_mv->col + 1) < x->mv_limits.col_max);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002338
2339 if (all_in) {
2340 unsigned int sads[4];
2341 const uint8_t *const positions[4] = { best_address - in_what->stride,
2342 best_address - 1, best_address + 1,
2343 best_address + in_what->stride };
2344
2345 fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
2346
2347 for (j = 0; j < 4; ++j) {
2348 if (sads[j] < best_sad) {
2349 const MV mv = { ref_mv->row + neighbors[j].row,
2350 ref_mv->col + neighbors[j].col };
2351 sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2352 if (sads[j] < best_sad) {
2353 best_sad = sads[j];
2354 best_site = j;
2355 }
2356 }
2357 }
2358 } else {
2359 for (j = 0; j < 4; ++j) {
2360 const MV mv = { ref_mv->row + neighbors[j].row,
2361 ref_mv->col + neighbors[j].col };
2362
Alex Converse0fa0f422017-04-24 12:51:14 -07002363 if (is_mv_in(&x->mv_limits, &mv)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002364 unsigned int sad =
2365 fn_ptr->sdf(what->buf, what->stride,
2366 get_buf_from_mv(in_what, &mv), in_what->stride);
2367 if (sad < best_sad) {
2368 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2369 if (sad < best_sad) {
2370 best_sad = sad;
2371 best_site = j;
2372 }
2373 }
2374 }
2375 }
2376 }
2377
2378 if (best_site == -1) {
2379 break;
2380 } else {
2381 x->second_best_mv.as_mv = *ref_mv;
2382 ref_mv->row += neighbors[best_site].row;
2383 ref_mv->col += neighbors[best_site].col;
2384 best_address = get_buf_from_mv(in_what, ref_mv);
2385 }
2386 }
2387
2388 return best_sad;
2389}
2390
2391// This function is called when we do joint motion search in comp_inter_inter
David Barkerf19f35f2017-05-22 16:33:22 +01002392// mode, or when searching for one component of an ext-inter compound mode.
Yaowu Xuf883b422016-08-30 14:01:10 -07002393int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
2394 const aom_variance_fn_ptr_t *fn_ptr,
David Barkerc155e012017-05-11 13:54:54 +01002395 const uint8_t *mask, int mask_stride,
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02002396 int invert_mask, const MV *center_mv,
2397 const uint8_t *second_pred) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002398 const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
2399 { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
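  // Unlike av1_refining_search_sad() above, which visits only the 4-connected
  // neighbours, this search examines the full 8-point neighbourhood (hence
  // "8p") and scores each candidate against the compound prediction, masked
  // or averaged depending on whether a mask is supplied.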
2400 const MACROBLOCKD *const xd = &x->e_mbd;
2401 const struct buf_2d *const what = &x->plane[0].src;
2402 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2403 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2404 MV *best_mv = &x->best_mv.as_mv;
Yunqing Wang37f86a32017-04-20 15:53:00 -07002405 unsigned int best_sad = INT_MAX;
2406 int i, j;
2407
Alex Converse0fa0f422017-04-24 12:51:14 -07002408 clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max,
2409 x->mv_limits.row_min, x->mv_limits.row_max);
David Barkerc155e012017-05-11 13:54:54 +01002410 if (mask)
David Barkerf19f35f2017-05-22 16:33:22 +01002411 best_sad = fn_ptr->msdf(what->buf, what->stride,
2412 get_buf_from_mv(in_what, best_mv), in_what->stride,
2413 second_pred, mask, mask_stride, invert_mask) +
David Barkerc155e012017-05-11 13:54:54 +01002414 mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
2415 else
David Barkerc155e012017-05-11 13:54:54 +01002416 best_sad =
2417 fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
2418 in_what->stride, second_pred) +
2419 mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002420
2421 for (i = 0; i < search_range; ++i) {
2422 int best_site = -1;
2423
2424 for (j = 0; j < 8; ++j) {
2425 const MV mv = { best_mv->row + neighbors[j].row,
2426 best_mv->col + neighbors[j].col };
2427
Alex Converse0fa0f422017-04-24 12:51:14 -07002428 if (is_mv_in(&x->mv_limits, &mv)) {
David Barkerc155e012017-05-11 13:54:54 +01002429 unsigned int sad;
David Barkerc155e012017-05-11 13:54:54 +01002430 if (mask)
David Barkerf19f35f2017-05-22 16:33:22 +01002431 sad = fn_ptr->msdf(what->buf, what->stride,
2432 get_buf_from_mv(in_what, &mv), in_what->stride,
2433 second_pred, mask, mask_stride, invert_mask);
David Barkerc155e012017-05-11 13:54:54 +01002434 else
David Barkerc155e012017-05-11 13:54:54 +01002435 sad = fn_ptr->sdaf(what->buf, what->stride,
2436 get_buf_from_mv(in_what, &mv), in_what->stride,
2437 second_pred);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002438 if (sad < best_sad) {
2439 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2440 if (sad < best_sad) {
2441 best_sad = sad;
2442 best_site = j;
2443 }
2444 }
2445 }
2446 }
2447
2448 if (best_site == -1) {
2449 break;
2450 } else {
2451 best_mv->row += neighbors[best_site].row;
2452 best_mv->col += neighbors[best_site].col;
2453 }
2454 }
2455 return best_sad;
2456}
2457
2458#define MIN_EX_SEARCH_LIMIT 128
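// Exhaustive (mesh) searches are rationed: they are allowed only while the
// per-thread exhaustive-search count stays below max_exaustive_pct percent of
// the regular search count (with a floor of MIN_EX_SEARCH_LIMIT), the speed
// features enable them, and the frame is not a source alt-ref.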
Urvang Joshi52648442016-10-13 17:27:51 -07002459static int is_exhaustive_allowed(const AV1_COMP *const cpi, MACROBLOCK *x) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002460 const SPEED_FEATURES *const sf = &cpi->sf;
2461 const int max_ex =
Yaowu Xuf883b422016-08-30 14:01:10 -07002462 AOMMAX(MIN_EX_SEARCH_LIMIT,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002463 (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
2464
2465 return sf->allow_exhaustive_searches &&
2466 (sf->exhaustive_searches_thresh < INT_MAX) &&
2467 (*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
2468}
2469
RogerZhoucc5d35d2017-08-07 22:20:15 -07002470#if CONFIG_HASH_ME
2471#define MAX_HASH_MV_TABLE_SIZE 5
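// add_to_sort_table() keeps the lowest-cost hash candidates seen so far:
// while fewer than max_size entries exist the new block is appended;
// otherwise it replaces the current highest-cost entry, and only when the new
// cost is lower.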
2472static void add_to_sort_table(block_hash block_hashes[MAX_HASH_MV_TABLE_SIZE],
2473 int costs[MAX_HASH_MV_TABLE_SIZE], int *existing,
2474 int max_size, block_hash curr_block,
2475 int curr_cost) {
2476 if (*existing < max_size) {
2477 block_hashes[*existing] = curr_block;
2478 costs[*existing] = curr_cost;
2479 (*existing)++;
2480 } else {
2481 int max_cost = 0;
2482 int max_cost_idx = 0;
2483 for (int i = 0; i < max_size; i++) {
2484 if (costs[i] > max_cost) {
2485 max_cost = costs[i];
2486 max_cost_idx = i;
2487 }
2488 }
2489
2490 if (curr_cost < max_cost) {
2491 block_hashes[max_cost_idx] = curr_block;
2492 costs[max_cost_idx] = curr_cost;
2493 }
2494 }
2495}
2496#endif
2497
2498#if CONFIG_HASH_ME
2499int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
2500 MV *mvp_full, int step_param, int error_per_bit,
2501 int *cost_list, const MV *ref_mv, int var_max, int rd,
RogerZhoud15e7c12017-09-26 08:49:28 -07002502 int x_pos, int y_pos, int intra) {
RogerZhoucc5d35d2017-08-07 22:20:15 -07002503#else
Urvang Joshi52648442016-10-13 17:27:51 -07002504int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
Yaowu Xuf883b422016-08-30 14:01:10 -07002505 MV *mvp_full, int step_param, int error_per_bit,
2506 int *cost_list, const MV *ref_mv, int var_max,
2507 int rd) {
RogerZhoucc5d35d2017-08-07 22:20:15 -07002508#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07002509 const SPEED_FEATURES *const sf = &cpi->sf;
2510 const SEARCH_METHODS method = sf->mv.search_method;
Urvang Joshi52648442016-10-13 17:27:51 -07002511 const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07002512 int var = 0;
2513
2514 if (cost_list) {
2515 cost_list[0] = INT_MAX;
2516 cost_list[1] = INT_MAX;
2517 cost_list[2] = INT_MAX;
2518 cost_list[3] = INT_MAX;
2519 cost_list[4] = INT_MAX;
2520 }
2521
2522 // Keep track of number of searches (this frame in this thread).
2523 ++(*x->m_search_count_ptr);
2524
2525 switch (method) {
2526 case FAST_DIAMOND:
2527 var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
2528 cost_list, fn_ptr, 1, ref_mv);
2529 break;
2530 case FAST_HEX:
2531 var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
2532 cost_list, fn_ptr, 1, ref_mv);
2533 break;
2534 case HEX:
Yaowu Xuf883b422016-08-30 14:01:10 -07002535 var = av1_hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2536 fn_ptr, 1, ref_mv);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002537 break;
2538 case SQUARE:
2539 var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2540 fn_ptr, 1, ref_mv);
2541 break;
2542 case BIGDIA:
2543 var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2544 fn_ptr, 1, ref_mv);
2545 break;
2546 case NSTEP:
2547 var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
2548 MAX_MVSEARCH_STEPS - 1 - step_param, 1,
2549 cost_list, fn_ptr, ref_mv);
2550
2551 // Should we allow a follow on exhaustive search?
2552 if (is_exhaustive_allowed(cpi, x)) {
Alex Converse6c5d6982017-04-20 14:48:58 -07002553 int exhuastive_thr = sf->exhaustive_searches_thresh;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002554 exhuastive_thr >>=
Alex Converse6c5d6982017-04-20 14:48:58 -07002555 10 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002556
2557 // Threshold variance for an exhaustive full search.
2558 if (var > exhuastive_thr) {
2559 int var_ex;
2560 MV tmp_mv_ex;
2561 var_ex =
2562 full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
2563 cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
2564
2565 if (var_ex < var) {
2566 var = var_ex;
2567 x->best_mv.as_mv = tmp_mv_ex;
2568 }
2569 }
2570 }
2571 break;
2572
2574 default: assert(0 && "Invalid search method.");
2575 }
2576
2577 if (method != NSTEP && rd && var < var_max)
Yaowu Xuf883b422016-08-30 14:01:10 -07002578 var = av1_get_mvpred_var(x, &x->best_mv.as_mv, ref_mv, fn_ptr, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002579
RogerZhoucc5d35d2017-08-07 22:20:15 -07002580#if CONFIG_HASH_ME
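  // Hash-based ME refinement for screen content (a rough outline): hash the
  // current square block, collect candidates with the same hash from the
  // reference frame's hash table (or the current frame's table for intra),
  // keep the few with the smallest |dx| + |dy| from the current position, and
  // adopt a candidate MV only if its av1_get_mvpred_var() cost beats the
  // result of the search above.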
2581 do {
2582 if (!cpi->common.allow_screen_content_tools) {
2583 break;
2584 }
2585 // already single ME
2586 // get block size and original buffer of current block
2587 const int block_height = block_size_high[bsize];
2588 const int block_width = block_size_wide[bsize];
2589 if (block_height == block_width && x_pos >= 0 && y_pos >= 0) {
RogerZhouca865462017-10-05 15:06:27 -07002590 if (block_width == 4 || block_width == 8 || block_width == 16 ||
2591 block_width == 32 || block_width == 64) {
RogerZhoucc5d35d2017-08-07 22:20:15 -07002592 uint8_t *what = x->plane[0].src.buf;
2593 const int what_stride = x->plane[0].src.stride;
2594 block_hash block_hashes[MAX_HASH_MV_TABLE_SIZE];
2595 int costs[MAX_HASH_MV_TABLE_SIZE];
2596 int existing = 0;
2597 int i;
2598 uint32_t hash_value1, hash_value2;
2599 MV best_hash_mv;
2600 int best_hash_cost = INT_MAX;
2601
2602 // for the hashMap
2603 hash_table *ref_frame_hash =
RogerZhoud15e7c12017-09-26 08:49:28 -07002604 intra ? &cpi->common.cur_frame->hash_table
2605 : get_ref_frame_hash_map(cpi,
2606 x->e_mbd.mi[0]->mbmi.ref_frame[0]);
RogerZhoucc5d35d2017-08-07 22:20:15 -07002607
2608 av1_get_block_hash_value(what, what_stride, block_width, &hash_value1,
2609 &hash_value2);
2610
2611 const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
RogerZhoud15e7c12017-09-26 08:49:28 -07002612        // for intra, at least one match can be found: the block itself.
RogerZhou2b6456c2017-10-05 21:08:21 -07002613 if (count <= (intra ? 1 : 0)) {
RogerZhoucc5d35d2017-08-07 22:20:15 -07002614 break;
2615 }
2616
2617 Iterator iterator =
2618 av1_hash_get_first_iterator(ref_frame_hash, hash_value1);
2619 for (i = 0; i < count; i++, iterator_increment(&iterator)) {
2620 block_hash ref_block_hash = *(block_hash *)(iterator_get(&iterator));
2621 if (hash_value2 == ref_block_hash.hash_value2) {
RogerZhoud15e7c12017-09-26 08:49:28 -07002622            // For intra, make sure the prediction comes from a valid area and
2623            // does not predict from the current block.
2624            // TODO(roger): check if this constraint is necessary
2625 if (intra &&
2626 ref_block_hash.y + block_height >
2627 ((y_pos >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2) &&
2628 ref_block_hash.x + block_width >
2629 ((x_pos >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2)) {
2630 continue;
2631 }
RogerZhoucc5d35d2017-08-07 22:20:15 -07002632 int refCost =
2633 abs(ref_block_hash.x - x_pos) + abs(ref_block_hash.y - y_pos);
2634 add_to_sort_table(block_hashes, costs, &existing,
2635 MAX_HASH_MV_TABLE_SIZE, ref_block_hash, refCost);
2636 }
2637 }
2638
2639 if (existing == 0) {
2640 break;
2641 }
2642
2643 for (i = 0; i < existing; i++) {
2644 MV hash_mv;
2645 hash_mv.col = block_hashes[i].x - x_pos;
2646 hash_mv.row = block_hashes[i].y - y_pos;
2647 if (!is_mv_in(&x->mv_limits, &hash_mv)) {
2648 continue;
2649 }
2650 int currHashCost = av1_get_mvpred_var(x, &hash_mv, ref_mv, fn_ptr, 1);
2651 if (currHashCost < best_hash_cost) {
2652 best_hash_cost = currHashCost;
2653 best_hash_mv = hash_mv;
2654 }
2655 }
2656
2657 if (best_hash_cost < var) {
2658 x->second_best_mv = x->best_mv;
2659 x->best_mv.as_mv = best_hash_mv;
2660 var = best_hash_cost;
2661 }
2662 }
2663 }
2664 } while (0);
2665#endif
2666
Yaowu Xuc27fc142016-08-22 16:08:15 -07002667 return var;
2668}
2669
Yaowu Xuc27fc142016-08-22 16:08:15 -07002670/* returns subpixel variance error function */
2671#define DIST(r, c) \
2672 vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
2673
2674/* checks if (r, c) has better score than previous best */
Yaowu Xu0c33b152017-05-08 11:54:10 -07002675#define MVC(r, c) \
2676 (unsigned int)(mvcost \
2677 ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
2678 mvcost[0][((r)-rr)] + (int64_t)mvcost[1][((c)-rc)]) * \
2679 error_per_bit + \
2680 4096) >> \
2681 13 \
2682 : 0)
Yaowu Xuc27fc142016-08-22 16:08:15 -07002683
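/* MVC(r, c) above is the rate term: the joint and component MV costs are
   scaled by error_per_bit and rounded back down with the "+ 4096) >> 13"
   fixed-point step; CHECK_BETTER below adds this rate to the sub-pel
   distortion DIST(r, c) and keeps the running minimum. */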
2684#define CHECK_BETTER(v, r, c) \
2685 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
2686 thismse = (DIST(r, c)); \
2687 if ((v = MVC(r, c) + thismse) < besterr) { \
2688 besterr = v; \
2689 br = r; \
2690 bc = c; \
2691 *distortion = thismse; \
2692 *sse1 = sse; \
2693 } \
2694 } else { \
2695 v = INT_MAX; \
2696 }
2697
2698#undef CHECK_BETTER0
2699#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
2700
2701#undef CHECK_BETTER1
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07002702#define CHECK_BETTER1(v, r, c) \
2703 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
2704 thismse = \
2705 upsampled_obmc_pref_error(xd, mask, vfp, z, pre(y, y_stride, r, c), \
2706 y_stride, sp(c), sp(r), w, h, &sse); \
2707 if ((v = MVC(r, c) + thismse) < besterr) { \
2708 besterr = v; \
2709 br = r; \
2710 bc = c; \
2711 *distortion = thismse; \
2712 *sse1 = sse; \
2713 } \
2714 } else { \
2715 v = INT_MAX; \
Yaowu Xuc27fc142016-08-22 16:08:15 -07002716 }
2717
2718static unsigned int setup_obmc_center_error(
2719 const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
Yaowu Xuf883b422016-08-30 14:01:10 -07002720 const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002721 const uint8_t *const y, int y_stride, int offset, int *mvjcost,
2722 int *mvcost[2], unsigned int *sse1, int *distortion) {
2723 unsigned int besterr;
2724 besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
2725 *distortion = besterr;
2726 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
2727 return besterr;
2728}
2729
2730static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, const int32_t *mask,
Yaowu Xuf883b422016-08-30 14:01:10 -07002731 const aom_variance_fn_ptr_t *vfp,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002732 const int32_t *const wsrc,
2733 const uint8_t *const y, int y_stride,
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07002734 int subpel_x_q3, int subpel_y_q3, int w,
2735 int h, unsigned int *sse) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002736 unsigned int besterr;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02002737#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07002738 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2739 DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07002740 aom_highbd_upsampled_pred(pred16, w, h, subpel_x_q3, subpel_y_q3, y,
Timothy B. Terriberry6d6e1702017-07-22 16:12:36 -07002741 y_stride, xd->bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002742
2743 besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, mask, sse);
2744 } else {
2745 DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
2746#else
2747 DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
2748 (void)xd;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02002749#endif // CONFIG_HIGHBITDEPTH
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07002750 aom_upsampled_pred(pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002751
2752 besterr = vfp->ovf(pred, w, wsrc, mask, sse);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02002753#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07002754 }
2755#endif
2756 return besterr;
2757}
2758
2759static unsigned int upsampled_setup_obmc_center_error(
2760 const MACROBLOCKD *xd, const int32_t *mask, const MV *bestmv,
Yaowu Xuf883b422016-08-30 14:01:10 -07002761 const MV *ref_mv, int error_per_bit, const aom_variance_fn_ptr_t *vfp,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002762 const int32_t *const wsrc, const uint8_t *const y, int y_stride, int w,
2763 int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
2764 int *distortion) {
2765 unsigned int besterr = upsampled_obmc_pref_error(
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07002766 xd, mask, vfp, wsrc, y + offset, y_stride, 0, 0, w, h, sse1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002767 *distortion = besterr;
2768 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
2769 return besterr;
2770}
2771
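// OBMC sub-pixel motion search. Starting from the full-pel bestmv, each
// round evaluates the four plus-shaped neighbours at the current step size
// (hstep starts at 4, i.e. half-pel in 1/8-pel units), then one diagonal
// picked from the better horizontal and vertical candidates, optionally runs
// SECOND_LEVEL_CHECKS_BEST, and finally halves the step. The number of
// rounds is 3 - forced_stop, capped at 2 when allow_hp is 0.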
Yaowu Xuf883b422016-08-30 14:01:10 -07002772int av1_find_best_obmc_sub_pixel_tree_up(
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07002773 MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
2774 int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop,
2775 int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
2776 unsigned int *sse1, int is_second, int use_upsampled_ref) {
Yue Chene9638cc2016-10-10 12:37:54 -07002777 const int32_t *wsrc = x->wsrc_buf;
2778 const int32_t *mask = x->mask_buf;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002779 const int *const z = wsrc;
2780 const int *const src_address = z;
2781 MACROBLOCKD *xd = &x->e_mbd;
2782 struct macroblockd_plane *const pd = &xd->plane[0];
2783 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2784 unsigned int besterr = INT_MAX;
2785 unsigned int sse;
2786 unsigned int thismse;
2787
2788 int rr = ref_mv->row;
2789 int rc = ref_mv->col;
2790 int br = bestmv->row * 8;
2791 int bc = bestmv->col * 8;
2792 int hstep = 4;
2793 int iter;
2794 int round = 3 - forced_stop;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002795 int tr = br;
2796 int tc = bc;
2797 const MV *search_step = search_step_table;
2798 int idx, best_idx = -1;
2799 unsigned int cost_array[5];
2800 int kr, kc;
Jingning Hanae5cfde2016-11-30 12:01:44 -08002801 const int w = block_size_wide[mbmi->sb_type];
2802 const int h = block_size_high[mbmi->sb_type];
Yaowu Xuc27fc142016-08-22 16:08:15 -07002803 int offset;
2804 int y_stride;
2805 const uint8_t *y;
2806
Yunqing Wang8e173422017-04-21 09:27:55 -07002807 int minc, maxc, minr, maxr;
2808
Alex Converse0fa0f422017-04-24 12:51:14 -07002809 av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
2810 ref_mv);
Yunqing Wang8e173422017-04-21 09:27:55 -07002811
Yaowu Xuc27fc142016-08-22 16:08:15 -07002812 y = pd->pre[is_second].buf;
2813 y_stride = pd->pre[is_second].stride;
2814 offset = bestmv->row * y_stride + bestmv->col;
2815
Alex Converse6317c882016-09-29 14:21:37 -07002816 if (!allow_hp)
Yaowu Xuc27fc142016-08-22 16:08:15 -07002817 if (round == 3) round = 2;
2818
2819 bestmv->row *= 8;
2820 bestmv->col *= 8;
2821 // use_upsampled_ref can be 0 or 1
2822 if (use_upsampled_ref)
2823 besterr = upsampled_setup_obmc_center_error(
2824 xd, mask, bestmv, ref_mv, error_per_bit, vfp, z, y, y_stride, w, h,
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07002825 offset, mvjcost, mvcost, sse1, distortion);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002826 else
2827 besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
2828 z, y, y_stride, offset, mvjcost, mvcost,
2829 sse1, distortion);
2830
2831 for (iter = 0; iter < round; ++iter) {
2832 // Check vertical and horizontal sub-pixel positions.
2833 for (idx = 0; idx < 4; ++idx) {
2834 tr = br + search_step[idx].row;
2835 tc = bc + search_step[idx].col;
2836 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
2837 MV this_mv = { tr, tc };
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07002838 const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002839
2840 if (use_upsampled_ref) {
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07002841 thismse =
2842 upsampled_obmc_pref_error(xd, mask, vfp, src_address, pre_address,
2843 y_stride, sp(tc), sp(tr), w, h, &sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002844 } else {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002845 thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
2846 src_address, mask, &sse);
2847 }
2848
2849 cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
2850 mvcost, error_per_bit);
2851 if (cost_array[idx] < besterr) {
2852 best_idx = idx;
2853 besterr = cost_array[idx];
2854 *distortion = thismse;
2855 *sse1 = sse;
2856 }
2857 } else {
2858 cost_array[idx] = INT_MAX;
2859 }
2860 }
2861
2862 // Check diagonal sub-pixel position
2863 kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
2864 kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
2865
2866 tc = bc + kc;
2867 tr = br + kr;
2868 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
2869 MV this_mv = { tr, tc };
2870
2871 if (use_upsampled_ref) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002872 thismse = upsampled_obmc_pref_error(xd, mask, vfp, src_address,
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07002873 pre(y, y_stride, tr, tc), y_stride,
2874 sp(tc), sp(tr), w, h, &sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002875 } else {
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07002876 thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr),
2877 src_address, mask, &sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002878 }
2879
2880 cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
2881 error_per_bit);
2882
2883 if (cost_array[4] < besterr) {
2884 best_idx = 4;
2885 besterr = cost_array[4];
2886 *distortion = thismse;
2887 *sse1 = sse;
2888 }
2889 } else {
2890      cost_array[4] = INT_MAX;
2891 }
2892
2893 if (best_idx < 4 && best_idx >= 0) {
2894 br += search_step[best_idx].row;
2895 bc += search_step[best_idx].col;
2896 } else if (best_idx == 4) {
2897 br = tr;
2898 bc = tc;
2899 }
2900
2901 if (iters_per_step > 1 && best_idx != -1) {
2902 if (use_upsampled_ref) {
2903 SECOND_LEVEL_CHECKS_BEST(1);
2904 } else {
2905 SECOND_LEVEL_CHECKS_BEST(0);
2906 }
2907 }
2908
2909 tr = br;
2910 tc = bc;
2911
2912 search_step += 4;
2913 hstep >>= 1;
2914 best_idx = -1;
2915 }
2916
2917  // These lines ensure static analysis doesn't warn that
2918 // tr and tc aren't used after the above point.
2919 (void)tr;
2920 (void)tc;
2921
2922 bestmv->row = br;
2923 bestmv->col = bc;
2924
Yaowu Xuc27fc142016-08-22 16:08:15 -07002925 return besterr;
2926}
2927
2928#undef DIST
2929#undef MVC
2930#undef CHECK_BETTER
2931
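// Re-scores a full-pel MV with the OBMC variance function (vfp->ovf) plus,
// optionally, its rate cost; used to put the SAD-based full-pel search
// results on the same scale as the variance-based refinement.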
2932static int get_obmc_mvpred_var(const MACROBLOCK *x, const int32_t *wsrc,
2933 const int32_t *mask, const MV *best_mv,
2934 const MV *center_mv,
Yaowu Xuf883b422016-08-30 14:01:10 -07002935 const aom_variance_fn_ptr_t *vfp, int use_mvcost,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002936 int is_second) {
2937 const MACROBLOCKD *const xd = &x->e_mbd;
2938 const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
2939 const MV mv = { best_mv->row * 8, best_mv->col * 8 };
2940 unsigned int unused;
2941
2942 return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride, wsrc,
2943 mask, &unused) +
2944 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
2945 x->errorperbit)
2946 : 0);
2947}
2948
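// Full-pel refinement on the OBMC SAD: repeatedly tries the four plus-shaped
// single-pel neighbours of ref_mv (up to search_range iterations), moving to
// a neighbour only if its SAD, biased by the MV cost, improves on the
// current best, and stopping as soon as no neighbour improves.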
2949int obmc_refining_search_sad(const MACROBLOCK *x, const int32_t *wsrc,
2950 const int32_t *mask, MV *ref_mv, int error_per_bit,
2951 int search_range,
Yaowu Xuf883b422016-08-30 14:01:10 -07002952 const aom_variance_fn_ptr_t *fn_ptr,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002953 const MV *center_mv, int is_second) {
2954 const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
2955 const MACROBLOCKD *const xd = &x->e_mbd;
2956 const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
2957 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2958 unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv),
2959 in_what->stride, wsrc, mask) +
2960 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2961 int i, j;
2962
2963 for (i = 0; i < search_range; i++) {
2964 int best_site = -1;
2965
2966 for (j = 0; j < 4; j++) {
2967 const MV mv = { ref_mv->row + neighbors[j].row,
2968 ref_mv->col + neighbors[j].col };
Alex Converse0fa0f422017-04-24 12:51:14 -07002969 if (is_mv_in(&x->mv_limits, &mv)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002970 unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv),
2971 in_what->stride, wsrc, mask);
2972 if (sad < best_sad) {
2973 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2974 if (sad < best_sad) {
2975 best_sad = sad;
2976 best_site = j;
2977 }
2978 }
2979 }
2980 }
2981
2982 if (best_site == -1) {
2983 break;
2984 } else {
2985 ref_mv->row += neighbors[best_site].row;
2986 ref_mv->col += neighbors[best_site].col;
2987 }
2988 }
2989 return best_sad;
2990}
2991
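// OBMC version of the diamond search: walks the precomputed search-site
// pattern in 'cfg' with the OBMC SAD (fn_ptr->osdf), biasing comparisons by
// the MV cost. *num00 counts the steps on which the search stayed at its
// starting position, which the caller uses to skip redundant passes.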
2992int obmc_diamond_search_sad(const MACROBLOCK *x, const search_site_config *cfg,
2993 const int32_t *wsrc, const int32_t *mask,
2994 MV *ref_mv, MV *best_mv, int search_param,
2995 int sad_per_bit, int *num00,
Yaowu Xuf883b422016-08-30 14:01:10 -07002996 const aom_variance_fn_ptr_t *fn_ptr,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002997 const MV *center_mv, int is_second) {
2998 const MACROBLOCKD *const xd = &x->e_mbd;
2999 const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
3000  // search_param determines the length of the initial step and hence the
3001  // number of iterations:
3002  // 0 = initial step (MAX_FIRST_STEP) pel, 1 = (MAX_FIRST_STEP/2) pel,
3003  // 2 = (MAX_FIRST_STEP/4) pel, etc.
3004 const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
3005 const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
3006 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
3007 const uint8_t *best_address, *in_what_ref;
3008 int best_sad = INT_MAX;
3009 int best_site = 0;
3010 int last_site = 0;
3011 int i, j, step;
3012
Alex Converse0fa0f422017-04-24 12:51:14 -07003013 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
3014 x->mv_limits.row_min, x->mv_limits.row_max);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003015 in_what_ref = in_what->buf + ref_mv->row * in_what->stride + ref_mv->col;
3016 best_address = in_what_ref;
3017 *num00 = 0;
3018 *best_mv = *ref_mv;
3019
3020 // Check the starting position
3021 best_sad = fn_ptr->osdf(best_address, in_what->stride, wsrc, mask) +
3022 mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
3023
3024 i = 1;
3025
3026 for (step = 0; step < tot_steps; step++) {
3027 for (j = 0; j < cfg->searches_per_step; j++) {
3028 const MV mv = { best_mv->row + ss[i].mv.row,
3029 best_mv->col + ss[i].mv.col };
Alex Converse0fa0f422017-04-24 12:51:14 -07003030 if (is_mv_in(&x->mv_limits, &mv)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07003031 int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride,
3032 wsrc, mask);
3033 if (sad < best_sad) {
3034 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
3035 if (sad < best_sad) {
3036 best_sad = sad;
3037 best_site = i;
3038 }
3039 }
3040 }
3041
3042 i++;
3043 }
3044
3045 if (best_site != last_site) {
3046 best_mv->row += ss[best_site].mv.row;
3047 best_mv->col += ss[best_site].mv.col;
3048 best_address += ss[best_site].offset;
3049 last_site = best_site;
3050#if defined(NEW_DIAMOND_SEARCH)
3051 while (1) {
3052 const MV this_mv = { best_mv->row + ss[best_site].mv.row,
3053 best_mv->col + ss[best_site].mv.col };
Alex Converse0fa0f422017-04-24 12:51:14 -07003054 if (is_mv_in(&x->mv_limits, &this_mv)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07003055 int sad = fn_ptr->osdf(best_address + ss[best_site].offset,
3056 in_what->stride, wsrc, mask);
3057 if (sad < best_sad) {
3058 sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
3059 if (sad < best_sad) {
3060 best_sad = sad;
3061 best_mv->row += ss[best_site].mv.row;
3062 best_mv->col += ss[best_site].mv.col;
3063 best_address += ss[best_site].offset;
3064 continue;
3065 }
3066 }
3067 }
3068 break;
3069 }
3070#endif
3071 } else if (best_address == in_what_ref) {
3072 (*num00)++;
3073 }
3074 }
3075 return best_sad;
3076}
3077
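// Full-pel OBMC motion search: runs obmc_diamond_search_sad() at step_param,
// then up to 'further_steps' additional passes with progressively smaller
// initial steps (skipping passes already covered when the previous pass
// reported stationary steps via num00), and optionally a final refining
// search around the best MV. Each pass's result is re-scored with
// get_obmc_mvpred_var() before being compared against the best so far.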
Yaowu Xuf883b422016-08-30 14:01:10 -07003078int av1_obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
Yaowu Xuf883b422016-08-30 14:01:10 -07003079 MV *mvp_full, int step_param, int sadpb,
3080 int further_steps, int do_refine,
3081 const aom_variance_fn_ptr_t *fn_ptr,
3082 const MV *ref_mv, MV *dst_mv, int is_second) {
Yue Chene9638cc2016-10-10 12:37:54 -07003083 const int32_t *wsrc = x->wsrc_buf;
3084 const int32_t *mask = x->mask_buf;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003085 MV temp_mv;
3086 int thissme, n, num00 = 0;
3087 int bestsme =
3088 obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full, &temp_mv,
3089 step_param, sadpb, &n, fn_ptr, ref_mv, is_second);
3090 if (bestsme < INT_MAX)
3091 bestsme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr, 1,
3092 is_second);
3093 *dst_mv = temp_mv;
3094
3095 // If there won't be more n-step search, check to see if refining search is
3096 // needed.
3097 if (n > further_steps) do_refine = 0;
3098
3099 while (n < further_steps) {
3100 ++n;
3101
3102 if (num00) {
3103 num00--;
3104 } else {
3105 thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full,
3106 &temp_mv, step_param + n, sadpb, &num00,
3107 fn_ptr, ref_mv, is_second);
3108 if (thissme < INT_MAX)
3109 thissme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr,
3110 1, is_second);
3111
3112 // check to see if refining search is needed.
3113 if (num00 > further_steps - n) do_refine = 0;
3114
3115 if (thissme < bestsme) {
3116 bestsme = thissme;
3117 *dst_mv = temp_mv;
3118 }
3119 }
3120 }
3121
3122 // final 1-away diamond refining search
3123 if (do_refine) {
3124 const int search_range = 8;
3125 MV best_mv = *dst_mv;
3126 thissme = obmc_refining_search_sad(x, wsrc, mask, &best_mv, sadpb,
3127 search_range, fn_ptr, ref_mv, is_second);
3128 if (thissme < INT_MAX)
3129 thissme = get_obmc_mvpred_var(x, wsrc, mask, &best_mv, ref_mv, fn_ptr, 1,
3130 is_second);
3131 if (thissme < bestsme) {
3132 bestsme = thissme;
3133 *dst_mv = best_mv;
3134 }
3135 }
3136 return bestsme;
3137}
Yunqing Wangff4fa062017-04-21 10:56:08 -07003138
3139// Note(yunqingwang): The following 2 functions are only used in the motion
3140// vector unit test; they return the extreme motion vectors allowed by the MV
3141// limits.
3142#define COMMON_MV_TEST \
3143 SETUP_SUBPEL_SEARCH; \
3144 \
3145 (void)error_per_bit; \
3146 (void)vfp; \
3147 (void)src_address; \
3148 (void)src_stride; \
3149 (void)y; \
3150 (void)y_stride; \
3151 (void)second_pred; \
3152 (void)w; \
3153 (void)h; \
3154 (void)use_upsampled_ref; \
3155 (void)offset; \
3156 (void)mvjcost; \
3157 (void)mvcost; \
3158 (void)sse1; \
3159 (void)distortion; \
3160 \
3161 (void)halfiters; \
3162 (void)quarteriters; \
3163 (void)eighthiters; \
3164 (void)whichdir; \
3165 (void)forced_stop; \
3166 (void)hstep; \
3167 \
3168 (void)tr; \
3169 (void)tc; \
3170 (void)sse; \
3171 (void)thismse; \
3172 (void)cost_list;
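// COMMON_MV_TEST expands SETUP_SUBPEL_SEARCH (defined earlier in this file)
// to set up the usual sub-pel search locals, then (void)s every parameter
// and local that the two test stubs below do not use.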
3173// Return the maximum MV.
David Barkerc155e012017-05-11 13:54:54 +01003174int av1_return_max_sub_pixel_mv(
3175 MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
3176 const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
3177 int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02003178 unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
3179 int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
Yunqing Wangff4fa062017-04-21 10:56:08 -07003180 COMMON_MV_TEST;
David Barkerc155e012017-05-11 13:54:54 +01003181 (void)mask;
3182 (void)mask_stride;
3183 (void)invert_mask;
Yunqing Wangff4fa062017-04-21 10:56:08 -07003184 (void)minr;
3185 (void)minc;
3186 bestmv->row = maxr;
3187 bestmv->col = maxc;
3188 besterr = 0;
RogerZhou3b635242017-09-19 10:06:46 -07003189// In the sub-pel motion search, if hp is not used, then the last bit of mv
3190// has to be 0.
3191#if CONFIG_AMVR
3192 lower_mv_precision(bestmv, allow_hp, 0);
3193#else
Yunqing Wangff4fa062017-04-21 10:56:08 -07003194 lower_mv_precision(bestmv, allow_hp);
RogerZhou3b635242017-09-19 10:06:46 -07003195#endif
Yunqing Wangff4fa062017-04-21 10:56:08 -07003196 return besterr;
3197}
3198// Return the minimum MV.
David Barkerc155e012017-05-11 13:54:54 +01003199int av1_return_min_sub_pixel_mv(
3200 MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
3201 const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
3202 int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02003203 unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
3204 int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
Yunqing Wangff4fa062017-04-21 10:56:08 -07003205 COMMON_MV_TEST;
3206 (void)maxr;
3207 (void)maxc;
David Barkerc155e012017-05-11 13:54:54 +01003208 (void)mask;
3209 (void)mask_stride;
3210 (void)invert_mask;
Yunqing Wangff4fa062017-04-21 10:56:08 -07003211 bestmv->row = minr;
3212 bestmv->col = minc;
3213 besterr = 0;
RogerZhou3b635242017-09-19 10:06:46 -07003214// In the sub-pel motion search, if hp is not used, then the last bit of mv
3215// has to be 0.
3216#if CONFIG_AMVR
3217 lower_mv_precision(bestmv, allow_hp, 0);
3218#else
Yunqing Wangff4fa062017-04-21 10:56:08 -07003219 lower_mv_precision(bestmv, allow_hp);
RogerZhou3b635242017-09-19 10:06:46 -07003220#endif
Yunqing Wangff4fa062017-04-21 10:56:08 -07003221 return besterr;
3222}