/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"

#include "av1/common/common.h"
#include "av1/common/reconinter.h"

#include "av1/encoder/encoder.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/rdopt.h"

// #define NEW_DIAMOND_SEARCH

static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
                                             const MV *mv) {
  return &buf->buf[mv->row * buf->stride + mv->col];
}

void av1_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
  int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;

  col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1);
  row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1);
  col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1);
  row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1);

  // Get intersection of UMV window and valid MV window to reduce # of checks
  // in diamond search.
  if (x->mv_col_min < col_min) x->mv_col_min = col_min;
  if (x->mv_col_max > col_max) x->mv_col_max = col_max;
  if (x->mv_row_min < row_min) x->mv_row_min = row_min;
  if (x->mv_row_max > row_max) x->mv_row_max = row_max;
}
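
// Note: mv and the MV_LOW / MV_UPP limits are in 1/8-pel units, so the >> 3
// above converts them to full-pel before intersecting with the existing
// x->mv_{row,col}_{min,max} window used by the full-pel searches below.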

int av1_init_search_range(int size) {
  int sr = 0;
  // Enforce a minimum search size regardless of the passed-in value.
  size = AOMMAX(16, size);

  while ((size << sr) < MAX_FULL_PEL_VAL) sr++;

  sr = AOMMIN(sr, MAX_MVSEARCH_STEPS - 2);
  return sr;
}
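
// Illustrative example (assuming MAX_FULL_PEL_VAL is 1023 and
// MAX_MVSEARCH_STEPS is 11, as defined elsewhere in this codebase):
// av1_init_search_range(64) keeps size at AOMMAX(16, 64) = 64 and returns the
// smallest sr with (64 << sr) >= 1023, i.e. sr = 4, which is well under the
// MAX_MVSEARCH_STEPS - 2 = 9 cap.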

static INLINE int mv_cost(const MV *mv, const int *joint_cost,
                          int *const comp_cost[2]) {
  return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] +
         comp_cost[1][mv->col];
}

int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
                    int *mvcost[2], int weight) {
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
  return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
}

#define PIXEL_TRANSFORM_ERROR_SCALE 4
static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
                       int *mvcost[2], int error_per_bit) {
  if (mvcost) {
    const MV diff = { mv->row - ref->row, mv->col - ref->col };
    // This product sits at a 32-bit ceiling right now and any additional
    // accuracy in either bit cost or error cost will cause it to overflow.
    return ROUND_POWER_OF_TWO(
        (unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
        RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT +
            PIXEL_TRANSFORM_ERROR_SCALE);
  }
  return 0;
}

static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
                          int sad_per_bit) {
  const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
  return ROUND_POWER_OF_TWO(
      (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->mvsadcost) * sad_per_bit,
      AV1_PROB_COST_SHIFT);
}
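
// Note the * 8 above: the SAD searches operate on full-pel vectors, while the
// mv cost tables used by mv_cost() are indexed in 1/8-pel units, so the
// full-pel difference is scaled up before it is costed.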

void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) {
  int len, ss_count = 1;

  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
  cfg->ss[0].offset = 0;

  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    // Generate offsets for 4 search sites per step.
    const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
    int i;
    for (i = 0; i < 4; ++i) {
      search_site *const ss = &cfg->ss[ss_count++];
      ss->mv = ss_mvs[i];
      ss->offset = ss->mv.row * stride + ss->mv.col;
    }
  }

  cfg->ss_count = ss_count;
  cfg->searches_per_step = 4;
}
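
// The resulting configuration is one central site followed by groups of four
// diamond-pattern sites whose radius halves each step; ss->offset caches the
// buffer displacement for the given stride so the search loop can address a
// candidate without recomputing row * stride + col.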

void av1_init3smotion_compensation(search_site_config *cfg, int stride) {
  int len, ss_count = 1;

  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
  cfg->ss[0].offset = 0;

  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    // Generate offsets for 8 search sites per step.
    const MV ss_mvs[8] = { { -len, 0 },   { len, 0 },     { 0, -len },
                           { 0, len },    { -len, -len }, { -len, len },
                           { len, -len }, { len, len } };
    int i;
    for (i = 0; i < 8; ++i) {
      search_site *const ss = &cfg->ss[ss_count++];
      ss->mv = ss_mvs[i];
      ss->offset = ss->mv.row * stride + ss->mv.col;
    }
  }

  cfg->ss_count = ss_count;
  cfg->searches_per_step = 8;
}

/*
 * To avoid the penalty of cache-line-crossing reads, preload the reference
 * area into a small aligned buffer so that reads from that buffer never cross
 * a cache line. This reduces the CPU cycles spent reading reference data in
 * the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22-row x 32-col area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */

// Convert a motion vector component to an offset for the sv[a]f calculation.
static INLINE int sp(int x) { return x & 7; }

static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  return &buf[(r >> 3) * stride + (c >> 3)];
}

/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
    MV this_mv = { r, c }; \
    v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
    if (second_pred == NULL) \
      thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
                         src_address, src_stride, &sse); \
    else \
      thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
                          src_address, src_stride, &sse, second_pred); \
    v += thismse; \
    if (v < besterr) { \
      besterr = v; \
      br = r; \
      bc = c; \
      *distortion = thismse; \
      *sse1 = sse; \
    } \
  } else { \
    v = INT_MAX; \
  }
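
// CHECK_BETTER relies on locals declared by SETUP_SUBPEL_SEARCH in the
// enclosing function (src_address, y, y_stride, br, bc, besterr, thismse,
// sse, sse1, distortion and the minc/maxc/minr/maxr bounds); points outside
// those bounds are assigned a cost of INT_MAX so they can never win.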

#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)

static INLINE const uint8_t *upre(const uint8_t *buf, int stride, int r,
                                  int c) {
  return &buf[(r)*stride + (c)];
}

/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER1(v, r, c) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
    MV this_mv = { r, c }; \
    thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, \
                                   upre(y, y_stride, r, c), y_stride, \
                                   second_pred, w, h, &sse); \
    v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
    v += thismse; \
    if (v < besterr) { \
      besterr = v; \
      br = r; \
      bc = c; \
      *distortion = thismse; \
      *sse1 = sse; \
    } \
  } else { \
    v = INT_MAX; \
  }

#define FIRST_LEVEL_CHECKS \
  { \
    unsigned int left, right, up, down, diag; \
    CHECK_BETTER(left, tr, tc - hstep); \
    CHECK_BETTER(right, tr, tc + hstep); \
    CHECK_BETTER(up, tr - hstep, tc); \
    CHECK_BETTER(down, tr + hstep, tc); \
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \
    switch (whichdir) { \
      case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
      case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
      case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
      case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
    } \
  }
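
// whichdir encodes which horizontal and vertical neighbours were cheaper:
// bit 0 is set when the right point costs no more than the left one, bit 1
// when the lower point costs no more than the upper one; the single diagonal
// that combines the two cheaper directions is then checked.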

#define SECOND_LEVEL_CHECKS \
  { \
    int kr, kc; \
    unsigned int second; \
    if (tr != br && tc != bc) { \
      kr = br - tr; \
      kc = bc - tc; \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
    } else if (tr == br && tc != bc) { \
      kc = bc - tc; \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
      switch (whichdir) { \
        case 0: \
        case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
        case 2: \
        case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
      } \
    } else if (tr != br && tc == bc) { \
      kr = br - tr; \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
      switch (whichdir) { \
        case 0: \
        case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
        case 1: \
        case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
      } \
    } \
  }

// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrite of
// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
// later in the same way.
#define SECOND_LEVEL_CHECKS_BEST(k) \
  { \
    unsigned int second; \
    int br0 = br; \
    int bc0 = bc; \
    assert(tr == br || tc == bc); \
    if (tr == br && tc != bc) { \
      kc = bc - tc; \
    } else if (tr != br && tc == bc) { \
      kr = br - tr; \
    } \
    CHECK_BETTER##k(second, br0 + kr, bc0); \
    CHECK_BETTER##k(second, br0, bc0 + kc); \
    if (br0 != br || bc0 != bc) { \
      CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \
    } \
  }

#define SETUP_SUBPEL_SEARCH \
  const uint8_t *const src_address = x->plane[0].src.buf; \
  const int src_stride = x->plane[0].src.stride; \
  const MACROBLOCKD *xd = &x->e_mbd; \
  unsigned int besterr = INT_MAX; \
  unsigned int sse; \
  unsigned int whichdir; \
  int thismse; \
  MV *bestmv = &x->best_mv.as_mv; \
  const unsigned int halfiters = iters_per_step; \
  const unsigned int quarteriters = iters_per_step; \
  const unsigned int eighthiters = iters_per_step; \
  const int y_stride = xd->plane[0].pre[0].stride; \
  const int offset = bestmv->row * y_stride + bestmv->col; \
  const uint8_t *const y = xd->plane[0].pre[0].buf; \
  \
  int br = bestmv->row * 8; \
  int bc = bestmv->col * 8; \
  int hstep = 4; \
  const int minc = AOMMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \
  const int maxc = AOMMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \
  const int minr = AOMMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \
  const int maxr = AOMMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \
  int tr = br; \
  int tc = bc; \
  \
  bestmv->row *= 8; \
  bestmv->col *= 8;
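
// SETUP_SUBPEL_SEARCH declares the shared state for the sub-pixel searches
// below: br/bc/tr/tc and the min/max bounds are kept in 1/8-pel units (hence
// the * 8 conversions), and hstep starts at 4 (half-pel) before being halved
// for the quarter- and eighth-pel rounds.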

static unsigned int setup_center_error(
    const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
    int error_per_bit, const aom_variance_fn_ptr_t *vfp,
    const uint8_t *const src, const int src_stride, const uint8_t *const y,
    int y_stride, const uint8_t *second_pred, int w, int h, int offset,
    int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) {
  unsigned int besterr;
#if CONFIG_AOM_HIGHBITDEPTH
  if (second_pred != NULL) {
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
      aom_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
                               y_stride);
      besterr =
          vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
    } else {
      DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
      aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
      besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
    }
  } else {
    besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  }
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
#else
  (void)xd;
  if (second_pred != NULL) {
    DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
    aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
    besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  } else {
    besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  }
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
#endif  // CONFIG_AOM_HIGHBITDEPTH
  return besterr;
}

static INLINE int divide_and_round(const int n, const int d) {
  return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
}

static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
         cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
}

// Returns surface minima estimate at given precision in 1/2^n bits.
// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
// For a given set of costs S0, S1, S2, S3, S4 at points
// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
// the solution for the location of the minima (x0, y0) is given by:
// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
// The code below is an integerized version of that.
static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
  *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
                         (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
  *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
                         (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
}
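
// Worked example (hypothetical numbers): with cost_list = { 100, 120, 110,
// 104, 130 } and bits = 2 (quarter-pel output),
//   ic = divide_and_round((120 - 104) * 2, 120 - 200 + 104)
//      = divide_and_round(32, 24) = 1
//   ir = divide_and_round((130 - 110) * 2, 130 - 200 + 110)
//      = divide_and_round(40, 40) = 1
// i.e. the fitted minimum lies one quarter-pel right of and below the centre,
// matching the continuous solution of roughly (0.33, 0.25) pel.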

int av1_find_best_sub_pixel_tree_pruned_evenmore(
    MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, int w, int h,
    int use_upsampled_ref) {
  SETUP_SUBPEL_SEARCH;
  besterr = setup_center_error(
      xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
      y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);
  (void)halfiters;
  (void)quarteriters;
  (void)eighthiters;
  (void)whichdir;
  (void)allow_hp;
  (void)forced_stop;
  (void)hstep;
  (void)use_upsampled_ref;

  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
    int ir, ic;
    unsigned int minpt;
    get_cost_surf_min(cost_list, &ir, &ic, 2);
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }

    tr = br;
    tc = bc;

    // Each subsequent iteration checks at least one point in common with
    // the last iteration (two if the diagonal was selected), at 1/4 pel.
    // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    if (forced_stop != 2) {
      hstep >>= 1;
      FIRST_LEVEL_CHECKS;
      if (quarteriters > 1) {
        SECOND_LEVEL_CHECKS;
      }
    }
  }

  tr = br;
  tc = bc;

  if (allow_hp && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  bestmv->row = br;
  bestmv->col = bc;

  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

int av1_find_best_sub_pixel_tree_pruned_more(
    MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, int w, int h,
    int use_upsampled_ref) {
  SETUP_SUBPEL_SEARCH;
  (void)use_upsampled_ref;

  besterr = setup_center_error(
      xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
      y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
    unsigned int minpt;
    int ir, ic;
    get_cost_surf_min(cost_list, &ir, &ic, 1);
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  // Each subsequent iteration checks at least one point in common with
  // the last iteration (two if the diagonal was selected), at 1/4 pel.

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  if (allow_hp && forced_stop == 0) {
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }
  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

int av1_find_best_sub_pixel_tree_pruned(
    MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, int w, int h,
    int use_upsampled_ref) {
  SETUP_SUBPEL_SEARCH;
  (void)use_upsampled_ref;

  besterr = setup_center_error(
      xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
      y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX) {
    unsigned int left, right, up, down, diag;
    whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
               (cost_list[2] < cost_list[4] ? 0 : 2);
    switch (whichdir) {
      case 0:
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc - hstep);
        break;
      case 1:
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc + hstep);
        break;
      case 2:
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc - hstep);
        break;
      case 3:
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc + hstep);
        break;
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration (two if the diagonal was selected), at 1/4 pel.

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  if (allow_hp && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

/* clang-format off */
static const MV search_step_table[12] = {
  // left, right, up, down
  { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
  { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
  { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
};
/* clang-format on */
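
// Each row of search_step_table corresponds to one refinement round of
// av1_find_best_sub_pixel_tree below: half-pel (+/-4 in 1/8-pel units), then
// quarter-pel (+/-2), then eighth-pel (+/-1); the loop advances search_step
// by 4 and halves hstep after every round.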

static int upsampled_pref_error(const MACROBLOCKD *xd,
                                const aom_variance_fn_ptr_t *vfp,
                                const uint8_t *const src, const int src_stride,
                                const uint8_t *const y, int y_stride,
                                const uint8_t *second_pred, int w, int h,
                                unsigned int *sse) {
  unsigned int besterr;
#if CONFIG_AOM_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
    if (second_pred != NULL)
      aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
                                         y_stride);
    else
      aom_highbd_upsampled_pred(pred16, w, h, y, y_stride);

    besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, sse);
  } else {
    DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
#else
  DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
  (void)xd;
#endif  // CONFIG_AOM_HIGHBITDEPTH
  if (second_pred != NULL)
    aom_comp_avg_upsampled_pred(pred, second_pred, w, h, y, y_stride);
  else
    aom_upsampled_pred(pred, w, h, y, y_stride);

  besterr = vfp->vf(pred, w, src, src_stride, sse);
#if CONFIG_AOM_HIGHBITDEPTH
  }
#endif
  return besterr;
}

static unsigned int upsampled_setup_center_error(
    const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
    int error_per_bit, const aom_variance_fn_ptr_t *vfp,
    const uint8_t *const src, const int src_stride, const uint8_t *const y,
    int y_stride, const uint8_t *second_pred, int w, int h, int offset,
    int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) {
  unsigned int besterr = upsampled_pref_error(
      xd, vfp, src, src_stride, y + offset, y_stride, second_pred, w, h, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  return besterr;
}

int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const MV *ref_mv, int allow_hp,
                                 int error_per_bit,
                                 const aom_variance_fn_ptr_t *vfp,
                                 int forced_stop, int iters_per_step,
                                 int *cost_list, int *mvjcost, int *mvcost[2],
                                 int *distortion, unsigned int *sse1,
                                 const uint8_t *second_pred, int w, int h,
                                 int use_upsampled_ref) {
  const uint8_t *const src_address = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  const MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int thismse;
  const int y_stride = xd->plane[0].pre[0].stride;
  MV *bestmv = &x->best_mv.as_mv;
  const int offset = bestmv->row * y_stride + bestmv->col;
  const uint8_t *const y = xd->plane[0].pre[0].buf;

  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
  int hstep = 4;
  int iter, round = 3 - forced_stop;
  const int minc = AOMMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
  const int maxc = AOMMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
  const int minr = AOMMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
  const int maxr = AOMMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
  int tr = br;
  int tc = bc;
  const MV *search_step = search_step_table;
  int idx, best_idx = -1;
  unsigned int cost_array[5];
  int kr, kc;

  if (!allow_hp)
    if (round == 3) round = 2;

  bestmv->row *= 8;
  bestmv->col *= 8;

  // use_upsampled_ref can be 0 or 1
  if (use_upsampled_ref)
    besterr = upsampled_setup_center_error(
        xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
        y_stride, second_pred, w, h, (offset * 8), mvjcost, mvcost, sse1,
        distortion);
  else
    besterr = setup_center_error(
        xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
        y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);

  (void)cost_list;  // to silence compiler warning

  for (iter = 0; iter < round; ++iter) {
    // Check vertical and horizontal sub-pixel positions.
    for (idx = 0; idx < 4; ++idx) {
      tr = br + search_step[idx].row;
      tc = bc + search_step[idx].col;
      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
        MV this_mv = { tr, tc };

        if (use_upsampled_ref) {
          const uint8_t *const pre_address = y + tr * y_stride + tc;

          thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
                                         pre_address, y_stride, second_pred, w,
                                         h, &sse);
        } else {
          const uint8_t *const pre_address =
              y + (tr >> 3) * y_stride + (tc >> 3);
          if (second_pred == NULL)
            thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                               src_address, src_stride, &sse);
          else
            thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                                src_address, src_stride, &sse, second_pred);
        }

        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
                                                mvcost, error_per_bit);

        if (cost_array[idx] < besterr) {
          best_idx = idx;
          besterr = cost_array[idx];
          *distortion = thismse;
          *sse1 = sse;
        }
      } else {
        cost_array[idx] = INT_MAX;
      }
    }

    // Check diagonal sub-pixel position
    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);

    tc = bc + kc;
    tr = br + kr;
    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
      MV this_mv = { tr, tc };

      if (use_upsampled_ref) {
        const uint8_t *const pre_address = y + tr * y_stride + tc;

        thismse =
            upsampled_pref_error(xd, vfp, src_address, src_stride, pre_address,
                                 y_stride, second_pred, w, h, &sse);
      } else {
        const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);

        if (second_pred == NULL)
          thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
                             src_stride, &sse);
        else
          thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, src_stride, &sse, second_pred);
      }

      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
                                            error_per_bit);

      if (cost_array[4] < besterr) {
        best_idx = 4;
        besterr = cost_array[4];
        *distortion = thismse;
        *sse1 = sse;
      }
    } else {
      cost_array[idx] = INT_MAX;
    }

    if (best_idx < 4 && best_idx >= 0) {
      br += search_step[best_idx].row;
      bc += search_step[best_idx].col;
    } else if (best_idx == 4) {
      br = tr;
      bc = tc;
    }

    if (iters_per_step > 1 && best_idx != -1) {
      if (use_upsampled_ref) {
        SECOND_LEVEL_CHECKS_BEST(1);
      } else {
        SECOND_LEVEL_CHECKS_BEST(0);
      }
    }

    search_step += 4;
    hstep >>= 1;
    best_idx = -1;
  }

  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

#undef PRE
#undef CHECK_BETTER

static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
                               int range) {
  return ((row - range) >= x->mv_row_min) & ((row + range) <= x->mv_row_max) &
         ((col - range) >= x->mv_col_min) & ((col + range) <= x->mv_col_max);
}

static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
  return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
         (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
}

#define CHECK_BETTER \
  { \
    if (thissad < bestsad) { \
      if (use_mvcost) \
        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
      if (thissad < bestsad) { \
        bestsad = thissad; \
        best_site = i; \
      } \
    } \
  }
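
// This SAD variant of CHECK_BETTER compares the raw SAD first and only adds
// the motion vector rate term when the candidate already beats the current
// best, so mvsad_err_cost() is skipped for clearly inferior points.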

#define MAX_PATTERN_SCALES 11
#define MAX_PATTERN_CANDIDATES 8  // max number of candidates per scale
#define PATTERN_CANDIDATES_REF 3  // number of refinement candidates

// Calculate and return a sad+mvcost list around an integer best pel.
static INLINE void calc_int_cost_list(const MACROBLOCK *x,
                                      const MV *const ref_mv, int sadpb,
                                      const aom_variance_fn_ptr_t *fn_ptr,
                                      const MV *best_mv, int *cost_list) {
  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
  const int br = best_mv->row;
  const int bc = best_mv->col;
  int i;
  unsigned int sse;
  const MV this_mv = { br, bc };

  cost_list[0] =
      fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
                 in_what->stride, &sse) +
      mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
  if (check_bounds(x, br, bc, 1)) {
    for (i = 0; i < 4; i++) {
      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
      cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
                                    get_buf_from_mv(in_what, &neighbor_mv),
                                    in_what->stride, &sse) +
                         mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost,
                                     x->mvcost, x->errorperbit);
    }
  } else {
    for (i = 0; i < 4; i++) {
      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
      if (!is_mv_in(x, &neighbor_mv))
        cost_list[i + 1] = INT_MAX;
      else
        cost_list[i + 1] =
            fn_ptr->vf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &neighbor_mv), in_what->stride,
                       &sse) +
            mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
                        x->errorperbit);
    }
  }
}
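
// cost_list layout produced above: index 0 is the centre, and indices 1..4
// follow the neighbors[] table, i.e. left, below, right and above the best
// integer-pel position.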

static INLINE void calc_int_sad_list(const MACROBLOCK *x,
                                     const MV *const ref_mv, int sadpb,
                                     const aom_variance_fn_ptr_t *fn_ptr,
                                     const MV *best_mv, int *cost_list,
                                     const int use_mvcost, const int bestsad) {
  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
  int i;
  const int br = best_mv->row;
  const int bc = best_mv->col;

  if (cost_list[0] == INT_MAX) {
    cost_list[0] = bestsad;
    if (check_bounds(x, br, bc, 1)) {
      for (i = 0; i < 4; i++) {
        const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
        cost_list[i + 1] =
            fn_ptr->sdf(what->buf, what->stride,
                        get_buf_from_mv(in_what, &this_mv), in_what->stride);
      }
    } else {
      for (i = 0; i < 4; i++) {
        const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
        if (!is_mv_in(x, &this_mv))
          cost_list[i + 1] = INT_MAX;
        else
          cost_list[i + 1] =
              fn_ptr->sdf(what->buf, what->stride,
                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
      }
    }
  } else {
    if (use_mvcost) {
      for (i = 0; i < 4; i++) {
        const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
        if (cost_list[i + 1] != INT_MAX) {
          cost_list[i + 1] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
        }
      }
    }
  }
}

// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
//
static int pattern_search(
    MACROBLOCK *x, MV *start_mv, int search_param, int sad_per_bit,
    int do_init_search, int *cost_list, const aom_variance_fn_ptr_t *vfp,
    int use_mvcost, const MV *center_mv,
    const int num_candidates[MAX_PATTERN_SCALES],
    const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  int i, s, t;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const int last_is_4 = num_candidates[0] == 4;
  int br, bc;
  int bestsad = INT_MAX;
  int thissad;
  int k = -1;
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  int best_init_s = search_param_to_steps[search_param];
  // adjust ref_mv to make sure it is within MV range
  clamp_mv(start_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
           x->mv_row_max);
  br = start_mv->row;
  bc = start_mv->col;
  if (cost_list != NULL) {
    cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
        INT_MAX;
  }

  // Work out the start point for the search
  bestsad = vfp->sdf(what->buf, what->stride,
                     get_buf_from_mv(in_what, start_mv), in_what->stride) +
            mvsad_err_cost(x, start_mv, &fcenter_mv, sad_per_bit);

  // Search all possible scales up to the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
      int best_site = -1;
      if (check_bounds(x, br, bc, 1 << t)) {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = { br + candidates[t][i].row,
                               bc + candidates[t][i].col };
          if (!is_mv_in(x, &this_mv)) continue;
          thissad =
              vfp->sdf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
          CHECK_BETTER
        }
      }
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
    }
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
    }
  }

  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
    const int last_s = (last_is_4 && cost_list != NULL);
    int best_site = -1;
    s = best_init_s;

    for (; s >= last_s; s--) {
      // No need to search all points the 1st time if initial search was used
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            if (!is_mv_in(x, &this_mv)) continue;
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }

      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;

        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            if (!is_mv_in(x, &this_mv)) continue;
            thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    }

    // Note: If we enter the if below, then cost_list must be non-NULL.
    if (s == 0) {
      cost_list[0] = bestsad;
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            cost_list[i + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = { br + candidates[s][i].row,
                                 bc + candidates[s][i].col };
            if (!is_mv_in(x, &this_mv)) continue;
            cost_list[i + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }
      while (best_site != -1) {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
        cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
        cost_list[((k + 2) % 4) + 1] = cost_list[0];
        cost_list[0] = bestsad;

        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            cost_list[next_chkpts_indices[i] + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {
              br + candidates[s][next_chkpts_indices[i]].row,
              bc + candidates[s][next_chkpts_indices[i]].col
            };
            if (!is_mv_in(x, &this_mv)) {
              cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
              continue;
            }
            cost_list[next_chkpts_indices[i] + 1] = thissad =
                vfp->sdf(what->buf, what->stride,
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      }
    }
  }

  // Returns the one-away integer pel cost/sad around the best as follows:
  // cost_list[0]: cost/sad at the best integer pel
  // cost_list[1]: cost/sad at delta {0, -1} (left) from the best integer pel
  // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel
  // cost_list[3]: cost/sad at delta { 0, 1} (right) from the best integer pel
  // cost_list[4]: cost/sad at delta {-1, 0} (top) from the best integer pel
  if (cost_list) {
    const MV best_int_mv = { br, bc };
    if (last_is_4) {
      calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, cost_list,
                        use_mvcost, bestsad);
    } else {
      calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_int_mv,
                         cost_list);
    }
  }
  x->best_mv.as_mv.row = br;
  x->best_mv.as_mv.col = bc;
  return bestsad;
}
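
// pattern_search() proceeds in three phases: an optional scan over all scales
// to pick a starting scale (do_init_search), a coarse-to-fine refinement that
// walks the candidate pattern and then only re-checks the three candidates
// adjacent to the previous winner, and finally the optional construction of
// the one-away cost_list used by the sub-pixel searches.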

int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
                       const MV *center_mv, const aom_variance_fn_ptr_t *vfp,
                       int use_mvcost) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const MV mv = { best_mv->row * 8, best_mv->col * 8 };
  unsigned int unused;

  return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
                 in_what->stride, &unused) +
         (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
                                   x->errorperbit)
                     : 0);
}

int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
                          const MV *center_mv, const uint8_t *second_pred,
                          const aom_variance_fn_ptr_t *vfp, int use_mvcost) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const MV mv = { best_mv->row * 8, best_mv->col * 8 };
  unsigned int unused;

  return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
                   what->buf, what->stride, &unused, second_pred) +
         (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
                                   x->errorperbit)
                     : 0);
}

int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
                   int sad_per_bit, int do_init_search, int *cost_list,
                   const aom_variance_fn_ptr_t *vfp, int use_mvcost,
                   const MV *center_mv) {
  // First scale has 8-closest points, the rest have 6 points in hex shape
  // at increasing scales
  static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
                                                              6, 6, 6, 6, 6 };
  // Note that the largest candidate step at each scale is 2^scale
  /* clang-format off */
  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
      { -1, 0 } },
    { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
    { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
    { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
    { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
    { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
      { -32, 0 } },
    { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
      { -64, 0 } },
    { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
      { -128, 0 } },
    { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
      { -256, 0 } },
    { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
      { -512, 0 } },
    { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
      { -512, 1024 }, { -1024, 0 } },
  };
  /* clang-format on */
  return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
                        cost_list, vfp, use_mvcost, center_mv,
                        hex_num_candidates, hex_candidates);
}

static int bigdia_search(MACROBLOCK *x, MV *start_mv, int search_param,
                         int sad_per_bit, int do_init_search, int *cost_list,
                         const aom_variance_fn_ptr_t *vfp, int use_mvcost,
                         const MV *center_mv) {
  // First scale has 4-closest points, the rest have 8 points in diamond
  // shape at increasing scales
  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  /* clang-format off */
  static const MV
      bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
        { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
        { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
          { -1, 1 }, { -2, 0 } },
        { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
          { -2, 2 }, { -4, 0 } },
        { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
          { -4, 4 }, { -8, 0 } },
        { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
          { -8, 8 }, { -16, 0 } },
        { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
          { 0, 32 }, { -16, 16 }, { -32, 0 } },
        { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
          { 0, 64 }, { -32, 32 }, { -64, 0 } },
        { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
          { 0, 128 }, { -64, 64 }, { -128, 0 } },
        { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
          { 0, 256 }, { -128, 128 }, { -256, 0 } },
        { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
          { 0, 512 }, { -256, 256 }, { -512, 0 } },
        { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
          { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } },
      };
  /* clang-format on */
  return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
                        cost_list, vfp, use_mvcost, center_mv,
                        bigdia_num_candidates, bigdia_candidates);
}

static int square_search(MACROBLOCK *x, MV *start_mv, int search_param,
                         int sad_per_bit, int do_init_search, int *cost_list,
                         const aom_variance_fn_ptr_t *vfp, int use_mvcost,
                         const MV *center_mv) {
  // All scales have 8 closest points in square shape
  static const int square_num_candidates[MAX_PATTERN_SCALES] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  /* clang-format off */
  static const MV
      square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
        { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
          { -1, 1 }, { -1, 0 } },
        { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
          { -2, 2 }, { -2, 0 } },
        { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
          { -4, 4 }, { -4, 0 } },
        { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
          { -8, 8 }, { -8, 0 } },
        { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
          { 0, 16 }, { -16, 16 }, { -16, 0 } },
        { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
          { 0, 32 }, { -32, 32 }, { -32, 0 } },
        { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
          { 0, 64 }, { -64, 64 }, { -64, 0 } },
        { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
          { 0, 128 }, { -128, 128 }, { -128, 0 } },
        { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
          { 0, 256 }, { -256, 256 }, { -256, 0 } },
        { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
          { 0, 512 }, { -512, 512 }, { -512, 0 } },
        { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
          { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } },
      };
  /* clang-format on */
  return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
                        cost_list, vfp, use_mvcost, center_mv,
                        square_num_candidates, square_candidates);
}

static int fast_hex_search(MACROBLOCK *x, MV *ref_mv, int search_param,
                           int sad_per_bit,
                           int do_init_search,  // must be zero for fast_hex
                           int *cost_list, const aom_variance_fn_ptr_t *vfp,
                           int use_mvcost, const MV *center_mv) {
  return av1_hex_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param),
                        sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
                        center_mv);
}

static int fast_dia_search(MACROBLOCK *x, MV *ref_mv, int search_param,
                           int sad_per_bit, int do_init_search, int *cost_list,
                           const aom_variance_fn_ptr_t *vfp, int use_mvcost,
                           const MV *center_mv) {
  return bigdia_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param),
                       sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
                       center_mv);
}

#undef CHECK_BETTER

// Exhaustive motion search around a given centre position with a given
// step size.
static int exhuastive_mesh_search(MACROBLOCK *x, MV *ref_mv, MV *best_mv,
                                  int range, int step, int sad_per_bit,
                                  const aom_variance_fn_ptr_t *fn_ptr,
                                  const MV *center_mv) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  MV fcenter_mv = { center_mv->row, center_mv->col };
  unsigned int best_sad = INT_MAX;
  int r, c, i;
  int start_col, end_col, start_row, end_row;
  int col_step = (step > 1) ? step : 4;

  assert(step >= 1);

  clamp_mv(&fcenter_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
           x->mv_row_max);
  *best_mv = fcenter_mv;
  best_sad =
      fn_ptr->sdf(what->buf, what->stride,
                  get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
      mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
  start_row = AOMMAX(-range, x->mv_row_min - fcenter_mv.row);
  start_col = AOMMAX(-range, x->mv_col_min - fcenter_mv.col);
  end_row = AOMMIN(range, x->mv_row_max - fcenter_mv.row);
  end_col = AOMMIN(range, x->mv_col_max - fcenter_mv.col);

  for (r = start_row; r <= end_row; r += step) {
    for (c = start_col; c <= end_col; c += col_step) {
      // Step > 1 means we are not checking every location in this pass.
      if (step > 1) {
        const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
        unsigned int sad =
            fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
                        in_what->stride);
        if (sad < best_sad) {
          sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
          if (sad < best_sad) {
            best_sad = sad;
            x->second_best_mv.as_mv = *best_mv;
            *best_mv = mv;
          }
        }
      } else {
        // 4 sads in a single call if we are checking every location
        if (c + 3 <= end_col) {
          unsigned int sads[4];
          const uint8_t *addrs[4];
          for (i = 0; i < 4; ++i) {
            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
            addrs[i] = get_buf_from_mv(in_what, &mv);
          }
          fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);

          for (i = 0; i < 4; ++i) {
            if (sads[i] < best_sad) {
              const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
              const unsigned int sad =
                  sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
              if (sad < best_sad) {
                best_sad = sad;
                x->second_best_mv.as_mv = *best_mv;
                *best_mv = mv;
              }
            }
          }
        } else {
          for (i = 0; i < end_col - c; ++i) {
            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
            unsigned int sad =
                fn_ptr->sdf(what->buf, what->stride,
                            get_buf_from_mv(in_what, &mv), in_what->stride);
            if (sad < best_sad) {
              sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
              if (sad < best_sad) {
                best_sad = sad;
                x->second_best_mv.as_mv = *best_mv;
                *best_mv = mv;
              }
            }
          }
        }
      }
    }
  }

  return best_sad;
}
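
// When step == 1 every column is visited, so col_step is forced to 4 and the
// inner loop consumes candidates in groups of four, letting fn_ptr->sdx4df()
// compute four SADs per call; with step > 1 the sparse pass falls back to one
// fn_ptr->sdf() call per candidate.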
1463
Yaowu Xuf883b422016-08-30 14:01:10 -07001464int av1_diamond_search_sad_c(MACROBLOCK *x, const search_site_config *cfg,
1465 MV *ref_mv, MV *best_mv, int search_param,
1466 int sad_per_bit, int *num00,
1467 const aom_variance_fn_ptr_t *fn_ptr,
1468 const MV *center_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001469 int i, j, step;
1470
1471 const MACROBLOCKD *const xd = &x->e_mbd;
1472 uint8_t *what = x->plane[0].src.buf;
1473 const int what_stride = x->plane[0].src.stride;
1474 const uint8_t *in_what;
1475 const int in_what_stride = xd->plane[0].pre[0].stride;
1476 const uint8_t *best_address;
1477
1478 unsigned int bestsad = INT_MAX;
1479 int best_site = 0;
1480 int last_site = 0;
1481
1482 int ref_row;
1483 int ref_col;
1484
1485 // search_param determines the length of the initial step and hence the number
1486 // of iterations.
1487 // 0 = initial step (MAX_FIRST_STEP) pel
1488 // 1 = (MAX_FIRST_STEP/2) pel,
1489 // 2 = (MAX_FIRST_STEP/4) pel...
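  // For example, search_param == 2 skips the two largest step sizes in the
  // site configuration, so the search starts from the (MAX_FIRST_STEP/4) ring
  // and tot_steps shrinks by the same amount.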
1490 const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
1491 const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
1492
1493 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
1494 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1495 ref_row = ref_mv->row;
1496 ref_col = ref_mv->col;
1497 *num00 = 0;
1498 best_mv->row = ref_row;
1499 best_mv->col = ref_col;
1500
1501 // Work out the start point for the search
1502 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
1503 best_address = in_what;
1504
1505 // Check the starting position
1506 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1507 mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
1508
1509 i = 1;
1510
1511 for (step = 0; step < tot_steps; step++) {
1512 int all_in = 1, t;
1513
1514    // All_in is true if every one of the points we are checking is within
1515 // the bounds of the image.
1516 all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
1517 all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
1518 all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
1519 all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
1520
1521    // If all the pixels are within the bounds, we don't check whether the
1522    // search point is valid in this loop; otherwise we check each point
1523    // for validity.
1524 if (all_in) {
1525 unsigned int sad_array[4];
1526
1527 for (j = 0; j < cfg->searches_per_step; j += 4) {
1528 unsigned char const *block_offset[4];
1529
1530 for (t = 0; t < 4; t++)
1531 block_offset[t] = ss[i + t].offset + best_address;
1532
1533 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1534 sad_array);
1535
1536 for (t = 0; t < 4; t++, i++) {
1537 if (sad_array[t] < bestsad) {
1538 const MV this_mv = { best_mv->row + ss[i].mv.row,
1539 best_mv->col + ss[i].mv.col };
1540 sad_array[t] +=
1541 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1542 if (sad_array[t] < bestsad) {
1543 bestsad = sad_array[t];
1544 best_site = i;
1545 }
1546 }
1547 }
1548 }
1549 } else {
1550 for (j = 0; j < cfg->searches_per_step; j++) {
1551 // Trap illegal vectors
1552 const MV this_mv = { best_mv->row + ss[i].mv.row,
1553 best_mv->col + ss[i].mv.col };
1554
1555 if (is_mv_in(x, &this_mv)) {
1556 const uint8_t *const check_here = ss[i].offset + best_address;
1557 unsigned int thissad =
1558 fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1559
1560 if (thissad < bestsad) {
1561 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1562 if (thissad < bestsad) {
1563 bestsad = thissad;
1564 best_site = i;
1565 }
1566 }
1567 }
1568 i++;
1569 }
1570 }
1571 if (best_site != last_site) {
1572 x->second_best_mv.as_mv = *best_mv;
1573 best_mv->row += ss[best_site].mv.row;
1574 best_mv->col += ss[best_site].mv.col;
1575 best_address += ss[best_site].offset;
1576 last_site = best_site;
1577#if defined(NEW_DIAMOND_SEARCH)
1578 while (1) {
1579 const MV this_mv = { best_mv->row + ss[best_site].mv.row,
1580 best_mv->col + ss[best_site].mv.col };
1581 if (is_mv_in(x, &this_mv)) {
1582 const uint8_t *const check_here = ss[best_site].offset + best_address;
1583 unsigned int thissad =
1584 fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1585 if (thissad < bestsad) {
1586 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1587 if (thissad < bestsad) {
1588 bestsad = thissad;
1589 best_mv->row += ss[best_site].mv.row;
1590 best_mv->col += ss[best_site].mv.col;
1591 best_address += ss[best_site].offset;
1592 continue;
1593 }
1594 }
1595 }
1596 break;
1597 }
1598#endif
1599 } else if (best_address == in_what) {
1600 (*num00)++;
1601 }
1602 }
1603 return bestsad;
1604}
1605
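// Matches the 1-D projection of the source block against a wider strip of
// reference projections: a coarse scan every 16 positions is refined with
// +/-8, +/-4, +/-2 and +/-1 probes around the running best, and the result is
// returned as a displacement relative to the centre of the strip.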
1606static int vector_match(int16_t *ref, int16_t *src, int bwl) {
1607 int best_sad = INT_MAX;
1608 int this_sad;
1609 int d;
1610 int center, offset = 0;
1611 int bw = 4 << bwl; // redundant variable, to be changed in the experiments.
1612 for (d = 0; d <= bw; d += 16) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001613 this_sad = aom_vector_var(&ref[d], src, bwl);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001614 if (this_sad < best_sad) {
1615 best_sad = this_sad;
1616 offset = d;
1617 }
1618 }
1619 center = offset;
1620
1621 for (d = -8; d <= 8; d += 16) {
1622 int this_pos = offset + d;
1623 // check limit
1624 if (this_pos < 0 || this_pos > bw) continue;
Yaowu Xuf883b422016-08-30 14:01:10 -07001625 this_sad = aom_vector_var(&ref[this_pos], src, bwl);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001626 if (this_sad < best_sad) {
1627 best_sad = this_sad;
1628 center = this_pos;
1629 }
1630 }
1631 offset = center;
1632
1633 for (d = -4; d <= 4; d += 8) {
1634 int this_pos = offset + d;
1635 // check limit
1636 if (this_pos < 0 || this_pos > bw) continue;
Yaowu Xuf883b422016-08-30 14:01:10 -07001637 this_sad = aom_vector_var(&ref[this_pos], src, bwl);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001638 if (this_sad < best_sad) {
1639 best_sad = this_sad;
1640 center = this_pos;
1641 }
1642 }
1643 offset = center;
1644
1645 for (d = -2; d <= 2; d += 4) {
1646 int this_pos = offset + d;
1647 // check limit
1648 if (this_pos < 0 || this_pos > bw) continue;
Yaowu Xuf883b422016-08-30 14:01:10 -07001649 this_sad = aom_vector_var(&ref[this_pos], src, bwl);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001650 if (this_sad < best_sad) {
1651 best_sad = this_sad;
1652 center = this_pos;
1653 }
1654 }
1655 offset = center;
1656
1657 for (d = -1; d <= 1; d += 2) {
1658 int this_pos = offset + d;
1659 // check limit
1660 if (this_pos < 0 || this_pos > bw) continue;
Yaowu Xuf883b422016-08-30 14:01:10 -07001661 this_sad = aom_vector_var(&ref[this_pos], src, bwl);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001662 if (this_sad < best_sad) {
1663 best_sad = this_sad;
1664 center = this_pos;
1665 }
1666 }
1667
1668 return (center - (bw >> 1));
1669}
1670
1671static const MV search_pos[4] = {
1672 { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
1673};
1674
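// Coarse full-pixel motion estimate based on integral projections: reference
// rows and columns are collapsed into 1-D sums, matched independently against
// the source projections to obtain column and row offsets, and the estimate
// is then refined with a one-pel cross check plus a diagonal candidate before
// being returned in 1/8-pel units.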
Yaowu Xuf883b422016-08-30 14:01:10 -07001675unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
1676 BLOCK_SIZE bsize, int mi_row,
1677 int mi_col) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001678 MACROBLOCKD *xd = &x->e_mbd;
1679 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1680 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
1681 DECLARE_ALIGNED(16, int16_t, hbuf[2 * MAX_SB_SIZE]);
1682 DECLARE_ALIGNED(16, int16_t, vbuf[2 * MAX_SB_SIZE]);
1683 DECLARE_ALIGNED(16, int16_t, src_hbuf[MAX_SB_SQUARE]);
1684 DECLARE_ALIGNED(16, int16_t, src_vbuf[MAX_SB_SQUARE]);
1685 int idx;
1686 const int bw = 4 << b_width_log2_lookup[bsize];
1687 const int bh = 4 << b_height_log2_lookup[bsize];
1688 const int search_width = bw << 1;
1689 const int search_height = bh << 1;
1690 const int src_stride = x->plane[0].src.stride;
1691 const int ref_stride = xd->plane[0].pre[0].stride;
1692 uint8_t const *ref_buf, *src_buf;
1693 MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
Urvang Joshi454280d2016-10-14 16:51:44 -07001694 unsigned int best_sad, tmp_sad, sad_arr[4];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001695 MV this_mv;
1696 const int norm_factor = 3 + (bw >> 5);
1697 const YV12_BUFFER_CONFIG *scaled_ref_frame =
Yaowu Xuf883b422016-08-30 14:01:10 -07001698 av1_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001699
1700 if (scaled_ref_frame) {
1701 int i;
1702 // Swap out the reference frame for a version that's been scaled to
1703 // match the resolution of the current frame, allowing the existing
1704 // motion search code to be used without additional modifications.
1705 for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
Yaowu Xuf883b422016-08-30 14:01:10 -07001706 av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001707 }
1708
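// Note: in CONFIG_AOM_HIGHBITDEPTH builds the projection-based estimate below
// is skipped entirely; the SAD at the zero MV is computed and returned
// instead.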
Yaowu Xuf883b422016-08-30 14:01:10 -07001709#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001710 {
1711 unsigned int this_sad;
1712 tmp_mv->row = 0;
1713 tmp_mv->col = 0;
1714 this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
1715 xd->plane[0].pre[0].buf, ref_stride);
1716
1717 if (scaled_ref_frame) {
1718 int i;
1719 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
1720 }
1721 return this_sad;
1722 }
1723#endif
1724
1725 // Set up prediction 1-D reference set
1726 ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
1727 for (idx = 0; idx < search_width; idx += 16) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001728 aom_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001729 ref_buf += 16;
1730 }
1731
1732 ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
1733 for (idx = 0; idx < search_height; ++idx) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001734 vbuf[idx] = aom_int_pro_col(ref_buf, bw) >> norm_factor;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001735 ref_buf += ref_stride;
1736 }
1737
1738 // Set up src 1-D reference set
1739 for (idx = 0; idx < bw; idx += 16) {
1740 src_buf = x->plane[0].src.buf + idx;
Yaowu Xuf883b422016-08-30 14:01:10 -07001741 aom_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001742 }
1743
1744 src_buf = x->plane[0].src.buf;
1745 for (idx = 0; idx < bh; ++idx) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001746 src_vbuf[idx] = aom_int_pro_col(src_buf, bw) >> norm_factor;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001747 src_buf += src_stride;
1748 }
1749
1750 // Find the best match per 1-D search
1751 tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
1752 tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
1753
1754 this_mv = *tmp_mv;
1755 src_buf = x->plane[0].src.buf;
1756 ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
1757 best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
1758
1759 {
1760 const uint8_t *const pos[4] = {
1761 ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
1762 };
1763
Urvang Joshi454280d2016-10-14 16:51:44 -07001764 cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, sad_arr);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001765 }
1766
1767 for (idx = 0; idx < 4; ++idx) {
Urvang Joshi454280d2016-10-14 16:51:44 -07001768 if (sad_arr[idx] < best_sad) {
1769 best_sad = sad_arr[idx];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001770 tmp_mv->row = search_pos[idx].row + this_mv.row;
1771 tmp_mv->col = search_pos[idx].col + this_mv.col;
1772 }
1773 }
1774
Urvang Joshi454280d2016-10-14 16:51:44 -07001775 if (sad_arr[0] < sad_arr[3])
Yaowu Xuc27fc142016-08-22 16:08:15 -07001776 this_mv.row -= 1;
1777 else
1778 this_mv.row += 1;
1779
Urvang Joshi454280d2016-10-14 16:51:44 -07001780 if (sad_arr[1] < sad_arr[2])
Yaowu Xuc27fc142016-08-22 16:08:15 -07001781 this_mv.col -= 1;
1782 else
1783 this_mv.col += 1;
1784
1785 ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
1786
1787 tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
1788 if (best_sad > tmp_sad) {
1789 *tmp_mv = this_mv;
1790 best_sad = tmp_sad;
1791 }
1792
1793 tmp_mv->row *= 8;
1794 tmp_mv->col *= 8;
1795
1796 if (scaled_ref_frame) {
1797 int i;
1798 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
1799 }
1800
1801 return best_sad;
1802}
1803
1804/* do_refine: If the last step (1-away) of the n-step search doesn't pick
1805   the center point as the best match, we will do a final 1-away diamond
1806   refining search */
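// full_pixel_diamond runs the diamond search at step_param and then up to
// `further_steps` finer passes (step_param + n), skipping passes the previous
// search flagged as redundant (num00).  Candidates below INT_MAX are re-scored
// with av1_get_mvpred_var before comparison, and an optional 1-away refining
// search (do_refine) finishes the job.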
Urvang Joshi52648442016-10-13 17:27:51 -07001807static int full_pixel_diamond(const AV1_COMP *const cpi, MACROBLOCK *x,
1808 MV *mvp_full, int step_param, int sadpb,
1809 int further_steps, int do_refine, int *cost_list,
Yaowu Xuf883b422016-08-30 14:01:10 -07001810 const aom_variance_fn_ptr_t *fn_ptr,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001811 const MV *ref_mv) {
1812 MV temp_mv;
1813 int thissme, n, num00 = 0;
1814 int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
1815 step_param, sadpb, &n, fn_ptr, ref_mv);
1816 if (bestsme < INT_MAX)
Yaowu Xuf883b422016-08-30 14:01:10 -07001817 bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001818 x->best_mv.as_mv = temp_mv;
1819
1820 // If there won't be more n-step search, check to see if refining search is
1821 // needed.
1822 if (n > further_steps) do_refine = 0;
1823
1824 while (n < further_steps) {
1825 ++n;
1826
1827 if (num00) {
1828 num00--;
1829 } else {
1830 thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
1831 step_param + n, sadpb, &num00, fn_ptr,
1832 ref_mv);
1833 if (thissme < INT_MAX)
Yaowu Xuf883b422016-08-30 14:01:10 -07001834 thissme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001835
1836 // check to see if refining search is needed.
1837 if (num00 > further_steps - n) do_refine = 0;
1838
1839 if (thissme < bestsme) {
1840 bestsme = thissme;
1841 x->best_mv.as_mv = temp_mv;
1842 }
1843 }
1844 }
1845
1846 // final 1-away diamond refining search
1847 if (do_refine) {
1848 const int search_range = 8;
1849 MV best_mv = x->best_mv.as_mv;
Yaowu Xuf883b422016-08-30 14:01:10 -07001850 thissme = av1_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
1851 ref_mv);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001852 if (thissme < INT_MAX)
Yaowu Xuf883b422016-08-30 14:01:10 -07001853 thissme = av1_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001854 if (thissme < bestsme) {
1855 bestsme = thissme;
1856 x->best_mv.as_mv = best_mv;
1857 }
1858 }
1859
1860 // Return cost list.
1861 if (cost_list) {
1862 calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list);
1863 }
1864 return bestsme;
1865}
1866
1867#define MIN_RANGE 7
1868#define MAX_RANGE 256
1869#define MIN_INTERVAL 1
1870// Runs a limited range exhaustive mesh search using a pattern set
1871// according to the encode speed profile.
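// The first mesh pattern supplies the starting range and interval; the range
// is widened when the centre MV is large and the interval scaled up so the
// range/interval ratio does not exceed the original pattern's.  Subsequent
// patterns re-search around the current best MV until a pattern with an
// interval of one pixel has been run.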
Urvang Joshi52648442016-10-13 17:27:51 -07001872static int full_pixel_exhaustive(const AV1_COMP *const cpi, MACROBLOCK *x,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001873 const MV *centre_mv_full, int sadpb,
1874 int *cost_list,
Yaowu Xuf883b422016-08-30 14:01:10 -07001875 const aom_variance_fn_ptr_t *fn_ptr,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001876 const MV *ref_mv, MV *dst_mv) {
1877 const SPEED_FEATURES *const sf = &cpi->sf;
1878 MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
1879 MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
1880 int bestsme;
1881 int i;
1882 int interval = sf->mesh_patterns[0].interval;
1883 int range = sf->mesh_patterns[0].range;
1884 int baseline_interval_divisor;
1885
1886 // Keep track of number of exhaustive calls (this frame in this thread).
1887 ++(*x->ex_search_count_ptr);
1888
1889 // Trap illegal values for interval and range for this function.
1890 if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
1891 (interval > range))
1892 return INT_MAX;
1893
1894 baseline_interval_divisor = range / interval;
1895
1896 // Check size of proposed first range against magnitude of the centre
1897 // value used as a starting point.
Yaowu Xuf883b422016-08-30 14:01:10 -07001898 range = AOMMAX(range, (5 * AOMMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
1899 range = AOMMIN(range, MAX_RANGE);
1900 interval = AOMMAX(interval, range / baseline_interval_divisor);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001901
1902 // initial search
1903 bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
1904 sadpb, fn_ptr, &temp_mv);
1905
1906 if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
1907 // Progressive searches with range and step size decreasing each time
1908 // till we reach a step size of 1. Then break out.
1909 for (i = 1; i < MAX_MESH_STEP; ++i) {
1910      // Re-search around the current best MV with the next, finer pattern
1911 bestsme = exhuastive_mesh_search(
1912 x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
1913 sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
1914
1915 if (sf->mesh_patterns[i].interval == 1) break;
1916 }
1917 }
1918
1919 if (bestsme < INT_MAX)
Yaowu Xuf883b422016-08-30 14:01:10 -07001920 bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001921 *dst_mv = temp_mv;
1922
1923 // Return cost list.
1924 if (cost_list) {
1925 calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
1926 }
1927 return bestsme;
1928}
1929
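// Reference C implementation of the exhaustive window search: every integer
// MV within +/- distance of ref_mv (clipped to the MV limits) is scored as
// SAD plus the MV rate cost, and the best match is returned in *best_mv.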
Yaowu Xuf883b422016-08-30 14:01:10 -07001930int av1_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
1931 int sad_per_bit, int distance,
1932 const aom_variance_fn_ptr_t *fn_ptr,
1933 const MV *center_mv, MV *best_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001934 int r, c;
1935 const MACROBLOCKD *const xd = &x->e_mbd;
1936 const struct buf_2d *const what = &x->plane[0].src;
1937 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
Yaowu Xuf883b422016-08-30 14:01:10 -07001938 const int row_min = AOMMAX(ref_mv->row - distance, x->mv_row_min);
1939 const int row_max = AOMMIN(ref_mv->row + distance, x->mv_row_max);
1940 const int col_min = AOMMAX(ref_mv->col - distance, x->mv_col_min);
1941 const int col_max = AOMMIN(ref_mv->col + distance, x->mv_col_max);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001942 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
1943 int best_sad =
1944 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
1945 in_what->stride) +
1946 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1947 *best_mv = *ref_mv;
1948
1949 for (r = row_min; r < row_max; ++r) {
1950 for (c = col_min; c < col_max; ++c) {
1951 const MV mv = { r, c };
1952 const int sad =
1953 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
1954 in_what->stride) +
1955 mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
1956 if (sad < best_sad) {
1957 best_sad = sad;
1958 *best_mv = mv;
1959 }
1960 }
1961 }
1962 return best_sad;
1963}
1964
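// Same search window as av1_full_search_sad_c, but each row is swept with the
// 3-wide SAD kernel (fn_ptr->sdx3f) when one is available, falling back to
// single fn_ptr->sdf calls for the remaining columns.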
Yaowu Xuf883b422016-08-30 14:01:10 -07001965int av1_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
1966 int sad_per_bit, int distance,
1967 const aom_variance_fn_ptr_t *fn_ptr,
1968 const MV *center_mv, MV *best_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001969 int r;
1970 const MACROBLOCKD *const xd = &x->e_mbd;
1971 const struct buf_2d *const what = &x->plane[0].src;
1972 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
Yaowu Xuf883b422016-08-30 14:01:10 -07001973 const int row_min = AOMMAX(ref_mv->row - distance, x->mv_row_min);
1974 const int row_max = AOMMIN(ref_mv->row + distance, x->mv_row_max);
1975 const int col_min = AOMMAX(ref_mv->col - distance, x->mv_col_min);
1976 const int col_max = AOMMIN(ref_mv->col + distance, x->mv_col_max);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001977 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
1978 unsigned int best_sad =
1979 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
1980 in_what->stride) +
1981 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1982 *best_mv = *ref_mv;
1983
1984 for (r = row_min; r < row_max; ++r) {
1985 int c = col_min;
1986 const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
1987
1988 if (fn_ptr->sdx3f != NULL) {
1989 while ((c + 2) < col_max) {
1990 int i;
1991 DECLARE_ALIGNED(16, uint32_t, sads[3]);
1992
1993 fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
1994 sads);
1995
1996 for (i = 0; i < 3; ++i) {
1997 unsigned int sad = sads[i];
1998 if (sad < best_sad) {
1999 const MV mv = { r, c };
2000 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2001 if (sad < best_sad) {
2002 best_sad = sad;
2003 *best_mv = mv;
2004 }
2005 }
2006 ++check_here;
2007 ++c;
2008 }
2009 }
2010 }
2011
2012 while (c < col_max) {
2013 unsigned int sad =
2014 fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride);
2015 if (sad < best_sad) {
2016 const MV mv = { r, c };
2017 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2018 if (sad < best_sad) {
2019 best_sad = sad;
2020 *best_mv = mv;
2021 }
2022 }
2023 ++check_here;
2024 ++c;
2025 }
2026 }
2027
2028 return best_sad;
2029}
2030
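// As above, preferring the 8-wide SAD kernel (fn_ptr->sdx8f), then the 3-wide
// kernel, and finally single fn_ptr->sdf calls for whatever columns remain in
// each row.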
Yaowu Xuf883b422016-08-30 14:01:10 -07002031int av1_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
2032 int sad_per_bit, int distance,
2033 const aom_variance_fn_ptr_t *fn_ptr,
2034 const MV *center_mv, MV *best_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002035 int r;
2036 const MACROBLOCKD *const xd = &x->e_mbd;
2037 const struct buf_2d *const what = &x->plane[0].src;
2038 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
Yaowu Xuf883b422016-08-30 14:01:10 -07002039 const int row_min = AOMMAX(ref_mv->row - distance, x->mv_row_min);
2040 const int row_max = AOMMIN(ref_mv->row + distance, x->mv_row_max);
2041 const int col_min = AOMMAX(ref_mv->col - distance, x->mv_col_min);
2042 const int col_max = AOMMIN(ref_mv->col + distance, x->mv_col_max);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002043 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2044 unsigned int best_sad =
2045 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2046 in_what->stride) +
2047 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2048 *best_mv = *ref_mv;
2049
2050 for (r = row_min; r < row_max; ++r) {
2051 int c = col_min;
2052 const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
2053
2054 if (fn_ptr->sdx8f != NULL) {
2055 while ((c + 7) < col_max) {
2056 int i;
2057 DECLARE_ALIGNED(16, uint32_t, sads[8]);
2058
2059 fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
2060 sads);
2061
2062 for (i = 0; i < 8; ++i) {
2063 unsigned int sad = sads[i];
2064 if (sad < best_sad) {
2065 const MV mv = { r, c };
2066 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2067 if (sad < best_sad) {
2068 best_sad = sad;
2069 *best_mv = mv;
2070 }
2071 }
2072 ++check_here;
2073 ++c;
2074 }
2075 }
2076 }
2077
2078 if (fn_ptr->sdx3f != NULL) {
2079 while ((c + 2) < col_max) {
2080 int i;
2081 DECLARE_ALIGNED(16, uint32_t, sads[3]);
2082
2083 fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
2084 sads);
2085
2086 for (i = 0; i < 3; ++i) {
2087 unsigned int sad = sads[i];
2088 if (sad < best_sad) {
2089 const MV mv = { r, c };
2090 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2091 if (sad < best_sad) {
2092 best_sad = sad;
2093 *best_mv = mv;
2094 }
2095 }
2096 ++check_here;
2097 ++c;
2098 }
2099 }
2100 }
2101
2102 while (c < col_max) {
2103 unsigned int sad =
2104 fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride);
2105 if (sad < best_sad) {
2106 const MV mv = { r, c };
2107 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2108 if (sad < best_sad) {
2109 best_sad = sad;
2110 *best_mv = mv;
2111 }
2112 }
2113 ++check_here;
2114 ++c;
2115 }
2116 }
2117
2118 return best_sad;
2119}
2120
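// Iterative cross-pattern refinement: for up to `search_range` rounds the four
// one-pel neighbours of *ref_mv are tested (via fn_ptr->sdx4df when all of
// them lie inside the MV limits), and the loop stops as soon as no neighbour
// improves on the current best SAD + MV cost.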
Yaowu Xuf883b422016-08-30 14:01:10 -07002121int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit,
2122 int search_range,
2123 const aom_variance_fn_ptr_t *fn_ptr,
2124 const MV *center_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002125 const MACROBLOCKD *const xd = &x->e_mbd;
2126 const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
2127 const struct buf_2d *const what = &x->plane[0].src;
2128 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2129 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2130 const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
2131 unsigned int best_sad =
2132 fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
2133 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2134 int i, j;
2135
2136 for (i = 0; i < search_range; i++) {
2137 int best_site = -1;
2138 const int all_in = ((ref_mv->row - 1) > x->mv_row_min) &
2139 ((ref_mv->row + 1) < x->mv_row_max) &
2140 ((ref_mv->col - 1) > x->mv_col_min) &
2141 ((ref_mv->col + 1) < x->mv_col_max);
2142
2143 if (all_in) {
2144 unsigned int sads[4];
2145 const uint8_t *const positions[4] = { best_address - in_what->stride,
2146 best_address - 1, best_address + 1,
2147 best_address + in_what->stride };
2148
2149 fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
2150
2151 for (j = 0; j < 4; ++j) {
2152 if (sads[j] < best_sad) {
2153 const MV mv = { ref_mv->row + neighbors[j].row,
2154 ref_mv->col + neighbors[j].col };
2155 sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2156 if (sads[j] < best_sad) {
2157 best_sad = sads[j];
2158 best_site = j;
2159 }
2160 }
2161 }
2162 } else {
2163 for (j = 0; j < 4; ++j) {
2164 const MV mv = { ref_mv->row + neighbors[j].row,
2165 ref_mv->col + neighbors[j].col };
2166
2167 if (is_mv_in(x, &mv)) {
2168 unsigned int sad =
2169 fn_ptr->sdf(what->buf, what->stride,
2170 get_buf_from_mv(in_what, &mv), in_what->stride);
2171 if (sad < best_sad) {
2172 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2173 if (sad < best_sad) {
2174 best_sad = sad;
2175 best_site = j;
2176 }
2177 }
2178 }
2179 }
2180 }
2181
2182 if (best_site == -1) {
2183 break;
2184 } else {
2185 x->second_best_mv.as_mv = *ref_mv;
2186 ref_mv->row += neighbors[best_site].row;
2187 ref_mv->col += neighbors[best_site].col;
2188 best_address = get_buf_from_mv(in_what, ref_mv);
2189 }
2190 }
2191
2192 return best_sad;
2193}
2194
2195// This function is called when we do joint motion search in comp_inter_inter
2196// mode.
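// It refines x->best_mv over an eight-neighbour pattern, scoring each
// candidate with the compound SAD (fn_ptr->sdaf), i.e. against the average of
// the reference block and second_pred.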
Yaowu Xuf883b422016-08-30 14:01:10 -07002197int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
2198 const aom_variance_fn_ptr_t *fn_ptr,
2199 const MV *center_mv, const uint8_t *second_pred) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002200 const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
2201 { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
2202 const MACROBLOCKD *const xd = &x->e_mbd;
2203 const struct buf_2d *const what = &x->plane[0].src;
2204 const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2205 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2206 MV *best_mv = &x->best_mv.as_mv;
2207 unsigned int best_sad =
2208 fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
2209 in_what->stride, second_pred) +
2210 mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
2211 int i, j;
2212
2213 for (i = 0; i < search_range; ++i) {
2214 int best_site = -1;
2215
2216 for (j = 0; j < 8; ++j) {
2217 const MV mv = { best_mv->row + neighbors[j].row,
2218 best_mv->col + neighbors[j].col };
2219
2220 if (is_mv_in(x, &mv)) {
2221 unsigned int sad =
2222 fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
2223 in_what->stride, second_pred);
2224 if (sad < best_sad) {
2225 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2226 if (sad < best_sad) {
2227 best_sad = sad;
2228 best_site = j;
2229 }
2230 }
2231 }
2232 }
2233
2234 if (best_site == -1) {
2235 break;
2236 } else {
2237 best_mv->row += neighbors[best_site].row;
2238 best_mv->col += neighbors[best_site].col;
2239 }
2240 }
2241 return best_sad;
2242}
2243
2244#define MIN_EX_SEARCH_LIMIT 128
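// Budget check for mesh searches: they are allowed only while the per-frame
// exhaustive-search count stays within max(MIN_EX_SEARCH_LIMIT, a percentage
// of the normal search count), the speed features permit them, and the source
// frame is not an alt-ref overlay.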
Urvang Joshi52648442016-10-13 17:27:51 -07002245static int is_exhaustive_allowed(const AV1_COMP *const cpi, MACROBLOCK *x) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002246 const SPEED_FEATURES *const sf = &cpi->sf;
2247 const int max_ex =
Yaowu Xuf883b422016-08-30 14:01:10 -07002248 AOMMAX(MIN_EX_SEARCH_LIMIT,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002249 (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
2250
2251 return sf->allow_exhaustive_searches &&
2252 (sf->exhaustive_searches_thresh < INT_MAX) &&
2253 (*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
2254}
2255
Urvang Joshi52648442016-10-13 17:27:51 -07002256int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
Yaowu Xuf883b422016-08-30 14:01:10 -07002257 MV *mvp_full, int step_param, int error_per_bit,
2258 int *cost_list, const MV *ref_mv, int var_max,
2259 int rd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002260 const SPEED_FEATURES *const sf = &cpi->sf;
2261 const SEARCH_METHODS method = sf->mv.search_method;
Urvang Joshi52648442016-10-13 17:27:51 -07002262 const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07002263 int var = 0;
2264
2265 if (cost_list) {
2266 cost_list[0] = INT_MAX;
2267 cost_list[1] = INT_MAX;
2268 cost_list[2] = INT_MAX;
2269 cost_list[3] = INT_MAX;
2270 cost_list[4] = INT_MAX;
2271 }
2272
2273 // Keep track of number of searches (this frame in this thread).
2274 ++(*x->m_search_count_ptr);
2275
2276 switch (method) {
2277 case FAST_DIAMOND:
2278 var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
2279 cost_list, fn_ptr, 1, ref_mv);
2280 break;
2281 case FAST_HEX:
2282 var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
2283 cost_list, fn_ptr, 1, ref_mv);
2284 break;
2285 case HEX:
Yaowu Xuf883b422016-08-30 14:01:10 -07002286 var = av1_hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2287 fn_ptr, 1, ref_mv);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002288 break;
2289 case SQUARE:
2290 var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2291 fn_ptr, 1, ref_mv);
2292 break;
2293 case BIGDIA:
2294 var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2295 fn_ptr, 1, ref_mv);
2296 break;
2297 case NSTEP:
2298 var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
2299 MAX_MVSEARCH_STEPS - 1 - step_param, 1,
2300 cost_list, fn_ptr, ref_mv);
2301
2302      // Should we allow a follow-on exhaustive search?
2303 if (is_exhaustive_allowed(cpi, x)) {
2304 int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
2305 exhuastive_thr >>=
2306 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
2307
2308 // Threshold variance for an exhaustive full search.
2309 if (var > exhuastive_thr) {
2310 int var_ex;
2311 MV tmp_mv_ex;
2312 var_ex =
2313 full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
2314 cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
2315
2316 if (var_ex < var) {
2317 var = var_ex;
2318 x->best_mv.as_mv = tmp_mv_ex;
2319 }
2320 }
2321 }
2322 break;
2325 default: assert(0 && "Invalid search method.");
2326 }
2327
2328 if (method != NSTEP && rd && var < var_max)
Yaowu Xuf883b422016-08-30 14:01:10 -07002329 var = av1_get_mvpred_var(x, &x->best_mv.as_mv, ref_mv, fn_ptr, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002330
2331 return var;
2332}
2333
2334#if CONFIG_EXT_INTER
2335/* returns subpixel variance error function */
2336#define DIST(r, c) \
2337 vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, src_stride, \
2338 mask, mask_stride, &sse)
2339
2340/* checks if (r, c) has better score than previous best */
2341
2342#define MVC(r, c) \
2343 (mvcost \
2344 ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + mvcost[0][((r)-rr)] + \
2345 mvcost[1][((c)-rc)]) * \
2346 error_per_bit + \
2347 4096) >> \
2348 13 \
2349 : 0)
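// MVC(r, c) approximates the MV rate cost in distortion units: the joint and
// component MV costs are weighted by error_per_bit and rounded with
// (x + 4096) >> 13.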
2350
2351#define CHECK_BETTER(v, r, c) \
2352 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
2353 thismse = (DIST(r, c)); \
2354 if ((v = MVC(r, c) + thismse) < besterr) { \
2355 besterr = v; \
2356 br = r; \
2357 bc = c; \
2358 *distortion = thismse; \
2359 *sse1 = sse; \
2360 } \
2361 } else { \
2362 v = INT_MAX; \
2363 }
2364
2365#undef CHECK_BETTER0
2366#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
2367
2368#undef CHECK_BETTER1
2369#define CHECK_BETTER1(v, r, c) \
2370 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
2371 thismse = upsampled_masked_pref_error(xd, mask, mask_stride, vfp, z, \
2372 src_stride, upre(y, y_stride, r, c), \
2373 y_stride, w, h, &sse); \
2374 if ((v = MVC(r, c) + thismse) < besterr) { \
2375 besterr = v; \
2376 br = r; \
2377 bc = c; \
2378 *distortion = thismse; \
2379 *sse1 = sse; \
2380 } \
2381 } else { \
2382 v = INT_MAX; \
2383 }
2384
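// Sub-pixel refinement for masked compound prediction: starting from the best
// full-pel MV it checks half-pel, then quarter-pel and eighth-pel positions
// (gated by forced_stop and allow_hp) using the masked sub-pixel variance
// function vfp->msvf, adding the MV rate cost to every candidate.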
Yaowu Xuf883b422016-08-30 14:01:10 -07002385int av1_find_best_masked_sub_pixel_tree(
Yaowu Xuc27fc142016-08-22 16:08:15 -07002386 const MACROBLOCK *x, const uint8_t *mask, int mask_stride, MV *bestmv,
2387 const MV *ref_mv, int allow_hp, int error_per_bit,
Yaowu Xuf883b422016-08-30 14:01:10 -07002388 const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002389 int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
2390 int is_second) {
2391 const uint8_t *const z = x->plane[0].src.buf;
2392 const int src_stride = x->plane[0].src.stride;
2393 const MACROBLOCKD *xd = &x->e_mbd;
2394 unsigned int besterr = INT_MAX;
2395 unsigned int sse;
2396 int thismse;
2397 unsigned int whichdir;
2398 unsigned int halfiters = iters_per_step;
2399 unsigned int quarteriters = iters_per_step;
2400 unsigned int eighthiters = iters_per_step;
2401
2402 const int y_stride = xd->plane[0].pre[is_second].stride;
2403 const int offset = bestmv->row * y_stride + bestmv->col;
2404 const uint8_t *const y = xd->plane[0].pre[is_second].buf;
2405
2406 int rr = ref_mv->row;
2407 int rc = ref_mv->col;
2408 int br = bestmv->row * 8;
2409 int bc = bestmv->col * 8;
2410 int hstep = 4;
Yaowu Xuf883b422016-08-30 14:01:10 -07002411 const int minc = AOMMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
2412 const int maxc = AOMMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
2413 const int minr = AOMMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
2414 const int maxr = AOMMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002415
2416 int tr = br;
2417 int tc = bc;
2418
2419 // central mv
2420 bestmv->row *= 8;
2421 bestmv->col *= 8;
2422
2423 // calculate central point error
2424 besterr =
2425 vfp->mvf(y + offset, y_stride, z, src_stride, mask, mask_stride, sse1);
2426 *distortion = besterr;
2427 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
2428
2429 // 1/2 pel
2430 FIRST_LEVEL_CHECKS;
2431 if (halfiters > 1) {
2432 SECOND_LEVEL_CHECKS;
2433 }
2434 tr = br;
2435 tc = bc;
2436
2437 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
2438 if (forced_stop != 2) {
2439 hstep >>= 1;
2440 FIRST_LEVEL_CHECKS;
2441 if (quarteriters > 1) {
2442 SECOND_LEVEL_CHECKS;
2443 }
2444 tr = br;
2445 tc = bc;
2446 }
2447
Alex Converse6317c882016-09-29 14:21:37 -07002448 if (allow_hp && forced_stop == 0) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002449 hstep >>= 1;
2450 FIRST_LEVEL_CHECKS;
2451 if (eighthiters > 1) {
2452 SECOND_LEVEL_CHECKS;
2453 }
2454 tr = br;
2455 tc = bc;
2456 }
2457  // These lines ensure static analysis doesn't warn that
2458 // tr and tc aren't used after the above point.
2459 (void)tr;
2460 (void)tc;
2461
2462 bestmv->row = br;
2463 bestmv->col = bc;
2464
2465 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
2466 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
2467 return INT_MAX;
2468
2469 return besterr;
2470}
2471
2472static unsigned int setup_masked_center_error(
2473 const uint8_t *mask, int mask_stride, const MV *bestmv, const MV *ref_mv,
Yaowu Xuf883b422016-08-30 14:01:10 -07002474 int error_per_bit, const aom_variance_fn_ptr_t *vfp,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002475 const uint8_t *const src, const int src_stride, const uint8_t *const y,
2476 int y_stride, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
2477 int *distortion) {
2478 unsigned int besterr;
2479 besterr =
2480 vfp->mvf(y + offset, y_stride, src, src_stride, mask, mask_stride, sse1);
2481 *distortion = besterr;
2482 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
2483 return besterr;
2484}
2485
2486static int upsampled_masked_pref_error(const MACROBLOCKD *xd,
2487 const uint8_t *mask, int mask_stride,
Yaowu Xuf883b422016-08-30 14:01:10 -07002488 const aom_variance_fn_ptr_t *vfp,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002489 const uint8_t *const src,
2490 const int src_stride,
2491 const uint8_t *const y, int y_stride,
2492 int w, int h, unsigned int *sse) {
2493 unsigned int besterr;
Yaowu Xuf883b422016-08-30 14:01:10 -07002494#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07002495 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2496 DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
Yaowu Xuf883b422016-08-30 14:01:10 -07002497 aom_highbd_upsampled_pred(pred16, w, h, y, y_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002498
2499 besterr = vfp->mvf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, mask,
2500 mask_stride, sse);
2501 } else {
2502 DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
2503#else
2504 DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
2505 (void)xd;
Yaowu Xuf883b422016-08-30 14:01:10 -07002506#endif // CONFIG_AOM_HIGHBITDEPTH
2507 aom_upsampled_pred(pred, w, h, y, y_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002508
2509 besterr = vfp->mvf(pred, w, src, src_stride, mask, mask_stride, sse);
Yaowu Xuf883b422016-08-30 14:01:10 -07002510#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07002511 }
2512#endif
2513 return besterr;
2514}
2515
2516static unsigned int upsampled_setup_masked_center_error(
2517 const MACROBLOCKD *xd, const uint8_t *mask, int mask_stride,
2518 const MV *bestmv, const MV *ref_mv, int error_per_bit,
Yaowu Xuf883b422016-08-30 14:01:10 -07002519 const aom_variance_fn_ptr_t *vfp, const uint8_t *const src,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002520 const int src_stride, const uint8_t *const y, int y_stride, int w, int h,
2521 int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
2522 int *distortion) {
2523 unsigned int besterr =
2524 upsampled_masked_pref_error(xd, mask, mask_stride, vfp, src, src_stride,
2525 y + offset, y_stride, w, h, sse1);
2526 *distortion = besterr;
2527 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
2528 return besterr;
2529}
2530
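// Variant of the masked sub-pixel search that can score candidates against an
// upsampled reference (use_upsampled_ref), temporarily re-pointing
// pd->pre[is_second] at the upsampled frame.  Each of up to 3 - forced_stop
// rounds checks the four cross positions and one diagonal around the current
// best, then halves the step size.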
Yaowu Xuf883b422016-08-30 14:01:10 -07002531int av1_find_best_masked_sub_pixel_tree_up(
Urvang Joshi52648442016-10-13 17:27:51 -07002532 const AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002533 int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp,
Yaowu Xuf883b422016-08-30 14:01:10 -07002534 int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002535 int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
2536 unsigned int *sse1, int is_second, int use_upsampled_ref) {
2537 const uint8_t *const z = x->plane[0].src.buf;
2538 const uint8_t *const src_address = z;
2539 const int src_stride = x->plane[0].src.stride;
2540 MACROBLOCKD *xd = &x->e_mbd;
2541 struct macroblockd_plane *const pd = &xd->plane[0];
2542 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2543 unsigned int besterr = INT_MAX;
2544 unsigned int sse;
2545 unsigned int thismse;
2546
2547 int rr = ref_mv->row;
2548 int rc = ref_mv->col;
2549 int br = bestmv->row * 8;
2550 int bc = bestmv->col * 8;
2551 int hstep = 4;
2552 int iter;
2553 int round = 3 - forced_stop;
Yaowu Xuf883b422016-08-30 14:01:10 -07002554 const int minc = AOMMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
2555 const int maxc = AOMMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
2556 const int minr = AOMMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
2557 const int maxr = AOMMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002558 int tr = br;
2559 int tc = bc;
2560 const MV *search_step = search_step_table;
2561 int idx, best_idx = -1;
2562 unsigned int cost_array[5];
2563 int kr, kc;
Jingning Hanae5cfde2016-11-30 12:01:44 -08002564 const int w = block_size_wide[mbmi->sb_type];
2565 const int h = block_size_high[mbmi->sb_type];
Yaowu Xuc27fc142016-08-22 16:08:15 -07002566 int offset;
2567 int y_stride;
2568 const uint8_t *y;
2569
2570 const struct buf_2d backup_pred = pd->pre[is_second];
2571 if (use_upsampled_ref) {
2572 int ref = xd->mi[0]->mbmi.ref_frame[is_second];
2573 const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
2574 setup_pred_plane(&pd->pre[is_second], upsampled_ref->y_buffer,
2575 upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
2576 upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
2577 NULL, pd->subsampling_x, pd->subsampling_y);
2578 }
2579 y = pd->pre[is_second].buf;
2580 y_stride = pd->pre[is_second].stride;
2581 offset = bestmv->row * y_stride + bestmv->col;
2582
Alex Converse6317c882016-09-29 14:21:37 -07002583 if (!allow_hp)
Yaowu Xuc27fc142016-08-22 16:08:15 -07002584 if (round == 3) round = 2;
2585
2586 bestmv->row *= 8;
2587 bestmv->col *= 8;
2588
2589 // use_upsampled_ref can be 0 or 1
2590 if (use_upsampled_ref)
2591 besterr = upsampled_setup_masked_center_error(
2592 xd, mask, mask_stride, bestmv, ref_mv, error_per_bit, vfp, z,
Debargha Mukherjee1ae9f2c2016-10-04 14:30:16 -07002593 src_stride, y, y_stride, w, h, (offset * 8), mvjcost, mvcost, sse1,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002594 distortion);
2595 else
2596 besterr = setup_masked_center_error(
2597 mask, mask_stride, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y,
2598 y_stride, offset, mvjcost, mvcost, sse1, distortion);
2599
2600 for (iter = 0; iter < round; ++iter) {
2601 // Check vertical and horizontal sub-pixel positions.
2602 for (idx = 0; idx < 4; ++idx) {
2603 tr = br + search_step[idx].row;
2604 tc = bc + search_step[idx].col;
2605 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
2606 MV this_mv = { tr, tc };
2607
2608 if (use_upsampled_ref) {
2609 const uint8_t *const pre_address = y + tr * y_stride + tc;
2610
2611 thismse = upsampled_masked_pref_error(
2612 xd, mask, mask_stride, vfp, src_address, src_stride, pre_address,
2613 y_stride, w, h, &sse);
2614 } else {
2615 const uint8_t *const pre_address =
2616 y + (tr >> 3) * y_stride + (tc >> 3);
2617 thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
2618 src_address, src_stride, mask, mask_stride, &sse);
2619 }
2620
2621 cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
2622 mvcost, error_per_bit);
2623
2624 if (cost_array[idx] < besterr) {
2625 best_idx = idx;
2626 besterr = cost_array[idx];
2627 *distortion = thismse;
2628 *sse1 = sse;
2629 }
2630 } else {
2631 cost_array[idx] = INT_MAX;
2632 }
2633 }
2634
2635 // Check diagonal sub-pixel position
2636 kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
2637 kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
2638
2639 tc = bc + kc;
2640 tr = br + kr;
2641 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
2642 MV this_mv = { tr, tc };
2643
2644 if (use_upsampled_ref) {
2645 const uint8_t *const pre_address = y + tr * y_stride + tc;
2646
2647 thismse = upsampled_masked_pref_error(
2648 xd, mask, mask_stride, vfp, src_address, src_stride, pre_address,
2649 y_stride, w, h, &sse);
2650 } else {
2651 const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
2652
2653 thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr), src_address,
2654 src_stride, mask, mask_stride, &sse);
2655 }
2656
2657 cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
2658 error_per_bit);
2659
2660 if (cost_array[4] < besterr) {
2661 best_idx = 4;
2662 besterr = cost_array[4];
2663 *distortion = thismse;
2664 *sse1 = sse;
2665 }
2666 } else {
2667      cost_array[4] = INT_MAX;
2668 }
2669
2670 if (best_idx < 4 && best_idx >= 0) {
2671 br += search_step[best_idx].row;
2672 bc += search_step[best_idx].col;
2673 } else if (best_idx == 4) {
2674 br = tr;
2675 bc = tc;
2676 }
2677
2678 if (iters_per_step > 1 && best_idx != -1) {
2679 if (use_upsampled_ref) {
2680 SECOND_LEVEL_CHECKS_BEST(1);
2681 } else {
2682 SECOND_LEVEL_CHECKS_BEST(0);
2683 }
2684 }
2685
2686 tr = br;
2687 tc = bc;
2688
2689 search_step += 4;
2690 hstep >>= 1;
2691 best_idx = -1;
2692 }
2693
2694  // These lines ensure static analysis doesn't warn that
2695 // tr and tc aren't used after the above point.
2696 (void)tr;
2697 (void)tc;
2698
2699 bestmv->row = br;
2700 bestmv->col = bc;
2701
2702 if (use_upsampled_ref) {
2703 pd->pre[is_second] = backup_pred;
2704 }
2705
2706 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
2707 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
2708 return INT_MAX;
2709
2710 return besterr;
2711}
2712
2713#undef DIST
2714#undef MVC
2715#undef CHECK_BETTER
2716
2717static int get_masked_mvpred_var(const MACROBLOCK *x, const uint8_t *mask,
2718 int mask_stride, const MV *best_mv,
2719 const MV *center_mv,
Yaowu Xuf883b422016-08-30 14:01:10 -07002720 const aom_variance_fn_ptr_t *vfp,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002721 int use_mvcost, int is_second) {
2722 const MACROBLOCKD *const xd = &x->e_mbd;
2723 const struct buf_2d *const what = &x->plane[0].src;
2724 const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
2725 const MV mv = { best_mv->row * 8, best_mv->col * 8 };
2726 unsigned int unused;
2727
2728 return vfp->mvf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
2729 in_what->stride, mask, mask_stride, &unused) +
2730 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
2731 x->errorperbit)
2732 : 0);
2733}
2734
2735int masked_refining_search_sad(const MACROBLOCK *x, const uint8_t *mask,
2736 int mask_stride, MV *ref_mv, int error_per_bit,
2737 int search_range,
Yaowu Xuf883b422016-08-30 14:01:10 -07002738 const aom_variance_fn_ptr_t *fn_ptr,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002739 const MV *center_mv, int is_second) {
2740 const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
2741 const MACROBLOCKD *const xd = &x->e_mbd;
2742 const struct buf_2d *const what = &x->plane[0].src;
2743 const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
2744 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2745 unsigned int best_sad =
2746 fn_ptr->msdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2747 in_what->stride, mask, mask_stride) +
2748 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2749 int i, j;
2750
2751 for (i = 0; i < search_range; i++) {
2752 int best_site = -1;
2753
2754 for (j = 0; j < 4; j++) {
2755 const MV mv = { ref_mv->row + neighbors[j].row,
2756 ref_mv->col + neighbors[j].col };
2757 if (is_mv_in(x, &mv)) {
2758 unsigned int sad =
2759 fn_ptr->msdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
2760 in_what->stride, mask, mask_stride);
2761 if (sad < best_sad) {
2762 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2763 if (sad < best_sad) {
2764 best_sad = sad;
2765 best_site = j;
2766 }
2767 }
2768 }
2769 }
2770
2771 if (best_site == -1) {
2772 break;
2773 } else {
2774 ref_mv->row += neighbors[best_site].row;
2775 ref_mv->col += neighbors[best_site].col;
2776 }
2777 }
2778 return best_sad;
2779}
2780
2781int masked_diamond_search_sad(const MACROBLOCK *x,
2782 const search_site_config *cfg,
2783 const uint8_t *mask, int mask_stride, MV *ref_mv,
2784 MV *best_mv, int search_param, int sad_per_bit,
Yaowu Xuf883b422016-08-30 14:01:10 -07002785 int *num00, const aom_variance_fn_ptr_t *fn_ptr,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002786 const MV *center_mv, int is_second) {
2787 const MACROBLOCKD *const xd = &x->e_mbd;
2788 const struct buf_2d *const what = &x->plane[0].src;
2789 const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
2790 // search_param determines the length of the initial step and hence the number
2791  // of iterations.
2792  // 0 = initial step (MAX_FIRST_STEP) pel, 1 = (MAX_FIRST_STEP/2) pel,
2793  // 2 = (MAX_FIRST_STEP/4) pel, etc.
2794 const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
2795 const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
2796 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2797 const uint8_t *best_address, *in_what_ref;
2798 int best_sad = INT_MAX;
2799 int best_site = 0;
2800 int last_site = 0;
2801 int i, j, step;
2802
2803 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
2804 in_what_ref = get_buf_from_mv(in_what, ref_mv);
2805 best_address = in_what_ref;
2806 *num00 = 0;
2807 *best_mv = *ref_mv;
2808
2809 // Check the starting position
2810 best_sad = fn_ptr->msdf(what->buf, what->stride, best_address,
2811 in_what->stride, mask, mask_stride) +
2812 mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
2813
2814 i = 1;
2815
2816 for (step = 0; step < tot_steps; step++) {
2817 for (j = 0; j < cfg->searches_per_step; j++) {
2818 const MV mv = { best_mv->row + ss[i].mv.row,
2819 best_mv->col + ss[i].mv.col };
2820 if (is_mv_in(x, &mv)) {
2821 int sad =
2822 fn_ptr->msdf(what->buf, what->stride, best_address + ss[i].offset,
2823 in_what->stride, mask, mask_stride);
2824 if (sad < best_sad) {
2825 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2826 if (sad < best_sad) {
2827 best_sad = sad;
2828 best_site = i;
2829 }
2830 }
2831 }
2832
2833 i++;
2834 }
2835
2836 if (best_site != last_site) {
2837 best_mv->row += ss[best_site].mv.row;
2838 best_mv->col += ss[best_site].mv.col;
2839 best_address += ss[best_site].offset;
2840 last_site = best_site;
2841#if defined(NEW_DIAMOND_SEARCH)
2842 while (1) {
2843 const MV this_mv = { best_mv->row + ss[best_site].mv.row,
2844 best_mv->col + ss[best_site].mv.col };
2845 if (is_mv_in(x, &this_mv)) {
2846 int sad = fn_ptr->msdf(what->buf, what->stride,
2847 best_address + ss[best_site].offset,
2848 in_what->stride, mask, mask_stride);
2849 if (sad < best_sad) {
2850 sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
2851 if (sad < best_sad) {
2852 best_sad = sad;
2853 best_mv->row += ss[best_site].mv.row;
2854 best_mv->col += ss[best_site].mv.col;
2855 best_address += ss[best_site].offset;
2856 continue;
2857 }
2858 }
2859 }
2860 break;
2861 }
2862#endif
2863 } else if (best_address == in_what_ref) {
2864 (*num00)++;
2865 }
2866 }
2867 return best_sad;
2868}
2869
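// Masked analogue of full_pixel_diamond: repeated masked diamond passes with
// progressively finer first steps (skipping passes flagged by num00), followed
// by an optional 1-away masked refining search when do_refine is set.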
Yaowu Xuf883b422016-08-30 14:01:10 -07002870int av1_masked_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
2871 const uint8_t *mask, int mask_stride,
2872 MV *mvp_full, int step_param, int sadpb,
2873 int further_steps, int do_refine,
2874 const aom_variance_fn_ptr_t *fn_ptr,
2875 const MV *ref_mv, MV *dst_mv, int is_second) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002876 MV temp_mv;
2877 int thissme, n, num00 = 0;
2878 int bestsme = masked_diamond_search_sad(x, &cpi->ss_cfg, mask, mask_stride,
2879 mvp_full, &temp_mv, step_param, sadpb,
2880 &n, fn_ptr, ref_mv, is_second);
2881 if (bestsme < INT_MAX)
2882 bestsme = get_masked_mvpred_var(x, mask, mask_stride, &temp_mv, ref_mv,
2883 fn_ptr, 1, is_second);
2884 *dst_mv = temp_mv;
2885
2886 // If there won't be more n-step search, check to see if refining search is
2887 // needed.
2888 if (n > further_steps) do_refine = 0;
2889
2890 while (n < further_steps) {
2891 ++n;
2892
2893 if (num00) {
2894 num00--;
2895 } else {
2896 thissme = masked_diamond_search_sad(
2897 x, &cpi->ss_cfg, mask, mask_stride, mvp_full, &temp_mv,
2898 step_param + n, sadpb, &num00, fn_ptr, ref_mv, is_second);
2899 if (thissme < INT_MAX)
2900 thissme = get_masked_mvpred_var(x, mask, mask_stride, &temp_mv, ref_mv,
2901 fn_ptr, 1, is_second);
2902
2903 // check to see if refining search is needed.
2904 if (num00 > further_steps - n) do_refine = 0;
2905
2906 if (thissme < bestsme) {
2907 bestsme = thissme;
2908 *dst_mv = temp_mv;
2909 }
2910 }
2911 }
2912
2913 // final 1-away diamond refining search
2914 if (do_refine) {
2915 const int search_range = 8;
2916 MV best_mv = *dst_mv;
2917 thissme =
2918 masked_refining_search_sad(x, mask, mask_stride, &best_mv, sadpb,
2919 search_range, fn_ptr, ref_mv, is_second);
2920 if (thissme < INT_MAX)
2921 thissme = get_masked_mvpred_var(x, mask, mask_stride, &best_mv, ref_mv,
2922 fn_ptr, 1, is_second);
2923 if (thissme < bestsme) {
2924 bestsme = thissme;
2925 *dst_mv = best_mv;
2926 }
2927 }
2928 return bestsme;
2929}
2930#endif // CONFIG_EXT_INTER
2931
Yue Chencb60b182016-10-13 15:18:22 -07002932#if CONFIG_MOTION_VAR
Yaowu Xuc27fc142016-08-22 16:08:15 -07002933/* returns subpixel variance error function */
2934#define DIST(r, c) \
2935 vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
2936
2937/* checks if (r, c) has better score than previous best */
2938#define MVC(r, c) \
2939 (mvcost \
2940 ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + mvcost[0][((r)-rr)] + \
2941 mvcost[1][((c)-rc)]) * \
2942 error_per_bit + \
2943 4096) >> \
2944 13 \
2945 : 0)
2946
2947#define CHECK_BETTER(v, r, c) \
2948 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
2949 thismse = (DIST(r, c)); \
2950 if ((v = MVC(r, c) + thismse) < besterr) { \
2951 besterr = v; \
2952 br = r; \
2953 bc = c; \
2954 *distortion = thismse; \
2955 *sse1 = sse; \
2956 } \
2957 } else { \
2958 v = INT_MAX; \
2959 }
2960
2961#undef CHECK_BETTER0
2962#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
2963
2964#undef CHECK_BETTER1
2965#define CHECK_BETTER1(v, r, c) \
2966 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
2967 thismse = upsampled_obmc_pref_error( \
2968 xd, mask, vfp, z, upre(y, y_stride, r, c), y_stride, w, h, &sse); \
2969 if ((v = MVC(r, c) + thismse) < besterr) { \
2970 besterr = v; \
2971 br = r; \
2972 bc = c; \
2973 *distortion = thismse; \
2974 *sse1 = sse; \
2975 } \
2976 } else { \
2977 v = INT_MAX; \
2978 }
2979
2980static unsigned int setup_obmc_center_error(
2981 const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
Yaowu Xuf883b422016-08-30 14:01:10 -07002982 const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002983 const uint8_t *const y, int y_stride, int offset, int *mvjcost,
2984 int *mvcost[2], unsigned int *sse1, int *distortion) {
2985 unsigned int besterr;
2986 besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
2987 *distortion = besterr;
2988 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
2989 return besterr;
2990}
2991
2992static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, const int32_t *mask,
Yaowu Xuf883b422016-08-30 14:01:10 -07002993 const aom_variance_fn_ptr_t *vfp,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002994 const int32_t *const wsrc,
2995 const uint8_t *const y, int y_stride,
2996 int w, int h, unsigned int *sse) {
2997 unsigned int besterr;
Yaowu Xuf883b422016-08-30 14:01:10 -07002998#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07002999 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3000 DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
Yaowu Xuf883b422016-08-30 14:01:10 -07003001 aom_highbd_upsampled_pred(pred16, w, h, y, y_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003002
3003 besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, mask, sse);
3004 } else {
3005 DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
3006#else
3007 DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
3008 (void)xd;
Yaowu Xuf883b422016-08-30 14:01:10 -07003009#endif // CONFIG_AOM_HIGHBITDEPTH
3010 aom_upsampled_pred(pred, w, h, y, y_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003011
3012 besterr = vfp->ovf(pred, w, wsrc, mask, sse);
Yaowu Xuf883b422016-08-30 14:01:10 -07003013#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07003014 }
3015#endif
3016 return besterr;
3017}
3018
static unsigned int upsampled_setup_obmc_center_error(
    const MACROBLOCKD *xd, const int32_t *mask, const MV *bestmv,
    const MV *ref_mv, int error_per_bit, const aom_variance_fn_ptr_t *vfp,
    const int32_t *const wsrc, const uint8_t *const y, int y_stride, int w,
    int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
    int *distortion) {
  unsigned int besterr = upsampled_obmc_pref_error(
      xd, mask, vfp, wsrc, y + offset, y_stride, w, h, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  return besterr;
}

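// Sub-pel OBMC motion-vector refinement. Starting from the full-pel *bestmv,
// each round probes a diamond of candidates around the current best at half-,
// quarter- and (if allow_hp) eighth-pel precision, scoring them with the OBMC
// variance functions above plus the MV rate cost. Returns the best error, or
// INT_MAX if the refined MV ends up outside the allowed range.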
int av1_find_best_obmc_sub_pixel_tree_up(
    const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv,
    const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
    int is_second, int use_upsampled_ref) {
  const int32_t *wsrc = x->wsrc_buf;
  const int32_t *mask = x->mask_buf;
  const int *const z = wsrc;
  const int *const src_address = z;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int thismse;

  int rr = ref_mv->row;
  int rc = ref_mv->col;
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
  int hstep = 4;
  int iter;
  int round = 3 - forced_stop;
  const int minc = AOMMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
  const int maxc = AOMMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
  const int minr = AOMMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
  const int maxr = AOMMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
  int tr = br;
  int tc = bc;
  const MV *search_step = search_step_table;
  int idx, best_idx = -1;
  unsigned int cost_array[5];
  int kr, kc;
  const int w = block_size_wide[mbmi->sb_type];
  const int h = block_size_high[mbmi->sb_type];
  int offset;
  int y_stride;
  const uint8_t *y;

  const struct buf_2d backup_pred = pd->pre[is_second];
  if (use_upsampled_ref) {
    int ref = xd->mi[0]->mbmi.ref_frame[is_second];
    const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
    setup_pred_plane(&pd->pre[is_second], upsampled_ref->y_buffer,
                     upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
                     upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }
  y = pd->pre[is_second].buf;
  y_stride = pd->pre[is_second].stride;
  offset = bestmv->row * y_stride + bestmv->col;

  // Skip the 1/8-pel refinement round unless high-precision MVs are allowed.
  if (!allow_hp && round == 3) round = 2;

  bestmv->row *= 8;
  bestmv->col *= 8;
  // use_upsampled_ref can be 0 or 1
  if (use_upsampled_ref)
    besterr = upsampled_setup_obmc_center_error(
        xd, mask, bestmv, ref_mv, error_per_bit, vfp, z, y, y_stride, w, h,
        (offset * 8), mvjcost, mvcost, sse1, distortion);
  else
    besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
                                      z, y, y_stride, offset, mvjcost, mvcost,
                                      sse1, distortion);

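  // Each round tests the four cardinal neighbours at +/-hstep (in 1/8-pel
  // units) around the current best, then the more promising diagonal,
  // optionally runs SECOND_LEVEL_CHECKS_BEST(), and halves hstep before the
  // next round.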
  for (iter = 0; iter < round; ++iter) {
    // Check vertical and horizontal sub-pixel positions.
    for (idx = 0; idx < 4; ++idx) {
      tr = br + search_step[idx].row;
      tc = bc + search_step[idx].col;
      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
        MV this_mv = { tr, tc };

        if (use_upsampled_ref) {
          const uint8_t *const pre_address = y + tr * y_stride + tc;

          thismse = upsampled_obmc_pref_error(
              xd, mask, vfp, src_address, pre_address, y_stride, w, h, &sse);
        } else {
          const uint8_t *const pre_address =
              y + (tr >> 3) * y_stride + (tc >> 3);
          thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, mask, &sse);
        }

        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
                                                mvcost, error_per_bit);
        if (cost_array[idx] < besterr) {
          best_idx = idx;
          besterr = cost_array[idx];
          *distortion = thismse;
          *sse1 = sse;
        }
      } else {
        cost_array[idx] = INT_MAX;
      }
    }

    // Check diagonal sub-pixel position
    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);

    tc = bc + kc;
    tr = br + kr;
    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
      MV this_mv = { tr, tc };

      if (use_upsampled_ref) {
        const uint8_t *const pre_address = y + tr * y_stride + tc;

        thismse = upsampled_obmc_pref_error(xd, mask, vfp, src_address,
                                            pre_address, y_stride, w, h, &sse);
      } else {
        const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);

        thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr), src_address,
                            mask, &sse);
      }

      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
                                            error_per_bit);

      if (cost_array[4] < besterr) {
        best_idx = 4;
        besterr = cost_array[4];
        *distortion = thismse;
        *sse1 = sse;
      }
    } else {
      cost_array[4] = INT_MAX;
    }

    if (best_idx < 4 && best_idx >= 0) {
      br += search_step[best_idx].row;
      bc += search_step[best_idx].col;
    } else if (best_idx == 4) {
      br = tr;
      bc = tc;
    }

    if (iters_per_step > 1 && best_idx != -1) {
      if (use_upsampled_ref) {
        SECOND_LEVEL_CHECKS_BEST(1);
      } else {
        SECOND_LEVEL_CHECKS_BEST(0);
      }
    }

    tr = br;
    tc = bc;

    search_step += 4;
    hstep >>= 1;
    best_idx = -1;
  }

  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  if (use_upsampled_ref) {
    pd->pre[is_second] = backup_pred;
  }

  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

#undef DIST
#undef MVC
#undef CHECK_BETTER

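// Full-pel helper: OBMC variance of best_mv plus (optionally) its MV rate
// cost relative to center_mv; used to re-score SAD-based full-pel winners.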
static int get_obmc_mvpred_var(const MACROBLOCK *x, const int32_t *wsrc,
                               const int32_t *mask, const MV *best_mv,
                               const MV *center_mv,
                               const aom_variance_fn_ptr_t *vfp, int use_mvcost,
                               int is_second) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
  const MV mv = { best_mv->row * 8, best_mv->col * 8 };
  unsigned int unused;

  return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride, wsrc,
                  mask, &unused) +
         (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
                                   x->errorperbit)
                     : 0);
}

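// 1-pel refining search: repeatedly evaluates the four immediate neighbours
// of *ref_mv with the OBMC SAD (fn_ptr->osdf) plus MV cost and moves to the
// best one, stopping when no neighbour improves or search_range iterations
// are exhausted.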
int obmc_refining_search_sad(const MACROBLOCK *x, const int32_t *wsrc,
                             const int32_t *mask, MV *ref_mv, int error_per_bit,
                             int search_range,
                             const aom_variance_fn_ptr_t *fn_ptr,
                             const MV *center_mv, int is_second) {
  const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv),
                                       in_what->stride, wsrc, mask) +
                          mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  int i, j;

  for (i = 0; i < search_range; i++) {
    int best_site = -1;

    for (j = 0; j < 4; j++) {
      const MV mv = { ref_mv->row + neighbors[j].row,
                      ref_mv->col + neighbors[j].col };
      if (is_mv_in(x, &mv)) {
        unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv),
                                        in_what->stride, wsrc, mask);
        if (sad < best_sad) {
          sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
          if (sad < best_sad) {
            best_sad = sad;
            best_site = j;
          }
        }
      }
    }

    if (best_site == -1) {
      break;
    } else {
      ref_mv->row += neighbors[best_site].row;
      ref_mv->col += neighbors[best_site].col;
    }
  }
  return best_sad;
}

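// Diamond search over the OBMC SAD: walks the precomputed search sites in cfg
// starting at the radius selected by search_param, moves the centre whenever a
// site improves on the current best, and counts in *num00 the steps on which
// the search stayed at its starting position.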
int obmc_diamond_search_sad(const MACROBLOCK *x, const search_site_config *cfg,
                            const int32_t *wsrc, const int32_t *mask,
                            MV *ref_mv, MV *best_mv, int search_param,
                            int sad_per_bit, int *num00,
                            const aom_variance_fn_ptr_t *fn_ptr,
                            const MV *center_mv, int is_second) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
  // search_param determines the length of the initial step and hence the
  // number of iterations:
  // 0 = initial step (MAX_FIRST_STEP) pel, 1 = (MAX_FIRST_STEP/2) pel,
  // 2 = (MAX_FIRST_STEP/4) pel, etc.
  const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
  const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  const uint8_t *best_address, *in_what_ref;
  int best_sad = INT_MAX;
  int best_site = 0;
  int last_site = 0;
  int i, j, step;

  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  in_what_ref = in_what->buf + ref_mv->row * in_what->stride + ref_mv->col;
  best_address = in_what_ref;
  *num00 = 0;
  *best_mv = *ref_mv;

  // Check the starting position
  best_sad = fn_ptr->osdf(best_address, in_what->stride, wsrc, mask) +
             mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);

  i = 1;

  for (step = 0; step < tot_steps; step++) {
    for (j = 0; j < cfg->searches_per_step; j++) {
      const MV mv = { best_mv->row + ss[i].mv.row,
                      best_mv->col + ss[i].mv.col };
      if (is_mv_in(x, &mv)) {
        int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride,
                               wsrc, mask);
        if (sad < best_sad) {
          sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
          if (sad < best_sad) {
            best_sad = sad;
            best_site = i;
          }
        }
      }

      i++;
    }

    if (best_site != last_site) {
      best_mv->row += ss[best_site].mv.row;
      best_mv->col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
#if defined(NEW_DIAMOND_SEARCH)
      while (1) {
        const MV this_mv = { best_mv->row + ss[best_site].mv.row,
                             best_mv->col + ss[best_site].mv.col };
        if (is_mv_in(x, &this_mv)) {
          int sad = fn_ptr->osdf(best_address + ss[best_site].offset,
                                 in_what->stride, wsrc, mask);
          if (sad < best_sad) {
            sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
            if (sad < best_sad) {
              best_sad = sad;
              best_mv->row += ss[best_site].mv.row;
              best_mv->col += ss[best_site].mv.col;
              best_address += ss[best_site].offset;
              continue;
            }
          }
        }
        break;
      }
#endif
    } else if (best_address == in_what_ref) {
      (*num00)++;
    }
  }
  return best_sad;
}

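// Full-pel OBMC motion search driver: runs obmc_diamond_search_sad() with
// successively smaller initial steps, re-scores each full-pel result with
// get_obmc_mvpred_var(), and optionally finishes with an
// obmc_refining_search_sad() pass around the best candidate.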
int av1_obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
                                MV *mvp_full, int step_param, int sadpb,
                                int further_steps, int do_refine,
                                const aom_variance_fn_ptr_t *fn_ptr,
                                const MV *ref_mv, MV *dst_mv, int is_second) {
  const int32_t *wsrc = x->wsrc_buf;
  const int32_t *mask = x->mask_buf;
  MV temp_mv;
  int thissme, n, num00 = 0;
  int bestsme =
      obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full, &temp_mv,
                              step_param, sadpb, &n, fn_ptr, ref_mv, is_second);
  if (bestsme < INT_MAX)
    bestsme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr, 1,
                                  is_second);
  *dst_mv = temp_mv;

  // If there won't be more n-step search, check to see if refining search is
  // needed.
  if (n > further_steps) do_refine = 0;

  while (n < further_steps) {
    ++n;

    if (num00) {
      num00--;
    } else {
      thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full,
                                        &temp_mv, step_param + n, sadpb, &num00,
                                        fn_ptr, ref_mv, is_second);
      if (thissme < INT_MAX)
        thissme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr,
                                      1, is_second);

      // check to see if refining search is needed.
      if (num00 > further_steps - n) do_refine = 0;

      if (thissme < bestsme) {
        bestsme = thissme;
        *dst_mv = temp_mv;
      }
    }
  }

  // final 1-away diamond refining search
  if (do_refine) {
    const int search_range = 8;
    MV best_mv = *dst_mv;
    thissme = obmc_refining_search_sad(x, wsrc, mask, &best_mv, sadpb,
                                       search_range, fn_ptr, ref_mv, is_second);
    if (thissme < INT_MAX)
      thissme = get_obmc_mvpred_var(x, wsrc, mask, &best_mv, ref_mv, fn_ptr, 1,
                                    is_second);
    if (thissme < bestsme) {
      bestsme = thissme;
      *dst_mv = best_mv;
    }
  }
  return bestsme;
}
#endif  // CONFIG_MOTION_VAR