blob: c74609d3bbed267881ec771b332e0ff1ed8dfe75 [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xubde4ac82016-11-28 15:26:06 -08002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xubde4ac82016-11-28 15:26:06 -08004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xuc27fc142016-08-22 16:08:15 -070010 */
11
12#include <stdio.h>
13#include <stdlib.h>
14#include <memory.h>
15#include <math.h>
16#include <assert.h>
17
David Barkerd5dfa962017-01-10 15:06:08 +000018#include "./av1_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070019#include "av1/common/warped_motion.h"
20
/* clang-format off */
// 512-entry error-measure lookup table. The values are symmetric about
// index 255 (where the entry is 0) and rise to 16384 (= 1 << 14) at both
// ends; per the author's note below they follow a pow(., 0.7) curve.
// NOTE(review): presumably indexed by a signed residual offset by 256 —
// confirm against the call sites (not visible in this chunk).
static const int error_measure_lut[512] = {
  // pow 0.7
  16384, 16339, 16294, 16249, 16204, 16158, 16113, 16068,
  16022, 15977, 15932, 15886, 15840, 15795, 15749, 15703,
  15657, 15612, 15566, 15520, 15474, 15427, 15381, 15335,
  15289, 15242, 15196, 15149, 15103, 15056, 15010, 14963,
  14916, 14869, 14822, 14775, 14728, 14681, 14634, 14587,
  14539, 14492, 14445, 14397, 14350, 14302, 14254, 14206,
  14159, 14111, 14063, 14015, 13967, 13918, 13870, 13822,
  13773, 13725, 13676, 13628, 13579, 13530, 13481, 13432,
  13383, 13334, 13285, 13236, 13187, 13137, 13088, 13038,
  12988, 12939, 12889, 12839, 12789, 12739, 12689, 12639,
  12588, 12538, 12487, 12437, 12386, 12335, 12285, 12234,
  12183, 12132, 12080, 12029, 11978, 11926, 11875, 11823,
  11771, 11719, 11667, 11615, 11563, 11511, 11458, 11406,
  11353, 11301, 11248, 11195, 11142, 11089, 11036, 10982,
  10929, 10875, 10822, 10768, 10714, 10660, 10606, 10552,
  10497, 10443, 10388, 10333, 10279, 10224, 10168, 10113,
  10058, 10002,  9947,  9891,  9835,  9779,  9723,  9666,
   9610,  9553,  9497,  9440,  9383,  9326,  9268,  9211,
   9153,  9095,  9037,  8979,  8921,  8862,  8804,  8745,
   8686,  8627,  8568,  8508,  8449,  8389,  8329,  8269,
   8208,  8148,  8087,  8026,  7965,  7903,  7842,  7780,
   7718,  7656,  7593,  7531,  7468,  7405,  7341,  7278,
   7214,  7150,  7086,  7021,  6956,  6891,  6826,  6760,
   6695,  6628,  6562,  6495,  6428,  6361,  6293,  6225,
   6157,  6089,  6020,  5950,  5881,  5811,  5741,  5670,
   5599,  5527,  5456,  5383,  5311,  5237,  5164,  5090,
   5015,  4941,  4865,  4789,  4713,  4636,  4558,  4480,
   4401,  4322,  4242,  4162,  4080,  3998,  3916,  3832,
   3748,  3663,  3577,  3490,  3402,  3314,  3224,  3133,
   3041,  2948,  2854,  2758,  2661,  2562,  2461,  2359,
   2255,  2148,  2040,  1929,  1815,  1698,  1577,  1452,
   1323,  1187,  1045,   894,   731,   550,   339,     0,
    339,   550,   731,   894,  1045,  1187,  1323,  1452,
   1577,  1698,  1815,  1929,  2040,  2148,  2255,  2359,
   2461,  2562,  2661,  2758,  2854,  2948,  3041,  3133,
   3224,  3314,  3402,  3490,  3577,  3663,  3748,  3832,
   3916,  3998,  4080,  4162,  4242,  4322,  4401,  4480,
   4558,  4636,  4713,  4789,  4865,  4941,  5015,  5090,
   5164,  5237,  5311,  5383,  5456,  5527,  5599,  5670,
   5741,  5811,  5881,  5950,  6020,  6089,  6157,  6225,
   6293,  6361,  6428,  6495,  6562,  6628,  6695,  6760,
   6826,  6891,  6956,  7021,  7086,  7150,  7214,  7278,
   7341,  7405,  7468,  7531,  7593,  7656,  7718,  7780,
   7842,  7903,  7965,  8026,  8087,  8148,  8208,  8269,
   8329,  8389,  8449,  8508,  8568,  8627,  8686,  8745,
   8804,  8862,  8921,  8979,  9037,  9095,  9153,  9211,
   9268,  9326,  9383,  9440,  9497,  9553,  9610,  9666,
   9723,  9779,  9835,  9891,  9947, 10002, 10058, 10113,
  10168, 10224, 10279, 10333, 10388, 10443, 10497, 10552,
  10606, 10660, 10714, 10768, 10822, 10875, 10929, 10982,
  11036, 11089, 11142, 11195, 11248, 11301, 11353, 11406,
  11458, 11511, 11563, 11615, 11667, 11719, 11771, 11823,
  11875, 11926, 11978, 12029, 12080, 12132, 12183, 12234,
  12285, 12335, 12386, 12437, 12487, 12538, 12588, 12639,
  12689, 12739, 12789, 12839, 12889, 12939, 12988, 13038,
  13088, 13137, 13187, 13236, 13285, 13334, 13383, 13432,
  13481, 13530, 13579, 13628, 13676, 13725, 13773, 13822,
  13870, 13918, 13967, 14015, 14063, 14111, 14159, 14206,
  14254, 14302, 14350, 14397, 14445, 14492, 14539, 14587,
  14634, 14681, 14728, 14775, 14822, 14869, 14916, 14963,
  15010, 15056, 15103, 15149, 15196, 15242, 15289, 15335,
  15381, 15427, 15474, 15520, 15566, 15612, 15657, 15703,
  15749, 15795, 15840, 15886, 15932, 15977, 16022, 16068,
  16113, 16158, 16204, 16249, 16294, 16339, 16384, 16384,
};
/* clang-format on */
Debargha Mukherjee09055d42016-11-11 13:52:12 -080090
Sarah Parkerf9a961c2016-09-06 11:25:04 -070091static ProjectPointsFunc get_project_points_type(TransformationType type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070092 switch (type) {
Sarah Parkerf9a961c2016-09-06 11:25:04 -070093 case HOMOGRAPHY: return project_points_homography;
94 case AFFINE: return project_points_affine;
95 case ROTZOOM: return project_points_rotzoom;
96 case TRANSLATION: return project_points_translation;
Yaowu Xuc27fc142016-08-22 16:08:15 -070097 default: assert(0); return NULL;
98 }
99}
100
Debargha Mukherjee5f305852016-11-03 15:47:21 -0700101void project_points_translation(int32_t *mat, int *points, int *proj,
Sarah Parkerf9a961c2016-09-06 11:25:04 -0700102 const int n, const int stride_points,
103 const int stride_proj, const int subsampling_x,
104 const int subsampling_y) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700105 int i;
106 for (i = 0; i < n; ++i) {
107 const int x = *(points++), y = *(points++);
108 if (subsampling_x)
109 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800110 ((x * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[0]),
Yaowu Xuc27fc142016-08-22 16:08:15 -0700111 WARPEDDIFF_PREC_BITS + 1);
112 else
113 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800114 ((x * (1 << WARPEDMODEL_PREC_BITS)) + mat[0]), WARPEDDIFF_PREC_BITS);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700115 if (subsampling_y)
116 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800117 ((y * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[1]),
Yaowu Xuc27fc142016-08-22 16:08:15 -0700118 WARPEDDIFF_PREC_BITS + 1);
119 else
120 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800121 ((y * (1 << WARPEDMODEL_PREC_BITS))) + mat[1], WARPEDDIFF_PREC_BITS);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700122 points += stride_points - 2;
123 proj += stride_proj - 2;
124 }
125}
126
Debargha Mukherjee5f305852016-11-03 15:47:21 -0700127void project_points_rotzoom(int32_t *mat, int *points, int *proj, const int n,
Sarah Parkerf9a961c2016-09-06 11:25:04 -0700128 const int stride_points, const int stride_proj,
129 const int subsampling_x, const int subsampling_y) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700130 int i;
131 for (i = 0; i < n; ++i) {
132 const int x = *(points++), y = *(points++);
133 if (subsampling_x)
134 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800135 mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
136 (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700137 WARPEDDIFF_PREC_BITS + 1);
138 else
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800139 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
Yaowu Xuc27fc142016-08-22 16:08:15 -0700140 WARPEDDIFF_PREC_BITS);
141 if (subsampling_y)
142 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800143 -mat[3] * 2 * x + mat[2] * 2 * y + mat[1] +
144 (-mat[3] + mat[2] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700145 WARPEDDIFF_PREC_BITS + 1);
146 else
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800147 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(-mat[3] * x + mat[2] * y + mat[1],
Yaowu Xuc27fc142016-08-22 16:08:15 -0700148 WARPEDDIFF_PREC_BITS);
149 points += stride_points - 2;
150 proj += stride_proj - 2;
151 }
152}
153
Debargha Mukherjee5f305852016-11-03 15:47:21 -0700154void project_points_affine(int32_t *mat, int *points, int *proj, const int n,
Sarah Parkerf9a961c2016-09-06 11:25:04 -0700155 const int stride_points, const int stride_proj,
156 const int subsampling_x, const int subsampling_y) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700157 int i;
158 for (i = 0; i < n; ++i) {
159 const int x = *(points++), y = *(points++);
160 if (subsampling_x)
161 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800162 mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
163 (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700164 WARPEDDIFF_PREC_BITS + 1);
165 else
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800166 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
Yaowu Xuc27fc142016-08-22 16:08:15 -0700167 WARPEDDIFF_PREC_BITS);
168 if (subsampling_y)
169 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800170 mat[4] * 2 * x + mat[5] * 2 * y + mat[1] +
Sarah Parkerc4bcb502016-09-07 13:24:53 -0700171 (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700172 WARPEDDIFF_PREC_BITS + 1);
173 else
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800174 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[4] * x + mat[5] * y + mat[1],
Yaowu Xuc27fc142016-08-22 16:08:15 -0700175 WARPEDDIFF_PREC_BITS);
176 points += stride_points - 2;
177 proj += stride_proj - 2;
178 }
179}
180
Debargha Mukherjee5dfa9302017-02-10 05:00:08 -0800181void project_points_hortrapezoid(int32_t *mat, int *points, int *proj,
182 const int n, const int stride_points,
183 const int stride_proj, const int subsampling_x,
184 const int subsampling_y) {
185 int i;
186 int64_t x, y, Z;
187 int64_t xp, yp;
188 for (i = 0; i < n; ++i) {
189 x = *(points++), y = *(points++);
190 x = (subsampling_x ? 4 * x + 1 : 2 * x);
191 y = (subsampling_y ? 4 * y + 1 : 2 * y);
192
193 Z = (mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
194 xp = (mat[2] * x + mat[3] * y + 2 * mat[0]) *
195 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
196 WARPEDMODEL_PREC_BITS));
197 yp = (mat[5] * y + 2 * mat[1]) *
198 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
199 WARPEDMODEL_PREC_BITS));
200
201 xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
202 yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
203
204 if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
205 if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
206 *(proj++) = xp;
207 *(proj++) = yp;
208
209 points += stride_points - 2;
210 proj += stride_proj - 2;
211 }
212}
213
214void project_points_vertrapezoid(int32_t *mat, int *points, int *proj,
215 const int n, const int stride_points,
216 const int stride_proj, const int subsampling_x,
217 const int subsampling_y) {
218 int i;
219 int64_t x, y, Z;
220 int64_t xp, yp;
221 for (i = 0; i < n; ++i) {
222 x = *(points++), y = *(points++);
223 x = (subsampling_x ? 4 * x + 1 : 2 * x);
224 y = (subsampling_y ? 4 * y + 1 : 2 * y);
225
226 Z = (mat[6] * x + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
227 xp = (mat[2] * x + 2 * mat[0]) *
228 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
229 WARPEDMODEL_PREC_BITS));
230 yp = (mat[4] * x + mat[5] * y + 2 * mat[1]) *
231 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
232 WARPEDMODEL_PREC_BITS));
233
234 xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
235 yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
236
237 if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
238 if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
239 *(proj++) = xp;
240 *(proj++) = yp;
241
242 points += stride_points - 2;
243 proj += stride_proj - 2;
244 }
245}
246
Debargha Mukherjee5f305852016-11-03 15:47:21 -0700247void project_points_homography(int32_t *mat, int *points, int *proj,
Sarah Parkerf9a961c2016-09-06 11:25:04 -0700248 const int n, const int stride_points,
249 const int stride_proj, const int subsampling_x,
250 const int subsampling_y) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700251 int i;
252 int64_t x, y, Z;
253 int64_t xp, yp;
254 for (i = 0; i < n; ++i) {
255 x = *(points++), y = *(points++);
256 x = (subsampling_x ? 4 * x + 1 : 2 * x);
257 y = (subsampling_y ? 4 * y + 1 : 2 * y);
258
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800259 Z = (mat[6] * x + mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
260 xp = (mat[2] * x + mat[3] * y + 2 * mat[0]) *
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700261 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
262 WARPEDMODEL_PREC_BITS));
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800263 yp = (mat[4] * x + mat[5] * y + 2 * mat[1]) *
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700264 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
265 WARPEDMODEL_PREC_BITS));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700266
267 xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
268 yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
269
270 if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
271 if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
272 *(proj++) = xp;
273 *(proj++) = yp;
274
275 points += stride_points - 2;
276 proj += stride_proj - 2;
277 }
278}
279
Yue Chen69f18e12016-09-08 14:48:15 -0700280// 'points' are at original scale, output 'proj's are scaled up by
281// 1 << WARPEDPIXEL_PREC_BITS
282void project_points(WarpedMotionParams *wm_params, int *points, int *proj,
283 const int n, const int stride_points, const int stride_proj,
284 const int subsampling_x, const int subsampling_y) {
285 switch (wm_params->wmtype) {
286 case AFFINE:
287 project_points_affine(wm_params->wmmat, points, proj, n, stride_points,
288 stride_proj, subsampling_x, subsampling_y);
289 break;
290 case ROTZOOM:
291 project_points_rotzoom(wm_params->wmmat, points, proj, n, stride_points,
292 stride_proj, subsampling_x, subsampling_y);
293 break;
294 case HOMOGRAPHY:
295 project_points_homography(wm_params->wmmat, points, proj, n,
296 stride_points, stride_proj, subsampling_x,
297 subsampling_y);
298 break;
299 default: assert(0 && "Invalid warped motion type!"); return;
300 }
301}
302
// N-tap (6-tap) interpolation filter bank: one row of taps per fractional
// phase (WARPEDPIXEL_PREC_SHIFTS phases). Each row's taps sum to 128.
// Consumed by do_ntap_filter(), indexed by the fractional offset.
static const int16_t
    filter_ntap[WARPEDPIXEL_PREC_SHIFTS][WARPEDPIXEL_FILTER_TAPS] = {
#if WARPEDPIXEL_PREC_BITS == 6
      // 64 phases.
      { 0, 0, 128, 0, 0, 0 },     { 0, -1, 128, 2, -1, 0 },
      { 1, -3, 127, 4, -1, 0 },   { 1, -4, 126, 6, -2, 1 },
      { 1, -5, 126, 8, -3, 1 },   { 1, -6, 125, 11, -4, 1 },
      { 1, -7, 124, 13, -4, 1 },  { 2, -8, 123, 15, -5, 1 },
      { 2, -9, 122, 18, -6, 1 },  { 2, -10, 121, 20, -6, 1 },
      { 2, -11, 120, 22, -7, 2 }, { 2, -12, 119, 25, -8, 2 },
      { 3, -13, 117, 27, -8, 2 }, { 3, -13, 116, 29, -9, 2 },
      { 3, -14, 114, 32, -10, 3 }, { 3, -15, 113, 35, -10, 2 },
      { 3, -15, 111, 37, -11, 3 }, { 3, -16, 109, 40, -11, 3 },
      { 3, -16, 108, 42, -12, 3 }, { 4, -17, 106, 45, -13, 3 },
      { 4, -17, 104, 47, -13, 3 }, { 4, -17, 102, 50, -14, 3 },
      { 4, -17, 100, 52, -14, 3 }, { 4, -18, 98, 55, -15, 4 },
      { 4, -18, 96, 58, -15, 3 },  { 4, -18, 94, 60, -16, 4 },
      { 4, -18, 91, 63, -16, 4 },  { 4, -18, 89, 65, -16, 4 },
      { 4, -18, 87, 68, -17, 4 },  { 4, -18, 85, 70, -17, 4 },
      { 4, -18, 82, 73, -17, 4 },  { 4, -18, 80, 75, -17, 4 },
      { 4, -18, 78, 78, -18, 4 },  { 4, -17, 75, 80, -18, 4 },
      { 4, -17, 73, 82, -18, 4 },  { 4, -17, 70, 85, -18, 4 },
      { 4, -17, 68, 87, -18, 4 },  { 4, -16, 65, 89, -18, 4 },
      { 4, -16, 63, 91, -18, 4 },  { 4, -16, 60, 94, -18, 4 },
      { 3, -15, 58, 96, -18, 4 },  { 4, -15, 55, 98, -18, 4 },
      { 3, -14, 52, 100, -17, 4 }, { 3, -14, 50, 102, -17, 4 },
      { 3, -13, 47, 104, -17, 4 }, { 3, -13, 45, 106, -17, 4 },
      { 3, -12, 42, 108, -16, 3 }, { 3, -11, 40, 109, -16, 3 },
      { 3, -11, 37, 111, -15, 3 }, { 2, -10, 35, 113, -15, 3 },
      { 3, -10, 32, 114, -14, 3 }, { 2, -9, 29, 116, -13, 3 },
      { 2, -8, 27, 117, -13, 3 },  { 2, -8, 25, 119, -12, 2 },
      { 2, -7, 22, 120, -11, 2 },  { 1, -6, 20, 121, -10, 2 },
      { 1, -6, 18, 122, -9, 2 },   { 1, -5, 15, 123, -8, 2 },
      { 1, -4, 13, 124, -7, 1 },   { 1, -4, 11, 125, -6, 1 },
      { 1, -3, 8, 126, -5, 1 },    { 1, -2, 6, 126, -4, 1 },
      { 0, -1, 4, 127, -3, 1 },    { 0, -1, 2, 128, -1, 0 },
#else
      // 32 phases (coarser fractional precision).
      { 0, 0, 128, 0, 0, 0 },      { 1, -3, 127, 4, -1, 0 },
      { 1, -5, 126, 8, -3, 1 },    { 1, -7, 124, 13, -4, 1 },
      { 2, -9, 122, 18, -6, 1 },   { 2, -11, 120, 22, -7, 2 },
      { 3, -13, 117, 27, -8, 2 },  { 3, -14, 114, 32, -10, 3 },
      { 3, -15, 111, 37, -11, 3 }, { 3, -16, 108, 42, -12, 3 },
      { 4, -17, 104, 47, -13, 3 }, { 4, -17, 100, 52, -14, 3 },
      { 4, -18, 96, 58, -15, 3 },  { 4, -18, 91, 63, -16, 4 },
      { 4, -18, 87, 68, -17, 4 },  { 4, -18, 82, 73, -17, 4 },
      { 4, -18, 78, 78, -18, 4 },  { 4, -17, 73, 82, -18, 4 },
      { 4, -17, 68, 87, -18, 4 },  { 4, -16, 63, 91, -18, 4 },
      { 3, -15, 58, 96, -18, 4 },  { 3, -14, 52, 100, -17, 4 },
      { 3, -13, 47, 104, -17, 4 }, { 3, -12, 42, 108, -16, 3 },
      { 3, -11, 37, 111, -15, 3 }, { 3, -10, 32, 114, -14, 3 },
      { 2, -8, 27, 117, -13, 3 },  { 2, -7, 22, 120, -11, 2 },
      { 1, -6, 18, 122, -9, 2 },   { 1, -4, 13, 124, -7, 1 },
      { 1, -3, 8, 126, -5, 1 },    { 0, -1, 4, 127, -3, 1 },
#endif  // WARPEDPIXEL_PREC_BITS == 6
    };
357
358static int32_t do_ntap_filter(int32_t *p, int x) {
359 int i;
360 int32_t sum = 0;
361 for (i = 0; i < WARPEDPIXEL_FILTER_TAPS; ++i) {
362 sum += p[i - WARPEDPIXEL_FILTER_TAPS / 2 + 1] * filter_ntap[x][i];
363 }
364 return sum;
365}
366
367static int32_t do_cubic_filter(int32_t *p, int x) {
368 if (x == 0) {
David Barkerf23bdca2016-11-07 13:47:13 +0000369 return p[0] * (1 << WARPEDPIXEL_FILTER_BITS);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700370 } else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
David Barkerf23bdca2016-11-07 13:47:13 +0000371 return p[1] * (1 << WARPEDPIXEL_FILTER_BITS);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700372 } else {
Sarah Parkerdb926352016-10-26 12:46:03 -0700373 const int64_t v1 = (int64_t)x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700374 const int64_t v2 = x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
375 const int64_t v3 = x * (p[1] - p[-1]);
376 const int64_t v4 = 2 * p[0];
377 return (int32_t)ROUND_POWER_OF_TWO_SIGNED(
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700378 (v4 * (1 << (3 * WARPEDPIXEL_PREC_BITS))) +
379 (v3 * (1 << (2 * WARPEDPIXEL_PREC_BITS))) +
380 (v2 * (1 << WARPEDPIXEL_PREC_BITS)) + v1,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700381 3 * WARPEDPIXEL_PREC_BITS + 1 - WARPEDPIXEL_FILTER_BITS);
382 }
383}
384
Yaowu Xuc27fc142016-08-22 16:08:15 -0700385static INLINE void get_subcolumn(int taps, uint8_t *ref, int32_t *col,
386 int stride, int x, int y_start) {
387 int i;
388 for (i = 0; i < taps; ++i) {
389 col[i] = ref[(i + y_start) * stride + x];
390 }
391}
392
393static uint8_t bi_ntap_filter(uint8_t *ref, int x, int y, int stride) {
394 int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
395 int k;
396 int i = (int)x >> WARPEDPIXEL_PREC_BITS;
397 int j = (int)y >> WARPEDPIXEL_PREC_BITS;
398 for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
399 int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
400 get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
401 i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
402 j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
403 arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700404 y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700405 }
406 val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700407 x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700408 val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
409 return (uint8_t)clip_pixel(val);
410}
411
412static uint8_t bi_cubic_filter(uint8_t *ref, int x, int y, int stride) {
413 int32_t val, arr[4];
414 int k;
415 int i = (int)x >> WARPEDPIXEL_PREC_BITS;
416 int j = (int)y >> WARPEDPIXEL_PREC_BITS;
417 for (k = 0; k < 4; ++k) {
418 int32_t arr_temp[4];
419 get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700420 arr[k] =
421 do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700422 }
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700423 val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700424 val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
425 return (uint8_t)clip_pixel(val);
426}
427
428static uint8_t bi_linear_filter(uint8_t *ref, int x, int y, int stride) {
429 const int ix = x >> WARPEDPIXEL_PREC_BITS;
430 const int iy = y >> WARPEDPIXEL_PREC_BITS;
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700431 const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
432 const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700433 int32_t val;
434 val = ROUND_POWER_OF_TWO_SIGNED(
435 ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
436 (WARPEDPIXEL_PREC_SHIFTS - sx) +
437 ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
438 ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
439 ref[(iy + 1) * stride + ix + 1] * sy * sx,
440 WARPEDPIXEL_PREC_BITS * 2);
441 return (uint8_t)clip_pixel(val);
442}
443
// Interpolate 'ref' at fractional position (x, y) (WARPEDPIXEL_PREC_BITS
// fixed point), clamping at the frame borders. Branch order matters: the
// four corner cases are handled first, then the four edges, then the
// interior, where the filter is chosen by how far the sample sits from the
// border (n-tap if there is room for the full support, else bicubic, else
// bilinear). Note the asymmetric tests: "< 0" on the low side but
// ">= dim - 1" on the high side, because interpolation reads one sample
// beyond (ix, iy).
static uint8_t warp_interpolate(uint8_t *ref, int x, int y, int width,
                                int height, int stride) {
  int ix = x >> WARPEDPIXEL_PREC_BITS;
  int iy = y >> WARPEDPIXEL_PREC_BITS;
  // Fractional offsets within the (ix, iy) cell.
  int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
  int32_t v;

  if (ix < 0 && iy < 0)
    return ref[0];  // top-left corner
  else if (ix < 0 && iy >= height - 1)
    return ref[(height - 1) * stride];  // bottom-left corner
  else if (ix >= width - 1 && iy < 0)
    return ref[width - 1];  // top-right corner
  else if (ix >= width - 1 && iy >= height - 1)
    return ref[(height - 1) * stride + (width - 1)];  // bottom-right corner
  else if (ix < 0) {
    // Left edge: interpolate vertically only, in column 0.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
            ref[(iy + 1) * stride] * sy,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (iy < 0) {
    // Top edge: interpolate horizontally only, in row 0.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (ix >= width - 1) {
    // Right edge: interpolate vertically only, in the last column.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
            ref[(iy + 1) * stride + width - 1] * sy,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (iy >= height - 1) {
    // Bottom edge: interpolate horizontally only, in the last row.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
            ref[(height - 1) * stride + ix + 1] * sx,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
    // Deep interior: full n-tap support fits within the frame.
    return bi_ntap_filter(ref, x, y, stride);
  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
    // Near-border interior: a 4x4 bicubic window fits.
    return bi_cubic_filter(ref, x, y, stride);
  } else {
    // Right next to the border: fall back to 2x2 bilinear.
    return bi_linear_filter(ref, x, y, stride);
  }
}
494
David Barkerbe128602016-12-13 16:40:31 +0000495// For warping, we really use a 6-tap filter, but we do blocks of 8 pixels
496// at a time. The zoom/rotation/shear in the model are applied to the
497// "fractional" position of each pixel, which therefore varies within
498// [-1, 2) * WARPEDPIXEL_PREC_SHIFTS.
499// We need an extra 2 taps to fit this in, for a total of 8 taps.
500/* clang-format off */
Debargha Mukherjee082d4df2017-03-16 07:10:41 -0700501const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8] = {
Debargha Mukherjee16056f52017-04-12 16:10:50 -0700502#if WARPEDPIXEL_PREC_BITS == 6
David Barkerbe128602016-12-13 16:40:31 +0000503 // [-1, 0)
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700504 { 0, 0, 127, 1, 0, 0, 0, 0 }, { 0, - 1, 127, 2, 0, 0, 0, 0 },
David Barkerbe128602016-12-13 16:40:31 +0000505 { 1, - 3, 127, 4, - 1, 0, 0, 0 }, { 1, - 4, 126, 6, - 2, 1, 0, 0 },
506 { 1, - 5, 126, 8, - 3, 1, 0, 0 }, { 1, - 6, 125, 11, - 4, 1, 0, 0 },
507 { 1, - 7, 124, 13, - 4, 1, 0, 0 }, { 2, - 8, 123, 15, - 5, 1, 0, 0 },
508 { 2, - 9, 122, 18, - 6, 1, 0, 0 }, { 2, -10, 121, 20, - 6, 1, 0, 0 },
509 { 2, -11, 120, 22, - 7, 2, 0, 0 }, { 2, -12, 119, 25, - 8, 2, 0, 0 },
510 { 3, -13, 117, 27, - 8, 2, 0, 0 }, { 3, -13, 116, 29, - 9, 2, 0, 0 },
511 { 3, -14, 114, 32, -10, 3, 0, 0 }, { 3, -15, 113, 35, -10, 2, 0, 0 },
512 { 3, -15, 111, 37, -11, 3, 0, 0 }, { 3, -16, 109, 40, -11, 3, 0, 0 },
513 { 3, -16, 108, 42, -12, 3, 0, 0 }, { 4, -17, 106, 45, -13, 3, 0, 0 },
514 { 4, -17, 104, 47, -13, 3, 0, 0 }, { 4, -17, 102, 50, -14, 3, 0, 0 },
515 { 4, -17, 100, 52, -14, 3, 0, 0 }, { 4, -18, 98, 55, -15, 4, 0, 0 },
516 { 4, -18, 96, 58, -15, 3, 0, 0 }, { 4, -18, 94, 60, -16, 4, 0, 0 },
517 { 4, -18, 91, 63, -16, 4, 0, 0 }, { 4, -18, 89, 65, -16, 4, 0, 0 },
518 { 4, -18, 87, 68, -17, 4, 0, 0 }, { 4, -18, 85, 70, -17, 4, 0, 0 },
519 { 4, -18, 82, 73, -17, 4, 0, 0 }, { 4, -18, 80, 75, -17, 4, 0, 0 },
520 { 4, -18, 78, 78, -18, 4, 0, 0 }, { 4, -17, 75, 80, -18, 4, 0, 0 },
521 { 4, -17, 73, 82, -18, 4, 0, 0 }, { 4, -17, 70, 85, -18, 4, 0, 0 },
522 { 4, -17, 68, 87, -18, 4, 0, 0 }, { 4, -16, 65, 89, -18, 4, 0, 0 },
523 { 4, -16, 63, 91, -18, 4, 0, 0 }, { 4, -16, 60, 94, -18, 4, 0, 0 },
524 { 3, -15, 58, 96, -18, 4, 0, 0 }, { 4, -15, 55, 98, -18, 4, 0, 0 },
525 { 3, -14, 52, 100, -17, 4, 0, 0 }, { 3, -14, 50, 102, -17, 4, 0, 0 },
526 { 3, -13, 47, 104, -17, 4, 0, 0 }, { 3, -13, 45, 106, -17, 4, 0, 0 },
527 { 3, -12, 42, 108, -16, 3, 0, 0 }, { 3, -11, 40, 109, -16, 3, 0, 0 },
528 { 3, -11, 37, 111, -15, 3, 0, 0 }, { 2, -10, 35, 113, -15, 3, 0, 0 },
529 { 3, -10, 32, 114, -14, 3, 0, 0 }, { 2, - 9, 29, 116, -13, 3, 0, 0 },
530 { 2, - 8, 27, 117, -13, 3, 0, 0 }, { 2, - 8, 25, 119, -12, 2, 0, 0 },
531 { 2, - 7, 22, 120, -11, 2, 0, 0 }, { 1, - 6, 20, 121, -10, 2, 0, 0 },
532 { 1, - 6, 18, 122, - 9, 2, 0, 0 }, { 1, - 5, 15, 123, - 8, 2, 0, 0 },
533 { 1, - 4, 13, 124, - 7, 1, 0, 0 }, { 1, - 4, 11, 125, - 6, 1, 0, 0 },
534 { 1, - 3, 8, 126, - 5, 1, 0, 0 }, { 1, - 2, 6, 126, - 4, 1, 0, 0 },
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700535 { 0, - 1, 4, 127, - 3, 1, 0, 0 }, { 0, 0, 2, 127, - 1, 0, 0, 0 },
David Barkerbe128602016-12-13 16:40:31 +0000536
537 // [0, 1)
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700538 { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 0, -1, 127, 2, 0, 0, 0},
Debargha Mukherjeee6044fe2017-01-19 02:13:14 -0800539 { 0, 1, -3, 127, 4, -2, 1, 0}, { 0, 1, -5, 127, 6, -2, 1, 0},
540 { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 2, -7, 126, 11, -4, 2, -1},
541 {-1, 3, -8, 125, 13, -5, 2, -1}, {-1, 3, -10, 124, 16, -6, 3, -1},
542 {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -12, 122, 20, -7, 3, -1},
543 {-1, 4, -13, 121, 23, -8, 3, -1}, {-2, 5, -14, 120, 25, -9, 4, -1},
544 {-1, 5, -15, 119, 27, -10, 4, -1}, {-1, 5, -16, 118, 30, -11, 4, -1},
545 {-2, 6, -17, 116, 33, -12, 5, -1}, {-2, 6, -17, 114, 35, -12, 5, -1},
546 {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 111, 41, -14, 6, -2},
547 {-2, 7, -19, 110, 43, -15, 6, -2}, {-2, 7, -20, 108, 46, -15, 6, -2},
548 {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 104, 51, -16, 7, -2},
549 {-2, 7, -21, 102, 54, -17, 7, -2}, {-2, 8, -21, 100, 56, -18, 7, -2},
550 {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 96, 62, -19, 7, -2},
551 {-2, 8, -22, 94, 64, -19, 7, -2}, {-2, 8, -22, 91, 67, -20, 8, -2},
552 {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -22, 87, 72, -21, 8, -2},
553 {-2, 8, -21, 84, 74, -21, 8, -2}, {-2, 8, -22, 82, 77, -21, 8, -2},
554 {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 77, 82, -22, 8, -2},
555 {-2, 8, -21, 74, 84, -21, 8, -2}, {-2, 8, -21, 72, 87, -22, 8, -2},
556 {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 8, -20, 67, 91, -22, 8, -2},
557 {-2, 7, -19, 64, 94, -22, 8, -2}, {-2, 7, -19, 62, 96, -22, 8, -2},
558 {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -18, 56, 100, -21, 8, -2},
559 {-2, 7, -17, 54, 102, -21, 7, -2}, {-2, 7, -16, 51, 104, -21, 7, -2},
560 {-2, 6, -16, 49, 106, -20, 7, -2}, {-2, 6, -15, 46, 108, -20, 7, -2},
561 {-2, 6, -15, 43, 110, -19, 7, -2}, {-2, 6, -14, 41, 111, -19, 7, -2},
562 {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 35, 114, -17, 6, -2},
563 {-1, 5, -12, 33, 116, -17, 6, -2}, {-1, 4, -11, 30, 118, -16, 5, -1},
564 {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 4, -9, 25, 120, -14, 5, -2},
565 {-1, 3, -8, 23, 121, -13, 4, -1}, {-1, 3, -7, 20, 122, -12, 4, -1},
566 {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 3, -6, 16, 124, -10, 3, -1},
567 {-1, 2, -5, 13, 125, -8, 3, -1}, {-1, 2, -4, 11, 126, -7, 2, -1},
568 { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 6, 127, -5, 1, 0},
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700569 { 0, 1, -2, 4, 127, -3, 1, 0}, { 0, 0, 0, 2, 127, -1, 0, 0},
David Barkerbe128602016-12-13 16:40:31 +0000570
571 // [1, 2)
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700572 { 0, 0, 0, 1, 127, 0, 0, 0 }, { 0, 0, 0, - 1, 127, 2, 0, 0 },
David Barkerbe128602016-12-13 16:40:31 +0000573 { 0, 0, 1, - 3, 127, 4, - 1, 0 }, { 0, 0, 1, - 4, 126, 6, - 2, 1 },
574 { 0, 0, 1, - 5, 126, 8, - 3, 1 }, { 0, 0, 1, - 6, 125, 11, - 4, 1 },
575 { 0, 0, 1, - 7, 124, 13, - 4, 1 }, { 0, 0, 2, - 8, 123, 15, - 5, 1 },
576 { 0, 0, 2, - 9, 122, 18, - 6, 1 }, { 0, 0, 2, -10, 121, 20, - 6, 1 },
577 { 0, 0, 2, -11, 120, 22, - 7, 2 }, { 0, 0, 2, -12, 119, 25, - 8, 2 },
578 { 0, 0, 3, -13, 117, 27, - 8, 2 }, { 0, 0, 3, -13, 116, 29, - 9, 2 },
579 { 0, 0, 3, -14, 114, 32, -10, 3 }, { 0, 0, 3, -15, 113, 35, -10, 2 },
580 { 0, 0, 3, -15, 111, 37, -11, 3 }, { 0, 0, 3, -16, 109, 40, -11, 3 },
581 { 0, 0, 3, -16, 108, 42, -12, 3 }, { 0, 0, 4, -17, 106, 45, -13, 3 },
582 { 0, 0, 4, -17, 104, 47, -13, 3 }, { 0, 0, 4, -17, 102, 50, -14, 3 },
583 { 0, 0, 4, -17, 100, 52, -14, 3 }, { 0, 0, 4, -18, 98, 55, -15, 4 },
584 { 0, 0, 4, -18, 96, 58, -15, 3 }, { 0, 0, 4, -18, 94, 60, -16, 4 },
585 { 0, 0, 4, -18, 91, 63, -16, 4 }, { 0, 0, 4, -18, 89, 65, -16, 4 },
586 { 0, 0, 4, -18, 87, 68, -17, 4 }, { 0, 0, 4, -18, 85, 70, -17, 4 },
587 { 0, 0, 4, -18, 82, 73, -17, 4 }, { 0, 0, 4, -18, 80, 75, -17, 4 },
588 { 0, 0, 4, -18, 78, 78, -18, 4 }, { 0, 0, 4, -17, 75, 80, -18, 4 },
589 { 0, 0, 4, -17, 73, 82, -18, 4 }, { 0, 0, 4, -17, 70, 85, -18, 4 },
590 { 0, 0, 4, -17, 68, 87, -18, 4 }, { 0, 0, 4, -16, 65, 89, -18, 4 },
591 { 0, 0, 4, -16, 63, 91, -18, 4 }, { 0, 0, 4, -16, 60, 94, -18, 4 },
592 { 0, 0, 3, -15, 58, 96, -18, 4 }, { 0, 0, 4, -15, 55, 98, -18, 4 },
593 { 0, 0, 3, -14, 52, 100, -17, 4 }, { 0, 0, 3, -14, 50, 102, -17, 4 },
594 { 0, 0, 3, -13, 47, 104, -17, 4 }, { 0, 0, 3, -13, 45, 106, -17, 4 },
595 { 0, 0, 3, -12, 42, 108, -16, 3 }, { 0, 0, 3, -11, 40, 109, -16, 3 },
596 { 0, 0, 3, -11, 37, 111, -15, 3 }, { 0, 0, 2, -10, 35, 113, -15, 3 },
597 { 0, 0, 3, -10, 32, 114, -14, 3 }, { 0, 0, 2, - 9, 29, 116, -13, 3 },
598 { 0, 0, 2, - 8, 27, 117, -13, 3 }, { 0, 0, 2, - 8, 25, 119, -12, 2 },
599 { 0, 0, 2, - 7, 22, 120, -11, 2 }, { 0, 0, 1, - 6, 20, 121, -10, 2 },
600 { 0, 0, 1, - 6, 18, 122, - 9, 2 }, { 0, 0, 1, - 5, 15, 123, - 8, 2 },
601 { 0, 0, 1, - 4, 13, 124, - 7, 1 }, { 0, 0, 1, - 4, 11, 125, - 6, 1 },
602 { 0, 0, 1, - 3, 8, 126, - 5, 1 }, { 0, 0, 1, - 2, 6, 126, - 4, 1 },
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700603 { 0, 0, 0, - 1, 4, 127, - 3, 1 }, { 0, 0, 0, 0, 2, 127, - 1, 0 },
Debargha Mukherjee082d4df2017-03-16 07:10:41 -0700604
Debargha Mukherjee16056f52017-04-12 16:10:50 -0700605#else
606 // [-1, 0)
607 {0, 0, 127, 1, 0, 0, 0, 0}, {1, -3, 127, 4, -1, 0, 0, 0},
608 {1, -5, 126, 8, -3, 1, 0, 0}, {1, -7, 124, 13, -4, 1, 0, 0},
609 {2, -9, 122, 18, -6, 1, 0, 0}, {2, -11, 120, 22, -7, 2, 0, 0},
610 {3, -13, 117, 27, -8, 2, 0, 0}, {3, -14, 114, 32, -10, 3, 0, 0},
611 {3, -15, 111, 37, -11, 3, 0, 0}, {3, -16, 108, 42, -12, 3, 0, 0},
612 {4, -17, 104, 47, -13, 3, 0, 0}, {4, -17, 100, 52, -14, 3, 0, 0},
613 {4, -18, 96, 58, -15, 3, 0, 0}, {4, -18, 91, 63, -16, 4, 0, 0},
614 {4, -18, 87, 68, -17, 4, 0, 0}, {4, -18, 82, 73, -17, 4, 0, 0},
615 {4, -18, 78, 78, -18, 4, 0, 0}, {4, -17, 73, 82, -18, 4, 0, 0},
616 {4, -17, 68, 87, -18, 4, 0, 0}, {4, -16, 63, 91, -18, 4, 0, 0},
617 {3, -15, 58, 96, -18, 4, 0, 0}, {3, -14, 52, 100, -17, 4, 0, 0},
618 {3, -13, 47, 104, -17, 4, 0, 0}, {3, -12, 42, 108, -16, 3, 0, 0},
619 {3, -11, 37, 111, -15, 3, 0, 0}, {3, -10, 32, 114, -14, 3, 0, 0},
620 {2, -8, 27, 117, -13, 3, 0, 0}, {2, -7, 22, 120, -11, 2, 0, 0},
621 {1, -6, 18, 122, -9, 2, 0, 0}, {1, -4, 13, 124, -7, 1, 0, 0},
622 {1, -3, 8, 126, -5, 1, 0, 0}, {0, -1, 4, 127, -3, 1, 0, 0},
623 // [0, 1)
624 { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 1, -3, 127, 4, -2, 1, 0},
625 { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 3, -8, 125, 13, -5, 2, -1},
626 {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -13, 121, 23, -8, 3, -1},
627 {-1, 5, -15, 119, 27, -10, 4, -1}, {-2, 6, -17, 116, 33, -12, 5, -1},
628 {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 110, 43, -15, 6, -2},
629 {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 102, 54, -17, 7, -2},
630 {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 94, 64, -19, 7, -2},
631 {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -21, 84, 74, -21, 8, -2},
632 {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 74, 84, -21, 8, -2},
633 {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 7, -19, 64, 94, -22, 8, -2},
634 {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -17, 54, 102, -21, 7, -2},
635 {-2, 6, -16, 49, 106, -20, 7, -2}, {-2, 6, -15, 43, 110, -19, 7, -2},
636 {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 33, 116, -17, 6, -2},
637 {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 3, -8, 23, 121, -13, 4, -1},
638 {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 2, -5, 13, 125, -8, 3, -1},
639 { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 4, 127, -3, 1, 0},
640 // [1, 2)
641 {0, 0, 0, 1, 127, 0, 0, 0}, {0, 0, 1, -3, 127, 4, -1, 0},
642 {0, 0, 1, -5, 126, 8, -3, 1}, {0, 0, 1, -7, 124, 13, -4, 1},
643 {0, 0, 2, -9, 122, 18, -6, 1}, {0, 0, 2, -11, 120, 22, -7, 2},
644 {0, 0, 3, -13, 117, 27, -8, 2}, {0, 0, 3, -14, 114, 32, -10, 3},
645 {0, 0, 3, -15, 111, 37, -11, 3}, {0, 0, 3, -16, 108, 42, -12, 3},
646 {0, 0, 4, -17, 104, 47, -13, 3}, {0, 0, 4, -17, 100, 52, -14, 3},
647 {0, 0, 4, -18, 96, 58, -15, 3}, {0, 0, 4, -18, 91, 63, -16, 4},
648 {0, 0, 4, -18, 87, 68, -17, 4}, {0, 0, 4, -18, 82, 73, -17, 4},
649 {0, 0, 4, -18, 78, 78, -18, 4}, {0, 0, 4, -17, 73, 82, -18, 4},
650 {0, 0, 4, -17, 68, 87, -18, 4}, {0, 0, 4, -16, 63, 91, -18, 4},
651 {0, 0, 3, -15, 58, 96, -18, 4}, {0, 0, 3, -14, 52, 100, -17, 4},
652 {0, 0, 3, -13, 47, 104, -17, 4}, {0, 0, 3, -12, 42, 108, -16, 3},
653 {0, 0, 3, -11, 37, 111, -15, 3}, {0, 0, 3, -10, 32, 114, -14, 3},
654 {0, 0, 2, -8, 27, 117, -13, 3}, {0, 0, 2, -7, 22, 120, -11, 2},
655 {0, 0, 1, -6, 18, 122, -9, 2}, {0, 0, 1, -4, 13, 124, -7, 1},
656 {0, 0, 1, -3, 8, 126, -5, 1}, {0, 0, 0, -1, 4, 127, -3, 1},
657
658#endif // WARPEDPIXEL_PREC_BITS == 6
659
Debargha Mukherjee082d4df2017-03-16 07:10:41 -0700660 // dummy
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700661 { 0, 0, 0, 0, 1, 127, 0, 0 },
David Barkerbe128602016-12-13 16:40:31 +0000662};
Debargha Mukherjee16056f52017-04-12 16:10:50 -0700663
David Barkerbe128602016-12-13 16:40:31 +0000664/* clang-format on */
665
// Precision (in bits) of the reciprocal multipliers stored in div_lut.
#define DIV_LUT_PREC_BITS 14
// log2 of the number of mantissa intervals covered by the table.
#define DIV_LUT_BITS 8
// Number of intervals; one extra entry is kept so that a mantissa rounded
// up to exactly DIV_LUT_NUM still indexes in bounds.
#define DIV_LUT_NUM (1 << DIV_LUT_BITS)

// Reciprocal lookup table used by resolve_divisor_32/64: for a normalized
// 8-bit mantissa f in [0, DIV_LUT_NUM],
//   div_lut[f] ~= 2^DIV_LUT_PREC_BITS / (1 + f / DIV_LUT_NUM)
// so entries run from 16384 (divisor 1.0) down to 8192 (divisor 2.0).
static const uint16_t div_lut[DIV_LUT_NUM + 1] = {
  16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
  15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
  15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
  14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
  13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
  13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
  13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
  12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
  12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
  11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
  11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
  11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
  10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
  10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
  10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
  9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
  9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
  9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
  9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
  9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
  8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
  8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
  8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
  8240,  8224,  8208,  8192,
};
696
// Clamp a 32-bit value into the representable range of int16_t.
static inline int16_t saturate_int16(int32_t v) {
  const int32_t lo = INT16_MIN;
  const int32_t hi = INT16_MAX;
  return (int16_t)(v < lo ? lo : (v > hi ? hi : v));
}
704
#if CONFIG_WARPED_MOTION
// Decomposes a divisor D such that 1/D = y/2^shift, where y is returned
// at precision of DIV_LUT_PREC_BITS along with the shift.
static int16_t resolve_divisor_64(uint64_t D, int16_t *shift) {
  // Position of the most significant set bit of D (split to handle
  // divisors wider than 32 bits).
  *shift = (D >> 32) ? get_msb(D >> 32) + 32 : get_msb(D);
  // Strip the leading 1 bit to obtain the mantissa part of D.
  const int64_t mantissa = D - ((uint64_t)1 << *shift);
  // Reduce (or extend) the mantissa to exactly DIV_LUT_BITS (8) bits.
  int64_t lut_idx;
  if (*shift > DIV_LUT_BITS)
    lut_idx = ROUND_POWER_OF_TWO_64(mantissa, *shift - DIV_LUT_BITS);
  else
    lut_idx = mantissa << (DIV_LUT_BITS - *shift);
  assert(lut_idx <= DIV_LUT_NUM);
  *shift += DIV_LUT_PREC_BITS;
  // Look up the reciprocal multiplier for this mantissa interval.
  return div_lut[lut_idx];
}
#endif  // CONFIG_WARPED_MOTION
724
725static int16_t resolve_divisor_32(uint32_t D, int16_t *shift) {
726 int32_t e, f;
727 *shift = get_msb(D);
728 // e is obtained from D after resetting the most significant 1 bit.
729 e = D - ((uint32_t)1 << *shift);
730 // Get the most significant DIV_LUT_BITS (8) bits of e into f
731 if (*shift > DIV_LUT_BITS)
732 f = ROUND_POWER_OF_TWO(e, *shift - DIV_LUT_BITS);
733 else
734 f = e << (DIV_LUT_BITS - *shift);
735 assert(f <= DIV_LUT_NUM);
736 *shift += DIV_LUT_PREC_BITS;
737 // Use f as lookup into the precomputed table of multipliers
738 return div_lut[f];
739}
Debargha Mukherjee082d4df2017-03-16 07:10:41 -0700740
741static int is_affine_valid(WarpedMotionParams *wm) {
742 const int32_t *mat = wm->wmmat;
743 return (mat[2] > 0);
744}
745
Debargha Mukherjee27f6e662017-04-10 11:17:16 -0700746static int is_affine_shear_allowed(int16_t alpha, int16_t beta, int16_t gamma,
747 int16_t delta) {
Sean Purser-Haskelle3bc0da2017-04-10 18:19:33 -0700748 if ((4 * abs(alpha) + 7 * abs(beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
749 (4 * abs(gamma) + 4 * abs(delta) >= (1 << WARPEDMODEL_PREC_BITS)))
Debargha Mukherjee082d4df2017-03-16 07:10:41 -0700750 return 0;
751 else
752 return 1;
753}
754
// Derives the shear decomposition (alpha, beta, gamma, delta) of the affine
// part of *wm and stores it back into wm. The warp filter applies the matrix
// as a product of two shears (see the factorization in the comment above
// warp_plane()), so these four parameters drive the two filter passes.
// Returns 1 on success or 0 on an invalid affine set
int get_shear_params(WarpedMotionParams *wm) {
  const int32_t *mat = wm->wmmat;
  if (!is_affine_valid(wm)) return 0;
  // alpha = a - 1 and beta = b, where a = mat[2], b = mat[3] in the
  // factorization / a b \ = / 1 0 \ * / 1+alpha beta \.
  wm->alpha =
      clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
  wm->beta = clamp(mat[3], INT16_MIN, INT16_MAX);
  int16_t shift;
  // y / 2^shift ~= 1 / |a|, via the reciprocal LUT; restore a's sign.
  int16_t y = resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
  int64_t v;
  // gamma = c / a at WARPEDMODEL precision (c = mat[4]).
  v = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) * y;
  wm->gamma =
      clamp(ROUND_POWER_OF_TWO_SIGNED_64(v, shift), INT16_MIN, INT16_MAX);
  // delta = d - b * c / a - 1 (d = mat[5]).
  v = ((int64_t)mat[3] * mat[4]) * y;
  wm->delta = clamp(mat[5] - ROUND_POWER_OF_TWO_SIGNED_64(v, shift) -
                        (1 << WARPEDMODEL_PREC_BITS),
                    INT16_MIN, INT16_MAX);
  if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta))
    return 0;
  return 1;
}
776
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200777#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700778static INLINE void highbd_get_subcolumn(int taps, uint16_t *ref, int32_t *col,
779 int stride, int x, int y_start) {
780 int i;
781 for (i = 0; i < taps; ++i) {
782 col[i] = ref[(i + y_start) * stride + x];
783 }
784}
785
// Interpolates one high bit-depth pixel at subpel position (x, y), given in
// WARPEDPIXEL_PREC_BITS precision, with the separable n-tap warp filter:
// each of the WARPEDPIXEL_FILTER_TAPS columns is filtered vertically, then
// the partial results are filtered horizontally.
static uint16_t highbd_bi_ntap_filter(uint16_t *ref, int x, int y, int stride,
                                      int bd) {
  int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
  int k;
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;  // integer pixel position
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
  for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
    int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
    // Gather the column of taps centered on (i + k, j) and filter it
    // vertically at the fractional y offset.
    highbd_get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
                         i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
                         j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
    arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
                            y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
  }
  // Horizontal pass at the fractional x offset, then round off the
  // precision added by both filter stages at once.
  val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
                       x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
  return (uint16_t)clip_pixel_highbd(val, bd);
}
805
// Interpolates one high bit-depth pixel at subpel position (x, y), given in
// WARPEDPIXEL_PREC_BITS precision, with a separable 4-tap bicubic filter:
// the four neighboring columns are filtered vertically, then the partial
// results are filtered horizontally.
static uint16_t highbd_bi_cubic_filter(uint16_t *ref, int x, int y, int stride,
                                       int bd) {
  int32_t val, arr[4];
  int k;
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;  // integer pixel position
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
  for (k = 0; k < 4; ++k) {
    int32_t arr_temp[4];
    // Column (i + k - 1), rows j-1 .. j+2, filtered at the fractional y.
    highbd_get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
    arr[k] =
        do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
  }
  // Horizontal pass at the fractional x, then round off both stages.
  val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
  return (uint16_t)clip_pixel_highbd(val, bd);
}
822
823static uint16_t highbd_bi_linear_filter(uint16_t *ref, int x, int y, int stride,
824 int bd) {
825 const int ix = x >> WARPEDPIXEL_PREC_BITS;
826 const int iy = y >> WARPEDPIXEL_PREC_BITS;
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700827 const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
828 const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700829 int32_t val;
830 val = ROUND_POWER_OF_TWO_SIGNED(
831 ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
832 (WARPEDPIXEL_PREC_SHIFTS - sx) +
833 ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
834 ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
835 ref[(iy + 1) * stride + ix + 1] * sy * sx,
836 WARPEDPIXEL_PREC_BITS * 2);
837 return (uint16_t)clip_pixel_highbd(val, bd);
838}
839
// Interpolates one high bit-depth pixel at subpel position (x, y).
// Positions outside the reference image are clamped to the border pixels /
// rows / columns; interior positions use the n-tap filter when its full
// support is available, then fall back to bicubic and finally bilinear
// interpolation as the border is approached.
static uint16_t highbd_warp_interpolate(uint16_t *ref, int x, int y, int width,
                                        int height, int stride, int bd) {
  int ix = x >> WARPEDPIXEL_PREC_BITS;
  int iy = y >> WARPEDPIXEL_PREC_BITS;
  int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));  // fractional offsets
  int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
  int32_t v;

  // Entirely outside a corner: return that corner pixel.
  if (ix < 0 && iy < 0)
    return ref[0];
  else if (ix < 0 && iy > height - 1)
    return ref[(height - 1) * stride];
  else if (ix > width - 1 && iy < 0)
    return ref[width - 1];
  else if (ix > width - 1 && iy > height - 1)
    return ref[(height - 1) * stride + (width - 1)];
  else if (ix < 0) {
    // Off the left edge: interpolate vertically along the first column.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
            ref[(iy + 1) * stride] * sy,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel_highbd(v, bd);
  } else if (iy < 0) {
    // Off the top edge: interpolate horizontally along the first row.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel_highbd(v, bd);
  } else if (ix > width - 1) {
    // Off the right edge: interpolate vertically along the last column.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
            ref[(iy + 1) * stride + width - 1] * sy,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel_highbd(v, bd);
  } else if (iy > height - 1) {
    // Off the bottom edge: interpolate horizontally along the last row.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
            ref[(height - 1) * stride + ix + 1] * sx,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel_highbd(v, bd);
  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
    // Full n-tap filter support is available.
    return highbd_bi_ntap_filter(ref, x, y, stride, bd);
  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
    // Enough support for the 4-tap bicubic filter.
    return highbd_bi_cubic_filter(ref, x, y, stride, bd);
  } else {
    // Too close to the border for either filter: bilinear fallback.
    return highbd_bi_linear_filter(ref, x, y, stride, bd);
  }
}
890
Debargha Mukherjee8d59d112016-11-15 11:31:03 -0800891static INLINE int highbd_error_measure(int err, int bd) {
892 const int b = bd - 8;
893 const int bmask = (1 << b) - 1;
Debargha Mukherjee15a608f2016-11-16 14:57:26 -0800894 const int v = (1 << b);
Debargha Mukherjee8d59d112016-11-15 11:31:03 -0800895 int e1, e2;
896 err = abs(err);
897 e1 = err >> b;
898 e2 = err & bmask;
899 return error_measure_lut[255 + e1] * (v - e2) +
900 error_measure_lut[256 + e1] * e2;
901}
902
// Reference ("old") high bit-depth warp path: projects every destination
// pixel individually through the warp model and interpolates it. Used for
// warp models / scaling factors that the block-based filter in
// av1_highbd_warp_affine() does not handle.
static void highbd_warp_plane_old(WarpedMotionParams *wm, uint8_t *ref8,
                                  int width, int height, int stride,
                                  uint8_t *pred8, int p_col, int p_row,
                                  int p_width, int p_height, int p_stride,
                                  int subsampling_x, int subsampling_y,
                                  int x_scale, int y_scale, int bd,
                                  int ref_frm) {
  int i, j;
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  if (projectpoints == NULL) return;
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
      // Map (j, i) through the warp model, then apply the frame scaling
      // factors (x_scale/y_scale are in units of 1/16).
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
      // When compositing onto a second reference frame, average with the
      // existing prediction; otherwise overwrite it.
      if (ref_frm)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                highbd_warp_interpolate(ref, out[0], out[1], width, height,
                                        stride, bd),
            1);
      else
        pred[(j - p_col) + (i - p_row) * p_stride] = highbd_warp_interpolate(
            ref, out[0], out[1], width, height, stride, bd);
    }
  }
}
David Barker87fcb362016-12-19 10:31:00 +0000935
David Barkerfa195162017-01-06 15:58:03 +0000936// Note: For an explanation of the warp algorithm, see the comment
937// above warp_plane()
David Barker2bcf2802017-04-05 11:44:31 +0100938//
939// Note also: The "worst case" in terms of modulus of the data stored into 'tmp'
940// (ie, the result of 'sum' in the horizontal filter) occurs when:
941// coeffs = { -2, 8, -22, 87, 72, -21, 8, -2}, and
942// ref = { 0, 255, 0, 255, 255, 0, 255, 0}
943// Before rounding, this gives sum = 716625. After rounding,
944// HORSHEAR_REDUCE_PREC_BITS = 4 => sum = 44789 > 2^15
945// HORSHEAR_REDUCE_PREC_BITS = 5 => sum = 22395 < 2^15
946//
947// So, as long as HORSHEAR_REDUCE_PREC_BITS >= 5, we can safely use a 16-bit
948// intermediate array.
// High bit-depth block-based warp filter; see the algorithm description in
// the comment above warp_plane() and the precision notes directly above.
void av1_highbd_warp_affine_c(int32_t *mat, uint16_t *ref, int width,
                              int height, int stride, uint16_t *pred, int p_col,
                              int p_row, int p_width, int p_height,
                              int p_stride, int subsampling_x,
                              int subsampling_y, int bd, int ref_frm,
                              int16_t alpha, int16_t beta, int16_t gamma,
                              int16_t delta) {
// Intermediate, horizontally-filtered block: 15 rows x 8 columns.
// A 16-bit buffer is only safe when HORSHEAR_REDUCE_PREC_BITS >= 5 (see
// the worst-case analysis in the comment above this function).
#if HORSHEAR_REDUCE_PREC_BITS >= 5
  int16_t tmp[15 * 8];
#else
  int32_t tmp[15 * 8];
#endif
  int i, j, k, l, m;

  /* Note: For this code to work, the left/right frame borders need to be
     extended by at least 13 pixels each. By the time we get here, other
     code will have set up this border, but we allow an explicit check
     for debugging purposes.
  */
  /*for (i = 0; i < height; ++i) {
    for (j = 0; j < 13; ++j) {
      assert(ref[i * stride - 13 + j] == ref[i * stride]);
      assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
    }
  }*/

  for (i = p_row; i < p_row + p_height; i += 8) {
    for (j = p_col; j < p_col + p_width; j += 8) {
      int32_t x4, y4, ix4, sx4, iy4, sy4;
      // Project the center (j + 4, i + 4) of this 8x8 block through the
      // warp matrix at full WARPEDMODEL precision, adjusting for chroma
      // subsampling where needed.
      if (subsampling_x)
        x4 = ROUND_POWER_OF_TWO_SIGNED(
            mat[2] * 2 * (j + 4) + mat[3] * 2 * (i + 4) + mat[0] +
                (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
            1);
      else
        x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];

      if (subsampling_y)
        y4 = ROUND_POWER_OF_TWO_SIGNED(
            mat[4] * 2 * (j + 4) + mat[5] * 2 * (i + 4) + mat[1] +
                (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
            1);
      else
        y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];

      // Split the projected position into integer and fractional parts.
      ix4 = x4 >> WARPEDMODEL_PREC_BITS;
      sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
      iy4 = y4 >> WARPEDMODEL_PREC_BITS;
      sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);

      // Horizontal filter
      for (k = -7; k < 8; ++k) {
        // Clamp the source row to the image.
        int iy = iy4 + k;
        if (iy < 0)
          iy = 0;
        else if (iy > height - 1)
          iy = height - 1;

        if (ix4 <= -7) {
          // The whole row samples only the (border-extended) leftmost
          // column; scale it as if it had been filtered.
          for (l = 0; l < 8; ++l) {
            tmp[(k + 7) * 8 + l] =
                ref[iy * stride] *
                (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
          }
        } else if (ix4 >= width + 6) {
          // The whole row samples only the (border-extended) rightmost
          // column.
          for (l = 0; l < 8; ++l) {
            tmp[(k + 7) * 8 + l] =
                ref[iy * stride + (width - 1)] *
                (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
          }
        } else {
          // sx tracks sx4 + alpha * l + beta * k across the inner loop.
          int sx = sx4 + alpha * (-4) + beta * k;

          for (l = -4; l < 4; ++l) {
            int ix = ix4 + l - 3;
            const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
                             WARPEDPIXEL_PREC_SHIFTS;
            const int16_t *coeffs = warped_filter[offs];
            int32_t sum = 0;
            // assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
            for (m = 0; m < 8; ++m) {
              sum += ref[iy * stride + ix + m] * coeffs[m];
            }
            sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS);
#if HORSHEAR_REDUCE_PREC_BITS >= 5
            tmp[(k + 7) * 8 + (l + 4)] = saturate_int16(sum);
#else
            tmp[(k + 7) * 8 + (l + 4)] = sum;
#endif
            sx += alpha;
          }
        }
      }

      // Vertical filter
      for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
        // sy tracks sy4 + gamma * l + delta * k across the inner loop.
        int sy = sy4 + gamma * (-4) + delta * k;
        for (l = -4; l < 4; ++l) {
          uint16_t *p =
              &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
          const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
                           WARPEDPIXEL_PREC_SHIFTS;
          const int16_t *coeffs = warped_filter[offs];
          int32_t sum = 0;
          // assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
          for (m = 0; m < 8; ++m) {
            sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
          }
          sum = clip_pixel_highbd(
              ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS), bd);
          // Average with the existing prediction when compositing onto a
          // second reference frame; otherwise overwrite.
          if (ref_frm)
            *p = ROUND_POWER_OF_TWO(*p + sum, 1);
          else
            *p = sum;
          sy += gamma;
        }
      }
    }
  }
}
1069
// Warps (part of) a high bit-depth reference plane into the prediction
// buffer. Un-scaled (x_scale == y_scale == 16) ROTZOOM/AFFINE models take
// the fast block-based filter; everything else falls back to the per-pixel
// path in highbd_warp_plane_old().
// Note: for ROTZOOM models this fills in wmmat[4]/wmmat[5] from the
// rotation/zoom terms, i.e. it mutates *wm.
static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
                              int height, int stride, uint8_t *pred8, int p_col,
                              int p_row, int p_width, int p_height,
                              int p_stride, int subsampling_x,
                              int subsampling_y, int x_scale, int y_scale,
                              int bd, int ref_frm) {
  if (wm->wmtype == ROTZOOM) {
    wm->wmmat[5] = wm->wmmat[2];
    wm->wmmat[4] = -wm->wmmat[3];
  }
  if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
      y_scale == 16) {
    int32_t *mat = wm->wmmat;
    // Shear parameters must already have been set up by get_shear_params().
    const int16_t alpha = wm->alpha;
    const int16_t beta = wm->beta;
    const int16_t gamma = wm->gamma;
    const int16_t delta = wm->delta;

    uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
    uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
    av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
                           p_width, p_height, p_stride, subsampling_x,
                           subsampling_y, bd, ref_frm, alpha, beta, gamma,
                           delta);
  } else {
    highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row,
                          p_width, p_height, p_stride, subsampling_x,
                          subsampling_y, x_scale, y_scale, bd, ref_frm);
  }
}
1100
1101static double highbd_warp_erroradv(WarpedMotionParams *wm, uint8_t *ref8,
1102 int width, int height, int stride,
1103 uint8_t *dst8, int p_col, int p_row,
1104 int p_width, int p_height, int p_stride,
1105 int subsampling_x, int subsampling_y,
1106 int x_scale, int y_scale, int bd) {
1107 int gm_err = 0, no_gm_err = 0;
1108 int64_t gm_sumerr = 0, no_gm_sumerr = 0;
1109 int i, j;
1110 uint16_t *tmp = aom_malloc(p_width * p_height * sizeof(*tmp));
1111 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1112 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
1113 highbd_warp_plane(wm, ref8, width, height, stride, CONVERT_TO_BYTEPTR(tmp),
1114 p_col, p_row, p_width, p_height, p_width, subsampling_x,
1115 subsampling_y, x_scale, y_scale, bd, 0);
1116 for (i = 0; i < p_height; ++i) {
1117 for (j = 0; j < p_width; ++j) {
1118 gm_err = dst[j + i * p_stride] - tmp[j + i * p_width];
1119 no_gm_err =
1120 dst[j + i * p_stride] - ref[(j + p_col) + (i + p_row) * stride];
1121 gm_sumerr += highbd_error_measure(gm_err, bd);
1122 no_gm_sumerr += highbd_error_measure(no_gm_err, bd);
1123 }
1124 }
1125 aom_free(tmp);
1126 return (double)gm_sumerr / no_gm_sumerr;
1127}
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001128#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001129
Debargha Mukherjee8d59d112016-11-15 11:31:03 -08001130static INLINE int error_measure(int err) {
1131 return error_measure_lut[255 + err];
1132}
1133
// Reference ("old") low bit-depth warp path: projects every destination
// pixel individually through the warp model and interpolates it. Used for
// warp models / scaling factors that the block-based filter in
// av1_warp_affine() does not handle.
static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width,
                           int height, int stride, uint8_t *pred, int p_col,
                           int p_row, int p_width, int p_height, int p_stride,
                           int subsampling_x, int subsampling_y, int x_scale,
                           int y_scale, int ref_frm) {
  int i, j;
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
  if (projectpoints == NULL) return;
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
      // Map (j, i) through the warp model, then apply the frame scaling
      // factors (x_scale/y_scale are in units of 1/16).
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
      // When compositing onto a second reference frame, average with the
      // existing prediction; otherwise overwrite it.
      if (ref_frm)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                warp_interpolate(ref, out[0], out[1], width, height, stride),
            1);
      else
        pred[(j - p_col) + (i - p_row) * p_stride] =
            warp_interpolate(ref, out[0], out[1], width, height, stride);
    }
  }
}
1161
David Barkerbe128602016-12-13 16:40:31 +00001162/* The warp filter for ROTZOOM and AFFINE models works as follows:
1163 * Split the input into 8x8 blocks
1164 * For each block, project the point (4, 4) within the block, to get the
1165 overall block position. Split into integer and fractional coordinates,
1166 maintaining full WARPEDMODEL precision
1167 * Filter horizontally: Generate 15 rows of 8 pixels each. Each pixel gets a
1168 variable horizontal offset. This means that, while the rows of the
1169 intermediate buffer align with the rows of the *reference* image, the
1170 columns align with the columns of the *destination* image.
1171 * Filter vertically: Generate the output block (up to 8x8 pixels, but if the
1172 destination is too small we crop the output at this stage). Each pixel has
1173 a variable vertical offset, so that the resulting rows are aligned with
1174 the rows of the destination image.
1175
1176 To accomplish these alignments, we factor the warp matrix as a
1177 product of two shear / asymmetric zoom matrices:
1178 / a b \ = / 1 0 \ * / 1+alpha beta \
1179 \ c d / \ gamma 1+delta / \ 0 1 /
1180 where a, b, c, d are wmmat[2], wmmat[3], wmmat[4], wmmat[5] respectively.
1181 The second shear (with alpha and beta) is applied by the horizontal filter,
1182 then the first shear (with gamma and delta) is applied by the vertical
1183 filter.
1184
1185 The only limitation is that, to fit this in a fixed 8-tap filter size,
1186 the fractional pixel offsets must be at most +-1. Since the horizontal filter
1187 generates 15 rows of 8 columns, and the initial point we project is at (4, 4)
1188 within the block, the parameters must satisfy
   4 * |alpha| + 7 * |beta| <= 1 and 4 * |gamma| + 4 * |delta| <= 1
1190 for this filter to be applicable.
David Barkerd5dfa962017-01-10 15:06:08 +00001191
1192 Note: warp_affine() assumes that the caller has done all of the relevant
1193 checks, ie. that we have a ROTZOOM or AFFINE model, that wm[4] and wm[5]
1194 are set appropriately (if using a ROTZOOM model), and that alpha, beta,
1195 gamma, delta are all in range.
1196
1197 TODO(david.barker): Maybe support scaled references?
David Barkerbe128602016-12-13 16:40:31 +00001198*/
David Barker838367d2017-01-12 15:13:28 +00001199void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
1200 int stride, uint8_t *pred, int p_col, int p_row,
1201 int p_width, int p_height, int p_stride,
1202 int subsampling_x, int subsampling_y, int ref_frm,
Debargha Mukherjee27f6e662017-04-10 11:17:16 -07001203 int16_t alpha, int16_t beta, int16_t gamma,
1204 int16_t delta) {
David Barkerd5dfa962017-01-10 15:06:08 +00001205 int16_t tmp[15 * 8];
1206 int i, j, k, l, m;
1207
1208 /* Note: For this code to work, the left/right frame borders need to be
1209 extended by at least 13 pixels each. By the time we get here, other
1210 code will have set up this border, but we allow an explicit check
1211 for debugging purposes.
1212 */
1213 /*for (i = 0; i < height; ++i) {
1214 for (j = 0; j < 13; ++j) {
1215 assert(ref[i * stride - 13 + j] == ref[i * stride]);
1216 assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
1217 }
1218 }*/
1219
1220 for (i = p_row; i < p_row + p_height; i += 8) {
1221 for (j = p_col; j < p_col + p_width; j += 8) {
1222 int32_t x4, y4, ix4, sx4, iy4, sy4;
1223 if (subsampling_x)
1224 x4 = ROUND_POWER_OF_TWO_SIGNED(
1225 mat[2] * 2 * (j + 4) + mat[3] * 2 * (i + 4) + mat[0] +
1226 (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1227 1);
1228 else
1229 x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
1230
1231 if (subsampling_y)
1232 y4 = ROUND_POWER_OF_TWO_SIGNED(
1233 mat[4] * 2 * (j + 4) + mat[5] * 2 * (i + 4) + mat[1] +
1234 (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1235 1);
1236 else
1237 y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
1238
1239 ix4 = x4 >> WARPEDMODEL_PREC_BITS;
1240 sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
1241 iy4 = y4 >> WARPEDMODEL_PREC_BITS;
1242 sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
1243
1244 // Horizontal filter
1245 for (k = -7; k < 8; ++k) {
1246 int iy = iy4 + k;
1247 if (iy < 0)
1248 iy = 0;
1249 else if (iy > height - 1)
1250 iy = height - 1;
1251
1252 if (ix4 <= -7) {
1253 // In this case, the rightmost pixel sampled is in column
1254 // ix4 + 3 + 7 - 3 = ix4 + 7 <= 0, ie. the entire block
1255 // will sample only from the leftmost column
1256 // (once border extension is taken into account)
1257 for (l = 0; l < 8; ++l) {
1258 tmp[(k + 7) * 8 + l] =
Debargha Mukherjee1d184602017-04-04 04:54:07 -07001259 ref[iy * stride] *
1260 (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
David Barkerd5dfa962017-01-10 15:06:08 +00001261 }
1262 } else if (ix4 >= width + 6) {
1263 // In this case, the leftmost pixel sampled is in column
David Barker13797462017-01-23 10:50:11 +00001264 // ix4 - 4 + 0 - 3 = ix4 - 7 >= width - 1, ie. the entire block
David Barkerd5dfa962017-01-10 15:06:08 +00001265 // will sample only from the rightmost column
1266 // (once border extension is taken into account)
1267 for (l = 0; l < 8; ++l) {
1268 tmp[(k + 7) * 8 + l] =
Debargha Mukherjee1d184602017-04-04 04:54:07 -07001269 ref[iy * stride + (width - 1)] *
1270 (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
David Barkerd5dfa962017-01-10 15:06:08 +00001271 }
1272 } else {
1273 // If we get here, then
1274 // the leftmost pixel sampled is
1275 // ix4 - 4 + 0 - 3 = ix4 - 7 >= -13
1276 // and the rightmost pixel sampled is at most
1277 // ix4 + 3 + 7 - 3 = ix4 + 7 <= width + 12
1278 // So, assuming that border extension has been done, we
1279 // don't need to explicitly clamp values.
1280 int sx = sx4 + alpha * (-4) + beta * k;
1281
1282 for (l = -4; l < 4; ++l) {
1283 int ix = ix4 + l - 3;
1284 // At this point, sx = sx4 + alpha * l + beta * k
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001285 const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
1286 WARPEDPIXEL_PREC_SHIFTS;
1287 const int16_t *coeffs = warped_filter[offs];
David Barkerd5dfa962017-01-10 15:06:08 +00001288 int32_t sum = 0;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001289 // assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
David Barkerd5dfa962017-01-10 15:06:08 +00001290 for (m = 0; m < 8; ++m) {
1291 sum += ref[iy * stride + ix + m] * coeffs[m];
1292 }
Debargha Mukherjee1d184602017-04-04 04:54:07 -07001293 sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS);
David Barkerd5dfa962017-01-10 15:06:08 +00001294 tmp[(k + 7) * 8 + (l + 4)] = saturate_int16(sum);
1295 sx += alpha;
1296 }
1297 }
1298 }
1299
1300 // Vertical filter
1301 for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
1302 int sy = sy4 + gamma * (-4) + delta * k;
1303 for (l = -4; l < 4; ++l) {
1304 uint8_t *p =
1305 &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
1306 // At this point, sy = sy4 + gamma * l + delta * k
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001307 const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
1308 WARPEDPIXEL_PREC_SHIFTS;
1309 const int16_t *coeffs = warped_filter[offs];
David Barkerd5dfa962017-01-10 15:06:08 +00001310 int32_t sum = 0;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001311 // assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
David Barkerd5dfa962017-01-10 15:06:08 +00001312 for (m = 0; m < 8; ++m) {
1313 sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
1314 }
Debargha Mukherjee1d184602017-04-04 04:54:07 -07001315 sum = clip_pixel(ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS));
David Barkerd5dfa962017-01-10 15:06:08 +00001316 if (ref_frm)
1317 *p = ROUND_POWER_OF_TWO(*p + sum, 1);
1318 else
1319 *p = sum;
1320 sy += gamma;
1321 }
1322 }
1323 }
1324 }
1325}
1326
David Barkerbe128602016-12-13 16:40:31 +00001327static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,
1328 int height, int stride, uint8_t *pred, int p_col,
1329 int p_row, int p_width, int p_height, int p_stride,
1330 int subsampling_x, int subsampling_y, int x_scale,
1331 int y_scale, int ref_frm) {
1332 if (wm->wmtype == ROTZOOM) {
1333 wm->wmmat[5] = wm->wmmat[2];
1334 wm->wmmat[4] = -wm->wmmat[3];
1335 }
David Barkerd5dfa962017-01-10 15:06:08 +00001336 if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
1337 y_scale == 16) {
David Barkerbe128602016-12-13 16:40:31 +00001338 int32_t *mat = wm->wmmat;
Debargha Mukherjee27f6e662017-04-10 11:17:16 -07001339 const int16_t alpha = wm->alpha;
1340 const int16_t beta = wm->beta;
1341 const int16_t gamma = wm->gamma;
1342 const int16_t delta = wm->delta;
David Barkerfa195162017-01-06 15:58:03 +00001343
David Barker838367d2017-01-12 15:13:28 +00001344 av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
1345 p_width, p_height, p_stride, subsampling_x, subsampling_y,
1346 ref_frm, alpha, beta, gamma, delta);
David Barkerbe128602016-12-13 16:40:31 +00001347 } else {
1348 warp_plane_old(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
1349 p_height, p_stride, subsampling_x, subsampling_y, x_scale,
1350 y_scale, ref_frm);
1351 }
1352}
1353
1354static double warp_erroradv(WarpedMotionParams *wm, uint8_t *ref, int width,
1355 int height, int stride, uint8_t *dst, int p_col,
1356 int p_row, int p_width, int p_height, int p_stride,
1357 int subsampling_x, int subsampling_y, int x_scale,
1358 int y_scale) {
1359 int gm_err = 0, no_gm_err = 0;
1360 int gm_sumerr = 0, no_gm_sumerr = 0;
1361 int i, j;
1362 uint8_t *tmp = aom_malloc(p_width * p_height);
1363 warp_plane(wm, ref, width, height, stride, tmp, p_col, p_row, p_width,
1364 p_height, p_width, subsampling_x, subsampling_y, x_scale, y_scale,
1365 0);
1366
1367 for (i = 0; i < p_height; ++i) {
1368 for (j = 0; j < p_width; ++j) {
1369 gm_err = dst[j + i * p_stride] - tmp[j + i * p_width];
David Barker87fcb362016-12-19 10:31:00 +00001370 no_gm_err =
1371 dst[j + i * p_stride] - ref[(j + p_col) + (i + p_row) * stride];
David Barkerbe128602016-12-13 16:40:31 +00001372 gm_sumerr += error_measure(gm_err);
1373 no_gm_sumerr += error_measure(no_gm_err);
1374 }
1375 }
1376
1377 aom_free(tmp);
1378 return (double)gm_sumerr / no_gm_sumerr;
1379}
1380
Yaowu Xuf883b422016-08-30 14:01:10 -07001381double av1_warp_erroradv(WarpedMotionParams *wm,
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001382#if CONFIG_HIGHBITDEPTH
Yaowu Xuf883b422016-08-30 14:01:10 -07001383 int use_hbd, int bd,
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001384#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuf883b422016-08-30 14:01:10 -07001385 uint8_t *ref, int width, int height, int stride,
1386 uint8_t *dst, int p_col, int p_row, int p_width,
1387 int p_height, int p_stride, int subsampling_x,
1388 int subsampling_y, int x_scale, int y_scale) {
Debargha Mukherjee3b6c5442017-03-30 08:22:00 -07001389 if (wm->wmtype <= AFFINE)
1390 if (!get_shear_params(wm)) return 1;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001391#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001392 if (use_hbd)
1393 return highbd_warp_erroradv(
1394 wm, ref, width, height, stride, dst, p_col, p_row, p_width, p_height,
1395 p_stride, subsampling_x, subsampling_y, x_scale, y_scale, bd);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001396#endif // CONFIG_HIGHBITDEPTH
Sarah Parkerf9a961c2016-09-06 11:25:04 -07001397 return warp_erroradv(wm, ref, width, height, stride, dst, p_col, p_row,
1398 p_width, p_height, p_stride, subsampling_x,
1399 subsampling_y, x_scale, y_scale);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001400}
1401
Yaowu Xuf883b422016-08-30 14:01:10 -07001402void av1_warp_plane(WarpedMotionParams *wm,
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001403#if CONFIG_HIGHBITDEPTH
Yaowu Xuf883b422016-08-30 14:01:10 -07001404 int use_hbd, int bd,
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001405#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuf883b422016-08-30 14:01:10 -07001406 uint8_t *ref, int width, int height, int stride,
1407 uint8_t *pred, int p_col, int p_row, int p_width,
1408 int p_height, int p_stride, int subsampling_x,
Sarah Parker43d56f32016-10-21 17:06:37 -07001409 int subsampling_y, int x_scale, int y_scale, int ref_frm) {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001410#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001411 if (use_hbd)
1412 highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
1413 p_width, p_height, p_stride, subsampling_x, subsampling_y,
Sarah Parker43d56f32016-10-21 17:06:37 -07001414 x_scale, y_scale, bd, ref_frm);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001415 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001416#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001417 warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
1418 p_height, p_stride, subsampling_x, subsampling_y, x_scale,
Sarah Parker43d56f32016-10-21 17:06:37 -07001419 y_scale, ref_frm);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001420}
1421
Yue Chen7d2109e2017-01-03 11:51:38 -08001422#if CONFIG_WARPED_MOTION
Debargha Mukherjeeb9370ac2017-03-23 06:10:18 -07001423#define LEAST_SQUARES_ORDER 2
Yue Chen5558e5d2017-03-31 12:24:42 -07001424
Debargha Mukherjeed49c5c42017-04-07 15:46:03 -07001425#define LS_MV_MAX 256 // max mv in 1/8-pel
Yue Chen5558e5d2017-03-31 12:24:42 -07001426#define LS_STEP 2
1427
Debargha Mukherjee8c410242017-04-11 15:20:56 -07001428// Assuming LS_MV_MAX is < MAX_SB_SIZE * 8,
1429// the precision needed is:
1430// (MAX_SB_SIZE_LOG2 + 3) [for sx * sx magnitude] +
1431// (MAX_SB_SIZE_LOG2 + 4) [for sx * dx magnitude] +
1432// 1 [for sign] +
1433// LEAST_SQUARES_SAMPLES_MAX_BITS
1434// [for adding up to LEAST_SQUARES_SAMPLES_MAX samples]
1435// The value is 23
1436#define LS_MAT_RANGE_BITS \
1437 ((MAX_SB_SIZE_LOG2 + 4) * 2 + LEAST_SQUARES_SAMPLES_MAX_BITS)
1438
1439// Bit-depth reduction from the full-range
1440#define LS_MAT_DOWN_BITS 2
1441
1442// bits range of A, Bx and By after downshifting
1443#define LS_MAT_BITS (LS_MAT_RANGE_BITS - LS_MAT_DOWN_BITS)
1444#define LS_MAT_MIN (-(1 << (LS_MAT_BITS - 1)))
1445#define LS_MAT_MAX ((1 << (LS_MAT_BITS - 1)) - 1)
1446
Yue Chen5558e5d2017-03-31 12:24:42 -07001447#define LS_SUM(a) ((a)*4 + LS_STEP * 2)
Debargha Mukherjeef2f3bcd2017-04-04 04:17:52 -07001448#define LS_SQUARE(a) \
1449 (((a) * (a)*4 + (a)*4 * LS_STEP + LS_STEP * LS_STEP * 2) >> 2)
Yue Chen5558e5d2017-03-31 12:24:42 -07001450#define LS_PRODUCT1(a, b) \
Debargha Mukherjeef2f3bcd2017-04-04 04:17:52 -07001451 (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP) >> 2)
Yue Chen5558e5d2017-03-31 12:24:42 -07001452#define LS_PRODUCT2(a, b) \
Debargha Mukherjeef2f3bcd2017-04-04 04:17:52 -07001453 (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP * 2) >> 2)
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001454
Debargha Mukherjeeb9370ac2017-03-23 06:10:18 -07001455#if LEAST_SQUARES_ORDER == 2
// Fits the four linear parameters (wmmat[2..5]) of an affine model by
// integer least squares over motion-vector samples, then derives the
// translation (wmmat[0..1]) so that the block centre maps exactly to the
// position given by the transmitted motion vector.
//
// pts1/pts2: np source/destination sample positions in 1/8-pel units
// mvy/mvx:   the block's own motion vector (1/8-pel)
// mi_row/mi_col: block position in mi units
// Returns 0 on success, 1 if the normal matrix is singular.
static int find_affine_int(const int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
                           int mvy, int mvx, WarpedMotionParams *wm, int mi_row,
                           int mi_col) {
  int32_t A[2][2] = { { 0, 0 }, { 0, 0 } };  // normal matrix P'P (symmetric)
  int32_t Bx[2] = { 0, 0 };                  // P'q (x-equation RHS)
  int32_t By[2] = { 0, 0 };                  // P'r (y-equation RHS)
  int i, n = 0;  // n counts samples that survive the outlier rejection

  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  // Block-centre source position in 1/8-pel units...
  const int suy = (mi_row * MI_SIZE + AOMMAX(bh, MI_SIZE) / 2 - 1) * 8;
  const int sux = (mi_col * MI_SIZE + AOMMAX(bw, MI_SIZE) / 2 - 1) * 8;
  // ...and its destination after applying the transmitted MV.
  const int duy = suy + mvy;
  const int dux = sux + mvx;

  // Assume the center pixel of the block has exactly the same motion vector
  // as transmitted for the block. First shift the origin of the source
  // points to the block center, and the origin of the destination points to
  // the block center added to the motion vector transmitted.
  // Let (xi, yi) denote the source points and (xi', yi') denote destination
  // points after origin shifting, for i = 0, 1, 2, .... n-1.
  // Then if P = [x0, y0,
  //              x1, y1
  //              x2, y1,
  //               ....
  //             ]
  //    q = [x0', x1', x2', ... ]'
  //    r = [y0', y1', y2', ... ]'
  // the least squares problems that need to be solved are:
  //    [h1, h2]' = inv(P'P)P'q and
  //    [h3, h4]' = inv(P'P)P'r
  // where the affine transformation is given by:
  //    x' = h1.x + h2.y
  //    y' = h3.x + h4.y
  //
  // The loop below computes: A = P'P, Bx = P'q, By = P'r
  // We need to just compute inv(A).Bx and inv(A).By for the solutions.
  int sx, sy, dx, dy;
  // Contribution from neighbor block
  for (i = 0; i < np && n < LEAST_SQUARES_SAMPLES_MAX; i++) {
    dx = pts2[i * 2] - dux;
    dy = pts2[i * 2 + 1] - duy;
    sx = pts1[i * 2] - sux;
    sy = pts1[i * 2 + 1] - suy;
    // Reject outliers: skip samples whose implied MV (relative to the
    // centre MV) exceeds LS_MV_MAX in either component.
    if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
      A[0][0] += LS_SQUARE(sx);
      A[0][1] += LS_PRODUCT1(sx, sy);
      A[1][1] += LS_SQUARE(sy);
      Bx[0] += LS_PRODUCT2(sx, dx);
      Bx[1] += LS_PRODUCT1(sy, dx);
      By[0] += LS_PRODUCT1(sx, dy);
      By[1] += LS_PRODUCT2(sy, dy);
      n++;
    }
  }
  // Fewer accumulated samples mean smaller sums, so less precision
  // reduction is needed to keep values inside LS_MAT_BITS.
  int downshift;
  if (n >= 4)
    downshift = LS_MAT_DOWN_BITS;
  else if (n >= 2)
    downshift = LS_MAT_DOWN_BITS - 1;
  else
    downshift = LS_MAT_DOWN_BITS - 2;

  // Reduce precision by downshift bits
  A[0][0] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][0], downshift), LS_MAT_MIN,
                  LS_MAT_MAX);
  A[0][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][1], downshift), LS_MAT_MIN,
                  LS_MAT_MAX);
  A[1][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[1][1], downshift), LS_MAT_MIN,
                  LS_MAT_MAX);
  Bx[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[0], downshift), LS_MAT_MIN,
                LS_MAT_MAX);
  Bx[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[1], downshift), LS_MAT_MIN,
                LS_MAT_MAX);
  By[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[0], downshift), LS_MAT_MIN,
                LS_MAT_MAX);
  By[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[1], downshift), LS_MAT_MIN,
                LS_MAT_MAX);

  int64_t Px[2], Py[2], Det;
  int16_t iDet, shift;

  // These divided by the Det, are the least squares solutions
  // (adjugate(A) * B, by Cramer's rule for the 2x2 system).
  Px[0] = (int64_t)A[1][1] * Bx[0] - (int64_t)A[0][1] * Bx[1];
  Px[1] = -(int64_t)A[0][1] * Bx[0] + (int64_t)A[0][0] * Bx[1];
  Py[0] = (int64_t)A[1][1] * By[0] - (int64_t)A[0][1] * By[1];
  Py[1] = -(int64_t)A[0][1] * By[0] + (int64_t)A[0][0] * By[1];

  // Compute Determinant of A
  Det = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1];
  if (Det == 0) return 1;
  // NOTE(review): labs() takes long, which is 32-bit on some platforms
  // (e.g. LLP64 Windows) while Det is int64_t — confirm llabs() is not
  // needed here.
  iDet = resolve_divisor_64(labs(Det), &shift) * (Det < 0 ? -1 : 1);
  // Fold the WARPEDMODEL_PREC_BITS scaling of the output into the shift.
  shift -= WARPEDMODEL_PREC_BITS;
  if (shift < 0) {
    // NOTE(review): iDet may be negative here; left-shifting a negative
    // value is undefined behavior in C — confirm intended/safe in practice.
    iDet <<= (-shift);
    shift = 0;
  }

  int64_t v;
  v = Px[0] * (int64_t)iDet;
  wm->wmmat[2] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
  v = Px[1] * (int64_t)iDet;
  wm->wmmat[3] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
  // Translation chosen so the centre (sux, suy) maps to (dux, duy);
  // positions are in 1/8-pel, hence the final >> 3.
  v = (dux << WARPEDMODEL_PREC_BITS) - sux * wm->wmmat[2] - suy * wm->wmmat[3];
  wm->wmmat[0] = ROUND_POWER_OF_TWO_SIGNED(v, 3);

  v = Py[0] * (int64_t)iDet;
  wm->wmmat[4] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
  v = Py[1] * (int64_t)iDet;
  wm->wmmat[5] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
  v = (duy << WARPEDMODEL_PREC_BITS) - sux * wm->wmmat[4] - suy * wm->wmmat[5];
  wm->wmmat[1] = ROUND_POWER_OF_TWO_SIGNED(v, 3);

  // Affine model: the projective row is zero.
  wm->wmmat[6] = wm->wmmat[7] = 0;

  // Clamp values
  wm->wmmat[0] = clamp(wm->wmmat[0], -WARPEDMODEL_TRANS_CLAMP,
                       WARPEDMODEL_TRANS_CLAMP - 1);
  wm->wmmat[1] = clamp(wm->wmmat[1], -WARPEDMODEL_TRANS_CLAMP,
                       WARPEDMODEL_TRANS_CLAMP - 1);
  wm->wmmat[2] = clamp(wm->wmmat[2], -WARPEDMODEL_DIAGAFFINE_CLAMP,
                       WARPEDMODEL_DIAGAFFINE_CLAMP - 1);
  wm->wmmat[5] = clamp(wm->wmmat[5], -WARPEDMODEL_DIAGAFFINE_CLAMP,
                       WARPEDMODEL_DIAGAFFINE_CLAMP - 1);
  wm->wmmat[3] = clamp(wm->wmmat[3], -WARPEDMODEL_NONDIAGAFFINE_CLAMP,
                       WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
  wm->wmmat[4] = clamp(wm->wmmat[4], -WARPEDMODEL_NONDIAGAFFINE_CLAMP,
                       WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
  return 0;
}
1586
1587#else
Debargha Mukherjee93105532017-03-01 10:44:46 -08001588
Debargha Mukherjeee8e6cad2017-03-22 17:38:38 -07001589static int find_affine_int(const int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
1590 int mvy, int mvx, WarpedMotionParams *wm, int mi_row,
1591 int mi_col) {
Debargha Mukherjee93105532017-03-01 10:44:46 -08001592 int32_t A[3][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };
1593 int32_t Bx[3] = { 0, 0, 0 };
1594 int32_t By[3] = { 0, 0, 0 };
Yue Chen5558e5d2017-03-31 12:24:42 -07001595 int i, n = 0, off;
Debargha Mukherjee93105532017-03-01 10:44:46 -08001596
1597 int64_t C00, C01, C02, C11, C12, C22;
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001598 int64_t Px[3], Py[3];
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001599 int64_t Det, v;
Debargha Mukherjeee8e6cad2017-03-22 17:38:38 -07001600 const int bw = block_size_wide[bsize];
1601 const int bh = block_size_high[bsize];
1602 const int cy_offset = AOMMAX(bh, MI_SIZE) / 2 - 1;
1603 const int cx_offset = AOMMAX(bw, MI_SIZE) / 2 - 1;
Debargha Mukherjee93105532017-03-01 10:44:46 -08001604
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001605 // Offsets to make the values in the arrays smaller
Debargha Mukherjee246d2732017-02-27 14:09:18 -08001606 const int ux = mi_col * MI_SIZE * 8, uy = mi_row * MI_SIZE * 8;
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001607 // Let source points (xi, yi) map to destimation points (xi', yi'),
1608 // for i = 0, 1, 2, .... n-1
1609 // Then if P = [x0, y0, 1,
1610 // x1, y1, 1
1611 // x2, y2, 1,
1612 // ....
1613 // ]
1614 // q = [x0', x1', x2', ... ]'
1615 // r = [y0', y1', y2', ... ]'
1616 // the least squares problems that need to be solved are:
1617 // [h1, h2, dx]' = inv(P'P)P'q and
1618 // [h3, h4, dy]' = inv(P'P)P'r
1619 // where the affine transformation is given by:
1620 // x' = h1.x + h2.y + dx
1621 // y' = h3.x + h4.y + dy
1622 //
1623 // The loop below computes: A = P'P, Bx = P'q, By = P'r
Debargha Mukherjee93105532017-03-01 10:44:46 -08001624 // We need to just compute inv(A).Bx and inv(A).By for the solutions.
1625 //
Yue Chen5558e5d2017-03-31 12:24:42 -07001626 int sx, sy, dx, dy;
1627 // Contribution from sample in current block
1628 sx = cx_offset * 8;
1629 sy = cy_offset * 8;
1630 dx = sx + mvx;
1631 dy = sy + mvy;
1632 if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
1633 A[0][0] += LS_SQUARE(sx);
1634 A[0][1] += LS_PRODUCT1(sx, sy);
1635 A[0][2] += LS_SUM(sx);
1636 A[1][1] += LS_SQUARE(sy);
1637 A[1][2] += LS_SUM(sy);
1638 A[2][2] += 4;
1639 Bx[0] += LS_PRODUCT2(sx, dx);
1640 Bx[1] += LS_PRODUCT1(sy, dx);
1641 Bx[2] += LS_SUM(dx);
1642 By[0] += LS_PRODUCT1(sx, dy);
1643 By[1] += LS_PRODUCT2(sy, dy);
1644 By[2] += LS_SUM(dy);
1645 n++;
1646 }
1647 // Contribution from neighbor block
1648 for (i = 0; i < np && n < LEAST_SQUARES_SAMPLES_MAX; i++) {
1649 dx = pts2[i * 2] - ux;
1650 dy = pts2[i * 2 + 1] - uy;
1651 sx = pts1[i * 2] - ux;
1652 sy = pts1[i * 2 + 1] - uy;
1653 if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
1654 A[0][0] += LS_SQUARE(sx);
1655 A[0][1] += LS_PRODUCT1(sx, sy);
1656 A[0][2] += LS_SUM(sx);
1657 A[1][1] += LS_SQUARE(sy);
1658 A[1][2] += LS_SUM(sy);
1659 A[2][2] += 4;
1660 Bx[0] += LS_PRODUCT2(sx, dx);
1661 Bx[1] += LS_PRODUCT1(sy, dx);
1662 Bx[2] += LS_SUM(dx);
1663 By[0] += LS_PRODUCT1(sx, dy);
1664 By[1] += LS_PRODUCT2(sy, dy);
1665 By[2] += LS_SUM(dy);
Debargha Mukherjee93105532017-03-01 10:44:46 -08001666 n++;
1667 }
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001668 }
1669 // Compute Cofactors of A
Debargha Mukherjee93105532017-03-01 10:44:46 -08001670 C00 = (int64_t)A[1][1] * A[2][2] - (int64_t)A[1][2] * A[1][2];
1671 C01 = (int64_t)A[1][2] * A[0][2] - (int64_t)A[0][1] * A[2][2];
1672 C02 = (int64_t)A[0][1] * A[1][2] - (int64_t)A[0][2] * A[1][1];
1673 C11 = (int64_t)A[0][0] * A[2][2] - (int64_t)A[0][2] * A[0][2];
1674 C12 = (int64_t)A[0][1] * A[0][2] - (int64_t)A[0][0] * A[1][2];
1675 C22 = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1];
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001676
Debargha Mukherjee8c410242017-04-11 15:20:56 -07001677 // Scale by 1/64
Debargha Mukherjee93105532017-03-01 10:44:46 -08001678 C00 = ROUND_POWER_OF_TWO_SIGNED(C00, 6);
1679 C01 = ROUND_POWER_OF_TWO_SIGNED(C01, 6);
1680 C02 = ROUND_POWER_OF_TWO_SIGNED(C02, 6);
1681 C11 = ROUND_POWER_OF_TWO_SIGNED(C11, 6);
1682 C12 = ROUND_POWER_OF_TWO_SIGNED(C12, 6);
1683 C22 = ROUND_POWER_OF_TWO_SIGNED(C22, 6);
1684
1685 // Compute Determinant of A
1686 Det = C00 * A[0][0] + C01 * A[0][1] + C02 * A[0][2];
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001687 if (Det == 0) return 1;
1688
Debargha Mukherjee93105532017-03-01 10:44:46 -08001689 // These divided by the Det, are the least squares solutions
1690 Px[0] = C00 * Bx[0] + C01 * Bx[1] + C02 * Bx[2];
1691 Px[1] = C01 * Bx[0] + C11 * Bx[1] + C12 * Bx[2];
1692 Px[2] = C02 * Bx[0] + C12 * Bx[1] + C22 * Bx[2];
1693 Py[0] = C00 * By[0] + C01 * By[1] + C02 * By[2];
1694 Py[1] = C01 * By[0] + C11 * By[1] + C12 * By[2];
1695 Py[2] = C02 * By[0] + C12 * By[1] + C22 * By[2];
1696
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001697 int16_t shift;
1698 int64_t iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001699 iDet = resolve_divisor_64(labs(Det), &shift) * (Det < 0 ? -1 : 1);
Debargha Mukherjee65bd6da2017-04-06 09:46:35 -07001700 shift -= WARPEDMODEL_PREC_BITS;
1701 if (shift < 0) {
1702 iDet <<= (-shift);
1703 shift = 0;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001704 }
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001705
Debargha Mukherjee93105532017-03-01 10:44:46 -08001706 v = Px[0] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001707 wm->wmmat[2] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
Debargha Mukherjee93105532017-03-01 10:44:46 -08001708 v = Px[1] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001709 wm->wmmat[3] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
Debargha Mukherjee93105532017-03-01 10:44:46 -08001710 v = Px[2] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001711 wm->wmmat[0] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift + 3);
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001712 // Adjust x displacement for the offset
1713 off = (ux << WARPEDMODEL_PREC_BITS) - ux * wm->wmmat[2] - uy * wm->wmmat[3];
1714 wm->wmmat[0] += ROUND_POWER_OF_TWO_SIGNED(off, 3);
1715
Debargha Mukherjee93105532017-03-01 10:44:46 -08001716 v = Py[0] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001717 wm->wmmat[4] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
Debargha Mukherjee93105532017-03-01 10:44:46 -08001718 v = Py[1] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001719 wm->wmmat[5] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
Debargha Mukherjee93105532017-03-01 10:44:46 -08001720 v = Py[2] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001721 wm->wmmat[1] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift + 3);
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001722 // Adjust y displacement for the offset
1723 off = (uy << WARPEDMODEL_PREC_BITS) - ux * wm->wmmat[4] - uy * wm->wmmat[5];
1724 wm->wmmat[1] += ROUND_POWER_OF_TWO_SIGNED(off, 3);
1725 wm->wmmat[6] = wm->wmmat[7] = 0;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001726
Debargha Mukherjee1e6e1302017-04-07 15:27:53 -07001727 // Clamp values
1728 wm->wmmat[0] = clamp(wm->wmmat[0], -WARPEDMODEL_TRANS_CLAMP,
1729 WARPEDMODEL_TRANS_CLAMP - 1);
1730 wm->wmmat[1] = clamp(wm->wmmat[1], -WARPEDMODEL_TRANS_CLAMP,
1731 WARPEDMODEL_TRANS_CLAMP - 1);
1732 wm->wmmat[2] = clamp(wm->wmmat[2], -WARPEDMODEL_DIAGAFFINE_CLAMP,
1733 WARPEDMODEL_DIAGAFFINE_CLAMP - 1);
1734 wm->wmmat[5] = clamp(wm->wmmat[5], -WARPEDMODEL_DIAGAFFINE_CLAMP,
1735 WARPEDMODEL_DIAGAFFINE_CLAMP - 1);
1736 wm->wmmat[3] = clamp(wm->wmmat[3], -WARPEDMODEL_NONDIAGAFFINE_CLAMP,
1737 WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
1738 wm->wmmat[4] = clamp(wm->wmmat[4], -WARPEDMODEL_NONDIAGAFFINE_CLAMP,
1739 WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
1740
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001741 return 0;
1742}
Debargha Mukherjeeb9370ac2017-03-23 06:10:18 -07001743#endif // LEAST_SQUARES_ORDER == 2
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001744
Debargha Mukherjeee8e6cad2017-03-22 17:38:38 -07001745int find_projection(const int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
1746 int mvy, int mvx, WarpedMotionParams *wm_params, int mi_row,
1747 int mi_col) {
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001748 int result = 1;
Yue Chen69f18e12016-09-08 14:48:15 -07001749 switch (wm_params->wmtype) {
Debargha Mukherjee246d2732017-02-27 14:09:18 -08001750 case AFFINE:
Debargha Mukherjeee8e6cad2017-03-22 17:38:38 -07001751 result = find_affine_int(np, pts1, pts2, bsize, mvy, mvx, wm_params,
1752 mi_row, mi_col);
Debargha Mukherjee246d2732017-02-27 14:09:18 -08001753 break;
Yue Chen69f18e12016-09-08 14:48:15 -07001754 default: assert(0 && "Invalid warped motion type!"); return 1;
1755 }
Yue Chen7d2109e2017-01-03 11:51:38 -08001756 if (result == 0) {
Yue Chen7d2109e2017-01-03 11:51:38 -08001757 if (wm_params->wmtype == ROTZOOM) {
1758 wm_params->wmmat[5] = wm_params->wmmat[2];
1759 wm_params->wmmat[4] = -wm_params->wmmat[3];
1760 }
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001761 if (wm_params->wmtype == AFFINE || wm_params->wmtype == ROTZOOM) {
1762 // check compatibility with the fast warp filter
Debargha Mukherjee3b6c5442017-03-30 08:22:00 -07001763 if (!get_shear_params(wm_params)) return 1;
Yue Chen7d2109e2017-01-03 11:51:38 -08001764 }
1765 }
Yue Chen69f18e12016-09-08 14:48:15 -07001766
1767 return result;
1768}
Yue Chen7d2109e2017-01-03 11:51:38 -08001769#endif // CONFIG_WARPED_MOTION