blob: c74609d3bbed267881ec771b332e0ff1ed8dfe75 [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xubde4ac82016-11-28 15:26:06 -08002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xubde4ac82016-11-28 15:26:06 -08004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xuc27fc142016-08-22 16:08:15 -070010 */
11
12#include <stdio.h>
13#include <stdlib.h>
14#include <memory.h>
15#include <math.h>
16#include <assert.h>
17
David Barkerd5dfa962017-01-10 15:06:08 +000018#include "./av1_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070019#include "av1/common/warped_motion.h"
20
/* clang-format off */
// 512-entry error-measure lookup table. The values are symmetric about
// index 255 (where the entry is 0) and rise to 16384 (= 1 << 14) at both
// ends; per the author's note below they follow a pow(., 0.7) curve.
// NOTE(review): presumably indexed by a signed residual offset by 256 —
// confirm against the call sites (not visible in this chunk).
static const int error_measure_lut[512] = {
  // pow 0.7
  16384, 16339, 16294, 16249, 16204, 16158, 16113, 16068,
  16022, 15977, 15932, 15886, 15840, 15795, 15749, 15703,
  15657, 15612, 15566, 15520, 15474, 15427, 15381, 15335,
  15289, 15242, 15196, 15149, 15103, 15056, 15010, 14963,
  14916, 14869, 14822, 14775, 14728, 14681, 14634, 14587,
  14539, 14492, 14445, 14397, 14350, 14302, 14254, 14206,
  14159, 14111, 14063, 14015, 13967, 13918, 13870, 13822,
  13773, 13725, 13676, 13628, 13579, 13530, 13481, 13432,
  13383, 13334, 13285, 13236, 13187, 13137, 13088, 13038,
  12988, 12939, 12889, 12839, 12789, 12739, 12689, 12639,
  12588, 12538, 12487, 12437, 12386, 12335, 12285, 12234,
  12183, 12132, 12080, 12029, 11978, 11926, 11875, 11823,
  11771, 11719, 11667, 11615, 11563, 11511, 11458, 11406,
  11353, 11301, 11248, 11195, 11142, 11089, 11036, 10982,
  10929, 10875, 10822, 10768, 10714, 10660, 10606, 10552,
  10497, 10443, 10388, 10333, 10279, 10224, 10168, 10113,
  10058, 10002,  9947,  9891,  9835,  9779,  9723,  9666,
   9610,  9553,  9497,  9440,  9383,  9326,  9268,  9211,
   9153,  9095,  9037,  8979,  8921,  8862,  8804,  8745,
   8686,  8627,  8568,  8508,  8449,  8389,  8329,  8269,
   8208,  8148,  8087,  8026,  7965,  7903,  7842,  7780,
   7718,  7656,  7593,  7531,  7468,  7405,  7341,  7278,
   7214,  7150,  7086,  7021,  6956,  6891,  6826,  6760,
   6695,  6628,  6562,  6495,  6428,  6361,  6293,  6225,
   6157,  6089,  6020,  5950,  5881,  5811,  5741,  5670,
   5599,  5527,  5456,  5383,  5311,  5237,  5164,  5090,
   5015,  4941,  4865,  4789,  4713,  4636,  4558,  4480,
   4401,  4322,  4242,  4162,  4080,  3998,  3916,  3832,
   3748,  3663,  3577,  3490,  3402,  3314,  3224,  3133,
   3041,  2948,  2854,  2758,  2661,  2562,  2461,  2359,
   2255,  2148,  2040,  1929,  1815,  1698,  1577,  1452,
   1323,  1187,  1045,   894,   731,   550,   339,     0,
    339,   550,   731,   894,  1045,  1187,  1323,  1452,
   1577,  1698,  1815,  1929,  2040,  2148,  2255,  2359,
   2461,  2562,  2661,  2758,  2854,  2948,  3041,  3133,
   3224,  3314,  3402,  3490,  3577,  3663,  3748,  3832,
   3916,  3998,  4080,  4162,  4242,  4322,  4401,  4480,
   4558,  4636,  4713,  4789,  4865,  4941,  5015,  5090,
   5164,  5237,  5311,  5383,  5456,  5527,  5599,  5670,
   5741,  5811,  5881,  5950,  6020,  6089,  6157,  6225,
   6293,  6361,  6428,  6495,  6562,  6628,  6695,  6760,
   6826,  6891,  6956,  7021,  7086,  7150,  7214,  7278,
   7341,  7405,  7468,  7531,  7593,  7656,  7718,  7780,
   7842,  7903,  7965,  8026,  8087,  8148,  8208,  8269,
   8329,  8389,  8449,  8508,  8568,  8627,  8686,  8745,
   8804,  8862,  8921,  8979,  9037,  9095,  9153,  9211,
   9268,  9326,  9383,  9440,  9497,  9553,  9610,  9666,
   9723,  9779,  9835,  9891,  9947, 10002, 10058, 10113,
  10168, 10224, 10279, 10333, 10388, 10443, 10497, 10552,
  10606, 10660, 10714, 10768, 10822, 10875, 10929, 10982,
  11036, 11089, 11142, 11195, 11248, 11301, 11353, 11406,
  11458, 11511, 11563, 11615, 11667, 11719, 11771, 11823,
  11875, 11926, 11978, 12029, 12080, 12132, 12183, 12234,
  12285, 12335, 12386, 12437, 12487, 12538, 12588, 12639,
  12689, 12739, 12789, 12839, 12889, 12939, 12988, 13038,
  13088, 13137, 13187, 13236, 13285, 13334, 13383, 13432,
  13481, 13530, 13579, 13628, 13676, 13725, 13773, 13822,
  13870, 13918, 13967, 14015, 14063, 14111, 14159, 14206,
  14254, 14302, 14350, 14397, 14445, 14492, 14539, 14587,
  14634, 14681, 14728, 14775, 14822, 14869, 14916, 14963,
  15010, 15056, 15103, 15149, 15196, 15242, 15289, 15335,
  15381, 15427, 15474, 15520, 15566, 15612, 15657, 15703,
  15749, 15795, 15840, 15886, 15932, 15977, 16022, 16068,
  16113, 16158, 16204, 16249, 16294, 16339, 16384, 16384,
};
/* clang-format on */
Debargha Mukherjee09055d42016-11-11 13:52:12 -080090
Sarah Parkerf9a961c2016-09-06 11:25:04 -070091static ProjectPointsFunc get_project_points_type(TransformationType type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070092 switch (type) {
Sarah Parkerf9a961c2016-09-06 11:25:04 -070093 case HOMOGRAPHY: return project_points_homography;
94 case AFFINE: return project_points_affine;
95 case ROTZOOM: return project_points_rotzoom;
96 case TRANSLATION: return project_points_translation;
Yaowu Xuc27fc142016-08-22 16:08:15 -070097 default: assert(0); return NULL;
98 }
99}
100
Debargha Mukherjee5f305852016-11-03 15:47:21 -0700101void project_points_translation(int32_t *mat, int *points, int *proj,
Sarah Parkerf9a961c2016-09-06 11:25:04 -0700102 const int n, const int stride_points,
103 const int stride_proj, const int subsampling_x,
104 const int subsampling_y) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700105 int i;
106 for (i = 0; i < n; ++i) {
107 const int x = *(points++), y = *(points++);
108 if (subsampling_x)
109 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800110 ((x * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[0]),
Yaowu Xuc27fc142016-08-22 16:08:15 -0700111 WARPEDDIFF_PREC_BITS + 1);
112 else
113 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800114 ((x * (1 << WARPEDMODEL_PREC_BITS)) + mat[0]), WARPEDDIFF_PREC_BITS);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700115 if (subsampling_y)
116 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800117 ((y * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[1]),
Yaowu Xuc27fc142016-08-22 16:08:15 -0700118 WARPEDDIFF_PREC_BITS + 1);
119 else
120 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800121 ((y * (1 << WARPEDMODEL_PREC_BITS))) + mat[1], WARPEDDIFF_PREC_BITS);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700122 points += stride_points - 2;
123 proj += stride_proj - 2;
124 }
125}
126
Debargha Mukherjee5f305852016-11-03 15:47:21 -0700127void project_points_rotzoom(int32_t *mat, int *points, int *proj, const int n,
Sarah Parkerf9a961c2016-09-06 11:25:04 -0700128 const int stride_points, const int stride_proj,
129 const int subsampling_x, const int subsampling_y) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700130 int i;
131 for (i = 0; i < n; ++i) {
132 const int x = *(points++), y = *(points++);
133 if (subsampling_x)
134 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800135 mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
136 (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700137 WARPEDDIFF_PREC_BITS + 1);
138 else
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800139 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
Yaowu Xuc27fc142016-08-22 16:08:15 -0700140 WARPEDDIFF_PREC_BITS);
141 if (subsampling_y)
142 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800143 -mat[3] * 2 * x + mat[2] * 2 * y + mat[1] +
144 (-mat[3] + mat[2] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700145 WARPEDDIFF_PREC_BITS + 1);
146 else
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800147 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(-mat[3] * x + mat[2] * y + mat[1],
Yaowu Xuc27fc142016-08-22 16:08:15 -0700148 WARPEDDIFF_PREC_BITS);
149 points += stride_points - 2;
150 proj += stride_proj - 2;
151 }
152}
153
Debargha Mukherjee5f305852016-11-03 15:47:21 -0700154void project_points_affine(int32_t *mat, int *points, int *proj, const int n,
Sarah Parkerf9a961c2016-09-06 11:25:04 -0700155 const int stride_points, const int stride_proj,
156 const int subsampling_x, const int subsampling_y) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700157 int i;
158 for (i = 0; i < n; ++i) {
159 const int x = *(points++), y = *(points++);
160 if (subsampling_x)
161 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800162 mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
163 (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700164 WARPEDDIFF_PREC_BITS + 1);
165 else
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800166 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
Yaowu Xuc27fc142016-08-22 16:08:15 -0700167 WARPEDDIFF_PREC_BITS);
168 if (subsampling_y)
169 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800170 mat[4] * 2 * x + mat[5] * 2 * y + mat[1] +
Sarah Parkerc4bcb502016-09-07 13:24:53 -0700171 (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700172 WARPEDDIFF_PREC_BITS + 1);
173 else
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800174 *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[4] * x + mat[5] * y + mat[1],
Yaowu Xuc27fc142016-08-22 16:08:15 -0700175 WARPEDDIFF_PREC_BITS);
176 points += stride_points - 2;
177 proj += stride_proj - 2;
178 }
179}
180
Debargha Mukherjee5dfa9302017-02-10 05:00:08 -0800181void project_points_hortrapezoid(int32_t *mat, int *points, int *proj,
182 const int n, const int stride_points,
183 const int stride_proj, const int subsampling_x,
184 const int subsampling_y) {
185 int i;
186 int64_t x, y, Z;
187 int64_t xp, yp;
188 for (i = 0; i < n; ++i) {
189 x = *(points++), y = *(points++);
190 x = (subsampling_x ? 4 * x + 1 : 2 * x);
191 y = (subsampling_y ? 4 * y + 1 : 2 * y);
192
193 Z = (mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
194 xp = (mat[2] * x + mat[3] * y + 2 * mat[0]) *
195 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
196 WARPEDMODEL_PREC_BITS));
197 yp = (mat[5] * y + 2 * mat[1]) *
198 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
199 WARPEDMODEL_PREC_BITS));
200
201 xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
202 yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
203
204 if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
205 if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
206 *(proj++) = xp;
207 *(proj++) = yp;
208
209 points += stride_points - 2;
210 proj += stride_proj - 2;
211 }
212}
213
214void project_points_vertrapezoid(int32_t *mat, int *points, int *proj,
215 const int n, const int stride_points,
216 const int stride_proj, const int subsampling_x,
217 const int subsampling_y) {
218 int i;
219 int64_t x, y, Z;
220 int64_t xp, yp;
221 for (i = 0; i < n; ++i) {
222 x = *(points++), y = *(points++);
223 x = (subsampling_x ? 4 * x + 1 : 2 * x);
224 y = (subsampling_y ? 4 * y + 1 : 2 * y);
225
226 Z = (mat[6] * x + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
227 xp = (mat[2] * x + 2 * mat[0]) *
228 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
229 WARPEDMODEL_PREC_BITS));
230 yp = (mat[4] * x + mat[5] * y + 2 * mat[1]) *
231 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
232 WARPEDMODEL_PREC_BITS));
233
234 xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
235 yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
236
237 if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
238 if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
239 *(proj++) = xp;
240 *(proj++) = yp;
241
242 points += stride_points - 2;
243 proj += stride_proj - 2;
244 }
245}
246
Debargha Mukherjee5f305852016-11-03 15:47:21 -0700247void project_points_homography(int32_t *mat, int *points, int *proj,
Sarah Parkerf9a961c2016-09-06 11:25:04 -0700248 const int n, const int stride_points,
249 const int stride_proj, const int subsampling_x,
250 const int subsampling_y) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700251 int i;
252 int64_t x, y, Z;
253 int64_t xp, yp;
254 for (i = 0; i < n; ++i) {
255 x = *(points++), y = *(points++);
256 x = (subsampling_x ? 4 * x + 1 : 2 * x);
257 y = (subsampling_y ? 4 * y + 1 : 2 * y);
258
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800259 Z = (mat[6] * x + mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
260 xp = (mat[2] * x + mat[3] * y + 2 * mat[0]) *
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700261 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
262 WARPEDMODEL_PREC_BITS));
Debargha Mukherjee8db4c772016-11-07 12:54:21 -0800263 yp = (mat[4] * x + mat[5] * y + 2 * mat[1]) *
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700264 (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
265 WARPEDMODEL_PREC_BITS));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700266
267 xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
268 yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
269
270 if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
271 if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
272 *(proj++) = xp;
273 *(proj++) = yp;
274
275 points += stride_points - 2;
276 proj += stride_proj - 2;
277 }
278}
279
Yue Chen69f18e12016-09-08 14:48:15 -0700280// 'points' are at original scale, output 'proj's are scaled up by
281// 1 << WARPEDPIXEL_PREC_BITS
282void project_points(WarpedMotionParams *wm_params, int *points, int *proj,
283 const int n, const int stride_points, const int stride_proj,
284 const int subsampling_x, const int subsampling_y) {
285 switch (wm_params->wmtype) {
286 case AFFINE:
287 project_points_affine(wm_params->wmmat, points, proj, n, stride_points,
288 stride_proj, subsampling_x, subsampling_y);
289 break;
290 case ROTZOOM:
291 project_points_rotzoom(wm_params->wmmat, points, proj, n, stride_points,
292 stride_proj, subsampling_x, subsampling_y);
293 break;
294 case HOMOGRAPHY:
295 project_points_homography(wm_params->wmmat, points, proj, n,
296 stride_points, stride_proj, subsampling_x,
297 subsampling_y);
298 break;
299 default: assert(0 && "Invalid warped motion type!"); return;
300 }
301}
302
// N-tap (6-tap) interpolation filter bank: one row of taps per fractional
// phase (WARPEDPIXEL_PREC_SHIFTS phases). Each row's taps sum to 128.
// Consumed by do_ntap_filter(), indexed by the fractional offset.
static const int16_t
    filter_ntap[WARPEDPIXEL_PREC_SHIFTS][WARPEDPIXEL_FILTER_TAPS] = {
#if WARPEDPIXEL_PREC_BITS == 6
      // 64 phases.
      { 0, 0, 128, 0, 0, 0 },     { 0, -1, 128, 2, -1, 0 },
      { 1, -3, 127, 4, -1, 0 },   { 1, -4, 126, 6, -2, 1 },
      { 1, -5, 126, 8, -3, 1 },   { 1, -6, 125, 11, -4, 1 },
      { 1, -7, 124, 13, -4, 1 },  { 2, -8, 123, 15, -5, 1 },
      { 2, -9, 122, 18, -6, 1 },  { 2, -10, 121, 20, -6, 1 },
      { 2, -11, 120, 22, -7, 2 }, { 2, -12, 119, 25, -8, 2 },
      { 3, -13, 117, 27, -8, 2 }, { 3, -13, 116, 29, -9, 2 },
      { 3, -14, 114, 32, -10, 3 }, { 3, -15, 113, 35, -10, 2 },
      { 3, -15, 111, 37, -11, 3 }, { 3, -16, 109, 40, -11, 3 },
      { 3, -16, 108, 42, -12, 3 }, { 4, -17, 106, 45, -13, 3 },
      { 4, -17, 104, 47, -13, 3 }, { 4, -17, 102, 50, -14, 3 },
      { 4, -17, 100, 52, -14, 3 }, { 4, -18, 98, 55, -15, 4 },
      { 4, -18, 96, 58, -15, 3 },  { 4, -18, 94, 60, -16, 4 },
      { 4, -18, 91, 63, -16, 4 },  { 4, -18, 89, 65, -16, 4 },
      { 4, -18, 87, 68, -17, 4 },  { 4, -18, 85, 70, -17, 4 },
      { 4, -18, 82, 73, -17, 4 },  { 4, -18, 80, 75, -17, 4 },
      { 4, -18, 78, 78, -18, 4 },  { 4, -17, 75, 80, -18, 4 },
      { 4, -17, 73, 82, -18, 4 },  { 4, -17, 70, 85, -18, 4 },
      { 4, -17, 68, 87, -18, 4 },  { 4, -16, 65, 89, -18, 4 },
      { 4, -16, 63, 91, -18, 4 },  { 4, -16, 60, 94, -18, 4 },
      { 3, -15, 58, 96, -18, 4 },  { 4, -15, 55, 98, -18, 4 },
      { 3, -14, 52, 100, -17, 4 }, { 3, -14, 50, 102, -17, 4 },
      { 3, -13, 47, 104, -17, 4 }, { 3, -13, 45, 106, -17, 4 },
      { 3, -12, 42, 108, -16, 3 }, { 3, -11, 40, 109, -16, 3 },
      { 3, -11, 37, 111, -15, 3 }, { 2, -10, 35, 113, -15, 3 },
      { 3, -10, 32, 114, -14, 3 }, { 2, -9, 29, 116, -13, 3 },
      { 2, -8, 27, 117, -13, 3 },  { 2, -8, 25, 119, -12, 2 },
      { 2, -7, 22, 120, -11, 2 },  { 1, -6, 20, 121, -10, 2 },
      { 1, -6, 18, 122, -9, 2 },   { 1, -5, 15, 123, -8, 2 },
      { 1, -4, 13, 124, -7, 1 },   { 1, -4, 11, 125, -6, 1 },
      { 1, -3, 8, 126, -5, 1 },    { 1, -2, 6, 126, -4, 1 },
      { 0, -1, 4, 127, -3, 1 },    { 0, -1, 2, 128, -1, 0 },
#else
      // 32 phases (coarser fractional precision).
      { 0, 0, 128, 0, 0, 0 },      { 1, -3, 127, 4, -1, 0 },
      { 1, -5, 126, 8, -3, 1 },    { 1, -7, 124, 13, -4, 1 },
      { 2, -9, 122, 18, -6, 1 },   { 2, -11, 120, 22, -7, 2 },
      { 3, -13, 117, 27, -8, 2 },  { 3, -14, 114, 32, -10, 3 },
      { 3, -15, 111, 37, -11, 3 }, { 3, -16, 108, 42, -12, 3 },
      { 4, -17, 104, 47, -13, 3 }, { 4, -17, 100, 52, -14, 3 },
      { 4, -18, 96, 58, -15, 3 },  { 4, -18, 91, 63, -16, 4 },
      { 4, -18, 87, 68, -17, 4 },  { 4, -18, 82, 73, -17, 4 },
      { 4, -18, 78, 78, -18, 4 },  { 4, -17, 73, 82, -18, 4 },
      { 4, -17, 68, 87, -18, 4 },  { 4, -16, 63, 91, -18, 4 },
      { 3, -15, 58, 96, -18, 4 },  { 3, -14, 52, 100, -17, 4 },
      { 3, -13, 47, 104, -17, 4 }, { 3, -12, 42, 108, -16, 3 },
      { 3, -11, 37, 111, -15, 3 }, { 3, -10, 32, 114, -14, 3 },
      { 2, -8, 27, 117, -13, 3 },  { 2, -7, 22, 120, -11, 2 },
      { 1, -6, 18, 122, -9, 2 },   { 1, -4, 13, 124, -7, 1 },
      { 1, -3, 8, 126, -5, 1 },    { 0, -1, 4, 127, -3, 1 },
#endif  // WARPEDPIXEL_PREC_BITS == 6
    };
357
358static int32_t do_ntap_filter(int32_t *p, int x) {
359 int i;
360 int32_t sum = 0;
361 for (i = 0; i < WARPEDPIXEL_FILTER_TAPS; ++i) {
362 sum += p[i - WARPEDPIXEL_FILTER_TAPS / 2 + 1] * filter_ntap[x][i];
363 }
364 return sum;
365}
366
367static int32_t do_cubic_filter(int32_t *p, int x) {
368 if (x == 0) {
David Barkerf23bdca2016-11-07 13:47:13 +0000369 return p[0] * (1 << WARPEDPIXEL_FILTER_BITS);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700370 } else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
David Barkerf23bdca2016-11-07 13:47:13 +0000371 return p[1] * (1 << WARPEDPIXEL_FILTER_BITS);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700372 } else {
Sarah Parkerdb926352016-10-26 12:46:03 -0700373 const int64_t v1 = (int64_t)x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700374 const int64_t v2 = x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
375 const int64_t v3 = x * (p[1] - p[-1]);
376 const int64_t v4 = 2 * p[0];
377 return (int32_t)ROUND_POWER_OF_TWO_SIGNED(
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700378 (v4 * (1 << (3 * WARPEDPIXEL_PREC_BITS))) +
379 (v3 * (1 << (2 * WARPEDPIXEL_PREC_BITS))) +
380 (v2 * (1 << WARPEDPIXEL_PREC_BITS)) + v1,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700381 3 * WARPEDPIXEL_PREC_BITS + 1 - WARPEDPIXEL_FILTER_BITS);
382 }
383}
384
Yaowu Xuc27fc142016-08-22 16:08:15 -0700385static INLINE void get_subcolumn(int taps, uint8_t *ref, int32_t *col,
386 int stride, int x, int y_start) {
387 int i;
388 for (i = 0; i < taps; ++i) {
389 col[i] = ref[(i + y_start) * stride + x];
390 }
391}
392
393static uint8_t bi_ntap_filter(uint8_t *ref, int x, int y, int stride) {
394 int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
395 int k;
396 int i = (int)x >> WARPEDPIXEL_PREC_BITS;
397 int j = (int)y >> WARPEDPIXEL_PREC_BITS;
398 for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
399 int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
400 get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
401 i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
402 j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
403 arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700404 y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700405 }
406 val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700407 x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700408 val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
409 return (uint8_t)clip_pixel(val);
410}
411
412static uint8_t bi_cubic_filter(uint8_t *ref, int x, int y, int stride) {
413 int32_t val, arr[4];
414 int k;
415 int i = (int)x >> WARPEDPIXEL_PREC_BITS;
416 int j = (int)y >> WARPEDPIXEL_PREC_BITS;
417 for (k = 0; k < 4; ++k) {
418 int32_t arr_temp[4];
419 get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700420 arr[k] =
421 do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700422 }
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700423 val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700424 val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
425 return (uint8_t)clip_pixel(val);
426}
427
428static uint8_t bi_linear_filter(uint8_t *ref, int x, int y, int stride) {
429 const int ix = x >> WARPEDPIXEL_PREC_BITS;
430 const int iy = y >> WARPEDPIXEL_PREC_BITS;
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700431 const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
432 const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700433 int32_t val;
434 val = ROUND_POWER_OF_TWO_SIGNED(
435 ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
436 (WARPEDPIXEL_PREC_SHIFTS - sx) +
437 ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
438 ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
439 ref[(iy + 1) * stride + ix + 1] * sy * sx,
440 WARPEDPIXEL_PREC_BITS * 2);
441 return (uint8_t)clip_pixel(val);
442}
443
// Interpolate 'ref' at fractional position (x, y) (WARPEDPIXEL_PREC_BITS
// fixed point), clamping at the frame borders. Branch order matters: the
// four corner cases are handled first, then the four edges, then the
// interior, where the filter is chosen by how far the sample sits from the
// border (n-tap if there is room for the full support, else bicubic, else
// bilinear). Note the asymmetric tests: "< 0" on the low side but
// ">= dim - 1" on the high side, because interpolation reads one sample
// beyond (ix, iy).
static uint8_t warp_interpolate(uint8_t *ref, int x, int y, int width,
                                int height, int stride) {
  int ix = x >> WARPEDPIXEL_PREC_BITS;
  int iy = y >> WARPEDPIXEL_PREC_BITS;
  // Fractional offsets within the (ix, iy) cell.
  int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
  int32_t v;

  if (ix < 0 && iy < 0)
    return ref[0];  // top-left corner
  else if (ix < 0 && iy >= height - 1)
    return ref[(height - 1) * stride];  // bottom-left corner
  else if (ix >= width - 1 && iy < 0)
    return ref[width - 1];  // top-right corner
  else if (ix >= width - 1 && iy >= height - 1)
    return ref[(height - 1) * stride + (width - 1)];  // bottom-right corner
  else if (ix < 0) {
    // Left edge: interpolate vertically only, in column 0.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
            ref[(iy + 1) * stride] * sy,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (iy < 0) {
    // Top edge: interpolate horizontally only, in row 0.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (ix >= width - 1) {
    // Right edge: interpolate vertically only, in the last column.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
            ref[(iy + 1) * stride + width - 1] * sy,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (iy >= height - 1) {
    // Bottom edge: interpolate horizontally only, in the last row.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
            ref[(height - 1) * stride + ix + 1] * sx,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
    // Deep interior: full n-tap support fits within the frame.
    return bi_ntap_filter(ref, x, y, stride);
  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
    // Near-border interior: a 4x4 bicubic window fits.
    return bi_cubic_filter(ref, x, y, stride);
  } else {
    // Right next to the border: fall back to 2x2 bilinear.
    return bi_linear_filter(ref, x, y, stride);
  }
}
494
David Barkerbe128602016-12-13 16:40:31 +0000495// For warping, we really use a 6-tap filter, but we do blocks of 8 pixels
496// at a time. The zoom/rotation/shear in the model are applied to the
497// "fractional" position of each pixel, which therefore varies within
498// [-1, 2) * WARPEDPIXEL_PREC_SHIFTS.
499// We need an extra 2 taps to fit this in, for a total of 8 taps.
500/* clang-format off */
Debargha Mukherjee082d4df2017-03-16 07:10:41 -0700501const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8] = {
Debargha Mukherjee16056f52017-04-12 16:10:50 -0700502#if WARPEDPIXEL_PREC_BITS == 6
David Barkerbe128602016-12-13 16:40:31 +0000503 // [-1, 0)
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700504 { 0, 0, 127, 1, 0, 0, 0, 0 }, { 0, - 1, 127, 2, 0, 0, 0, 0 },
David Barkerbe128602016-12-13 16:40:31 +0000505 { 1, - 3, 127, 4, - 1, 0, 0, 0 }, { 1, - 4, 126, 6, - 2, 1, 0, 0 },
506 { 1, - 5, 126, 8, - 3, 1, 0, 0 }, { 1, - 6, 125, 11, - 4, 1, 0, 0 },
507 { 1, - 7, 124, 13, - 4, 1, 0, 0 }, { 2, - 8, 123, 15, - 5, 1, 0, 0 },
508 { 2, - 9, 122, 18, - 6, 1, 0, 0 }, { 2, -10, 121, 20, - 6, 1, 0, 0 },
509 { 2, -11, 120, 22, - 7, 2, 0, 0 }, { 2, -12, 119, 25, - 8, 2, 0, 0 },
510 { 3, -13, 117, 27, - 8, 2, 0, 0 }, { 3, -13, 116, 29, - 9, 2, 0, 0 },
511 { 3, -14, 114, 32, -10, 3, 0, 0 }, { 3, -15, 113, 35, -10, 2, 0, 0 },
512 { 3, -15, 111, 37, -11, 3, 0, 0 }, { 3, -16, 109, 40, -11, 3, 0, 0 },
513 { 3, -16, 108, 42, -12, 3, 0, 0 }, { 4, -17, 106, 45, -13, 3, 0, 0 },
514 { 4, -17, 104, 47, -13, 3, 0, 0 }, { 4, -17, 102, 50, -14, 3, 0, 0 },
515 { 4, -17, 100, 52, -14, 3, 0, 0 }, { 4, -18, 98, 55, -15, 4, 0, 0 },
516 { 4, -18, 96, 58, -15, 3, 0, 0 }, { 4, -18, 94, 60, -16, 4, 0, 0 },
517 { 4, -18, 91, 63, -16, 4, 0, 0 }, { 4, -18, 89, 65, -16, 4, 0, 0 },
518 { 4, -18, 87, 68, -17, 4, 0, 0 }, { 4, -18, 85, 70, -17, 4, 0, 0 },
519 { 4, -18, 82, 73, -17, 4, 0, 0 }, { 4, -18, 80, 75, -17, 4, 0, 0 },
520 { 4, -18, 78, 78, -18, 4, 0, 0 }, { 4, -17, 75, 80, -18, 4, 0, 0 },
521 { 4, -17, 73, 82, -18, 4, 0, 0 }, { 4, -17, 70, 85, -18, 4, 0, 0 },
522 { 4, -17, 68, 87, -18, 4, 0, 0 }, { 4, -16, 65, 89, -18, 4, 0, 0 },
523 { 4, -16, 63, 91, -18, 4, 0, 0 }, { 4, -16, 60, 94, -18, 4, 0, 0 },
524 { 3, -15, 58, 96, -18, 4, 0, 0 }, { 4, -15, 55, 98, -18, 4, 0, 0 },
525 { 3, -14, 52, 100, -17, 4, 0, 0 }, { 3, -14, 50, 102, -17, 4, 0, 0 },
526 { 3, -13, 47, 104, -17, 4, 0, 0 }, { 3, -13, 45, 106, -17, 4, 0, 0 },
527 { 3, -12, 42, 108, -16, 3, 0, 0 }, { 3, -11, 40, 109, -16, 3, 0, 0 },
528 { 3, -11, 37, 111, -15, 3, 0, 0 }, { 2, -10, 35, 113, -15, 3, 0, 0 },
529 { 3, -10, 32, 114, -14, 3, 0, 0 }, { 2, - 9, 29, 116, -13, 3, 0, 0 },
530 { 2, - 8, 27, 117, -13, 3, 0, 0 }, { 2, - 8, 25, 119, -12, 2, 0, 0 },
531 { 2, - 7, 22, 120, -11, 2, 0, 0 }, { 1, - 6, 20, 121, -10, 2, 0, 0 },
532 { 1, - 6, 18, 122, - 9, 2, 0, 0 }, { 1, - 5, 15, 123, - 8, 2, 0, 0 },
533 { 1, - 4, 13, 124, - 7, 1, 0, 0 }, { 1, - 4, 11, 125, - 6, 1, 0, 0 },
534 { 1, - 3, 8, 126, - 5, 1, 0, 0 }, { 1, - 2, 6, 126, - 4, 1, 0, 0 },
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700535 { 0, - 1, 4, 127, - 3, 1, 0, 0 }, { 0, 0, 2, 127, - 1, 0, 0, 0 },
David Barkerbe128602016-12-13 16:40:31 +0000536
537 // [0, 1)
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700538 { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 0, -1, 127, 2, 0, 0, 0},
Debargha Mukherjeee6044fe2017-01-19 02:13:14 -0800539 { 0, 1, -3, 127, 4, -2, 1, 0}, { 0, 1, -5, 127, 6, -2, 1, 0},
540 { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 2, -7, 126, 11, -4, 2, -1},
541 {-1, 3, -8, 125, 13, -5, 2, -1}, {-1, 3, -10, 124, 16, -6, 3, -1},
542 {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -12, 122, 20, -7, 3, -1},
543 {-1, 4, -13, 121, 23, -8, 3, -1}, {-2, 5, -14, 120, 25, -9, 4, -1},
544 {-1, 5, -15, 119, 27, -10, 4, -1}, {-1, 5, -16, 118, 30, -11, 4, -1},
545 {-2, 6, -17, 116, 33, -12, 5, -1}, {-2, 6, -17, 114, 35, -12, 5, -1},
546 {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 111, 41, -14, 6, -2},
547 {-2, 7, -19, 110, 43, -15, 6, -2}, {-2, 7, -20, 108, 46, -15, 6, -2},
548 {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 104, 51, -16, 7, -2},
549 {-2, 7, -21, 102, 54, -17, 7, -2}, {-2, 8, -21, 100, 56, -18, 7, -2},
550 {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 96, 62, -19, 7, -2},
551 {-2, 8, -22, 94, 64, -19, 7, -2}, {-2, 8, -22, 91, 67, -20, 8, -2},
552 {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -22, 87, 72, -21, 8, -2},
553 {-2, 8, -21, 84, 74, -21, 8, -2}, {-2, 8, -22, 82, 77, -21, 8, -2},
554 {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 77, 82, -22, 8, -2},
555 {-2, 8, -21, 74, 84, -21, 8, -2}, {-2, 8, -21, 72, 87, -22, 8, -2},
556 {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 8, -20, 67, 91, -22, 8, -2},
557 {-2, 7, -19, 64, 94, -22, 8, -2}, {-2, 7, -19, 62, 96, -22, 8, -2},
558 {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -18, 56, 100, -21, 8, -2},
559 {-2, 7, -17, 54, 102, -21, 7, -2}, {-2, 7, -16, 51, 104, -21, 7, -2},
560 {-2, 6, -16, 49, 106, -20, 7, -2}, {-2, 6, -15, 46, 108, -20, 7, -2},
561 {-2, 6, -15, 43, 110, -19, 7, -2}, {-2, 6, -14, 41, 111, -19, 7, -2},
562 {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 35, 114, -17, 6, -2},
563 {-1, 5, -12, 33, 116, -17, 6, -2}, {-1, 4, -11, 30, 118, -16, 5, -1},
564 {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 4, -9, 25, 120, -14, 5, -2},
565 {-1, 3, -8, 23, 121, -13, 4, -1}, {-1, 3, -7, 20, 122, -12, 4, -1},
566 {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 3, -6, 16, 124, -10, 3, -1},
567 {-1, 2, -5, 13, 125, -8, 3, -1}, {-1, 2, -4, 11, 126, -7, 2, -1},
568 { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 6, 127, -5, 1, 0},
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700569 { 0, 1, -2, 4, 127, -3, 1, 0}, { 0, 0, 0, 2, 127, -1, 0, 0},
David Barkerbe128602016-12-13 16:40:31 +0000570
571 // [1, 2)
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700572 { 0, 0, 0, 1, 127, 0, 0, 0 }, { 0, 0, 0, - 1, 127, 2, 0, 0 },
David Barkerbe128602016-12-13 16:40:31 +0000573 { 0, 0, 1, - 3, 127, 4, - 1, 0 }, { 0, 0, 1, - 4, 126, 6, - 2, 1 },
574 { 0, 0, 1, - 5, 126, 8, - 3, 1 }, { 0, 0, 1, - 6, 125, 11, - 4, 1 },
575 { 0, 0, 1, - 7, 124, 13, - 4, 1 }, { 0, 0, 2, - 8, 123, 15, - 5, 1 },
576 { 0, 0, 2, - 9, 122, 18, - 6, 1 }, { 0, 0, 2, -10, 121, 20, - 6, 1 },
577 { 0, 0, 2, -11, 120, 22, - 7, 2 }, { 0, 0, 2, -12, 119, 25, - 8, 2 },
578 { 0, 0, 3, -13, 117, 27, - 8, 2 }, { 0, 0, 3, -13, 116, 29, - 9, 2 },
579 { 0, 0, 3, -14, 114, 32, -10, 3 }, { 0, 0, 3, -15, 113, 35, -10, 2 },
580 { 0, 0, 3, -15, 111, 37, -11, 3 }, { 0, 0, 3, -16, 109, 40, -11, 3 },
581 { 0, 0, 3, -16, 108, 42, -12, 3 }, { 0, 0, 4, -17, 106, 45, -13, 3 },
582 { 0, 0, 4, -17, 104, 47, -13, 3 }, { 0, 0, 4, -17, 102, 50, -14, 3 },
583 { 0, 0, 4, -17, 100, 52, -14, 3 }, { 0, 0, 4, -18, 98, 55, -15, 4 },
584 { 0, 0, 4, -18, 96, 58, -15, 3 }, { 0, 0, 4, -18, 94, 60, -16, 4 },
585 { 0, 0, 4, -18, 91, 63, -16, 4 }, { 0, 0, 4, -18, 89, 65, -16, 4 },
586 { 0, 0, 4, -18, 87, 68, -17, 4 }, { 0, 0, 4, -18, 85, 70, -17, 4 },
587 { 0, 0, 4, -18, 82, 73, -17, 4 }, { 0, 0, 4, -18, 80, 75, -17, 4 },
588 { 0, 0, 4, -18, 78, 78, -18, 4 }, { 0, 0, 4, -17, 75, 80, -18, 4 },
589 { 0, 0, 4, -17, 73, 82, -18, 4 }, { 0, 0, 4, -17, 70, 85, -18, 4 },
590 { 0, 0, 4, -17, 68, 87, -18, 4 }, { 0, 0, 4, -16, 65, 89, -18, 4 },
591 { 0, 0, 4, -16, 63, 91, -18, 4 }, { 0, 0, 4, -16, 60, 94, -18, 4 },
592 { 0, 0, 3, -15, 58, 96, -18, 4 }, { 0, 0, 4, -15, 55, 98, -18, 4 },
593 { 0, 0, 3, -14, 52, 100, -17, 4 }, { 0, 0, 3, -14, 50, 102, -17, 4 },
594 { 0, 0, 3, -13, 47, 104, -17, 4 }, { 0, 0, 3, -13, 45, 106, -17, 4 },
595 { 0, 0, 3, -12, 42, 108, -16, 3 }, { 0, 0, 3, -11, 40, 109, -16, 3 },
596 { 0, 0, 3, -11, 37, 111, -15, 3 }, { 0, 0, 2, -10, 35, 113, -15, 3 },
597 { 0, 0, 3, -10, 32, 114, -14, 3 }, { 0, 0, 2, - 9, 29, 116, -13, 3 },
598 { 0, 0, 2, - 8, 27, 117, -13, 3 }, { 0, 0, 2, - 8, 25, 119, -12, 2 },
599 { 0, 0, 2, - 7, 22, 120, -11, 2 }, { 0, 0, 1, - 6, 20, 121, -10, 2 },
600 { 0, 0, 1, - 6, 18, 122, - 9, 2 }, { 0, 0, 1, - 5, 15, 123, - 8, 2 },
601 { 0, 0, 1, - 4, 13, 124, - 7, 1 }, { 0, 0, 1, - 4, 11, 125, - 6, 1 },
602 { 0, 0, 1, - 3, 8, 126, - 5, 1 }, { 0, 0, 1, - 2, 6, 126, - 4, 1 },
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700603 { 0, 0, 0, - 1, 4, 127, - 3, 1 }, { 0, 0, 0, 0, 2, 127, - 1, 0 },
Debargha Mukherjee082d4df2017-03-16 07:10:41 -0700604
Debargha Mukherjee16056f52017-04-12 16:10:50 -0700605#else
606 // [-1, 0)
607 {0, 0, 127, 1, 0, 0, 0, 0}, {1, -3, 127, 4, -1, 0, 0, 0},
608 {1, -5, 126, 8, -3, 1, 0, 0}, {1, -7, 124, 13, -4, 1, 0, 0},
609 {2, -9, 122, 18, -6, 1, 0, 0}, {2, -11, 120, 22, -7, 2, 0, 0},
610 {3, -13, 117, 27, -8, 2, 0, 0}, {3, -14, 114, 32, -10, 3, 0, 0},
611 {3, -15, 111, 37, -11, 3, 0, 0}, {3, -16, 108, 42, -12, 3, 0, 0},
612 {4, -17, 104, 47, -13, 3, 0, 0}, {4, -17, 100, 52, -14, 3, 0, 0},
613 {4, -18, 96, 58, -15, 3, 0, 0}, {4, -18, 91, 63, -16, 4, 0, 0},
614 {4, -18, 87, 68, -17, 4, 0, 0}, {4, -18, 82, 73, -17, 4, 0, 0},
615 {4, -18, 78, 78, -18, 4, 0, 0}, {4, -17, 73, 82, -18, 4, 0, 0},
616 {4, -17, 68, 87, -18, 4, 0, 0}, {4, -16, 63, 91, -18, 4, 0, 0},
617 {3, -15, 58, 96, -18, 4, 0, 0}, {3, -14, 52, 100, -17, 4, 0, 0},
618 {3, -13, 47, 104, -17, 4, 0, 0}, {3, -12, 42, 108, -16, 3, 0, 0},
619 {3, -11, 37, 111, -15, 3, 0, 0}, {3, -10, 32, 114, -14, 3, 0, 0},
620 {2, -8, 27, 117, -13, 3, 0, 0}, {2, -7, 22, 120, -11, 2, 0, 0},
621 {1, -6, 18, 122, -9, 2, 0, 0}, {1, -4, 13, 124, -7, 1, 0, 0},
622 {1, -3, 8, 126, -5, 1, 0, 0}, {0, -1, 4, 127, -3, 1, 0, 0},
623 // [0, 1)
624 { 0, 0, 0, 127, 1, 0, 0, 0}, { 0, 1, -3, 127, 4, -2, 1, 0},
625 { 0, 2, -6, 126, 8, -3, 1, 0}, {-1, 3, -8, 125, 13, -5, 2, -1},
626 {-1, 4, -11, 123, 18, -7, 3, -1}, {-1, 4, -13, 121, 23, -8, 3, -1},
627 {-1, 5, -15, 119, 27, -10, 4, -1}, {-2, 6, -17, 116, 33, -12, 5, -1},
628 {-2, 6, -18, 113, 38, -13, 5, -1}, {-2, 7, -19, 110, 43, -15, 6, -2},
629 {-2, 7, -20, 106, 49, -16, 6, -2}, {-2, 7, -21, 102, 54, -17, 7, -2},
630 {-2, 8, -22, 98, 59, -18, 7, -2}, {-2, 8, -22, 94, 64, -19, 7, -2},
631 {-2, 8, -22, 89, 69, -20, 8, -2}, {-2, 8, -21, 84, 74, -21, 8, -2},
632 {-2, 8, -21, 79, 79, -21, 8, -2}, {-2, 8, -21, 74, 84, -21, 8, -2},
633 {-2, 8, -20, 69, 89, -22, 8, -2}, {-2, 7, -19, 64, 94, -22, 8, -2},
634 {-2, 7, -18, 59, 98, -22, 8, -2}, {-2, 7, -17, 54, 102, -21, 7, -2},
635 {-2, 6, -16, 49, 106, -20, 7, -2}, {-2, 6, -15, 43, 110, -19, 7, -2},
636 {-1, 5, -13, 38, 113, -18, 6, -2}, {-1, 5, -12, 33, 116, -17, 6, -2},
637 {-1, 4, -10, 27, 119, -15, 5, -1}, {-1, 3, -8, 23, 121, -13, 4, -1},
638 {-1, 3, -7, 18, 123, -11, 4, -1}, {-1, 2, -5, 13, 125, -8, 3, -1},
639 { 0, 1, -3, 8, 126, -6, 2, 0}, { 0, 1, -2, 4, 127, -3, 1, 0},
640 // [1, 2)
641 {0, 0, 0, 1, 127, 0, 0, 0}, {0, 0, 1, -3, 127, 4, -1, 0},
642 {0, 0, 1, -5, 126, 8, -3, 1}, {0, 0, 1, -7, 124, 13, -4, 1},
643 {0, 0, 2, -9, 122, 18, -6, 1}, {0, 0, 2, -11, 120, 22, -7, 2},
644 {0, 0, 3, -13, 117, 27, -8, 2}, {0, 0, 3, -14, 114, 32, -10, 3},
645 {0, 0, 3, -15, 111, 37, -11, 3}, {0, 0, 3, -16, 108, 42, -12, 3},
646 {0, 0, 4, -17, 104, 47, -13, 3}, {0, 0, 4, -17, 100, 52, -14, 3},
647 {0, 0, 4, -18, 96, 58, -15, 3}, {0, 0, 4, -18, 91, 63, -16, 4},
648 {0, 0, 4, -18, 87, 68, -17, 4}, {0, 0, 4, -18, 82, 73, -17, 4},
649 {0, 0, 4, -18, 78, 78, -18, 4}, {0, 0, 4, -17, 73, 82, -18, 4},
650 {0, 0, 4, -17, 68, 87, -18, 4}, {0, 0, 4, -16, 63, 91, -18, 4},
651 {0, 0, 3, -15, 58, 96, -18, 4}, {0, 0, 3, -14, 52, 100, -17, 4},
652 {0, 0, 3, -13, 47, 104, -17, 4}, {0, 0, 3, -12, 42, 108, -16, 3},
653 {0, 0, 3, -11, 37, 111, -15, 3}, {0, 0, 3, -10, 32, 114, -14, 3},
654 {0, 0, 2, -8, 27, 117, -13, 3}, {0, 0, 2, -7, 22, 120, -11, 2},
655 {0, 0, 1, -6, 18, 122, -9, 2}, {0, 0, 1, -4, 13, 124, -7, 1},
656 {0, 0, 1, -3, 8, 126, -5, 1}, {0, 0, 0, -1, 4, 127, -3, 1},
657
658#endif // WARPEDPIXEL_PREC_BITS == 6
659
Debargha Mukherjee082d4df2017-03-16 07:10:41 -0700660 // dummy
Sean Purser-Haskell6e1c7d72017-04-10 18:08:07 -0700661 { 0, 0, 0, 0, 1, 127, 0, 0 },
David Barkerbe128602016-12-13 16:40:31 +0000662};
Debargha Mukherjee16056f52017-04-12 16:10:50 -0700663
David Barkerbe128602016-12-13 16:40:31 +0000664/* clang-format on */
665
// Precision (in bits) of the reciprocal multipliers stored in div_lut.
#define DIV_LUT_PREC_BITS 14
// log2 of the number of mantissa intervals covered by the table.
#define DIV_LUT_BITS 8
// Number of intervals; one extra entry is kept so that a mantissa rounded
// up to exactly DIV_LUT_NUM still indexes in bounds.
#define DIV_LUT_NUM (1 << DIV_LUT_BITS)

// Reciprocal lookup table used by resolve_divisor_32/64: for a normalized
// 8-bit mantissa f in [0, DIV_LUT_NUM],
//   div_lut[f] ~= 2^DIV_LUT_PREC_BITS / (1 + f / DIV_LUT_NUM)
// so entries run from 16384 (divisor 1.0) down to 8192 (divisor 2.0).
static const uint16_t div_lut[DIV_LUT_NUM + 1] = {
  16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
  15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
  15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
  14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
  13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
  13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
  13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
  12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
  12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
  11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
  11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
  11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
  10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
  10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
  10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
  9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
  9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
  9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
  9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
  9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
  8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
  8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
  8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
  8240,  8224,  8208,  8192,
};
696
// Clamp a 32-bit value into the representable range of int16_t.
static inline int16_t saturate_int16(int32_t v) {
  const int32_t lo = INT16_MIN;
  const int32_t hi = INT16_MAX;
  return (int16_t)(v < lo ? lo : (v > hi ? hi : v));
}
704
#if CONFIG_WARPED_MOTION
// Decomposes a divisor D such that 1/D = y/2^shift, where y is returned
// at precision of DIV_LUT_PREC_BITS along with the shift.
static int16_t resolve_divisor_64(uint64_t D, int16_t *shift) {
  // Position of the most significant set bit of D (split to handle
  // divisors wider than 32 bits).
  *shift = (D >> 32) ? get_msb(D >> 32) + 32 : get_msb(D);
  // Strip the leading 1 bit to obtain the mantissa part of D.
  const int64_t mantissa = D - ((uint64_t)1 << *shift);
  // Reduce (or extend) the mantissa to exactly DIV_LUT_BITS (8) bits.
  int64_t lut_idx;
  if (*shift > DIV_LUT_BITS)
    lut_idx = ROUND_POWER_OF_TWO_64(mantissa, *shift - DIV_LUT_BITS);
  else
    lut_idx = mantissa << (DIV_LUT_BITS - *shift);
  assert(lut_idx <= DIV_LUT_NUM);
  *shift += DIV_LUT_PREC_BITS;
  // Look up the reciprocal multiplier for this mantissa interval.
  return div_lut[lut_idx];
}
#endif  // CONFIG_WARPED_MOTION
724
725static int16_t resolve_divisor_32(uint32_t D, int16_t *shift) {
726 int32_t e, f;
727 *shift = get_msb(D);
728 // e is obtained from D after resetting the most significant 1 bit.
729 e = D - ((uint32_t)1 << *shift);
730 // Get the most significant DIV_LUT_BITS (8) bits of e into f
731 if (*shift > DIV_LUT_BITS)
732 f = ROUND_POWER_OF_TWO(e, *shift - DIV_LUT_BITS);
733 else
734 f = e << (DIV_LUT_BITS - *shift);
735 assert(f <= DIV_LUT_NUM);
736 *shift += DIV_LUT_PREC_BITS;
737 // Use f as lookup into the precomputed table of multipliers
738 return div_lut[f];
739}
Debargha Mukherjee082d4df2017-03-16 07:10:41 -0700740
741static int is_affine_valid(WarpedMotionParams *wm) {
742 const int32_t *mat = wm->wmmat;
743 return (mat[2] > 0);
744}
745
Debargha Mukherjee27f6e662017-04-10 11:17:16 -0700746static int is_affine_shear_allowed(int16_t alpha, int16_t beta, int16_t gamma,
747 int16_t delta) {
Sean Purser-Haskelle3bc0da2017-04-10 18:19:33 -0700748 if ((4 * abs(alpha) + 7 * abs(beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
749 (4 * abs(gamma) + 4 * abs(delta) >= (1 << WARPEDMODEL_PREC_BITS)))
Debargha Mukherjee082d4df2017-03-16 07:10:41 -0700750 return 0;
751 else
752 return 1;
753}
754
// Derives the shear decomposition (alpha, beta, gamma, delta) of the affine
// part of *wm and stores it back into wm. The warp filter applies the matrix
// as a product of two shears (see the factorization in the comment above
// warp_plane()), so these four parameters drive the two filter passes.
// Returns 1 on success or 0 on an invalid affine set
int get_shear_params(WarpedMotionParams *wm) {
  const int32_t *mat = wm->wmmat;
  if (!is_affine_valid(wm)) return 0;
  // alpha = a - 1 and beta = b, where a = mat[2], b = mat[3] in the
  // factorization / a b \ = / 1 0 \ * / 1+alpha beta \.
  wm->alpha =
      clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
  wm->beta = clamp(mat[3], INT16_MIN, INT16_MAX);
  int16_t shift;
  // y / 2^shift ~= 1 / |a|, via the reciprocal LUT; restore a's sign.
  int16_t y = resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
  int64_t v;
  // gamma = c / a at WARPEDMODEL precision (c = mat[4]).
  v = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) * y;
  wm->gamma =
      clamp(ROUND_POWER_OF_TWO_SIGNED_64(v, shift), INT16_MIN, INT16_MAX);
  // delta = d - b * c / a - 1 (d = mat[5]).
  v = ((int64_t)mat[3] * mat[4]) * y;
  wm->delta = clamp(mat[5] - ROUND_POWER_OF_TWO_SIGNED_64(v, shift) -
                        (1 << WARPEDMODEL_PREC_BITS),
                    INT16_MIN, INT16_MAX);
  if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta))
    return 0;
  return 1;
}
776
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200777#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700778static INLINE void highbd_get_subcolumn(int taps, uint16_t *ref, int32_t *col,
779 int stride, int x, int y_start) {
780 int i;
781 for (i = 0; i < taps; ++i) {
782 col[i] = ref[(i + y_start) * stride + x];
783 }
784}
785
// Interpolates one high bit-depth pixel at subpel position (x, y), given in
// WARPEDPIXEL_PREC_BITS precision, with the separable n-tap warp filter:
// each of the WARPEDPIXEL_FILTER_TAPS columns is filtered vertically, then
// the partial results are filtered horizontally.
static uint16_t highbd_bi_ntap_filter(uint16_t *ref, int x, int y, int stride,
                                      int bd) {
  int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
  int k;
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;  // integer pixel position
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
  for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
    int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
    // Gather the column of taps centered on (i + k, j) and filter it
    // vertically at the fractional y offset.
    highbd_get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
                         i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
                         j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
    arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
                            y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
  }
  // Horizontal pass at the fractional x offset, then round off the
  // precision added by both filter stages at once.
  val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
                       x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
  return (uint16_t)clip_pixel_highbd(val, bd);
}
805
// Interpolates one high bit-depth pixel at subpel position (x, y), given in
// WARPEDPIXEL_PREC_BITS precision, with a separable 4-tap bicubic filter:
// the four neighboring columns are filtered vertically, then the partial
// results are filtered horizontally.
static uint16_t highbd_bi_cubic_filter(uint16_t *ref, int x, int y, int stride,
                                       int bd) {
  int32_t val, arr[4];
  int k;
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;  // integer pixel position
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
  for (k = 0; k < 4; ++k) {
    int32_t arr_temp[4];
    // Column (i + k - 1), rows j-1 .. j+2, filtered at the fractional y.
    highbd_get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
    arr[k] =
        do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
  }
  // Horizontal pass at the fractional x, then round off both stages.
  val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
  return (uint16_t)clip_pixel_highbd(val, bd);
}
822
823static uint16_t highbd_bi_linear_filter(uint16_t *ref, int x, int y, int stride,
824 int bd) {
825 const int ix = x >> WARPEDPIXEL_PREC_BITS;
826 const int iy = y >> WARPEDPIXEL_PREC_BITS;
Sarah Parker7ba8dc12016-10-26 12:41:46 -0700827 const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
828 const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700829 int32_t val;
830 val = ROUND_POWER_OF_TWO_SIGNED(
831 ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
832 (WARPEDPIXEL_PREC_SHIFTS - sx) +
833 ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
834 ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
835 ref[(iy + 1) * stride + ix + 1] * sy * sx,
836 WARPEDPIXEL_PREC_BITS * 2);
837 return (uint16_t)clip_pixel_highbd(val, bd);
838}
839
// Interpolates one high bit-depth pixel at subpel position (x, y).
// Positions outside the reference image are clamped to the border pixels /
// rows / columns; interior positions use the n-tap filter when its full
// support is available, then fall back to bicubic and finally bilinear
// interpolation as the border is approached.
static uint16_t highbd_warp_interpolate(uint16_t *ref, int x, int y, int width,
                                        int height, int stride, int bd) {
  int ix = x >> WARPEDPIXEL_PREC_BITS;
  int iy = y >> WARPEDPIXEL_PREC_BITS;
  int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));  // fractional offsets
  int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
  int32_t v;

  // Entirely outside a corner: return that corner pixel.
  if (ix < 0 && iy < 0)
    return ref[0];
  else if (ix < 0 && iy > height - 1)
    return ref[(height - 1) * stride];
  else if (ix > width - 1 && iy < 0)
    return ref[width - 1];
  else if (ix > width - 1 && iy > height - 1)
    return ref[(height - 1) * stride + (width - 1)];
  else if (ix < 0) {
    // Off the left edge: interpolate vertically along the first column.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
            ref[(iy + 1) * stride] * sy,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel_highbd(v, bd);
  } else if (iy < 0) {
    // Off the top edge: interpolate horizontally along the first row.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel_highbd(v, bd);
  } else if (ix > width - 1) {
    // Off the right edge: interpolate vertically along the last column.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
            ref[(iy + 1) * stride + width - 1] * sy,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel_highbd(v, bd);
  } else if (iy > height - 1) {
    // Off the bottom edge: interpolate horizontally along the last row.
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
            ref[(height - 1) * stride + ix + 1] * sx,
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel_highbd(v, bd);
  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
    // Full n-tap filter support is available.
    return highbd_bi_ntap_filter(ref, x, y, stride, bd);
  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
    // Enough support for the 4-tap bicubic filter.
    return highbd_bi_cubic_filter(ref, x, y, stride, bd);
  } else {
    // Too close to the border for either filter: bilinear fallback.
    return highbd_bi_linear_filter(ref, x, y, stride, bd);
  }
}
890
Debargha Mukherjee8d59d112016-11-15 11:31:03 -0800891static INLINE int highbd_error_measure(int err, int bd) {
892 const int b = bd - 8;
893 const int bmask = (1 << b) - 1;
Debargha Mukherjee15a608f2016-11-16 14:57:26 -0800894 const int v = (1 << b);
Debargha Mukherjee8d59d112016-11-15 11:31:03 -0800895 int e1, e2;
896 err = abs(err);
897 e1 = err >> b;
898 e2 = err & bmask;
899 return error_measure_lut[255 + e1] * (v - e2) +
900 error_measure_lut[256 + e1] * e2;
901}
902
// Reference ("old") high bit-depth warp path: projects every destination
// pixel individually through the warp model and interpolates it. Used for
// warp models / scaling factors that the block-based filter in
// av1_highbd_warp_affine() does not handle.
static void highbd_warp_plane_old(WarpedMotionParams *wm, uint8_t *ref8,
                                  int width, int height, int stride,
                                  uint8_t *pred8, int p_col, int p_row,
                                  int p_width, int p_height, int p_stride,
                                  int subsampling_x, int subsampling_y,
                                  int x_scale, int y_scale, int bd,
                                  int ref_frm) {
  int i, j;
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  if (projectpoints == NULL) return;
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
      // Map (j, i) through the warp model, then apply the frame scaling
      // factors (x_scale/y_scale are in units of 1/16).
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
      // When compositing onto a second reference frame, average with the
      // existing prediction; otherwise overwrite it.
      if (ref_frm)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                highbd_warp_interpolate(ref, out[0], out[1], width, height,
                                        stride, bd),
            1);
      else
        pred[(j - p_col) + (i - p_row) * p_stride] = highbd_warp_interpolate(
            ref, out[0], out[1], width, height, stride, bd);
    }
  }
}
David Barker87fcb362016-12-19 10:31:00 +0000935
David Barkerfa195162017-01-06 15:58:03 +0000936// Note: For an explanation of the warp algorithm, see the comment
937// above warp_plane()
David Barker2bcf2802017-04-05 11:44:31 +0100938//
939// Note also: The "worst case" in terms of modulus of the data stored into 'tmp'
940// (ie, the result of 'sum' in the horizontal filter) occurs when:
941// coeffs = { -2, 8, -22, 87, 72, -21, 8, -2}, and
942// ref = { 0, 255, 0, 255, 255, 0, 255, 0}
943// Before rounding, this gives sum = 716625. After rounding,
944// HORSHEAR_REDUCE_PREC_BITS = 4 => sum = 44789 > 2^15
945// HORSHEAR_REDUCE_PREC_BITS = 5 => sum = 22395 < 2^15
946//
947// So, as long as HORSHEAR_REDUCE_PREC_BITS >= 5, we can safely use a 16-bit
948// intermediate array.
// High bit-depth block-based warp filter; see the algorithm description in
// the comment above warp_plane() and the precision notes directly above.
void av1_highbd_warp_affine_c(int32_t *mat, uint16_t *ref, int width,
                              int height, int stride, uint16_t *pred, int p_col,
                              int p_row, int p_width, int p_height,
                              int p_stride, int subsampling_x,
                              int subsampling_y, int bd, int ref_frm,
                              int16_t alpha, int16_t beta, int16_t gamma,
                              int16_t delta) {
// Intermediate, horizontally-filtered block: 15 rows x 8 columns.
// A 16-bit buffer is only safe when HORSHEAR_REDUCE_PREC_BITS >= 5 (see
// the worst-case analysis in the comment above this function).
#if HORSHEAR_REDUCE_PREC_BITS >= 5
  int16_t tmp[15 * 8];
#else
  int32_t tmp[15 * 8];
#endif
  int i, j, k, l, m;

  /* Note: For this code to work, the left/right frame borders need to be
     extended by at least 13 pixels each. By the time we get here, other
     code will have set up this border, but we allow an explicit check
     for debugging purposes.
  */
  /*for (i = 0; i < height; ++i) {
    for (j = 0; j < 13; ++j) {
      assert(ref[i * stride - 13 + j] == ref[i * stride]);
      assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
    }
  }*/

  for (i = p_row; i < p_row + p_height; i += 8) {
    for (j = p_col; j < p_col + p_width; j += 8) {
      int32_t x4, y4, ix4, sx4, iy4, sy4;
      // Project the center (j + 4, i + 4) of this 8x8 block through the
      // warp matrix at full WARPEDMODEL precision, adjusting for chroma
      // subsampling where needed.
      if (subsampling_x)
        x4 = ROUND_POWER_OF_TWO_SIGNED(
            mat[2] * 2 * (j + 4) + mat[3] * 2 * (i + 4) + mat[0] +
                (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
            1);
      else
        x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];

      if (subsampling_y)
        y4 = ROUND_POWER_OF_TWO_SIGNED(
            mat[4] * 2 * (j + 4) + mat[5] * 2 * (i + 4) + mat[1] +
                (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
            1);
      else
        y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];

      // Split the projected position into integer and fractional parts.
      ix4 = x4 >> WARPEDMODEL_PREC_BITS;
      sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
      iy4 = y4 >> WARPEDMODEL_PREC_BITS;
      sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);

      // Horizontal filter
      for (k = -7; k < 8; ++k) {
        // Clamp the source row to the image.
        int iy = iy4 + k;
        if (iy < 0)
          iy = 0;
        else if (iy > height - 1)
          iy = height - 1;

        if (ix4 <= -7) {
          // The whole row samples only the (border-extended) leftmost
          // column; scale it as if it had been filtered.
          for (l = 0; l < 8; ++l) {
            tmp[(k + 7) * 8 + l] =
                ref[iy * stride] *
                (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
          }
        } else if (ix4 >= width + 6) {
          // The whole row samples only the (border-extended) rightmost
          // column.
          for (l = 0; l < 8; ++l) {
            tmp[(k + 7) * 8 + l] =
                ref[iy * stride + (width - 1)] *
                (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
          }
        } else {
          // sx tracks sx4 + alpha * l + beta * k across the inner loop.
          int sx = sx4 + alpha * (-4) + beta * k;

          for (l = -4; l < 4; ++l) {
            int ix = ix4 + l - 3;
            const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
                             WARPEDPIXEL_PREC_SHIFTS;
            const int16_t *coeffs = warped_filter[offs];
            int32_t sum = 0;
            // assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
            for (m = 0; m < 8; ++m) {
              sum += ref[iy * stride + ix + m] * coeffs[m];
            }
            sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS);
#if HORSHEAR_REDUCE_PREC_BITS >= 5
            tmp[(k + 7) * 8 + (l + 4)] = saturate_int16(sum);
#else
            tmp[(k + 7) * 8 + (l + 4)] = sum;
#endif
            sx += alpha;
          }
        }
      }

      // Vertical filter
      for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
        // sy tracks sy4 + gamma * l + delta * k across the inner loop.
        int sy = sy4 + gamma * (-4) + delta * k;
        for (l = -4; l < 4; ++l) {
          uint16_t *p =
              &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
          const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
                           WARPEDPIXEL_PREC_SHIFTS;
          const int16_t *coeffs = warped_filter[offs];
          int32_t sum = 0;
          // assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
          for (m = 0; m < 8; ++m) {
            sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
          }
          sum = clip_pixel_highbd(
              ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS), bd);
          // Average with the existing prediction when compositing onto a
          // second reference frame; otherwise overwrite.
          if (ref_frm)
            *p = ROUND_POWER_OF_TWO(*p + sum, 1);
          else
            *p = sum;
          sy += gamma;
        }
      }
    }
  }
}
1069
// Warps (part of) a high bit-depth reference plane into the prediction
// buffer. Un-scaled (x_scale == y_scale == 16) ROTZOOM/AFFINE models take
// the fast block-based filter; everything else falls back to the per-pixel
// path in highbd_warp_plane_old().
// Note: for ROTZOOM models this fills in wmmat[4]/wmmat[5] from the
// rotation/zoom terms, i.e. it mutates *wm.
static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
                              int height, int stride, uint8_t *pred8, int p_col,
                              int p_row, int p_width, int p_height,
                              int p_stride, int subsampling_x,
                              int subsampling_y, int x_scale, int y_scale,
                              int bd, int ref_frm) {
  if (wm->wmtype == ROTZOOM) {
    wm->wmmat[5] = wm->wmmat[2];
    wm->wmmat[4] = -wm->wmmat[3];
  }
  if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
      y_scale == 16) {
    int32_t *mat = wm->wmmat;
    // Shear parameters must already have been set up by get_shear_params().
    const int16_t alpha = wm->alpha;
    const int16_t beta = wm->beta;
    const int16_t gamma = wm->gamma;
    const int16_t delta = wm->delta;

    uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
    uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
    av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
                           p_width, p_height, p_stride, subsampling_x,
                           subsampling_y, bd, ref_frm, alpha, beta, gamma,
                           delta);
  } else {
    highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row,
                          p_width, p_height, p_stride, subsampling_x,
                          subsampling_y, x_scale, y_scale, bd, ref_frm);
  }
}
1100
1101static double highbd_warp_erroradv(WarpedMotionParams *wm, uint8_t *ref8,
1102 int width, int height, int stride,
1103 uint8_t *dst8, int p_col, int p_row,
1104 int p_width, int p_height, int p_stride,
1105 int subsampling_x, int subsampling_y,
1106 int x_scale, int y_scale, int bd) {
1107 int gm_err = 0, no_gm_err = 0;
1108 int64_t gm_sumerr = 0, no_gm_sumerr = 0;
1109 int i, j;
1110 uint16_t *tmp = aom_malloc(p_width * p_height * sizeof(*tmp));
1111 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1112 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
1113 highbd_warp_plane(wm, ref8, width, height, stride, CONVERT_TO_BYTEPTR(tmp),
1114 p_col, p_row, p_width, p_height, p_width, subsampling_x,
1115 subsampling_y, x_scale, y_scale, bd, 0);
1116 for (i = 0; i < p_height; ++i) {
1117 for (j = 0; j < p_width; ++j) {
1118 gm_err = dst[j + i * p_stride] - tmp[j + i * p_width];
1119 no_gm_err =
1120 dst[j + i * p_stride] - ref[(j + p_col) + (i + p_row) * stride];
1121 gm_sumerr += highbd_error_measure(gm_err, bd);
1122 no_gm_sumerr += highbd_error_measure(no_gm_err, bd);
1123 }
1124 }
1125 aom_free(tmp);
1126 return (double)gm_sumerr / no_gm_sumerr;
1127}
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001128#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001129
Debargha Mukherjee8d59d112016-11-15 11:31:03 -08001130static INLINE int error_measure(int err) {
1131 return error_measure_lut[255 + err];
1132}
1133
// Reference ("old") low bit-depth warp path: projects every destination
// pixel individually through the warp model and interpolates it. Used for
// warp models / scaling factors that the block-based filter in
// av1_warp_affine() does not handle.
static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width,
                           int height, int stride, uint8_t *pred, int p_col,
                           int p_row, int p_width, int p_height, int p_stride,
                           int subsampling_x, int subsampling_y, int x_scale,
                           int y_scale, int ref_frm) {
  int i, j;
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
  if (projectpoints == NULL) return;
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
      // Map (j, i) through the warp model, then apply the frame scaling
      // factors (x_scale/y_scale are in units of 1/16).
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
      // When compositing onto a second reference frame, average with the
      // existing prediction; otherwise overwrite it.
      if (ref_frm)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                warp_interpolate(ref, out[0], out[1], width, height, stride),
            1);
      else
        pred[(j - p_col) + (i - p_row) * p_stride] =
            warp_interpolate(ref, out[0], out[1], width, height, stride);
    }
  }
}
1161
David Barkerbe128602016-12-13 16:40:31 +00001162/* The warp filter for ROTZOOM and AFFINE models works as follows:
1163 * Split the input into 8x8 blocks
1164 * For each block, project the point (4, 4) within the block, to get the
1165 overall block position. Split into integer and fractional coordinates,
1166 maintaining full WARPEDMODEL precision
1167 * Filter horizontally: Generate 15 rows of 8 pixels each. Each pixel gets a
1168 variable horizontal offset. This means that, while the rows of the
1169 intermediate buffer align with the rows of the *reference* image, the
1170 columns align with the columns of the *destination* image.
1171 * Filter vertically: Generate the output block (up to 8x8 pixels, but if the
1172 destination is too small we crop the output at this stage). Each pixel has
1173 a variable vertical offset, so that the resulting rows are aligned with
1174 the rows of the destination image.
1175
1176 To accomplish these alignments, we factor the warp matrix as a
1177 product of two shear / asymmetric zoom matrices:
1178 / a b \ = / 1 0 \ * / 1+alpha beta \
1179 \ c d / \ gamma 1+delta / \ 0 1 /
1180 where a, b, c, d are wmmat[2], wmmat[3], wmmat[4], wmmat[5] respectively.
1181 The second shear (with alpha and beta) is applied by the horizontal filter,
1182 then the first shear (with gamma and delta) is applied by the vertical
1183 filter.
1184
1185 The only limitation is that, to fit this in a fixed 8-tap filter size,
1186 the fractional pixel offsets must be at most +-1. Since the horizontal filter
1187 generates 15 rows of 8 columns, and the initial point we project is at (4, 4)
1188 within the block, the parameters must satisfy
   4 * |alpha| + 7 * |beta| <= 1 and 4 * |gamma| + 4 * |delta| <= 1
1190 for this filter to be applicable.
David Barkerd5dfa962017-01-10 15:06:08 +00001191
1192 Note: warp_affine() assumes that the caller has done all of the relevant
1193 checks, ie. that we have a ROTZOOM or AFFINE model, that wm[4] and wm[5]
1194 are set appropriately (if using a ROTZOOM model), and that alpha, beta,
1195 gamma, delta are all in range.
1196
1197 TODO(david.barker): Maybe support scaled references?
David Barkerbe128602016-12-13 16:40:31 +00001198*/
David Barker838367d2017-01-12 15:13:28 +00001199void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
1200 int stride, uint8_t *pred, int p_col, int p_row,
1201 int p_width, int p_height, int p_stride,
1202 int subsampling_x, int subsampling_y, int ref_frm,
Debargha Mukherjee27f6e662017-04-10 11:17:16 -07001203 int16_t alpha, int16_t beta, int16_t gamma,
1204 int16_t delta) {
David Barkerd5dfa962017-01-10 15:06:08 +00001205 int16_t tmp[15 * 8];
1206 int i, j, k, l, m;
1207
1208 /* Note: For this code to work, the left/right frame borders need to be
1209 extended by at least 13 pixels each. By the time we get here, other
1210 code will have set up this border, but we allow an explicit check
1211 for debugging purposes.
1212 */
1213 /*for (i = 0; i < height; ++i) {
1214 for (j = 0; j < 13; ++j) {
1215 assert(ref[i * stride - 13 + j] == ref[i * stride]);
1216 assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
1217 }
1218 }*/
1219
1220 for (i = p_row; i < p_row + p_height; i += 8) {
1221 for (j = p_col; j < p_col + p_width; j += 8) {
1222 int32_t x4, y4, ix4, sx4, iy4, sy4;
1223 if (subsampling_x)
1224 x4 = ROUND_POWER_OF_TWO_SIGNED(
1225 mat[2] * 2 * (j + 4) + mat[3] * 2 * (i + 4) + mat[0] +
1226 (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1227 1);
1228 else
1229 x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
1230
1231 if (subsampling_y)
1232 y4 = ROUND_POWER_OF_TWO_SIGNED(
1233 mat[4] * 2 * (j + 4) + mat[5] * 2 * (i + 4) + mat[1] +
1234 (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1235 1);
1236 else
1237 y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
1238
1239 ix4 = x4 >> WARPEDMODEL_PREC_BITS;
1240 sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
1241 iy4 = y4 >> WARPEDMODEL_PREC_BITS;
1242 sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
1243
1244 // Horizontal filter
1245 for (k = -7; k < 8; ++k) {
1246 int iy = iy4 + k;
1247 if (iy < 0)
1248 iy = 0;
1249 else if (iy > height - 1)
1250 iy = height - 1;
1251
1252 if (ix4 <= -7) {
1253 // In this case, the rightmost pixel sampled is in column
1254 // ix4 + 3 + 7 - 3 = ix4 + 7 <= 0, ie. the entire block
1255 // will sample only from the leftmost column
1256 // (once border extension is taken into account)
1257 for (l = 0; l < 8; ++l) {
1258 tmp[(k + 7) * 8 + l] =
Debargha Mukherjee1d184602017-04-04 04:54:07 -07001259 ref[iy * stride] *
1260 (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
David Barkerd5dfa962017-01-10 15:06:08 +00001261 }
1262 } else if (ix4 >= width + 6) {
1263 // In this case, the leftmost pixel sampled is in column
David Barker13797462017-01-23 10:50:11 +00001264 // ix4 - 4 + 0 - 3 = ix4 - 7 >= width - 1, ie. the entire block
David Barkerd5dfa962017-01-10 15:06:08 +00001265 // will sample only from the rightmost column
1266 // (once border extension is taken into account)
1267 for (l = 0; l < 8; ++l) {
1268 tmp[(k + 7) * 8 + l] =
Debargha Mukherjee1d184602017-04-04 04:54:07 -07001269 ref[iy * stride + (width - 1)] *
1270 (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
David Barkerd5dfa962017-01-10 15:06:08 +00001271 }
1272 } else {
1273 // If we get here, then
1274 // the leftmost pixel sampled is
1275 // ix4 - 4 + 0 - 3 = ix4 - 7 >= -13
1276 // and the rightmost pixel sampled is at most
1277 // ix4 + 3 + 7 - 3 = ix4 + 7 <= width + 12
1278 // So, assuming that border extension has been done, we
1279 // don't need to explicitly clamp values.
1280 int sx = sx4 + alpha * (-4) + beta * k;
1281
1282 for (l = -4; l < 4; ++l) {
1283 int ix = ix4 + l - 3;
1284 // At this point, sx = sx4 + alpha * l + beta * k
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001285 const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
1286 WARPEDPIXEL_PREC_SHIFTS;
1287 const int16_t *coeffs = warped_filter[offs];
David Barkerd5dfa962017-01-10 15:06:08 +00001288 int32_t sum = 0;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001289 // assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
David Barkerd5dfa962017-01-10 15:06:08 +00001290 for (m = 0; m < 8; ++m) {
1291 sum += ref[iy * stride + ix + m] * coeffs[m];
1292 }
Debargha Mukherjee1d184602017-04-04 04:54:07 -07001293 sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS);
David Barkerd5dfa962017-01-10 15:06:08 +00001294 tmp[(k + 7) * 8 + (l + 4)] = saturate_int16(sum);
1295 sx += alpha;
1296 }
1297 }
1298 }
1299
1300 // Vertical filter
1301 for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
1302 int sy = sy4 + gamma * (-4) + delta * k;
1303 for (l = -4; l < 4; ++l) {
1304 uint8_t *p =
1305 &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
1306 // At this point, sy = sy4 + gamma * l + delta * k
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001307 const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
1308 WARPEDPIXEL_PREC_SHIFTS;
1309 const int16_t *coeffs = warped_filter[offs];
David Barkerd5dfa962017-01-10 15:06:08 +00001310 int32_t sum = 0;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001311 // assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
David Barkerd5dfa962017-01-10 15:06:08 +00001312 for (m = 0; m < 8; ++m) {
1313 sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
1314 }
Debargha Mukherjee1d184602017-04-04 04:54:07 -07001315 sum = clip_pixel(ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS));
David Barkerd5dfa962017-01-10 15:06:08 +00001316 if (ref_frm)
1317 *p = ROUND_POWER_OF_TWO(*p + sum, 1);
1318 else
1319 *p = sum;
1320 sy += gamma;
1321 }
1322 }
1323 }
1324 }
1325}
1326
David Barkerbe128602016-12-13 16:40:31 +00001327static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,
1328 int height, int stride, uint8_t *pred, int p_col,
1329 int p_row, int p_width, int p_height, int p_stride,
1330 int subsampling_x, int subsampling_y, int x_scale,
1331 int y_scale, int ref_frm) {
1332 if (wm->wmtype == ROTZOOM) {
1333 wm->wmmat[5] = wm->wmmat[2];
1334 wm->wmmat[4] = -wm->wmmat[3];
1335 }
David Barkerd5dfa962017-01-10 15:06:08 +00001336 if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
1337 y_scale == 16) {
David Barkerbe128602016-12-13 16:40:31 +00001338 int32_t *mat = wm->wmmat;
Debargha Mukherjee27f6e662017-04-10 11:17:16 -07001339 const int16_t alpha = wm->alpha;
1340 const int16_t beta = wm->beta;
1341 const int16_t gamma = wm->gamma;
1342 const int16_t delta = wm->delta;
David Barkerfa195162017-01-06 15:58:03 +00001343
David Barker838367d2017-01-12 15:13:28 +00001344 av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
1345 p_width, p_height, p_stride, subsampling_x, subsampling_y,
1346 ref_frm, alpha, beta, gamma, delta);
David Barkerbe128602016-12-13 16:40:31 +00001347 } else {
1348 warp_plane_old(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
1349 p_height, p_stride, subsampling_x, subsampling_y, x_scale,
1350 y_scale, ref_frm);
1351 }
1352}
1353
1354static double warp_erroradv(WarpedMotionParams *wm, uint8_t *ref, int width,
1355 int height, int stride, uint8_t *dst, int p_col,
1356 int p_row, int p_width, int p_height, int p_stride,
1357 int subsampling_x, int subsampling_y, int x_scale,
1358 int y_scale) {
1359 int gm_err = 0, no_gm_err = 0;
1360 int gm_sumerr = 0, no_gm_sumerr = 0;
1361 int i, j;
1362 uint8_t *tmp = aom_malloc(p_width * p_height);
1363 warp_plane(wm, ref, width, height, stride, tmp, p_col, p_row, p_width,
1364 p_height, p_width, subsampling_x, subsampling_y, x_scale, y_scale,
1365 0);
1366
1367 for (i = 0; i < p_height; ++i) {
1368 for (j = 0; j < p_width; ++j) {
1369 gm_err = dst[j + i * p_stride] - tmp[j + i * p_width];
David Barker87fcb362016-12-19 10:31:00 +00001370 no_gm_err =
1371 dst[j + i * p_stride] - ref[(j + p_col) + (i + p_row) * stride];
David Barkerbe128602016-12-13 16:40:31 +00001372 gm_sumerr += error_measure(gm_err);
1373 no_gm_sumerr += error_measure(no_gm_err);
1374 }
1375 }
1376
1377 aom_free(tmp);
1378 return (double)gm_sumerr / no_gm_sumerr;
1379}
1380
Yaowu Xuf883b422016-08-30 14:01:10 -07001381double av1_warp_erroradv(WarpedMotionParams *wm,
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001382#if CONFIG_HIGHBITDEPTH
Yaowu Xuf883b422016-08-30 14:01:10 -07001383 int use_hbd, int bd,
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001384#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuf883b422016-08-30 14:01:10 -07001385 uint8_t *ref, int width, int height, int stride,
1386 uint8_t *dst, int p_col, int p_row, int p_width,
1387 int p_height, int p_stride, int subsampling_x,
1388 int subsampling_y, int x_scale, int y_scale) {
Debargha Mukherjee3b6c5442017-03-30 08:22:00 -07001389 if (wm->wmtype <= AFFINE)
1390 if (!get_shear_params(wm)) return 1;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001391#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001392 if (use_hbd)
1393 return highbd_warp_erroradv(
1394 wm, ref, width, height, stride, dst, p_col, p_row, p_width, p_height,
1395 p_stride, subsampling_x, subsampling_y, x_scale, y_scale, bd);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001396#endif // CONFIG_HIGHBITDEPTH
Sarah Parkerf9a961c2016-09-06 11:25:04 -07001397 return warp_erroradv(wm, ref, width, height, stride, dst, p_col, p_row,
1398 p_width, p_height, p_stride, subsampling_x,
1399 subsampling_y, x_scale, y_scale);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001400}
1401
Yaowu Xuf883b422016-08-30 14:01:10 -07001402void av1_warp_plane(WarpedMotionParams *wm,
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001403#if CONFIG_HIGHBITDEPTH
Yaowu Xuf883b422016-08-30 14:01:10 -07001404 int use_hbd, int bd,
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001405#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuf883b422016-08-30 14:01:10 -07001406 uint8_t *ref, int width, int height, int stride,
1407 uint8_t *pred, int p_col, int p_row, int p_width,
1408 int p_height, int p_stride, int subsampling_x,
Sarah Parker43d56f32016-10-21 17:06:37 -07001409 int subsampling_y, int x_scale, int y_scale, int ref_frm) {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001410#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001411 if (use_hbd)
1412 highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
1413 p_width, p_height, p_stride, subsampling_x, subsampling_y,
Sarah Parker43d56f32016-10-21 17:06:37 -07001414 x_scale, y_scale, bd, ref_frm);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001415 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001416#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001417 warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
1418 p_height, p_stride, subsampling_x, subsampling_y, x_scale,
Sarah Parker43d56f32016-10-21 17:06:37 -07001419 y_scale, ref_frm);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001420}
1421
Yue Chen7d2109e2017-01-03 11:51:38 -08001422#if CONFIG_WARPED_MOTION
Debargha Mukherjeeb9370ac2017-03-23 06:10:18 -07001423#define LEAST_SQUARES_ORDER 2
Yue Chen5558e5d2017-03-31 12:24:42 -07001424
Debargha Mukherjeed49c5c42017-04-07 15:46:03 -07001425#define LS_MV_MAX 256 // max mv in 1/8-pel
Yue Chen5558e5d2017-03-31 12:24:42 -07001426#define LS_STEP 2
1427
Debargha Mukherjee8c410242017-04-11 15:20:56 -07001428// Assuming LS_MV_MAX is < MAX_SB_SIZE * 8,
1429// the precision needed is:
1430// (MAX_SB_SIZE_LOG2 + 3) [for sx * sx magnitude] +
1431// (MAX_SB_SIZE_LOG2 + 4) [for sx * dx magnitude] +
1432// 1 [for sign] +
1433// LEAST_SQUARES_SAMPLES_MAX_BITS
1434// [for adding up to LEAST_SQUARES_SAMPLES_MAX samples]
1435// The value is 23
1436#define LS_MAT_RANGE_BITS \
1437 ((MAX_SB_SIZE_LOG2 + 4) * 2 + LEAST_SQUARES_SAMPLES_MAX_BITS)
1438
1439// Bit-depth reduction from the full-range
1440#define LS_MAT_DOWN_BITS 2
1441
1442// bits range of A, Bx and By after downshifting
1443#define LS_MAT_BITS (LS_MAT_RANGE_BITS - LS_MAT_DOWN_BITS)
1444#define LS_MAT_MIN (-(1 << (LS_MAT_BITS - 1)))
1445#define LS_MAT_MAX ((1 << (LS_MAT_BITS - 1)) - 1)
1446
Yue Chen5558e5d2017-03-31 12:24:42 -07001447#define LS_SUM(a) ((a)*4 + LS_STEP * 2)
Debargha Mukherjeef2f3bcd2017-04-04 04:17:52 -07001448#define LS_SQUARE(a) \
1449 (((a) * (a)*4 + (a)*4 * LS_STEP + LS_STEP * LS_STEP * 2) >> 2)
Yue Chen5558e5d2017-03-31 12:24:42 -07001450#define LS_PRODUCT1(a, b) \
Debargha Mukherjeef2f3bcd2017-04-04 04:17:52 -07001451 (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP) >> 2)
Yue Chen5558e5d2017-03-31 12:24:42 -07001452#define LS_PRODUCT2(a, b) \
Debargha Mukherjeef2f3bcd2017-04-04 04:17:52 -07001453 (((a) * (b)*4 + ((a) + (b)) * 2 * LS_STEP + LS_STEP * LS_STEP * 2) >> 2)
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001454
Debargha Mukherjeeb9370ac2017-03-23 06:10:18 -07001455#if LEAST_SQUARES_ORDER == 2
// Fits the four linear parameters (wmmat[2..5]) of an affine model by
// integer least squares over motion-vector samples, then derives the
// translation (wmmat[0..1]) so that the block centre maps exactly to the
// position given by the transmitted motion vector.
//
// pts1/pts2: np source/destination sample positions in 1/8-pel units
// mvy/mvx:   the block's own motion vector (1/8-pel)
// mi_row/mi_col: block position in mi units
// Returns 0 on success, 1 if the normal matrix is singular.
static int find_affine_int(const int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
                           int mvy, int mvx, WarpedMotionParams *wm, int mi_row,
                           int mi_col) {
  int32_t A[2][2] = { { 0, 0 }, { 0, 0 } };  // normal matrix P'P (symmetric)
  int32_t Bx[2] = { 0, 0 };                  // P'q (x-equation RHS)
  int32_t By[2] = { 0, 0 };                  // P'r (y-equation RHS)
  int i, n = 0;  // n counts samples that survive the outlier rejection

  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  // Block-centre source position in 1/8-pel units...
  const int suy = (mi_row * MI_SIZE + AOMMAX(bh, MI_SIZE) / 2 - 1) * 8;
  const int sux = (mi_col * MI_SIZE + AOMMAX(bw, MI_SIZE) / 2 - 1) * 8;
  // ...and its destination after applying the transmitted MV.
  const int duy = suy + mvy;
  const int dux = sux + mvx;

  // Assume the center pixel of the block has exactly the same motion vector
  // as transmitted for the block. First shift the origin of the source
  // points to the block center, and the origin of the destination points to
  // the block center added to the motion vector transmitted.
  // Let (xi, yi) denote the source points and (xi', yi') denote destination
  // points after origin shifting, for i = 0, 1, 2, .... n-1.
  // Then if P = [x0, y0,
  //              x1, y1
  //              x2, y1,
  //               ....
  //             ]
  //    q = [x0', x1', x2', ... ]'
  //    r = [y0', y1', y2', ... ]'
  // the least squares problems that need to be solved are:
  //    [h1, h2]' = inv(P'P)P'q and
  //    [h3, h4]' = inv(P'P)P'r
  // where the affine transformation is given by:
  //    x' = h1.x + h2.y
  //    y' = h3.x + h4.y
  //
  // The loop below computes: A = P'P, Bx = P'q, By = P'r
  // We need to just compute inv(A).Bx and inv(A).By for the solutions.
  int sx, sy, dx, dy;
  // Contribution from neighbor block
  for (i = 0; i < np && n < LEAST_SQUARES_SAMPLES_MAX; i++) {
    dx = pts2[i * 2] - dux;
    dy = pts2[i * 2 + 1] - duy;
    sx = pts1[i * 2] - sux;
    sy = pts1[i * 2 + 1] - suy;
    // Reject outliers: skip samples whose implied MV (relative to the
    // centre MV) exceeds LS_MV_MAX in either component.
    if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
      A[0][0] += LS_SQUARE(sx);
      A[0][1] += LS_PRODUCT1(sx, sy);
      A[1][1] += LS_SQUARE(sy);
      Bx[0] += LS_PRODUCT2(sx, dx);
      Bx[1] += LS_PRODUCT1(sy, dx);
      By[0] += LS_PRODUCT1(sx, dy);
      By[1] += LS_PRODUCT2(sy, dy);
      n++;
    }
  }
  // Fewer accumulated samples mean smaller sums, so less precision
  // reduction is needed to keep values inside LS_MAT_BITS.
  int downshift;
  if (n >= 4)
    downshift = LS_MAT_DOWN_BITS;
  else if (n >= 2)
    downshift = LS_MAT_DOWN_BITS - 1;
  else
    downshift = LS_MAT_DOWN_BITS - 2;

  // Reduce precision by downshift bits
  A[0][0] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][0], downshift), LS_MAT_MIN,
                  LS_MAT_MAX);
  A[0][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][1], downshift), LS_MAT_MIN,
                  LS_MAT_MAX);
  A[1][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[1][1], downshift), LS_MAT_MIN,
                  LS_MAT_MAX);
  Bx[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[0], downshift), LS_MAT_MIN,
                LS_MAT_MAX);
  Bx[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[1], downshift), LS_MAT_MIN,
                LS_MAT_MAX);
  By[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[0], downshift), LS_MAT_MIN,
                LS_MAT_MAX);
  By[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[1], downshift), LS_MAT_MIN,
                LS_MAT_MAX);

  int64_t Px[2], Py[2], Det;
  int16_t iDet, shift;

  // These divided by the Det, are the least squares solutions
  // (adjugate(A) * B, by Cramer's rule for the 2x2 system).
  Px[0] = (int64_t)A[1][1] * Bx[0] - (int64_t)A[0][1] * Bx[1];
  Px[1] = -(int64_t)A[0][1] * Bx[0] + (int64_t)A[0][0] * Bx[1];
  Py[0] = (int64_t)A[1][1] * By[0] - (int64_t)A[0][1] * By[1];
  Py[1] = -(int64_t)A[0][1] * By[0] + (int64_t)A[0][0] * By[1];

  // Compute Determinant of A
  Det = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1];
  if (Det == 0) return 1;
  // NOTE(review): labs() takes long, which is 32-bit on some platforms
  // (e.g. LLP64 Windows) while Det is int64_t — confirm llabs() is not
  // needed here.
  iDet = resolve_divisor_64(labs(Det), &shift) * (Det < 0 ? -1 : 1);
  // Fold the WARPEDMODEL_PREC_BITS scaling of the output into the shift.
  shift -= WARPEDMODEL_PREC_BITS;
  if (shift < 0) {
    // NOTE(review): iDet may be negative here; left-shifting a negative
    // value is undefined behavior in C — confirm intended/safe in practice.
    iDet <<= (-shift);
    shift = 0;
  }

  int64_t v;
  v = Px[0] * (int64_t)iDet;
  wm->wmmat[2] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
  v = Px[1] * (int64_t)iDet;
  wm->wmmat[3] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
  // Translation chosen so the centre (sux, suy) maps to (dux, duy);
  // positions are in 1/8-pel, hence the final >> 3.
  v = (dux << WARPEDMODEL_PREC_BITS) - sux * wm->wmmat[2] - suy * wm->wmmat[3];
  wm->wmmat[0] = ROUND_POWER_OF_TWO_SIGNED(v, 3);

  v = Py[0] * (int64_t)iDet;
  wm->wmmat[4] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
  v = Py[1] * (int64_t)iDet;
  wm->wmmat[5] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
  v = (duy << WARPEDMODEL_PREC_BITS) - sux * wm->wmmat[4] - suy * wm->wmmat[5];
  wm->wmmat[1] = ROUND_POWER_OF_TWO_SIGNED(v, 3);

  // Affine model: the projective row is zero.
  wm->wmmat[6] = wm->wmmat[7] = 0;

  // Clamp values
  wm->wmmat[0] = clamp(wm->wmmat[0], -WARPEDMODEL_TRANS_CLAMP,
                       WARPEDMODEL_TRANS_CLAMP - 1);
  wm->wmmat[1] = clamp(wm->wmmat[1], -WARPEDMODEL_TRANS_CLAMP,
                       WARPEDMODEL_TRANS_CLAMP - 1);
  wm->wmmat[2] = clamp(wm->wmmat[2], -WARPEDMODEL_DIAGAFFINE_CLAMP,
                       WARPEDMODEL_DIAGAFFINE_CLAMP - 1);
  wm->wmmat[5] = clamp(wm->wmmat[5], -WARPEDMODEL_DIAGAFFINE_CLAMP,
                       WARPEDMODEL_DIAGAFFINE_CLAMP - 1);
  wm->wmmat[3] = clamp(wm->wmmat[3], -WARPEDMODEL_NONDIAGAFFINE_CLAMP,
                       WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
  wm->wmmat[4] = clamp(wm->wmmat[4], -WARPEDMODEL_NONDIAGAFFINE_CLAMP,
                       WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
  return 0;
}
1586
1587#else
Debargha Mukherjee93105532017-03-01 10:44:46 -08001588
Debargha Mukherjeee8e6cad2017-03-22 17:38:38 -07001589static int find_affine_int(const int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
1590 int mvy, int mvx, WarpedMotionParams *wm, int mi_row,
1591 int mi_col) {
Debargha Mukherjee93105532017-03-01 10:44:46 -08001592 int32_t A[3][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };
1593 int32_t Bx[3] = { 0, 0, 0 };
1594 int32_t By[3] = { 0, 0, 0 };
Yue Chen5558e5d2017-03-31 12:24:42 -07001595 int i, n = 0, off;
Debargha Mukherjee93105532017-03-01 10:44:46 -08001596
1597 int64_t C00, C01, C02, C11, C12, C22;
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001598 int64_t Px[3], Py[3];
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001599 int64_t Det, v;
Debargha Mukherjeee8e6cad2017-03-22 17:38:38 -07001600 const int bw = block_size_wide[bsize];
1601 const int bh = block_size_high[bsize];
1602 const int cy_offset = AOMMAX(bh, MI_SIZE) / 2 - 1;
1603 const int cx_offset = AOMMAX(bw, MI_SIZE) / 2 - 1;
Debargha Mukherjee93105532017-03-01 10:44:46 -08001604
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001605 // Offsets to make the values in the arrays smaller
Debargha Mukherjee246d2732017-02-27 14:09:18 -08001606 const int ux = mi_col * MI_SIZE * 8, uy = mi_row * MI_SIZE * 8;
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001607 // Let source points (xi, yi) map to destimation points (xi', yi'),
1608 // for i = 0, 1, 2, .... n-1
1609 // Then if P = [x0, y0, 1,
1610 // x1, y1, 1
1611 // x2, y2, 1,
1612 // ....
1613 // ]
1614 // q = [x0', x1', x2', ... ]'
1615 // r = [y0', y1', y2', ... ]'
1616 // the least squares problems that need to be solved are:
1617 // [h1, h2, dx]' = inv(P'P)P'q and
1618 // [h3, h4, dy]' = inv(P'P)P'r
1619 // where the affine transformation is given by:
1620 // x' = h1.x + h2.y + dx
1621 // y' = h3.x + h4.y + dy
1622 //
1623 // The loop below computes: A = P'P, Bx = P'q, By = P'r
Debargha Mukherjee93105532017-03-01 10:44:46 -08001624 // We need to just compute inv(A).Bx and inv(A).By for the solutions.
1625 //
Yue Chen5558e5d2017-03-31 12:24:42 -07001626 int sx, sy, dx, dy;
1627 // Contribution from sample in current block
1628 sx = cx_offset * 8;
1629 sy = cy_offset * 8;
1630 dx = sx + mvx;
1631 dy = sy + mvy;
1632 if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
1633 A[0][0] += LS_SQUARE(sx);
1634 A[0][1] += LS_PRODUCT1(sx, sy);
1635 A[0][2] += LS_SUM(sx);
1636 A[1][1] += LS_SQUARE(sy);
1637 A[1][2] += LS_SUM(sy);
1638 A[2][2] += 4;
1639 Bx[0] += LS_PRODUCT2(sx, dx);
1640 Bx[1] += LS_PRODUCT1(sy, dx);
1641 Bx[2] += LS_SUM(dx);
1642 By[0] += LS_PRODUCT1(sx, dy);
1643 By[1] += LS_PRODUCT2(sy, dy);
1644 By[2] += LS_SUM(dy);
1645 n++;
1646 }
1647 // Contribution from neighbor block
1648 for (i = 0; i < np && n < LEAST_SQUARES_SAMPLES_MAX; i++) {
1649 dx = pts2[i * 2] - ux;
1650 dy = pts2[i * 2 + 1] - uy;
1651 sx = pts1[i * 2] - ux;
1652 sy = pts1[i * 2 + 1] - uy;
1653 if (abs(sx - dx) < LS_MV_MAX && abs(sy - dy) < LS_MV_MAX) {
1654 A[0][0] += LS_SQUARE(sx);
1655 A[0][1] += LS_PRODUCT1(sx, sy);
1656 A[0][2] += LS_SUM(sx);
1657 A[1][1] += LS_SQUARE(sy);
1658 A[1][2] += LS_SUM(sy);
1659 A[2][2] += 4;
1660 Bx[0] += LS_PRODUCT2(sx, dx);
1661 Bx[1] += LS_PRODUCT1(sy, dx);
1662 Bx[2] += LS_SUM(dx);
1663 By[0] += LS_PRODUCT1(sx, dy);
1664 By[1] += LS_PRODUCT2(sy, dy);
1665 By[2] += LS_SUM(dy);
Debargha Mukherjee93105532017-03-01 10:44:46 -08001666 n++;
1667 }
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001668 }
1669 // Compute Cofactors of A
Debargha Mukherjee93105532017-03-01 10:44:46 -08001670 C00 = (int64_t)A[1][1] * A[2][2] - (int64_t)A[1][2] * A[1][2];
1671 C01 = (int64_t)A[1][2] * A[0][2] - (int64_t)A[0][1] * A[2][2];
1672 C02 = (int64_t)A[0][1] * A[1][2] - (int64_t)A[0][2] * A[1][1];
1673 C11 = (int64_t)A[0][0] * A[2][2] - (int64_t)A[0][2] * A[0][2];
1674 C12 = (int64_t)A[0][1] * A[0][2] - (int64_t)A[0][0] * A[1][2];
1675 C22 = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1];
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001676
Debargha Mukherjee8c410242017-04-11 15:20:56 -07001677 // Scale by 1/64
Debargha Mukherjee93105532017-03-01 10:44:46 -08001678 C00 = ROUND_POWER_OF_TWO_SIGNED(C00, 6);
1679 C01 = ROUND_POWER_OF_TWO_SIGNED(C01, 6);
1680 C02 = ROUND_POWER_OF_TWO_SIGNED(C02, 6);
1681 C11 = ROUND_POWER_OF_TWO_SIGNED(C11, 6);
1682 C12 = ROUND_POWER_OF_TWO_SIGNED(C12, 6);
1683 C22 = ROUND_POWER_OF_TWO_SIGNED(C22, 6);
1684
1685 // Compute Determinant of A
1686 Det = C00 * A[0][0] + C01 * A[0][1] + C02 * A[0][2];
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001687 if (Det == 0) return 1;
1688
Debargha Mukherjee93105532017-03-01 10:44:46 -08001689 // These divided by the Det, are the least squares solutions
1690 Px[0] = C00 * Bx[0] + C01 * Bx[1] + C02 * Bx[2];
1691 Px[1] = C01 * Bx[0] + C11 * Bx[1] + C12 * Bx[2];
1692 Px[2] = C02 * Bx[0] + C12 * Bx[1] + C22 * Bx[2];
1693 Py[0] = C00 * By[0] + C01 * By[1] + C02 * By[2];
1694 Py[1] = C01 * By[0] + C11 * By[1] + C12 * By[2];
1695 Py[2] = C02 * By[0] + C12 * By[1] + C22 * By[2];
1696
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001697 int16_t shift;
1698 int64_t iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001699 iDet = resolve_divisor_64(labs(Det), &shift) * (Det < 0 ? -1 : 1);
Debargha Mukherjee65bd6da2017-04-06 09:46:35 -07001700 shift -= WARPEDMODEL_PREC_BITS;
1701 if (shift < 0) {
1702 iDet <<= (-shift);
1703 shift = 0;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001704 }
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001705
Debargha Mukherjee93105532017-03-01 10:44:46 -08001706 v = Px[0] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001707 wm->wmmat[2] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
Debargha Mukherjee93105532017-03-01 10:44:46 -08001708 v = Px[1] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001709 wm->wmmat[3] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
Debargha Mukherjee93105532017-03-01 10:44:46 -08001710 v = Px[2] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001711 wm->wmmat[0] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift + 3);
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001712 // Adjust x displacement for the offset
1713 off = (ux << WARPEDMODEL_PREC_BITS) - ux * wm->wmmat[2] - uy * wm->wmmat[3];
1714 wm->wmmat[0] += ROUND_POWER_OF_TWO_SIGNED(off, 3);
1715
Debargha Mukherjee93105532017-03-01 10:44:46 -08001716 v = Py[0] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001717 wm->wmmat[4] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
Debargha Mukherjee93105532017-03-01 10:44:46 -08001718 v = Py[1] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001719 wm->wmmat[5] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
Debargha Mukherjee93105532017-03-01 10:44:46 -08001720 v = Py[2] * iDet;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001721 wm->wmmat[1] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift + 3);
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001722 // Adjust y displacement for the offset
1723 off = (uy << WARPEDMODEL_PREC_BITS) - ux * wm->wmmat[4] - uy * wm->wmmat[5];
1724 wm->wmmat[1] += ROUND_POWER_OF_TWO_SIGNED(off, 3);
1725 wm->wmmat[6] = wm->wmmat[7] = 0;
Debargha Mukherjee082d4df2017-03-16 07:10:41 -07001726
Debargha Mukherjee1e6e1302017-04-07 15:27:53 -07001727 // Clamp values
1728 wm->wmmat[0] = clamp(wm->wmmat[0], -WARPEDMODEL_TRANS_CLAMP,
1729 WARPEDMODEL_TRANS_CLAMP - 1);
1730 wm->wmmat[1] = clamp(wm->wmmat[1], -WARPEDMODEL_TRANS_CLAMP,
1731 WARPEDMODEL_TRANS_CLAMP - 1);
1732 wm->wmmat[2] = clamp(wm->wmmat[2], -WARPEDMODEL_DIAGAFFINE_CLAMP,
1733 WARPEDMODEL_DIAGAFFINE_CLAMP - 1);
1734 wm->wmmat[5] = clamp(wm->wmmat[5], -WARPEDMODEL_DIAGAFFINE_CLAMP,
1735 WARPEDMODEL_DIAGAFFINE_CLAMP - 1);
1736 wm->wmmat[3] = clamp(wm->wmmat[3], -WARPEDMODEL_NONDIAGAFFINE_CLAMP,
1737 WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
1738 wm->wmmat[4] = clamp(wm->wmmat[4], -WARPEDMODEL_NONDIAGAFFINE_CLAMP,
1739 WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
1740
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001741 return 0;
1742}
Debargha Mukherjeeb9370ac2017-03-23 06:10:18 -07001743#endif // LEAST_SQUARES_ORDER == 2
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001744
Debargha Mukherjeee8e6cad2017-03-22 17:38:38 -07001745int find_projection(const int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
1746 int mvy, int mvx, WarpedMotionParams *wm_params, int mi_row,
1747 int mi_col) {
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001748 int result = 1;
Yue Chen69f18e12016-09-08 14:48:15 -07001749 switch (wm_params->wmtype) {
Debargha Mukherjee246d2732017-02-27 14:09:18 -08001750 case AFFINE:
Debargha Mukherjeee8e6cad2017-03-22 17:38:38 -07001751 result = find_affine_int(np, pts1, pts2, bsize, mvy, mvx, wm_params,
1752 mi_row, mi_col);
Debargha Mukherjee246d2732017-02-27 14:09:18 -08001753 break;
Yue Chen69f18e12016-09-08 14:48:15 -07001754 default: assert(0 && "Invalid warped motion type!"); return 1;
1755 }
Yue Chen7d2109e2017-01-03 11:51:38 -08001756 if (result == 0) {
Yue Chen7d2109e2017-01-03 11:51:38 -08001757 if (wm_params->wmtype == ROTZOOM) {
1758 wm_params->wmmat[5] = wm_params->wmmat[2];
1759 wm_params->wmmat[4] = -wm_params->wmmat[3];
1760 }
Debargha Mukherjeee6eb3b52017-02-26 08:50:56 -08001761 if (wm_params->wmtype == AFFINE || wm_params->wmtype == ROTZOOM) {
1762 // check compatibility with the fast warp filter
Debargha Mukherjee3b6c5442017-03-30 08:22:00 -07001763 if (!get_shear_params(wm_params)) return 1;
Yue Chen7d2109e2017-01-03 11:51:38 -08001764 }
1765 }
Yue Chen69f18e12016-09-08 14:48:15 -07001766
1767 return result;
1768}
Yue Chen7d2109e2017-01-03 11:51:38 -08001769#endif // CONFIG_WARPED_MOTION