#include "av1/common/daala_tx.h"
#include "av1/common/odintrin.h"
3
/* clang-format off */
5
/* Right shift used by the butterfly stages in this file; it simply forwards
   to OD_UNBIASED_RSHIFT32, which is not defined here (presumably provided by
   one of the included headers). */
# define OD_DCT_RSHIFT(_a, _b) OD_UNBIASED_RSHIFT32(_a, _b)

/* TODO: Daala DCT overflow checks need to be ported as a later test */
/* Until then OD_DCT_OVERFLOW_CHECK expands to nothing. NOTE: the macro is only
   defined when OD_DCT_CHECK_OVERFLOW is NOT defined; the checking branch is
   still empty, so building with OD_DCT_CHECK_OVERFLOW leaves it undefined. */
# if defined(OD_DCT_CHECK_OVERFLOW)
# else
# define OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx)
# endif
13
/* Embedded 2-point orthonormal Type-II fDCT.
   Works in place on p0 and p1 through three integer lifting steps
   (multiply, add rounding offset, shift).
   p0 and p1 are expanded several times and assigned to, so they must be
   side-effect-free lvalues. */
#define OD_FDCT_2(p0, p1) \
  do { \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 13573, 16384, 100); \
    p0 -= (p1*13573 + 16384) >> 15; \
    /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 5793, 4096, 101); \
    p1 += (p0*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 3393, 4096, 102); \
    p0 -= (p1*3393 + 4096) >> 13; \
  } \
  while (0)
28
/* Embedded 2-point orthonormal Type-II iDCT.
   Applies the three lifting steps of OD_FDCT_2 in reverse order with the
   opposite sign, in place on p0 and p1.
   p0 and p1 are expanded several times and assigned to, so they must be
   side-effect-free lvalues. */
#define OD_IDCT_2(p0, p1) \
  do { \
    /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    p0 += (p1*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    p1 -= (p0*5793 + 4096) >> 13; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    p0 += (p1*13573 + 16384) >> 15; \
  } \
  while (0)
40
/* Embedded 2-point asymmetric Type-II fDCT.
   In place on p0 and p1; p1h is only read (it is added to p0).
   Callers in this file pass the OD_DCT_RSHIFT(p1, 1) value they computed
   beforehand as p1h. */
#define OD_FDCT_2_ASYM(p0, p1, p1h) \
  do { \
    p0 += p1h; \
    p1 = p0 - p1; \
  } \
  while (0)
48
/* Embedded 2-point asymmetric Type-II iDCT.
   In place on p0 and p1. p1h is an OUTPUT: it receives
   OD_DCT_RSHIFT(p1, 1) of the updated p1 so the caller can reuse it. */
#define OD_IDCT_2_ASYM(p0, p1, p1h) \
  do { \
    p1 = p0 - p1; \
    p1h = OD_DCT_RSHIFT(p1, 1); \
    p0 -= p1h; \
  } \
  while (0)
57
/* Embedded 2-point orthonormal Type-IV fDST.
   Works in place on p0 and p1 through three integer lifting steps.
   p0 and p1 are expanded several times and assigned to, so they must be
   side-effect-free lvalues. */
#define OD_FDST_2(p0, p1) \
  do { \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 103); \
    p0 -= (p1*10947 + 8192) >> 14; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 473, 256, 104); \
    p1 += (p0*473 + 256) >> 9; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 105); \
    p0 -= (p1*10947 + 8192) >> 14; \
  } \
  while (0)
72
/* Embedded 2-point orthonormal Type-IV iDST.
   Applies the three lifting steps of OD_FDST_2 in reverse order with the
   opposite sign, in place on p0 and p1.
   p0 and p1 are expanded several times and assigned to, so they must be
   side-effect-free lvalues. */
#define OD_IDST_2(p0, p1) \
  do { \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    p0 += (p1*10947 + 8192) >> 14; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    p1 -= (p0*473 + 256) >> 9; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    p0 += (p1*10947 + 8192) >> 14; \
  } \
  while (0)
84
/* Embedded 2-point asymmetric Type-IV fDST.
   Works in place on p0 and p1 through three integer lifting steps.
   p0 and p1 are expanded several times and assigned to, so they must be
   side-effect-free lvalues. */
#define OD_FDST_2_ASYM(p0, p1) \
  do { \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 11507, 8192, 187); \
    p0 -= (p1*11507 + 8192) >> 14; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 669, 512, 188); \
    p1 += (p0*669 + 512) >> 10; \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 4573, 2048, 189); \
    p0 -= (p1*4573 + 2048) >> 12; \
  } \
  while (0)
99
/* Embedded 2-point asymmetric Type-IV iDST.
   Applies the three lifting steps of OD_FDST_2_ASYM in reverse order with
   the opposite sign, in place on p0 and p1.
   p0 and p1 are expanded several times and assigned to, so they must be
   side-effect-free lvalues. */
#define OD_IDST_2_ASYM(p0, p1) \
  do { \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    p0 += (p1*4573 + 2048) >> 12; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    p1 -= (p0*669 + 512) >> 10; \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    p0 += (p1*11507 + 8192) >> 14; \
  } \
  while (0)
111
/* Embedded 4-point orthonormal Type-II fDCT.
   In place on q0..q3 (note the argument order q0, q2, q1, q3).
   A butterfly stage feeds OD_FDCT_2_ASYM on (q0, q2) and OD_FDST_2_ASYM on
   (q3, q1).
   Declares block-local ints q2h and q3h, so arguments must not be spelled
   with those names; arguments must be side-effect-free lvalues. */
#define OD_FDCT_4(q0, q2, q1, q3) \
  do { \
    int q2h; \
    int q3h; \
    /* Butterflies; the halved sums/differences are kept for reuse. */ \
    q3 = q0 - q3; \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 -= q3h; \
    q2 += q1; \
    q2h = OD_DCT_RSHIFT(q2, 1); \
    q1 = q2h - q1; \
    OD_FDCT_2_ASYM(q0, q2, q2h); \
    OD_FDST_2_ASYM(q3, q1); \
  } \
  while (0)
127
/* Embedded 4-point orthonormal Type-II iDCT.
   In place on q0..q3 (note the argument order q0, q2, q1, q3).
   Runs OD_IDST_2_ASYM on (q3, q2) and OD_IDCT_2_ASYM on (q0, q1), then the
   output butterflies.
   Declares block-local ints q1h and q3h, so arguments must not be spelled
   with those names; arguments must be side-effect-free lvalues. */
#define OD_IDCT_4(q0, q2, q1, q3) \
  do { \
    int q1h; \
    int q3h; \
    OD_IDST_2_ASYM(q3, q2); \
    /* q1h is written by OD_IDCT_2_ASYM and consumed below. */ \
    OD_IDCT_2_ASYM(q0, q1, q1h); \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 += q3h; \
    q3 = q0 - q3; \
    q2 = q1h - q2; \
    q1 -= q2; \
  } \
  while (0)
142
/* Embedded 4-point asymmetric Type-II fDCT.
   In place on q0, q2, q1, q3. q2h and q3h are only read; callers in this
   file pass the OD_DCT_RSHIFT(x, 1) halves they computed beforehand.
   After the butterflies it runs OD_FDCT_2 on (q0, q2) and OD_FDST_2 on
   (q3, q1). Arguments must be side-effect-free lvalues. */
#define OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h) \
  do { \
    q0 += q3h; \
    q3 = q0 - q3; \
    q1 = q2h - q1; \
    q2 = q1 - q2; \
    OD_FDCT_2(q0, q2); \
    OD_FDST_2(q3, q1); \
  } \
  while (0)
154
/* Embedded 4-point asymmetric Type-II iDCT.
   In place on q0, q2, q1, q3. q1h and q3h are OUTPUTS: they receive
   OD_DCT_RSHIFT(q1, 1) and OD_DCT_RSHIFT(q3, 1) of the updated values.
   Runs OD_IDST_2 on (q3, q2) and OD_IDCT_2 on (q0, q1) before the
   butterflies. Arguments must be side-effect-free lvalues. */
#define OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h) \
  do { \
    OD_IDST_2(q3, q2); \
    OD_IDCT_2(q0, q1); \
    q1 = q2 - q1; \
    q1h = OD_DCT_RSHIFT(q1, 1); \
    q2 = q1h - q2; \
    q3 = q0 - q3; \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 -= q3h; \
  } \
  while (0)
168
/* Embedded 4-point orthonormal Type-IV fDST.
   In place on q0..q3 (note the argument order q0, q2, q1, q3).
   Sequence: a three-step lifting rotation on (q2, q1), butterflies on
   (q0, q2) and (q1, q3), then three-step lifting rotations on (q2, q1) and
   (q3, q0).
   Declares block-local ints q0h and q1h, so arguments must not be spelled
   with those names; arguments must be side-effect-free lvalues. */
#define OD_FDST_4(q0, q2, q1, q3) \
  do { \
    int q0h; \
    int q1h; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 13573, 16384, 190); \
    q2 += (q1*13573 + 16384) >> 15; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(q2, 5793, 4096, 191); \
    q1 -= (q2*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 3393, 4096, 192); \
    q2 += (q1*3393 + 4096) >> 13; \
    q0 += q2; \
    q0h = OD_DCT_RSHIFT(q0, 1); \
    q2 = q0h - q2; \
    q1 += q3; \
    q1h = OD_DCT_RSHIFT(q1, 1); \
    q3 -= q1h; \
    /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~=
        0.524455699240090 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 537, 512, 193); \
    q2 -= (q1*537 + 512) >> 10; \
    /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
    OD_DCT_OVERFLOW_CHECK(q2, 1609, 1024, 194); \
    q1 += (q2*1609 + 1024) >> 11; \
    /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~=
        0.223847182092655 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 7335, 16384, 195); \
    q2 += (q1*7335 + 16384) >> 15; \
    /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~=
        0.6215036383171189 */ \
    OD_DCT_OVERFLOW_CHECK(q0, 5091, 4096, 196); \
    q3 += (q0*5091 + 4096) >> 13; \
    /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
    OD_DCT_OVERFLOW_CHECK(q3, 5681, 2048, 197); \
    q0 -= (q3*5681 + 2048) >> 12; \
    /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~=
        0.52204745462729 */ \
    OD_DCT_OVERFLOW_CHECK(q0, 4277, 4096, 198); \
    q3 += (q0*4277 + 4096) >> 13; \
  } \
  while (0)
213
/* Embedded 4-point orthonormal Type-IV iDST.
   In place on q0..q3 (note the argument order q0, q2, q1, q3).
   Sequence: three-step lifting rotations on (q3, q0) and (q1, q2),
   butterflies, then a final three-step lifting rotation on (q1, q2).
   Declares block-local ints q0h and q2h, so arguments must not be spelled
   with those names; arguments must be side-effect-free lvalues. */
#define OD_IDST_4(q0, q2, q1, q3) \
  do { \
    int q0h; \
    int q2h; \
    /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~=
        0.52204745462729 */ \
    q3 -= (q0*4277 + 4096) >> 13; \
    /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
    q0 += (q3*5681 + 2048) >> 12; \
    /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~=
        0.6215036383171189 */ \
    q3 -= (q0*5091 + 4096) >> 13; \
    /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~=
        0.223847182092655 */ \
    q1 -= (q2*7335 + 16384) >> 15; \
    /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
    q2 -= (q1*1609 + 1024) >> 11; \
    /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~=
        0.524455699240090 */ \
    q1 += (q2*537 + 512) >> 10; \
    q2h = OD_DCT_RSHIFT(q2, 1); \
    q3 += q2h; \
    q2 -= q3; \
    q0h = OD_DCT_RSHIFT(q0, 1); \
    q1 = q0h - q1; \
    q0 -= q1; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    q1 -= (q2*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    q2 += (q1*5793 + 4096) >> 13; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    q1 -= (q2*13573 + 16384) >> 15; \
  } \
  while (0)
249
/* Embedded 4-point asymmetric Type-IV fDST.
   In place on t0, t2, t1, t3. t0h is only read; the caller in this file
   passes the OD_DCT_RSHIFT(t0, 1) half it computed beforehand.
   Sequence: a three-step lifting rotation on (t2, t1), butterflies, then
   three-step lifting rotations on (t3, t0) and (t2, t1).
   Arguments must be side-effect-free lvalues. */
#define OD_FDST_4_ASYM(t0, t0h, t2, t1, t3) \
  do { \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 7489, 4096, 106); \
    t2 -= (t1*7489 + 4096) >> 13; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    /* The value being multiplied here is t2, so that is what is checked. */ \
    OD_DCT_OVERFLOW_CHECK(t2, 11585, 8192, 107); \
    t1 += (t2*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 108); \
    t2 += (t1*19195 + 16384) >> 15; \
    t3 += OD_DCT_RSHIFT(t2, 1); \
    t2 -= t3; \
    t1 = t0h - t1; \
    t0 -= t1; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 109); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 8035, 4096, 110); \
    t0 -= (t3*8035 + 4096) >> 13; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 111); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 112); \
    t2 += (t1*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 6811, 4096, 113); \
    t1 -= (t2*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 114); \
    t2 += (t1*8757 + 8192) >> 14; \
  } \
  while (0)
286
/* Embedded 4-point asymmetric Type-IV iDST.
   In place on t0, t2, t1, t3. t0h is an OUTPUT: it receives
   OD_DCT_RSHIFT(t0, 1) of the updated t0 so the caller can reuse it.
   Sequence: three-step lifting rotations on (t1, t2) and (t3, t0),
   butterflies, then a final three-step lifting rotation on (t1, t2).
   Arguments must be side-effect-free lvalues. */
#define OD_IDST_4_ASYM(t0, t0h, t2, t1, t3) \
  do { \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    t1 -= (t2*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    t2 += (t1*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    t1 -= (t2*8757 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    t3 -= (t0*6723 + 4096) >> 13; \
    /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    t0 += (t3*8035 + 4096) >> 13; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    t3 -= (t0*6723 + 4096) >> 13; \
    t0 += t2; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t2 = t0h - t2; \
    t1 += t3; \
    t3 -= OD_DCT_RSHIFT(t1, 1); \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    t1 -= (t2*19195 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    t2 -= (t1*11585 + 8192) >> 14; \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    t1 += (t2*7489 + 4096) >> 13; \
  } \
  while (0)
315
/* Embedded 8-point orthonormal Type-II fDCT.
   In place on r0..r7 (note the argument order r0, r4, r2, r6, r1, r5, r3,
   r7). Four butterflies, then OD_FDCT_4_ASYM on (r0, r4, r2, r6) and
   OD_FDST_4_ASYM on (r7, r3, r5, r1), each reusing the halves computed here.
   Declares block-local ints r4h, r5h, r6h and r7h, so arguments must not be
   spelled with those names; arguments must be side-effect-free lvalues. */
#define OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
  do { \
    int r4h; \
    int r5h; \
    int r6h; \
    int r7h; \
    r7 = r0 - r7; \
    r7h = OD_DCT_RSHIFT(r7, 1); \
    r0 -= r7h; \
    r6 += r1; \
    r6h = OD_DCT_RSHIFT(r6, 1); \
    r1 = r6h - r1; \
    r5 = r2 - r5; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 -= r5h; \
    r4 += r3; \
    r4h = OD_DCT_RSHIFT(r4, 1); \
    r3 = r4h - r3; \
    OD_FDCT_4_ASYM(r0, r4, r4h, r2, r6, r6h); \
    OD_FDST_4_ASYM(r7, r7h, r3, r5, r1); \
  } \
  while (0)
339
/* Embedded 8-point orthonormal Type-II iDCT.
   In place on r0..r7 (note the argument order r0, r4, r2, r6, r1, r5, r3,
   r7). Runs OD_IDST_4_ASYM and OD_IDCT_4_ASYM first; those fill r7h, r1h
   and r3h, which the output butterflies below consume.
   Declares block-local ints r1h, r3h, r5h and r7h, so arguments must not be
   spelled with those names; arguments must be side-effect-free lvalues. */
#define OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
  do { \
    int r1h; \
    int r3h; \
    int r5h; \
    int r7h; \
    OD_IDST_4_ASYM(r7, r7h, r5, r6, r4); \
    OD_IDCT_4_ASYM(r0, r2, r1, r1h, r3, r3h); \
    r0 += r7h; \
    r7 = r0 - r7; \
    r6 = r1h - r6; \
    r1 -= r6; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 += r5h; \
    r5 = r2 - r5; \
    r4 = r3h - r4; \
    r3 -= r4; \
  } \
  while (0)
360
/* Embedded 8-point asymmetric Type-II fDCT.
   In place on r0..r7. r4h, r6h, r5h and r7h are only read; the caller in
   this file passes the OD_DCT_RSHIFT(x, 1) halves it computed beforehand.
   After the butterflies it runs OD_FDCT_4 on (r0, r4, r2, r6) and OD_FDST_4
   on (r7, r3, r5, r1). Arguments must be side-effect-free lvalues. */
#define OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h) \
  do { \
    r0 += r7h; \
    r7 = r0 - r7; \
    r1 = r6h - r1; \
    r6 -= r1; \
    r2 += r5h; \
    r5 = r2 - r5; \
    r3 = r4h - r3; \
    r4 -= r3; \
    OD_FDCT_4(r0, r4, r2, r6); \
    OD_FDST_4(r7, r3, r5, r1); \
  } \
  while (0)
376
/* Embedded 8-point asymmetric Type-II iDCT.
   In place on r0..r7. r1h, r5h, r3h and r7h are OUTPUTS: each receives
   OD_DCT_RSHIFT(x, 1) of the corresponding updated value.
   Runs OD_IDST_4 on (r7, r5, r6, r4) and OD_IDCT_4 on (r0, r2, r1, r3)
   before the butterflies. Arguments must be side-effect-free lvalues. */
#define OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h) \
  do { \
    OD_IDST_4(r7, r5, r6, r4); \
    OD_IDCT_4(r0, r2, r1, r3); \
    r7 = r0 - r7; \
    r7h = OD_DCT_RSHIFT(r7, 1); \
    r0 -= r7h; \
    r1 += r6; \
    r1h = OD_DCT_RSHIFT(r1, 1); \
    r6 = r1h - r6; \
    r5 = r2 - r5; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 -= r5h; \
    r3 += r4; \
    r3h = OD_DCT_RSHIFT(r3, 1); \
    r4 = r3h - r4; \
  } \
  while (0)
396
/* Embedded 8-point orthonormal Type-IV fDST.
   In place on t0..t7 (note the argument order t0, t4, t2, t6, t1, t5, t3,
   t7). Sequence: three three-step lifting rotations, two layers of
   butterflies, a negation of t7, then four more three-step lifting
   rotations.
   Declares block-local ints t0h, t2h, t5h and t7h, so arguments must not be
   spelled with those names; arguments must be side-effect-free lvalues. */
#define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h; \
    int t7h; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 115); \
    t6 -= (t1*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 11585, 8192, 116); \
    t1 += (t6*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 117); \
    t6 -= (t1*13573 + 16384) >> 15; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 21895, 16384, 118); \
    t5 -= (t2*21895 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 15137, 8192, 119); \
    t2 += (t5*15137 + 8192) >> 14; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 10947, 8192, 120); \
    t5 -= (t2*10947 + 8192) >> 14; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 121); \
    t4 -= (t3*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3135, 4096, 122); \
    t3 += (t4*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 123); \
    t4 -= (t3*3259 + 8192) >> 14; \
    /* First butterfly layer; the halves are reused by the second layer. */ \
    t7 += t1; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t1 -= t7h; \
    t2 = t3 - t2; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 -= t2h; \
    t0 -= t6; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t6 += t0h; \
    t5 = t4 - t5; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h; \
    /* Second butterfly layer. */ \
    t1 += t5h; \
    t5 = t1 - t5; \
    t4 += t0h; \
    t0 -= t4; \
    t6 -= t2h; \
    t2 += t6; \
    t3 -= t7h; \
    t7 += t3; \
    /* TODO: Can we move this into another operation */ \
    t7 = -t7; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 124); \
    t0 -= (t7*7425 + 4096) >> 13; \
    /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 8153, 4096, 125); \
    t7 += (t0*8153 + 4096) >> 13; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 126); \
    t0 -= (t7*7425 + 4096) >> 13; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 127); \
    t6 -= (t1*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 1189, 2048, 128); \
    t1 += (t6*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 129); \
    t6 -= (t1*4861 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 130); \
    t2 -= (t5*2455 + 2048) >> 12; \
    /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 7225, 4096, 131); \
    t5 += (t2*7225 + 4096) >> 13; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 132); \
    t2 -= (t5*2455 + 2048) >> 12; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 133); \
    t4 -= (t3*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 5197, 4096, 134); \
    t3 += (t4*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 135); \
    t4 -= (t3*11725 + 16384) >> 15; \
  } \
  while (0)
491
/* Embedded 8-point orthonormal Type-IV iDST.
   In place on t0..t7 (note the argument order t0, t4, t2, t6, t1, t5, t3,
   t7). Sequence: four three-step lifting rotations, a negation of t7, two
   layers of butterflies, then three more three-step lifting rotations.
   Declares block-local ints t0h, t2h, t5h_ and t7h_, so arguments must not
   be spelled with those names; arguments must be side-effect-free
   lvalues. */
#define OD_IDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h_; \
    int t7h_; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    t1 += (t6*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
    t6 -= (t1*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    t1 += (t6*11725 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    t2 += (t5*2455 + 2048) >> 12; \
    /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    t5 -= (t2*7225 + 4096) >> 13; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    t2 += (t5*2455 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    t3 += (t4*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
    t4 -= (t3*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    t3 += (t4*4861 + 16384) >> 15; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    t0 += (t7*7425 + 4096) >> 13; \
    /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
    t7 -= (t0*8153 + 4096) >> 13; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    t0 += (t7*7425 + 4096) >> 13; \
    /* TODO: Can we move this into another operation */ \
    t7 = -t7; \
    /* First butterfly layer; the halves are reused by the second layer. */ \
    t7 -= t6; \
    t7h_ = OD_DCT_RSHIFT(t7, 1); \
    t6 += t7h_; \
    t2 -= t3; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 += t2h; \
    t0 += t1; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t1 -= t0h; \
    t5 = t4 - t5; \
    t5h_ = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h_; \
    /* Second butterfly layer. */ \
    t1 += t5h_; \
    t5 = t1 - t5; \
    t3 -= t0h; \
    t0 += t3; \
    t6 += t2h; \
    t2 = t6 - t2; \
    t4 += t7h_; \
    t7 -= t4; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    t6 -= (t1*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 += (t2*10947 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t2 -= (t5*15137 + 8192) >> 14; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 += (t2*21895 + 16384) >> 15; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t3 += (t4*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    t4 -= (t3*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t3 += (t4*13573 + 16384) >> 15; \
  } \
  while (0)
565
/* Embedded 8-point asymmetric Type-IV fDST.
   In place on t0..t7 (note the argument order t0, t4, t2, t6, t1, t5, t3,
   t7). Sequence: four three-step lifting rotations, two layers of
   butterflies, then three more three-step lifting rotations.
   Declares block-local ints t0h, t2h, t5h and t7h, so arguments must not be
   spelled with those names; arguments must be side-effect-free lvalues.
   TODO: Rewrite this so that t0h can be passed in. */
#define OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h; \
    int t7h; \
    /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t1, 1035, 1024, 199); \
    t6 += (t1*1035 + 1024) >> 11; \
    /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3675, 2048, 200); \
    t1 -= (t6*3675 + 2048) >> 12; \
    /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t1, 851, 4096, 201); \
    t6 -= (t1*851 + 4096) >> 13; \
    /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t2, 4379, 4096, 202); \
    t5 += (t2*4379 + 4096) >> 13; \
    /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t5, 10217, 4096, 203); \
    t2 -= (t5*10217 + 4096) >> 13; \
    /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t2, 4379, 8192, 204); \
    t5 += (t2*4379 + 8192) >> 14; \
    /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t3, 12905, 8192, 205); \
    t4 += (t3*12905 + 8192) >> 14; \
    /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3363, 4096, 206); \
    t3 -= (t4*3363 + 4096) >> 13; \
    /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3525, 2048, 207); \
    t4 -= (t3*3525 + 2048) >> 12; \
    /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t0, 5417, 4096, 208); \
    t7 += (t0*5417 + 4096) >> 13; \
    /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t7, 5765, 2048, 209); \
    t0 -= (t7*5765 + 2048) >> 12; \
    /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t0, 2507, 2048, 210); \
    t7 += (t0*2507 + 2048) >> 12; \
    /* First butterfly layer; the halves are reused by the second layer. */ \
    t0 += t1; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t1 -= t0h; \
    t2 -= t3; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 += t2h; \
    t5 -= t4; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    t4 += t5h; \
    t7 += t6; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t6 = t7h - t6; \
    /* Second butterfly layer. */ \
    t4 = t7h - t4; \
    t7 -= t4; \
    t1 += t5h; \
    t5 = t1 - t5; \
    t6 += t2h; \
    t2 = t6 - t2; \
    t3 -= t0h; \
    t0 += t3; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 211); \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 3135, 4096, 212); \
    t6 -= (t1*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 213); \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 214); \
    t5 += (t2*2737 + 2048) >> 12; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 473, 256, 215); \
    t2 -= (t5*473 + 256) >> 9; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 216); \
    t5 += (t2*2737 + 2048) >> 12; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 217); \
    t3 += (t4*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 5793, 4096, 218); \
    t4 -= (t3*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 219); \
    t3 += (t4*3393 + 4096) >> 13; \
  } \
  while (0)
659
/* Embedded 8-point asymmetric Type-IV iDST.
   In place on t0..t7 (note the argument order t0, t4, t2, t6, t1, t5, t3,
   t7). Sequence: three three-step lifting rotations, two layers of
   butterflies, then four more three-step lifting rotations.
   Declares block-local ints t0h, t2h, t5h__ and t7h__, so arguments must not
   be spelled with those names; arguments must be side-effect-free
   lvalues. */
#define OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h__; \
    int t7h__; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t6 -= (t1*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    t1 += (t6*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t6 -= (t1*3393 + 4096) >> 13; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 -= (t2*2737 + 2048) >> 12; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t2 += (t5*473 + 256) >> 9; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 -= (t2*2737 + 2048) >> 12; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t4 -= (t3*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    t3 += (t4*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t4 -= (t3*3259 + 8192) >> 14; \
    /* First butterfly layer; the halves are reused by the second layer. */ \
    t0 -= t6; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t6 += t0h; \
    t2 = t3 - t2; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 -= t2h; \
    t5 = t4 - t5; \
    t5h__ = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h__; \
    t7 += t1; \
    t7h__ = OD_DCT_RSHIFT(t7, 1); \
    t1 = t7h__ - t1; \
    /* Second butterfly layer. */ \
    t3 = t7h__ - t3; \
    t7 -= t3; \
    t1 -= t5h__; \
    t5 += t1; \
    t6 -= t2h; \
    t2 += t6; \
    t4 += t0h; \
    t0 -= t4; \
    /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
    t7 -= (t0*2507 + 2048) >> 12; \
    /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
    t0 += (t7*5765 + 2048) >> 12; \
    /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
    t7 -= (t0*5417 + 4096) >> 13; \
    /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
    t1 += (t6*3525 + 2048) >> 12; \
    /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
    t6 += (t1*3363 + 4096) >> 13; \
    /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
    t1 -= (t6*12905 + 8192) >> 14; \
    /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
    t5 -= (t2*4379 + 8192) >> 14; \
    /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
    t2 += (t5*10217 + 4096) >> 13; \
    /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
    t5 -= (t2*4379 + 4096) >> 13; \
    /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
    t3 += (t4*851 + 4096) >> 13; \
    /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
    t4 += (t3*3675 + 2048) >> 12; \
    /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
    t3 -= (t4*1035 + 1024) >> 11; \
  } \
  while (0)
731
/* Embedded 16-point orthonormal Type-II fDCT.
   In place on s0..sf (hex-digit suffixes; note the permuted argument
   order). Eight butterflies, then OD_FDCT_8_ASYM on the s0/s8/s4/sc/s2/sa/
   s6/se group (reusing s8h, sch, sah, seh) and OD_FDST_8_ASYM on the rest.
   Declares block-local ints s8h, sah, sch, seh and sfh, so arguments must
   not be spelled with those names; arguments must be side-effect-free
   lvalues. */
#define OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
  s1, s9, s5, sd, s3, sb, s7, sf) \
  do { \
    int s8h; \
    int sah; \
    int sch; \
    int seh; \
    int sfh; \
    sf = s0 - sf; \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    s0 -= sfh; \
    se += s1; \
    seh = OD_DCT_RSHIFT(se, 1); \
    s1 = seh - s1; \
    sd = s2 - sd; \
    s2 -= OD_DCT_RSHIFT(sd, 1); \
    sc += s3; \
    sch = OD_DCT_RSHIFT(sc, 1); \
    s3 = sch - s3; \
    sb = s4 - sb; \
    s4 -= OD_DCT_RSHIFT(sb, 1); \
    sa += s5; \
    sah = OD_DCT_RSHIFT(sa, 1); \
    s5 = sah - s5; \
    s9 = s6 - s9; \
    s6 -= OD_DCT_RSHIFT(s9, 1); \
    s8 += s7; \
    s8h = OD_DCT_RSHIFT(s8, 1); \
    s7 = s8h - s7; \
    OD_FDCT_8_ASYM(s0, s8, s8h, s4, sc, sch, s2, sa, sah, s6, se, seh); \
    OD_FDST_8_ASYM(sf, s7, sb, s3, sd, s5, s9, s1); \
  } \
  while (0)
766
/* Embedded 16-point orthonormal Type-II iDCT.
   In place on s0..sf (hex-digit suffixes; note the permuted argument
   order). Runs OD_IDST_8_ASYM and OD_IDCT_8_ASYM first; the latter fills
   s1h, s5h, s3h and s7h, which the output butterflies below consume.
   Declares block-local ints s1h, s3h, s5h, s7h and sfh, so arguments must
   not be spelled with those names; arguments must be side-effect-free
   lvalues. */
#define OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
  s1, s9, s5, sd, s3, sb, s7, sf) \
  do { \
    int s1h; \
    int s3h; \
    int s5h; \
    int s7h; \
    int sfh; \
    OD_IDST_8_ASYM(sf, sb, sd, s9, se, sa, sc, s8); \
    OD_IDCT_8_ASYM(s0, s4, s2, s6, s1, s1h, s5, s5h, s3, s3h, s7, s7h); \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    s0 += sfh; \
    sf = s0 - sf; \
    se = s1h - se; \
    s1 -= se; \
    s2 += OD_DCT_RSHIFT(sd, 1); \
    sd = s2 - sd; \
    sc = s3h - sc; \
    s3 -= sc; \
    s4 += OD_DCT_RSHIFT(sb, 1); \
    sb = s4 - sb; \
    sa = s5h - sa; \
    s5 -= sa; \
    s6 += OD_DCT_RSHIFT(s9, 1); \
    s9 = s6 - s9; \
    s8 = s7h - s8; \
    s7 -= s8; \
  } \
  while (0)
797
/* Embedded 16-point asymmetric Type-II fDCT.
   In place on t0..tf (hex-digit suffixes). The *h arguments (t8h, tch, tah,
   teh, t9h, tdh, tbh, tfh) are only read; presumably the caller supplies the
   halved values of the matching coefficients -- confirm against the call
   site, which is outside this view.
   After eight butterflies it runs OD_FDCT_8 on the t0/t8/t4/tc/t2/ta/t6/te
   group and OD_FDST_8 on the rest.
   Arguments must be side-effect-free lvalues. */
#define OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh, \
  t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh) \
  do { \
    t0 += tfh; \
    tf = t0 - tf; \
    t1 -= teh; \
    te += t1; \
    t2 += tdh; \
    td = t2 - td; \
    t3 -= tch; \
    tc += t3; \
    t4 += tbh; \
    tb = t4 - tb; \
    t5 -= tah; \
    ta += t5; \
    t6 += t9h; \
    t9 = t6 - t9; \
    t7 -= t8h; \
    t8 += t7; \
    OD_FDCT_8(t0, t8, t4, tc, t2, ta, t6, te); \
    OD_FDST_8(tf, t7, tb, t3, td, t5, t9, t1); \
  } \
  while (0)
822
/* Embedded 16-point asymmetric Type-II iDCT.
   In place on t0..tf (hex-digit suffixes). The *h arguments (t1h, t9h, t5h,
   tdh, t3h, tbh, t7h, tfh) are OUTPUTS: each receives OD_DCT_RSHIFT(x, 1)
   of the corresponding updated coefficient.
   Runs OD_IDST_8 on (tf, tb, td, t9, te, ta, tc, t8) and OD_IDCT_8 on
   (t0, t4, t2, t6, t1, t5, t3, t7) before the butterflies.
   Arguments must be side-effect-free lvalues. */
#define OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
  t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh) \
  do { \
    OD_IDST_8(tf, tb, td, t9, te, ta, tc, t8); \
    OD_IDCT_8(t0, t4, t2, t6, t1, t5, t3, t7); \
    t1 -= te; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    te += t1h; \
    t9 = t6 - t9; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    t6 -= t9h; \
    t5 -= ta; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    ta += t5h; \
    td = t2 - td; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    t2 -= tdh; \
    t3 -= tc; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    tc += t3h; \
    tb = t4 - tb; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    t4 -= tbh; \
    t7 -= t8; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t8 += t7h; \
    tf = t0 - tf; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    t0 -= tfh; \
  } \
  while (0)
855
Monty Montgomerycb9c1c52017-07-17 18:15:30 -0400856#define OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
857 s1, s9, s5, sd, s3, sb, s7, sf) \
858 /* Embedded 16-point orthonormal Type-IV fDST. */ \
859 do { \
860 int s0h; \
861 int s2h; \
862 int sdh; \
863 int sfh; \
864 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
865 OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 220); \
866 s1 += (se*13573 + 16384) >> 15; \
867 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
868 OD_DCT_OVERFLOW_CHECK(s1, 11585, 8192, 221); \
869 se -= (s1*11585 + 8192) >> 14; \
870 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
871 OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 222); \
872 s1 += (se*13573 + 16384) >> 15; \
873 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
874 OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 223); \
875 sd += (s2*21895 + 16384) >> 15; \
876 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
877 OD_DCT_OVERFLOW_CHECK(sd, 15137, 16384, 224); \
878 s2 -= (sd*15137 + 8192) >> 14; \
879 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
880 OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 225); \
881 sd += (s2*21895 + 16384) >> 15; \
882 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
883 OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 226); \
884 sc += (s3*3259 + 8192) >> 14; \
885 /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
886 OD_DCT_OVERFLOW_CHECK(sc, 3135, 4096, 227); \
887 s3 -= (sc*3135 + 4096) >> 13; \
888 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
889 OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 228); \
890 sc += (s3*3259 + 8192) >> 14; \
891 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
892 OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 229); \
893 sa += (s5*13573 + 16384) >> 15; \
894 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
895 OD_DCT_OVERFLOW_CHECK(sa, 11585, 8192, 230); \
896 s5 -= (sa*11585 + 8192) >> 14; \
897 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
898 OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 231); \
899 sa += (s5*13573 + 16384) >> 15; \
900 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
901 OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 232); \
902 s6 += (s9*13573 + 16384) >> 15; \
903 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
904 OD_DCT_OVERFLOW_CHECK(s6, 11585, 8192, 233); \
905 s9 -= (s6*11585 + 8192) >> 14; \
906 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
907 OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 234); \
908 s6 += (s9*13573 + 16384) >> 15; \
909 sf += se; \
910 sfh = OD_DCT_RSHIFT(sf, 1); \
911 se = sfh - se; \
912 s0 += s1; \
913 s0h = OD_DCT_RSHIFT(s0, 1); \
914 s1 = s0h - s1; \
915 s2 = s3 - s2; \
916 s2h = OD_DCT_RSHIFT(s2, 1); \
917 s3 -= s2h; \
918 sd -= sc; \
919 sdh = OD_DCT_RSHIFT(sd, 1); \
920 sc += sdh; \
921 sa = s4 - sa; \
922 s4 -= OD_DCT_RSHIFT(sa, 1); \
923 s5 += sb; \
924 sb = OD_DCT_RSHIFT(s5, 1) - sb; \
925 s8 += s6; \
926 s6 -= OD_DCT_RSHIFT(s8, 1); \
927 s7 = s9 - s7; \
928 s9 -= OD_DCT_RSHIFT(s7, 1); \
929 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
930 OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 235); \
931 s4 += (sb*6723 + 4096) >> 13; \
932 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
933 OD_DCT_OVERFLOW_CHECK(s4, 16069, 8192, 236); \
934 sb -= (s4*16069 + 8192) >> 14; \
935 /* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \
936 OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 237); \
937 s4 += (sb*6723 + 4096) >> 13; \
938 /* 8757/16384 ~= Tan[5*Pi/32]) ~= 0.534511135950792 */ \
939 OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 238); \
940 sa += (s5*8757 + 8192) >> 14; \
941 /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
942 OD_DCT_OVERFLOW_CHECK(sa, 6811, 4096, 239); \
943 s5 -= (sa*6811 + 4096) >> 13; \
944 /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
945 OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 240); \
946 sa += (s5*8757 + 8192) >> 14; \
947 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
948 OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 241); \
949 s6 += (s9*2485 + 4096) >> 13; \
950 /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
951 OD_DCT_OVERFLOW_CHECK(s6, 4551, 4096, 242); \
952 s9 -= (s6*4551 + 4096) >> 13; \
953 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
954 OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 243); \
955 s6 += (s9*2485 + 4096) >> 13; \
956 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
957 OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 244); \
958 s7 += (s8*3227 + 16384) >> 15; \
959 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
960 OD_DCT_OVERFLOW_CHECK(s7, 6393, 16384, 245); \
961 s8 -= (s7*6393 + 16384) >> 15; \
962 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
963 OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 246); \
964 s7 += (s8*3227 + 16384) >> 15; \
965 s1 -= s2h; \
966 s2 += s1; \
967 se += sdh; \
968 sd = se - sd; \
969 s3 += sfh; \
970 sf -= s3; \
971 sc = s0h - sc; \
972 s0 -= sc; \
973 sb += OD_DCT_RSHIFT(s8, 1); \
974 s8 = sb - s8; \
975 s4 += OD_DCT_RSHIFT(s7, 1); \
976 s7 -= s4; \
977 s6 += OD_DCT_RSHIFT(s5, 1); \
978 s5 = s6 - s5; \
979 s9 -= OD_DCT_RSHIFT(sa, 1); \
980 sa += s9; \
981 s8 += s0; \
982 s0 -= OD_DCT_RSHIFT(s8, 1); \
983 sf += s7; \
984 s7 = OD_DCT_RSHIFT(sf, 1) - s7; \
985 s1 -= s6; \
986 s6 += OD_DCT_RSHIFT(s1, 1); \
987 s9 += se; \
988 se = OD_DCT_RSHIFT(s9, 1) - se; \
989 s2 += sa; \
990 sa = OD_DCT_RSHIFT(s2, 1) - sa; \
991 s5 += sd; \
992 sd -= OD_DCT_RSHIFT(s5, 1); \
993 s4 = sc - s4; \
994 sc -= OD_DCT_RSHIFT(s4, 1); \
995 s3 -= sb; \
996 sb += OD_DCT_RSHIFT(s3, 1); \
997 /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
998 OD_DCT_OVERFLOW_CHECK(sf, 2799, 2048, 247); \
999 s0 -= (sf*2799 + 2048) >> 12; \
1000 /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
1001 OD_DCT_OVERFLOW_CHECK(s0, 2893, 1024, 248); \
1002 sf += (s0*2893 + 1024) >> 11; \
1003 /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
1004 OD_DCT_OVERFLOW_CHECK(sf, 5397, 4096, 249); \
1005 s0 -= (sf*5397 + 4096) >> 13; \
1006 /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
1007 OD_DCT_OVERFLOW_CHECK(s1, 41, 32, 250); \
1008 se += (s1*41 + 32) >> 6; \
1009 /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
1010 OD_DCT_OVERFLOW_CHECK(se, 2865, 1024, 251); \
1011 s1 -= (se*2865 + 1024) >> 11; \
1012 /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
1013 OD_DCT_OVERFLOW_CHECK(s1, 4641, 4096, 252); \
1014 se += (s1*4641 + 4096) >> 13; \
1015 /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
1016 OD_DCT_OVERFLOW_CHECK(s2, 2473, 2048, 253); \
1017 sd += (s2*2473 + 2048) >> 12; \
1018 /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
1019 OD_DCT_OVERFLOW_CHECK(sd, 5619, 2048, 254); \
1020 s2 -= (sd*5619 + 2048) >> 12; \
1021 /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
1022 OD_DCT_OVERFLOW_CHECK(s2, 7839, 8192, 255); \
1023 sd += (s2*7839 + 8192) >> 14; \
1024 /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
1025 OD_DCT_OVERFLOW_CHECK(s3, 5747, 4096, 256); \
1026 sc -= (s3*5747 + 4096) >> 13; \
1027 /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] ~= */ \
1028 OD_DCT_OVERFLOW_CHECK(sc, 3903, 4096, 257); \
1029 s3 += (sc*3903 + 4096) >> 13; \
1030 /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
1031 OD_DCT_OVERFLOW_CHECK(s3, 5701, 4096, 258); \
1032 sc += (s3*5701 + 4096) >> 13; \
1033 /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
1034 OD_DCT_OVERFLOW_CHECK(s4, 4471, 4096, 259); \
1035 sb += (s4*4471 + 4096) >> 13; \
1036 /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
1037 OD_DCT_OVERFLOW_CHECK(sb, 1309, 512, 260); \
1038 s4 -= (sb*1309 + 512) >> 10; \
1039 /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
1040 OD_DCT_OVERFLOW_CHECK(s4, 5067, 8192, 261); \
1041 sb += (s4*5067 + 8192) >> 14; \
1042 /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
1043 OD_DCT_OVERFLOW_CHECK(s5, 2217, 2048, 262); \
1044 sa -= (s5*2217 + 2048) >> 12; \
1045 /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] ~= 0.72705107329128 */ \
1046 OD_DCT_OVERFLOW_CHECK(sa, 1489, 1024, 263); \
1047 s5 += (sa*1489 + 1024) >> 11; \
1048 /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
1049 OD_DCT_OVERFLOW_CHECK(s5, 75, 128, 264); \
1050 sa += (s5*75 + 128) >> 8; \
1051 /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
1052 OD_DCT_OVERFLOW_CHECK(s9, 2087, 2048, 265); \
1053 s6 -= (s9*2087 + 2048) >> 12; \
1054 /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
1055 OD_DCT_OVERFLOW_CHECK(s6, 4653, 2048, 266); \
1056 s9 += (s6*4653 + 2048) >> 12; \
1057 /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
1058 OD_DCT_OVERFLOW_CHECK(s9, 4545, 16384, 267); \
1059 s6 -= (s9*4545 + 16384) >> 15; \
1060 /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
1061 OD_DCT_OVERFLOW_CHECK(s8, 2053, 2048, 268); \
1062 s7 += (s8*2053 + 2048) >> 12; \
1063 /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
1064 OD_DCT_OVERFLOW_CHECK(s7, 1945, 1024, 269); \
1065 s8 -= (s7*1945 + 1024) >> 11; \
1066 /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
1067 OD_DCT_OVERFLOW_CHECK(s8, 1651, 16384, 270); \
1068 s7 -= (s8*1651 + 16384) >> 15; \
1069 } \
1070 while (0)
1071
/* OD_IDST_16: in-place 16-point Type-IV inverse DST built from integer
   lifting steps.
   Every argument is both read and assigned, and is expanded many times, so
   each must be a modifiable integer lvalue with no side effects.
   Note that the parameter list is not in natural order (s0, s8, s4, sc, ...).
   Each lifting step has the form x +/-= (y*C + R) >> S, where R is half of
   2^S and acts as a rounding offset.
   The halves s0h, s4h, sbh and sfh are computed once with OD_DCT_RSHIFT and
   reused by later butterflies.
   Unlike the forward macros, no OD_DCT_OVERFLOW_CHECK calls are made here. */
1072#define OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
1073 s1, s9, s5, sd, s3, sb, s7, sf) \
1074 /* Embedded 16-point orthonormal Type-IV iDST. */ \
1075 do { \
1076 int s0h; \
1077 int s4h; \
1078 int sbh; \
1079 int sfh; \
  /* First stage: eight three-step lifting sequences, one per pair. */ \
1080 /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
1081 se += (s1*1651 + 16384) >> 15; \
1082 /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
1083 s1 += (se*1945 + 1024) >> 11; \
1084 /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
1085 se -= (s1*2053 + 2048) >> 12; \
1086 /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
1087 s6 += (s9*4545 + 16384) >> 15; \
1088 /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
1089 s9 -= (s6*4653 + 2048) >> 12; \
1090 /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
1091 s6 += (s9*2087 + 2048) >> 12; \
1092 /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
1093 s5 -= (sa*75 + 128) >> 8; \
1094 /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] */ \
1095 sa -= (s5*1489 + 1024) >> 11; \
1096 /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
1097 s5 += (sa*2217 + 2048) >> 12; \
1098 /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
1099 sd -= (s2*5067 + 8192) >> 14; \
1100 /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
1101 s2 += (sd*1309 + 512) >> 10; \
1102 /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
1103 sd -= (s2*4471 + 4096) >> 13; \
1104 /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
1105 s3 -= (sc*5701 + 4096) >> 13; \
1106 /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \
1107 sc -= (s3*3903 + 4096) >> 13; \
1108 /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
1109 s3 += (sc*5747 + 4096) >> 13; \
1110 /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
1111 sb -= (s4*7839 + 8192) >> 14; \
1112 /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
1113 s4 += (sb*5619 + 2048) >> 12; \
1114 /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
1115 sb -= (s4*2473 + 2048) >> 12; \
1116 /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
1117 s7 -= (s8*4641 + 4096) >> 13; \
1118 /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
1119 s8 += (s7*2865 + 1024) >> 11; \
1120 /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
1121 s7 -= (s8*41 + 32) >> 6; \
1122 /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
1123 s0 += (sf*5397 + 4096) >> 13; \
1124 /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
1125 sf -= (s0*2893 + 1024) >> 11; \
1126 /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
1127 s0 += (sf*2799 + 2048) >> 12; \
  /* Add/subtract butterflies; one operand of each pair is halved first. */ \
1128 sd -= OD_DCT_RSHIFT(sc, 1); \
1129 sc += sd; \
1130 s3 += OD_DCT_RSHIFT(s2, 1); \
1131 s2 = s3 - s2; \
1132 sb += OD_DCT_RSHIFT(sa, 1); \
1133 sa -= sb; \
1134 s5 = OD_DCT_RSHIFT(s4, 1) - s5; \
1135 s4 -= s5; \
1136 s7 = OD_DCT_RSHIFT(s9, 1) - s7; \
1137 s9 -= s7; \
1138 s6 -= OD_DCT_RSHIFT(s8, 1); \
1139 s8 += s6; \
1140 se = OD_DCT_RSHIFT(sf, 1) - se; \
1141 sf -= se; \
1142 s0 += OD_DCT_RSHIFT(s1, 1); \
1143 s1 -= s0; \
1144 s5 -= s9; \
1145 s9 += OD_DCT_RSHIFT(s5, 1); \
1146 sa = s6 - sa; \
1147 s6 -= OD_DCT_RSHIFT(sa, 1); \
1148 se += s2; \
1149 s2 -= OD_DCT_RSHIFT(se, 1); \
1150 s1 = sd - s1; \
1151 sd -= OD_DCT_RSHIFT(s1, 1); \
  /* The four halves saved below are consumed again after the next group */ \
  /* of lifting steps (s0h by s8, sfh by s7, sbh by s3, s4h by sc). */ \
1152 s0 += s3; \
1153 s0h = OD_DCT_RSHIFT(s0, 1); \
1154 s3 = s0h - s3; \
1155 sf += sc; \
1156 sfh = OD_DCT_RSHIFT(sf, 1); \
1157 sc -= sfh; \
1158 sb = s7 - sb; \
1159 sbh = OD_DCT_RSHIFT(sb, 1); \
1160 s7 -= sbh; \
1161 s4 -= s8; \
1162 s4h = OD_DCT_RSHIFT(s4, 1); \
1163 s8 += s4h; \
  /* Four Tan/Sin/Tan lifting sequences on (se,s1), (s6,s9), (s5,sa), (s2,sd). */ \
1164 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
1165 se -= (s1*3227 + 16384) >> 15; \
1166 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
1167 s1 += (se*6393 + 16384) >> 15; \
1168 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
1169 se -= (s1*3227 + 16384) >> 15; \
1170 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1171 s6 -= (s9*2485 + 4096) >> 13; \
1172 /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
1173 s9 += (s6*4551 + 4096) >> 13; \
1174 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1175 s6 -= (s9*2485 + 4096) >> 13; \
1176 /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1177 s5 -= (sa*8757 + 8192) >> 14; \
1178 /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
1179 sa += (s5*6811 + 4096) >> 13; \
1180 /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1181 s5 -= (sa*8757 + 8192) >> 14; \
1182 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1183 s2 -= (sd*6723 + 4096) >> 13; \
1184 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1185 sd += (s2*16069 + 8192) >> 14; \
1186 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1187 s2 -= (sd*6723 + 4096) >> 13; \
  /* Butterflies again; the last four reuse sbh, s4h, s0h and sfh. */ \
1188 s9 += OD_DCT_RSHIFT(se, 1); \
1189 se = s9 - se; \
1190 s6 += OD_DCT_RSHIFT(s1, 1); \
1191 s1 -= s6; \
1192 sd = OD_DCT_RSHIFT(sa, 1) - sd; \
1193 sa -= sd; \
1194 s2 += OD_DCT_RSHIFT(s5, 1); \
1195 s5 = s2 - s5; \
1196 s3 -= sbh; \
1197 sb += s3; \
1198 sc += s4h; \
1199 s4 = sc - s4; \
1200 s8 = s0h - s8; \
1201 s0 -= s8; \
1202 s7 = sfh - s7; \
1203 sf -= s7; \
  /* Final five Tan/Sin/Tan lifting sequences. */ \
1204 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1205 s6 -= (s9*13573 + 16384) >> 15; \
1206 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
1207 s9 += (s6*11585 + 8192) >> 14; \
1208 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1209 s6 -= (s9*13573 + 16384) >> 15; \
1210 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1211 s5 -= (sa*13573 + 16384) >> 15; \
1212 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
1213 sa += (s5*11585 + 8192) >> 14; \
1214 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1215 s5 -= (sa*13573 + 16384) >> 15; \
1216 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
1217 s3 -= (sc*3259 + 8192) >> 14; \
1218 /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
1219 sc += (s3*3135 + 4096) >> 13; \
1220 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
1221 s3 -= (sc*3259 + 8192) >> 14; \
1222 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
1223 sb -= (s4*21895 + 16384) >> 15; \
1224 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1225 s4 += (sb*15137 + 8192) >> 14; \
1226 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
1227 sb -= (s4*21895 + 16384) >> 15; \
1228 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1229 s8 -= (s7*13573 + 16384) >> 15; \
1230 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
1231 s7 += (s8*11585 + 8192) >> 14; \
1232 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1233 s8 -= (s7*13573 + 16384) >> 15; \
1234 } \
1235 while (0)
1236
Monty Montgomery2cb52ba2017-07-17 18:27:27 -04001237/* TODO: rewrite this to match OD_FDST_16. */
/* OD_FDST_16_ASYM: in-place 16-point asymmetric Type-IV forward DST built
   from integer lifting steps and add/subtract butterflies.
   The sixteen tN arguments are read and assigned and are expanded many
   times, so each must be a modifiable integer lvalue with no side effects.
   t0h and t4h are only read here. OD_FDCT_32, the caller visible in this
   file, passes OD_DCT_RSHIFT(x, 1) of the matching value for them.
   t7h is read once near the top and then overwritten further down, so it
   must also be a modifiable lvalue.
   Each lifting step has the form x +/-= (y*C + R) >> S, where R is half of
   2^S and acts as a rounding offset.
   Every multiply is preceded by OD_DCT_OVERFLOW_CHECK with the same operand,
   constant and offset plus a unique index (136..186); the definition at the
   top of this file expands to nothing. */
1238#define OD_FDST_16_ASYM(t0, t0h, t8, t4, t4h, tc, t2, ta, t6, te, \
1239 t1, t9, t5, td, t3, tb, t7, t7h, tf) \
1240 /* Embedded 16-point asymmetric Type-IV fDST. */ \
1241 do { \
1242 int t2h; \
1243 int t3h; \
1244 int t6h; \
1245 int t8h; \
1246 int t9h; \
1247 int tch; \
1248 int tdh; \
1249 /* TODO: Can we move these into another operation */ \
1250 t8 = -t8; \
1251 t9 = -t9; \
1252 ta = -ta; \
1253 tb = -tb; \
1254 td = -td; \
1255 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1256 OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 136); \
1257 t1 -= (te*13573 + 8192) >> 14; \
1258 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1259 OD_DCT_OVERFLOW_CHECK(t1, 11585, 16384, 137); \
1260 te += (t1*11585 + 16384) >> 15; \
1261 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1262 OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 138); \
1263 t1 -= (te*13573 + 8192) >> 14; \
1264 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1265 OD_DCT_OVERFLOW_CHECK(td, 4161, 8192, 139); \
1266 t2 += (td*4161 + 8192) >> 14; \
1267 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1268 OD_DCT_OVERFLOW_CHECK(t2, 15137, 8192, 140); \
1269 td -= (t2*15137 + 8192) >> 14; \
1270 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1271 OD_DCT_OVERFLOW_CHECK(td, 14341, 8192, 141); \
1272 t2 += (td*14341 + 8192) >> 14; \
1273 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1274 OD_DCT_OVERFLOW_CHECK(t3, 14341, 8192, 142); \
1275 tc -= (t3*14341 + 8192) >> 14; \
1276 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1277 OD_DCT_OVERFLOW_CHECK(tc, 15137, 8192, 143); \
1278 t3 += (tc*15137 + 8192) >> 14; \
1279 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1280 OD_DCT_OVERFLOW_CHECK(t3, 4161, 8192, 144); \
1281 tc -= (t3*4161 + 8192) >> 14; \
  /* t0h is the caller-supplied half; it is not recomputed from t0 here. */ \
1282 te = t0h - te; \
1283 t0 -= te; \
1284 tf = OD_DCT_RSHIFT(t1, 1) - tf; \
1285 t1 -= tf; \
1286 /* TODO: Can we move this into another operation */ \
1287 tc = -tc; \
1288 t2 = OD_DCT_RSHIFT(tc, 1) - t2; \
1289 tc -= t2; \
1290 t3 = OD_DCT_RSHIFT(td, 1) - t3; \
1291 td = t3 - td; \
1292 /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
1293 OD_DCT_OVERFLOW_CHECK(t6, 7489, 4096, 145); \
1294 t9 -= (t6*7489 + 4096) >> 13; \
1295 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
1296 OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 146); \
1297 t6 += (t9*11585 + 8192) >> 14; \
1298 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
1299 OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 147); \
1300 t9 += (t6*19195 + 16384) >> 15; \
1301 t8 += OD_DCT_RSHIFT(t9, 1); \
1302 t9 -= t8; \
  /* Uses the caller-supplied t7h; t7h is reassigned further below. */ \
1303 t6 = t7h - t6; \
1304 t7 -= t6; \
1305 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1306 OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 148); \
1307 t8 += (t7*6723 + 4096) >> 13; \
1308 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1309 OD_DCT_OVERFLOW_CHECK(t8, 16069, 8192, 149); \
1310 t7 -= (t8*16069 + 8192) >> 14; \
1311 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1312 OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 150); \
1313 t8 += (t7*6723 + 4096) >> 13; \
1314 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1315 OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 151); \
1316 t9 += (t6*17515 + 16384) >> 15; \
1317 /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
1318 OD_DCT_OVERFLOW_CHECK(t9, 13623, 8192, 152); \
1319 t6 -= (t9*13623 + 8192) >> 14; \
1320 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1321 OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 153); \
1322 t9 += (t6*17515 + 16384) >> 15; \
1323 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1324 OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 154); \
1325 t5 += (ta*13573 + 8192) >> 14; \
1326 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1327 OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 155); \
1328 ta -= (t5*11585 + 16384) >> 15; \
1329 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1330 OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 156); \
1331 t5 += (ta*13573 + 8192) >> 14; \
1332 tb += OD_DCT_RSHIFT(t5, 1); \
1333 t5 = tb - t5; \
  /* t4h is the caller-supplied half; it is not recomputed from t4 here. */ \
1334 ta += t4h; \
1335 t4 -= ta; \
1336 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1337 OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 157); \
1338 ta += (t5*2485 + 4096) >> 13; \
1339 /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
1340 OD_DCT_OVERFLOW_CHECK(ta, 18205, 16384, 158); \
1341 t5 -= (ta*18205 + 16384) >> 15; \
1342 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1343 OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 159); \
1344 ta += (t5*2485 + 4096) >> 13; \
1345 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1346 OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 160); \
1347 tb -= (t4*6723 + 4096) >> 13; \
1348 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1349 OD_DCT_OVERFLOW_CHECK(tb, 16069, 8192, 161); \
1350 t4 += (tb*16069 + 8192) >> 14; \
1351 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1352 OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 162); \
1353 tb -= (t4*6723 + 4096) >> 13; \
1354 /* TODO: Can we move this into another operation */ \
1355 t5 = -t5; \
  /* Butterflies that save a half (tch, t3h, tdh, t2h, t8h, t7h, t6h, t9h); */ \
  /* each saved half is consumed exactly once by the group that follows. */ \
1356 tc -= tf; \
1357 tch = OD_DCT_RSHIFT(tc, 1); \
1358 tf += tch; \
1359 t3 += t0; \
1360 t3h = OD_DCT_RSHIFT(t3, 1); \
1361 t0 -= t3h; \
1362 td -= t1; \
1363 tdh = OD_DCT_RSHIFT(td, 1); \
1364 t1 += tdh; \
1365 t2 += te; \
1366 t2h = OD_DCT_RSHIFT(t2, 1); \
1367 te -= t2h; \
1368 t8 += t4; \
1369 t8h = OD_DCT_RSHIFT(t8, 1); \
1370 t4 = t8h - t4; \
1371 t7 = tb - t7; \
1372 t7h = OD_DCT_RSHIFT(t7, 1); \
1373 tb = t7h - tb; \
1374 t6 -= ta; \
1375 t6h = OD_DCT_RSHIFT(t6, 1); \
1376 ta += t6h; \
1377 t9 = t5 - t9; \
1378 t9h = OD_DCT_RSHIFT(t9, 1); \
1379 t5 -= t9h; \
1380 t0 -= t7h; \
1381 t7 += t0; \
1382 tf += t8h; \
1383 t8 -= tf; \
1384 te -= t6h; \
1385 t6 += te; \
1386 t1 += t9h; \
1387 t9 -= t1; \
1388 tb -= tch; \
1389 tc += tb; \
1390 t4 += t3h; \
1391 t3 -= t4; \
1392 ta -= tdh; \
1393 td += ta; \
1394 t5 = t2h - t5; \
1395 t2 -= t5; \
1396 /* TODO: Can we move these into another operation */ \
1397 t8 = -t8; \
1398 t9 = -t9; \
1399 ta = -ta; \
1400 tb = -tb; \
1401 tc = -tc; \
1402 td = -td; \
1403 tf = -tf; \
  /* Final stage: eight Tan/Sin/Tan lifting sequences, one per output pair. */ \
1404 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1405 OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 163); \
1406 t0 -= (tf*7799 + 4096) >> 13; \
1407 /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
1408 OD_DCT_OVERFLOW_CHECK(t0, 4091, 2048, 164); \
1409 tf += (t0*4091 + 2048) >> 12; \
1410 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1411 OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 165); \
1412 t0 -= (tf*7799 + 4096) >> 13; \
1413 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1414 OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 166); \
1415 t1 += (te*2417 + 16384) >> 15; \
1416 /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
1417 OD_DCT_OVERFLOW_CHECK(t1, 601, 2048, 167); \
1418 te -= (t1*601 + 2048) >> 12; \
1419 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1420 OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 168); \
1421 t1 += (te*2417 + 16384) >> 15; \
1422 /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1423 OD_DCT_OVERFLOW_CHECK(t8, 14525, 16384, 169); \
1424 t7 -= (t8*14525 + 16384) >> 15; \
1425 /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
1426 OD_DCT_OVERFLOW_CHECK(t7, 3035, 2048, 170); \
1427 t8 += (t7*3035 + 2048) >> 12; \
1428 /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1429 OD_DCT_OVERFLOW_CHECK(t8, 7263, 8192, 171); \
1430 t7 -= (t8*7263 + 8192) >> 14; \
1431 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1432 OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 172); \
1433 t2 -= (td*6393 + 4096) >> 13; \
1434 /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
1435 OD_DCT_OVERFLOW_CHECK(t2, 3973, 2048, 173); \
1436 td += (t2*3973 + 2048) >> 12; \
1437 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1438 OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 174); \
1439 t2 -= (td*6393 + 4096) >> 13; \
1440 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1441 OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 175); \
1442 t5 -= (ta*9281 + 8192) >> 14; \
1443 /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
1444 OD_DCT_OVERFLOW_CHECK(t5, 7027, 4096, 176); \
1445 ta += (t5*7027 + 4096) >> 13; \
1446 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1447 OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 177); \
1448 t5 -= (ta*9281 + 8192) >> 14; \
1449 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1450 OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 178); \
1451 t3 -= (tc*11539 + 8192) >> 14; \
1452 /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
1453 OD_DCT_OVERFLOW_CHECK(t3, 7713, 4096, 179); \
1454 tc += (t3*7713 + 4096) >> 13; \
1455 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1456 OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 180); \
1457 t3 -= (tc*11539 + 8192) >> 14; \
1458 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1459 OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 181); \
1460 t4 -= (tb*10375 + 8192) >> 14; \
1461 /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
1462 OD_DCT_OVERFLOW_CHECK(t4, 7405, 4096, 182); \
1463 tb += (t4*7405 + 4096) >> 13; \
1464 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1465 OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 183); \
1466 t4 -= (tb*10375 + 8192) >> 14; \
1467 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1468 OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 184); \
1469 t6 -= (t9*8247 + 8192) >> 14; \
1470 /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
1471 OD_DCT_OVERFLOW_CHECK(t6, 1645, 1024, 185); \
1472 t9 += (t6*1645 + 1024) >> 11; \
1473 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1474 OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 186); \
1475 t6 -= (t9*8247 + 8192) >> 14; \
1476 } \
1477 while (0)
1478
/* OD_IDST_16_ASYM: in-place 16-point asymmetric Type-IV inverse DST built
   from integer lifting steps and add/subtract butterflies.
   The sixteen tN arguments are read and assigned and are expanded many
   times, so each must be a modifiable integer lvalue with no side effects.
   t0h, t2h and teh are written by this macro and never read before being
   assigned, so they act as outputs. OD_IDCT_32, the caller visible in this
   file, consumes them afterwards.
   teh is assigned twice; the second value is the one left for the caller.
   The locals with a trailing underscore (t1h_, t3h_, t9h_, tbh_) presumably
   avoid colliding with same-named variables in the invoking macro -- TODO
   confirm.
   Each lifting step has the form x +/-= (y*C + R) >> S, where R is half of
   2^S and acts as a rounding offset.
   No OD_DCT_OVERFLOW_CHECK calls are made here. */
1479#define OD_IDST_16_ASYM(t0, t0h, t8, t4, tc, t2, t2h, ta, t6, te, teh, \
1480 t1, t9, t5, td, t3, tb, t7, tf) \
1481 /* Embedded 16-point asymmetric Type-IV iDST. */ \
1482 do { \
1483 int t1h_; \
1484 int t3h_; \
1485 int t4h; \
1486 int t6h; \
1487 int t9h_; \
1488 int tbh_; \
1489 int tch; \
  /* First stage: eight Tan/Sin/Tan lifting sequences, one per input pair. */ \
1490 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1491 t6 += (t9*8247 + 8192) >> 14; \
1492 /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
1493 t9 -= (t6*1645 + 1024) >> 11; \
1494 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1495 t6 += (t9*8247 + 8192) >> 14; \
1496 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1497 t2 += (td*10375 + 8192) >> 14; \
1498 /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
1499 td -= (t2*7405 + 4096) >> 13; \
1500 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1501 t2 += (td*10375 + 8192) >> 14; \
1502 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1503 tc += (t3*11539 + 8192) >> 14; \
1504 /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
1505 t3 -= (tc*7713 + 4096) >> 13; \
1506 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1507 tc += (t3*11539 + 8192) >> 14; \
1508 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1509 ta += (t5*9281 + 8192) >> 14; \
1510 /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
1511 t5 -= (ta*7027 + 4096) >> 13; \
1512 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1513 ta += (t5*9281 + 8192) >> 14; \
1514 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1515 t4 += (tb*6393 + 4096) >> 13; \
1516 /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
1517 tb -= (t4*3973 + 2048) >> 12; \
1518 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1519 t4 += (tb*6393 + 4096) >> 13; \
1520 /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1521 te += (t1*7263 + 8192) >> 14; \
1522 /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
1523 t1 -= (te*3035 + 2048) >> 12; \
1524 /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1525 te += (t1*14525 + 16384) >> 15; \
1526 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1527 t8 -= (t7*2417 + 16384) >> 15; \
1528 /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
1529 t7 += (t8*601 + 2048) >> 12; \
1530 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1531 t8 -= (t7*2417 + 16384) >> 15; \
1532 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1533 t0 += (tf*7799 + 4096) >> 13; \
1534 /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
1535 tf -= (t0*4091 + 2048) >> 12; \
1536 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1537 t0 += (tf*7799 + 4096) >> 13; \
1538 /* TODO: Can we move these into another operation */ \
1539 t1 = -t1; \
1540 t3 = -t3; \
1541 t5 = -t5; \
1542 t9 = -t9; \
1543 tb = -tb; \
1544 td = -td; \
1545 tf = -tf; \
  /* Butterflies that save a half (t4h, tbh_, tch, t3h_, t9h_, t6h, t1h_, */ \
  /* teh); each saved half is consumed once by the group that follows. */ \
1546 t4 += ta; \
1547 t4h = OD_DCT_RSHIFT(t4, 1); \
1548 ta = t4h - ta; \
1549 tb -= t5; \
1550 tbh_ = OD_DCT_RSHIFT(tb, 1); \
1551 t5 += tbh_; \
1552 tc += t2; \
1553 tch = OD_DCT_RSHIFT(tc, 1); \
1554 t2 -= tch; \
1555 t3 -= td; \
1556 t3h_ = OD_DCT_RSHIFT(t3, 1); \
1557 td += t3h_; \
1558 t9 += t8; \
1559 t9h_ = OD_DCT_RSHIFT(t9, 1); \
1560 t8 -= t9h_; \
1561 t6 -= t7; \
1562 t6h = OD_DCT_RSHIFT(t6, 1); \
1563 t7 += t6h; \
1564 t1 += tf; \
1565 t1h_ = OD_DCT_RSHIFT(t1, 1); \
1566 tf -= t1h_; \
1567 te -= t0; \
1568 teh = OD_DCT_RSHIFT(te, 1); \
1569 t0 += teh; \
1570 ta += t9h_; \
1571 t9 = ta - t9; \
1572 t5 -= t6h; \
1573 t6 += t5; \
1574 td = teh - td; \
1575 te = td - te; \
1576 t2 = t1h_ - t2; \
1577 t1 -= t2; \
1578 t7 += t4h; \
1579 t4 -= t7; \
1580 t8 -= tbh_; \
1581 tb += t8; \
1582 t0 += tch; \
1583 tc -= t0; \
1584 tf -= t3h_; \
1585 t3 += tf; \
1586 /* TODO: Can we move this into another operation */ \
1587 ta = -ta; \
1588 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1589 td += (t2*6723 + 4096) >> 13; \
1590 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1591 t2 -= (td*16069 + 8192) >> 14; \
1592 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1593 td += (t2*6723 + 4096) >> 13; \
1594 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1595 t5 -= (ta*2485 + 4096) >> 13; \
1596 /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
1597 ta += (t5*18205 + 16384) >> 15; \
1598 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1599 t5 -= (ta*2485 + 4096) >> 13; \
  /* t2h is an output parameter: half of the updated t2, left for the caller. */ \
1600 t2 += t5; \
1601 t2h = OD_DCT_RSHIFT(t2, 1); \
1602 t5 -= t2h; \
1603 ta = td - ta; \
1604 td -= OD_DCT_RSHIFT(ta, 1); \
1605 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1606 ta -= (t5*13573 + 8192) >> 14; \
1607 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1608 t5 += (ta*11585 + 16384) >> 15; \
1609 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1610 ta -= (t5*13573 + 8192) >> 14; \
1611 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1612 t9 -= (t6*17515 + 16384) >> 15; \
1613 /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
1614 t6 += (t9*13623 + 8192) >> 14; \
1615 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1616 t9 -= (t6*17515 + 16384) >> 15; \
1617 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1618 t1 -= (te*6723 + 4096) >> 13; \
1619 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1620 te += (t1*16069 + 8192) >> 14; \
1621 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1622 t1 -= (te*6723 + 4096) >> 13; \
  /* teh is overwritten here; this second value is what the caller sees. */ \
1623 te += t6; \
1624 teh = OD_DCT_RSHIFT(te, 1); \
1625 t6 = teh - t6; \
1626 t9 += t1; \
1627 t1 -= OD_DCT_RSHIFT(t9, 1); \
1628 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
1629 t9 -= (t6*19195 + 16384) >> 15; \
1630 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
1631 t6 -= (t9*11585 + 8192) >> 14; \
1632 /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
1633 t9 += (t6*7489 + 4096) >> 13; \
1634 tb = tc - tb; \
1635 tc = OD_DCT_RSHIFT(tb, 1) - tc; \
1636 t3 += t4; \
1637 t4 = OD_DCT_RSHIFT(t3, 1) - t4; \
1638 /* TODO: Can we move this into another operation */ \
1639 t3 = -t3; \
1640 t8 += tf; \
1641 tf = OD_DCT_RSHIFT(t8, 1) - tf; \
  /* t0h is an output parameter: half of the updated t0, left for the caller. */ \
1642 t0 += t7; \
1643 t0h = OD_DCT_RSHIFT(t0, 1); \
1644 t7 = t0h - t7; \
1645 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1646 t3 += (tc*4161 + 8192) >> 14; \
1647 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1648 tc -= (t3*15137 + 8192) >> 14; \
1649 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1650 t3 += (tc*14341 + 8192) >> 14; \
1651 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1652 t4 -= (tb*14341 + 8192) >> 14; \
1653 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1654 tb += (t4*15137 + 8192) >> 14; \
1655 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1656 t4 -= (tb*4161 + 8192) >> 14; \
1657 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1658 t8 += (t7*13573 + 8192) >> 14; \
1659 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1660 t7 -= (t8*11585 + 16384) >> 15; \
1661 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1662 t8 += (t7*13573 + 8192) >> 14; \
1663 /* TODO: Can we move these into another operation */ \
1664 t1 = -t1; \
1665 t5 = -t5; \
1666 t9 = -t9; \
1667 tb = -tb; \
1668 td = -td; \
1669 } \
1670 while (0)
1671
/* OD_FDCT_32: in-place 32-point Type-II forward DCT.
   All 32 arguments are read and assigned and are expanded more than once,
   so each must be a modifiable integer lvalue with no side effects.
   The body has two parts:
   1. Sixteen add/subtract butterflies. Reading the name suffixes as base-32
      digits, each one pairs index k with index 31-k (t0/tv, t1/tu, ...,
      tf/tg). One member of each pair becomes the sum or difference, and the
      other is adjusted by half of it using OD_DCT_RSHIFT(x, 1).
   2. Two 16-point sub-transforms: OD_FDCT_16_ASYM on one member of each
      pair, and OD_FDST_16_ASYM on the other.
   The eleven local halves (tgh ... tvh) are saved because the sub-transform
   macros take them as extra arguments. */
1672#define OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
1673 te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
1674 /* Embedded 32-point orthonormal Type-II fDCT. */ \
1675 do { \
1676 int tgh; \
1677 int thh; \
1678 int tih; \
1679 int tkh; \
1680 int tmh; \
1681 int tnh; \
1682 int toh; \
1683 int tqh; \
1684 int tsh; \
1685 int tuh; \
1686 int tvh; \
  /* Butterflies. Halves that a sub-transform needs are kept in a named */ \
  /* temporary; the others are used inline and discarded. */ \
1687 tv = t0 - tv; \
1688 tvh = OD_DCT_RSHIFT(tv, 1); \
1689 t0 -= tvh; \
1690 tu += t1; \
1691 tuh = OD_DCT_RSHIFT(tu, 1); \
1692 t1 = tuh - t1; \
1693 tt = t2 - tt; \
1694 t2 -= OD_DCT_RSHIFT(tt, 1); \
1695 ts += t3; \
1696 tsh = OD_DCT_RSHIFT(ts, 1); \
1697 t3 = tsh - t3; \
1698 tr = t4 - tr; \
1699 t4 -= OD_DCT_RSHIFT(tr, 1); \
1700 tq += t5; \
1701 tqh = OD_DCT_RSHIFT(tq, 1); \
1702 t5 = tqh - t5; \
1703 tp = t6 - tp; \
1704 t6 -= OD_DCT_RSHIFT(tp, 1); \
1705 to += t7; \
1706 toh = OD_DCT_RSHIFT(to, 1); \
1707 t7 = toh - t7; \
1708 tn = t8 - tn; \
1709 tnh = OD_DCT_RSHIFT(tn, 1); \
1710 t8 -= tnh; \
1711 tm += t9; \
1712 tmh = OD_DCT_RSHIFT(tm, 1); \
1713 t9 = tmh - t9; \
1714 tl = ta - tl; \
1715 ta -= OD_DCT_RSHIFT(tl, 1); \
1716 tk += tb; \
1717 tkh = OD_DCT_RSHIFT(tk, 1); \
1718 tb = tkh - tb; \
1719 tj = tc - tj; \
1720 tc -= OD_DCT_RSHIFT(tj, 1); \
1721 ti += td; \
1722 tih = OD_DCT_RSHIFT(ti, 1); \
1723 td = tih - td; \
1724 th = te - th; \
1725 thh = OD_DCT_RSHIFT(th, 1); \
1726 te -= thh; \
1727 tg += tf; \
1728 tgh = OD_DCT_RSHIFT(tg, 1); \
1729 tf = tgh - tf; \
  /* Sub-transforms; the saved halves are passed next to their full values. */ \
1730 OD_FDCT_16_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
1731 t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh); \
1732 OD_FDST_16_ASYM(tv, tvh, tf, tn, tnh, t7, tr, tb, tj, t3, \
1733 tt, td, tl, t5, tp, t9, th, thh, t1); \
1734 } \
1735 while (0)
1736
/* OD_IDCT_32: in-place 32-point Type-II inverse DCT.
   All 32 arguments are read and assigned and are expanded more than once,
   so each must be a modifiable integer lvalue with no side effects.
   The body has two parts, in the opposite order to OD_FDCT_32:
   1. Two 16-point sub-transforms: OD_IDST_16_ASYM, then OD_IDCT_16_ASYM.
   2. Sixteen add/subtract butterflies that recombine the two halves.
   The eleven local halves are declared uninitialised and passed to the
   sub-transform macros. OD_IDST_16_ASYM assigns tvh, tth and thh;
   OD_IDCT_16_ASYM is not visible here and presumably assigns t1h, t9h, t5h,
   tdh, t3h, tbh, t7h and tfh before they are read below -- TODO confirm. */
1737#define OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
1738 te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
1739 /* Embedded 32-point orthonormal Type-II iDCT. */ \
1740 do { \
1741 int t1h; \
1742 int t3h; \
1743 int t5h; \
1744 int t7h; \
1745 int t9h; \
1746 int tbh; \
1747 int tdh; \
1748 int tfh; \
1749 int thh; \
1750 int tth; \
1751 int tvh; \
  /* Sub-transforms first; they also fill in the halves used below. */ \
1752 OD_IDST_16_ASYM(tv, tvh, tn, tr, tj, tt, tth, tl, tp, th, thh, \
1753 tu, tm, tq, ti, ts, tk, to, tg); \
1754 OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
1755 t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh); \
  /* Butterflies. Where no saved half exists it is recomputed inline with */ \
  /* OD_DCT_RSHIFT. */ \
1756 tu = t1h - tu; \
1757 t1 -= tu; \
1758 te += thh; \
1759 th = te - th; \
1760 tm = t9h - tm; \
1761 t9 -= tm; \
1762 t6 += OD_DCT_RSHIFT(tp, 1); \
1763 tp = t6 - tp; \
1764 tq = t5h - tq; \
1765 t5 -= tq; \
1766 ta += OD_DCT_RSHIFT(tl, 1); \
1767 tl = ta - tl; \
1768 ti = tdh - ti; \
1769 td -= ti; \
1770 t2 += tth; \
1771 tt = t2 - tt; \
1772 ts = t3h - ts; \
1773 t3 -= ts; \
1774 tc += OD_DCT_RSHIFT(tj, 1); \
1775 tj = tc - tj; \
1776 tk = tbh - tk; \
1777 tb -= tk; \
1778 t4 += OD_DCT_RSHIFT(tr, 1); \
1779 tr = t4 - tr; \
1780 to = t7h - to; \
1781 t7 -= to; \
1782 t8 += OD_DCT_RSHIFT(tn, 1); \
1783 tn = t8 - tn; \
1784 tg = tfh - tg; \
1785 tf -= tg; \
1786 t0 += tvh; \
1787 tv = t0 - tv; \
1788 } \
1789 while (0)
1790
#if CONFIG_TX64X64
/* OD_FDCT_32_ASYM: in-place 32-point asymmetric forward DCT.
 *
 * Arguments are the 32 values t0..tv (base-32 digit names) plus a
 * caller-supplied half-value xh for each of tg..tv (tgh, toh, ..., tvh).
 * The halves are only read, never written; this macro does not call
 * OD_DCT_RSHIFT itself.  Value arguments are evaluated several times, so
 * each must be a plain, side-effect-free lvalue.
 *
 * Stage 1 (inline): 16 butterflies pair tK with its mirror (t0/tv, t1/tu,
 * ..., tf/tg), each using the supplied half of the mirror value.
 * Stage 2: OD_FDCT_16 on one half of the results, OD_FDST_16 on the other.
 *
 * Requires OD_FDCT_16 and OD_FDST_16 to be defined at the point of use.
 */
#define OD_FDCT_32_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
 t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh, t1, th, thh, \
 t9, tp, tph, t5, tl, tlh, td, tt, tth, t3, tj, tjh, tb, tr, trh, \
 t7, tn, tnh, tf, tv, tvh) \
  /* Embedded 32-point asymmetric Type-II fDCT. */ \
  do { \
    /* Stage 1: butterflies using the caller-provided halves. */ \
    t0 += tvh; \
    tv = t0 - tv; \
    t1 = tuh - t1; \
    tu -= t1; \
    t2 += tth; \
    tt = t2 - tt; \
    t3 = tsh - t3; \
    ts -= t3; \
    t4 += trh; \
    tr = t4 - tr; \
    t5 = tqh - t5; \
    tq -= t5; \
    t6 += tph; \
    tp = t6 - tp; \
    t7 = toh - t7; \
    to -= t7; \
    t8 += tnh; \
    tn = t8 - tn; \
    t9 = tmh - t9; \
    tm -= t9; \
    ta += tlh; \
    tl = ta - tl; \
    tb = tkh - tb; \
    tk -= tb; \
    tc += tjh; \
    tj = tc - tj; \
    td = tih - td; \
    ti -= td; \
    te += thh; \
    th = te - th; \
    tf = tgh - tf; \
    tg -= tf; \
    /* Stage 2: 16-point DCT and DST on the two halves. */ \
    OD_FDCT_16(t0, tg, t8, to, t4, tk, tc, ts, \
     t2, ti, ta, tq, t6, tm, te, tu); \
    OD_FDST_16(tv, tf, tn, t7, tr, tb, tj, t3, \
     tt, td, tl, t5, tp, t9, th, t1); \
  } \
  while (0)

/* OD_IDCT_32_ASYM: in-place 32-point asymmetric inverse DCT.
 *
 * Arguments are the 32 values t0..tv (base-32 digit names) plus an output
 * lvalue xh for each of t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7,
 * tn, tf and tv.  Each xh is overwritten with OD_DCT_RSHIFT(x, 1) of the
 * value x holds at that butterfly; several of the paired x values are
 * modified again afterwards.  All arguments are evaluated more than once
 * and must be plain, side-effect-free lvalues.
 *
 * Stage 1: OD_IDST_16 and OD_IDCT_16 on the two halves of the data.
 * Stage 2 (inline): 16 butterflies recombine mirrored values and record
 * the halves.
 *
 * Requires OD_DCT_RSHIFT, OD_IDST_16 and OD_IDCT_16 to be defined at the
 * point of use.
 */
#define OD_IDCT_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, \
 t6, tm, te, tu, t1, t1h, th, thh, t9, t9h, tp, tph, t5, t5h, tl, tlh, \
 td, tdh, tt, tth, t3, t3h, tj, tjh, tb, tbh, tr, trh, t7, t7h, tn, tnh, \
 tf, tfh, tv, tvh) \
  /* Embedded 32-point asymmetric Type-II iDCT. */ \
  do { \
    /* Stage 1: 16-point inverse DST and DCT. */ \
    OD_IDST_16(tv, tn, tr, tj, tt, tl, tp, th, \
     tu, tm, tq, ti, ts, tk, to, tg); \
    OD_IDCT_16(t0, t8, t4, tc, t2, ta, t6, te, \
     t1, t9, t5, td, t3, tb, t7, tf); \
    /* Stage 2: butterflies; each one also stores its half in an xh output. */ \
    tv = t0 - tv; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    t0 -= tvh; \
    t1 += tu; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    tu = t1h - tu; \
    tt = t2 - tt; \
    tth = OD_DCT_RSHIFT(tt, 1); \
    t2 -= tth; \
    t3 += ts; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    ts = t3h - ts; \
    tr = t4 - tr; \
    trh = OD_DCT_RSHIFT(tr, 1); \
    t4 -= trh; \
    t5 += tq; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    tq = t5h - tq; \
    tp = t6 - tp; \
    tph = OD_DCT_RSHIFT(tp, 1); \
    t6 -= tph; \
    t7 += to; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    to = t7h - to; \
    tn = t8 - tn; \
    tnh = OD_DCT_RSHIFT(tn, 1); \
    t8 -= tnh; \
    t9 += tm; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    tm = t9h - tm; \
    tl = ta - tl; \
    tlh = OD_DCT_RSHIFT(tl, 1); \
    ta -= tlh; \
    tb += tk; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    tk = tbh - tk; \
    tj = tc - tj; \
    tjh = OD_DCT_RSHIFT(tj, 1); \
    tc -= tjh; \
    td += ti; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    ti = tdh - ti; \
    th = te - th; \
    thh = OD_DCT_RSHIFT(th, 1); \
    te -= thh; \
    tf += tg; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    tg = tfh - tg; \
  } \
  while (0)

/* OD_FDST_32_ASYM: in-place 32-point asymmetric forward DST over 32 integer
 * lvalues.
 *
 * Arguments are t0..tv (base-32 digit names), listed in bit-reversed index
 * order.  Every argument is read and written many times, so each one must be
 * a plain, side-effect-free lvalue.
 *
 * The body alternates between:
 *  - three-step lifting rotations of the form
 *      a op= (b*C + R) >> S
 *    where C is the fixed-point constant described by the comment above the
 *    step and R == 1 << (S - 1) is the rounding offset;
 *  - sign flips of individual values;
 *  - add/subtract butterflies that use OD_DCT_RSHIFT(x, 1) as the half of a
 *    value.  Eight of those halves (t0h, t1h, t4h, t5h, tqh, trh, tuh, tvh)
 *    are kept in block-local temporaries and reused by a later butterfly
 *    group.
 *
 * Each multiply is preceded by OD_DCT_OVERFLOW_CHECK(value, C, R, id), with
 * ids 271..387 in this macro.  The statement order is significant: every
 * step consumes the result of earlier ones.
 *
 * Requires OD_DCT_RSHIFT and OD_DCT_OVERFLOW_CHECK to be defined at the
 * point of use.
 */
#define OD_FDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
 tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
  /* Embedded 32-point asymmetric Type-IV fDST. */ \
  do { \
    int t0h; \
    int t1h; \
    int t4h; \
    int t5h; \
    int tqh; \
    int trh; \
    int tuh; \
    int tvh; \
    \
    tu = -tu; \
    \
    /* First group of three-step lifting rotations. */ \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 271); \
    t5 -= (tq*13573 + 8192) >> 14; \
    /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 272); \
    tq += (t5*11585 + 16384) >> 15; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 273); \
    t5 -= (tq*13573 + 8192) >> 14; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 29957, 16384, 274); \
    tp += (t6*29957 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(tp, 11585, 8192, 275); \
    t6 -= (tp*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 276); \
    tp -= (t6*19195 + 16384) >> 15; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 29957, 16384, 277); \
    tu += (t1*29957 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(tu, 11585, 8192, 278); \
    t1 -= (tu*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 279); \
    tu -= (t1*19195 + 16384) >> 15; \
    /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 28681, 16384, 280); \
    tt += (t2*28681 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(tt, 15137, 8192, 281); \
    t2 -= (tt*15137 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 4161, 8192, 282); \
    tt += (t2*4161 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(ts, 4161, 8192, 283); \
    t3 += (ts*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 15137, 8192, 284); \
    ts -= (t3*15137 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(ts, 14341, 8192, 285); \
    t3 += (ts*14341 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 19195, 16384, 286); \
    t9 -= (tm*19195 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 287); \
    tm -= (t9*11585 + 8192) >> 14; \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 7489, 4096, 288); \
    t9 += (tm*7489 + 4096) >> 13; \
    /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 289); \
    ta += (tl*3259 + 4096) >> 13; \
    /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
    OD_DCT_OVERFLOW_CHECK(ta, 3135, 8192, 290); \
    tl -= (ta*3135 + 8192) >> 14; \
    /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 291); \
    ta += (tl*3259 + 4096) >> 13; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(tk, 4161, 8192, 292); \
    tb += (tk*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 15137, 8192, 293); \
    tk -= (tb*15137 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(tk, 14341, 8192, 294); \
    tb += (tk*14341 + 8192) >> 14; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(te, 29957, 16384, 295); \
    th += (te*29957 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(th, 11585, 8192, 296); \
    te -= (th*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(te, 19195, 16384, 297); \
    th -= (te*19195 + 16384) >> 15; \
    /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 28681, 16384, 298); \
    tj += (tc*28681 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(tj, 15137, 8192, 299); \
    tc -= (tj*15137 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 4161, 8192, 300); \
    tj += (tc*4161 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 4161, 8192, 301); \
    td += (ti*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(td, 15137, 8192, 302); \
    ti -= (td*15137 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 14341, 8192, 303); \
    td += (ti*14341 + 8192) >> 14; \
    \
    t1 = -t1; \
    t2 = -t2; \
    t3 = -t3; \
    td = -td; \
    tg = -tg; \
    to = -to; \
    ts = -ts; \
    \
    /* Butterflies: one value absorbs half of its partner, then the partner \
       absorbs the updated value. */ \
    tr -= OD_DCT_RSHIFT(t5, 1); \
    t5 += tr; \
    tq -= OD_DCT_RSHIFT(t4, 1); /* pass */ \
    t4 += tq; \
    t6 -= OD_DCT_RSHIFT(t7, 1); \
    t7 += t6; \
    to -= OD_DCT_RSHIFT(tp, 1); /* pass */ \
    tp += to; \
    t1 += OD_DCT_RSHIFT(t0, 1); /* pass */ \
    t0 -= t1; \
    tv -= OD_DCT_RSHIFT(tu, 1); \
    tu += tv; \
    t3 -= OD_DCT_RSHIFT(tt, 1); \
    tt += t3; \
    t2 += OD_DCT_RSHIFT(ts, 1); \
    ts -= t2; \
    t9 -= OD_DCT_RSHIFT(t8, 1); /* pass */ \
    t8 += t9; \
    tn += OD_DCT_RSHIFT(tm, 1); \
    tm -= tn; \
    tb += OD_DCT_RSHIFT(ta, 1); \
    ta -= tb; \
    tl -= OD_DCT_RSHIFT(tk, 1); \
    tk += tl; \
    te -= OD_DCT_RSHIFT(tf, 1); /* pass */ \
    tf += te; \
    tg -= OD_DCT_RSHIFT(th, 1); \
    th += tg; \
    tc -= OD_DCT_RSHIFT(ti, 1); \
    ti += tc; \
    td += OD_DCT_RSHIFT(tj, 1); \
    tj -= td; \
    \
    t4 = -t4; \
    \
    /* Second group of three-step lifting rotations. */ \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
    OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 304); \
    t4 += (tr*6723 + 4096) >> 13; \
    /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 16069, 8192, 305); \
    tr -= (t4*16069 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
    OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 306); \
    t4 += (tr*6723 + 4096) >> 13; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 307); \
    t5 += (tq*17515 + 16384) >> 15; \
    /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 13623, 8192, 308); \
    tq -= (t5*13623 + 8192) >> 14; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 309); \
    t5 += (tq*17515 + 16384) >> 15; \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 310); \
    t7 += (to*3227 + 16384) >> 15; \
    /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 6393, 16384, 311); \
    to -= (t7*6393 + 16384) >> 15; \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 312); \
    t7 += (to*3227 + 16384) >> 15; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 313); \
    t6 += (tp*2485 + 4096) >> 13; \
    /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 18205, 16384, 314); \
    tp -= (t6*18205 + 16384) >> 15; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 315); \
    t6 += (tp*2485 + 4096) >> 13; \
    \
    t5 = -t5; \
    \
    /* Butterflies; the halves stored in trh, t4h, t5h, tqh, t0h, tvh, tuh \
       and t1h are consumed again by a later butterfly group. */ \
    tr += to; \
    trh = OD_DCT_RSHIFT(tr, 1); \
    to -= trh; \
    t4 += t7; \
    t4h = OD_DCT_RSHIFT(t4, 1); \
    t7 -= t4h; \
    t5 += tp; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    tp -= t5h; \
    tq += t6; \
    tqh = OD_DCT_RSHIFT(tq, 1); \
    t6 -= tqh; \
    t0 -= t3; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t3 += t0h; \
    tv -= ts; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    ts += tvh; \
    tu += tt; \
    tuh = OD_DCT_RSHIFT(tu, 1); \
    tt -= tuh; \
    t1 -= t2; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    t2 += t1h; \
    t8 += tb; \
    tb -= OD_DCT_RSHIFT(t8, 1); \
    tn += tk; \
    tk -= OD_DCT_RSHIFT(tn, 1); \
    t9 += tl; \
    tl -= OD_DCT_RSHIFT(t9, 1); \
    tm -= ta; \
    ta += OD_DCT_RSHIFT(tm, 1); \
    tc -= tf; \
    tf += OD_DCT_RSHIFT(tc, 1); \
    tj += tg; \
    tg -= OD_DCT_RSHIFT(tj, 1); \
    td -= te; \
    te += OD_DCT_RSHIFT(td, 1); \
    ti += th; \
    th -= OD_DCT_RSHIFT(ti, 1); \
    \
    t9 = -t9; \
    tl = -tl; \
    \
    /* Third group of three-step lifting rotations. */ \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 316); \
    t8 += (tn*805 + 8192) >> 14; \
    /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
    OD_DCT_OVERFLOW_CHECK(t8, 803, 4096, 317); \
    tn -= (t8*803 + 4096) >> 13; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 318); \
    t8 += (tn*805 + 8192) >> 14; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 319); \
    tk += (tb*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
    OD_DCT_OVERFLOW_CHECK(tk, 5197, 4096, 320); \
    tb -= (tk*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 321); \
    tk += (tb*11725 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 322); \
    ta += (tl*2455 + 2048) >> 12; \
    /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    OD_DCT_OVERFLOW_CHECK(ta, 14449, 8192, 323); \
    tl -= (ta*14449 + 8192) >> 14; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 324); \
    ta += (tl*2455 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 325); \
    t9 += (tm*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
    OD_DCT_OVERFLOW_CHECK(t9, 1189, 2048, 326); \
    tm -= (t9*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 327); \
    t9 += (tm*4861 + 16384) >> 15; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 328); \
    tf += (tg*805 + 8192) >> 14; \
    /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
    OD_DCT_OVERFLOW_CHECK(tf, 803, 4096, 329); \
    tg -= (tf*803 + 4096) >> 13; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 330); \
    tf += (tg*805 + 8192) >> 14; \
    /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 331); \
    tc += (tj*2931 + 4096) >> 13; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 5197, 4096, 332); \
    tj -= (tc*5197 + 4096) >> 13; \
    /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 333); \
    tc += (tj*2931 + 4096) >> 13; \
    /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 334); \
    td += (ti*513 + 1024) >> 11; \
    /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
    OD_DCT_OVERFLOW_CHECK(td, 7723, 8192, 335); \
    ti -= (td*7723 + 8192) >> 14; \
    /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 336); \
    td += (ti*513 + 1024) >> 11; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 337); \
    te += (th*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
    OD_DCT_OVERFLOW_CHECK(te, 1189, 2048, 338); \
    th -= (te*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 339); \
    te += (th*4861 + 16384) >> 15; \
    \
    ta = -ta; \
    tb = -tb; \
    \
    /* Butterflies; the first eight reuse the halves saved earlier. */ \
    tt += t5h; \
    t5 -= tt; \
    t2 -= tqh; \
    tq += t2; \
    tp += t1h; \
    t1 -= tp; \
    t6 -= tuh; \
    tu += t6; \
    t7 += tvh; \
    tv -= t7; \
    to += t0h; \
    t0 -= to; \
    t3 -= t4h; \
    t4 += t3; \
    ts += trh; \
    tr -= ts; \
    tf -= OD_DCT_RSHIFT(tn, 1); \
    tn += tf; \
    tg -= OD_DCT_RSHIFT(t8, 1); \
    t8 += tg; \
    tk += OD_DCT_RSHIFT(tc, 1); \
    tc -= tk; \
    tb += OD_DCT_RSHIFT(tj, 1); \
    tj -= tb; \
    ta += OD_DCT_RSHIFT(ti, 1); \
    ti -= ta; \
    tl += OD_DCT_RSHIFT(td, 1); \
    td -= tl; \
    te -= OD_DCT_RSHIFT(tm, 1); \
    tm += te; \
    th -= OD_DCT_RSHIFT(t9, 1); \
    t9 += th; \
    ta -= t5; \
    t5 += OD_DCT_RSHIFT(ta, 1); \
    tq -= tl; \
    tl += OD_DCT_RSHIFT(tq, 1); \
    t2 -= ti; \
    ti += OD_DCT_RSHIFT(t2, 1); \
    td -= tt; \
    tt += OD_DCT_RSHIFT(td, 1); \
    tm += tp; \
    tp -= OD_DCT_RSHIFT(tm, 1); \
    t6 += t9; \
    t9 -= OD_DCT_RSHIFT(t6, 1); \
    te -= tu; \
    tu += OD_DCT_RSHIFT(te, 1); \
    t1 -= th; \
    th += OD_DCT_RSHIFT(t1, 1); \
    t0 -= tg; \
    tg += OD_DCT_RSHIFT(t0, 1); \
    tf += tv; \
    tv -= OD_DCT_RSHIFT(tf, 1); \
    t8 -= t7; \
    t7 += OD_DCT_RSHIFT(t8, 1); \
    to -= tn; \
    tn += OD_DCT_RSHIFT(to, 1); \
    t4 -= tk; \
    tk += OD_DCT_RSHIFT(t4, 1); \
    tb -= tr; \
    tr += OD_DCT_RSHIFT(tb, 1); \
    t3 -= tj; \
    tj += OD_DCT_RSHIFT(t3, 1); \
    tc -= ts; \
    ts += OD_DCT_RSHIFT(tc, 1); \
    \
    tr = -tr; \
    ts = -ts; \
    tt = -tt; \
    tu = -tu; \
    \
    /* Final group of three-step lifting rotations, one per output pair. */ \
    /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t0, 2847, 2048, 340); \
    tv += (t0*2847 + 2048) >> 12; \
    /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tv, 5791, 2048, 341); \
    t0 -= (tv*5791 + 2048) >> 12; \
    /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t0, 5593, 4096, 342); \
    tv += (t0*5593 + 4096) >> 13; \
    /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tf, 4099, 4096, 343); \
    tg -= (tf*4099 + 4096) >> 13; \
    /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tg, 1997, 1024, 344); \
    tf += (tg*1997 + 1024) >> 11; \
    /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tf, 815, 16384, 345); \
    tg += (tf*815 + 16384) >> 15; \
    /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t8, 2527, 2048, 346); \
    tn -= (t8*2527 + 2048) >> 12; \
    /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tn, 4695, 4096, 347); \
    t8 += (tn*4695 + 4096) >> 13; \
    /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t8, 4187, 4096, 348); \
    tn += (t8*4187 + 4096) >> 13; \
    /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(to, 5477, 4096, 349); \
    t7 += (to*5477 + 4096) >> 13; \
    /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t7, 4169, 4096, 350); \
    to -= (t7*4169 + 4096) >> 13; \
    /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(to, 2571, 2048, 351); \
    t7 -= (to*2571 + 2048) >> 12; \
    /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t2, 5331, 4096, 352); \
    tt += (t2*5331 + 4096) >> 13; \
    /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tt, 5749, 2048, 353); \
    t2 -= (tt*5749 + 2048) >> 12; \
    /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t2, 2413, 2048, 354); \
    tt += (t2*2413 + 2048) >> 12; \
    /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(td, 4167, 4096, 355); \
    ti -= (td*4167 + 4096) >> 13; \
    /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ti, 891, 512, 356); \
    td += (ti*891 + 512) >> 10; \
    /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(td, 4327, 16384, 357); \
    ti += (td*4327 + 16384) >> 15; \
    /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ta, 2261, 2048, 358); \
    tl -= (ta*2261 + 2048) >> 12; \
    /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tl, 2855, 2048, 359); \
    ta += (tl*2855 + 2048) >> 12; \
    /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ta, 5417, 8192, 360); \
    tl += (ta*5417 + 8192) >> 14; \
    /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tq, 3459, 2048, 361); \
    t5 += (tq*3459 + 2048) >> 12; \
    /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t5, 1545, 2048, 362); \
    tq -= (t5*1545 + 2048) >> 12; \
    /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tq, 1971, 1024, 363); \
    t5 -= (tq*1971 + 1024) >> 11; \
    /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t3, 323, 256, 364); \
    ts += (t3*323 + 256) >> 9; \
    /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ts, 5707, 2048, 365); \
    t3 -= (ts*5707 + 2048) >> 12; \
    /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t3, 2229, 2048, 366); \
    ts += (t3*2229 + 2048) >> 12; \
    /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tc, 1061, 1024, 367); \
    tj -= (tc*1061 + 1024) >> 11; \
    /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tj, 6671, 4096, 368); \
    tc += (tj*6671 + 4096) >> 13; \
    /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tc, 6287, 16384, 369); \
    tj += (tc*6287 + 16384) >> 15; \
    /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tb, 4359, 4096, 370); \
    tk -= (tb*4359 + 4096) >> 13; \
    /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tk, 3099, 2048, 371); \
    tb += (tk*3099 + 2048) >> 12; \
    /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tb, 2109, 4096, 372); \
    tk += (tb*2109 + 4096) >> 13; \
    /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t4, 5017, 4096, 373); \
    tr += (t4*5017 + 4096) >> 13; \
    /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tr, 1413, 512, 374); \
    t4 -= (tr*1413 + 512) >> 10; \
    /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t4, 8195, 8192, 375); \
    tr += (t4*8195 + 8192) >> 14; \
    /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tm, 2373, 2048, 376); \
    t9 += (tm*2373 + 2048) >> 12; \
    /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t9, 5209, 4096, 377); \
    tm -= (t9*5209 + 4096) >> 13; \
    /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tm, 3391, 4096, 378); \
    t9 -= (tm*3391 + 4096) >> 13; \
    /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t6, 1517, 1024, 379); \
    tp -= (t6*1517 + 1024) >> 11; \
    /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tp, 1817, 2048, 380); \
    t6 += (tp*1817 + 2048) >> 12; \
    /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t6, 6331, 4096, 381); \
    tp += (t6*6331 + 4096) >> 13; \
    /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(te, 515, 512, 382); \
    th -= (te*515 + 512) >> 10; \
    /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(th, 7567, 4096, 383); \
    te += (th*7567 + 4096) >> 13; \
    /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(te, 2513, 16384, 384); \
    th += (te*2513 + 16384) >> 15; \
    /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t1, 2753, 2048, 385); \
    tu += (t1*2753 + 2048) >> 12; \
    /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tu, 5777, 2048, 386); \
    t1 -= (tu*5777 + 2048) >> 12; \
    /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t1, 1301, 1024, 387); \
    tu += (t1*1301 + 1024) >> 11; \
  } \
  while (0)

2432#define OD_IDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
2433 tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
2434 /* Embedded 32-point asymmetric Type-IV iDST. */ \
2435 do { \
2436 int t0h; \
2437 int t4h; \
2438 int tbh; \
2439 int tfh; \
2440 int tgh; \
2441 int tkh; \
2442 int trh; \
2443 int tvh; \
2444 /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
2445 tf -= (tg*1301 + 1024) >> 11; \
2446 /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
2447 tg += (tf*5777 + 2048) >> 12; \
2448 /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
2449 tf -= (tg*2753 + 2048) >> 12; \
2450 /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
2451 th -= (te*2513 + 16384) >> 15; \
2452 /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
2453 te -= (th*7567 + 4096) >> 13; \
2454 /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
2455 th += (te*515 + 512) >> 10; \
2456 /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
2457 tj -= (tc*6331 + 4096) >> 13; \
2458 /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
2459 tc -= (tj*1817 + 2048) >> 12; \
2460 /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
2461 tj += (tc*1517 + 1024) >> 11; \
2462 /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
2463 ti += (td*3391 + 4096) >> 13; \
2464 /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
2465 td += (ti*5209 + 4096) >> 13; \
2466 /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
2467 ti -= (td*2373 + 2048) >> 12; \
2468 /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
2469 tr -= (t4*8195 + 8192) >> 14; \
2470 /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
2471 t4 += (tr*1413 + 512) >> 10; \
2472 /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
2473 tr -= (t4*5017 + 4096) >> 13; \
2474 /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
2475 t5 -= (tq*2109 + 4096) >> 13; \
2476 /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
2477 tq -= (t5*3099 + 2048) >> 12; \
2478 /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
2479 t5 += (tq*4359 + 4096) >> 13; \
2480 /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
2481 tp -= (t6*6287 + 16384) >> 15; \
2482 /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
2483 t6 -= (tp*6671 + 4096) >> 13; \
2484 /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
2485 tp += (t6*1061 + 1024) >> 11; \
2486 /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
2487 t7 -= (to*2229 + 2048) >> 12; \
2488 /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
2489 to += (t7*5707 + 2048) >> 12; \
2490 /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
2491 t7 -= (to*323 + 256) >> 9; \
2492 /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
2493 tk += (tb*1971 + 1024) >> 11; \
2494 /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
2495 tb += (tk*1545 + 2048) >> 12; \
2496 /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
2497 tk -= (tb*3459 + 2048) >> 12; \
2498 /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
2499 tl -= (ta*5417 + 8192) >> 14; \
2500 /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
2501 ta -= (tl*2855 + 2048) >> 12; \
2502 /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
2503 tl += (ta*2261 + 2048) >> 12; \
2504 /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
2505 t9 -= (tm*4327 + 16384) >> 15; \
2506 /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
2507 tm -= (t9*891 + 512) >> 10; \
2508 /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
2509 t9 += (tm*4167 + 4096) >> 13; \
2510 /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
2511 tn -= (t8*2413 + 2048) >> 12; \
2512 /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
2513 t8 += (tn*5749 + 2048) >> 12; \
2514 /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
2515 tn -= (t8*5331 + 4096) >> 13; \
2516 /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
2517 ts += (t3*2571 + 2048) >> 12; \
2518 /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
2519 t3 += (ts*4169 + 4096) >> 13; \
2520 /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
2521 ts -= (t3*5477 + 4096) >> 13; \
2522 /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
2523 tt -= (t2*4187 + 4096) >> 13; \
2524 /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
2525 t2 -= (tt*4695 + 4096) >> 13; \
2526 /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
2527 tt += (t2*2527 + 2048) >> 12; \
2528 /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
2529 t1 -= (tu*815 + 16384) >> 15; \
2530 /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
2531 tu -= (t1*1997 + 1024) >> 11; \
2532 /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
2533 t1 += (tu*4099 + 4096) >> 13; \
2534 /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
2535 tv -= (t0*5593 + 4096) >> 13; \
2536 /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
2537 t0 += (tv*5791 + 2048) >> 12; \
2538 /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
2539 tv -= (t0*2847 + 2048) >> 12; \
2540 \
2541 t7 = -t7; \
2542 tf = -tf; \
2543 tn = -tn; \
2544 tr = -tr; \
2545 \
2546 t7 -= OD_DCT_RSHIFT(t6, 1); \
2547 t6 += t7; \
2548 tp -= OD_DCT_RSHIFT(to, 1); \
2549 to += tp; \
2550 tr -= OD_DCT_RSHIFT(tq, 1); \
2551 tq += tr; \
2552 t5 -= OD_DCT_RSHIFT(t4, 1); \
2553 t4 += t5; \
2554 tt -= OD_DCT_RSHIFT(t3, 1); \
2555 t3 += tt; \
2556 ts -= OD_DCT_RSHIFT(t2, 1); \
2557 t2 += ts; \
2558 tv += OD_DCT_RSHIFT(tu, 1); \
2559 tu -= tv; \
2560 t1 -= OD_DCT_RSHIFT(t0, 1); \
2561 t0 += t1; \
2562 th -= OD_DCT_RSHIFT(tg, 1); \
2563 tg += th; \
2564 tf -= OD_DCT_RSHIFT(te, 1); \
2565 te += tf; \
2566 ti += OD_DCT_RSHIFT(tc, 1); \
2567 tc -= ti; \
2568 tj += OD_DCT_RSHIFT(td, 1); \
2569 td -= tj; \
2570 tn -= OD_DCT_RSHIFT(tm, 1); \
2571 tm += tn; \
2572 t9 -= OD_DCT_RSHIFT(t8, 1); \
2573 t8 += t9; \
2574 tl -= OD_DCT_RSHIFT(tb, 1); \
2575 tb += tl; \
2576 tk -= OD_DCT_RSHIFT(ta, 1); \
2577 ta += tk; \
2578 \
2579 ti -= th; \
2580 th += OD_DCT_RSHIFT(ti, 1); \
2581 td -= te; \
2582 te += OD_DCT_RSHIFT(td, 1); \
2583 tm += tl; \
2584 tl -= OD_DCT_RSHIFT(tm, 1); \
2585 t9 += ta; \
2586 ta -= OD_DCT_RSHIFT(t9, 1); \
2587 tp += tq; \
2588 tq -= OD_DCT_RSHIFT(tp, 1); \
2589 t6 += t5; \
2590 t5 -= OD_DCT_RSHIFT(t6, 1); \
2591 t2 -= t1; \
2592 t1 += OD_DCT_RSHIFT(t2, 1); \
2593 tt -= tu; \
2594 tu += OD_DCT_RSHIFT(tt, 1); \
2595 tr += t7; \
2596 trh = OD_DCT_RSHIFT(tr, 1); \
2597 t7 -= trh; \
2598 t4 -= to; \
2599 t4h = OD_DCT_RSHIFT(t4, 1); \
2600 to += t4h; \
2601 t0 += t3; \
2602 t0h = OD_DCT_RSHIFT(t0, 1); \
2603 t3 -= t0h; \
2604 tv += ts; \
2605 tvh = OD_DCT_RSHIFT(tv, 1); \
2606 ts -= tvh; \
2607 tf -= tc; \
2608 tfh = OD_DCT_RSHIFT(tf, 1); \
2609 tc += tfh; \
2610 tg += tj; \
2611 tgh = OD_DCT_RSHIFT(tg, 1); \
2612 tj -= tgh; \
2613 tb -= t8; \
2614 tbh = OD_DCT_RSHIFT(tb, 1); \
2615 t8 += tbh; \
2616 tk += tn; \
2617 tkh = OD_DCT_RSHIFT(tk, 1); \
2618 tn -= tkh; \
2619 \
2620 ta = -ta; \
2621 tq = -tq; \
2622 \
2623 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2624 te -= (th*4861 + 16384) >> 15; \
2625 /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
2626 th += (te*1189 + 2048) >> 12; \
2627 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2628 te -= (th*4861 + 16384) >> 15; \
2629 /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
2630 tm -= (t9*513 + 1024) >> 11; \
2631 /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
2632 t9 += (tm*7723 + 8192) >> 14; \
2633 /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
2634 tm -= (t9*513 + 1024) >> 11; \
2635 /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2636 t6 -= (tp*2931 + 4096) >> 13; \
2637 /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
2638 tp += (t6*5197 + 4096) >> 13; \
2639 /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2640 t6 -= (tp*2931 + 4096) >> 13; \
2641 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2642 tu -= (t1*805 + 8192) >> 14; \
2643 /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
2644 t1 += (tu*803 + 4096) >> 13; \
2645 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2646 tu -= (t1*805 + 8192) >> 14; \
2647 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2648 ti -= (td*4861 + 16384) >> 15; \
2649 /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
2650 td += (ti*1189 + 2048) >> 12; \
2651 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2652 ti -= (td*4861 + 16384) >> 15; \
2653 /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
2654 ta -= (tl*2455 + 2048) >> 12; \
2655 /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
2656 tl += (ta*14449 + 8192) >> 14; \
2657 /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
2658 ta -= (tl*2455 + 2048) >> 12; \
2659 /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2660 t5 -= (tq*11725 + 16384) >> 15; \
2661 /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
2662 tq += (t5*5197 + 4096) >> 13; \
2663 /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2664 t5 -= (tq*11725 + 16384) >> 15; \
2665 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2666 t2 -= (tt*805 + 8192) >> 14; \
2667 /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
2668 tt += (t2*803 + 4096) >> 13; \
2669 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2670 t2 -= (tt*805 + 8192) >> 14; \
2671 \
2672 tl = -tl; \
2673 ti = -ti; \
2674 \
2675 th += OD_DCT_RSHIFT(t9, 1); \
2676 t9 -= th; \
2677 te -= OD_DCT_RSHIFT(tm, 1); \
2678 tm += te; \
2679 t1 += OD_DCT_RSHIFT(tp, 1); \
2680 tp -= t1; \
2681 tu -= OD_DCT_RSHIFT(t6, 1); \
2682 t6 += tu; \
2683 ta -= OD_DCT_RSHIFT(td, 1); \
2684 td += ta; \
2685 tl += OD_DCT_RSHIFT(ti, 1); \
2686 ti -= tl; \
2687 t5 += OD_DCT_RSHIFT(tt, 1); \
2688 tt -= t5; \
2689 tq += OD_DCT_RSHIFT(t2, 1); \
2690 t2 -= tq; \
2691 \
2692 t8 -= tgh; \
2693 tg += t8; \
2694 tn += tfh; \
2695 tf -= tn; \
2696 t7 -= tvh; \
2697 tv += t7; \
2698 to -= t0h; \
2699 t0 += to; \
2700 tc += tbh; \
2701 tb -= tc; \
2702 tj += tkh; \
2703 tk -= tj; \
2704 ts += t4h; \
2705 t4 -= ts; \
2706 t3 += trh; \
2707 tr -= t3; \
2708 \
2709 tk = -tk; \
2710 \
2711 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
2712 tc -= (tj*2485 + 4096) >> 13; \
2713 /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
2714 tj += (tc*18205 + 16384) >> 15; \
2715 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
2716 tc -= (tj*2485 + 4096) >> 13; \
2717 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
2718 ts -= (t3*3227 + 16384) >> 15; \
2719 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
2720 t3 += (ts*6393 + 16384) >> 15; \
2721 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
2722 ts -= (t3*3227 + 16384) >> 15; \
2723 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
2724 tk -= (tb*17515 + 16384) >> 15; \
2725 /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
2726 tb += (tk*13623 + 8192) >> 14; \
2727 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
2728 tk -= (tb*17515 + 16384) >> 15; \
2729 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
2730 t4 -= (tr*6723 + 4096) >> 13; \
2731 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
2732 tr += (t4*16069 + 8192) >> 14; \
2733 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
2734 t4 -= (tr*6723 + 4096) >> 13; \
2735 \
2736 t4 = -t4; \
2737 \
2738 tp += tm; \
2739 tm -= OD_DCT_RSHIFT(tp, 1); \
2740 t9 -= t6; \
2741 t6 += OD_DCT_RSHIFT(t9, 1); \
2742 th -= t1; \
2743 t1 += OD_DCT_RSHIFT(th, 1); \
2744 tu -= te; \
2745 te += OD_DCT_RSHIFT(tu, 1); /* pass */ \
2746 t5 -= tl; \
2747 tl += OD_DCT_RSHIFT(t5, 1); \
2748 ta += tq; \
2749 tq -= OD_DCT_RSHIFT(ta, 1); \
2750 td += tt; \
2751 tt -= OD_DCT_RSHIFT(td, 1); \
2752 t2 -= ti; \
2753 ti += OD_DCT_RSHIFT(t2, 1); /* pass */ \
2754 t7 += t8; \
2755 t8 -= OD_DCT_RSHIFT(t7, 1); \
2756 tn -= to; \
2757 to += OD_DCT_RSHIFT(tn, 1); \
2758 tf -= tv; \
2759 tv += OD_DCT_RSHIFT(tf, 1); \
2760 t0 += tg; \
2761 tg -= OD_DCT_RSHIFT(t0, 1); /* pass */ \
2762 tj -= t3; \
2763 t3 += OD_DCT_RSHIFT(tj, 1); /* pass */ \
2764 ts -= tc; \
2765 tc += OD_DCT_RSHIFT(ts, 1); \
2766 t4 -= tb; \
2767 tb += OD_DCT_RSHIFT(t4, 1); /* pass */ \
2768 tk -= tr; \
2769 tr += OD_DCT_RSHIFT(tk, 1); \
2770 \
2771 t1 = -t1; \
2772 t3 = -t3; \
2773 t7 = -t7; \
2774 t8 = -t8; \
2775 tg = -tg; \
2776 tm = -tm; \
2777 to = -to; \
2778 \
2779 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2780 tm -= (t9*14341 + 8192) >> 14; \
2781 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2782 t9 += (tm*15137 + 8192) >> 14; \
2783 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2784 tm -= (t9*4161 + 8192) >> 14; \
2785 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2786 tp -= (t6*4161 + 8192) >> 14; \
2787 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2788 t6 += (tp*15137 + 8192) >> 14; \
2789 /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2790 tp -= (t6*28681 + 16384) >> 15; \
2791 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2792 th += (te*19195 + 16384) >> 15; \
2793 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2794 te += (th*11585 + 8192) >> 14; \
2795 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2796 th -= (te*29957 + 16384) >> 15; \
2797 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2798 tq -= (t5*14341 + 8192) >> 14; \
2799 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2800 t5 += (tq*15137 + 8192) >> 14; \
2801 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2802 tq -= (t5*4161 + 8192) >> 14; \
2803 /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
2804 ta -= (tl*3259 + 4096) >> 13; \
2805 /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
2806 tl += (ta*3135 + 8192) >> 14; \
2807 /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
2808 ta -= (tl*3259 + 4096) >> 13; \
2809 /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2810 ti -= (td*7489 + 4096) >> 13; \
2811 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2812 td += (ti*11585 + 8192) >> 14; \
2813 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2814 ti += (td*19195 + 16384) >> 15; \
2815 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2816 to -= (t7*14341 + 8192) >> 14; \
2817 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2818 t7 += (to*15137 + 8192) >> 14; \
2819 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2820 to -= (t7*4161 + 8192) >> 14; \
2821 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2822 tn -= (t8*4161 + 8192) >> 14; \
2823 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2824 t8 += (tn*15137 + 8192) >> 14; \
2825 /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2826 tn -= (t8*28681 + 16384) >> 15; \
2827 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2828 tf += (tg*19195 + 16384) >> 15; \
2829 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2830 tg += (tf*11585 + 8192) >> 14; \
2831 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2832 tf -= (tg*29957 + 16384) >> 15; \
2833 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2834 tj += (tc*19195 + 16384) >> 15; \
2835 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2836 tc += (tj*11585 + 8192) >> 14; \
2837 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2838 tj -= (tc*29957 + 16384) >> 15; \
2839 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
2840 tk += (tb*13573 + 8192) >> 14; \
2841 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
2842 tb -= (tk*11585 + 16384) >> 15; \
2843 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
2844 tk += (tb*13573 + 8192) >> 14; \
2845 \
2846 tf = -tf; \
2847 \
2848 } \
2849 while (0)
2850
/* Embedded 64-point orthonormal Type-II fDCT.
   All 64 arguments are lvalues that are read and overwritten in place.
   Stage 1 folds the arguments into 32 pairs (u0 with u, u1 with u_, u2 with
    uZ, ...), alternating between two butterfly forms:
     A: hi = lo - hi; hih = hi/2; lo -= hih;
     B: hi += lo;     hih = hi/2; lo = hih - lo;
    where the halving is done with OD_DCT_RSHIFT(., 1) and the halves are kept
    in the "h"-suffixed locals (uh_ is the half of u).
   Stage 2 hands form A's lo and form B's hi (together with its half) to
    OD_FDCT_32_ASYM, and form A's hi and form B's lo to OD_FDST_32_ASYM. */
#define OD_FDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
 us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
 ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
 ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
  do { \
    /* Halved butterfly outputs. */ \
    int uwh, uxh, uyh, uzh, uAh, uBh, uCh, uDh; \
    int uEh, uFh, uGh, uHh, uIh, uJh, uKh, uLh; \
    int uMh, uNh, uOh, uPh, uQh, uRh, uSh, uTh; \
    int uUh, uVh, uWh, uXh, uYh, uZh, u_h, uh_; \
    /* Stage 1: one butterfly per line, forms A and B alternating. */ \
    u = u0 - u; uh_ = OD_DCT_RSHIFT(u, 1); u0 -= uh_; \
    u_ += u1; u_h = OD_DCT_RSHIFT(u_, 1); u1 = u_h - u1; \
    uZ = u2 - uZ; uZh = OD_DCT_RSHIFT(uZ, 1); u2 -= uZh; \
    uY += u3; uYh = OD_DCT_RSHIFT(uY, 1); u3 = uYh - u3; \
    uX = u4 - uX; uXh = OD_DCT_RSHIFT(uX, 1); u4 -= uXh; \
    uW += u5; uWh = OD_DCT_RSHIFT(uW, 1); u5 = uWh - u5; \
    uV = u6 - uV; uVh = OD_DCT_RSHIFT(uV, 1); u6 -= uVh; \
    uU += u7; uUh = OD_DCT_RSHIFT(uU, 1); u7 = uUh - u7; \
    uT = u8 - uT; uTh = OD_DCT_RSHIFT(uT, 1); u8 -= uTh; \
    uS += u9; uSh = OD_DCT_RSHIFT(uS, 1); u9 = uSh - u9; \
    uR = ua - uR; uRh = OD_DCT_RSHIFT(uR, 1); ua -= uRh; \
    uQ += ub; uQh = OD_DCT_RSHIFT(uQ, 1); ub = uQh - ub; \
    uP = uc - uP; uPh = OD_DCT_RSHIFT(uP, 1); uc -= uPh; \
    uO += ud; uOh = OD_DCT_RSHIFT(uO, 1); ud = uOh - ud; \
    uN = ue - uN; uNh = OD_DCT_RSHIFT(uN, 1); ue -= uNh; \
    uM += uf; uMh = OD_DCT_RSHIFT(uM, 1); uf = uMh - uf; \
    uL = ug - uL; uLh = OD_DCT_RSHIFT(uL, 1); ug -= uLh; \
    uK += uh; uKh = OD_DCT_RSHIFT(uK, 1); uh = uKh - uh; \
    uJ = ui - uJ; uJh = OD_DCT_RSHIFT(uJ, 1); ui -= uJh; \
    uI += uj; uIh = OD_DCT_RSHIFT(uI, 1); uj = uIh - uj; \
    uH = uk - uH; uHh = OD_DCT_RSHIFT(uH, 1); uk -= uHh; \
    uG += ul; uGh = OD_DCT_RSHIFT(uG, 1); ul = uGh - ul; \
    uF = um - uF; uFh = OD_DCT_RSHIFT(uF, 1); um -= uFh; \
    uE += un; uEh = OD_DCT_RSHIFT(uE, 1); un = uEh - un; \
    uD = uo - uD; uDh = OD_DCT_RSHIFT(uD, 1); uo -= uDh; \
    uC += up; uCh = OD_DCT_RSHIFT(uC, 1); up = uCh - up; \
    uB = uq - uB; uBh = OD_DCT_RSHIFT(uB, 1); uq -= uBh; \
    uA += ur; uAh = OD_DCT_RSHIFT(uA, 1); ur = uAh - ur; \
    uz = us - uz; uzh = OD_DCT_RSHIFT(uz, 1); us -= uzh; \
    uy += ut; uyh = OD_DCT_RSHIFT(uy, 1); ut = uyh - ut; \
    ux = uu - ux; uxh = OD_DCT_RSHIFT(ux, 1); uu -= uxh; \
    uw += uv; uwh = OD_DCT_RSHIFT(uw, 1); uv = uwh - uv; \
    /* Stage 2: 32-point asymmetric sub-transforms on the two halves. */ \
    OD_FDCT_32_ASYM(u0, uw, uwh, ug, uM, uMh, u8, uE, uEh, uo, uU, uUh, \
     u4, uA, uAh, uk, uQ, uQh, uc, uI, uIh, us, uY, uYh, u2, uy, uyh, \
     ui, uO, uOh, ua, uG, uGh, uq, uW, uWh, u6, uC, uCh, um, uS, uSh, \
     ue, uK, uKh, uu, u_, u_h); \
    OD_FDST_32_ASYM(u, uv, uL, uf, uT, un, uD, u7, uX, ur, uH, ub, uP, uj, \
     uz, u3, uZ, ut, uJ, ud, uR, ul, uB, u5, uV, up, uF, u9, uN, uh, ux, u1); \
  } \
  while (0)
2993
/* Embedded 64-point orthonormal Type-II iDCT.
   All 64 arguments are lvalues that are read and overwritten in place.
   Stage 1 runs OD_IDST_32_ASYM and OD_IDCT_32_ASYM on the two halves of the
    arguments. The locals u1h, u3h, ..., uvh are passed uninitialised to
    OD_IDCT_32_ASYM, which is expected to assign them before they are read
    in stage 2.
   Stage 2 recombines the halves with 32 butterflies in two alternating
    forms:
     A: hih = hi/2; lo += hih; hi = lo - hi;
     B: hi = loh - hi; lo -= hi;
    where the halving is done with OD_DCT_RSHIFT(., 1) and loh is the half
    value produced for lo in stage 1. */
#define OD_IDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
 us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
 ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
 ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
  do { \
    /* Half values written by OD_IDCT_32_ASYM (consumed by form B). */ \
    int u1h, u3h, u5h, u7h, u9h, ubh, udh, ufh; \
    int uhh, ujh, ulh, unh, uph, urh, uth, uvh; \
    /* Half values computed locally (form A); uh_ is the half of u. */ \
    int uxh, uzh, uBh, uDh, uFh, uHh, uJh, uLh; \
    int uNh, uPh, uRh, uTh, uVh, uXh, uZh, uh_; \
    /* Stage 1: 32-point asymmetric sub-transforms. */ \
    OD_IDST_32_ASYM(u, uL, uT, uD, uX, uH, uP, uz, uZ, uJ, uR, uB, uV, uF, \
     uN, ux, u_, uK, uS, uC, uW, uG, uO, uy, uY, uI, uQ, uA, uU, uE, uM, uw); \
    OD_IDCT_32_ASYM(u0, ug, u8, uo, u4, uk, uc, us, u2, ui, ua, uq, u6, um, \
     ue, uu, u1, u1h, uh, uhh, u9, u9h, up, uph, u5, u5h, ul, ulh, ud, udh, \
     ut, uth, u3, u3h, uj, ujh, ub, ubh, ur, urh, u7, u7h, un, unh, uf, ufh, \
     uv, uvh); \
    /* Stage 2: one butterfly per line, forms A and B alternating. */ \
    uh_ = OD_DCT_RSHIFT(u, 1); u0 += uh_; u = u0 - u; \
    u_ = u1h - u_; u1 -= u_; \
    uZh = OD_DCT_RSHIFT(uZ, 1); u2 += uZh; uZ = u2 - uZ; \
    uY = u3h - uY; u3 -= uY; \
    uXh = OD_DCT_RSHIFT(uX, 1); u4 += uXh; uX = u4 - uX; \
    uW = u5h - uW; u5 -= uW; \
    uVh = OD_DCT_RSHIFT(uV, 1); u6 += uVh; uV = u6 - uV; \
    uU = u7h - uU; u7 -= uU; \
    uTh = OD_DCT_RSHIFT(uT, 1); u8 += uTh; uT = u8 - uT; \
    uS = u9h - uS; u9 -= uS; \
    uRh = OD_DCT_RSHIFT(uR, 1); ua += uRh; uR = ua - uR; \
    uQ = ubh - uQ; ub -= uQ; \
    uPh = OD_DCT_RSHIFT(uP, 1); uc += uPh; uP = uc - uP; \
    uO = udh - uO; ud -= uO; \
    uNh = OD_DCT_RSHIFT(uN, 1); ue += uNh; uN = ue - uN; \
    uM = ufh - uM; uf -= uM; \
    uLh = OD_DCT_RSHIFT(uL, 1); ug += uLh; uL = ug - uL; \
    uK = uhh - uK; uh -= uK; \
    uJh = OD_DCT_RSHIFT(uJ, 1); ui += uJh; uJ = ui - uJ; \
    uI = ujh - uI; uj -= uI; \
    uHh = OD_DCT_RSHIFT(uH, 1); uk += uHh; uH = uk - uH; \
    uG = ulh - uG; ul -= uG; \
    uFh = OD_DCT_RSHIFT(uF, 1); um += uFh; uF = um - uF; \
    uE = unh - uE; un -= uE; \
    uDh = OD_DCT_RSHIFT(uD, 1); uo += uDh; uD = uo - uD; \
    uC = uph - uC; up -= uC; \
    uBh = OD_DCT_RSHIFT(uB, 1); uq += uBh; uB = uq - uB; \
    uA = urh - uA; ur -= uA; \
    uzh = OD_DCT_RSHIFT(uz, 1); us += uzh; uz = us - uz; \
    uy = uth - uy; ut -= uy; \
    uxh = OD_DCT_RSHIFT(ux, 1); uu += uxh; ux = uu - ux; \
    uw = uvh - uw; uv -= uw; \
  } while (0)
3119#endif
3120
Nathan E. Egge945176a2017-10-20 21:37:58 -04003121/* 4-point orthonormal Type-II fDCT. */
Monty Montgomery02078a32017-07-11 21:22:29 -04003122void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) {
Nathan E. Egge945176a2017-10-20 21:37:58 -04003123 /* 4 "muls", 8 adds, 2 shifts */
Monty Montgomery02078a32017-07-11 21:22:29 -04003124 int q0;
3125 int q1;
3126 int q2;
3127 int q3;
Nathan E. Egge945176a2017-10-20 21:37:58 -04003128 int u1;
3129 int t0;
3130 int t1;
3131 int t2;
3132 int t3;
Monty Montgomery02078a32017-07-11 21:22:29 -04003133 q0 = x[0*xstride];
Nathan E. Egge945176a2017-10-20 21:37:58 -04003134 q1 = x[1*xstride];
3135 q2 = x[2*xstride];
Monty Montgomery02078a32017-07-11 21:22:29 -04003136 q3 = x[3*xstride];
Nathan E. Egge945176a2017-10-20 21:37:58 -04003137 q3 = q0 - q3;
3138 q0 -= OD_DCT_RSHIFT(q3, 1);
3139 u1 = q1 + q2;
3140 q2 = q1 - q2;
3141 /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
3142 t0 = (q3*8867 + 16384) >> 15;
3143 /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
3144 t1 = (q2*21407 + 16384) >> 15;
3145 /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
3146 t2 = (q3*21407 + 16384) >> 15;
3147 /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
3148 t3 = (q2*8867 + 16384) >> 15;
3149 q0 += OD_DCT_RSHIFT(u1, 1);
3150 q1 = q0 - u1;
3151 q2 = t3 + t2;
3152 q3 = t0 - t1;
3153 y[0] = q0;
3154 y[1] = q2;
3155 y[2] = q1;
3156 y[3] = q3;
Monty Montgomery02078a32017-07-11 21:22:29 -04003157}
3158
Nathan E. Egge945176a2017-10-20 21:37:58 -04003159/* 4-point orthonormal Type-II iDCT. */
Monty Montgomery02078a32017-07-11 21:22:29 -04003160void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) {
Nathan E. Egge945176a2017-10-20 21:37:58 -04003161 /* 4 "muls", 8 adds, 1 shift */
Monty Montgomery02078a32017-07-11 21:22:29 -04003162 int q0;
3163 int q1;
3164 int q2;
3165 int q3;
Nathan E. Egge945176a2017-10-20 21:37:58 -04003166 int q1h;
3167 int u0;
3168 int t0;
3169 int t1;
3170 int t2;
3171 int t3;
Monty Montgomery02078a32017-07-11 21:22:29 -04003172 q0 = y[0];
3173 q2 = y[1];
3174 q1 = y[2];
3175 q3 = y[3];
Nathan E. Egge945176a2017-10-20 21:37:58 -04003176 /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
3177 t0 = (q3*8867 + 16384) >> 15;
3178 /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
3179 t1 = (q2*21407 + 16384) >> 15;
3180 /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
3181 t2 = (q3*21407 + 16384) >> 15;
3182 /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
3183 t3 = (q2*8867 + 16384) >> 15;
3184 q3 = t0 + t1;
3185 q2 = t3 - t2;
3186 q1 = q0 - q1;
3187 q1h = OD_DCT_RSHIFT(q1, 1);
3188 q0 -= q1h;
3189 u0 = q0 + q3;
3190 q3 = q0 - q3;
3191 q2 = q1h - q2;
3192 q1 -= q2;
3193 x[0*xstride] = u0;
Monty Montgomery02078a32017-07-11 21:22:29 -04003194 x[1*xstride] = q1;
3195 x[2*xstride] = q2;
3196 x[3*xstride] = q3;
3197}
Monty Montgomerycf18fe42017-07-11 21:33:25 -04003198
Monty Montgomery573cf252017-08-02 05:45:14 -04003199void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride) {
3200 int q0;
3201 int q1;
3202 int q2;
3203 int q3;
Nathan Egge5a5e1ad2017-09-12 12:33:48 +00003204 q0 = x[3*xstride];
Nathan E. Egge72c99e12017-08-21 17:35:04 -04003205 q2 = x[2*xstride];
Nathan Egge5a5e1ad2017-09-12 12:33:48 +00003206 q1 = x[1*xstride];
3207 q3 = x[0*xstride];
3208 OD_FDST_4(q0, q2, q1, q3);
3209 y[0] = (od_coeff)q3;
3210 y[1] = (od_coeff)q2;
3211 y[2] = (od_coeff)q1;
3212 y[3] = (od_coeff)q0;
Monty Montgomery573cf252017-08-02 05:45:14 -04003213}
3214
3215void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]) {
3216 int q0;
3217 int q1;
3218 int q2;
3219 int q3;
Nathan Egge5a5e1ad2017-09-12 12:33:48 +00003220 q0 = y[3];
Nathan E. Egge72c99e12017-08-21 17:35:04 -04003221 q2 = y[2];
Nathan Egge5a5e1ad2017-09-12 12:33:48 +00003222 q1 = y[1];
3223 q3 = y[0];
3224 OD_IDST_4(q0, q2, q1, q3);
3225 x[0*xstride] = q3;
3226 x[1*xstride] = q2;
3227 x[2*xstride] = q1;
3228 x[3*xstride] = q0;
Monty Montgomery573cf252017-08-02 05:45:14 -04003229}
3230
Monty Montgomerycf18fe42017-07-11 21:33:25 -04003231void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride) {
3232 int r0;
3233 int r1;
3234 int r2;
3235 int r3;
3236 int r4;
3237 int r5;
3238 int r6;
3239 int r7;
3240 r0 = x[0*xstride];
3241 r4 = x[1*xstride];
3242 r2 = x[2*xstride];
3243 r6 = x[3*xstride];
3244 r1 = x[4*xstride];
3245 r5 = x[5*xstride];
3246 r3 = x[6*xstride];
3247 r7 = x[7*xstride];
3248 OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7);
3249 y[0] = (od_coeff)r0;
3250 y[1] = (od_coeff)r1;
3251 y[2] = (od_coeff)r2;
3252 y[3] = (od_coeff)r3;
3253 y[4] = (od_coeff)r4;
3254 y[5] = (od_coeff)r5;
3255 y[6] = (od_coeff)r6;
3256 y[7] = (od_coeff)r7;
3257}
3258
3259void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]) {
3260 int r0;
3261 int r1;
3262 int r2;
3263 int r3;
3264 int r4;
3265 int r5;
3266 int r6;
3267 int r7;
3268 r0 = y[0];
3269 r4 = y[1];
3270 r2 = y[2];
3271 r6 = y[3];
3272 r1 = y[4];
3273 r5 = y[5];
3274 r3 = y[6];
3275 r7 = y[7];
3276 OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7);
3277 x[0*xstride] = (od_coeff)r0;
3278 x[1*xstride] = (od_coeff)r1;
3279 x[2*xstride] = (od_coeff)r2;
3280 x[3*xstride] = (od_coeff)r3;
3281 x[4*xstride] = (od_coeff)r4;
3282 x[5*xstride] = (od_coeff)r5;
3283 x[6*xstride] = (od_coeff)r6;
3284 x[7*xstride] = (od_coeff)r7;
3285}
3286
/* Index permutation shared by od_bin_fdst8 and od_bin_idst8: entry i is the
    coefficient index that corresponds to element i of the array passed to or
    returned from od_poly_prod_8. */
const int OD_DST_8_PERM[8] = { 0, 7, 1, 6, 2, 5, 3, 4 };
3288
3289/* Computes the Polynomial Product Y(z) ≡ X(z)*H(z) modulo (z^8 + 1) using
3290 Nussbaumer's "short" algorithm [1].
3291 The monomial coefficients in Y(z) are exactly the values of an acyclic
3292 convolution of the monomial coefficients of X(z) and H(z).
3293 Since H(z) is fixed, the multiplication terms are constant and precomputed.
3294
3295 [1] Nussbaumer, Henri J. "Fast Fourier Transform and Convolution Algorithms"
3296 Springer-Verlag: Berlin, Heidelberg, New York (1981) pages 76-78. */
3297static void od_poly_prod_8(od_coeff y[8], const od_coeff x[8]) {
3298 /* 21 "muls", 75 adds, 18 shifts */
3299 od_coeff q0;
3300 od_coeff q1;
3301 od_coeff q2;
3302 od_coeff q3;
3303 od_coeff q4;
3304 od_coeff q5;
3305 od_coeff q6;
3306 od_coeff q7;
3307 od_coeff q8;
3308 od_coeff q9;
3309 od_coeff q10;
3310 od_coeff q11;
3311 od_coeff q12;
3312 od_coeff q13;
3313 od_coeff q14;
3314 od_coeff q15;
3315 od_coeff q16;
3316 od_coeff q17;
3317 od_coeff q18;
3318 od_coeff q19;
3319 od_coeff q20;
3320 od_coeff t0;
3321 od_coeff t1;
3322 od_coeff t2;
3323 od_coeff t3;
3324 od_coeff t4;
3325 od_coeff t5;
3326 od_coeff t6;
3327 od_coeff t7;
3328 od_coeff u0;
3329 od_coeff u1;
3330 od_coeff u1h;
3331 od_coeff u2;
3332 od_coeff u2h;
3333 od_coeff u3;
3334 od_coeff u4;
3335 od_coeff u4h;
3336 od_coeff u5;
3337 od_coeff u6;
3338 od_coeff u7;
3339 od_coeff u7h;
3340 od_coeff u8;
3341 od_coeff u9;
3342 od_coeff u10;
3343 od_coeff u11;
3344 od_coeff u12;
3345 od_coeff u13;
3346 od_coeff u14;
3347 od_coeff u15;
3348 od_coeff u16;
3349 od_coeff u17;
3350 od_coeff u18;
3351 od_coeff u19;
3352 od_coeff u20;
3353 od_coeff u21;
3354 od_coeff u22;
3355 od_coeff u23;
3356 od_coeff u24;
3357 od_coeff u25;
3358 od_coeff u26;
3359 od_coeff u27;
3360 t0 = x[0];
3361 t1 = x[1];
3362 t2 = x[2];
3363 t3 = x[3];
3364 t4 = x[4];
3365 t5 = x[5];
3366 t6 = x[6];
3367 t7 = x[7];
3368 /* Stage 0 Butterfly */
3369 u7 = t0 - t7;
3370 u7h = OD_DCT_RSHIFT(u7, 1);
3371 u0 = t0 - u7h;
3372 u2 = t2 - t6;
3373 u2h = OD_DCT_RSHIFT(u2, 1);
3374 u6 = t2 - u2h;
3375 u4 = t4 + t5;
3376 u4h = OD_DCT_RSHIFT(u4, 1);
3377 u5 = t4 - u4h;
3378 u1 = t3 - t1;
3379 u1h = OD_DCT_RSHIFT(u1, 1);
3380 u3 = t3 - u1h;
3381 /* Stage 1 Butterfly */
3382 q0 = u0 + u2h;
3383 q1 = q0 - u2;
3384 q4 = u3 + u4h;
3385 q5 = q4 - u4;
3386 q2 = u7h + u5;
3387 q7 = u7 - q2;
3388 q6 = u1h + u6;
3389 q3 = u1 - q6;
3390 /* Stage 2 Half-Butterfly */
3391 /*The intermediate sums can overflow 16 bits, but all SIMD instruction sets
3392 should be able to compute them without issue (i.e., using PAVGW or
3393 V{R}HADD.S16).*/
3394 q8 = (q0 + q4 + 1) >> 1;
3395 q9 = (q1 + q5) >> 1;
3396 q10 = (q2 + q3 + 1) >> 1;
3397 q11 = (q7 + q6) >> 1;
3398 /* Stage 3 */
3399 q12 = t0 + t3;
3400 q13 = t0;
3401 q14 = t3;
3402 q15 = t5 - t6;
3403 q16 = t6;
3404 q17 = t5;
3405 q18 = ((q6 + ((t0 + t6 + 1) >> 1)) - (q4 + (t5 >> 1))) >> 1;
3406 q19 = ((q7 + ((t5 + t6 + 1) >> 1)) - (q0 + (t3 >> 1))) >> 1;
3407 q20 = (q18 - q19) >> 1;
3408 /* Stage 4 */
3409 q0 = (-5995*q0 + 8192) >> 14;
3410 q1 = (-1373*q1 + 4096) >> 13;
3411 q2 = (22891*q2 + 16384) >> 15;
3412 q3 = (-217*q3 + 512) >> 10;
3413 q4 = (13427*q4 + 16384) >> 15;
3414 q5 = (-11013*q5 + 8192) >> 14;
3415 q6 = (1373*q6 + 1024) >> 11;
3416 q7 = (-14077*q7 + 16384) >> 15;
3417 q8 = (-1437*q8 + 16384) >> 15;
3418 q9 = (27519*q9 + 16384) >> 15;
3419 q10 = (-15947*q10 + 16384) >> 15;
3420 q11 = (-7891*q11 + 16384) >> 15;
3421 q12 = (4897*q12 + 16384) >> 15;
3422 q13 = (-5079*q13 + 8192) >> 14;
3423 q14 = (365*q14 + 16384) >> 15;
3424 q15 = (3325*q15 + 8192) >> 14;
3425 q16 = (-5225*q16 + 8192) >> 14;
3426 q17 = (-1425*q17 + 8192) >> 14;
3427 q18 = (3453*q18 + 16384) >> 15;
3428 q19 = (-8421*q19 + 8192) >> 14;
3429 q20 = (-20295*q20 + 16384) >> 15;
3430 /* Stage 5 */
3431 u0 = q0 + q8;
3432 u1 = q1 + q9;
3433 u2 = q2 + q10;
3434 u3 = q3 + q10;
3435 u4 = q4 + q8;
3436 u5 = q5 + q9;
3437 u6 = q6 + q11;
3438 u7 = q7 + q11;
3439 /* Stage 6 */
3440 u10 = u0 + u1;
3441 u11 = u0 - u1;
3442 u12 = u2 + u7;
3443 u13 = u2 - u7;
3444 u14 = u3 + u6;
3445 u15 = u3 - u6;
3446 u16 = u5 + u4;
3447 u17 = u5 - u4;
3448 /* Stage 7 */
3449 u8 = q19 + q20;
3450 u9 = q19 - q18;
3451 u18 = q12 + u8;
3452 u19 = u18 + q13;
3453 u20 = u18 + q14;
3454 u21 = u9 << 1;
3455 u22 = q15 + u21;
3456 u23 = q16 - u22;
3457 u24 = u22 + q17;
3458 u25 = u8 << 1;
3459 u26 = u25 << 1;
3460 u27 = u25 - u9;
3461 /* Stage 8 */
3462 y[0] = u14 + u16 + u20;
3463 y[1] = u12 - u10 - u25;
3464 y[2] = u9 + u13 - u17;
3465 y[3] = u9 - u10 - u12 - u19;
3466 y[4] = u15 - u11 - u27;
3467 y[5] = u23 - u11 - u15;
3468 y[6] = u13 + u17 - u24 + u26;
3469 y[7] = u16 - u14 + u21 - u25;
3470}
3471
Monty Montgomerycf18fe42017-07-11 21:33:25 -04003472void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride) {
Nathan E. Eggeefb44bb2017-10-22 05:42:06 -04003473 int i;
3474 od_coeff xp[8];
3475 od_coeff yp[8];
3476 for (i = 0; i < 8; i++) xp[i] = x[i*xstride];
3477 od_poly_prod_8(yp, xp);
3478 for (i = 0; i < 8; i++) y[OD_DST_8_PERM[i]] = yp[i];
Monty Montgomerycf18fe42017-07-11 21:33:25 -04003479}
3480
3481void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]) {
Nathan E. Eggeefb44bb2017-10-22 05:42:06 -04003482 int i;
3483 od_coeff xp[8];
3484 od_coeff yp[8];
3485 for (i = 0; i < 8; i++) yp[i] = y[OD_DST_8_PERM[i]];
3486 od_poly_prod_8(xp, yp);
3487 for (i = 0; i < 8; i++) x[i*xstride] = xp[i];
Monty Montgomerycf18fe42017-07-11 21:33:25 -04003488}
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003489
3490void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride) {
3491 int s0;
3492 int s1;
3493 int s2;
3494 int s3;
3495 int s4;
3496 int s5;
3497 int s6;
3498 int s7;
3499 int s8;
3500 int s9;
3501 int sa;
3502 int sb;
3503 int sc;
3504 int sd;
3505 int se;
3506 int sf;
3507 s0 = x[0*xstride];
3508 s8 = x[1*xstride];
3509 s4 = x[2*xstride];
3510 sc = x[3*xstride];
3511 s2 = x[4*xstride];
3512 sa = x[5*xstride];
3513 s6 = x[6*xstride];
3514 se = x[7*xstride];
3515 s1 = x[8*xstride];
3516 s9 = x[9*xstride];
3517 s5 = x[10*xstride];
3518 sd = x[11*xstride];
3519 s3 = x[12*xstride];
3520 sb = x[13*xstride];
3521 s7 = x[14*xstride];
3522 sf = x[15*xstride];
3523 OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3524 y[0] = (od_coeff)s0;
3525 y[1] = (od_coeff)s1;
3526 y[2] = (od_coeff)s2;
3527 y[3] = (od_coeff)s3;
3528 y[4] = (od_coeff)s4;
3529 y[5] = (od_coeff)s5;
3530 y[6] = (od_coeff)s6;
3531 y[7] = (od_coeff)s7;
3532 y[8] = (od_coeff)s8;
3533 y[9] = (od_coeff)s9;
3534 y[10] = (od_coeff)sa;
3535 y[11] = (od_coeff)sb;
3536 y[12] = (od_coeff)sc;
3537 y[13] = (od_coeff)sd;
3538 y[14] = (od_coeff)se;
3539 y[15] = (od_coeff)sf;
3540}
3541
3542void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]) {
3543 int s0;
3544 int s1;
3545 int s2;
3546 int s3;
3547 int s4;
3548 int s5;
3549 int s6;
3550 int s7;
3551 int s8;
3552 int s9;
3553 int sa;
3554 int sb;
3555 int sc;
3556 int sd;
3557 int se;
3558 int sf;
3559 s0 = y[0];
3560 s8 = y[1];
3561 s4 = y[2];
3562 sc = y[3];
3563 s2 = y[4];
3564 sa = y[5];
3565 s6 = y[6];
3566 se = y[7];
3567 s1 = y[8];
3568 s9 = y[9];
3569 s5 = y[10];
3570 sd = y[11];
3571 s3 = y[12];
3572 sb = y[13];
3573 s7 = y[14];
3574 sf = y[15];
3575 OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3576 x[0*xstride] = (od_coeff)s0;
3577 x[1*xstride] = (od_coeff)s1;
3578 x[2*xstride] = (od_coeff)s2;
3579 x[3*xstride] = (od_coeff)s3;
3580 x[4*xstride] = (od_coeff)s4;
3581 x[5*xstride] = (od_coeff)s5;
3582 x[6*xstride] = (od_coeff)s6;
3583 x[7*xstride] = (od_coeff)s7;
3584 x[8*xstride] = (od_coeff)s8;
3585 x[9*xstride] = (od_coeff)s9;
3586 x[10*xstride] = (od_coeff)sa;
3587 x[11*xstride] = (od_coeff)sb;
3588 x[12*xstride] = (od_coeff)sc;
3589 x[13*xstride] = (od_coeff)sd;
3590 x[14*xstride] = (od_coeff)se;
3591 x[15*xstride] = (od_coeff)sf;
3592}
3593
3594void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride) {
3595 int s0;
3596 int s1;
3597 int s2;
3598 int s3;
3599 int s4;
3600 int s5;
3601 int s6;
3602 int s7;
3603 int s8;
3604 int s9;
3605 int sa;
3606 int sb;
3607 int sc;
3608 int sd;
3609 int se;
3610 int sf;
3611 s0 = x[15*xstride];
3612 s8 = x[14*xstride];
3613 s4 = x[13*xstride];
3614 sc = x[12*xstride];
3615 s2 = x[11*xstride];
3616 sa = x[10*xstride];
3617 s6 = x[9*xstride];
3618 se = x[8*xstride];
3619 s1 = x[7*xstride];
3620 s9 = x[6*xstride];
3621 s5 = x[5*xstride];
3622 sd = x[4*xstride];
3623 s3 = x[3*xstride];
3624 sb = x[2*xstride];
3625 s7 = x[1*xstride];
3626 sf = x[0*xstride];
3627 OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3628 y[0] = (od_coeff)sf;
Nathan E. Egge69a16432017-10-18 12:50:28 -04003629 y[1] = (od_coeff)-se;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003630 y[2] = (od_coeff)sd;
Nathan E. Egge69a16432017-10-18 12:50:28 -04003631 y[3] = (od_coeff)-sc;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003632 y[4] = (od_coeff)sb;
Nathan E. Egge69a16432017-10-18 12:50:28 -04003633 y[5] = (od_coeff)-sa;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003634 y[6] = (od_coeff)s9;
Nathan E. Egge69a16432017-10-18 12:50:28 -04003635 y[7] = (od_coeff)-s8;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003636 y[8] = (od_coeff)s7;
Nathan E. Egge69a16432017-10-18 12:50:28 -04003637 y[9] = (od_coeff)-s6;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003638 y[10] = (od_coeff)s5;
Nathan E. Egge69a16432017-10-18 12:50:28 -04003639 y[11] = (od_coeff)-s4;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003640 y[12] = (od_coeff)s3;
Nathan E. Egge69a16432017-10-18 12:50:28 -04003641 y[13] = (od_coeff)-s2;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003642 y[14] = (od_coeff)s1;
Nathan E. Egge69a16432017-10-18 12:50:28 -04003643 y[15] = (od_coeff)-s0;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003644}
3645
3646void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]) {
3647 int s0;
3648 int s1;
3649 int s2;
3650 int s3;
3651 int s4;
3652 int s5;
3653 int s6;
3654 int s7;
3655 int s8;
3656 int s9;
3657 int sa;
3658 int sb;
3659 int sc;
3660 int sd;
3661 int se;
3662 int sf;
Nathan E. Egge69a16432017-10-18 12:50:28 -04003663 s0 = -y[15];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003664 s8 = y[14];
Nathan E. Egge69a16432017-10-18 12:50:28 -04003665 s4 = -y[13];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003666 sc = y[12];
Nathan E. Egge69a16432017-10-18 12:50:28 -04003667 s2 = -y[11];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003668 sa = y[10];
Nathan E. Egge69a16432017-10-18 12:50:28 -04003669 s6 = -y[9];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003670 se = y[8];
Nathan E. Egge69a16432017-10-18 12:50:28 -04003671 s1 = -y[7];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003672 s9 = y[6];
Nathan E. Egge69a16432017-10-18 12:50:28 -04003673 s5 = -y[5];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003674 sd = y[4];
Nathan E. Egge69a16432017-10-18 12:50:28 -04003675 s3 = -y[3];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003676 sb = y[2];
Nathan E. Egge69a16432017-10-18 12:50:28 -04003677 s7 = -y[1];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003678 sf = y[0];
3679 OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3680 x[0*xstride] = (od_coeff)sf;
3681 x[1*xstride] = (od_coeff)se;
3682 x[2*xstride] = (od_coeff)sd;
3683 x[3*xstride] = (od_coeff)sc;
3684 x[4*xstride] = (od_coeff)sb;
3685 x[5*xstride] = (od_coeff)sa;
3686 x[6*xstride] = (od_coeff)s9;
3687 x[7*xstride] = (od_coeff)s8;
3688 x[8*xstride] = (od_coeff)s7;
3689 x[9*xstride] = (od_coeff)s6;
3690 x[10*xstride] = (od_coeff)s5;
3691 x[11*xstride] = (od_coeff)s4;
3692 x[12*xstride] = (od_coeff)s3;
3693 x[13*xstride] = (od_coeff)s2;
3694 x[14*xstride] = (od_coeff)s1;
3695 x[15*xstride] = (od_coeff)s0;
3696}
Monty Montgomery2cb52ba2017-07-17 18:27:27 -04003697
3698void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride) {
3699 /*215 adds, 38 shifts, 87 "muls".*/
3700 int t0;
3701 int t1;
3702 int t2;
3703 int t3;
3704 int t4;
3705 int t5;
3706 int t6;
3707 int t7;
3708 int t8;
3709 int t9;
3710 int ta;
3711 int tb;
3712 int tc;
3713 int td;
3714 int te;
3715 int tf;
3716 int tg;
3717 int th;
3718 int ti;
3719 int tj;
3720 int tk;
3721 int tl;
3722 int tm;
3723 int tn;
3724 int to;
3725 int tp;
3726 int tq;
3727 int tr;
3728 int ts;
3729 int tt;
3730 int tu;
3731 int tv;
3732 t0 = x[0*xstride];
3733 tg = x[1*xstride];
3734 t8 = x[2*xstride];
3735 to = x[3*xstride];
3736 t4 = x[4*xstride];
3737 tk = x[5*xstride];
3738 tc = x[6*xstride];
3739 ts = x[7*xstride];
3740 t2 = x[8*xstride];
3741 ti = x[9*xstride];
3742 ta = x[10*xstride];
3743 tq = x[11*xstride];
3744 t6 = x[12*xstride];
3745 tm = x[13*xstride];
3746 te = x[14*xstride];
3747 tu = x[15*xstride];
3748 t1 = x[16*xstride];
3749 th = x[17*xstride];
3750 t9 = x[18*xstride];
3751 tp = x[19*xstride];
3752 t5 = x[20*xstride];
3753 tl = x[21*xstride];
3754 td = x[22*xstride];
3755 tt = x[23*xstride];
3756 t3 = x[24*xstride];
3757 tj = x[25*xstride];
3758 tb = x[26*xstride];
3759 tr = x[27*xstride];
3760 t7 = x[28*xstride];
3761 tn = x[29*xstride];
3762 tf = x[30*xstride];
3763 tv = x[31*xstride];
3764 OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
3765 t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
3766 y[0] = (od_coeff)t0;
3767 y[1] = (od_coeff)t1;
3768 y[2] = (od_coeff)t2;
3769 y[3] = (od_coeff)t3;
3770 y[4] = (od_coeff)t4;
3771 y[5] = (od_coeff)t5;
3772 y[6] = (od_coeff)t6;
3773 y[7] = (od_coeff)t7;
3774 y[8] = (od_coeff)t8;
3775 y[9] = (od_coeff)t9;
3776 y[10] = (od_coeff)ta;
3777 y[11] = (od_coeff)tb;
3778 y[12] = (od_coeff)tc;
3779 y[13] = (od_coeff)td;
3780 y[14] = (od_coeff)te;
3781 y[15] = (od_coeff)tf;
3782 y[16] = (od_coeff)tg;
3783 y[17] = (od_coeff)th;
3784 y[18] = (od_coeff)ti;
3785 y[19] = (od_coeff)tj;
3786 y[20] = (od_coeff)tk;
3787 y[21] = (od_coeff)tl;
3788 y[22] = (od_coeff)tm;
3789 y[23] = (od_coeff)tn;
3790 y[24] = (od_coeff)to;
3791 y[25] = (od_coeff)tp;
3792 y[26] = (od_coeff)tq;
3793 y[27] = (od_coeff)tr;
3794 y[28] = (od_coeff)ts;
3795 y[29] = (od_coeff)tt;
3796 y[30] = (od_coeff)tu;
3797 y[31] = (od_coeff)tv;
3798}
3799
3800void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]) {
3801 int t0;
3802 int t1;
3803 int t2;
3804 int t3;
3805 int t4;
3806 int t5;
3807 int t6;
3808 int t7;
3809 int t8;
3810 int t9;
3811 int ta;
3812 int tb;
3813 int tc;
3814 int td;
3815 int te;
3816 int tf;
3817 int tg;
3818 int th;
3819 int ti;
3820 int tj;
3821 int tk;
3822 int tl;
3823 int tm;
3824 int tn;
3825 int to;
3826 int tp;
3827 int tq;
3828 int tr;
3829 int ts;
3830 int tt;
3831 int tu;
3832 int tv;
3833 t0 = y[0];
3834 tg = y[1];
3835 t8 = y[2];
3836 to = y[3];
3837 t4 = y[4];
3838 tk = y[5];
3839 tc = y[6];
3840 ts = y[7];
3841 t2 = y[8];
3842 ti = y[9];
3843 ta = y[10];
3844 tq = y[11];
3845 t6 = y[12];
3846 tm = y[13];
3847 te = y[14];
3848 tu = y[15];
3849 t1 = y[16];
3850 th = y[17];
3851 t9 = y[18];
3852 tp = y[19];
3853 t5 = y[20];
3854 tl = y[21];
3855 td = y[22];
3856 tt = y[23];
3857 t3 = y[24];
3858 tj = y[25];
3859 tb = y[26];
3860 tr = y[27];
3861 t7 = y[28];
3862 tn = y[29];
3863 tf = y[30];
3864 tv = y[31];
3865 OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
3866 t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
3867 x[0*xstride] = (od_coeff)t0;
3868 x[1*xstride] = (od_coeff)t1;
3869 x[2*xstride] = (od_coeff)t2;
3870 x[3*xstride] = (od_coeff)t3;
3871 x[4*xstride] = (od_coeff)t4;
3872 x[5*xstride] = (od_coeff)t5;
3873 x[6*xstride] = (od_coeff)t6;
3874 x[7*xstride] = (od_coeff)t7;
3875 x[8*xstride] = (od_coeff)t8;
3876 x[9*xstride] = (od_coeff)t9;
3877 x[10*xstride] = (od_coeff)ta;
3878 x[11*xstride] = (od_coeff)tb;
3879 x[12*xstride] = (od_coeff)tc;
3880 x[13*xstride] = (od_coeff)td;
3881 x[14*xstride] = (od_coeff)te;
3882 x[15*xstride] = (od_coeff)tf;
3883 x[16*xstride] = (od_coeff)tg;
3884 x[17*xstride] = (od_coeff)th;
3885 x[18*xstride] = (od_coeff)ti;
3886 x[19*xstride] = (od_coeff)tj;
3887 x[20*xstride] = (od_coeff)tk;
3888 x[21*xstride] = (od_coeff)tl;
3889 x[22*xstride] = (od_coeff)tm;
3890 x[23*xstride] = (od_coeff)tn;
3891 x[24*xstride] = (od_coeff)to;
3892 x[25*xstride] = (od_coeff)tp;
3893 x[26*xstride] = (od_coeff)tq;
3894 x[27*xstride] = (od_coeff)tr;
3895 x[28*xstride] = (od_coeff)ts;
3896 x[29*xstride] = (od_coeff)tt;
3897 x[30*xstride] = (od_coeff)tu;
3898 x[31*xstride] = (od_coeff)tv;
3899}
Monty Montgomerya4e245a2017-07-22 00:48:31 -04003900
3901#if CONFIG_TX64X64
3902void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride) {
3903 int t0;
3904 int t1;
3905 int t2;
3906 int t3;
3907 int t4;
3908 int t5;
3909 int t6;
3910 int t7;
3911 int t8;
3912 int t9;
3913 int ta;
3914 int tb;
3915 int tc;
3916 int td;
3917 int te;
3918 int tf;
3919 int tg;
3920 int th;
3921 int ti;
3922 int tj;
3923 int tk;
3924 int tl;
3925 int tm;
3926 int tn;
3927 int to;
3928 int tp;
3929 int tq;
3930 int tr;
3931 int ts;
3932 int tt;
3933 int tu;
3934 int tv;
3935 int tw;
3936 int tx;
3937 int ty;
3938 int tz;
3939 int tA;
3940 int tB;
3941 int tC;
3942 int tD;
3943 int tE;
3944 int tF;
3945 int tG;
3946 int tH;
3947 int tI;
3948 int tJ;
3949 int tK;
3950 int tL;
3951 int tM;
3952 int tN;
3953 int tO;
3954 int tP;
3955 int tQ;
3956 int tR;
3957 int tS;
3958 int tT;
3959 int tU;
3960 int tV;
3961 int tW;
3962 int tX;
3963 int tY;
3964 int tZ;
3965 int t_;
3966 int t;
3967 t0 = x[0*xstride];
3968 tw = x[1*xstride];
3969 tg = x[2*xstride];
3970 tM = x[3*xstride];
3971 t8 = x[4*xstride];
3972 tE = x[5*xstride];
3973 to = x[6*xstride];
3974 tU = x[7*xstride];
3975 t4 = x[8*xstride];
3976 tA = x[9*xstride];
3977 tk = x[10*xstride];
3978 tQ = x[11*xstride];
3979 tc = x[12*xstride];
3980 tI = x[13*xstride];
3981 ts = x[14*xstride];
3982 tY = x[15*xstride];
3983 t2 = x[16*xstride];
3984 ty = x[17*xstride];
3985 ti = x[18*xstride];
3986 tO = x[19*xstride];
3987 ta = x[20*xstride];
3988 tG = x[21*xstride];
3989 tq = x[22*xstride];
3990 tW = x[23*xstride];
3991 t6 = x[24*xstride];
3992 tC = x[25*xstride];
3993 tm = x[26*xstride];
3994 tS = x[27*xstride];
3995 te = x[28*xstride];
3996 tK = x[29*xstride];
3997 tu = x[30*xstride];
3998 t_ = x[31*xstride];
3999 t1 = x[32*xstride];
4000 tx = x[33*xstride];
4001 th = x[34*xstride];
4002 tN = x[35*xstride];
4003 t9 = x[36*xstride];
4004 tF = x[37*xstride];
4005 tp = x[38*xstride];
4006 tV = x[39*xstride];
4007 t5 = x[40*xstride];
4008 tB = x[41*xstride];
4009 tl = x[42*xstride];
4010 tR = x[43*xstride];
4011 td = x[44*xstride];
4012 tJ = x[45*xstride];
4013 tt = x[46*xstride];
4014 tZ = x[47*xstride];
4015 t3 = x[48*xstride];
4016 tz = x[49*xstride];
4017 tj = x[50*xstride];
4018 tP = x[51*xstride];
4019 tb = x[52*xstride];
4020 tH = x[53*xstride];
4021 tr = x[54*xstride];
4022 tX = x[55*xstride];
4023 t7 = x[56*xstride];
4024 tD = x[57*xstride];
4025 tn = x[58*xstride];
4026 tT = x[59*xstride];
4027 tf = x[60*xstride];
4028 tL = x[61*xstride];
4029 tv = x[62*xstride];
4030 t = x[63*xstride];
4031 OD_FDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
4032 t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
4033 th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
4034 tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
4035 y[0] = (od_coeff)t0;
4036 y[1] = (od_coeff)t1;
4037 y[2] = (od_coeff)t2;
4038 y[3] = (od_coeff)t3;
4039 y[4] = (od_coeff)t4;
4040 y[5] = (od_coeff)t5;
4041 y[6] = (od_coeff)t6;
4042 y[7] = (od_coeff)t7;
4043 y[8] = (od_coeff)t8;
4044 y[9] = (od_coeff)t9;
4045 y[10] = (od_coeff)ta;
4046 y[11] = (od_coeff)tb;
4047 y[12] = (od_coeff)tc;
4048 y[13] = (od_coeff)td;
4049 y[14] = (od_coeff)te;
4050 y[15] = (od_coeff)tf;
4051 y[16] = (od_coeff)tg;
4052 y[17] = (od_coeff)th;
4053 y[18] = (od_coeff)ti;
4054 y[19] = (od_coeff)tj;
4055 y[20] = (od_coeff)tk;
4056 y[21] = (od_coeff)tl;
4057 y[22] = (od_coeff)tm;
4058 y[23] = (od_coeff)tn;
4059 y[24] = (od_coeff)to;
4060 y[25] = (od_coeff)tp;
4061 y[26] = (od_coeff)tq;
4062 y[27] = (od_coeff)tr;
4063 y[28] = (od_coeff)ts;
4064 y[29] = (od_coeff)tt;
4065 y[30] = (od_coeff)tu;
4066 y[31] = (od_coeff)tv;
4067 y[32] = (od_coeff)tw;
4068 y[33] = (od_coeff)tx;
4069 y[34] = (od_coeff)ty;
4070 y[35] = (od_coeff)tz;
4071 y[36] = (od_coeff)tA;
4072 y[37] = (od_coeff)tB;
4073 y[38] = (od_coeff)tC;
4074 y[39] = (od_coeff)tD;
4075 y[40] = (od_coeff)tE;
4076 y[41] = (od_coeff)tF;
4077 y[41] = (od_coeff)tF;
4078 y[42] = (od_coeff)tG;
4079 y[43] = (od_coeff)tH;
4080 y[44] = (od_coeff)tI;
4081 y[45] = (od_coeff)tJ;
4082 y[46] = (od_coeff)tK;
4083 y[47] = (od_coeff)tL;
4084 y[48] = (od_coeff)tM;
4085 y[49] = (od_coeff)tN;
4086 y[50] = (od_coeff)tO;
4087 y[51] = (od_coeff)tP;
4088 y[52] = (od_coeff)tQ;
4089 y[53] = (od_coeff)tR;
4090 y[54] = (od_coeff)tS;
4091 y[55] = (od_coeff)tT;
4092 y[56] = (od_coeff)tU;
4093 y[57] = (od_coeff)tV;
4094 y[58] = (od_coeff)tW;
4095 y[59] = (od_coeff)tX;
4096 y[60] = (od_coeff)tY;
4097 y[61] = (od_coeff)tZ;
4098 y[62] = (od_coeff)t_;
4099 y[63] = (od_coeff)t;
4100}
4101
4102void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]) {
4103 int t0;
4104 int t1;
4105 int t2;
4106 int t3;
4107 int t4;
4108 int t5;
4109 int t6;
4110 int t7;
4111 int t8;
4112 int t9;
4113 int ta;
4114 int tb;
4115 int tc;
4116 int td;
4117 int te;
4118 int tf;
4119 int tg;
4120 int th;
4121 int ti;
4122 int tj;
4123 int tk;
4124 int tl;
4125 int tm;
4126 int tn;
4127 int to;
4128 int tp;
4129 int tq;
4130 int tr;
4131 int ts;
4132 int tt;
4133 int tu;
4134 int tv;
4135 int tw;
4136 int tx;
4137 int ty;
4138 int tz;
4139 int tA;
4140 int tB;
4141 int tC;
4142 int tD;
4143 int tE;
4144 int tF;
4145 int tG;
4146 int tH;
4147 int tI;
4148 int tJ;
4149 int tK;
4150 int tL;
4151 int tM;
4152 int tN;
4153 int tO;
4154 int tP;
4155 int tQ;
4156 int tR;
4157 int tS;
4158 int tT;
4159 int tU;
4160 int tV;
4161 int tW;
4162 int tX;
4163 int tY;
4164 int tZ;
4165 int t_;
4166 int t;
4167 t0 = y[0];
4168 tw = y[1];
4169 tg = y[2];
4170 tM = y[3];
4171 t8 = y[4];
4172 tE = y[5];
4173 to = y[6];
4174 tU = y[7];
4175 t4 = y[8];
4176 tA = y[9];
4177 tk = y[10];
4178 tQ = y[11];
4179 tc = y[12];
4180 tI = y[13];
4181 ts = y[14];
4182 tY = y[15];
4183 t2 = y[16];
4184 ty = y[17];
4185 ti = y[18];
4186 tO = y[19];
4187 ta = y[20];
4188 tG = y[21];
4189 tq = y[22];
4190 tW = y[23];
4191 t6 = y[24];
4192 tC = y[25];
4193 tm = y[26];
4194 tS = y[27];
4195 te = y[28];
4196 tK = y[29];
4197 tu = y[30];
4198 t_ = y[31];
4199 t1 = y[32];
4200 tx = y[33];
4201 th = y[34];
4202 tN = y[35];
4203 t9 = y[36];
4204 tF = y[37];
4205 tp = y[38];
4206 tV = y[39];
4207 t5 = y[40];
4208 tB = y[41];
4209 tl = y[42];
4210 tR = y[43];
4211 td = y[44];
4212 tJ = y[45];
4213 tt = y[46];
4214 tZ = y[47];
4215 t3 = y[48];
4216 tz = y[49];
4217 tj = y[50];
4218 tP = y[51];
4219 tb = y[52];
4220 tH = y[53];
4221 tr = y[54];
4222 tX = y[55];
4223 t7 = y[56];
4224 tD = y[57];
4225 tn = y[58];
4226 tT = y[59];
4227 tf = y[60];
4228 tL = y[61];
4229 tv = y[62];
4230 t = y[63];
4231 OD_IDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
4232 t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
4233 th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
4234 tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
4235 x[0*xstride] = (od_coeff)t0;
4236 x[1*xstride] = (od_coeff)t1;
4237 x[2*xstride] = (od_coeff)t2;
4238 x[3*xstride] = (od_coeff)t3;
4239 x[4*xstride] = (od_coeff)t4;
4240 x[5*xstride] = (od_coeff)t5;
4241 x[6*xstride] = (od_coeff)t6;
4242 x[7*xstride] = (od_coeff)t7;
4243 x[8*xstride] = (od_coeff)t8;
4244 x[9*xstride] = (od_coeff)t9;
4245 x[10*xstride] = (od_coeff)ta;
4246 x[11*xstride] = (od_coeff)tb;
4247 x[12*xstride] = (od_coeff)tc;
4248 x[13*xstride] = (od_coeff)td;
4249 x[14*xstride] = (od_coeff)te;
4250 x[15*xstride] = (od_coeff)tf;
4251 x[16*xstride] = (od_coeff)tg;
4252 x[17*xstride] = (od_coeff)th;
4253 x[18*xstride] = (od_coeff)ti;
4254 x[19*xstride] = (od_coeff)tj;
4255 x[20*xstride] = (od_coeff)tk;
4256 x[21*xstride] = (od_coeff)tl;
4257 x[22*xstride] = (od_coeff)tm;
4258 x[23*xstride] = (od_coeff)tn;
4259 x[24*xstride] = (od_coeff)to;
4260 x[25*xstride] = (od_coeff)tp;
4261 x[26*xstride] = (od_coeff)tq;
4262 x[27*xstride] = (od_coeff)tr;
4263 x[28*xstride] = (od_coeff)ts;
4264 x[29*xstride] = (od_coeff)tt;
4265 x[30*xstride] = (od_coeff)tu;
4266 x[31*xstride] = (od_coeff)tv;
4267 x[32*xstride] = (od_coeff)tw;
4268 x[33*xstride] = (od_coeff)tx;
4269 x[34*xstride] = (od_coeff)ty;
4270 x[35*xstride] = (od_coeff)tz;
4271 x[36*xstride] = (od_coeff)tA;
4272 x[37*xstride] = (od_coeff)tB;
4273 x[38*xstride] = (od_coeff)tC;
4274 x[39*xstride] = (od_coeff)tD;
4275 x[40*xstride] = (od_coeff)tE;
4276 x[41*xstride] = (od_coeff)tF;
4277 x[41*xstride] = (od_coeff)tF;
4278 x[42*xstride] = (od_coeff)tG;
4279 x[43*xstride] = (od_coeff)tH;
4280 x[44*xstride] = (od_coeff)tI;
4281 x[45*xstride] = (od_coeff)tJ;
4282 x[46*xstride] = (od_coeff)tK;
4283 x[47*xstride] = (od_coeff)tL;
4284 x[48*xstride] = (od_coeff)tM;
4285 x[49*xstride] = (od_coeff)tN;
4286 x[50*xstride] = (od_coeff)tO;
4287 x[51*xstride] = (od_coeff)tP;
4288 x[52*xstride] = (od_coeff)tQ;
4289 x[53*xstride] = (od_coeff)tR;
4290 x[54*xstride] = (od_coeff)tS;
4291 x[55*xstride] = (od_coeff)tT;
4292 x[56*xstride] = (od_coeff)tU;
4293 x[57*xstride] = (od_coeff)tV;
4294 x[58*xstride] = (od_coeff)tW;
4295 x[59*xstride] = (od_coeff)tX;
4296 x[60*xstride] = (od_coeff)tY;
4297 x[61*xstride] = (od_coeff)tZ;
4298 x[62*xstride] = (od_coeff)t_;
4299 x[63*xstride] = (od_coeff)t;
4300}
4301#endif
Nathan E. Egge5e6bda82017-09-16 10:13:51 -04004302
4303void daala_fdct4(const tran_low_t *input, tran_low_t *output) {
4304 int i;
4305 od_coeff x[4];
4306 od_coeff y[4];
4307 for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
4308 od_bin_fdct4(y, x, 1);
4309 for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
4310}
4311
4312void daala_idct4(const tran_low_t *input, tran_low_t *output) {
4313 int i;
4314 od_coeff x[4];
4315 od_coeff y[4];
4316 for (i = 0; i < 4; i++) y[i] = input[i];
4317 od_bin_idct4(x, 1, y);
4318 for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i];
4319}
Nathan E. Egge1aefb5e2017-09-16 11:28:41 -04004320
4321void daala_fdst4(const tran_low_t *input, tran_low_t *output) {
4322 int i;
4323 od_coeff x[4];
4324 od_coeff y[4];
4325 for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
4326 od_bin_fdst4(y, x, 1);
4327 for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
4328}
4329
4330void daala_idst4(const tran_low_t *input, tran_low_t *output) {
4331 int i;
4332 od_coeff x[4];
4333 od_coeff y[4];
4334 for (i = 0; i < 4; i++) y[i] = input[i];
4335 od_bin_idst4(x, 1, y);
4336 for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i];
4337}
Nathan E. Egge75bfeb82017-09-16 20:41:24 -04004338
Nathan E. Egge31f24ee2017-09-18 11:25:26 -04004339void daala_idtx4(const tran_low_t *input, tran_low_t *output) {
4340 int i;
4341 for (i = 0; i < 4; i++) output[i] = input[i];
4342}
4343
Nathan E. Egge75bfeb82017-09-16 20:41:24 -04004344void daala_fdct8(const tran_low_t *input, tran_low_t *output) {
4345 int i;
4346 od_coeff x[8];
4347 od_coeff y[8];
4348 for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
4349 od_bin_fdct8(y, x, 1);
4350 for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
4351}
4352
4353void daala_idct8(const tran_low_t *input, tran_low_t *output) {
4354 int i;
4355 od_coeff x[8];
4356 od_coeff y[8];
4357 for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i];
4358 od_bin_idct8(x, 1, y);
4359 for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i];
4360}
Nathan E. Egge8a873db2017-09-16 20:55:20 -04004361
4362void daala_fdst8(const tran_low_t *input, tran_low_t *output) {
4363 int i;
4364 od_coeff x[8];
4365 od_coeff y[8];
4366 for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
4367 od_bin_fdst8(y, x, 1);
4368 for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
4369}
4370
4371void daala_idst8(const tran_low_t *input, tran_low_t *output) {
4372 int i;
4373 od_coeff x[8];
4374 od_coeff y[8];
4375 for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i];
4376 od_bin_idst8(x, 1, y);
4377 for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i];
4378}
Nathan E. Eggec5c1e562017-09-16 22:18:18 -04004379
Nathan E. Egge3f45fb32017-09-18 11:34:48 -04004380void daala_idtx8(const tran_low_t *input, tran_low_t *output) {
4381 int i;
4382 for (i = 0; i < 8; i++) output[i] = input[i];
4383}
4384
Nathan E. Eggec5c1e562017-09-16 22:18:18 -04004385void daala_fdct16(const tran_low_t *input, tran_low_t *output) {
4386 int i;
4387 od_coeff x[16];
4388 od_coeff y[16];
4389 for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i];
4390 od_bin_fdct16(y, x, 1);
4391 for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i];
4392}
4393
4394void daala_idct16(const tran_low_t *input, tran_low_t *output) {
4395 int i;
4396 od_coeff x[16];
4397 od_coeff y[16];
4398 for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i];
4399 od_bin_idct16(x, 1, y);
4400 for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i];
4401}
Nathan E. Eggecbcff062017-09-16 22:32:19 -04004402
4403void daala_fdst16(const tran_low_t *input, tran_low_t *output) {
4404 int i;
4405 od_coeff x[16];
4406 od_coeff y[16];
4407 for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i];
4408 od_bin_fdst16(y, x, 1);
4409 for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i];
4410}
4411
4412void daala_idst16(const tran_low_t *input, tran_low_t *output) {
4413 int i;
4414 od_coeff x[16];
4415 od_coeff y[16];
4416 for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i];
4417 od_bin_idst16(x, 1, y);
4418 for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i];
4419}
Nathan E. Eggedfd1a922017-09-16 23:35:30 -04004420
Nathan E. Egge74e7fd02017-09-18 11:40:31 -04004421void daala_idtx16(const tran_low_t *input, tran_low_t *output) {
4422 int i;
4423 for (i = 0; i < 16; i++) output[i] = input[i];
4424}
4425
Nathan E. Eggedfd1a922017-09-16 23:35:30 -04004426void daala_fdct32(const tran_low_t *input, tran_low_t *output) {
4427 int i;
4428 od_coeff x[32];
4429 od_coeff y[32];
4430 for (i = 0; i < 32; i++) x[i] = (od_coeff)input[i];
4431 od_bin_fdct32(y, x, 1);
4432 for (i = 0; i < 32; i++) output[i] = (tran_low_t)y[i];
4433}
4434
4435void daala_idct32(const tran_low_t *input, tran_low_t *output) {
4436 int i;
4437 od_coeff x[32];
4438 od_coeff y[32];
4439 for (i = 0; i < 32; i++) y[i] = (od_coeff)input[i];
4440 od_bin_idct32(x, 1, y);
4441 for (i = 0; i < 32; i++) output[i] = (tran_low_t)x[i];
4442}
Nathan E. Egged8661142017-09-16 23:57:51 -04004443
Nathan E. Eggef6d3ba62017-09-18 15:40:08 -04004444/* Preserve the "half-right" transform behavior. */
4445void daala_fdst32(const tran_low_t *input, tran_low_t *output) {
4446 int i;
4447 tran_low_t inputhalf[16];
4448 for (i = 0; i < 16; ++i) {
4449 output[16 + i] = input[i];
4450 }
4451 for (i = 0; i < 16; ++i) {
4452 inputhalf[i] = input[i + 16];
4453 }
4454 daala_fdct16(inputhalf, output);
4455}
4456
4457/* Preserve the "half-right" transform behavior. */
4458void daala_idst32(const tran_low_t *input, tran_low_t *output) {
4459 int i;
4460 tran_low_t inputhalf[16];
4461 for (i = 0; i < 16; ++i) {
4462 inputhalf[i] = input[i];
4463 }
4464 for (i = 0; i < 16; ++i) {
4465 output[i] = input[16 + i];
4466 }
4467 daala_idct16(inputhalf, output + 16);
4468}
4469
Nathan E. Egge4c77fc02017-09-18 11:47:52 -04004470void daala_idtx32(const tran_low_t *input, tran_low_t *output) {
4471 int i;
4472 for (i = 0; i < 32; i++) output[i] = input[i];
4473}
4474
Nathan E. Egged8661142017-09-16 23:57:51 -04004475#if CONFIG_TX64X64
4476void daala_fdct64(const tran_low_t *input, tran_low_t *output) {
4477 int i;
4478 od_coeff x[64];
4479 od_coeff y[64];
4480 for (i = 0; i < 64; i++) x[i] = (od_coeff)input[i];
4481 od_bin_fdct64(y, x, 1);
4482 for (i = 0; i < 64; i++) output[i] = (tran_low_t)y[i];
4483}
4484
4485void daala_idct64(const tran_low_t *input, tran_low_t *output) {
4486 int i;
4487 od_coeff x[64];
4488 od_coeff y[64];
4489 for (i = 0; i < 64; i++) y[i] = (od_coeff)input[i];
4490 od_bin_idct64(x, 1, y);
4491 for (i = 0; i < 64; i++) output[i] = (tran_low_t)x[i];
4492}
Nathan E. Egge01b1d912017-09-18 12:02:22 -04004493
Nathan E. Egge2496a852017-09-18 15:59:54 -04004494/* Preserve the "half-right" transform behavior. */
4495void daala_fdst64(const tran_low_t *input, tran_low_t *output) {
4496 int i;
4497 tran_low_t inputhalf[32];
4498 for (i = 0; i < 32; ++i) {
4499 output[32 + i] = input[i];
4500 }
4501 for (i = 0; i < 32; ++i) {
4502 inputhalf[i] = input[i + 32];
4503 }
4504 daala_fdct32(inputhalf, output);
4505}
4506
4507/* Preserve the "half-right" transform behavior. */
4508void daala_idst64(const tran_low_t *input, tran_low_t *output) {
4509 int i;
4510 tran_low_t inputhalf[32];
4511 for (i = 0; i < 32; ++i) {
4512 inputhalf[i] = input[i];
4513 }
4514 for (i = 0; i < 32; ++i) {
4515 output[i] = input[32 + i];
4516 }
4517 daala_idct32(inputhalf, output + 32);
4518}
4519
Nathan E. Egge01b1d912017-09-18 12:02:22 -04004520void daala_idtx64(const tran_low_t *input, tran_low_t *output) {
4521 int i;
4522 for (i = 0; i < 64; i++) output[i] = input[i];
4523}
Nathan E. Egged8661142017-09-16 23:57:51 -04004524#endif