blob: c35cc44dcc769c32e7c72acae876f7655fecbdc3 [file] [log] [blame]
Monty Montgomery02078a32017-07-11 21:22:29 -04001#include "av1/common/daala_tx.h"
2#include "av1/common/odintrin.h"
3
4/* clang-format off */
5
6# define OD_DCT_RSHIFT(_a, _b) OD_UNBIASED_RSHIFT32(_a, _b)
7
8/* TODO: Daala DCT overflow checks need to be ported as a later test */
9# if defined(OD_DCT_CHECK_OVERFLOW)
10# else
11# define OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx)
12# endif
13
/* OD_FDCT_2(p0, p1): embedded 2-point orthonormal Type-II fDCT.
   Applies three integer lifting steps in place to p0 and p1. Each argument
   is expanded several times and assigned to, so pass plain, side-effect-free
   lvalues. Every multiply is preceded by an OD_DCT_OVERFLOW_CHECK hook that
   receives (val, scale, offset, idx) for that step. */
#define OD_FDCT_2(p0, p1) \
  do { \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 13573, 16384, 100); \
    p0 -= (p1*13573 + 16384) >> 15; \
    /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 5793, 4096, 101); \
    p1 += (p0*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 3393, 4096, 102); \
    p0 -= (p1*3393 + 4096) >> 13; \
  } \
  while (0)
28
/* OD_IDCT_2(p0, p1): embedded 2-point orthonormal Type-II iDCT.
   Runs the same three lifting steps as OD_FDCT_2 in the opposite order with
   the opposite sign, updating p0 and p1 in place. Each argument is expanded
   several times and assigned to, so pass plain, side-effect-free lvalues. */
#define OD_IDCT_2(p0, p1) \
  do { \
    /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    p0 += (p1*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    p1 -= (p0*5793 + 4096) >> 13; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    p0 += (p1*13573 + 16384) >> 15; \
  } \
  while (0)
40
/* OD_FDCT_2_ASYM(p0, p1, p1h): embedded 2-point asymmetric Type-II fDCT.
   A single add/subtract butterfly performed in place on p0 and p1. p1h is
   only read; OD_FDCT_4 passes OD_DCT_RSHIFT(p1, 1) computed beforehand.
   p0 and p1 are assigned to, so pass plain lvalues. */
#define OD_FDCT_2_ASYM(p0, p1, p1h) \
  do { \
    p0 += p1h; \
    p1 = p0 - p1; \
  } \
  while (0)
48
/* OD_IDCT_2_ASYM(p0, p1, p1h): embedded 2-point asymmetric Type-II iDCT.
   Updates p0 and p1 in place and additionally stores OD_DCT_RSHIFT(p1, 1)
   of the new p1 into p1h, so the caller can reuse that half value. All three
   arguments are assigned to and must be plain lvalues. */
#define OD_IDCT_2_ASYM(p0, p1, p1h) \
  do { \
    p1 = p0 - p1; \
    p1h = OD_DCT_RSHIFT(p1, 1); \
    p0 -= p1h; \
  } \
  while (0)
57
/* OD_FDST_2(p0, p1): embedded 2-point orthonormal Type-IV fDST.
   Applies three integer lifting steps in place to p0 and p1. Each argument
   is expanded several times and assigned to, so pass plain, side-effect-free
   lvalues. Every multiply is preceded by an OD_DCT_OVERFLOW_CHECK hook that
   receives (val, scale, offset, idx) for that step. */
#define OD_FDST_2(p0, p1) \
  do { \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 103); \
    p0 -= (p1*10947 + 8192) >> 14; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 473, 256, 104); \
    p1 += (p0*473 + 256) >> 9; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 105); \
    p0 -= (p1*10947 + 8192) >> 14; \
  } \
  while (0)
72
/* OD_IDST_2(p0, p1): embedded 2-point orthonormal Type-IV iDST.
   Runs the same three lifting steps as OD_FDST_2 in the opposite order with
   the opposite sign, updating p0 and p1 in place. Each argument is expanded
   several times and assigned to, so pass plain, side-effect-free lvalues. */
#define OD_IDST_2(p0, p1) \
  do { \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    p0 += (p1*10947 + 8192) >> 14; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    p1 -= (p0*473 + 256) >> 9; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    p0 += (p1*10947 + 8192) >> 14; \
  } \
  while (0)
84
/* OD_FDST_2_ASYM(p0, p1): embedded 2-point asymmetric Type-IV fDST.
   Applies three integer lifting steps in place to p0 and p1. Each argument
   is expanded several times and assigned to, so pass plain, side-effect-free
   lvalues. Every multiply is preceded by an OD_DCT_OVERFLOW_CHECK hook that
   receives (val, scale, offset, idx) for that step. */
#define OD_FDST_2_ASYM(p0, p1) \
  do { \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 11507, 8192, 187); \
    p0 -= (p1*11507 + 8192) >> 14; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 669, 512, 188); \
    p1 += (p0*669 + 512) >> 10; \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 4573, 2048, 189); \
    p0 -= (p1*4573 + 2048) >> 12; \
  } \
  while (0)
99
/* OD_IDST_2_ASYM(p0, p1): embedded 2-point asymmetric Type-IV iDST.
   Runs the same three lifting steps as OD_FDST_2_ASYM in the opposite order
   with the opposite sign, updating p0 and p1 in place. Each argument is
   expanded several times and assigned to, so pass plain, side-effect-free
   lvalues. */
#define OD_IDST_2_ASYM(p0, p1) \
  do { \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    p0 += (p1*4573 + 2048) >> 12; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    p1 -= (p0*669 + 512) >> 10; \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    p0 += (p1*11507 + 8192) >> 14; \
  } \
  while (0)
111
/* OD_FDCT_4(q0, q2, q1, q3): embedded 4-point orthonormal Type-II fDCT.
   Performs two add/subtract butterflies in place (keeping the halved terms in
   the block-local ints q2h and q3h), then hands (q0, q2) to OD_FDCT_2_ASYM
   and (q3, q1) to OD_FDST_2_ASYM. All four arguments are assigned to and
   expanded more than once, so pass plain, side-effect-free lvalues. */
#define OD_FDCT_4(q0, q2, q1, q3) \
  do { \
    int q2h; \
    int q3h; \
    q3 = q0 - q3; \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 -= q3h; \
    q2 += q1; \
    q2h = OD_DCT_RSHIFT(q2, 1); \
    q1 = q2h - q1; \
    OD_FDCT_2_ASYM(q0, q2, q2h); \
    OD_FDST_2_ASYM(q3, q1); \
  } \
  while (0)
127
/* OD_IDCT_4(q0, q2, q1, q3): embedded 4-point orthonormal Type-II iDCT.
   Runs OD_IDST_2_ASYM on (q3, q2) and OD_IDCT_2_ASYM on (q0, q1) first, then
   recombines the two halves with add/subtract butterflies. q1h is filled in
   by OD_IDCT_2_ASYM; q3h is computed here. All four arguments are assigned
   to and expanded more than once, so pass plain, side-effect-free lvalues. */
#define OD_IDCT_4(q0, q2, q1, q3) \
  do { \
    int q1h; \
    int q3h; \
    OD_IDST_2_ASYM(q3, q2); \
    OD_IDCT_2_ASYM(q0, q1, q1h); \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 += q3h; \
    q3 = q0 - q3; \
    q2 = q1h - q2; \
    q1 -= q2; \
  } \
  while (0)
142
/* OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h): embedded 4-point asymmetric
   Type-II fDCT.
   q2h and q3h are only read; OD_FDCT_8 passes OD_DCT_RSHIFT(q2, 1) and
   OD_DCT_RSHIFT(q3, 1) computed beforehand. After the butterflies, (q0, q2)
   goes through OD_FDCT_2 and (q3, q1) through OD_FDST_2. q0..q3 are updated
   in place and must be plain, side-effect-free lvalues. */
#define OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h) \
  do { \
    q0 += q3h; \
    q3 = q0 - q3; \
    q1 = q2h - q1; \
    q2 = q1 - q2; \
    OD_FDCT_2(q0, q2); \
    OD_FDST_2(q3, q1); \
  } \
  while (0)
154
/* OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h): embedded 4-point asymmetric
   Type-II iDCT.
   Runs OD_IDST_2 on (q3, q2) and OD_IDCT_2 on (q0, q1), then applies the
   butterflies. q1h and q3h are outputs: they receive OD_DCT_RSHIFT(q1, 1)
   and OD_DCT_RSHIFT(q3, 1) of the butterflied values for reuse by the
   caller. Every argument is assigned to and must be a plain lvalue. */
#define OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h) \
  do { \
    OD_IDST_2(q3, q2); \
    OD_IDCT_2(q0, q1); \
    q1 = q2 - q1; \
    q1h = OD_DCT_RSHIFT(q1, 1); \
    q2 = q1h - q2; \
    q3 = q0 - q3; \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 -= q3h; \
  } \
  while (0)
168
/* OD_FDST_4(q0, q2, q1, q3): embedded 4-point orthonormal Type-IV fDST.
   A three-step lifting rotation on (q2, q1), two add/subtract butterflies
   (halved terms kept in the block-local ints q0h and q1h), then two further
   three-step lifting rotations on (q2, q1) and (q3, q0). All arguments are
   updated in place and expanded many times, so pass plain, side-effect-free
   lvalues. Every multiply is preceded by an OD_DCT_OVERFLOW_CHECK hook that
   receives (val, scale, offset, idx) for that step. */
#define OD_FDST_4(q0, q2, q1, q3) \
  do { \
    int q0h; \
    int q1h; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 13573, 16384, 190); \
    q2 += (q1*13573 + 16384) >> 15; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(q2, 5793, 4096, 191); \
    q1 -= (q2*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 3393, 4096, 192); \
    q2 += (q1*3393 + 4096) >> 13; \
    q0 += q2; \
    q0h = OD_DCT_RSHIFT(q0, 1); \
    q2 = q0h - q2; \
    q1 += q3; \
    q1h = OD_DCT_RSHIFT(q1, 1); \
    q3 -= q1h; \
    /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~= \
        0.524455699240090 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 537, 512, 193); \
    q2 -= (q1*537 + 512) >> 10; \
    /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
    OD_DCT_OVERFLOW_CHECK(q2, 1609, 1024, 194); \
    q1 += (q2*1609 + 1024) >> 11; \
    /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~= \
        0.223847182092655 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 7335, 16384, 195); \
    q2 += (q1*7335 + 16384) >> 15; \
    /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~= \
        0.6215036383171189 */ \
    OD_DCT_OVERFLOW_CHECK(q0, 5091, 4096, 196); \
    q3 += (q0*5091 + 4096) >> 13; \
    /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
    OD_DCT_OVERFLOW_CHECK(q3, 5681, 2048, 197); \
    q0 -= (q3*5681 + 2048) >> 12; \
    /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~= \
        0.52204745462729 */ \
    OD_DCT_OVERFLOW_CHECK(q0, 4277, 4096, 198); \
    q3 += (q0*4277 + 4096) >> 13; \
  } \
  while (0)
213
/* OD_IDST_4(q0, q2, q1, q3): embedded 4-point orthonormal Type-IV iDST.
   Two three-step lifting rotations on (q3, q0) and (q1, q2), two add/subtract
   butterflies (halved terms kept in the block-local ints q0h and q2h), then a
   final three-step lifting rotation on (q1, q2). All arguments are updated in
   place and expanded many times, so pass plain, side-effect-free lvalues. */
#define OD_IDST_4(q0, q2, q1, q3) \
  do { \
    int q0h; \
    int q2h; \
    /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~= \
        0.52204745462729 */ \
    q3 -= (q0*4277 + 4096) >> 13; \
    /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
    q0 += (q3*5681 + 2048) >> 12; \
    /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~= \
        0.6215036383171189 */ \
    q3 -= (q0*5091 + 4096) >> 13; \
    /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~= \
        0.223847182092655 */ \
    q1 -= (q2*7335 + 16384) >> 15; \
    /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
    q2 -= (q1*1609 + 1024) >> 11; \
    /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~= \
        0.524455699240090 */ \
    q1 += (q2*537 + 512) >> 10; \
    q2h = OD_DCT_RSHIFT(q2, 1); \
    q3 += q2h; \
    q2 -= q3; \
    q0h = OD_DCT_RSHIFT(q0, 1); \
    q1 = q0h - q1; \
    q0 -= q1; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    q1 -= (q2*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    q2 += (q1*5793 + 4096) >> 13; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    q1 -= (q2*13573 + 16384) >> 15; \
  } \
  while (0)
249
/* OD_FDST_4_ASYM(t0, t0h, t2, t1, t3): embedded 4-point asymmetric Type-IV
   fDST.
   t0h is only read; OD_FDCT_8 passes OD_DCT_RSHIFT(t0, 1) computed
   beforehand. t0..t3 are updated in place through lifting rotations and
   add/subtract butterflies and must be plain, side-effect-free lvalues.
   Every multiply is preceded by an OD_DCT_OVERFLOW_CHECK hook that receives
   (val, scale, offset, idx) for that step; val names the operand that is
   actually multiplied. */
#define OD_FDST_4_ASYM(t0, t0h, t2, t1, t3) \
  do { \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 7489, 4096, 106); \
    t2 -= (t1*7489 + 4096) >> 13; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 11585, 8192, 107); \
    t1 += (t2*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 108); \
    t2 += (t1*19195 + 16384) >> 15; \
    t3 += OD_DCT_RSHIFT(t2, 1); \
    t2 -= t3; \
    t1 = t0h - t1; \
    t0 -= t1; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 109); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 8035, 4096, 110); \
    t0 -= (t3*8035 + 4096) >> 13; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 111); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 112); \
    t2 += (t1*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 6811, 4096, 113); \
    t1 -= (t2*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 114); \
    t2 += (t1*8757 + 8192) >> 14; \
  } \
  while (0)
286
/* OD_IDST_4_ASYM(t0, t0h, t2, t1, t3): embedded 4-point asymmetric Type-IV
   iDST.
   t0..t3 are updated in place through lifting rotations and add/subtract
   butterflies. t0h is an output: it receives OD_DCT_RSHIFT(t0, 1) taken
   right after the t0 += t2 butterfly, for reuse by the caller. Every
   argument is assigned to and must be a plain, side-effect-free lvalue. */
#define OD_IDST_4_ASYM(t0, t0h, t2, t1, t3) \
  do { \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    t1 -= (t2*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    t2 += (t1*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    t1 -= (t2*8757 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    t3 -= (t0*6723 + 4096) >> 13; \
    /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    t0 += (t3*8035 + 4096) >> 13; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    t3 -= (t0*6723 + 4096) >> 13; \
    t0 += t2; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t2 = t0h - t2; \
    t1 += t3; \
    t3 -= OD_DCT_RSHIFT(t1, 1); \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    t1 -= (t2*19195 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    t2 -= (t1*11585 + 8192) >> 14; \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    t1 += (t2*7489 + 4096) >> 13; \
  } \
  while (0)
315
/* OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7): embedded 8-point orthonormal
   Type-II fDCT.
   Performs four add/subtract butterflies in place, keeping each halved term
   in a block-local int (r4h..r7h), then runs OD_FDCT_4_ASYM on
   (r0, r4, r2, r6) and OD_FDST_4_ASYM on (r7, r3, r5, r1), forwarding the
   halves those macros read. All eight arguments are assigned to and expanded
   many times, so pass plain, side-effect-free lvalues. */
#define OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
  do { \
    int r4h; \
    int r5h; \
    int r6h; \
    int r7h; \
    r7 = r0 - r7; \
    r7h = OD_DCT_RSHIFT(r7, 1); \
    r0 -= r7h; \
    r6 += r1; \
    r6h = OD_DCT_RSHIFT(r6, 1); \
    r1 = r6h - r1; \
    r5 = r2 - r5; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 -= r5h; \
    r4 += r3; \
    r4h = OD_DCT_RSHIFT(r4, 1); \
    r3 = r4h - r3; \
    OD_FDCT_4_ASYM(r0, r4, r4h, r2, r6, r6h); \
    OD_FDST_4_ASYM(r7, r7h, r3, r5, r1); \
  } \
  while (0)
339
/* OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7): embedded 8-point orthonormal
   Type-II iDCT.
   Runs OD_IDST_4_ASYM on (r7, r5, r6, r4) and OD_IDCT_4_ASYM on
   (r0, r2, r1, r3), then recombines the halves with add/subtract
   butterflies. r7h, r1h and r3h are filled in by those two macros; r5h is
   computed here. All eight arguments are assigned to and expanded many
   times, so pass plain, side-effect-free lvalues. */
#define OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
  do { \
    int r1h; \
    int r3h; \
    int r5h; \
    int r7h; \
    OD_IDST_4_ASYM(r7, r7h, r5, r6, r4); \
    OD_IDCT_4_ASYM(r0, r2, r1, r1h, r3, r3h); \
    r0 += r7h; \
    r7 = r0 - r7; \
    r6 = r1h - r6; \
    r1 -= r6; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 += r5h; \
    r5 = r2 - r5; \
    r4 = r3h - r4; \
    r3 -= r4; \
  } \
  while (0)
360
/* OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h):
   embedded 8-point asymmetric Type-II fDCT.
   r4h, r6h, r5h and r7h are only read; OD_FDCT_16 passes OD_DCT_RSHIFT(x, 1)
   of the matching operand computed beforehand. After four add/subtract
   butterflies, (r0, r4, r2, r6) goes through OD_FDCT_4 and (r7, r3, r5, r1)
   through OD_FDST_4. r0..r7 are updated in place and must be plain,
   side-effect-free lvalues. */
#define OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h) \
  do { \
    r0 += r7h; \
    r7 = r0 - r7; \
    r1 = r6h - r1; \
    r6 -= r1; \
    r2 += r5h; \
    r5 = r2 - r5; \
    r3 = r4h - r3; \
    r4 -= r3; \
    OD_FDCT_4(r0, r4, r2, r6); \
    OD_FDST_4(r7, r3, r5, r1); \
  } \
  while (0)
376
/* OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h):
   embedded 8-point asymmetric Type-II iDCT.
   Runs OD_IDST_4 on (r7, r5, r6, r4) and OD_IDCT_4 on (r0, r2, r1, r3), then
   applies four add/subtract butterflies. r1h, r5h, r3h and r7h are outputs:
   each receives OD_DCT_RSHIFT(x, 1) of the matching butterflied value for
   reuse by the caller. Every argument is assigned to and must be a plain,
   side-effect-free lvalue. */
#define OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h) \
  do { \
    OD_IDST_4(r7, r5, r6, r4); \
    OD_IDCT_4(r0, r2, r1, r3); \
    r7 = r0 - r7; \
    r7h = OD_DCT_RSHIFT(r7, 1); \
    r0 -= r7h; \
    r1 += r6; \
    r1h = OD_DCT_RSHIFT(r1, 1); \
    r6 = r1h - r6; \
    r5 = r2 - r5; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 -= r5h; \
    r3 += r4; \
    r3h = OD_DCT_RSHIFT(r3, 1); \
    r4 = r3h - r4; \
  } \
  while (0)
396
/* OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7): embedded 8-point orthonormal
   Type-IV fDST.
   Three three-step lifting rotations, a network of add/subtract butterflies
   (halved terms kept in the block-local ints t0h, t2h, t5h, t7h), a negation
   of t7, then four more three-step lifting rotations. All eight arguments
   are updated in place and expanded many times, so pass plain,
   side-effect-free lvalues. Every multiply is preceded by an
   OD_DCT_OVERFLOW_CHECK hook that receives (val, scale, offset, idx). */
#define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h; \
    int t7h; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 115); \
    t6 -= (t1*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 11585, 8192, 116); \
    t1 += (t6*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 117); \
    t6 -= (t1*13573 + 16384) >> 15; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 21895, 16384, 118); \
    t5 -= (t2*21895 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 15137, 8192, 119); \
    t2 += (t5*15137 + 8192) >> 14; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 10947, 8192, 120); \
    t5 -= (t2*10947 + 8192) >> 14; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 121); \
    t4 -= (t3*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3135, 4096, 122); \
    t3 += (t4*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 123); \
    t4 -= (t3*3259 + 8192) >> 14; \
    t7 += t1; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t1 -= t7h; \
    t2 = t3 - t2; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 -= t2h; \
    t0 -= t6; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t6 += t0h; \
    t5 = t4 - t5; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h; \
    t1 += t5h; \
    t5 = t1 - t5; \
    t4 += t0h; \
    t0 -= t4; \
    t6 -= t2h; \
    t2 += t6; \
    t3 -= t7h; \
    t7 += t3; \
    /* TODO: Can we move this into another operation */ \
    t7 = -t7; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 124); \
    t0 -= (t7*7425 + 4096) >> 13; \
    /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 8153, 4096, 125); \
    t7 += (t0*8153 + 4096) >> 13; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 126); \
    t0 -= (t7*7425 + 4096) >> 13; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 127); \
    t6 -= (t1*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 1189, 2048, 128); \
    t1 += (t6*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 129); \
    t6 -= (t1*4861 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 130); \
    t2 -= (t5*2455 + 2048) >> 12; \
    /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 7225, 4096, 131); \
    t5 += (t2*7225 + 4096) >> 13; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 132); \
    t2 -= (t5*2455 + 2048) >> 12; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 133); \
    t4 -= (t3*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 5197, 4096, 134); \
    t3 += (t4*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 135); \
    t4 -= (t3*11725 + 16384) >> 15; \
  } \
  while (0)
491
/* OD_IDST_8(t0, t4, t2, t6, t1, t5, t3, t7): embedded 8-point orthonormal
   Type-IV iDST.
   Four three-step lifting rotations, a negation of t7, a network of
   add/subtract butterflies (halved terms kept in the block-local ints t0h,
   t2h, t5h_, t7h_), then three more three-step lifting rotations. All eight
   arguments are updated in place and expanded many times, so pass plain,
   side-effect-free lvalues.
   NOTE(review): the trailing underscore on t5h_/t7h_ presumably keeps these
   locals from shadowing identically named caller variables -- confirm. */
#define OD_IDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h_; \
    int t7h_; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    t1 += (t6*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
    t6 -= (t1*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    t1 += (t6*11725 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    t2 += (t5*2455 + 2048) >> 12; \
    /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    t5 -= (t2*7225 + 4096) >> 13; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    t2 += (t5*2455 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    t3 += (t4*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
    t4 -= (t3*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    t3 += (t4*4861 + 16384) >> 15; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    t0 += (t7*7425 + 4096) >> 13; \
    /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
    t7 -= (t0*8153 + 4096) >> 13; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    t0 += (t7*7425 + 4096) >> 13; \
    /* TODO: Can we move this into another operation */ \
    t7 = -t7; \
    t7 -= t6; \
    t7h_ = OD_DCT_RSHIFT(t7, 1); \
    t6 += t7h_; \
    t2 -= t3; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 += t2h; \
    t0 += t1; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t1 -= t0h; \
    t5 = t4 - t5; \
    t5h_ = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h_; \
    t1 += t5h_; \
    t5 = t1 - t5; \
    t3 -= t0h; \
    t0 += t3; \
    t6 += t2h; \
    t2 = t6 - t2; \
    t4 += t7h_; \
    t7 -= t4; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    t6 -= (t1*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 += (t2*10947 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t2 -= (t5*15137 + 8192) >> 14; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 += (t2*21895 + 16384) >> 15; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t3 += (t4*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    t4 -= (t3*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t3 += (t4*13573 + 16384) >> 15; \
  } \
  while (0)
565
/* OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7): embedded 8-point
   asymmetric Type-IV fDST.
   Four three-step lifting rotations, a network of add/subtract butterflies
   (halved terms kept in the block-local ints t0h, t2h, t5h, t7h), then three
   more three-step lifting rotations. All eight arguments are updated in
   place and expanded many times, so pass plain, side-effect-free lvalues.
   Every multiply is preceded by an OD_DCT_OVERFLOW_CHECK hook that receives
   (val, scale, offset, idx).
   TODO: Rewrite this so that t0h can be passed in. */
#define OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h; \
    int t7h; \
    /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t1, 1035, 1024, 199); \
    t6 += (t1*1035 + 1024) >> 11; \
    /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3675, 2048, 200); \
    t1 -= (t6*3675 + 2048) >> 12; \
    /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t1, 851, 4096, 201); \
    t6 -= (t1*851 + 4096) >> 13; \
    /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t2, 4379, 4096, 202); \
    t5 += (t2*4379 + 4096) >> 13; \
    /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t5, 10217, 4096, 203); \
    t2 -= (t5*10217 + 4096) >> 13; \
    /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t2, 4379, 8192, 204); \
    t5 += (t2*4379 + 8192) >> 14; \
    /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t3, 12905, 8192, 205); \
    t4 += (t3*12905 + 8192) >> 14; \
    /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3363, 4096, 206); \
    t3 -= (t4*3363 + 4096) >> 13; \
    /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3525, 2048, 207); \
    t4 -= (t3*3525 + 2048) >> 12; \
    /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t0, 5417, 4096, 208); \
    t7 += (t0*5417 + 4096) >> 13; \
    /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t7, 5765, 2048, 209); \
    t0 -= (t7*5765 + 2048) >> 12; \
    /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t0, 2507, 2048, 210); \
    t7 += (t0*2507 + 2048) >> 12; \
    t0 += t1; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t1 -= t0h; \
    t2 -= t3; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 += t2h; \
    t5 -= t4; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    t4 += t5h; \
    t7 += t6; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t6 = t7h - t6; \
    t4 = t7h - t4; \
    t7 -= t4; \
    t1 += t5h; \
    t5 = t1 - t5; \
    t6 += t2h; \
    t2 = t6 - t2; \
    t3 -= t0h; \
    t0 += t3; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 211); \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 3135, 4096, 212); \
    t6 -= (t1*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 213); \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 214); \
    t5 += (t2*2737 + 2048) >> 12; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 473, 256, 215); \
    t2 -= (t5*473 + 256) >> 9; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 216); \
    t5 += (t2*2737 + 2048) >> 12; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 217); \
    t3 += (t4*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 5793, 4096, 218); \
    t4 -= (t3*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 219); \
    t3 += (t4*3393 + 4096) >> 13; \
  } \
  while (0)
659
/* OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7): embedded 8-point
   asymmetric Type-IV iDST.
   Three three-step lifting rotations, a network of add/subtract butterflies
   (halved terms kept in the block-local ints t0h, t2h, t5h__, t7h__), then
   four more three-step lifting rotations. All eight arguments are updated in
   place and expanded many times, so pass plain, side-effect-free lvalues.
   NOTE(review): the trailing underscores on t5h__/t7h__ presumably keep
   these locals from shadowing identically named caller variables --
   confirm. */
#define OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h__; \
    int t7h__; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t6 -= (t1*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    t1 += (t6*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t6 -= (t1*3393 + 4096) >> 13; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 -= (t2*2737 + 2048) >> 12; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t2 += (t5*473 + 256) >> 9; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 -= (t2*2737 + 2048) >> 12; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t4 -= (t3*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    t3 += (t4*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t4 -= (t3*3259 + 8192) >> 14; \
    t0 -= t6; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t6 += t0h; \
    t2 = t3 - t2; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 -= t2h; \
    t5 = t4 - t5; \
    t5h__ = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h__; \
    t7 += t1; \
    t7h__ = OD_DCT_RSHIFT(t7, 1); \
    t1 = t7h__ - t1; \
    t3 = t7h__ - t3; \
    t7 -= t3; \
    t1 -= t5h__; \
    t5 += t1; \
    t6 -= t2h; \
    t2 += t6; \
    t4 += t0h; \
    t0 -= t4; \
    /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
    t7 -= (t0*2507 + 2048) >> 12; \
    /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
    t0 += (t7*5765 + 2048) >> 12; \
    /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
    t7 -= (t0*5417 + 4096) >> 13; \
    /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
    t1 += (t6*3525 + 2048) >> 12; \
    /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
    t6 += (t1*3363 + 4096) >> 13; \
    /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
    t1 -= (t6*12905 + 8192) >> 14; \
    /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
    t5 -= (t2*4379 + 8192) >> 14; \
    /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
    t2 += (t5*10217 + 4096) >> 13; \
    /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
    t5 -= (t2*4379 + 4096) >> 13; \
    /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
    t3 += (t4*851 + 4096) >> 13; \
    /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
    t4 += (t3*3675 + 2048) >> 12; \
    /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
    t3 -= (t4*1035 + 1024) >> 11; \
  } \
  while (0)
731
/* OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se,
              s1, s9, s5, sd, s3, sb, s7, sf):
   embedded 16-point orthonormal Type-II fDCT.
   Performs eight add/subtract butterflies in place, keeping the halves that
   are needed again in block-local ints, then runs OD_FDCT_8_ASYM on
   (s0, s8, s4, sc, s2, sa, s6, se) and OD_FDST_8_ASYM on
   (sf, s7, sb, s3, sd, s5, s9, s1). All sixteen arguments are assigned to
   and expanded many times, so pass plain, side-effect-free lvalues. */
#define OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
  s1, s9, s5, sd, s3, sb, s7, sf) \
  do { \
    int s8h; \
    int sah; \
    int sch; \
    int seh; \
    int sfh; \
    sf = s0 - sf; \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    s0 -= sfh; \
    se += s1; \
    seh = OD_DCT_RSHIFT(se, 1); \
    s1 = seh - s1; \
    sd = s2 - sd; \
    s2 -= OD_DCT_RSHIFT(sd, 1); \
    sc += s3; \
    sch = OD_DCT_RSHIFT(sc, 1); \
    s3 = sch - s3; \
    sb = s4 - sb; \
    s4 -= OD_DCT_RSHIFT(sb, 1); \
    sa += s5; \
    sah = OD_DCT_RSHIFT(sa, 1); \
    s5 = sah - s5; \
    s9 = s6 - s9; \
    s6 -= OD_DCT_RSHIFT(s9, 1); \
    s8 += s7; \
    s8h = OD_DCT_RSHIFT(s8, 1); \
    s7 = s8h - s7; \
    OD_FDCT_8_ASYM(s0, s8, s8h, s4, sc, sch, s2, sa, sah, s6, se, seh); \
    OD_FDST_8_ASYM(sf, s7, sb, s3, sd, s5, s9, s1); \
  } \
  while (0)
766
/* OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se,
              s1, s9, s5, sd, s3, sb, s7, sf):
   embedded 16-point orthonormal Type-II iDCT.
   Runs OD_IDST_8_ASYM on (sf, sb, sd, s9, se, sa, sc, s8) and OD_IDCT_8_ASYM
   on (s0, s4, s2, s6, s1, s5, s3, s7), then recombines the halves with eight
   add/subtract butterflies. s1h, s3h, s5h and s7h are filled in by
   OD_IDCT_8_ASYM; sfh is computed here. All sixteen arguments are assigned
   to and expanded many times, so pass plain, side-effect-free lvalues. */
#define OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
  s1, s9, s5, sd, s3, sb, s7, sf) \
  do { \
    int s1h; \
    int s3h; \
    int s5h; \
    int s7h; \
    int sfh; \
    OD_IDST_8_ASYM(sf, sb, sd, s9, se, sa, sc, s8); \
    OD_IDCT_8_ASYM(s0, s4, s2, s6, s1, s1h, s5, s5h, s3, s3h, s7, s7h); \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    s0 += sfh; \
    sf = s0 - sf; \
    se = s1h - se; \
    s1 -= se; \
    s2 += OD_DCT_RSHIFT(sd, 1); \
    sd = s2 - sd; \
    sc = s3h - sc; \
    s3 -= sc; \
    s4 += OD_DCT_RSHIFT(sb, 1); \
    sb = s4 - sb; \
    sa = s5h - sa; \
    s5 -= sa; \
    s6 += OD_DCT_RSHIFT(s9, 1); \
    s9 = s6 - s9; \
    s8 = s7h - s8; \
    s7 -= s8; \
  } \
  while (0)
797
/* OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh,
                   t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh):
   embedded 16-point asymmetric Type-II fDCT.
   The eight *h arguments are only read by the butterflies below.
   NOTE(review): by analogy with the smaller _ASYM macros they are presumably
   OD_DCT_RSHIFT(x, 1) of the matching operand; no caller is visible in this
   part of the file -- confirm.
   After eight add/subtract butterflies, (t0, t8, t4, tc, t2, ta, t6, te)
   goes through OD_FDCT_8 and (tf, t7, tb, t3, td, t5, t9, t1) through
   OD_FDST_8. The sixteen non-h arguments are updated in place and must be
   plain, side-effect-free lvalues. */
#define OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh, \
  t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh) \
  do { \
    t0 += tfh; \
    tf = t0 - tf; \
    t1 -= teh; \
    te += t1; \
    t2 += tdh; \
    td = t2 - td; \
    t3 -= tch; \
    tc += t3; \
    t4 += tbh; \
    tb = t4 - tb; \
    t5 -= tah; \
    ta += t5; \
    t6 += t9h; \
    t9 = t6 - t9; \
    t7 -= t8h; \
    t8 += t7; \
    OD_FDCT_8(t0, t8, t4, tc, t2, ta, t6, te); \
    OD_FDST_8(tf, t7, tb, t3, td, t5, t9, t1); \
  } \
  while (0)
822
/* OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te,
                   t1, t1h, t9, t9h, t5, t5h, td, tdh,
                   t3, t3h, tb, tbh, t7, t7h, tf, tfh):
   embedded 16-point asymmetric Type-II iDCT.
   Runs OD_IDST_8 on (tf, tb, td, t9, te, ta, tc, t8) and OD_IDCT_8 on
   (t0, t4, t2, t6, t1, t5, t3, t7), then applies eight add/subtract
   butterflies. The eight *h arguments are outputs: each receives
   OD_DCT_RSHIFT(x, 1) of the matching butterflied value for reuse by the
   caller. Every argument is assigned to and must be a plain,
   side-effect-free lvalue. */
#define OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
  t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh) \
  do { \
    OD_IDST_8(tf, tb, td, t9, te, ta, tc, t8); \
    OD_IDCT_8(t0, t4, t2, t6, t1, t5, t3, t7); \
    t1 -= te; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    te += t1h; \
    t9 = t6 - t9; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    t6 -= t9h; \
    t5 -= ta; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    ta += t5h; \
    td = t2 - td; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    t2 -= tdh; \
    t3 -= tc; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    tc += t3h; \
    tb = t4 - tb; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    t4 -= tbh; \
    t7 -= t8; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t8 += t7h; \
    tf = t0 - tf; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    t0 -= tfh; \
  } \
  while (0)
855
Monty Montgomerycb9c1c52017-07-17 18:15:30 -0400856#define OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
857 s1, s9, s5, sd, s3, sb, s7, sf) \
858 /* Embedded 16-point orthonormal Type-IV fDST. */ \
859 do { \
860 int s0h; \
861 int s2h; \
862 int sdh; \
863 int sfh; \
864 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
865 OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 220); \
866 s1 += (se*13573 + 16384) >> 15; \
867 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
868 OD_DCT_OVERFLOW_CHECK(s1, 11585, 8192, 221); \
869 se -= (s1*11585 + 8192) >> 14; \
870 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
871 OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 222); \
872 s1 += (se*13573 + 16384) >> 15; \
873 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
874 OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 223); \
875 sd += (s2*21895 + 16384) >> 15; \
876 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
877 OD_DCT_OVERFLOW_CHECK(sd, 15137, 16384, 224); \
878 s2 -= (sd*15137 + 8192) >> 14; \
879 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
880 OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 225); \
881 sd += (s2*21895 + 16384) >> 15; \
882 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
883 OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 226); \
884 sc += (s3*3259 + 8192) >> 14; \
885 /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
886 OD_DCT_OVERFLOW_CHECK(sc, 3135, 4096, 227); \
887 s3 -= (sc*3135 + 4096) >> 13; \
888 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
889 OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 228); \
890 sc += (s3*3259 + 8192) >> 14; \
891 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
892 OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 229); \
893 sa += (s5*13573 + 16384) >> 15; \
894 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
895 OD_DCT_OVERFLOW_CHECK(sa, 11585, 8192, 230); \
896 s5 -= (sa*11585 + 8192) >> 14; \
897 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
898 OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 231); \
899 sa += (s5*13573 + 16384) >> 15; \
900 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
901 OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 232); \
902 s6 += (s9*13573 + 16384) >> 15; \
903 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
904 OD_DCT_OVERFLOW_CHECK(s6, 11585, 8192, 233); \
905 s9 -= (s6*11585 + 8192) >> 14; \
906 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
907 OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 234); \
908 s6 += (s9*13573 + 16384) >> 15; \
909 sf += se; \
910 sfh = OD_DCT_RSHIFT(sf, 1); \
911 se = sfh - se; \
912 s0 += s1; \
913 s0h = OD_DCT_RSHIFT(s0, 1); \
914 s1 = s0h - s1; \
915 s2 = s3 - s2; \
916 s2h = OD_DCT_RSHIFT(s2, 1); \
917 s3 -= s2h; \
918 sd -= sc; \
919 sdh = OD_DCT_RSHIFT(sd, 1); \
920 sc += sdh; \
921 sa = s4 - sa; \
922 s4 -= OD_DCT_RSHIFT(sa, 1); \
923 s5 += sb; \
924 sb = OD_DCT_RSHIFT(s5, 1) - sb; \
925 s8 += s6; \
926 s6 -= OD_DCT_RSHIFT(s8, 1); \
927 s7 = s9 - s7; \
928 s9 -= OD_DCT_RSHIFT(s7, 1); \
929 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
930 OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 235); \
931 s4 += (sb*6723 + 4096) >> 13; \
932 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
933 OD_DCT_OVERFLOW_CHECK(s4, 16069, 8192, 236); \
934 sb -= (s4*16069 + 8192) >> 14; \
935 /* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \
936 OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 237); \
937 s4 += (sb*6723 + 4096) >> 13; \
938 /* 8757/16384 ~= Tan[5*Pi/32]) ~= 0.534511135950792 */ \
939 OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 238); \
940 sa += (s5*8757 + 8192) >> 14; \
941 /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
942 OD_DCT_OVERFLOW_CHECK(sa, 6811, 4096, 239); \
943 s5 -= (sa*6811 + 4096) >> 13; \
944 /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
945 OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 240); \
946 sa += (s5*8757 + 8192) >> 14; \
947 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
948 OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 241); \
949 s6 += (s9*2485 + 4096) >> 13; \
950 /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
951 OD_DCT_OVERFLOW_CHECK(s6, 4551, 4096, 242); \
952 s9 -= (s6*4551 + 4096) >> 13; \
953 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
954 OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 243); \
955 s6 += (s9*2485 + 4096) >> 13; \
956 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
957 OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 244); \
958 s7 += (s8*3227 + 16384) >> 15; \
959 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
960 OD_DCT_OVERFLOW_CHECK(s7, 6393, 16384, 245); \
961 s8 -= (s7*6393 + 16384) >> 15; \
962 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
963 OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 246); \
964 s7 += (s8*3227 + 16384) >> 15; \
965 s1 -= s2h; \
966 s2 += s1; \
967 se += sdh; \
968 sd = se - sd; \
969 s3 += sfh; \
970 sf -= s3; \
971 sc = s0h - sc; \
972 s0 -= sc; \
973 sb += OD_DCT_RSHIFT(s8, 1); \
974 s8 = sb - s8; \
975 s4 += OD_DCT_RSHIFT(s7, 1); \
976 s7 -= s4; \
977 s6 += OD_DCT_RSHIFT(s5, 1); \
978 s5 = s6 - s5; \
979 s9 -= OD_DCT_RSHIFT(sa, 1); \
980 sa += s9; \
981 s8 += s0; \
982 s0 -= OD_DCT_RSHIFT(s8, 1); \
983 sf += s7; \
984 s7 = OD_DCT_RSHIFT(sf, 1) - s7; \
985 s1 -= s6; \
986 s6 += OD_DCT_RSHIFT(s1, 1); \
987 s9 += se; \
988 se = OD_DCT_RSHIFT(s9, 1) - se; \
989 s2 += sa; \
990 sa = OD_DCT_RSHIFT(s2, 1) - sa; \
991 s5 += sd; \
992 sd -= OD_DCT_RSHIFT(s5, 1); \
993 s4 = sc - s4; \
994 sc -= OD_DCT_RSHIFT(s4, 1); \
995 s3 -= sb; \
996 sb += OD_DCT_RSHIFT(s3, 1); \
997 /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
998 OD_DCT_OVERFLOW_CHECK(sf, 2799, 2048, 247); \
999 s0 -= (sf*2799 + 2048) >> 12; \
1000 /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
1001 OD_DCT_OVERFLOW_CHECK(s0, 2893, 1024, 248); \
1002 sf += (s0*2893 + 1024) >> 11; \
1003 /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
1004 OD_DCT_OVERFLOW_CHECK(sf, 5397, 4096, 249); \
1005 s0 -= (sf*5397 + 4096) >> 13; \
1006 /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
1007 OD_DCT_OVERFLOW_CHECK(s1, 41, 32, 250); \
1008 se += (s1*41 + 32) >> 6; \
1009 /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
1010 OD_DCT_OVERFLOW_CHECK(se, 2865, 1024, 251); \
1011 s1 -= (se*2865 + 1024) >> 11; \
1012 /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
1013 OD_DCT_OVERFLOW_CHECK(s1, 4641, 4096, 252); \
1014 se += (s1*4641 + 4096) >> 13; \
1015 /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
1016 OD_DCT_OVERFLOW_CHECK(s2, 2473, 2048, 253); \
1017 sd += (s2*2473 + 2048) >> 12; \
1018 /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
1019 OD_DCT_OVERFLOW_CHECK(sd, 5619, 2048, 254); \
1020 s2 -= (sd*5619 + 2048) >> 12; \
1021 /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
1022 OD_DCT_OVERFLOW_CHECK(s2, 7839, 8192, 255); \
1023 sd += (s2*7839 + 8192) >> 14; \
1024 /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
1025 OD_DCT_OVERFLOW_CHECK(s3, 5747, 4096, 256); \
1026 sc -= (s3*5747 + 4096) >> 13; \
1027 /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] ~= */ \
1028 OD_DCT_OVERFLOW_CHECK(sc, 3903, 4096, 257); \
1029 s3 += (sc*3903 + 4096) >> 13; \
1030 /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
1031 OD_DCT_OVERFLOW_CHECK(s3, 5701, 4096, 258); \
1032 sc += (s3*5701 + 4096) >> 13; \
1033 /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
1034 OD_DCT_OVERFLOW_CHECK(s4, 4471, 4096, 259); \
1035 sb += (s4*4471 + 4096) >> 13; \
1036 /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
1037 OD_DCT_OVERFLOW_CHECK(sb, 1309, 512, 260); \
1038 s4 -= (sb*1309 + 512) >> 10; \
1039 /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
1040 OD_DCT_OVERFLOW_CHECK(s4, 5067, 8192, 261); \
1041 sb += (s4*5067 + 8192) >> 14; \
1042 /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
1043 OD_DCT_OVERFLOW_CHECK(s5, 2217, 2048, 262); \
1044 sa -= (s5*2217 + 2048) >> 12; \
1045 /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] ~= 0.72705107329128 */ \
1046 OD_DCT_OVERFLOW_CHECK(sa, 1489, 1024, 263); \
1047 s5 += (sa*1489 + 1024) >> 11; \
1048 /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
1049 OD_DCT_OVERFLOW_CHECK(s5, 75, 128, 264); \
1050 sa += (s5*75 + 128) >> 8; \
1051 /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
1052 OD_DCT_OVERFLOW_CHECK(s9, 2087, 2048, 265); \
1053 s6 -= (s9*2087 + 2048) >> 12; \
1054 /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
1055 OD_DCT_OVERFLOW_CHECK(s6, 4653, 2048, 266); \
1056 s9 += (s6*4653 + 2048) >> 12; \
1057 /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
1058 OD_DCT_OVERFLOW_CHECK(s9, 4545, 16384, 267); \
1059 s6 -= (s9*4545 + 16384) >> 15; \
1060 /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
1061 OD_DCT_OVERFLOW_CHECK(s8, 2053, 2048, 268); \
1062 s7 += (s8*2053 + 2048) >> 12; \
1063 /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
1064 OD_DCT_OVERFLOW_CHECK(s7, 1945, 1024, 269); \
1065 s8 -= (s7*1945 + 1024) >> 11; \
1066 /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
1067 OD_DCT_OVERFLOW_CHECK(s8, 1651, 16384, 270); \
1068 s7 -= (s8*1651 + 16384) >> 15; \
1069 } \
1070 while (0)
1071
/* OD_IDST_16(s0, ..., sf): 16-point inverse Type-IV DST, computed in place.
   Every argument is both read and assigned, and is expanded many times, so
   each must be a plain modifiable integer variable with no side effects.
   Arguments are listed in the permuted order s0, s8, s4, sc, s2, sa, s6, se,
   s1, s9, s5, sd, s3, sb, s7, sf.
   The body consists of lifting steps of the form
     x += (y*C + 2^(k-1)) >> k   (multiply by C/2^k with a rounding offset)
   interleaved with add/subtract butterflies whose halved terms come from
   OD_DCT_RSHIFT.  The steps must not be reordered: each one reads values
   written by the steps before it. */
1072#define OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
1073 s1, s9, s5, sd, s3, sb, s7, sf) \
1074 /* Embedded 16-point orthonormal Type-IV iDST. */ \
1075 do { \
 /* Halved butterfly results, saved for reuse by later butterflies. */ \
1076 int s0h; \
1077 int s4h; \
1078 int sbh; \
1079 int sfh; \
 /* Eight groups of three lifting steps, one group per variable pair. */ \
1080 /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
1081 se += (s1*1651 + 16384) >> 15; \
1082 /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
1083 s1 += (se*1945 + 1024) >> 11; \
1084 /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
1085 se -= (s1*2053 + 2048) >> 12; \
1086 /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
1087 s6 += (s9*4545 + 16384) >> 15; \
1088 /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
1089 s9 -= (s6*4653 + 2048) >> 12; \
1090 /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
1091 s6 += (s9*2087 + 2048) >> 12; \
1092 /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
1093 s5 -= (sa*75 + 128) >> 8; \
1094 /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] */ \
1095 sa -= (s5*1489 + 1024) >> 11; \
1096 /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
1097 s5 += (sa*2217 + 2048) >> 12; \
1098 /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
1099 sd -= (s2*5067 + 8192) >> 14; \
1100 /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
1101 s2 += (sd*1309 + 512) >> 10; \
1102 /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
1103 sd -= (s2*4471 + 4096) >> 13; \
1104 /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
1105 s3 -= (sc*5701 + 4096) >> 13; \
1106 /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \
1107 sc -= (s3*3903 + 4096) >> 13; \
1108 /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
1109 s3 += (sc*5747 + 4096) >> 13; \
1110 /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
1111 sb -= (s4*7839 + 8192) >> 14; \
1112 /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
1113 s4 += (sb*5619 + 2048) >> 12; \
1114 /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
1115 sb -= (s4*2473 + 2048) >> 12; \
1116 /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
1117 s7 -= (s8*4641 + 4096) >> 13; \
1118 /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
1119 s8 += (s7*2865 + 1024) >> 11; \
1120 /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
1121 s7 -= (s8*41 + 32) >> 6; \
1122 /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
1123 s0 += (sf*5397 + 4096) >> 13; \
1124 /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
1125 sf -= (s0*2893 + 1024) >> 11; \
1126 /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
1127 s0 += (sf*2799 + 2048) >> 12; \
 /* Add/subtract butterflies; OD_DCT_RSHIFT(x, 1) supplies the halved term. */ \
1128 sd -= OD_DCT_RSHIFT(sc, 1); \
1129 sc += sd; \
1130 s3 += OD_DCT_RSHIFT(s2, 1); \
1131 s2 = s3 - s2; \
1132 sb += OD_DCT_RSHIFT(sa, 1); \
1133 sa -= sb; \
1134 s5 = OD_DCT_RSHIFT(s4, 1) - s5; \
1135 s4 -= s5; \
1136 s7 = OD_DCT_RSHIFT(s9, 1) - s7; \
1137 s9 -= s7; \
1138 s6 -= OD_DCT_RSHIFT(s8, 1); \
1139 s8 += s6; \
1140 se = OD_DCT_RSHIFT(sf, 1) - se; \
1141 sf -= se; \
1142 s0 += OD_DCT_RSHIFT(s1, 1); \
1143 s1 -= s0; \
1144 s5 -= s9; \
1145 s9 += OD_DCT_RSHIFT(s5, 1); \
1146 sa = s6 - sa; \
1147 s6 -= OD_DCT_RSHIFT(sa, 1); \
1148 se += s2; \
1149 s2 -= OD_DCT_RSHIFT(se, 1); \
1150 s1 = sd - s1; \
1151 sd -= OD_DCT_RSHIFT(s1, 1); \
 /* These four butterflies keep their halved value (s0h, sfh, sbh, s4h) */ \
 /* because it is needed again after the next group of lifting steps. */ \
1152 s0 += s3; \
1153 s0h = OD_DCT_RSHIFT(s0, 1); \
1154 s3 = s0h - s3; \
1155 sf += sc; \
1156 sfh = OD_DCT_RSHIFT(sf, 1); \
1157 sc -= sfh; \
1158 sb = s7 - sb; \
1159 sbh = OD_DCT_RSHIFT(sb, 1); \
1160 s7 -= sbh; \
1161 s4 -= s8; \
1162 s4h = OD_DCT_RSHIFT(s4, 1); \
1163 s8 += s4h; \
1164 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
1165 se -= (s1*3227 + 16384) >> 15; \
1166 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
1167 s1 += (se*6393 + 16384) >> 15; \
1168 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
1169 se -= (s1*3227 + 16384) >> 15; \
1170 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1171 s6 -= (s9*2485 + 4096) >> 13; \
1172 /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
1173 s9 += (s6*4551 + 4096) >> 13; \
1174 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1175 s6 -= (s9*2485 + 4096) >> 13; \
1176 /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1177 s5 -= (sa*8757 + 8192) >> 14; \
1178 /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
1179 sa += (s5*6811 + 4096) >> 13; \
1180 /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1181 s5 -= (sa*8757 + 8192) >> 14; \
1182 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1183 s2 -= (sd*6723 + 4096) >> 13; \
1184 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1185 sd += (s2*16069 + 8192) >> 14; \
1186 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1187 s2 -= (sd*6723 + 4096) >> 13; \
1188 s9 += OD_DCT_RSHIFT(se, 1); \
1189 se = s9 - se; \
1190 s6 += OD_DCT_RSHIFT(s1, 1); \
1191 s1 -= s6; \
1192 sd = OD_DCT_RSHIFT(sa, 1) - sd; \
1193 sa -= sd; \
1194 s2 += OD_DCT_RSHIFT(s5, 1); \
1195 s5 = s2 - s5; \
 /* Second half of the butterflies that saved sbh, s4h, s0h and sfh above. */ \
1196 s3 -= sbh; \
1197 sb += s3; \
1198 sc += s4h; \
1199 s4 = sc - s4; \
1200 s8 = s0h - s8; \
1201 s0 -= s8; \
1202 s7 = sfh - s7; \
1203 sf -= s7; \
1204 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1205 s6 -= (s9*13573 + 16384) >> 15; \
1206 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
1207 s9 += (s6*11585 + 8192) >> 14; \
1208 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1209 s6 -= (s9*13573 + 16384) >> 15; \
1210 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1211 s5 -= (sa*13573 + 16384) >> 15; \
1212 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
1213 sa += (s5*11585 + 8192) >> 14; \
1214 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1215 s5 -= (sa*13573 + 16384) >> 15; \
1216 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
1217 s3 -= (sc*3259 + 8192) >> 14; \
1218 /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
1219 sc += (s3*3135 + 4096) >> 13; \
1220 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
1221 s3 -= (sc*3259 + 8192) >> 14; \
1222 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
1223 sb -= (s4*21895 + 16384) >> 15; \
1224 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1225 s4 += (sb*15137 + 8192) >> 14; \
1226 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
1227 sb -= (s4*21895 + 16384) >> 15; \
1228 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1229 s8 -= (s7*13573 + 16384) >> 15; \
1230 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
1231 s7 += (s8*11585 + 8192) >> 14; \
1232 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1233 s8 -= (s7*13573 + 16384) >> 15; \
1234 } \
1235 while (0)
1236
Monty Montgomery2cb52ba2017-07-17 18:27:27 -04001237/* TODO: rewrite this to match OD_FDST_16. */
/* OD_FDST_16_ASYM: 16-point asymmetric forward Type-IV DST, computed in place.
   t0h, t4h and t7h are extra inputs read alongside t0, t4 and t7; the caller
   in OD_FDCT_32 passes OD_DCT_RSHIFT(x, 1) of the matching value.  t7h is
   also overwritten inside the macro.  All other arguments are read and
   assigned and are expanded many times, so each must be a plain modifiable
   integer variable with no side effects.
   OD_DCT_OVERFLOW_CHECK is defined as empty at the top of this file when
   OD_DCT_CHECK_OVERFLOW is not defined, so the checks generate no code in
   that configuration.
   The steps must not be reordered: each one reads values written by the
   steps before it. */
1238#define OD_FDST_16_ASYM(t0, t0h, t8, t4, t4h, tc, t2, ta, t6, te, \
1239 t1, t9, t5, td, t3, tb, t7, t7h, tf) \
1240 /* Embedded 16-point asymmetric Type-IV fDST. */ \
1241 do { \
 /* Halved butterfly results, saved for reuse by later butterflies. */ \
1242 int t2h; \
1243 int t3h; \
1244 int t6h; \
1245 int t8h; \
1246 int t9h; \
1247 int tch; \
1248 int tdh; \
1249 /* TODO: Can we move these into another operation */ \
1250 t8 = -t8; \
1251 t9 = -t9; \
1252 ta = -ta; \
1253 tb = -tb; \
1254 td = -td; \
1255 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1256 OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 136); \
1257 t1 -= (te*13573 + 8192) >> 14; \
1258 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1259 OD_DCT_OVERFLOW_CHECK(t1, 11585, 16384, 137); \
1260 te += (t1*11585 + 16384) >> 15; \
1261 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1262 OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 138); \
1263 t1 -= (te*13573 + 8192) >> 14; \
1264 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1265 OD_DCT_OVERFLOW_CHECK(td, 4161, 8192, 139); \
1266 t2 += (td*4161 + 8192) >> 14; \
1267 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1268 OD_DCT_OVERFLOW_CHECK(t2, 15137, 8192, 140); \
1269 td -= (t2*15137 + 8192) >> 14; \
1270 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1271 OD_DCT_OVERFLOW_CHECK(td, 14341, 8192, 141); \
1272 t2 += (td*14341 + 8192) >> 14; \
1273 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1274 OD_DCT_OVERFLOW_CHECK(t3, 14341, 8192, 142); \
1275 tc -= (t3*14341 + 8192) >> 14; \
1276 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1277 OD_DCT_OVERFLOW_CHECK(tc, 15137, 8192, 143); \
1278 t3 += (tc*15137 + 8192) >> 14; \
1279 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1280 OD_DCT_OVERFLOW_CHECK(t3, 4161, 8192, 144); \
1281 tc -= (t3*4161 + 8192) >> 14; \
 /* t0h is the caller-supplied half value; it is only read here. */ \
1282 te = t0h - te; \
1283 t0 -= te; \
1284 tf = OD_DCT_RSHIFT(t1, 1) - tf; \
1285 t1 -= tf; \
1286 /* TODO: Can we move this into another operation */ \
1287 tc = -tc; \
1288 t2 = OD_DCT_RSHIFT(tc, 1) - t2; \
1289 tc -= t2; \
1290 t3 = OD_DCT_RSHIFT(td, 1) - t3; \
1291 td = t3 - td; \
1292 /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
1293 OD_DCT_OVERFLOW_CHECK(t6, 7489, 4096, 145); \
1294 t9 -= (t6*7489 + 4096) >> 13; \
1295 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
1296 OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 146); \
1297 t6 += (t9*11585 + 8192) >> 14; \
1298 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
 /* NOTE(review): the constant is described as negative, but the step */ \
 /* below adds +19195/32768 of t6; confirm the sign is intended. */ \
1299 OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 147); \
1300 t9 += (t6*19195 + 16384) >> 15; \
1301 t8 += OD_DCT_RSHIFT(t9, 1); \
1302 t9 -= t8; \
 /* t7h is the caller-supplied half value here; it is reassigned below. */ \
1303 t6 = t7h - t6; \
1304 t7 -= t6; \
1305 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1306 OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 148); \
1307 t8 += (t7*6723 + 4096) >> 13; \
1308 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1309 OD_DCT_OVERFLOW_CHECK(t8, 16069, 8192, 149); \
1310 t7 -= (t8*16069 + 8192) >> 14; \
1311 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1312 OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 150); \
1313 t8 += (t7*6723 + 4096) >> 13; \
1314 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1315 OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 151); \
1316 t9 += (t6*17515 + 16384) >> 15; \
1317 /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
1318 OD_DCT_OVERFLOW_CHECK(t9, 13623, 8192, 152); \
1319 t6 -= (t9*13623 + 8192) >> 14; \
1320 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1321 OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 153); \
1322 t9 += (t6*17515 + 16384) >> 15; \
1323 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1324 OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 154); \
1325 t5 += (ta*13573 + 8192) >> 14; \
1326 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1327 OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 155); \
1328 ta -= (t5*11585 + 16384) >> 15; \
1329 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1330 OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 156); \
1331 t5 += (ta*13573 + 8192) >> 14; \
1332 tb += OD_DCT_RSHIFT(t5, 1); \
1333 t5 = tb - t5; \
 /* t4h is the caller-supplied half value; it is only read here. */ \
1334 ta += t4h; \
1335 t4 -= ta; \
1336 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1337 OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 157); \
1338 ta += (t5*2485 + 4096) >> 13; \
1339 /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
1340 OD_DCT_OVERFLOW_CHECK(ta, 18205, 16384, 158); \
1341 t5 -= (ta*18205 + 16384) >> 15; \
1342 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1343 OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 159); \
1344 ta += (t5*2485 + 4096) >> 13; \
1345 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1346 OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 160); \
1347 tb -= (t4*6723 + 4096) >> 13; \
1348 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1349 OD_DCT_OVERFLOW_CHECK(tb, 16069, 8192, 161); \
1350 t4 += (tb*16069 + 8192) >> 14; \
1351 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1352 OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 162); \
1353 tb -= (t4*6723 + 4096) >> 13; \
1354 /* TODO: Can we move this into another operation */ \
1355 t5 = -t5; \
 /* Butterflies whose halved results (tch, t3h, tdh, t2h, t8h, t7h, t6h, */ \
 /* t9h) are reused by the second group of butterflies further down. */ \
1356 tc -= tf; \
1357 tch = OD_DCT_RSHIFT(tc, 1); \
1358 tf += tch; \
1359 t3 += t0; \
1360 t3h = OD_DCT_RSHIFT(t3, 1); \
1361 t0 -= t3h; \
1362 td -= t1; \
1363 tdh = OD_DCT_RSHIFT(td, 1); \
1364 t1 += tdh; \
1365 t2 += te; \
1366 t2h = OD_DCT_RSHIFT(t2, 1); \
1367 te -= t2h; \
1368 t8 += t4; \
1369 t8h = OD_DCT_RSHIFT(t8, 1); \
1370 t4 = t8h - t4; \
1371 t7 = tb - t7; \
1372 t7h = OD_DCT_RSHIFT(t7, 1); \
1373 tb = t7h - tb; \
1374 t6 -= ta; \
1375 t6h = OD_DCT_RSHIFT(t6, 1); \
1376 ta += t6h; \
1377 t9 = t5 - t9; \
1378 t9h = OD_DCT_RSHIFT(t9, 1); \
1379 t5 -= t9h; \
1380 t0 -= t7h; \
1381 t7 += t0; \
1382 tf += t8h; \
1383 t8 -= tf; \
1384 te -= t6h; \
1385 t6 += te; \
1386 t1 += t9h; \
1387 t9 -= t1; \
1388 tb -= tch; \
1389 tc += tb; \
1390 t4 += t3h; \
1391 t3 -= t4; \
1392 ta -= tdh; \
1393 td += ta; \
1394 t5 = t2h - t5; \
1395 t2 -= t5; \
1396 /* TODO: Can we move these into another operation */ \
1397 t8 = -t8; \
1398 t9 = -t9; \
1399 ta = -ta; \
1400 tb = -tb; \
1401 tc = -tc; \
1402 td = -td; \
1403 tf = -tf; \
 /* Final stage: eight groups of three lifting steps, one per pair. */ \
1404 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1405 OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 163); \
1406 t0 -= (tf*7799 + 4096) >> 13; \
1407 /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
1408 OD_DCT_OVERFLOW_CHECK(t0, 4091, 2048, 164); \
1409 tf += (t0*4091 + 2048) >> 12; \
1410 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1411 OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 165); \
1412 t0 -= (tf*7799 + 4096) >> 13; \
1413 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1414 OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 166); \
1415 t1 += (te*2417 + 16384) >> 15; \
1416 /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
1417 OD_DCT_OVERFLOW_CHECK(t1, 601, 2048, 167); \
1418 te -= (t1*601 + 2048) >> 12; \
1419 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1420 OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 168); \
1421 t1 += (te*2417 + 16384) >> 15; \
1422 /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1423 OD_DCT_OVERFLOW_CHECK(t8, 14525, 16384, 169); \
1424 t7 -= (t8*14525 + 16384) >> 15; \
1425 /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
1426 OD_DCT_OVERFLOW_CHECK(t7, 3035, 2048, 170); \
1427 t8 += (t7*3035 + 2048) >> 12; \
1428 /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1429 OD_DCT_OVERFLOW_CHECK(t8, 7263, 8192, 171); \
1430 t7 -= (t8*7263 + 8192) >> 14; \
1431 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1432 OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 172); \
1433 t2 -= (td*6393 + 4096) >> 13; \
1434 /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
1435 OD_DCT_OVERFLOW_CHECK(t2, 3973, 2048, 173); \
1436 td += (t2*3973 + 2048) >> 12; \
1437 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1438 OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 174); \
1439 t2 -= (td*6393 + 4096) >> 13; \
1440 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1441 OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 175); \
1442 t5 -= (ta*9281 + 8192) >> 14; \
1443 /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
1444 OD_DCT_OVERFLOW_CHECK(t5, 7027, 4096, 176); \
1445 ta += (t5*7027 + 4096) >> 13; \
1446 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1447 OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 177); \
1448 t5 -= (ta*9281 + 8192) >> 14; \
1449 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1450 OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 178); \
1451 t3 -= (tc*11539 + 8192) >> 14; \
1452 /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
1453 OD_DCT_OVERFLOW_CHECK(t3, 7713, 4096, 179); \
1454 tc += (t3*7713 + 4096) >> 13; \
1455 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1456 OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 180); \
1457 t3 -= (tc*11539 + 8192) >> 14; \
1458 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1459 OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 181); \
1460 t4 -= (tb*10375 + 8192) >> 14; \
1461 /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
1462 OD_DCT_OVERFLOW_CHECK(t4, 7405, 4096, 182); \
1463 tb += (t4*7405 + 4096) >> 13; \
1464 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1465 OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 183); \
1466 t4 -= (tb*10375 + 8192) >> 14; \
1467 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1468 OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 184); \
1469 t6 -= (t9*8247 + 8192) >> 14; \
1470 /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
1471 OD_DCT_OVERFLOW_CHECK(t6, 1645, 1024, 185); \
1472 t9 += (t6*1645 + 1024) >> 11; \
1473 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1474 OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 186); \
1475 t6 -= (t9*8247 + 8192) >> 14; \
1476 } \
1477 while (0)
1478
/* OD_IDST_16_ASYM: 16-point asymmetric inverse Type-IV DST, computed in place.
   t0h, t2h and teh are outputs: each is assigned inside the macro before it
   is read, and on exit holds OD_DCT_RSHIFT(x, 1) of the final t0, t2 and te
   respectively (OD_IDCT_32 reuses these values after the call).
   All other arguments are read and assigned and are expanded many times, so
   each must be a plain modifiable integer variable with no side effects.
   The steps must not be reordered: each one reads values written by the
   steps before it. */
1479#define OD_IDST_16_ASYM(t0, t0h, t8, t4, tc, t2, t2h, ta, t6, te, teh, \
1480 t1, t9, t5, td, t3, tb, t7, tf) \
1481 /* Embedded 16-point asymmetric Type-IV iDST. */ \
1482 do { \
 /* Halved butterfly results, saved for reuse by later butterflies. */ \
1483 int t1h_; \
1484 int t3h_; \
1485 int t4h; \
1486 int t6h; \
1487 int t9h_; \
1488 int tbh_; \
1489 int tch; \
 /* First stage: eight groups of three lifting steps, one per pair. */ \
1490 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1491 t6 += (t9*8247 + 8192) >> 14; \
1492 /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
1493 t9 -= (t6*1645 + 1024) >> 11; \
1494 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1495 t6 += (t9*8247 + 8192) >> 14; \
1496 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1497 t2 += (td*10375 + 8192) >> 14; \
1498 /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
1499 td -= (t2*7405 + 4096) >> 13; \
1500 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1501 t2 += (td*10375 + 8192) >> 14; \
1502 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1503 tc += (t3*11539 + 8192) >> 14; \
1504 /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
1505 t3 -= (tc*7713 + 4096) >> 13; \
1506 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1507 tc += (t3*11539 + 8192) >> 14; \
1508 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1509 ta += (t5*9281 + 8192) >> 14; \
1510 /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
1511 t5 -= (ta*7027 + 4096) >> 13; \
1512 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1513 ta += (t5*9281 + 8192) >> 14; \
1514 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1515 t4 += (tb*6393 + 4096) >> 13; \
1516 /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
1517 tb -= (t4*3973 + 2048) >> 12; \
1518 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1519 t4 += (tb*6393 + 4096) >> 13; \
1520 /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1521 te += (t1*7263 + 8192) >> 14; \
1522 /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
1523 t1 -= (te*3035 + 2048) >> 12; \
1524 /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1525 te += (t1*14525 + 16384) >> 15; \
1526 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1527 t8 -= (t7*2417 + 16384) >> 15; \
1528 /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
1529 t7 += (t8*601 + 2048) >> 12; \
1530 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1531 t8 -= (t7*2417 + 16384) >> 15; \
1532 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1533 t0 += (tf*7799 + 4096) >> 13; \
1534 /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
1535 tf -= (t0*4091 + 2048) >> 12; \
1536 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1537 t0 += (tf*7799 + 4096) >> 13; \
1538 /* TODO: Can we move these into another operation */ \
1539 t1 = -t1; \
1540 t3 = -t3; \
1541 t5 = -t5; \
1542 t9 = -t9; \
1543 tb = -tb; \
1544 td = -td; \
1545 tf = -tf; \
 /* Butterflies whose halved results are reused by the second group of */ \
 /* butterflies that follows. */ \
1546 t4 += ta; \
1547 t4h = OD_DCT_RSHIFT(t4, 1); \
1548 ta = t4h - ta; \
1549 tb -= t5; \
1550 tbh_ = OD_DCT_RSHIFT(tb, 1); \
1551 t5 += tbh_; \
1552 tc += t2; \
1553 tch = OD_DCT_RSHIFT(tc, 1); \
1554 t2 -= tch; \
1555 t3 -= td; \
1556 t3h_ = OD_DCT_RSHIFT(t3, 1); \
1557 td += t3h_; \
1558 t9 += t8; \
1559 t9h_ = OD_DCT_RSHIFT(t9, 1); \
1560 t8 -= t9h_; \
1561 t6 -= t7; \
1562 t6h = OD_DCT_RSHIFT(t6, 1); \
1563 t7 += t6h; \
1564 t1 += tf; \
1565 t1h_ = OD_DCT_RSHIFT(t1, 1); \
1566 tf -= t1h_; \
1567 te -= t0; \
1568 teh = OD_DCT_RSHIFT(te, 1); \
1569 t0 += teh; \
1570 ta += t9h_; \
1571 t9 = ta - t9; \
1572 t5 -= t6h; \
1573 t6 += t5; \
1574 td = teh - td; \
1575 te = td - te; \
1576 t2 = t1h_ - t2; \
1577 t1 -= t2; \
1578 t7 += t4h; \
1579 t4 -= t7; \
1580 t8 -= tbh_; \
1581 tb += t8; \
1582 t0 += tch; \
1583 tc -= t0; \
1584 tf -= t3h_; \
1585 t3 += tf; \
1586 /* TODO: Can we move this into another operation */ \
1587 ta = -ta; \
1588 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1589 td += (t2*6723 + 4096) >> 13; \
1590 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1591 t2 -= (td*16069 + 8192) >> 14; \
1592 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1593 td += (t2*6723 + 4096) >> 13; \
1594 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1595 t5 -= (ta*2485 + 4096) >> 13; \
1596 /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
1597 ta += (t5*18205 + 16384) >> 15; \
1598 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1599 t5 -= (ta*2485 + 4096) >> 13; \
 /* t2 is not modified after this point, so t2h is the half of the final t2. */ \
1600 t2 += t5; \
1601 t2h = OD_DCT_RSHIFT(t2, 1); \
1602 t5 -= t2h; \
1603 ta = td - ta; \
1604 td -= OD_DCT_RSHIFT(ta, 1); \
1605 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1606 ta -= (t5*13573 + 8192) >> 14; \
1607 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1608 t5 += (ta*11585 + 16384) >> 15; \
1609 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1610 ta -= (t5*13573 + 8192) >> 14; \
1611 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1612 t9 -= (t6*17515 + 16384) >> 15; \
1613 /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
1614 t6 += (t9*13623 + 8192) >> 14; \
1615 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1616 t9 -= (t6*17515 + 16384) >> 15; \
1617 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1618 t1 -= (te*6723 + 4096) >> 13; \
1619 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1620 te += (t1*16069 + 8192) >> 14; \
1621 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1622 t1 -= (te*6723 + 4096) >> 13; \
 /* te is not modified after this point, so teh is the half of the final te. */ \
1623 te += t6; \
1624 teh = OD_DCT_RSHIFT(te, 1); \
1625 t6 = teh - t6; \
1626 t9 += t1; \
1627 t1 -= OD_DCT_RSHIFT(t9, 1); \
1628 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
1629 t9 -= (t6*19195 + 16384) >> 15; \
1630 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
1631 t6 -= (t9*11585 + 8192) >> 14; \
1632 /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
1633 t9 += (t6*7489 + 4096) >> 13; \
1634 tb = tc - tb; \
1635 tc = OD_DCT_RSHIFT(tb, 1) - tc; \
1636 t3 += t4; \
1637 t4 = OD_DCT_RSHIFT(t3, 1) - t4; \
1638 /* TODO: Can we move this into another operation */ \
1639 t3 = -t3; \
1640 t8 += tf; \
1641 tf = OD_DCT_RSHIFT(t8, 1) - tf; \
 /* t0 is not modified after this point, so t0h is the half of the final t0. */ \
1642 t0 += t7; \
1643 t0h = OD_DCT_RSHIFT(t0, 1); \
1644 t7 = t0h - t7; \
1645 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1646 t3 += (tc*4161 + 8192) >> 14; \
1647 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1648 tc -= (t3*15137 + 8192) >> 14; \
1649 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1650 t3 += (tc*14341 + 8192) >> 14; \
1651 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1652 t4 -= (tb*14341 + 8192) >> 14; \
1653 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1654 tb += (t4*15137 + 8192) >> 14; \
1655 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1656 t4 -= (tb*4161 + 8192) >> 14; \
1657 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1658 t8 += (t7*13573 + 8192) >> 14; \
1659 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1660 t7 -= (t8*11585 + 16384) >> 15; \
1661 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1662 t8 += (t7*13573 + 8192) >> 14; \
1663 /* TODO: Can we move these into another operation */ \
1664 t1 = -t1; \
1665 t5 = -t5; \
1666 t9 = -t9; \
1667 tb = -tb; \
1668 td = -td; \
1669 } \
1670 while (0)
1671
/* OD_FDCT_32: 32-point forward Type-II DCT, computed in place on its 32
   arguments.  Each argument is read and assigned and is expanded many times,
   so each must be a plain modifiable integer variable with no side effects.
   Structure:
   1. A butterfly stage pairs the arguments (t0,tv), (t1,tu), (t2,tt), ...,
      (tf,tg), replacing each pair with a sum or difference and a partner
      adjusted by half of it (OD_DCT_RSHIFT(x, 1)).
   2. The first sixteen parameters, together with the halved sums saved in
      tgh, toh, tkh, tsh, tih, tqh, tmh and tuh, are passed to
      OD_FDCT_16_ASYM in parameter order.
   3. The last sixteen parameters are passed to OD_FDST_16_ASYM in reverse
      parameter order, together with the saved halves tvh, tnh and thh. */
1672#define OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
1673 te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
1674 /* Embedded 32-point orthonormal Type-II fDCT. */ \
1675 do { \
 /* Halved values that must survive until the two 16-point sub-transforms. */ \
1676 int tgh; \
1677 int thh; \
1678 int tih; \
1679 int tkh; \
1680 int tmh; \
1681 int tnh; \
1682 int toh; \
1683 int tqh; \
1684 int tsh; \
1685 int tuh; \
1686 int tvh; \
 /* Butterfly stage.  Halves that are only needed once are not stored. */ \
1687 tv = t0 - tv; \
1688 tvh = OD_DCT_RSHIFT(tv, 1); \
1689 t0 -= tvh; \
1690 tu += t1; \
1691 tuh = OD_DCT_RSHIFT(tu, 1); \
1692 t1 = tuh - t1; \
1693 tt = t2 - tt; \
1694 t2 -= OD_DCT_RSHIFT(tt, 1); \
1695 ts += t3; \
1696 tsh = OD_DCT_RSHIFT(ts, 1); \
1697 t3 = tsh - t3; \
1698 tr = t4 - tr; \
1699 t4 -= OD_DCT_RSHIFT(tr, 1); \
1700 tq += t5; \
1701 tqh = OD_DCT_RSHIFT(tq, 1); \
1702 t5 = tqh - t5; \
1703 tp = t6 - tp; \
1704 t6 -= OD_DCT_RSHIFT(tp, 1); \
1705 to += t7; \
1706 toh = OD_DCT_RSHIFT(to, 1); \
1707 t7 = toh - t7; \
1708 tn = t8 - tn; \
1709 tnh = OD_DCT_RSHIFT(tn, 1); \
1710 t8 -= tnh; \
1711 tm += t9; \
1712 tmh = OD_DCT_RSHIFT(tm, 1); \
1713 t9 = tmh - t9; \
1714 tl = ta - tl; \
1715 ta -= OD_DCT_RSHIFT(tl, 1); \
1716 tk += tb; \
1717 tkh = OD_DCT_RSHIFT(tk, 1); \
1718 tb = tkh - tb; \
1719 tj = tc - tj; \
1720 tc -= OD_DCT_RSHIFT(tj, 1); \
1721 ti += td; \
1722 tih = OD_DCT_RSHIFT(ti, 1); \
1723 td = tih - td; \
1724 th = te - th; \
1725 thh = OD_DCT_RSHIFT(th, 1); \
1726 te -= thh; \
1727 tg += tf; \
1728 tgh = OD_DCT_RSHIFT(tg, 1); \
1729 tf = tgh - tf; \
 /* 16-point sub-transforms on the two halves (argument order matters). */ \
1730 OD_FDCT_16_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
1731 t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh); \
1732 OD_FDST_16_ASYM(tv, tvh, tf, tn, tnh, t7, tr, tb, tj, t3, \
1733 tt, td, tl, t5, tp, t9, th, thh, t1); \
1734 } \
1735 while (0)
1736
/* OD_IDCT_32: 32-point inverse Type-II DCT, computed in place on its 32
   arguments.  Each argument is read and assigned and is expanded many times,
   so each must be a plain modifiable integer variable with no side effects.
   Structure:
   1. OD_IDST_16_ASYM processes tv, tn, tr, tj, tt, tl, tp, th, tu, tm, tq,
      ti, ts, tk, to and tg, and writes the halves tvh, tth and thh.
   2. OD_IDCT_16_ASYM processes t0, t8, t4, tc, t2, ta, t6, te, t1, t9, t5,
      td, t3, tb, t7 and tf; the locals t1h, t9h, t5h, tdh, t3h, tbh, t7h and
      tfh are passed uninitialized, so they are presumably filled in by that
      macro (defined outside this section -- confirm).
   3. A final butterfly stage recombines the pairs (t1,tu), (te,th), ...,
      (t0,tv) using those halves or OD_DCT_RSHIFT(x, 1). */
1737#define OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
1738 te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
1739 /* Embedded 32-point orthonormal Type-II iDCT. */ \
1740 do { \
 /* Halved values handed back by the two 16-point sub-transforms. */ \
1741 int t1h; \
1742 int t3h; \
1743 int t5h; \
1744 int t7h; \
1745 int t9h; \
1746 int tbh; \
1747 int tdh; \
1748 int tfh; \
1749 int thh; \
1750 int tth; \
1751 int tvh; \
 /* 16-point sub-transforms on the two halves (argument order matters). */ \
1752 OD_IDST_16_ASYM(tv, tvh, tn, tr, tj, tt, tth, tl, tp, th, thh, \
1753 tu, tm, tq, ti, ts, tk, to, tg); \
1754 OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
1755 t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh); \
 /* Final butterfly stage recombining one value from each half. */ \
1756 tu = t1h - tu; \
1757 t1 -= tu; \
1758 te += thh; \
1759 th = te - th; \
1760 tm = t9h - tm; \
1761 t9 -= tm; \
1762 t6 += OD_DCT_RSHIFT(tp, 1); \
1763 tp = t6 - tp; \
1764 tq = t5h - tq; \
1765 t5 -= tq; \
1766 ta += OD_DCT_RSHIFT(tl, 1); \
1767 tl = ta - tl; \
1768 ti = tdh - ti; \
1769 td -= ti; \
1770 t2 += tth; \
1771 tt = t2 - tt; \
1772 ts = t3h - ts; \
1773 t3 -= ts; \
1774 tc += OD_DCT_RSHIFT(tj, 1); \
1775 tj = tc - tj; \
1776 tk = tbh - tk; \
1777 tb -= tk; \
1778 t4 += OD_DCT_RSHIFT(tr, 1); \
1779 tr = t4 - tr; \
1780 to = t7h - to; \
1781 t7 -= to; \
1782 t8 += OD_DCT_RSHIFT(tn, 1); \
1783 tn = t8 - tn; \
1784 tg = tfh - tg; \
1785 tf -= tg; \
1786 t0 += tvh; \
1787 tv = t0 - tv; \
1788 } \
1789 while (0)
1790
#if CONFIG_TX64X64
/* OD_FDCT_32_ASYM: asymmetric 32-point forward Type-II DCT, applied in
   place.
   Besides the 32 t* lvalues it takes 16 *h arguments (tgh, toh, ..., tvh)
   that are only read here. From their use they look like precomputed halves
   of the matching t* argument - NOTE(review): confirm at the call sites.
   Each t* argument is read and assigned more than once, so it must be a
   modifiable integer lvalue whose evaluation has no side effects.
   Expands to OD_FDCT_16 and OD_FDST_16, which are defined elsewhere in this
   file. */
#define OD_FDCT_32_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
  t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh, t1, th, thh, \
  t9, tp, tph, t5, tl, tlh, td, tt, tth, t3, tj, tjh, tb, tr, trh, \
  t7, tn, tnh, tf, tv, tvh) \
  /* Embedded 32-point asymmetric Type-II fDCT. */ \
  do { \
    /* Butterflies pairing name k with name 31-k, using the supplied halves */ \
    /* in place of a shift. */ \
    t0 += tvh; \
    tv = t0 - tv; \
    t1 = tuh - t1; \
    tu -= t1; \
    t2 += tth; \
    tt = t2 - tt; \
    t3 = tsh - t3; \
    ts -= t3; \
    t4 += trh; \
    tr = t4 - tr; \
    t5 = tqh - t5; \
    tq -= t5; \
    t6 += tph; \
    tp = t6 - tp; \
    t7 = toh - t7; \
    to -= t7; \
    t8 += tnh; \
    tn = t8 - tn; \
    t9 = tmh - t9; \
    tm -= t9; \
    ta += tlh; \
    tl = ta - tl; \
    tb = tkh - tb; \
    tk -= tb; \
    tc += tjh; \
    tj = tc - tj; \
    td = tih - td; \
    ti -= td; \
    te += thh; \
    th = te - th; \
    tf = tgh - tf; \
    tg -= tf; \
    /* 16-point DCT over the even-named terms. */ \
    OD_FDCT_16(t0, tg, t8, to, t4, tk, tc, ts, \
     t2, ti, ta, tq, t6, tm, te, tu); \
    /* 16-point DST over the odd-named terms. */ \
    OD_FDST_16(tv, tf, tn, t7, tr, tb, tj, t3, \
     tt, td, tl, t5, tp, t9, th, t1); \
  } \
  while (0)
1836
/* OD_IDCT_32_ASYM: asymmetric 32-point inverse Type-II DCT, applied in
   place.
   Besides the 32 t* lvalues it takes 16 *h arguments (t1h, thh, ..., tvh)
   that are outputs: each is assigned OD_DCT_RSHIFT(x, 1) of its partner
   during the butterflies and is never read before that assignment.
   All arguments are assigned, so each must be a modifiable integer lvalue
   whose evaluation has no side effects.
   Expands to OD_DCT_RSHIFT, OD_IDST_16 and OD_IDCT_16, which are defined
   elsewhere in this file. */
#define OD_IDCT_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, \
  t6, tm, te, tu, t1, t1h, th, thh, t9, t9h, tp, tph, t5, t5h, tl, tlh, \
  td, tdh, tt, tth, t3, t3h, tj, tjh, tb, tbh, tr, trh, t7, t7h, tn, tnh, \
  tf, tfh, tv, tvh) \
  /* Embedded 32-point asymmetric Type-II iDCT. */ \
  do { \
    OD_IDST_16(tv, tn, tr, tj, tt, tl, tp, th, \
     tu, tm, tq, ti, ts, tk, to, tg); \
    OD_IDCT_16(t0, t8, t4, tc, t2, ta, t6, te, \
     t1, t9, t5, td, t3, tb, t7, tf); \
    /* Butterflies recombining one term from each 16-point half; every */ \
    /* group of three statements also exports the half it computed. */ \
    tv = t0 - tv; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    t0 -= tvh; \
    t1 += tu; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    tu = t1h - tu; \
    tt = t2 - tt; \
    tth = OD_DCT_RSHIFT(tt, 1); \
    t2 -= tth; \
    t3 += ts; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    ts = t3h - ts; \
    tr = t4 - tr; \
    trh = OD_DCT_RSHIFT(tr, 1); \
    t4 -= trh; \
    t5 += tq; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    tq = t5h - tq; \
    tp = t6 - tp; \
    tph = OD_DCT_RSHIFT(tp, 1); \
    t6 -= tph; \
    t7 += to; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    to = t7h - to; \
    tn = t8 - tn; \
    tnh = OD_DCT_RSHIFT(tn, 1); \
    t8 -= tnh; \
    t9 += tm; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    tm = t9h - tm; \
    tl = ta - tl; \
    tlh = OD_DCT_RSHIFT(tl, 1); \
    ta -= tlh; \
    tb += tk; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    tk = tbh - tk; \
    tj = tc - tj; \
    tjh = OD_DCT_RSHIFT(tj, 1); \
    tc -= tjh; \
    td += ti; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    ti = tdh - ti; \
    th = te - th; \
    thh = OD_DCT_RSHIFT(th, 1); \
    te -= thh; \
    tf += tg; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    tg = tfh - tg; \
  } \
  while (0)
1897
1898#define OD_FDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
1899 tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
1900 /* Embedded 32-point asymmetric Type-IV fDST. */ \
1901 do { \
1902 int t0h; \
1903 int t1h; \
1904 int t4h; \
1905 int t5h; \
1906 int tqh; \
1907 int trh; \
1908 int tuh; \
1909 int tvh; \
1910 \
1911 tu = -tu; \
1912 \
1913 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1914 OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 271); \
1915 t5 -= (tq*13573 + 8192) >> 14; \
1916 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1917 OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 272); \
1918 tq += (t5*11585 + 16384) >> 15; \
1919 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1920 OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 273); \
1921 t5 -= (tq*13573 + 8192) >> 14; \
1922 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
1923 OD_DCT_OVERFLOW_CHECK(t6, 29957, 16384, 274); \
1924 tp += (t6*29957 + 16384) >> 15; \
1925 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
1926 OD_DCT_OVERFLOW_CHECK(tp, 11585, 8192, 275); \
1927 t6 -= (tp*11585 + 8192) >> 14; \
1928 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
1929 OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 276); \
1930 tp -= (t6*19195 + 16384) >> 15; \
1931 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
1932 OD_DCT_OVERFLOW_CHECK(t1, 29957, 16384, 277); \
1933 tu += (t1*29957 + 16384) >> 15; \
1934 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
1935 OD_DCT_OVERFLOW_CHECK(tu, 11585, 8192, 278); \
1936 t1 -= (tu*11585 + 8192) >> 14; \
1937 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
1938 OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 279); \
1939 tu -= (t1*19195 + 16384) >> 15; \
1940 /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1941 OD_DCT_OVERFLOW_CHECK(t2, 28681, 16384, 280); \
1942 tt += (t2*28681 + 16384) >> 15; \
1943 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1944 OD_DCT_OVERFLOW_CHECK(tt, 15137, 8192, 281); \
1945 t2 -= (tt*15137 + 8192) >> 14; \
1946 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1947 OD_DCT_OVERFLOW_CHECK(t2, 4161, 8192, 282); \
1948 tt += (t2*4161 + 8192) >> 14; \
1949 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1950 OD_DCT_OVERFLOW_CHECK(ts, 4161, 8192, 283); \
1951 t3 += (ts*4161 + 8192) >> 14; \
1952 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1953 OD_DCT_OVERFLOW_CHECK(t3, 15137, 8192, 284); \
1954 ts -= (t3*15137 + 8192) >> 14; \
1955 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1956 OD_DCT_OVERFLOW_CHECK(ts, 14341, 8192, 285); \
1957 t3 += (ts*14341 + 8192) >> 14; \
1958 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
1959 OD_DCT_OVERFLOW_CHECK(tm, 19195, 16384, 286); \
1960 t9 -= (tm*19195 + 16384) >> 15; \
1961 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
1962 OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 287); \
1963 tm -= (t9*11585 + 8192) >> 14; \
1964 /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
1965 OD_DCT_OVERFLOW_CHECK(tm, 7489, 4096, 288); \
1966 t9 += (tm*7489 + 4096) >> 13; \
1967 /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
1968 OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 289); \
1969 ta += (tl*3259 + 4096) >> 13; \
1970 /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
1971 OD_DCT_OVERFLOW_CHECK(ta, 3135, 8192, 290); \
1972 tl -= (ta*3135 + 8192) >> 14; \
1973 /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
1974 OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 291); \
1975 ta += (tl*3259 + 4096) >> 13; \
1976 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1977 OD_DCT_OVERFLOW_CHECK(tk, 4161, 8192, 292); \
1978 tb += (tk*4161 + 8192) >> 14; \
1979 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1980 OD_DCT_OVERFLOW_CHECK(tb, 15137, 8192, 293); \
1981 tk -= (tb*15137 + 8192) >> 14; \
1982 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1983 OD_DCT_OVERFLOW_CHECK(tk, 14341, 8192, 294); \
1984 tb += (tk*14341 + 8192) >> 14; \
1985 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
1986 OD_DCT_OVERFLOW_CHECK(te, 29957, 16384, 295); \
1987 th += (te*29957 + 16384) >> 15; \
1988 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
1989 OD_DCT_OVERFLOW_CHECK(th, 11585, 8192, 296); \
1990 te -= (th*11585 + 8192) >> 14; \
1991 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
1992 OD_DCT_OVERFLOW_CHECK(te, 19195, 16384, 297); \
1993 th -= (te*19195 + 16384) >> 15; \
1994 /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1995 OD_DCT_OVERFLOW_CHECK(tc, 28681, 16384, 298); \
1996 tj += (tc*28681 + 16384) >> 15; \
1997 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1998 OD_DCT_OVERFLOW_CHECK(tj, 15137, 8192, 299); \
1999 tc -= (tj*15137 + 8192) >> 14; \
2000 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2001 OD_DCT_OVERFLOW_CHECK(tc, 4161, 8192, 300); \
2002 tj += (tc*4161 + 8192) >> 14; \
2003 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2004 OD_DCT_OVERFLOW_CHECK(ti, 4161, 8192, 301); \
2005 td += (ti*4161 + 8192) >> 14; \
2006 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2007 OD_DCT_OVERFLOW_CHECK(td, 15137, 8192, 302); \
2008 ti -= (td*15137 + 8192) >> 14; \
2009 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2010 OD_DCT_OVERFLOW_CHECK(ti, 14341, 8192, 303); \
2011 td += (ti*14341 + 8192) >> 14; \
2012 \
2013 t1 = -t1; \
2014 t2 = -t2; \
2015 t3 = -t3; \
2016 td = -td; \
2017 tg = -tg; \
2018 to = -to; \
2019 ts = -ts; \
2020 \
2021 tr -= OD_DCT_RSHIFT(t5, 1); \
2022 t5 += tr; \
2023 tq -= OD_DCT_RSHIFT(t4, 1); /* pass */ \
2024 t4 += tq; \
2025 t6 -= OD_DCT_RSHIFT(t7, 1); \
2026 t7 += t6; \
2027 to -= OD_DCT_RSHIFT(tp, 1); /* pass */ \
2028 tp += to; \
2029 t1 += OD_DCT_RSHIFT(t0, 1); /* pass */ \
2030 t0 -= t1; \
2031 tv -= OD_DCT_RSHIFT(tu, 1); \
2032 tu += tv; \
2033 t3 -= OD_DCT_RSHIFT(tt, 1); \
2034 tt += t3; \
2035 t2 += OD_DCT_RSHIFT(ts, 1); \
2036 ts -= t2; \
2037 t9 -= OD_DCT_RSHIFT(t8, 1); /* pass */ \
2038 t8 += t9; \
2039 tn += OD_DCT_RSHIFT(tm, 1); \
2040 tm -= tn; \
2041 tb += OD_DCT_RSHIFT(ta, 1); \
2042 ta -= tb; \
2043 tl -= OD_DCT_RSHIFT(tk, 1); \
2044 tk += tl; \
2045 te -= OD_DCT_RSHIFT(tf, 1); /* pass */ \
2046 tf += te; \
2047 tg -= OD_DCT_RSHIFT(th, 1); \
2048 th += tg; \
2049 tc -= OD_DCT_RSHIFT(ti, 1); \
2050 ti += tc; \
2051 td += OD_DCT_RSHIFT(tj, 1); \
2052 tj -= td; \
2053 \
2054 t4 = -t4; \
2055 \
2056 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
2057 OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 304); \
2058 t4 += (tr*6723 + 4096) >> 13; \
2059 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
2060 OD_DCT_OVERFLOW_CHECK(t4, 16069, 8192, 305); \
2061 tr -= (t4*16069 + 8192) >> 14; \
2062 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
2063 OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 306); \
2064 t4 += (tr*6723 + 4096) >> 13; \
2065 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
2066 OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 307); \
2067 t5 += (tq*17515 + 16384) >> 15; \
2068 /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
2069 OD_DCT_OVERFLOW_CHECK(t5, 13623, 8192, 308); \
2070 tq -= (t5*13623 + 8192) >> 14; \
2071 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
2072 OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 309); \
2073 t5 += (tq*17515 + 16384) >> 15; \
2074 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
2075 OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 310); \
2076 t7 += (to*3227 + 16384) >> 15; \
2077 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
2078 OD_DCT_OVERFLOW_CHECK(t7, 6393, 16384, 311); \
2079 to -= (t7*6393 + 16384) >> 15; \
2080 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
2081 OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 312); \
2082 t7 += (to*3227 + 16384) >> 15; \
2083 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
2084 OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 313); \
2085 t6 += (tp*2485 + 4096) >> 13; \
2086 /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
2087 OD_DCT_OVERFLOW_CHECK(t6, 18205, 16384, 314); \
2088 tp -= (t6*18205 + 16384) >> 15; \
2089 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
2090 OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 315); \
2091 t6 += (tp*2485 + 4096) >> 13; \
2092 \
2093 t5 = -t5; \
2094 \
2095 tr += to; \
2096 trh = OD_DCT_RSHIFT(tr, 1); \
2097 to -= trh; \
2098 t4 += t7; \
2099 t4h = OD_DCT_RSHIFT(t4, 1); \
2100 t7 -= t4h; \
2101 t5 += tp; \
2102 t5h = OD_DCT_RSHIFT(t5, 1); \
2103 tp -= t5h; \
2104 tq += t6; \
2105 tqh = OD_DCT_RSHIFT(tq, 1); \
2106 t6 -= tqh; \
2107 t0 -= t3; \
2108 t0h = OD_DCT_RSHIFT(t0, 1); \
2109 t3 += t0h; \
2110 tv -= ts; \
2111 tvh = OD_DCT_RSHIFT(tv, 1); \
2112 ts += tvh; \
2113 tu += tt; \
2114 tuh = OD_DCT_RSHIFT(tu, 1); \
2115 tt -= tuh; \
2116 t1 -= t2; \
2117 t1h = OD_DCT_RSHIFT(t1, 1); \
2118 t2 += t1h; \
2119 t8 += tb; \
2120 tb -= OD_DCT_RSHIFT(t8, 1); \
2121 tn += tk; \
2122 tk -= OD_DCT_RSHIFT(tn, 1); \
2123 t9 += tl; \
2124 tl -= OD_DCT_RSHIFT(t9, 1); \
2125 tm -= ta; \
2126 ta += OD_DCT_RSHIFT(tm, 1); \
2127 tc -= tf; \
2128 tf += OD_DCT_RSHIFT(tc, 1); \
2129 tj += tg; \
2130 tg -= OD_DCT_RSHIFT(tj, 1); \
2131 td -= te; \
2132 te += OD_DCT_RSHIFT(td, 1); \
2133 ti += th; \
2134 th -= OD_DCT_RSHIFT(ti, 1); \
2135 \
2136 t9 = -t9; \
2137 tl = -tl; \
2138 \
2139 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2140 OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 316); \
2141 t8 += (tn*805 + 8192) >> 14; \
2142 /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
2143 OD_DCT_OVERFLOW_CHECK(t8, 803, 4096, 317); \
2144 tn -= (t8*803 + 4096) >> 13; \
2145 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2146 OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 318); \
2147 t8 += (tn*805 + 8192) >> 14; \
2148 /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2149 OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 319); \
2150 tk += (tb*11725 + 16384) >> 15; \
2151 /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
2152 OD_DCT_OVERFLOW_CHECK(tk, 5197, 4096, 320); \
2153 tb -= (tk*5197 + 4096) >> 13; \
2154 /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2155 OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 321); \
2156 tk += (tb*11725 + 16384) >> 15; \
2157 /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
2158 OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 322); \
2159 ta += (tl*2455 + 2048) >> 12; \
2160 /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
2161 OD_DCT_OVERFLOW_CHECK(ta, 14449, 8192, 323); \
2162 tl -= (ta*14449 + 8192) >> 14; \
2163 /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
2164 OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 324); \
2165 ta += (tl*2455 + 2048) >> 12; \
2166 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2167 OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 325); \
2168 t9 += (tm*4861 + 16384) >> 15; \
2169 /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
2170 OD_DCT_OVERFLOW_CHECK(t9, 1189, 2048, 326); \
2171 tm -= (t9*1189 + 2048) >> 12; \
2172 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2173 OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 327); \
2174 t9 += (tm*4861 + 16384) >> 15; \
2175 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2176 OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 328); \
2177 tf += (tg*805 + 8192) >> 14; \
2178 /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
2179 OD_DCT_OVERFLOW_CHECK(tf, 803, 4096, 329); \
2180 tg -= (tf*803 + 4096) >> 13; \
2181 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2182 OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 330); \
2183 tf += (tg*805 + 8192) >> 14; \
2184 /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2185 OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 331); \
2186 tc += (tj*2931 + 4096) >> 13; \
2187 /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
2188 OD_DCT_OVERFLOW_CHECK(tc, 5197, 4096, 332); \
2189 tj -= (tc*5197 + 4096) >> 13; \
2190 /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2191 OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 333); \
2192 tc += (tj*2931 + 4096) >> 13; \
2193 /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
2194 OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 334); \
2195 td += (ti*513 + 1024) >> 11; \
2196 /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
2197 OD_DCT_OVERFLOW_CHECK(td, 7723, 8192, 335); \
2198 ti -= (td*7723 + 8192) >> 14; \
2199 /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
2200 OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 336); \
2201 td += (ti*513 + 1024) >> 11; \
2202 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2203 OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 337); \
2204 te += (th*4861 + 16384) >> 15; \
2205 /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
2206 OD_DCT_OVERFLOW_CHECK(te, 1189, 2048, 338); \
2207 th -= (te*1189 + 2048) >> 12; \
2208 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2209 OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 339); \
2210 te += (th*4861 + 16384) >> 15; \
2211 \
2212 ta = -ta; \
2213 tb = -tb; \
2214 \
2215 tt += t5h; \
2216 t5 -= tt; \
2217 t2 -= tqh; \
2218 tq += t2; \
2219 tp += t1h; \
2220 t1 -= tp; \
2221 t6 -= tuh; \
2222 tu += t6; \
2223 t7 += tvh; \
2224 tv -= t7; \
2225 to += t0h; \
2226 t0 -= to; \
2227 t3 -= t4h; \
2228 t4 += t3; \
2229 ts += trh; \
2230 tr -= ts; \
2231 tf -= OD_DCT_RSHIFT(tn, 1); \
2232 tn += tf; \
2233 tg -= OD_DCT_RSHIFT(t8, 1); \
2234 t8 += tg; \
2235 tk += OD_DCT_RSHIFT(tc, 1); \
2236 tc -= tk; \
2237 tb += OD_DCT_RSHIFT(tj, 1); \
2238 tj -= tb; \
2239 ta += OD_DCT_RSHIFT(ti, 1); \
2240 ti -= ta; \
2241 tl += OD_DCT_RSHIFT(td, 1); \
2242 td -= tl; \
2243 te -= OD_DCT_RSHIFT(tm, 1); \
2244 tm += te; \
2245 th -= OD_DCT_RSHIFT(t9, 1); \
2246 t9 += th; \
2247 ta -= t5; \
2248 t5 += OD_DCT_RSHIFT(ta, 1); \
2249 tq -= tl; \
2250 tl += OD_DCT_RSHIFT(tq, 1); \
2251 t2 -= ti; \
2252 ti += OD_DCT_RSHIFT(t2, 1); \
2253 td -= tt; \
2254 tt += OD_DCT_RSHIFT(td, 1); \
2255 tm += tp; \
2256 tp -= OD_DCT_RSHIFT(tm, 1); \
2257 t6 += t9; \
2258 t9 -= OD_DCT_RSHIFT(t6, 1); \
2259 te -= tu; \
2260 tu += OD_DCT_RSHIFT(te, 1); \
2261 t1 -= th; \
2262 th += OD_DCT_RSHIFT(t1, 1); \
2263 t0 -= tg; \
2264 tg += OD_DCT_RSHIFT(t0, 1); \
2265 tf += tv; \
2266 tv -= OD_DCT_RSHIFT(tf, 1); \
2267 t8 -= t7; \
2268 t7 += OD_DCT_RSHIFT(t8, 1); \
2269 to -= tn; \
2270 tn += OD_DCT_RSHIFT(to, 1); \
2271 t4 -= tk; \
2272 tk += OD_DCT_RSHIFT(t4, 1); \
2273 tb -= tr; \
2274 tr += OD_DCT_RSHIFT(tb, 1); \
2275 t3 -= tj; \
2276 tj += OD_DCT_RSHIFT(t3, 1); \
2277 tc -= ts; \
2278 ts += OD_DCT_RSHIFT(tc, 1); \
2279 \
2280 tr = -tr; \
2281 ts = -ts; \
2282 tt = -tt; \
2283 tu = -tu; \
2284 \
2285 /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
2286 OD_DCT_OVERFLOW_CHECK(t0, 2847, 2048, 340); \
2287 tv += (t0*2847 + 2048) >> 12; \
2288 /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
2289 OD_DCT_OVERFLOW_CHECK(tv, 5791, 2048, 341); \
2290 t0 -= (tv*5791 + 2048) >> 12; \
2291 /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
2292 OD_DCT_OVERFLOW_CHECK(t0, 5593, 4096, 342); \
2293 tv += (t0*5593 + 4096) >> 13; \
2294 /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
2295 OD_DCT_OVERFLOW_CHECK(tf, 4099, 4096, 343); \
2296 tg -= (tf*4099 + 4096) >> 13; \
2297 /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
2298 OD_DCT_OVERFLOW_CHECK(tg, 1997, 1024, 344); \
2299 tf += (tg*1997 + 1024) >> 11; \
2300 /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
2301 OD_DCT_OVERFLOW_CHECK(tf, 815, 16384, 345); \
2302 tg += (tf*815 + 16384) >> 15; \
2303 /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
2304 OD_DCT_OVERFLOW_CHECK(t8, 2527, 2048, 346); \
2305 tn -= (t8*2527 + 2048) >> 12; \
2306 /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
2307 OD_DCT_OVERFLOW_CHECK(tn, 4695, 4096, 347); \
2308 t8 += (tn*4695 + 4096) >> 13; \
2309 /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
2310 OD_DCT_OVERFLOW_CHECK(t8, 4187, 4096, 348); \
2311 tn += (t8*4187 + 4096) >> 13; \
2312 /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
2313 OD_DCT_OVERFLOW_CHECK(to, 5477, 4096, 349); \
2314 t7 += (to*5477 + 4096) >> 13; \
2315 /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
2316 OD_DCT_OVERFLOW_CHECK(t7, 4169, 4096, 350); \
2317 to -= (t7*4169 + 4096) >> 13; \
2318 /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
2319 OD_DCT_OVERFLOW_CHECK(to, 2571, 2048, 351); \
2320 t7 -= (to*2571 + 2048) >> 12; \
2321 /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
2322 OD_DCT_OVERFLOW_CHECK(t2, 5331, 4096, 352); \
2323 tt += (t2*5331 + 4096) >> 13; \
2324 /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
2325 OD_DCT_OVERFLOW_CHECK(tt, 5749, 2048, 353); \
2326 t2 -= (tt*5749 + 2048) >> 12; \
2327 /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
2328 OD_DCT_OVERFLOW_CHECK(t2, 2413, 2048, 354); \
2329 tt += (t2*2413 + 2048) >> 12; \
2330 /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
2331 OD_DCT_OVERFLOW_CHECK(td, 4167, 4096, 355); \
2332 ti -= (td*4167 + 4096) >> 13; \
2333 /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
2334 OD_DCT_OVERFLOW_CHECK(ti, 891, 512, 356); \
2335 td += (ti*891 + 512) >> 10; \
2336 /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
2337 OD_DCT_OVERFLOW_CHECK(td, 4327, 16384, 357); \
2338 ti += (td*4327 + 16384) >> 15; \
2339 /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
2340 OD_DCT_OVERFLOW_CHECK(ta, 2261, 2048, 358); \
2341 tl -= (ta*2261 + 2048) >> 12; \
2342 /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
2343 OD_DCT_OVERFLOW_CHECK(tl, 2855, 2048, 359); \
2344 ta += (tl*2855 + 2048) >> 12; \
2345 /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
2346 OD_DCT_OVERFLOW_CHECK(ta, 5417, 8192, 360); \
2347 tl += (ta*5417 + 8192) >> 14; \
2348 /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
2349 OD_DCT_OVERFLOW_CHECK(tq, 3459, 2048, 361); \
2350 t5 += (tq*3459 + 2048) >> 12; \
2351 /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
2352 OD_DCT_OVERFLOW_CHECK(t5, 1545, 2048, 362); \
2353 tq -= (t5*1545 + 2048) >> 12; \
2354 /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
2355 OD_DCT_OVERFLOW_CHECK(tq, 1971, 1024, 363); \
2356 t5 -= (tq*1971 + 1024) >> 11; \
2357 /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
2358 OD_DCT_OVERFLOW_CHECK(t3, 323, 256, 364); \
2359 ts += (t3*323 + 256) >> 9; \
2360 /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
2361 OD_DCT_OVERFLOW_CHECK(ts, 5707, 2048, 365); \
2362 t3 -= (ts*5707 + 2048) >> 12; \
2363 /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
2364 OD_DCT_OVERFLOW_CHECK(t3, 2229, 2048, 366); \
2365 ts += (t3*2229 + 2048) >> 12; \
2366 /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
2367 OD_DCT_OVERFLOW_CHECK(tc, 1061, 1024, 367); \
2368 tj -= (tc*1061 + 1024) >> 11; \
2369 /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
2370 OD_DCT_OVERFLOW_CHECK(tj, 6671, 4096, 368); \
2371 tc += (tj*6671 + 4096) >> 13; \
2372 /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
2373 OD_DCT_OVERFLOW_CHECK(tc, 6287, 16384, 369); \
2374 tj += (tc*6287 + 16384) >> 15; \
2375 /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
2376 OD_DCT_OVERFLOW_CHECK(tb, 4359, 4096, 370); \
2377 tk -= (tb*4359 + 4096) >> 13; \
2378 /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
2379 OD_DCT_OVERFLOW_CHECK(tk, 3099, 2048, 371); \
2380 tb += (tk*3099 + 2048) >> 12; \
2381 /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
2382 OD_DCT_OVERFLOW_CHECK(tb, 2109, 4096, 372); \
2383 tk += (tb*2109 + 4096) >> 13; \
2384 /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
2385 OD_DCT_OVERFLOW_CHECK(t4, 5017, 4096, 373); \
2386 tr += (t4*5017 + 4096) >> 13; \
2387 /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
2388 OD_DCT_OVERFLOW_CHECK(tr, 1413, 512, 374); \
2389 t4 -= (tr*1413 + 512) >> 10; \
2390 /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
2391 OD_DCT_OVERFLOW_CHECK(t4, 8195, 8192, 375); \
2392 tr += (t4*8195 + 8192) >> 14; \
2393 /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
2394 OD_DCT_OVERFLOW_CHECK(tm, 2373, 2048, 376); \
2395 t9 += (tm*2373 + 2048) >> 12; \
2396 /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
2397 OD_DCT_OVERFLOW_CHECK(t9, 5209, 4096, 377); \
2398 tm -= (t9*5209 + 4096) >> 13; \
2399 /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
2400 OD_DCT_OVERFLOW_CHECK(tm, 3391, 4096, 378); \
2401 t9 -= (tm*3391 + 4096) >> 13; \
2402 /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
2403 OD_DCT_OVERFLOW_CHECK(t6, 1517, 1024, 379); \
2404 tp -= (t6*1517 + 1024) >> 11; \
2405 /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
2406 OD_DCT_OVERFLOW_CHECK(tp, 1817, 2048, 380); \
2407 t6 += (tp*1817 + 2048) >> 12; \
2408 /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
2409 OD_DCT_OVERFLOW_CHECK(t6, 6331, 4096, 381); \
2410 tp += (t6*6331 + 4096) >> 13; \
2411 /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
2412 OD_DCT_OVERFLOW_CHECK(te, 515, 512, 382); \
2413 th -= (te*515 + 512) >> 10; \
2414 /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
2415 OD_DCT_OVERFLOW_CHECK(th, 7567, 4096, 383); \
2416 te += (th*7567 + 4096) >> 13; \
2417 /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
2418 OD_DCT_OVERFLOW_CHECK(te, 2513, 16384, 384); \
2419 th += (te*2513 + 16384) >> 15; \
2420 /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
2421 OD_DCT_OVERFLOW_CHECK(t1, 2753, 2048, 385); \
2422 tu += (t1*2753 + 2048) >> 12; \
2423 /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
2424 OD_DCT_OVERFLOW_CHECK(tu, 5777, 2048, 386); \
2425 t1 -= (tu*5777 + 2048) >> 12; \
2426 /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
2427 OD_DCT_OVERFLOW_CHECK(t1, 1301, 1024, 387); \
2428 tu += (t1*1301 + 1024) >> 11; \
2429 } \
2430 while (0)
2431
2432#define OD_IDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
2433 tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
2434 /* Embedded 32-point asymmetric Type-IV iDST. */ \
2435 do { \
2436 int t0h; \
2437 int t4h; \
2438 int tbh; \
2439 int tfh; \
2440 int tgh; \
2441 int tkh; \
2442 int trh; \
2443 int tvh; \
2444 /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
2445 tf -= (tg*1301 + 1024) >> 11; \
2446 /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
2447 tg += (tf*5777 + 2048) >> 12; \
2448 /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
2449 tf -= (tg*2753 + 2048) >> 12; \
2450 /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
2451 th -= (te*2513 + 16384) >> 15; \
2452 /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
2453 te -= (th*7567 + 4096) >> 13; \
2454 /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
2455 th += (te*515 + 512) >> 10; \
2456 /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
2457 tj -= (tc*6331 + 4096) >> 13; \
2458 /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
2459 tc -= (tj*1817 + 2048) >> 12; \
2460 /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
2461 tj += (tc*1517 + 1024) >> 11; \
2462 /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
2463 ti += (td*3391 + 4096) >> 13; \
2464 /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
2465 td += (ti*5209 + 4096) >> 13; \
2466 /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
2467 ti -= (td*2373 + 2048) >> 12; \
2468 /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
2469 tr -= (t4*8195 + 8192) >> 14; \
2470 /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
2471 t4 += (tr*1413 + 512) >> 10; \
2472 /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
2473 tr -= (t4*5017 + 4096) >> 13; \
2474 /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
2475 t5 -= (tq*2109 + 4096) >> 13; \
2476 /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
2477 tq -= (t5*3099 + 2048) >> 12; \
2478 /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
2479 t5 += (tq*4359 + 4096) >> 13; \
2480 /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
2481 tp -= (t6*6287 + 16384) >> 15; \
2482 /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
2483 t6 -= (tp*6671 + 4096) >> 13; \
2484 /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
2485 tp += (t6*1061 + 1024) >> 11; \
2486 /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
2487 t7 -= (to*2229 + 2048) >> 12; \
2488 /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
2489 to += (t7*5707 + 2048) >> 12; \
2490 /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
2491 t7 -= (to*323 + 256) >> 9; \
2492 /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
2493 tk += (tb*1971 + 1024) >> 11; \
2494 /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
2495 tb += (tk*1545 + 2048) >> 12; \
2496 /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
2497 tk -= (tb*3459 + 2048) >> 12; \
2498 /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
2499 tl -= (ta*5417 + 8192) >> 14; \
2500 /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
2501 ta -= (tl*2855 + 2048) >> 12; \
2502 /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
2503 tl += (ta*2261 + 2048) >> 12; \
2504 /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
2505 t9 -= (tm*4327 + 16384) >> 15; \
2506 /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
2507 tm -= (t9*891 + 512) >> 10; \
2508 /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
2509 t9 += (tm*4167 + 4096) >> 13; \
2510 /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
2511 tn -= (t8*2413 + 2048) >> 12; \
2512 /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
2513 t8 += (tn*5749 + 2048) >> 12; \
2514 /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
2515 tn -= (t8*5331 + 4096) >> 13; \
2516 /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
2517 ts += (t3*2571 + 2048) >> 12; \
2518 /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
2519 t3 += (ts*4169 + 4096) >> 13; \
2520 /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
2521 ts -= (t3*5477 + 4096) >> 13; \
2522 /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
2523 tt -= (t2*4187 + 4096) >> 13; \
2524 /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
2525 t2 -= (tt*4695 + 4096) >> 13; \
2526 /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
2527 tt += (t2*2527 + 2048) >> 12; \
2528 /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
2529 t1 -= (tu*815 + 16384) >> 15; \
2530 /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
2531 tu -= (t1*1997 + 1024) >> 11; \
2532 /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
2533 t1 += (tu*4099 + 4096) >> 13; \
2534 /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
2535 tv -= (t0*5593 + 4096) >> 13; \
2536 /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
2537 t0 += (tv*5791 + 2048) >> 12; \
2538 /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
2539 tv -= (t0*2847 + 2048) >> 12; \
2540 \
2541 t7 = -t7; \
2542 tf = -tf; \
2543 tn = -tn; \
2544 tr = -tr; \
2545 \
2546 t7 -= OD_DCT_RSHIFT(t6, 1); \
2547 t6 += t7; \
2548 tp -= OD_DCT_RSHIFT(to, 1); \
2549 to += tp; \
2550 tr -= OD_DCT_RSHIFT(tq, 1); \
2551 tq += tr; \
2552 t5 -= OD_DCT_RSHIFT(t4, 1); \
2553 t4 += t5; \
2554 tt -= OD_DCT_RSHIFT(t3, 1); \
2555 t3 += tt; \
2556 ts -= OD_DCT_RSHIFT(t2, 1); \
2557 t2 += ts; \
2558 tv += OD_DCT_RSHIFT(tu, 1); \
2559 tu -= tv; \
2560 t1 -= OD_DCT_RSHIFT(t0, 1); \
2561 t0 += t1; \
2562 th -= OD_DCT_RSHIFT(tg, 1); \
2563 tg += th; \
2564 tf -= OD_DCT_RSHIFT(te, 1); \
2565 te += tf; \
2566 ti += OD_DCT_RSHIFT(tc, 1); \
2567 tc -= ti; \
2568 tj += OD_DCT_RSHIFT(td, 1); \
2569 td -= tj; \
2570 tn -= OD_DCT_RSHIFT(tm, 1); \
2571 tm += tn; \
2572 t9 -= OD_DCT_RSHIFT(t8, 1); \
2573 t8 += t9; \
2574 tl -= OD_DCT_RSHIFT(tb, 1); \
2575 tb += tl; \
2576 tk -= OD_DCT_RSHIFT(ta, 1); \
2577 ta += tk; \
2578 \
2579 ti -= th; \
2580 th += OD_DCT_RSHIFT(ti, 1); \
2581 td -= te; \
2582 te += OD_DCT_RSHIFT(td, 1); \
2583 tm += tl; \
2584 tl -= OD_DCT_RSHIFT(tm, 1); \
2585 t9 += ta; \
2586 ta -= OD_DCT_RSHIFT(t9, 1); \
2587 tp += tq; \
2588 tq -= OD_DCT_RSHIFT(tp, 1); \
2589 t6 += t5; \
2590 t5 -= OD_DCT_RSHIFT(t6, 1); \
2591 t2 -= t1; \
2592 t1 += OD_DCT_RSHIFT(t2, 1); \
2593 tt -= tu; \
2594 tu += OD_DCT_RSHIFT(tt, 1); \
2595 tr += t7; \
2596 trh = OD_DCT_RSHIFT(tr, 1); \
2597 t7 -= trh; \
2598 t4 -= to; \
2599 t4h = OD_DCT_RSHIFT(t4, 1); \
2600 to += t4h; \
2601 t0 += t3; \
2602 t0h = OD_DCT_RSHIFT(t0, 1); \
2603 t3 -= t0h; \
2604 tv += ts; \
2605 tvh = OD_DCT_RSHIFT(tv, 1); \
2606 ts -= tvh; \
2607 tf -= tc; \
2608 tfh = OD_DCT_RSHIFT(tf, 1); \
2609 tc += tfh; \
2610 tg += tj; \
2611 tgh = OD_DCT_RSHIFT(tg, 1); \
2612 tj -= tgh; \
2613 tb -= t8; \
2614 tbh = OD_DCT_RSHIFT(tb, 1); \
2615 t8 += tbh; \
2616 tk += tn; \
2617 tkh = OD_DCT_RSHIFT(tk, 1); \
2618 tn -= tkh; \
2619 \
2620 ta = -ta; \
2621 tq = -tq; \
2622 \
2623 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2624 te -= (th*4861 + 16384) >> 15; \
2625 /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
2626 th += (te*1189 + 2048) >> 12; \
2627 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2628 te -= (th*4861 + 16384) >> 15; \
2629 /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
2630 tm -= (t9*513 + 1024) >> 11; \
2631 /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
2632 t9 += (tm*7723 + 8192) >> 14; \
2633 /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
2634 tm -= (t9*513 + 1024) >> 11; \
2635 /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2636 t6 -= (tp*2931 + 4096) >> 13; \
2637 /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
2638 tp += (t6*5197 + 4096) >> 13; \
2639 /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2640 t6 -= (tp*2931 + 4096) >> 13; \
2641 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2642 tu -= (t1*805 + 8192) >> 14; \
2643 /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
2644 t1 += (tu*803 + 4096) >> 13; \
2645 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2646 tu -= (t1*805 + 8192) >> 14; \
2647 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2648 ti -= (td*4861 + 16384) >> 15; \
2649 /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
2650 td += (ti*1189 + 2048) >> 12; \
2651 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2652 ti -= (td*4861 + 16384) >> 15; \
2653 /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
2654 ta -= (tl*2455 + 2048) >> 12; \
2655 /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
2656 tl += (ta*14449 + 8192) >> 14; \
2657 /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
2658 ta -= (tl*2455 + 2048) >> 12; \
2659 /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2660 t5 -= (tq*11725 + 16384) >> 15; \
2661 /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
2662 tq += (t5*5197 + 4096) >> 13; \
2663 /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2664 t5 -= (tq*11725 + 16384) >> 15; \
2665 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2666 t2 -= (tt*805 + 8192) >> 14; \
2667 /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
2668 tt += (t2*803 + 4096) >> 13; \
2669 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2670 t2 -= (tt*805 + 8192) >> 14; \
2671 \
2672 tl = -tl; \
2673 ti = -ti; \
2674 \
2675 th += OD_DCT_RSHIFT(t9, 1); \
2676 t9 -= th; \
2677 te -= OD_DCT_RSHIFT(tm, 1); \
2678 tm += te; \
2679 t1 += OD_DCT_RSHIFT(tp, 1); \
2680 tp -= t1; \
2681 tu -= OD_DCT_RSHIFT(t6, 1); \
2682 t6 += tu; \
2683 ta -= OD_DCT_RSHIFT(td, 1); \
2684 td += ta; \
2685 tl += OD_DCT_RSHIFT(ti, 1); \
2686 ti -= tl; \
2687 t5 += OD_DCT_RSHIFT(tt, 1); \
2688 tt -= t5; \
2689 tq += OD_DCT_RSHIFT(t2, 1); \
2690 t2 -= tq; \
2691 \
2692 t8 -= tgh; \
2693 tg += t8; \
2694 tn += tfh; \
2695 tf -= tn; \
2696 t7 -= tvh; \
2697 tv += t7; \
2698 to -= t0h; \
2699 t0 += to; \
2700 tc += tbh; \
2701 tb -= tc; \
2702 tj += tkh; \
2703 tk -= tj; \
2704 ts += t4h; \
2705 t4 -= ts; \
2706 t3 += trh; \
2707 tr -= t3; \
2708 \
2709 tk = -tk; \
2710 \
2711 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
2712 tc -= (tj*2485 + 4096) >> 13; \
2713 /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
2714 tj += (tc*18205 + 16384) >> 15; \
2715 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
2716 tc -= (tj*2485 + 4096) >> 13; \
2717 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
2718 ts -= (t3*3227 + 16384) >> 15; \
2719 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
2720 t3 += (ts*6393 + 16384) >> 15; \
2721 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
2722 ts -= (t3*3227 + 16384) >> 15; \
2723 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
2724 tk -= (tb*17515 + 16384) >> 15; \
2725 /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
2726 tb += (tk*13623 + 8192) >> 14; \
2727 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
2728 tk -= (tb*17515 + 16384) >> 15; \
2729 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
2730 t4 -= (tr*6723 + 4096) >> 13; \
2731 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
2732 tr += (t4*16069 + 8192) >> 14; \
2733 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
2734 t4 -= (tr*6723 + 4096) >> 13; \
2735 \
2736 t4 = -t4; \
2737 \
2738 tp += tm; \
2739 tm -= OD_DCT_RSHIFT(tp, 1); \
2740 t9 -= t6; \
2741 t6 += OD_DCT_RSHIFT(t9, 1); \
2742 th -= t1; \
2743 t1 += OD_DCT_RSHIFT(th, 1); \
2744 tu -= te; \
2745 te += OD_DCT_RSHIFT(tu, 1); /* pass */ \
2746 t5 -= tl; \
2747 tl += OD_DCT_RSHIFT(t5, 1); \
2748 ta += tq; \
2749 tq -= OD_DCT_RSHIFT(ta, 1); \
2750 td += tt; \
2751 tt -= OD_DCT_RSHIFT(td, 1); \
2752 t2 -= ti; \
2753 ti += OD_DCT_RSHIFT(t2, 1); /* pass */ \
2754 t7 += t8; \
2755 t8 -= OD_DCT_RSHIFT(t7, 1); \
2756 tn -= to; \
2757 to += OD_DCT_RSHIFT(tn, 1); \
2758 tf -= tv; \
2759 tv += OD_DCT_RSHIFT(tf, 1); \
2760 t0 += tg; \
2761 tg -= OD_DCT_RSHIFT(t0, 1); /* pass */ \
2762 tj -= t3; \
2763 t3 += OD_DCT_RSHIFT(tj, 1); /* pass */ \
2764 ts -= tc; \
2765 tc += OD_DCT_RSHIFT(ts, 1); \
2766 t4 -= tb; \
2767 tb += OD_DCT_RSHIFT(t4, 1); /* pass */ \
2768 tk -= tr; \
2769 tr += OD_DCT_RSHIFT(tk, 1); \
2770 \
2771 t1 = -t1; \
2772 t3 = -t3; \
2773 t7 = -t7; \
2774 t8 = -t8; \
2775 tg = -tg; \
2776 tm = -tm; \
2777 to = -to; \
2778 \
2779 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2780 tm -= (t9*14341 + 8192) >> 14; \
2781 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2782 t9 += (tm*15137 + 8192) >> 14; \
2783 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2784 tm -= (t9*4161 + 8192) >> 14; \
2785 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2786 tp -= (t6*4161 + 8192) >> 14; \
2787 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2788 t6 += (tp*15137 + 8192) >> 14; \
2789 /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2790 tp -= (t6*28681 + 16384) >> 15; \
2791 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2792 th += (te*19195 + 16384) >> 15; \
2793 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2794 te += (th*11585 + 8192) >> 14; \
2795 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2796 th -= (te*29957 + 16384) >> 15; \
2797 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2798 tq -= (t5*14341 + 8192) >> 14; \
2799 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2800 t5 += (tq*15137 + 8192) >> 14; \
2801 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2802 tq -= (t5*4161 + 8192) >> 14; \
2803 /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
2804 ta -= (tl*3259 + 4096) >> 13; \
2805 /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
2806 tl += (ta*3135 + 8192) >> 14; \
2807 /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
2808 ta -= (tl*3259 + 4096) >> 13; \
2809 /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2810 ti -= (td*7489 + 4096) >> 13; \
2811 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2812 td += (ti*11585 + 8192) >> 14; \
2813 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2814 ti += (td*19195 + 16384) >> 15; \
2815 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2816 to -= (t7*14341 + 8192) >> 14; \
2817 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2818 t7 += (to*15137 + 8192) >> 14; \
2819 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2820 to -= (t7*4161 + 8192) >> 14; \
2821 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2822 tn -= (t8*4161 + 8192) >> 14; \
2823 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2824 t8 += (tn*15137 + 8192) >> 14; \
2825 /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2826 tn -= (t8*28681 + 16384) >> 15; \
2827 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2828 tf += (tg*19195 + 16384) >> 15; \
2829 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2830 tg += (tf*11585 + 8192) >> 14; \
2831 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2832 tf -= (tg*29957 + 16384) >> 15; \
2833 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2834 tj += (tc*19195 + 16384) >> 15; \
2835 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2836 tc += (tj*11585 + 8192) >> 14; \
2837 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2838 tj -= (tc*29957 + 16384) >> 15; \
2839 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
2840 tk += (tb*13573 + 8192) >> 14; \
2841 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
2842 tb -= (tk*11585 + 16384) >> 15; \
2843 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
2844 tk += (tb*13573 + 8192) >> 14; \
2845 \
2846 tf = -tf; \
2847 \
2848 } \
2849 while (0)
2850
/* OD_FDCT_64: 64-point forward transform, applied in place to 64 integer
   lvalues. Every argument is assigned to, so each must be a modifiable
   variable.
   Stage 1 runs 32 three-statement steps. Each step touches one pair of
   arguments plus one local "half" temporary, and the steps alternate between
   two forms:
     X = a - X;  Xh = OD_DCT_RSHIFT(X, 1);  a -= Xh;
     X += b;     Xh = OD_DCT_RSHIFT(X, 1);  b = Xh - b;
   Stage 2 passes the first 32 macro parameters, in declaration order, to
   OD_FDCT_32_ASYM; every second one of them is followed by the half that the
   second form above produced for it. The last 32 parameters are passed, in
   reverse declaration order, to OD_FDST_32_ASYM.
   The half of the bare `u` argument is stored in `uh_` (`uh` is already a
   parameter name). */
2851#define OD_FDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
2852 us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
2853 ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
2854 ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
2855 /* Embedded 64-point orthonormal Type-II fDCT. */ \
2856 do { \
2857 int uwh; \
2858 int uxh; \
2859 int uyh; \
2860 int uzh; \
2861 int uAh; \
2862 int uBh; \
2863 int uCh; \
2864 int uDh; \
2865 int uEh; \
2866 int uFh; \
2867 int uGh; \
2868 int uHh; \
2869 int uIh; \
2870 int uJh; \
2871 int uKh; \
2872 int uLh; \
2873 int uMh; \
2874 int uNh; \
2875 int uOh; \
2876 int uPh; \
2877 int uQh; \
2878 int uRh; \
2879 int uSh; \
2880 int uTh; \
2881 int uUh; \
2882 int uVh; \
2883 int uWh; \
2884 int uXh; \
2885 int uYh; \
2886 int uZh; \
2887 int u_h; \
2888 int uh_; \
2889 u = u0 - u; \
2890 uh_ = OD_DCT_RSHIFT(u, 1); \
2891 u0 -= uh_; \
2892 u_ += u1; \
2893 u_h = OD_DCT_RSHIFT(u_, 1); \
2894 u1 = u_h - u1; \
2895 uZ = u2 - uZ; \
2896 uZh = OD_DCT_RSHIFT(uZ, 1); \
2897 u2 -= uZh; \
2898 uY += u3; \
2899 uYh = OD_DCT_RSHIFT(uY, 1); \
2900 u3 = uYh - u3; \
2901 uX = u4 - uX; \
2902 uXh = OD_DCT_RSHIFT(uX, 1); \
2903 u4 -= uXh; \
2904 uW += u5; \
2905 uWh = OD_DCT_RSHIFT(uW, 1); \
2906 u5 = uWh - u5; \
2907 uV = u6 - uV; \
2908 uVh = OD_DCT_RSHIFT(uV, 1); \
2909 u6 -= uVh; \
2910 uU += u7; \
2911 uUh = OD_DCT_RSHIFT(uU, 1); \
2912 u7 = uUh - u7; \
2913 uT = u8 - uT; \
2914 uTh = OD_DCT_RSHIFT(uT, 1); \
2915 u8 -= uTh; \
2916 uS += u9; \
2917 uSh = OD_DCT_RSHIFT(uS, 1); \
2918 u9 = uSh - u9; \
2919 uR = ua - uR; \
2920 uRh = OD_DCT_RSHIFT(uR, 1); \
2921 ua -= uRh; \
2922 uQ += ub; \
2923 uQh = OD_DCT_RSHIFT(uQ, 1); \
2924 ub = uQh - ub; \
2925 uP = uc - uP; \
2926 uPh = OD_DCT_RSHIFT(uP, 1); \
2927 uc -= uPh; \
2928 uO += ud; \
2929 uOh = OD_DCT_RSHIFT(uO, 1); \
2930 ud = uOh - ud; \
2931 uN = ue - uN; \
2932 uNh = OD_DCT_RSHIFT(uN, 1); \
2933 ue -= uNh; \
2934 uM += uf; \
2935 uMh = OD_DCT_RSHIFT(uM, 1); \
2936 uf = uMh - uf; \
2937 uL = ug - uL; \
2938 uLh = OD_DCT_RSHIFT(uL, 1); \
2939 ug -= uLh; \
2940 uK += uh; \
2941 uKh = OD_DCT_RSHIFT(uK, 1); \
2942 uh = uKh - uh; \
2943 uJ = ui - uJ; \
2944 uJh = OD_DCT_RSHIFT(uJ, 1); \
2945 ui -= uJh; \
2946 uI += uj; \
2947 uIh = OD_DCT_RSHIFT(uI, 1); \
2948 uj = uIh - uj; \
2949 uH = uk - uH; \
2950 uHh = OD_DCT_RSHIFT(uH, 1); \
2951 uk -= uHh; \
2952 uG += ul; \
2953 uGh = OD_DCT_RSHIFT(uG, 1); \
2954 ul = uGh - ul; \
2955 uF = um - uF; \
2956 uFh = OD_DCT_RSHIFT(uF, 1); \
2957 um -= uFh; \
2958 uE += un; \
2959 uEh = OD_DCT_RSHIFT(uE, 1); \
2960 un = uEh - un; \
2961 uD = uo - uD; \
2962 uDh = OD_DCT_RSHIFT(uD, 1); \
2963 uo -= uDh; \
2964 uC += up; \
2965 uCh = OD_DCT_RSHIFT(uC, 1); \
2966 up = uCh - up; \
2967 uB = uq - uB; \
2968 uBh = OD_DCT_RSHIFT(uB, 1); \
2969 uq -= uBh; \
2970 uA += ur; \
2971 uAh = OD_DCT_RSHIFT(uA, 1); \
2972 ur = uAh - ur; \
2973 uz = us - uz; \
2974 uzh = OD_DCT_RSHIFT(uz, 1); \
2975 us -= uzh; \
2976 uy += ut; \
2977 uyh = OD_DCT_RSHIFT(uy, 1); \
2978 ut = uyh - ut; \
2979 ux = uu - ux; \
2980 uxh = OD_DCT_RSHIFT(ux, 1); \
2981 uu -= uxh; \
2982 uw += uv; \
2983 uwh = OD_DCT_RSHIFT(uw, 1); \
2984 uv = uwh - uv; \
2985 OD_FDCT_32_ASYM(u0, uw, uwh, ug, uM, uMh, u8, uE, uEh, uo, uU, uUh, \
2986 u4, uA, uAh, uk, uQ, uQh, uc, uI, uIh, us, uY, uYh, u2, uy, uyh, \
2987 ui, uO, uOh, ua, uG, uGh, uq, uW, uWh, u6, uC, uCh, um, uS, uSh, \
2988 ue, uK, uKh, uu, u_, u_h); \
2989 OD_FDST_32_ASYM(u, uv, uL, uf, uT, un, uD, u7, uX, ur, uH, ub, uP, uj, \
2990 uz, u3, uZ, ut, uJ, ud, uR, ul, uB, u5, uV, up, uF, u9, uN, uh, ux, u1); \
2991 } \
2992 while (0)
2993
/* OD_IDCT_64: 64-point inverse transform, applied in place to 64 integer
   lvalues. The parameter list is identical to that of OD_FDCT_64.
   Stage 1 invokes OD_IDST_32_ASYM on the 32 arguments whose suffix is w-z,
   A-Z, `_`, or empty (the bare `u`), and OD_IDCT_32_ASYM on the 32 arguments
   whose suffix is 0-9 or a-v. The latter call also receives sixteen local
   half temporaries (u1h, uhh, ..., uvh). This macro never assigns them
   itself, so presumably OD_IDCT_32_ASYM fills them in (TODO confirm against
   its definition).
   Stage 2 runs 32 steps, each touching one pair of arguments, alternating
   between two forms:
     Xh = OD_DCT_RSHIFT(X, 1);  a += Xh;  X = a - X;
     X = bh - X;  b -= X;
   The half of the bare `u` argument is stored in `uh_` (`uh` is already a
   parameter name). */
2994#define OD_IDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
2995 us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
2996 ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
2997 ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
2998 /* Embedded 64-point orthonormal Type-II iDCT. */ \
2999 do { \
3000 int u1h; \
3001 int u3h; \
3002 int u5h; \
3003 int u7h; \
3004 int u9h; \
3005 int ubh; \
3006 int udh; \
3007 int ufh; \
3008 int uhh; \
3009 int ujh; \
3010 int ulh; \
3011 int unh; \
3012 int uph; \
3013 int urh; \
3014 int uth; \
3015 int uvh; \
3016 int uxh; \
3017 int uzh; \
3018 int uBh; \
3019 int uDh; \
3020 int uFh; \
3021 int uHh; \
3022 int uJh; \
3023 int uLh; \
3024 int uNh; \
3025 int uPh; \
3026 int uRh; \
3027 int uTh; \
3028 int uVh; \
3029 int uXh; \
3030 int uZh; \
3031 int uh_; \
3032 OD_IDST_32_ASYM(u, uL, uT, uD, uX, uH, uP, uz, uZ, uJ, uR, uB, uV, uF, \
3033 uN, ux, u_, uK, uS, uC, uW, uG, uO, uy, uY, uI, uQ, uA, uU, uE, uM, uw); \
3034 OD_IDCT_32_ASYM(u0, ug, u8, uo, u4, uk, uc, us, u2, ui, ua, uq, u6, um, \
3035 ue, uu, u1, u1h, uh, uhh, u9, u9h, up, uph, u5, u5h, ul, ulh, ud, udh, \
3036 ut, uth, u3, u3h, uj, ujh, ub, ubh, ur, urh, u7, u7h, un, unh, uf, ufh, \
3037 uv, uvh); \
3038 uh_ = OD_DCT_RSHIFT(u, 1); \
3039 u0 += uh_; \
3040 u = u0 - u; \
3041 u_ = u1h - u_; \
3042 u1 -= u_; \
3043 uZh = OD_DCT_RSHIFT(uZ, 1); \
3044 u2 += uZh; \
3045 uZ = u2 - uZ; \
3046 uY = u3h - uY; \
3047 u3 -= uY; \
3048 uXh = OD_DCT_RSHIFT(uX, 1); \
3049 u4 += uXh; \
3050 uX = u4 - uX; \
3051 uW = u5h - uW; \
3052 u5 -= uW; \
3053 uVh = OD_DCT_RSHIFT(uV, 1); \
3054 u6 += uVh; \
3055 uV = u6 - uV; \
3056 uU = u7h - uU; \
3057 u7 -= uU; \
3058 uTh = OD_DCT_RSHIFT(uT, 1); \
3059 u8 += uTh; \
3060 uT = u8 - uT; \
3061 uS = u9h - uS; \
3062 u9 -= uS; \
3063 uRh = OD_DCT_RSHIFT(uR, 1); \
3064 ua += uRh; \
3065 uR = ua - uR; \
3066 uQ = ubh - uQ; \
3067 ub -= uQ; \
3068 uPh = OD_DCT_RSHIFT(uP, 1); \
3069 uc += uPh; \
3070 uP = uc - uP; \
3071 uO = udh - uO; \
3072 ud -= uO; \
3073 uNh = OD_DCT_RSHIFT(uN, 1); \
3074 ue += uNh; \
3075 uN = ue - uN; \
3076 uM = ufh - uM; \
3077 uf -= uM; \
3078 uLh = OD_DCT_RSHIFT(uL, 1); \
3079 ug += uLh; \
3080 uL = ug - uL; \
3081 uK = uhh - uK; \
3082 uh -= uK; \
3083 uJh = OD_DCT_RSHIFT(uJ, 1); \
3084 ui += uJh; \
3085 uJ = ui - uJ; \
3086 uI = ujh - uI; \
3087 uj -= uI; \
3088 uHh = OD_DCT_RSHIFT(uH, 1); \
3089 uk += uHh; \
3090 uH = uk - uH; \
3091 uG = ulh - uG; \
3092 ul -= uG; \
3093 uFh = OD_DCT_RSHIFT(uF, 1); \
3094 um += uFh; \
3095 uF = um - uF; \
3096 uE = unh - uE; \
3097 un -= uE; \
3098 uDh = OD_DCT_RSHIFT(uD, 1); \
3099 uo += uDh; \
3100 uD = uo - uD; \
3101 uC = uph - uC; \
3102 up -= uC; \
3103 uBh = OD_DCT_RSHIFT(uB, 1); \
3104 uq += uBh; \
3105 uB = uq - uB; \
3106 uA = urh - uA; \
3107 ur -= uA; \
3108 uzh = OD_DCT_RSHIFT(uz, 1); \
3109 us += uzh; \
3110 uz = us - uz; \
3111 uy = uth - uy; \
3112 ut -= uy; \
3113 uxh = OD_DCT_RSHIFT(ux, 1); \
3114 uu += uxh; \
3115 ux = uu - ux; \
3116 uw = uvh - uw; \
3117 uv -= uw; \
3118 } while (0)
3119#endif
3120
Monty Montgomery02078a32017-07-11 21:22:29 -04003121void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) {
3122 int q0;
3123 int q1;
3124 int q2;
3125 int q3;
3126 q0 = x[0*xstride];
3127 q2 = x[1*xstride];
3128 q1 = x[2*xstride];
3129 q3 = x[3*xstride];
3130 OD_FDCT_4(q0, q2, q1, q3);
3131 y[0] = (od_coeff)q0;
3132 y[1] = (od_coeff)q1;
3133 y[2] = (od_coeff)q2;
3134 y[3] = (od_coeff)q3;
3135}
3136
3137void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) {
3138 int q0;
3139 int q1;
3140 int q2;
3141 int q3;
3142 q0 = y[0];
3143 q2 = y[1];
3144 q1 = y[2];
3145 q3 = y[3];
3146 OD_IDCT_4(q0, q2, q1, q3);
3147 x[0*xstride] = q0;
3148 x[1*xstride] = q1;
3149 x[2*xstride] = q2;
3150 x[3*xstride] = q3;
3151}
Monty Montgomerycf18fe42017-07-11 21:33:25 -04003152
Monty Montgomery573cf252017-08-02 05:45:14 -04003153void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride) {
3154 int q0;
3155 int q1;
3156 int q2;
3157 int q3;
Nathan Egge5a5e1ad2017-09-12 12:33:48 +00003158 q0 = x[3*xstride];
Nathan E. Egge72c99e12017-08-21 17:35:04 -04003159 q2 = x[2*xstride];
Nathan Egge5a5e1ad2017-09-12 12:33:48 +00003160 q1 = x[1*xstride];
3161 q3 = x[0*xstride];
3162 OD_FDST_4(q0, q2, q1, q3);
3163 y[0] = (od_coeff)q3;
3164 y[1] = (od_coeff)q2;
3165 y[2] = (od_coeff)q1;
3166 y[3] = (od_coeff)q0;
Monty Montgomery573cf252017-08-02 05:45:14 -04003167}
3168
3169void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]) {
3170 int q0;
3171 int q1;
3172 int q2;
3173 int q3;
Nathan Egge5a5e1ad2017-09-12 12:33:48 +00003174 q0 = y[3];
Nathan E. Egge72c99e12017-08-21 17:35:04 -04003175 q2 = y[2];
Nathan Egge5a5e1ad2017-09-12 12:33:48 +00003176 q1 = y[1];
3177 q3 = y[0];
3178 OD_IDST_4(q0, q2, q1, q3);
3179 x[0*xstride] = q3;
3180 x[1*xstride] = q2;
3181 x[2*xstride] = q1;
3182 x[3*xstride] = q0;
Monty Montgomery573cf252017-08-02 05:45:14 -04003183}
3184
Monty Montgomerycf18fe42017-07-11 21:33:25 -04003185void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride) {
3186 int r0;
3187 int r1;
3188 int r2;
3189 int r3;
3190 int r4;
3191 int r5;
3192 int r6;
3193 int r7;
3194 r0 = x[0*xstride];
3195 r4 = x[1*xstride];
3196 r2 = x[2*xstride];
3197 r6 = x[3*xstride];
3198 r1 = x[4*xstride];
3199 r5 = x[5*xstride];
3200 r3 = x[6*xstride];
3201 r7 = x[7*xstride];
3202 OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7);
3203 y[0] = (od_coeff)r0;
3204 y[1] = (od_coeff)r1;
3205 y[2] = (od_coeff)r2;
3206 y[3] = (od_coeff)r3;
3207 y[4] = (od_coeff)r4;
3208 y[5] = (od_coeff)r5;
3209 y[6] = (od_coeff)r6;
3210 y[7] = (od_coeff)r7;
3211}
3212
3213void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]) {
3214 int r0;
3215 int r1;
3216 int r2;
3217 int r3;
3218 int r4;
3219 int r5;
3220 int r6;
3221 int r7;
3222 r0 = y[0];
3223 r4 = y[1];
3224 r2 = y[2];
3225 r6 = y[3];
3226 r1 = y[4];
3227 r5 = y[5];
3228 r3 = y[6];
3229 r7 = y[7];
3230 OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7);
3231 x[0*xstride] = (od_coeff)r0;
3232 x[1*xstride] = (od_coeff)r1;
3233 x[2*xstride] = (od_coeff)r2;
3234 x[3*xstride] = (od_coeff)r3;
3235 x[4*xstride] = (od_coeff)r4;
3236 x[5*xstride] = (od_coeff)r5;
3237 x[6*xstride] = (od_coeff)r6;
3238 x[7*xstride] = (od_coeff)r7;
3239}
3240
3241void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride) {
3242 int r0;
3243 int r1;
3244 int r2;
3245 int r3;
3246 int r4;
3247 int r5;
3248 int r6;
3249 int r7;
3250 r0 = x[0*xstride];
3251 r4 = x[1*xstride];
3252 r2 = x[2*xstride];
3253 r6 = x[3*xstride];
3254 r1 = x[4*xstride];
3255 r5 = x[5*xstride];
3256 r3 = x[6*xstride];
3257 r7 = x[7*xstride];
3258 OD_FDST_8(r0, r4, r2, r6, r1, r5, r3, r7);
3259 y[0] = (od_coeff)r0;
3260 y[1] = (od_coeff)r1;
3261 y[2] = (od_coeff)r2;
3262 y[3] = (od_coeff)r3;
3263 y[4] = (od_coeff)r4;
3264 y[5] = (od_coeff)r5;
3265 y[6] = (od_coeff)r6;
3266 y[7] = (od_coeff)r7;
3267}
3268
3269void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]) {
3270 int r0;
3271 int r1;
3272 int r2;
3273 int r3;
3274 int r4;
3275 int r5;
3276 int r6;
3277 int r7;
3278 r0 = y[0];
3279 r4 = y[1];
3280 r2 = y[2];
3281 r6 = y[3];
3282 r1 = y[4];
3283 r5 = y[5];
3284 r3 = y[6];
3285 r7 = y[7];
3286 OD_IDST_8(r0, r4, r2, r6, r1, r5, r3, r7);
3287 x[0*xstride] = (od_coeff)r0;
3288 x[1*xstride] = (od_coeff)r1;
3289 x[2*xstride] = (od_coeff)r2;
3290 x[3*xstride] = (od_coeff)r3;
3291 x[4*xstride] = (od_coeff)r4;
3292 x[5*xstride] = (od_coeff)r5;
3293 x[6*xstride] = (od_coeff)r6;
3294 x[7*xstride] = (od_coeff)r7;
3295}
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003296
3297void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride) {
3298 int s0;
3299 int s1;
3300 int s2;
3301 int s3;
3302 int s4;
3303 int s5;
3304 int s6;
3305 int s7;
3306 int s8;
3307 int s9;
3308 int sa;
3309 int sb;
3310 int sc;
3311 int sd;
3312 int se;
3313 int sf;
3314 s0 = x[0*xstride];
3315 s8 = x[1*xstride];
3316 s4 = x[2*xstride];
3317 sc = x[3*xstride];
3318 s2 = x[4*xstride];
3319 sa = x[5*xstride];
3320 s6 = x[6*xstride];
3321 se = x[7*xstride];
3322 s1 = x[8*xstride];
3323 s9 = x[9*xstride];
3324 s5 = x[10*xstride];
3325 sd = x[11*xstride];
3326 s3 = x[12*xstride];
3327 sb = x[13*xstride];
3328 s7 = x[14*xstride];
3329 sf = x[15*xstride];
3330 OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3331 y[0] = (od_coeff)s0;
3332 y[1] = (od_coeff)s1;
3333 y[2] = (od_coeff)s2;
3334 y[3] = (od_coeff)s3;
3335 y[4] = (od_coeff)s4;
3336 y[5] = (od_coeff)s5;
3337 y[6] = (od_coeff)s6;
3338 y[7] = (od_coeff)s7;
3339 y[8] = (od_coeff)s8;
3340 y[9] = (od_coeff)s9;
3341 y[10] = (od_coeff)sa;
3342 y[11] = (od_coeff)sb;
3343 y[12] = (od_coeff)sc;
3344 y[13] = (od_coeff)sd;
3345 y[14] = (od_coeff)se;
3346 y[15] = (od_coeff)sf;
3347}
3348
3349void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]) {
3350 int s0;
3351 int s1;
3352 int s2;
3353 int s3;
3354 int s4;
3355 int s5;
3356 int s6;
3357 int s7;
3358 int s8;
3359 int s9;
3360 int sa;
3361 int sb;
3362 int sc;
3363 int sd;
3364 int se;
3365 int sf;
3366 s0 = y[0];
3367 s8 = y[1];
3368 s4 = y[2];
3369 sc = y[3];
3370 s2 = y[4];
3371 sa = y[5];
3372 s6 = y[6];
3373 se = y[7];
3374 s1 = y[8];
3375 s9 = y[9];
3376 s5 = y[10];
3377 sd = y[11];
3378 s3 = y[12];
3379 sb = y[13];
3380 s7 = y[14];
3381 sf = y[15];
3382 OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3383 x[0*xstride] = (od_coeff)s0;
3384 x[1*xstride] = (od_coeff)s1;
3385 x[2*xstride] = (od_coeff)s2;
3386 x[3*xstride] = (od_coeff)s3;
3387 x[4*xstride] = (od_coeff)s4;
3388 x[5*xstride] = (od_coeff)s5;
3389 x[6*xstride] = (od_coeff)s6;
3390 x[7*xstride] = (od_coeff)s7;
3391 x[8*xstride] = (od_coeff)s8;
3392 x[9*xstride] = (od_coeff)s9;
3393 x[10*xstride] = (od_coeff)sa;
3394 x[11*xstride] = (od_coeff)sb;
3395 x[12*xstride] = (od_coeff)sc;
3396 x[13*xstride] = (od_coeff)sd;
3397 x[14*xstride] = (od_coeff)se;
3398 x[15*xstride] = (od_coeff)sf;
3399}
3400
3401void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride) {
3402 int s0;
3403 int s1;
3404 int s2;
3405 int s3;
3406 int s4;
3407 int s5;
3408 int s6;
3409 int s7;
3410 int s8;
3411 int s9;
3412 int sa;
3413 int sb;
3414 int sc;
3415 int sd;
3416 int se;
3417 int sf;
3418 s0 = x[15*xstride];
3419 s8 = x[14*xstride];
3420 s4 = x[13*xstride];
3421 sc = x[12*xstride];
3422 s2 = x[11*xstride];
3423 sa = x[10*xstride];
3424 s6 = x[9*xstride];
3425 se = x[8*xstride];
3426 s1 = x[7*xstride];
3427 s9 = x[6*xstride];
3428 s5 = x[5*xstride];
3429 sd = x[4*xstride];
3430 s3 = x[3*xstride];
3431 sb = x[2*xstride];
3432 s7 = x[1*xstride];
3433 sf = x[0*xstride];
3434 OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3435 y[0] = (od_coeff)sf;
3436 y[1] = (od_coeff)se;
3437 y[2] = (od_coeff)sd;
3438 y[3] = (od_coeff)sc;
3439 y[4] = (od_coeff)sb;
3440 y[5] = (od_coeff)sa;
3441 y[6] = (od_coeff)s9;
3442 y[7] = (od_coeff)s8;
3443 y[8] = (od_coeff)s7;
3444 y[9] = (od_coeff)s6;
3445 y[10] = (od_coeff)s5;
3446 y[11] = (od_coeff)s4;
3447 y[12] = (od_coeff)s3;
3448 y[13] = (od_coeff)s2;
3449 y[14] = (od_coeff)s1;
3450 y[15] = (od_coeff)s0;
3451}
3452
3453void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]) {
3454 int s0;
3455 int s1;
3456 int s2;
3457 int s3;
3458 int s4;
3459 int s5;
3460 int s6;
3461 int s7;
3462 int s8;
3463 int s9;
3464 int sa;
3465 int sb;
3466 int sc;
3467 int sd;
3468 int se;
3469 int sf;
3470 s0 = y[15];
3471 s8 = y[14];
3472 s4 = y[13];
3473 sc = y[12];
3474 s2 = y[11];
3475 sa = y[10];
3476 s6 = y[9];
3477 se = y[8];
3478 s1 = y[7];
3479 s9 = y[6];
3480 s5 = y[5];
3481 sd = y[4];
3482 s3 = y[3];
3483 sb = y[2];
3484 s7 = y[1];
3485 sf = y[0];
3486 OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3487 x[0*xstride] = (od_coeff)sf;
3488 x[1*xstride] = (od_coeff)se;
3489 x[2*xstride] = (od_coeff)sd;
3490 x[3*xstride] = (od_coeff)sc;
3491 x[4*xstride] = (od_coeff)sb;
3492 x[5*xstride] = (od_coeff)sa;
3493 x[6*xstride] = (od_coeff)s9;
3494 x[7*xstride] = (od_coeff)s8;
3495 x[8*xstride] = (od_coeff)s7;
3496 x[9*xstride] = (od_coeff)s6;
3497 x[10*xstride] = (od_coeff)s5;
3498 x[11*xstride] = (od_coeff)s4;
3499 x[12*xstride] = (od_coeff)s3;
3500 x[13*xstride] = (od_coeff)s2;
3501 x[14*xstride] = (od_coeff)s1;
3502 x[15*xstride] = (od_coeff)s0;
3503}
Monty Montgomery2cb52ba2017-07-17 18:27:27 -04003504
3505void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride) {
3506 /*215 adds, 38 shifts, 87 "muls".*/
3507 int t0;
3508 int t1;
3509 int t2;
3510 int t3;
3511 int t4;
3512 int t5;
3513 int t6;
3514 int t7;
3515 int t8;
3516 int t9;
3517 int ta;
3518 int tb;
3519 int tc;
3520 int td;
3521 int te;
3522 int tf;
3523 int tg;
3524 int th;
3525 int ti;
3526 int tj;
3527 int tk;
3528 int tl;
3529 int tm;
3530 int tn;
3531 int to;
3532 int tp;
3533 int tq;
3534 int tr;
3535 int ts;
3536 int tt;
3537 int tu;
3538 int tv;
3539 t0 = x[0*xstride];
3540 tg = x[1*xstride];
3541 t8 = x[2*xstride];
3542 to = x[3*xstride];
3543 t4 = x[4*xstride];
3544 tk = x[5*xstride];
3545 tc = x[6*xstride];
3546 ts = x[7*xstride];
3547 t2 = x[8*xstride];
3548 ti = x[9*xstride];
3549 ta = x[10*xstride];
3550 tq = x[11*xstride];
3551 t6 = x[12*xstride];
3552 tm = x[13*xstride];
3553 te = x[14*xstride];
3554 tu = x[15*xstride];
3555 t1 = x[16*xstride];
3556 th = x[17*xstride];
3557 t9 = x[18*xstride];
3558 tp = x[19*xstride];
3559 t5 = x[20*xstride];
3560 tl = x[21*xstride];
3561 td = x[22*xstride];
3562 tt = x[23*xstride];
3563 t3 = x[24*xstride];
3564 tj = x[25*xstride];
3565 tb = x[26*xstride];
3566 tr = x[27*xstride];
3567 t7 = x[28*xstride];
3568 tn = x[29*xstride];
3569 tf = x[30*xstride];
3570 tv = x[31*xstride];
3571 OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
3572 t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
3573 y[0] = (od_coeff)t0;
3574 y[1] = (od_coeff)t1;
3575 y[2] = (od_coeff)t2;
3576 y[3] = (od_coeff)t3;
3577 y[4] = (od_coeff)t4;
3578 y[5] = (od_coeff)t5;
3579 y[6] = (od_coeff)t6;
3580 y[7] = (od_coeff)t7;
3581 y[8] = (od_coeff)t8;
3582 y[9] = (od_coeff)t9;
3583 y[10] = (od_coeff)ta;
3584 y[11] = (od_coeff)tb;
3585 y[12] = (od_coeff)tc;
3586 y[13] = (od_coeff)td;
3587 y[14] = (od_coeff)te;
3588 y[15] = (od_coeff)tf;
3589 y[16] = (od_coeff)tg;
3590 y[17] = (od_coeff)th;
3591 y[18] = (od_coeff)ti;
3592 y[19] = (od_coeff)tj;
3593 y[20] = (od_coeff)tk;
3594 y[21] = (od_coeff)tl;
3595 y[22] = (od_coeff)tm;
3596 y[23] = (od_coeff)tn;
3597 y[24] = (od_coeff)to;
3598 y[25] = (od_coeff)tp;
3599 y[26] = (od_coeff)tq;
3600 y[27] = (od_coeff)tr;
3601 y[28] = (od_coeff)ts;
3602 y[29] = (od_coeff)tt;
3603 y[30] = (od_coeff)tu;
3604 y[31] = (od_coeff)tv;
3605}
3606
3607void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]) {
3608 int t0;
3609 int t1;
3610 int t2;
3611 int t3;
3612 int t4;
3613 int t5;
3614 int t6;
3615 int t7;
3616 int t8;
3617 int t9;
3618 int ta;
3619 int tb;
3620 int tc;
3621 int td;
3622 int te;
3623 int tf;
3624 int tg;
3625 int th;
3626 int ti;
3627 int tj;
3628 int tk;
3629 int tl;
3630 int tm;
3631 int tn;
3632 int to;
3633 int tp;
3634 int tq;
3635 int tr;
3636 int ts;
3637 int tt;
3638 int tu;
3639 int tv;
3640 t0 = y[0];
3641 tg = y[1];
3642 t8 = y[2];
3643 to = y[3];
3644 t4 = y[4];
3645 tk = y[5];
3646 tc = y[6];
3647 ts = y[7];
3648 t2 = y[8];
3649 ti = y[9];
3650 ta = y[10];
3651 tq = y[11];
3652 t6 = y[12];
3653 tm = y[13];
3654 te = y[14];
3655 tu = y[15];
3656 t1 = y[16];
3657 th = y[17];
3658 t9 = y[18];
3659 tp = y[19];
3660 t5 = y[20];
3661 tl = y[21];
3662 td = y[22];
3663 tt = y[23];
3664 t3 = y[24];
3665 tj = y[25];
3666 tb = y[26];
3667 tr = y[27];
3668 t7 = y[28];
3669 tn = y[29];
3670 tf = y[30];
3671 tv = y[31];
3672 OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
3673 t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
3674 x[0*xstride] = (od_coeff)t0;
3675 x[1*xstride] = (od_coeff)t1;
3676 x[2*xstride] = (od_coeff)t2;
3677 x[3*xstride] = (od_coeff)t3;
3678 x[4*xstride] = (od_coeff)t4;
3679 x[5*xstride] = (od_coeff)t5;
3680 x[6*xstride] = (od_coeff)t6;
3681 x[7*xstride] = (od_coeff)t7;
3682 x[8*xstride] = (od_coeff)t8;
3683 x[9*xstride] = (od_coeff)t9;
3684 x[10*xstride] = (od_coeff)ta;
3685 x[11*xstride] = (od_coeff)tb;
3686 x[12*xstride] = (od_coeff)tc;
3687 x[13*xstride] = (od_coeff)td;
3688 x[14*xstride] = (od_coeff)te;
3689 x[15*xstride] = (od_coeff)tf;
3690 x[16*xstride] = (od_coeff)tg;
3691 x[17*xstride] = (od_coeff)th;
3692 x[18*xstride] = (od_coeff)ti;
3693 x[19*xstride] = (od_coeff)tj;
3694 x[20*xstride] = (od_coeff)tk;
3695 x[21*xstride] = (od_coeff)tl;
3696 x[22*xstride] = (od_coeff)tm;
3697 x[23*xstride] = (od_coeff)tn;
3698 x[24*xstride] = (od_coeff)to;
3699 x[25*xstride] = (od_coeff)tp;
3700 x[26*xstride] = (od_coeff)tq;
3701 x[27*xstride] = (od_coeff)tr;
3702 x[28*xstride] = (od_coeff)ts;
3703 x[29*xstride] = (od_coeff)tt;
3704 x[30*xstride] = (od_coeff)tu;
3705 x[31*xstride] = (od_coeff)tv;
3706}
Monty Montgomerya4e245a2017-07-22 00:48:31 -04003707
3708#if CONFIG_TX64X64
3709void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride) {
3710 int t0;
3711 int t1;
3712 int t2;
3713 int t3;
3714 int t4;
3715 int t5;
3716 int t6;
3717 int t7;
3718 int t8;
3719 int t9;
3720 int ta;
3721 int tb;
3722 int tc;
3723 int td;
3724 int te;
3725 int tf;
3726 int tg;
3727 int th;
3728 int ti;
3729 int tj;
3730 int tk;
3731 int tl;
3732 int tm;
3733 int tn;
3734 int to;
3735 int tp;
3736 int tq;
3737 int tr;
3738 int ts;
3739 int tt;
3740 int tu;
3741 int tv;
3742 int tw;
3743 int tx;
3744 int ty;
3745 int tz;
3746 int tA;
3747 int tB;
3748 int tC;
3749 int tD;
3750 int tE;
3751 int tF;
3752 int tG;
3753 int tH;
3754 int tI;
3755 int tJ;
3756 int tK;
3757 int tL;
3758 int tM;
3759 int tN;
3760 int tO;
3761 int tP;
3762 int tQ;
3763 int tR;
3764 int tS;
3765 int tT;
3766 int tU;
3767 int tV;
3768 int tW;
3769 int tX;
3770 int tY;
3771 int tZ;
3772 int t_;
3773 int t;
3774 t0 = x[0*xstride];
3775 tw = x[1*xstride];
3776 tg = x[2*xstride];
3777 tM = x[3*xstride];
3778 t8 = x[4*xstride];
3779 tE = x[5*xstride];
3780 to = x[6*xstride];
3781 tU = x[7*xstride];
3782 t4 = x[8*xstride];
3783 tA = x[9*xstride];
3784 tk = x[10*xstride];
3785 tQ = x[11*xstride];
3786 tc = x[12*xstride];
3787 tI = x[13*xstride];
3788 ts = x[14*xstride];
3789 tY = x[15*xstride];
3790 t2 = x[16*xstride];
3791 ty = x[17*xstride];
3792 ti = x[18*xstride];
3793 tO = x[19*xstride];
3794 ta = x[20*xstride];
3795 tG = x[21*xstride];
3796 tq = x[22*xstride];
3797 tW = x[23*xstride];
3798 t6 = x[24*xstride];
3799 tC = x[25*xstride];
3800 tm = x[26*xstride];
3801 tS = x[27*xstride];
3802 te = x[28*xstride];
3803 tK = x[29*xstride];
3804 tu = x[30*xstride];
3805 t_ = x[31*xstride];
3806 t1 = x[32*xstride];
3807 tx = x[33*xstride];
3808 th = x[34*xstride];
3809 tN = x[35*xstride];
3810 t9 = x[36*xstride];
3811 tF = x[37*xstride];
3812 tp = x[38*xstride];
3813 tV = x[39*xstride];
3814 t5 = x[40*xstride];
3815 tB = x[41*xstride];
3816 tl = x[42*xstride];
3817 tR = x[43*xstride];
3818 td = x[44*xstride];
3819 tJ = x[45*xstride];
3820 tt = x[46*xstride];
3821 tZ = x[47*xstride];
3822 t3 = x[48*xstride];
3823 tz = x[49*xstride];
3824 tj = x[50*xstride];
3825 tP = x[51*xstride];
3826 tb = x[52*xstride];
3827 tH = x[53*xstride];
3828 tr = x[54*xstride];
3829 tX = x[55*xstride];
3830 t7 = x[56*xstride];
3831 tD = x[57*xstride];
3832 tn = x[58*xstride];
3833 tT = x[59*xstride];
3834 tf = x[60*xstride];
3835 tL = x[61*xstride];
3836 tv = x[62*xstride];
3837 t = x[63*xstride];
3838 OD_FDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
3839 t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
3840 th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
3841 tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
3842 y[0] = (od_coeff)t0;
3843 y[1] = (od_coeff)t1;
3844 y[2] = (od_coeff)t2;
3845 y[3] = (od_coeff)t3;
3846 y[4] = (od_coeff)t4;
3847 y[5] = (od_coeff)t5;
3848 y[6] = (od_coeff)t6;
3849 y[7] = (od_coeff)t7;
3850 y[8] = (od_coeff)t8;
3851 y[9] = (od_coeff)t9;
3852 y[10] = (od_coeff)ta;
3853 y[11] = (od_coeff)tb;
3854 y[12] = (od_coeff)tc;
3855 y[13] = (od_coeff)td;
3856 y[14] = (od_coeff)te;
3857 y[15] = (od_coeff)tf;
3858 y[16] = (od_coeff)tg;
3859 y[17] = (od_coeff)th;
3860 y[18] = (od_coeff)ti;
3861 y[19] = (od_coeff)tj;
3862 y[20] = (od_coeff)tk;
3863 y[21] = (od_coeff)tl;
3864 y[22] = (od_coeff)tm;
3865 y[23] = (od_coeff)tn;
3866 y[24] = (od_coeff)to;
3867 y[25] = (od_coeff)tp;
3868 y[26] = (od_coeff)tq;
3869 y[27] = (od_coeff)tr;
3870 y[28] = (od_coeff)ts;
3871 y[29] = (od_coeff)tt;
3872 y[30] = (od_coeff)tu;
3873 y[31] = (od_coeff)tv;
3874 y[32] = (od_coeff)tw;
3875 y[33] = (od_coeff)tx;
3876 y[34] = (od_coeff)ty;
3877 y[35] = (od_coeff)tz;
3878 y[36] = (od_coeff)tA;
3879 y[37] = (od_coeff)tB;
3880 y[38] = (od_coeff)tC;
3881 y[39] = (od_coeff)tD;
3882 y[40] = (od_coeff)tE;
3883 y[41] = (od_coeff)tF;
3884 y[41] = (od_coeff)tF;
3885 y[42] = (od_coeff)tG;
3886 y[43] = (od_coeff)tH;
3887 y[44] = (od_coeff)tI;
3888 y[45] = (od_coeff)tJ;
3889 y[46] = (od_coeff)tK;
3890 y[47] = (od_coeff)tL;
3891 y[48] = (od_coeff)tM;
3892 y[49] = (od_coeff)tN;
3893 y[50] = (od_coeff)tO;
3894 y[51] = (od_coeff)tP;
3895 y[52] = (od_coeff)tQ;
3896 y[53] = (od_coeff)tR;
3897 y[54] = (od_coeff)tS;
3898 y[55] = (od_coeff)tT;
3899 y[56] = (od_coeff)tU;
3900 y[57] = (od_coeff)tV;
3901 y[58] = (od_coeff)tW;
3902 y[59] = (od_coeff)tX;
3903 y[60] = (od_coeff)tY;
3904 y[61] = (od_coeff)tZ;
3905 y[62] = (od_coeff)t_;
3906 y[63] = (od_coeff)t;
3907}
3908
3909void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]) {
3910 int t0;
3911 int t1;
3912 int t2;
3913 int t3;
3914 int t4;
3915 int t5;
3916 int t6;
3917 int t7;
3918 int t8;
3919 int t9;
3920 int ta;
3921 int tb;
3922 int tc;
3923 int td;
3924 int te;
3925 int tf;
3926 int tg;
3927 int th;
3928 int ti;
3929 int tj;
3930 int tk;
3931 int tl;
3932 int tm;
3933 int tn;
3934 int to;
3935 int tp;
3936 int tq;
3937 int tr;
3938 int ts;
3939 int tt;
3940 int tu;
3941 int tv;
3942 int tw;
3943 int tx;
3944 int ty;
3945 int tz;
3946 int tA;
3947 int tB;
3948 int tC;
3949 int tD;
3950 int tE;
3951 int tF;
3952 int tG;
3953 int tH;
3954 int tI;
3955 int tJ;
3956 int tK;
3957 int tL;
3958 int tM;
3959 int tN;
3960 int tO;
3961 int tP;
3962 int tQ;
3963 int tR;
3964 int tS;
3965 int tT;
3966 int tU;
3967 int tV;
3968 int tW;
3969 int tX;
3970 int tY;
3971 int tZ;
3972 int t_;
3973 int t;
3974 t0 = y[0];
3975 tw = y[1];
3976 tg = y[2];
3977 tM = y[3];
3978 t8 = y[4];
3979 tE = y[5];
3980 to = y[6];
3981 tU = y[7];
3982 t4 = y[8];
3983 tA = y[9];
3984 tk = y[10];
3985 tQ = y[11];
3986 tc = y[12];
3987 tI = y[13];
3988 ts = y[14];
3989 tY = y[15];
3990 t2 = y[16];
3991 ty = y[17];
3992 ti = y[18];
3993 tO = y[19];
3994 ta = y[20];
3995 tG = y[21];
3996 tq = y[22];
3997 tW = y[23];
3998 t6 = y[24];
3999 tC = y[25];
4000 tm = y[26];
4001 tS = y[27];
4002 te = y[28];
4003 tK = y[29];
4004 tu = y[30];
4005 t_ = y[31];
4006 t1 = y[32];
4007 tx = y[33];
4008 th = y[34];
4009 tN = y[35];
4010 t9 = y[36];
4011 tF = y[37];
4012 tp = y[38];
4013 tV = y[39];
4014 t5 = y[40];
4015 tB = y[41];
4016 tl = y[42];
4017 tR = y[43];
4018 td = y[44];
4019 tJ = y[45];
4020 tt = y[46];
4021 tZ = y[47];
4022 t3 = y[48];
4023 tz = y[49];
4024 tj = y[50];
4025 tP = y[51];
4026 tb = y[52];
4027 tH = y[53];
4028 tr = y[54];
4029 tX = y[55];
4030 t7 = y[56];
4031 tD = y[57];
4032 tn = y[58];
4033 tT = y[59];
4034 tf = y[60];
4035 tL = y[61];
4036 tv = y[62];
4037 t = y[63];
4038 OD_IDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
4039 t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
4040 th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
4041 tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
4042 x[0*xstride] = (od_coeff)t0;
4043 x[1*xstride] = (od_coeff)t1;
4044 x[2*xstride] = (od_coeff)t2;
4045 x[3*xstride] = (od_coeff)t3;
4046 x[4*xstride] = (od_coeff)t4;
4047 x[5*xstride] = (od_coeff)t5;
4048 x[6*xstride] = (od_coeff)t6;
4049 x[7*xstride] = (od_coeff)t7;
4050 x[8*xstride] = (od_coeff)t8;
4051 x[9*xstride] = (od_coeff)t9;
4052 x[10*xstride] = (od_coeff)ta;
4053 x[11*xstride] = (od_coeff)tb;
4054 x[12*xstride] = (od_coeff)tc;
4055 x[13*xstride] = (od_coeff)td;
4056 x[14*xstride] = (od_coeff)te;
4057 x[15*xstride] = (od_coeff)tf;
4058 x[16*xstride] = (od_coeff)tg;
4059 x[17*xstride] = (od_coeff)th;
4060 x[18*xstride] = (od_coeff)ti;
4061 x[19*xstride] = (od_coeff)tj;
4062 x[20*xstride] = (od_coeff)tk;
4063 x[21*xstride] = (od_coeff)tl;
4064 x[22*xstride] = (od_coeff)tm;
4065 x[23*xstride] = (od_coeff)tn;
4066 x[24*xstride] = (od_coeff)to;
4067 x[25*xstride] = (od_coeff)tp;
4068 x[26*xstride] = (od_coeff)tq;
4069 x[27*xstride] = (od_coeff)tr;
4070 x[28*xstride] = (od_coeff)ts;
4071 x[29*xstride] = (od_coeff)tt;
4072 x[30*xstride] = (od_coeff)tu;
4073 x[31*xstride] = (od_coeff)tv;
4074 x[32*xstride] = (od_coeff)tw;
4075 x[33*xstride] = (od_coeff)tx;
4076 x[34*xstride] = (od_coeff)ty;
4077 x[35*xstride] = (od_coeff)tz;
4078 x[36*xstride] = (od_coeff)tA;
4079 x[37*xstride] = (od_coeff)tB;
4080 x[38*xstride] = (od_coeff)tC;
4081 x[39*xstride] = (od_coeff)tD;
4082 x[40*xstride] = (od_coeff)tE;
4083 x[41*xstride] = (od_coeff)tF;
4084 x[41*xstride] = (od_coeff)tF;
4085 x[42*xstride] = (od_coeff)tG;
4086 x[43*xstride] = (od_coeff)tH;
4087 x[44*xstride] = (od_coeff)tI;
4088 x[45*xstride] = (od_coeff)tJ;
4089 x[46*xstride] = (od_coeff)tK;
4090 x[47*xstride] = (od_coeff)tL;
4091 x[48*xstride] = (od_coeff)tM;
4092 x[49*xstride] = (od_coeff)tN;
4093 x[50*xstride] = (od_coeff)tO;
4094 x[51*xstride] = (od_coeff)tP;
4095 x[52*xstride] = (od_coeff)tQ;
4096 x[53*xstride] = (od_coeff)tR;
4097 x[54*xstride] = (od_coeff)tS;
4098 x[55*xstride] = (od_coeff)tT;
4099 x[56*xstride] = (od_coeff)tU;
4100 x[57*xstride] = (od_coeff)tV;
4101 x[58*xstride] = (od_coeff)tW;
4102 x[59*xstride] = (od_coeff)tX;
4103 x[60*xstride] = (od_coeff)tY;
4104 x[61*xstride] = (od_coeff)tZ;
4105 x[62*xstride] = (od_coeff)t_;
4106 x[63*xstride] = (od_coeff)t;
4107}
4108#endif