blob: 72e9ebca72ea96d965efa52c25aea06e5ccf68e3 [file] [log] [blame]
Monty Montgomery02078a32017-07-11 21:22:29 -04001#include "av1/common/daala_tx.h"
2#include "av1/common/odintrin.h"
3
4/* clang-format off */
5
/* Right shift used by the halving steps of the transforms in this file.
 * Forwards both arguments unchanged to OD_UNBIASED_RSHIFT32, which is
 * presumably supplied by av1/common/odintrin.h (not visible here; confirm). */
#define OD_DCT_RSHIFT(_a, _b) OD_UNBIASED_RSHIFT32(_a, _b)
7
/* TODO: Daala DCT overflow checks need to be ported as a later test */
/* OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx) is the hook the forward
 * transforms place ahead of each multiply: val is the operand, scale the
 * multiplier, offset the rounding term and idx a per-site identifier.
 * Until the checks are ported the hook expands to nothing, so none of its
 * arguments is evaluated. No definition is provided on this path when
 * OD_DCT_CHECK_OVERFLOW is defined. */
#if defined(OD_DCT_CHECK_OVERFLOW)
#else
# define OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx)
#endif
13
/* Embedded 2-point orthonormal Type-II forward DCT.
 *
 * Rewrites p0 and p1 in place through three integer lifting steps. Both
 * arguments must be modifiable integer lvalues and are evaluated several
 * times. OD_DCT_OVERFLOW_CHECK is invoked before every multiply with the
 * operand, multiplier, rounding offset and a per-site id. OD_IDCT_2 runs
 * the same three steps in the opposite order with the signs flipped. */
#define OD_FDCT_2(p0, p1) \
  do { \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 13573, 16384, 100); \
    p0 -= (p1*13573 + 16384) >> 15; \
    /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 5793, 4096, 101); \
    p1 += (p0*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 3393, 4096, 102); \
    p0 -= (p1*3393 + 4096) >> 13; \
  } while (0)
28
/* Embedded 2-point orthonormal Type-II inverse DCT.
 *
 * Rewrites p0 and p1 in place through three integer lifting steps, the
 * reverse of the steps in OD_FDCT_2 with the signs flipped. Both arguments
 * must be modifiable integer lvalues and are evaluated several times. */
#define OD_IDCT_2(p0, p1) \
  do { \
    /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    p0 += (p1*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    p1 -= (p0*5793 + 4096) >> 13; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    p0 += (p1*13573 + 16384) >> 15; \
  } while (0)
40
/* Embedded 2-point asymmetric Type-II forward DCT.
 *
 * Updates p0 and p1 in place; p1h is only read. The caller in this file
 * (OD_FDCT_4) passes OD_DCT_RSHIFT(p1, 1) as p1h. p0 and p1 must be
 * modifiable integer lvalues. */
#define OD_FDCT_2_ASYM(p0, p1, p1h) \
  do { \
    p0 += p1h; \
    p1 = p0 - p1; \
  } while (0)
48
/* Embedded 2-point asymmetric Type-II inverse DCT.
 *
 * Updates p0 and p1 in place and stores OD_DCT_RSHIFT(p1, 1), taken from
 * the updated p1, into p1h for the caller to reuse. All three arguments
 * must be modifiable integer lvalues. */
#define OD_IDCT_2_ASYM(p0, p1, p1h) \
  do { \
    p1 = p0 - p1; \
    p1h = OD_DCT_RSHIFT(p1, 1); \
    p0 -= p1h; \
  } while (0)
57
/* Embedded 2-point orthonormal Type-IV forward DST.
 *
 * Rewrites p0 and p1 in place through three integer lifting steps. Both
 * arguments must be modifiable integer lvalues and are evaluated several
 * times. OD_DCT_OVERFLOW_CHECK is invoked before every multiply.
 * OD_IDST_2 runs the same steps in the opposite order with the signs
 * flipped. */
#define OD_FDST_2(p0, p1) \
  do { \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 103); \
    p0 -= (p1*10947 + 8192) >> 14; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 473, 256, 104); \
    p1 += (p0*473 + 256) >> 9; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 105); \
    p0 -= (p1*10947 + 8192) >> 14; \
  } while (0)
72
/* Embedded 2-point orthonormal Type-IV inverse DST.
 *
 * Rewrites p0 and p1 in place through three integer lifting steps, the
 * reverse of the steps in OD_FDST_2 with the signs flipped. Both arguments
 * must be modifiable integer lvalues and are evaluated several times. */
#define OD_IDST_2(p0, p1) \
  do { \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    p0 += (p1*10947 + 8192) >> 14; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    p1 -= (p0*473 + 256) >> 9; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    p0 += (p1*10947 + 8192) >> 14; \
  } while (0)
84
/* Embedded 2-point asymmetric Type-IV forward DST.
 *
 * Rewrites p0 and p1 in place through three integer lifting steps. Both
 * arguments must be modifiable integer lvalues and are evaluated several
 * times. OD_DCT_OVERFLOW_CHECK is invoked before every multiply.
 * OD_IDST_2_ASYM runs the same steps in the opposite order with the signs
 * flipped. */
#define OD_FDST_2_ASYM(p0, p1) \
  do { \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 11507, 8192, 187); \
    p0 -= (p1*11507 + 8192) >> 14; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 669, 512, 188); \
    p1 += (p0*669 + 512) >> 10; \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 4573, 2048, 189); \
    p0 -= (p1*4573 + 2048) >> 12; \
  } while (0)
99
/* Embedded 2-point asymmetric Type-IV inverse DST.
 *
 * Rewrites p0 and p1 in place through three integer lifting steps, the
 * reverse of the steps in OD_FDST_2_ASYM with the signs flipped. Both
 * arguments must be modifiable integer lvalues and are evaluated several
 * times. */
#define OD_IDST_2_ASYM(p0, p1) \
  do { \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    p0 += (p1*4573 + 2048) >> 12; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    p1 -= (p0*669 + 512) >> 10; \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    p0 += (p1*11507 + 8192) >> 14; \
  } while (0)
111
/* Embedded 4-point orthonormal Type-II forward DCT.
 *
 * Applies a butterfly stage to the four arguments in place, then finishes
 * with OD_FDCT_2_ASYM on (q0, q2) and OD_FDST_2_ASYM on (q3, q1). All
 * arguments must be modifiable integer lvalues. The block declares the
 * temporaries q2h and q3h, so callers must not pass variables spelled
 * with those names. */
#define OD_FDCT_4(q0, q2, q1, q3) \
  do { \
    int q2h; \
    int q3h; \
    q3 = q0 - q3; \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 -= q3h; \
    q2 += q1; \
    q2h = OD_DCT_RSHIFT(q2, 1); \
    q1 = q2h - q1; \
    OD_FDCT_2_ASYM(q0, q2, q2h); \
    OD_FDST_2_ASYM(q3, q1); \
  } while (0)
127
/* Embedded 4-point orthonormal Type-II inverse DCT.
 *
 * Runs OD_IDST_2_ASYM on (q3, q2) and OD_IDCT_2_ASYM on (q0, q1), then a
 * butterfly stage that reuses the half value q1h produced by
 * OD_IDCT_2_ASYM. All arguments must be modifiable integer lvalues. The
 * block declares the temporaries q1h and q3h, so callers must not pass
 * variables spelled with those names. */
#define OD_IDCT_4(q0, q2, q1, q3) \
  do { \
    int q1h; \
    int q3h; \
    OD_IDST_2_ASYM(q3, q2); \
    OD_IDCT_2_ASYM(q0, q1, q1h); \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 += q3h; \
    q3 = q0 - q3; \
    q2 = q1h - q2; \
    q1 -= q2; \
  } while (0)
142
/* Embedded 4-point asymmetric Type-II forward DCT.
 *
 * Applies a butterfly stage in place using the caller-supplied half values
 * q2h and q3h (read only), then finishes with OD_FDCT_2 on (q0, q2) and
 * OD_FDST_2 on (q3, q1). The caller in this file (OD_FDCT_8) passes
 * OD_DCT_RSHIFT(x, 1) of the matching coefficient for each half value.
 * q0..q3 must be modifiable integer lvalues. */
#define OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h) \
  do { \
    q0 += q3h; \
    q3 = q0 - q3; \
    q1 = q2h - q1; \
    q2 = q1 - q2; \
    OD_FDCT_2(q0, q2); \
    OD_FDST_2(q3, q1); \
  } while (0)
154
/* Embedded 4-point asymmetric Type-II inverse DCT.
 *
 * Runs OD_IDST_2 on (q3, q2) and OD_IDCT_2 on (q0, q1), then a butterfly
 * stage in place. q1h and q3h are outputs: they receive
 * OD_DCT_RSHIFT(q1, 1) and OD_DCT_RSHIFT(q3, 1) as computed during the
 * butterfly, for the caller to reuse. Every argument must be a modifiable
 * integer lvalue. */
#define OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h) \
  do { \
    OD_IDST_2(q3, q2); \
    OD_IDCT_2(q0, q1); \
    q1 = q2 - q1; \
    q1h = OD_DCT_RSHIFT(q1, 1); \
    q2 = q1h - q2; \
    q3 = q0 - q3; \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 -= q3h; \
  } while (0)
168
/* Embedded 4-point orthonormal Type-IV forward DST.
 *
 * Rewrites the four arguments in place: a three-step rotation of (q2, q1),
 * a butterfly stage, then two further three-step rotations, one on
 * (q2, q1) and one on (q3, q0). OD_DCT_OVERFLOW_CHECK is invoked before
 * every multiply. All arguments must be modifiable integer lvalues. The
 * block declares the temporaries q0h and q1h, so callers must not pass
 * variables spelled with those names. */
#define OD_FDST_4(q0, q2, q1, q3) \
  do { \
    int q0h; \
    int q1h; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 13573, 16384, 190); \
    q2 += (q1*13573 + 16384) >> 15; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(q2, 5793, 4096, 191); \
    q1 -= (q2*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 3393, 4096, 192); \
    q2 += (q1*3393 + 4096) >> 13; \
    q0 += q2; \
    q0h = OD_DCT_RSHIFT(q0, 1); \
    q2 = q0h - q2; \
    q1 += q3; \
    q1h = OD_DCT_RSHIFT(q1, 1); \
    q3 -= q1h; \
    /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~= \
        0.524455699240090 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 537, 512, 193); \
    q2 -= (q1*537 + 512) >> 10; \
    /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
    OD_DCT_OVERFLOW_CHECK(q2, 1609, 1024, 194); \
    q1 += (q2*1609 + 1024) >> 11; \
    /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~= \
        0.223847182092655 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 7335, 16384, 195); \
    q2 += (q1*7335 + 16384) >> 15; \
    /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~= \
        0.6215036383171189 */ \
    OD_DCT_OVERFLOW_CHECK(q0, 5091, 4096, 196); \
    q3 += (q0*5091 + 4096) >> 13; \
    /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
    OD_DCT_OVERFLOW_CHECK(q3, 5681, 2048, 197); \
    q0 -= (q3*5681 + 2048) >> 12; \
    /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~= \
        0.52204745462729 */ \
    OD_DCT_OVERFLOW_CHECK(q0, 4277, 4096, 198); \
    q3 += (q0*4277 + 4096) >> 13; \
  } while (0)
213
/* Embedded 4-point orthonormal Type-IV inverse DST.
 *
 * Rewrites the four arguments in place: two three-step rotations, one on
 * (q3, q0) and one on (q1, q2), a butterfly stage, then a final three-step
 * rotation of (q1, q2). All arguments must be modifiable integer lvalues.
 * The block declares the temporaries q0h and q2h, so callers must not pass
 * variables spelled with those names. */
#define OD_IDST_4(q0, q2, q1, q3) \
  do { \
    int q0h; \
    int q2h; \
    /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~= \
        0.52204745462729 */ \
    q3 -= (q0*4277 + 4096) >> 13; \
    /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
    q0 += (q3*5681 + 2048) >> 12; \
    /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~= \
        0.6215036383171189 */ \
    q3 -= (q0*5091 + 4096) >> 13; \
    /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~= \
        0.223847182092655 */ \
    q1 -= (q2*7335 + 16384) >> 15; \
    /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
    q2 -= (q1*1609 + 1024) >> 11; \
    /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~= \
        0.524455699240090 */ \
    q1 += (q2*537 + 512) >> 10; \
    q2h = OD_DCT_RSHIFT(q2, 1); \
    q3 += q2h; \
    q2 -= q3; \
    q0h = OD_DCT_RSHIFT(q0, 1); \
    q1 = q0h - q1; \
    q0 -= q1; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    q1 -= (q2*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    q2 += (q1*5793 + 4096) >> 13; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    q1 -= (q2*13573 + 16384) >> 15; \
  } while (0)
249
/* Embedded 4-point asymmetric Type-IV forward DST.
 *
 * Rewrites t0, t2, t1 and t3 in place: a three-step rotation of (t2, t1),
 * a butterfly stage, then three-step rotations of (t3, t0) and (t2, t1).
 * t0h is only read; the caller in this file (OD_FDCT_8) passes
 * OD_DCT_RSHIFT(t0, 1). OD_DCT_OVERFLOW_CHECK is invoked before every
 * multiply and is given the operand that the following multiply actually
 * uses. t0, t2, t1 and t3 must be modifiable integer lvalues. */
#define OD_FDST_4_ASYM(t0, t0h, t2, t1, t3) \
  do { \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 7489, 4096, 106); \
    t2 -= (t1*7489 + 4096) >> 13; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    /* The multiplicand of this step is t2, so t2 is what gets checked. */ \
    OD_DCT_OVERFLOW_CHECK(t2, 11585, 8192, 107); \
    t1 += (t2*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 108); \
    t2 += (t1*19195 + 16384) >> 15; \
    t3 += OD_DCT_RSHIFT(t2, 1); \
    t2 -= t3; \
    t1 = t0h - t1; \
    t0 -= t1; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 109); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 8035, 4096, 110); \
    t0 -= (t3*8035 + 4096) >> 13; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 111); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 112); \
    t2 += (t1*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 6811, 4096, 113); \
    t1 -= (t2*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 114); \
    t2 += (t1*8757 + 8192) >> 14; \
  } while (0)
286
/* Embedded 4-point asymmetric Type-IV inverse DST.
 *
 * Rewrites t0, t2, t1 and t3 in place: three-step rotations of (t1, t2)
 * and (t3, t0), a butterfly stage, then a final three-step rotation of
 * (t1, t2). t0h is an output: it receives OD_DCT_RSHIFT(t0, 1) as computed
 * during the butterfly, for the caller to reuse. Every argument must be a
 * modifiable integer lvalue. */
#define OD_IDST_4_ASYM(t0, t0h, t2, t1, t3) \
  do { \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    t1 -= (t2*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    t2 += (t1*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    t1 -= (t2*8757 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    t3 -= (t0*6723 + 4096) >> 13; \
    /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    t0 += (t3*8035 + 4096) >> 13; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    t3 -= (t0*6723 + 4096) >> 13; \
    t0 += t2; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t2 = t0h - t2; \
    t1 += t3; \
    t3 -= OD_DCT_RSHIFT(t1, 1); \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    t1 -= (t2*19195 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    t2 -= (t1*11585 + 8192) >> 14; \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    t1 += (t2*7489 + 4096) >> 13; \
  } while (0)
315
/* Embedded 8-point orthonormal Type-II forward DCT.
 *
 * Applies a butterfly stage to the eight arguments in place, then finishes
 * with OD_FDCT_4_ASYM on (r0, r4, r2, r6) and OD_FDST_4_ASYM on
 * (r7, r3, r5, r1), handing over the half values computed during the
 * butterfly. All arguments must be modifiable integer lvalues. The block
 * declares the temporaries r4h, r5h, r6h and r7h, so callers must not pass
 * variables spelled with those names. */
#define OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
  do { \
    int r4h; \
    int r5h; \
    int r6h; \
    int r7h; \
    r7 = r0 - r7; \
    r7h = OD_DCT_RSHIFT(r7, 1); \
    r0 -= r7h; \
    r6 += r1; \
    r6h = OD_DCT_RSHIFT(r6, 1); \
    r1 = r6h - r1; \
    r5 = r2 - r5; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 -= r5h; \
    r4 += r3; \
    r4h = OD_DCT_RSHIFT(r4, 1); \
    r3 = r4h - r3; \
    OD_FDCT_4_ASYM(r0, r4, r4h, r2, r6, r6h); \
    OD_FDST_4_ASYM(r7, r7h, r3, r5, r1); \
  } while (0)
339
/* Embedded 8-point orthonormal Type-II inverse DCT.
 *
 * Runs OD_IDST_4_ASYM on (r7, r5, r6, r4) and OD_IDCT_4_ASYM on
 * (r0, r2, r1, r3), then a butterfly stage that reuses the half values
 * r7h, r1h and r3h those macros produce. All arguments must be modifiable
 * integer lvalues. The block declares the temporaries r1h, r3h, r5h and
 * r7h, so callers must not pass variables spelled with those names. */
#define OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
  do { \
    int r1h; \
    int r3h; \
    int r5h; \
    int r7h; \
    OD_IDST_4_ASYM(r7, r7h, r5, r6, r4); \
    OD_IDCT_4_ASYM(r0, r2, r1, r1h, r3, r3h); \
    r0 += r7h; \
    r7 = r0 - r7; \
    r6 = r1h - r6; \
    r1 -= r6; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 += r5h; \
    r5 = r2 - r5; \
    r4 = r3h - r4; \
    r3 -= r4; \
  } while (0)
360
/* Embedded 8-point asymmetric Type-II forward DCT.
 *
 * Applies a butterfly stage in place using the caller-supplied half values
 * r4h, r5h, r6h and r7h (read only), then finishes with OD_FDCT_4 on
 * (r0, r4, r2, r6) and OD_FDST_4 on (r7, r3, r5, r1). r0..r7 must be
 * modifiable integer lvalues. */
#define OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h) \
  do { \
    r0 += r7h; \
    r7 = r0 - r7; \
    r1 = r6h - r1; \
    r6 -= r1; \
    r2 += r5h; \
    r5 = r2 - r5; \
    r3 = r4h - r3; \
    r4 -= r3; \
    OD_FDCT_4(r0, r4, r2, r6); \
    OD_FDST_4(r7, r3, r5, r1); \
  } while (0)
376
/* Embedded 8-point asymmetric Type-II inverse DCT.
 *
 * Runs OD_IDST_4 on (r7, r5, r6, r4) and OD_IDCT_4 on (r0, r2, r1, r3),
 * then a butterfly stage in place. r1h, r3h, r5h and r7h are outputs: each
 * receives OD_DCT_RSHIFT(x, 1) of the matching coefficient as computed
 * during the butterfly, for the caller to reuse. Every argument must be a
 * modifiable integer lvalue. */
#define OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h) \
  do { \
    OD_IDST_4(r7, r5, r6, r4); \
    OD_IDCT_4(r0, r2, r1, r3); \
    r7 = r0 - r7; \
    r7h = OD_DCT_RSHIFT(r7, 1); \
    r0 -= r7h; \
    r1 += r6; \
    r1h = OD_DCT_RSHIFT(r1, 1); \
    r6 = r1h - r6; \
    r5 = r2 - r5; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 -= r5h; \
    r3 += r4; \
    r3h = OD_DCT_RSHIFT(r3, 1); \
    r4 = r3h - r4; \
  } while (0)
396
/* Embedded 8-point orthonormal Type-IV forward DST.
 *
 * Rewrites the eight arguments in place: three three-step rotations, two
 * butterfly stages, a negation of t7, then four more three-step rotations.
 * OD_DCT_OVERFLOW_CHECK is invoked before every multiply. All arguments
 * must be modifiable integer lvalues. The block declares the temporaries
 * t0h, t2h, t5h and t7h, so callers must not pass variables spelled with
 * those names. */
#define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h; \
    int t7h; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 115); \
    t6 -= (t1*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 11585, 8192, 116); \
    t1 += (t6*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 117); \
    t6 -= (t1*13573 + 16384) >> 15; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 21895, 16384, 118); \
    t5 -= (t2*21895 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 15137, 8192, 119); \
    t2 += (t5*15137 + 8192) >> 14; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 10947, 8192, 120); \
    t5 -= (t2*10947 + 8192) >> 14; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 121); \
    t4 -= (t3*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3135, 4096, 122); \
    t3 += (t4*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 123); \
    t4 -= (t3*3259 + 8192) >> 14; \
    t7 += t1; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t1 -= t7h; \
    t2 = t3 - t2; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 -= t2h; \
    t0 -= t6; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t6 += t0h; \
    t5 = t4 - t5; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h; \
    t1 += t5h; \
    t5 = t1 - t5; \
    t4 += t0h; \
    t0 -= t4; \
    t6 -= t2h; \
    t2 += t6; \
    t3 -= t7h; \
    t7 += t3; \
    /* TODO: Can we move this into another operation */ \
    t7 = -t7; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 124); \
    t0 -= (t7*7425 + 4096) >> 13; \
    /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 8153, 4096, 125); \
    t7 += (t0*8153 + 4096) >> 13; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 126); \
    t0 -= (t7*7425 + 4096) >> 13; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 127); \
    t6 -= (t1*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 1189, 2048, 128); \
    t1 += (t6*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 129); \
    t6 -= (t1*4861 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 130); \
    t2 -= (t5*2455 + 2048) >> 12; \
    /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 7225, 4096, 131); \
    t5 += (t2*7225 + 4096) >> 13; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 132); \
    t2 -= (t5*2455 + 2048) >> 12; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 133); \
    t4 -= (t3*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 5197, 4096, 134); \
    t3 += (t4*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 135); \
    t4 -= (t3*11725 + 16384) >> 15; \
  } while (0)
491
/* Embedded 8-point orthonormal Type-IV inverse DST.
 *
 * Rewrites the eight arguments in place: four three-step rotations, a
 * negation of t7, two butterfly stages, then three more three-step
 * rotations. All arguments must be modifiable integer lvalues. The block
 * declares the temporaries t0h, t2h, t5h_ and t7h_, so callers must not
 * pass variables spelled with those names. */
#define OD_IDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h_; \
    int t7h_; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    t1 += (t6*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
    t6 -= (t1*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    t1 += (t6*11725 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    t2 += (t5*2455 + 2048) >> 12; \
    /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    t5 -= (t2*7225 + 4096) >> 13; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    t2 += (t5*2455 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    t3 += (t4*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
    t4 -= (t3*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    t3 += (t4*4861 + 16384) >> 15; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    t0 += (t7*7425 + 4096) >> 13; \
    /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
    t7 -= (t0*8153 + 4096) >> 13; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    t0 += (t7*7425 + 4096) >> 13; \
    /* TODO: Can we move this into another operation */ \
    t7 = -t7; \
    t7 -= t6; \
    t7h_ = OD_DCT_RSHIFT(t7, 1); \
    t6 += t7h_; \
    t2 -= t3; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 += t2h; \
    t0 += t1; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t1 -= t0h; \
    t5 = t4 - t5; \
    t5h_ = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h_; \
    t1 += t5h_; \
    t5 = t1 - t5; \
    t3 -= t0h; \
    t0 += t3; \
    t6 += t2h; \
    t2 = t6 - t2; \
    t4 += t7h_; \
    t7 -= t4; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    t6 -= (t1*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 += (t2*10947 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t2 -= (t5*15137 + 8192) >> 14; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 += (t2*21895 + 16384) >> 15; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t3 += (t4*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    t4 -= (t3*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t3 += (t4*13573 + 16384) >> 15; \
  } while (0)
565
/* Embedded 8-point asymmetric Type-IV forward DST.
 *
 * Rewrites the eight arguments in place: four three-step rotations, two
 * butterfly stages, then three more three-step rotations.
 * OD_DCT_OVERFLOW_CHECK is invoked before every multiply. All arguments
 * must be modifiable integer lvalues. The block declares the temporaries
 * t0h, t2h, t5h and t7h, so callers must not pass variables spelled with
 * those names.
 *
 * TODO: Rewrite this so that t0h can be passed in. */
#define OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h; \
    int t7h; \
    /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t1, 1035, 1024, 199); \
    t6 += (t1*1035 + 1024) >> 11; \
    /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3675, 2048, 200); \
    t1 -= (t6*3675 + 2048) >> 12; \
    /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t1, 851, 4096, 201); \
    t6 -= (t1*851 + 4096) >> 13; \
    /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t2, 4379, 4096, 202); \
    t5 += (t2*4379 + 4096) >> 13; \
    /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t5, 10217, 4096, 203); \
    t2 -= (t5*10217 + 4096) >> 13; \
    /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t2, 4379, 8192, 204); \
    t5 += (t2*4379 + 8192) >> 14; \
    /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t3, 12905, 8192, 205); \
    t4 += (t3*12905 + 8192) >> 14; \
    /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3363, 4096, 206); \
    t3 -= (t4*3363 + 4096) >> 13; \
    /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3525, 2048, 207); \
    t4 -= (t3*3525 + 2048) >> 12; \
    /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t0, 5417, 4096, 208); \
    t7 += (t0*5417 + 4096) >> 13; \
    /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t7, 5765, 2048, 209); \
    t0 -= (t7*5765 + 2048) >> 12; \
    /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t0, 2507, 2048, 210); \
    t7 += (t0*2507 + 2048) >> 12; \
    t0 += t1; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t1 -= t0h; \
    t2 -= t3; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 += t2h; \
    t5 -= t4; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    t4 += t5h; \
    t7 += t6; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t6 = t7h - t6; \
    t4 = t7h - t4; \
    t7 -= t4; \
    t1 += t5h; \
    t5 = t1 - t5; \
    t6 += t2h; \
    t2 = t6 - t2; \
    t3 -= t0h; \
    t0 += t3; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 211); \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 3135, 4096, 212); \
    t6 -= (t1*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 213); \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 214); \
    t5 += (t2*2737 + 2048) >> 12; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 473, 256, 215); \
    t2 -= (t5*473 + 256) >> 9; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 216); \
    t5 += (t2*2737 + 2048) >> 12; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 217); \
    t3 += (t4*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 5793, 4096, 218); \
    t4 -= (t3*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 219); \
    t3 += (t4*3393 + 4096) >> 13; \
  } while (0)
659
/* Embedded 8-point asymmetric Type-IV inverse DST.
 *
 * Rewrites the eight arguments in place: three three-step rotations, two
 * butterfly stages, then four more three-step rotations. All arguments
 * must be modifiable integer lvalues. The block declares the temporaries
 * t0h, t2h, t5h__ and t7h__, so callers must not pass variables spelled
 * with those names. */
#define OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
  do { \
    int t0h; \
    int t2h; \
    int t5h__; \
    int t7h__; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t6 -= (t1*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    t1 += (t6*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t6 -= (t1*3393 + 4096) >> 13; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 -= (t2*2737 + 2048) >> 12; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t2 += (t5*473 + 256) >> 9; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 -= (t2*2737 + 2048) >> 12; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t4 -= (t3*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    t3 += (t4*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t4 -= (t3*3259 + 8192) >> 14; \
    t0 -= t6; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t6 += t0h; \
    t2 = t3 - t2; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 -= t2h; \
    t5 = t4 - t5; \
    t5h__ = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h__; \
    t7 += t1; \
    t7h__ = OD_DCT_RSHIFT(t7, 1); \
    t1 = t7h__ - t1; \
    t3 = t7h__ - t3; \
    t7 -= t3; \
    t1 -= t5h__; \
    t5 += t1; \
    t6 -= t2h; \
    t2 += t6; \
    t4 += t0h; \
    t0 -= t4; \
    /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
    t7 -= (t0*2507 + 2048) >> 12; \
    /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
    t0 += (t7*5765 + 2048) >> 12; \
    /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
    t7 -= (t0*5417 + 4096) >> 13; \
    /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
    t1 += (t6*3525 + 2048) >> 12; \
    /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
    t6 += (t1*3363 + 4096) >> 13; \
    /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
    t1 -= (t6*12905 + 8192) >> 14; \
    /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
    t5 -= (t2*4379 + 8192) >> 14; \
    /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
    t2 += (t5*10217 + 4096) >> 13; \
    /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
    t5 -= (t2*4379 + 4096) >> 13; \
    /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
    t3 += (t4*851 + 4096) >> 13; \
    /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
    t4 += (t3*3675 + 2048) >> 12; \
    /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
    t3 -= (t4*1035 + 1024) >> 11; \
  } while (0)
731
/* Embedded 16-point orthonormal Type-II forward DCT.
 *
 * Applies a butterfly stage to the sixteen arguments in place, then
 * finishes with OD_FDCT_8_ASYM on (s0, s8, s4, sc, s2, sa, s6, se),
 * handing over the half values computed during the butterfly, and
 * OD_FDST_8_ASYM on (sf, s7, sb, s3, sd, s5, s9, s1). All arguments must
 * be modifiable integer lvalues. The block declares the temporaries s8h,
 * sah, sch, seh and sfh, so callers must not pass variables spelled with
 * those names. */
#define OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
  s1, s9, s5, sd, s3, sb, s7, sf) \
  do { \
    int s8h; \
    int sah; \
    int sch; \
    int seh; \
    int sfh; \
    sf = s0 - sf; \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    s0 -= sfh; \
    se += s1; \
    seh = OD_DCT_RSHIFT(se, 1); \
    s1 = seh - s1; \
    sd = s2 - sd; \
    s2 -= OD_DCT_RSHIFT(sd, 1); \
    sc += s3; \
    sch = OD_DCT_RSHIFT(sc, 1); \
    s3 = sch - s3; \
    sb = s4 - sb; \
    s4 -= OD_DCT_RSHIFT(sb, 1); \
    sa += s5; \
    sah = OD_DCT_RSHIFT(sa, 1); \
    s5 = sah - s5; \
    s9 = s6 - s9; \
    s6 -= OD_DCT_RSHIFT(s9, 1); \
    s8 += s7; \
    s8h = OD_DCT_RSHIFT(s8, 1); \
    s7 = s8h - s7; \
    OD_FDCT_8_ASYM(s0, s8, s8h, s4, sc, sch, s2, sa, sah, s6, se, seh); \
    OD_FDST_8_ASYM(sf, s7, sb, s3, sd, s5, s9, s1); \
  } while (0)
766
/* Embedded 16-point orthonormal Type-II inverse DCT.
 *
 * Runs OD_IDST_8_ASYM on (sf, sb, sd, s9, se, sa, sc, s8) and
 * OD_IDCT_8_ASYM on (s0, s4, s2, s6, s1, s5, s3, s7), then a butterfly
 * stage that reuses the half values s1h, s3h, s5h and s7h produced by
 * OD_IDCT_8_ASYM. All arguments must be modifiable integer lvalues. The
 * block declares the temporaries s1h, s3h, s5h, s7h and sfh, so callers
 * must not pass variables spelled with those names. */
#define OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
  s1, s9, s5, sd, s3, sb, s7, sf) \
  do { \
    int s1h; \
    int s3h; \
    int s5h; \
    int s7h; \
    int sfh; \
    OD_IDST_8_ASYM(sf, sb, sd, s9, se, sa, sc, s8); \
    OD_IDCT_8_ASYM(s0, s4, s2, s6, s1, s1h, s5, s5h, s3, s3h, s7, s7h); \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    s0 += sfh; \
    sf = s0 - sf; \
    se = s1h - se; \
    s1 -= se; \
    s2 += OD_DCT_RSHIFT(sd, 1); \
    sd = s2 - sd; \
    sc = s3h - sc; \
    s3 -= sc; \
    s4 += OD_DCT_RSHIFT(sb, 1); \
    sb = s4 - sb; \
    sa = s5h - sa; \
    s5 -= sa; \
    s6 += OD_DCT_RSHIFT(s9, 1); \
    s9 = s6 - s9; \
    s8 = s7h - s8; \
    s7 -= s8; \
  } while (0)
797
/* Embedded 16-point asymmetric Type-II forward DCT.
 *
 * Applies a butterfly stage in place using the caller-supplied half values
 * t8h, t9h, tah, tbh, tch, tdh, teh and tfh (read only), then finishes
 * with OD_FDCT_8 on (t0, t8, t4, tc, t2, ta, t6, te) and OD_FDST_8 on
 * (tf, t7, tb, t3, td, t5, t9, t1). t0..tf must be modifiable integer
 * lvalues. */
#define OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh, \
  t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh) \
  do { \
    t0 += tfh; \
    tf = t0 - tf; \
    t1 -= teh; \
    te += t1; \
    t2 += tdh; \
    td = t2 - td; \
    t3 -= tch; \
    tc += t3; \
    t4 += tbh; \
    tb = t4 - tb; \
    t5 -= tah; \
    ta += t5; \
    t6 += t9h; \
    t9 = t6 - t9; \
    t7 -= t8h; \
    t8 += t7; \
    OD_FDCT_8(t0, t8, t4, tc, t2, ta, t6, te); \
    OD_FDST_8(tf, t7, tb, t3, td, t5, t9, t1); \
  } while (0)
822
/* Embedded 16-point asymmetric Type-II inverse DCT.
 *
 * Runs OD_IDST_8 on (tf, tb, td, t9, te, ta, tc, t8) and OD_IDCT_8 on
 * (t0, t4, t2, t6, t1, t5, t3, t7), then a butterfly stage in place.
 * t1h, t9h, t5h, tdh, t3h, tbh, t7h and tfh are outputs: each receives
 * OD_DCT_RSHIFT(x, 1) of the matching coefficient as computed during the
 * butterfly, for the caller to reuse. Every argument must be a modifiable
 * integer lvalue. */
#define OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
  t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh) \
  do { \
    OD_IDST_8(tf, tb, td, t9, te, ta, tc, t8); \
    OD_IDCT_8(t0, t4, t2, t6, t1, t5, t3, t7); \
    t1 -= te; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    te += t1h; \
    t9 = t6 - t9; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    t6 -= t9h; \
    t5 -= ta; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    ta += t5h; \
    td = t2 - td; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    t2 -= tdh; \
    t3 -= tc; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    tc += t3h; \
    tb = t4 - tb; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    t4 -= tbh; \
    t7 -= t8; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t8 += t7h; \
    tf = t0 - tf; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    t0 -= tfh; \
  } while (0)
855
Monty Montgomerycb9c1c52017-07-17 18:15:30 -0400856#define OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
857 s1, s9, s5, sd, s3, sb, s7, sf) \
858 /* Embedded 16-point orthonormal Type-IV fDST. */ \
859 do { \
860 int s0h; \
861 int s2h; \
862 int sdh; \
863 int sfh; \
864 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
865 OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 220); \
866 s1 += (se*13573 + 16384) >> 15; \
867 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
868 OD_DCT_OVERFLOW_CHECK(s1, 11585, 8192, 221); \
869 se -= (s1*11585 + 8192) >> 14; \
870 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
871 OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 222); \
872 s1 += (se*13573 + 16384) >> 15; \
873 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
874 OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 223); \
875 sd += (s2*21895 + 16384) >> 15; \
876 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
877 OD_DCT_OVERFLOW_CHECK(sd, 15137, 16384, 224); \
878 s2 -= (sd*15137 + 8192) >> 14; \
879 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
880 OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 225); \
881 sd += (s2*21895 + 16384) >> 15; \
882 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
883 OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 226); \
884 sc += (s3*3259 + 8192) >> 14; \
885 /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
886 OD_DCT_OVERFLOW_CHECK(sc, 3135, 4096, 227); \
887 s3 -= (sc*3135 + 4096) >> 13; \
888 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
889 OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 228); \
890 sc += (s3*3259 + 8192) >> 14; \
891 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
892 OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 229); \
893 sa += (s5*13573 + 16384) >> 15; \
894 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
895 OD_DCT_OVERFLOW_CHECK(sa, 11585, 8192, 230); \
896 s5 -= (sa*11585 + 8192) >> 14; \
897 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
898 OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 231); \
899 sa += (s5*13573 + 16384) >> 15; \
900 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
901 OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 232); \
902 s6 += (s9*13573 + 16384) >> 15; \
903 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
904 OD_DCT_OVERFLOW_CHECK(s6, 11585, 8192, 233); \
905 s9 -= (s6*11585 + 8192) >> 14; \
906 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
907 OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 234); \
908 s6 += (s9*13573 + 16384) >> 15; \
909 sf += se; \
910 sfh = OD_DCT_RSHIFT(sf, 1); \
911 se = sfh - se; \
912 s0 += s1; \
913 s0h = OD_DCT_RSHIFT(s0, 1); \
914 s1 = s0h - s1; \
915 s2 = s3 - s2; \
916 s2h = OD_DCT_RSHIFT(s2, 1); \
917 s3 -= s2h; \
918 sd -= sc; \
919 sdh = OD_DCT_RSHIFT(sd, 1); \
920 sc += sdh; \
921 sa = s4 - sa; \
922 s4 -= OD_DCT_RSHIFT(sa, 1); \
923 s5 += sb; \
924 sb = OD_DCT_RSHIFT(s5, 1) - sb; \
925 s8 += s6; \
926 s6 -= OD_DCT_RSHIFT(s8, 1); \
927 s7 = s9 - s7; \
928 s9 -= OD_DCT_RSHIFT(s7, 1); \
929 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
930 OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 235); \
931 s4 += (sb*6723 + 4096) >> 13; \
932 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
933 OD_DCT_OVERFLOW_CHECK(s4, 16069, 8192, 236); \
934 sb -= (s4*16069 + 8192) >> 14; \
935 /* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \
936 OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 237); \
937 s4 += (sb*6723 + 4096) >> 13; \
938 /* 8757/16384 ~= Tan[5*Pi/32]) ~= 0.534511135950792 */ \
939 OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 238); \
940 sa += (s5*8757 + 8192) >> 14; \
941 /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
942 OD_DCT_OVERFLOW_CHECK(sa, 6811, 4096, 239); \
943 s5 -= (sa*6811 + 4096) >> 13; \
944 /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
945 OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 240); \
946 sa += (s5*8757 + 8192) >> 14; \
947 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
948 OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 241); \
949 s6 += (s9*2485 + 4096) >> 13; \
950 /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
951 OD_DCT_OVERFLOW_CHECK(s6, 4551, 4096, 242); \
952 s9 -= (s6*4551 + 4096) >> 13; \
953 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
954 OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 243); \
955 s6 += (s9*2485 + 4096) >> 13; \
956 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
957 OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 244); \
958 s7 += (s8*3227 + 16384) >> 15; \
959 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
960 OD_DCT_OVERFLOW_CHECK(s7, 6393, 16384, 245); \
961 s8 -= (s7*6393 + 16384) >> 15; \
962 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
963 OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 246); \
964 s7 += (s8*3227 + 16384) >> 15; \
965 s1 -= s2h; \
966 s2 += s1; \
967 se += sdh; \
968 sd = se - sd; \
969 s3 += sfh; \
970 sf -= s3; \
971 sc = s0h - sc; \
972 s0 -= sc; \
973 sb += OD_DCT_RSHIFT(s8, 1); \
974 s8 = sb - s8; \
975 s4 += OD_DCT_RSHIFT(s7, 1); \
976 s7 -= s4; \
977 s6 += OD_DCT_RSHIFT(s5, 1); \
978 s5 = s6 - s5; \
979 s9 -= OD_DCT_RSHIFT(sa, 1); \
980 sa += s9; \
981 s8 += s0; \
982 s0 -= OD_DCT_RSHIFT(s8, 1); \
983 sf += s7; \
984 s7 = OD_DCT_RSHIFT(sf, 1) - s7; \
985 s1 -= s6; \
986 s6 += OD_DCT_RSHIFT(s1, 1); \
987 s9 += se; \
988 se = OD_DCT_RSHIFT(s9, 1) - se; \
989 s2 += sa; \
990 sa = OD_DCT_RSHIFT(s2, 1) - sa; \
991 s5 += sd; \
992 sd -= OD_DCT_RSHIFT(s5, 1); \
993 s4 = sc - s4; \
994 sc -= OD_DCT_RSHIFT(s4, 1); \
995 s3 -= sb; \
996 sb += OD_DCT_RSHIFT(s3, 1); \
997 /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
998 OD_DCT_OVERFLOW_CHECK(sf, 2799, 2048, 247); \
999 s0 -= (sf*2799 + 2048) >> 12; \
1000 /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
1001 OD_DCT_OVERFLOW_CHECK(s0, 2893, 1024, 248); \
1002 sf += (s0*2893 + 1024) >> 11; \
1003 /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
1004 OD_DCT_OVERFLOW_CHECK(sf, 5397, 4096, 249); \
1005 s0 -= (sf*5397 + 4096) >> 13; \
1006 /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
1007 OD_DCT_OVERFLOW_CHECK(s1, 41, 32, 250); \
1008 se += (s1*41 + 32) >> 6; \
1009 /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
1010 OD_DCT_OVERFLOW_CHECK(se, 2865, 1024, 251); \
1011 s1 -= (se*2865 + 1024) >> 11; \
1012 /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
1013 OD_DCT_OVERFLOW_CHECK(s1, 4641, 4096, 252); \
1014 se += (s1*4641 + 4096) >> 13; \
1015 /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
1016 OD_DCT_OVERFLOW_CHECK(s2, 2473, 2048, 253); \
1017 sd += (s2*2473 + 2048) >> 12; \
1018 /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
1019 OD_DCT_OVERFLOW_CHECK(sd, 5619, 2048, 254); \
1020 s2 -= (sd*5619 + 2048) >> 12; \
1021 /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
1022 OD_DCT_OVERFLOW_CHECK(s2, 7839, 8192, 255); \
1023 sd += (s2*7839 + 8192) >> 14; \
1024 /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
1025 OD_DCT_OVERFLOW_CHECK(s3, 5747, 4096, 256); \
1026 sc -= (s3*5747 + 4096) >> 13; \
1027 /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] ~= */ \
1028 OD_DCT_OVERFLOW_CHECK(sc, 3903, 4096, 257); \
1029 s3 += (sc*3903 + 4096) >> 13; \
1030 /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
1031 OD_DCT_OVERFLOW_CHECK(s3, 5701, 4096, 258); \
1032 sc += (s3*5701 + 4096) >> 13; \
1033 /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
1034 OD_DCT_OVERFLOW_CHECK(s4, 4471, 4096, 259); \
1035 sb += (s4*4471 + 4096) >> 13; \
1036 /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
1037 OD_DCT_OVERFLOW_CHECK(sb, 1309, 512, 260); \
1038 s4 -= (sb*1309 + 512) >> 10; \
1039 /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
1040 OD_DCT_OVERFLOW_CHECK(s4, 5067, 8192, 261); \
1041 sb += (s4*5067 + 8192) >> 14; \
1042 /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
1043 OD_DCT_OVERFLOW_CHECK(s5, 2217, 2048, 262); \
1044 sa -= (s5*2217 + 2048) >> 12; \
1045 /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] ~= 0.72705107329128 */ \
1046 OD_DCT_OVERFLOW_CHECK(sa, 1489, 1024, 263); \
1047 s5 += (sa*1489 + 1024) >> 11; \
1048 /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
1049 OD_DCT_OVERFLOW_CHECK(s5, 75, 128, 264); \
1050 sa += (s5*75 + 128) >> 8; \
1051 /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
1052 OD_DCT_OVERFLOW_CHECK(s9, 2087, 2048, 265); \
1053 s6 -= (s9*2087 + 2048) >> 12; \
1054 /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
1055 OD_DCT_OVERFLOW_CHECK(s6, 4653, 2048, 266); \
1056 s9 += (s6*4653 + 2048) >> 12; \
1057 /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
1058 OD_DCT_OVERFLOW_CHECK(s9, 4545, 16384, 267); \
1059 s6 -= (s9*4545 + 16384) >> 15; \
1060 /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
1061 OD_DCT_OVERFLOW_CHECK(s8, 2053, 2048, 268); \
1062 s7 += (s8*2053 + 2048) >> 12; \
1063 /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
1064 OD_DCT_OVERFLOW_CHECK(s7, 1945, 1024, 269); \
1065 s8 -= (s7*1945 + 1024) >> 11; \
1066 /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
1067 OD_DCT_OVERFLOW_CHECK(s8, 1651, 16384, 270); \
1068 s7 -= (s8*1651 + 16384) >> 15; \
1069 } \
1070 while (0)
1071
/* OD_IDST_16: in-place 16-point inverse Type-IV DST built from integer
   multiply/round/shift updates.

   Arguments: sixteen lvalues that are both inputs and outputs.  Each one is
   expanded many times, so pass only side-effect-free lvalues.  Note that the
   parameter order is (s0, s8, s4, sc, ...), not s0..sf.

   Structure of the body, top to bottom:
    1. eight three-step updates on the pairs (se,s1) (s6,s9) (s5,sa) (sd,s2)
       (s3,sc) (sb,s4) (s7,s8) (s0,sf);
    2. add/subtract steps that use OD_DCT_RSHIFT(x, 1) halves; four of the
       halves are kept in locals for step 4;
    3. four three-step updates on (se,s1) (s6,s9) (s5,sa) (s2,sd);
    4. a second add/subtract stage that consumes the saved halves;
    5. five three-step updates on (s6,s9) (s5,sa) (s3,sc) (sb,s4) (s8,s7).

   Every multiply adds half of the divisor before the right shift.  The
   opening updates use the same constants and shifts as the closing updates of
   OD_FDST_16, in reverse order and with opposite signs; the variable names
   differ, presumably because the two macros order their parameters
   differently (confirm against OD_FDST_16's parameter list).  Statement order
   is significant throughout: each update reads the result of the previous
   one. */
1072#define OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
1073 s1, s9, s5, sd, s3, sb, s7, sf) \
1074 /* Embedded 16-point orthonormal Type-IV iDST. */ \
1075 do { \
1076 int s0h; /* OD_DCT_RSHIFT(s0, 1); reused when updating s3 and s8 */ \
1077 int s4h; /* OD_DCT_RSHIFT(s4, 1); reused when updating s8 and sc */ \
1078 int sbh; /* OD_DCT_RSHIFT(sb, 1); reused when updating s7 and s3 */ \
1079 int sfh; /* OD_DCT_RSHIFT(sf, 1); reused when updating sc and s7 */ \
1080 /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
1081 se += (s1*1651 + 16384) >> 15; \
1082 /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
1083 s1 += (se*1945 + 1024) >> 11; \
1084 /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
1085 se -= (s1*2053 + 2048) >> 12; \
1086 /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
1087 s6 += (s9*4545 + 16384) >> 15; \
1088 /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
1089 s9 -= (s6*4653 + 2048) >> 12; \
1090 /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
1091 s6 += (s9*2087 + 2048) >> 12; \
1092 /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
1093 s5 -= (sa*75 + 128) >> 8; \
1094 /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] */ \
1095 sa -= (s5*1489 + 1024) >> 11; \
1096 /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
1097 s5 += (sa*2217 + 2048) >> 12; \
1098 /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
1099 sd -= (s2*5067 + 8192) >> 14; \
1100 /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
1101 s2 += (sd*1309 + 512) >> 10; \
1102 /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
1103 sd -= (s2*4471 + 4096) >> 13; \
1104 /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
1105 s3 -= (sc*5701 + 4096) >> 13; \
1106 /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \
1107 sc -= (s3*3903 + 4096) >> 13; \
1108 /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
1109 s3 += (sc*5747 + 4096) >> 13; \
1110 /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
1111 sb -= (s4*7839 + 8192) >> 14; \
1112 /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
1113 s4 += (sb*5619 + 2048) >> 12; \
1114 /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
1115 sb -= (s4*2473 + 2048) >> 12; \
1116 /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
1117 s7 -= (s8*4641 + 4096) >> 13; \
1118 /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
1119 s8 += (s7*2865 + 1024) >> 11; \
1120 /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
1121 s7 -= (s8*41 + 32) >> 6; \
1122 /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
1123 s0 += (sf*5397 + 4096) >> 13; \
1124 /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
1125 sf -= (s0*2893 + 1024) >> 11; \
1126 /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
1127 s0 += (sf*2799 + 2048) >> 12; \
1128 sd -= OD_DCT_RSHIFT(sc, 1); \
1129 sc += sd; \
1130 s3 += OD_DCT_RSHIFT(s2, 1); \
1131 s2 = s3 - s2; \
1132 sb += OD_DCT_RSHIFT(sa, 1); \
1133 sa -= sb; \
1134 s5 = OD_DCT_RSHIFT(s4, 1) - s5; \
1135 s4 -= s5; \
1136 s7 = OD_DCT_RSHIFT(s9, 1) - s7; \
1137 s9 -= s7; \
1138 s6 -= OD_DCT_RSHIFT(s8, 1); \
1139 s8 += s6; \
1140 se = OD_DCT_RSHIFT(sf, 1) - se; \
1141 sf -= se; \
1142 s0 += OD_DCT_RSHIFT(s1, 1); \
1143 s1 -= s0; \
1144 s5 -= s9; \
1145 s9 += OD_DCT_RSHIFT(s5, 1); \
1146 sa = s6 - sa; \
1147 s6 -= OD_DCT_RSHIFT(sa, 1); \
1148 se += s2; \
1149 s2 -= OD_DCT_RSHIFT(se, 1); \
1150 s1 = sd - s1; \
1151 sd -= OD_DCT_RSHIFT(s1, 1); \
1152 s0 += s3; \
1153 s0h = OD_DCT_RSHIFT(s0, 1); \
1154 s3 = s0h - s3; \
1155 sf += sc; \
1156 sfh = OD_DCT_RSHIFT(sf, 1); \
1157 sc -= sfh; \
1158 sb = s7 - sb; \
1159 sbh = OD_DCT_RSHIFT(sb, 1); \
1160 s7 -= sbh; \
1161 s4 -= s8; \
1162 s4h = OD_DCT_RSHIFT(s4, 1); \
1163 s8 += s4h; \
1164 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
1165 se -= (s1*3227 + 16384) >> 15; \
1166 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
1167 s1 += (se*6393 + 16384) >> 15; \
1168 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
1169 se -= (s1*3227 + 16384) >> 15; \
1170 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1171 s6 -= (s9*2485 + 4096) >> 13; \
1172 /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
1173 s9 += (s6*4551 + 4096) >> 13; \
1174 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1175 s6 -= (s9*2485 + 4096) >> 13; \
1176 /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1177 s5 -= (sa*8757 + 8192) >> 14; \
1178 /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
1179 sa += (s5*6811 + 4096) >> 13; \
1180 /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1181 s5 -= (sa*8757 + 8192) >> 14; \
1182 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1183 s2 -= (sd*6723 + 4096) >> 13; \
1184 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1185 sd += (s2*16069 + 8192) >> 14; \
1186 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1187 s2 -= (sd*6723 + 4096) >> 13; \
1188 s9 += OD_DCT_RSHIFT(se, 1); \
1189 se = s9 - se; \
1190 s6 += OD_DCT_RSHIFT(s1, 1); \
1191 s1 -= s6; \
1192 sd = OD_DCT_RSHIFT(sa, 1) - sd; \
1193 sa -= sd; \
1194 s2 += OD_DCT_RSHIFT(s5, 1); \
1195 s5 = s2 - s5; \
1196 s3 -= sbh; \
1197 sb += s3; \
1198 sc += s4h; \
1199 s4 = sc - s4; \
1200 s8 = s0h - s8; \
1201 s0 -= s8; \
1202 s7 = sfh - s7; \
1203 sf -= s7; \
1204 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1205 s6 -= (s9*13573 + 16384) >> 15; \
1206 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
1207 s9 += (s6*11585 + 8192) >> 14; \
1208 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1209 s6 -= (s9*13573 + 16384) >> 15; \
1210 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1211 s5 -= (sa*13573 + 16384) >> 15; \
1212 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
1213 sa += (s5*11585 + 8192) >> 14; \
1214 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1215 s5 -= (sa*13573 + 16384) >> 15; \
1216 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
1217 s3 -= (sc*3259 + 8192) >> 14; \
1218 /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
1219 sc += (s3*3135 + 4096) >> 13; \
1220 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
1221 s3 -= (sc*3259 + 8192) >> 14; \
1222 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
1223 sb -= (s4*21895 + 16384) >> 15; \
1224 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1225 s4 += (sb*15137 + 8192) >> 14; \
1226 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
1227 sb -= (s4*21895 + 16384) >> 15; \
1228 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1229 s8 -= (s7*13573 + 16384) >> 15; \
1230 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
1231 s7 += (s8*11585 + 8192) >> 14; \
1232 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
1233 s8 -= (s7*13573 + 16384) >> 15; \
1234 } \
1235 while (0)
1236
1236
Monty Montgomery2cb52ba2017-07-17 18:27:27 -04001237/* TODO: rewrite this to match OD_FDST_16. */
/* OD_FDST_16_ASYM: in-place 16-point asymmetric forward Type-IV DST.

   Arguments: sixteen coefficient lvalues (t0, t8, t4, tc, t2, ta, t6, te,
   t1, t9, t5, td, t3, tb, t7, tf) that are read and written in place, plus
   three "half" arguments:
     t0h - read only (te = t0h - te);
     t4h - read only (ta += t4h);
     t7h - read first (t6 = t7h - t6) and later overwritten with
           OD_DCT_RSHIFT(t7, 1), so the caller's variable is clobbered.
   The visible caller, OD_FDCT_32, passes OD_DCT_RSHIFT(x, 1) of the matching
   coefficient for each of the three halves.  Every argument is expanded
   several times; pass only side-effect-free lvalues.

   The locals t2h, t3h, t6h, t8h, t9h, tch and tdh each cache one
   OD_DCT_RSHIFT(x, 1) value so that the same half can be applied to two
   different variables in the add/subtract stage.

   Each multiply adds half of the divisor before the right shift.  The
   OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx) lines precede the multiply
   they describe (idx 136..186 here); the definition at the top of this file
   expands to nothing when OD_DCT_CHECK_OVERFLOW is not defined.

   NOTE(review): the step documented as "-19195/32768" is coded as
   `t9 += (t6*19195 + 16384) >> 15`, i.e. with the positive magnitude;
   OD_IDST_16_ASYM undoes it with `-=`.  Confirm the sign convention intended
   by that comment.

   Statement order is significant: every update reads the result of the
   previous ones. */
1238#define OD_FDST_16_ASYM(t0, t0h, t8, t4, t4h, tc, t2, ta, t6, te, \
1239 t1, t9, t5, td, t3, tb, t7, t7h, tf) \
1240 /* Embedded 16-point asymmetric Type-IV fDST. */ \
1241 do { \
1242 int t2h; \
1243 int t3h; \
1244 int t6h; \
1245 int t8h; \
1246 int t9h; \
1247 int tch; \
1248 int tdh; \
1249 /* TODO: Can we move these into another operation */ \
1250 t8 = -t8; \
1251 t9 = -t9; \
1252 ta = -ta; \
1253 tb = -tb; \
1254 td = -td; \
1255 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1256 OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 136); \
1257 t1 -= (te*13573 + 8192) >> 14; \
1258 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1259 OD_DCT_OVERFLOW_CHECK(t1, 11585, 16384, 137); \
1260 te += (t1*11585 + 16384) >> 15; \
1261 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1262 OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 138); \
1263 t1 -= (te*13573 + 8192) >> 14; \
1264 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1265 OD_DCT_OVERFLOW_CHECK(td, 4161, 8192, 139); \
1266 t2 += (td*4161 + 8192) >> 14; \
1267 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1268 OD_DCT_OVERFLOW_CHECK(t2, 15137, 8192, 140); \
1269 td -= (t2*15137 + 8192) >> 14; \
1270 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1271 OD_DCT_OVERFLOW_CHECK(td, 14341, 8192, 141); \
1272 t2 += (td*14341 + 8192) >> 14; \
1273 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1274 OD_DCT_OVERFLOW_CHECK(t3, 14341, 8192, 142); \
1275 tc -= (t3*14341 + 8192) >> 14; \
1276 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1277 OD_DCT_OVERFLOW_CHECK(tc, 15137, 8192, 143); \
1278 t3 += (tc*15137 + 8192) >> 14; \
1279 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1280 OD_DCT_OVERFLOW_CHECK(t3, 4161, 8192, 144); \
1281 tc -= (t3*4161 + 8192) >> 14; \
1282 te = t0h - te; \
1283 t0 -= te; \
1284 tf = OD_DCT_RSHIFT(t1, 1) - tf; \
1285 t1 -= tf; \
1286 /* TODO: Can we move this into another operation */ \
1287 tc = -tc; \
1288 t2 = OD_DCT_RSHIFT(tc, 1) - t2; \
1289 tc -= t2; \
1290 t3 = OD_DCT_RSHIFT(td, 1) - t3; \
1291 td = t3 - td; \
1292 /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
1293 OD_DCT_OVERFLOW_CHECK(t6, 7489, 4096, 145); \
1294 t9 -= (t6*7489 + 4096) >> 13; \
1295 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
1296 OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 146); \
1297 t6 += (t9*11585 + 8192) >> 14; \
1298 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
1299 OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 147); \
1300 t9 += (t6*19195 + 16384) >> 15; \
1301 t8 += OD_DCT_RSHIFT(t9, 1); \
1302 t9 -= t8; \
1303 t6 = t7h - t6; \
1304 t7 -= t6; \
1305 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1306 OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 148); \
1307 t8 += (t7*6723 + 4096) >> 13; \
1308 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1309 OD_DCT_OVERFLOW_CHECK(t8, 16069, 8192, 149); \
1310 t7 -= (t8*16069 + 8192) >> 14; \
1311 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1312 OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 150); \
1313 t8 += (t7*6723 + 4096) >> 13; \
1314 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1315 OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 151); \
1316 t9 += (t6*17515 + 16384) >> 15; \
1317 /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
1318 OD_DCT_OVERFLOW_CHECK(t9, 13623, 8192, 152); \
1319 t6 -= (t9*13623 + 8192) >> 14; \
1320 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1321 OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 153); \
1322 t9 += (t6*17515 + 16384) >> 15; \
1323 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1324 OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 154); \
1325 t5 += (ta*13573 + 8192) >> 14; \
1326 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1327 OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 155); \
1328 ta -= (t5*11585 + 16384) >> 15; \
1329 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1330 OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 156); \
1331 t5 += (ta*13573 + 8192) >> 14; \
1332 tb += OD_DCT_RSHIFT(t5, 1); \
1333 t5 = tb - t5; \
1334 ta += t4h; \
1335 t4 -= ta; \
1336 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1337 OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 157); \
1338 ta += (t5*2485 + 4096) >> 13; \
1339 /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
1340 OD_DCT_OVERFLOW_CHECK(ta, 18205, 16384, 158); \
1341 t5 -= (ta*18205 + 16384) >> 15; \
1342 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1343 OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 159); \
1344 ta += (t5*2485 + 4096) >> 13; \
1345 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1346 OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 160); \
1347 tb -= (t4*6723 + 4096) >> 13; \
1348 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1349 OD_DCT_OVERFLOW_CHECK(tb, 16069, 8192, 161); \
1350 t4 += (tb*16069 + 8192) >> 14; \
1351 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1352 OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 162); \
1353 tb -= (t4*6723 + 4096) >> 13; \
1354 /* TODO: Can we move this into another operation */ \
1355 t5 = -t5; \
1356 tc -= tf; \
1357 tch = OD_DCT_RSHIFT(tc, 1); \
1358 tf += tch; \
1359 t3 += t0; \
1360 t3h = OD_DCT_RSHIFT(t3, 1); \
1361 t0 -= t3h; \
1362 td -= t1; \
1363 tdh = OD_DCT_RSHIFT(td, 1); \
1364 t1 += tdh; \
1365 t2 += te; \
1366 t2h = OD_DCT_RSHIFT(t2, 1); \
1367 te -= t2h; \
1368 t8 += t4; \
1369 t8h = OD_DCT_RSHIFT(t8, 1); \
1370 t4 = t8h - t4; \
1371 t7 = tb - t7; \
1372 t7h = OD_DCT_RSHIFT(t7, 1); \
1373 tb = t7h - tb; \
1374 t6 -= ta; \
1375 t6h = OD_DCT_RSHIFT(t6, 1); \
1376 ta += t6h; \
1377 t9 = t5 - t9; \
1378 t9h = OD_DCT_RSHIFT(t9, 1); \
1379 t5 -= t9h; \
1380 t0 -= t7h; \
1381 t7 += t0; \
1382 tf += t8h; \
1383 t8 -= tf; \
1384 te -= t6h; \
1385 t6 += te; \
1386 t1 += t9h; \
1387 t9 -= t1; \
1388 tb -= tch; \
1389 tc += tb; \
1390 t4 += t3h; \
1391 t3 -= t4; \
1392 ta -= tdh; \
1393 td += ta; \
1394 t5 = t2h - t5; \
1395 t2 -= t5; \
1396 /* TODO: Can we move these into another operation */ \
1397 t8 = -t8; \
1398 t9 = -t9; \
1399 ta = -ta; \
1400 tb = -tb; \
1401 tc = -tc; \
1402 td = -td; \
1403 tf = -tf; \
1404 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1405 OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 163); \
1406 t0 -= (tf*7799 + 4096) >> 13; \
1407 /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
1408 OD_DCT_OVERFLOW_CHECK(t0, 4091, 2048, 164); \
1409 tf += (t0*4091 + 2048) >> 12; \
1410 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1411 OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 165); \
1412 t0 -= (tf*7799 + 4096) >> 13; \
1413 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1414 OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 166); \
1415 t1 += (te*2417 + 16384) >> 15; \
1416 /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
1417 OD_DCT_OVERFLOW_CHECK(t1, 601, 2048, 167); \
1418 te -= (t1*601 + 2048) >> 12; \
1419 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1420 OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 168); \
1421 t1 += (te*2417 + 16384) >> 15; \
1422 /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1423 OD_DCT_OVERFLOW_CHECK(t8, 14525, 16384, 169); \
1424 t7 -= (t8*14525 + 16384) >> 15; \
1425 /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
1426 OD_DCT_OVERFLOW_CHECK(t7, 3035, 2048, 170); \
1427 t8 += (t7*3035 + 2048) >> 12; \
1428 /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1429 OD_DCT_OVERFLOW_CHECK(t8, 7263, 8192, 171); \
1430 t7 -= (t8*7263 + 8192) >> 14; \
1431 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1432 OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 172); \
1433 t2 -= (td*6393 + 4096) >> 13; \
1434 /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
1435 OD_DCT_OVERFLOW_CHECK(t2, 3973, 2048, 173); \
1436 td += (t2*3973 + 2048) >> 12; \
1437 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1438 OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 174); \
1439 t2 -= (td*6393 + 4096) >> 13; \
1440 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1441 OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 175); \
1442 t5 -= (ta*9281 + 8192) >> 14; \
1443 /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
1444 OD_DCT_OVERFLOW_CHECK(t5, 7027, 4096, 176); \
1445 ta += (t5*7027 + 4096) >> 13; \
1446 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1447 OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 177); \
1448 t5 -= (ta*9281 + 8192) >> 14; \
1449 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1450 OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 178); \
1451 t3 -= (tc*11539 + 8192) >> 14; \
1452 /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
1453 OD_DCT_OVERFLOW_CHECK(t3, 7713, 4096, 179); \
1454 tc += (t3*7713 + 4096) >> 13; \
1455 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1456 OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 180); \
1457 t3 -= (tc*11539 + 8192) >> 14; \
1458 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1459 OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 181); \
1460 t4 -= (tb*10375 + 8192) >> 14; \
1461 /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
1462 OD_DCT_OVERFLOW_CHECK(t4, 7405, 4096, 182); \
1463 tb += (t4*7405 + 4096) >> 13; \
1464 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1465 OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 183); \
1466 t4 -= (tb*10375 + 8192) >> 14; \
1467 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1468 OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 184); \
1469 t6 -= (t9*8247 + 8192) >> 14; \
1470 /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
1471 OD_DCT_OVERFLOW_CHECK(t6, 1645, 1024, 185); \
1472 t9 += (t6*1645 + 1024) >> 11; \
1473 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1474 OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 186); \
1475 t6 -= (t9*8247 + 8192) >> 14; \
1476 } \
1477 while (0)
1478
1478
/* OD_IDST_16_ASYM: in-place 16-point asymmetric inverse Type-IV DST.

   Arguments: sixteen coefficient lvalues (t0, t8, t4, tc, t2, ta, t6, te,
   t1, t9, t5, td, t3, tb, t7, tf) that are read and written in place, plus
   three "half" arguments that this macro only writes before reading:
     t0h - set to OD_DCT_RSHIFT(t0, 1) near the end;
     t2h - set to OD_DCT_RSHIFT(t2, 1) in the middle stage;
     teh - used as scratch early on, then set to OD_DCT_RSHIFT(te, 1).
   The visible caller, OD_IDCT_32, passes uninitialised locals for these and
   reads them after the macro returns.  Every argument is expanded several
   times; pass only side-effect-free lvalues.

   The locals t1h_, t3h_, t4h, t6h, t9h_, tbh_ and tch each cache one
   OD_DCT_RSHIFT(x, 1) value so the same half can be applied to two
   variables.  The trailing underscores presumably avoid shadowing the
   identically named locals of OD_IDCT_32 - confirm before renaming.

   Each multiply adds half of the divisor before the right shift.  The
   opening updates use the same constants and shifts as the closing updates of
   OD_FDST_16_ASYM, in reverse order and with opposite signs.  Statement order
   is significant throughout.

   NOTE(review): the step documented as "-19195/32768" is coded as
   `t9 -= (t6*19195 + 16384) >> 15`; confirm the sign convention intended by
   that comment. */
1479#define OD_IDST_16_ASYM(t0, t0h, t8, t4, tc, t2, t2h, ta, t6, te, teh, \
1480 t1, t9, t5, td, t3, tb, t7, tf) \
1481 /* Embedded 16-point asymmetric Type-IV iDST. */ \
1482 do { \
1483 int t1h_; \
1484 int t3h_; \
1485 int t4h; \
1486 int t6h; \
1487 int t9h_; \
1488 int tbh_; \
1489 int tch; \
1490 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1491 t6 += (t9*8247 + 8192) >> 14; \
1492 /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
1493 t9 -= (t6*1645 + 1024) >> 11; \
1494 /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
1495 t6 += (t9*8247 + 8192) >> 14; \
1496 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1497 t2 += (td*10375 + 8192) >> 14; \
1498 /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
1499 td -= (t2*7405 + 4096) >> 13; \
1500 /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
1501 t2 += (td*10375 + 8192) >> 14; \
1502 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1503 tc += (t3*11539 + 8192) >> 14; \
1504 /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
1505 t3 -= (tc*7713 + 4096) >> 13; \
1506 /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
1507 tc += (t3*11539 + 8192) >> 14; \
1508 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1509 ta += (t5*9281 + 8192) >> 14; \
1510 /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
1511 t5 -= (ta*7027 + 4096) >> 13; \
1512 /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
1513 ta += (t5*9281 + 8192) >> 14; \
1514 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1515 t4 += (tb*6393 + 4096) >> 13; \
1516 /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
1517 tb -= (t4*3973 + 2048) >> 12; \
1518 /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
1519 t4 += (tb*6393 + 4096) >> 13; \
1520 /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1521 te += (t1*7263 + 8192) >> 14; \
1522 /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
1523 t1 -= (te*3035 + 2048) >> 12; \
1524 /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
1525 te += (t1*14525 + 16384) >> 15; \
1526 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1527 t8 -= (t7*2417 + 16384) >> 15; \
1528 /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
1529 t7 += (t8*601 + 2048) >> 12; \
1530 /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
1531 t8 -= (t7*2417 + 16384) >> 15; \
1532 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1533 t0 += (tf*7799 + 4096) >> 13; \
1534 /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
1535 tf -= (t0*4091 + 2048) >> 12; \
1536 /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
1537 t0 += (tf*7799 + 4096) >> 13; \
1538 /* TODO: Can we move these into another operation */ \
1539 t1 = -t1; \
1540 t3 = -t3; \
1541 t5 = -t5; \
1542 t9 = -t9; \
1543 tb = -tb; \
1544 td = -td; \
1545 tf = -tf; \
1546 t4 += ta; \
1547 t4h = OD_DCT_RSHIFT(t4, 1); \
1548 ta = t4h - ta; \
1549 tb -= t5; \
1550 tbh_ = OD_DCT_RSHIFT(tb, 1); \
1551 t5 += tbh_; \
1552 tc += t2; \
1553 tch = OD_DCT_RSHIFT(tc, 1); \
1554 t2 -= tch; \
1555 t3 -= td; \
1556 t3h_ = OD_DCT_RSHIFT(t3, 1); \
1557 td += t3h_; \
1558 t9 += t8; \
1559 t9h_ = OD_DCT_RSHIFT(t9, 1); \
1560 t8 -= t9h_; \
1561 t6 -= t7; \
1562 t6h = OD_DCT_RSHIFT(t6, 1); \
1563 t7 += t6h; \
1564 t1 += tf; \
1565 t1h_ = OD_DCT_RSHIFT(t1, 1); \
1566 tf -= t1h_; \
1567 te -= t0; \
1568 teh = OD_DCT_RSHIFT(te, 1); \
1569 t0 += teh; \
1570 ta += t9h_; \
1571 t9 = ta - t9; \
1572 t5 -= t6h; \
1573 t6 += t5; \
1574 td = teh - td; \
1575 te = td - te; \
1576 t2 = t1h_ - t2; \
1577 t1 -= t2; \
1578 t7 += t4h; \
1579 t4 -= t7; \
1580 t8 -= tbh_; \
1581 tb += t8; \
1582 t0 += tch; \
1583 tc -= t0; \
1584 tf -= t3h_; \
1585 t3 += tf; \
1586 /* TODO: Can we move this into another operation */ \
1587 ta = -ta; \
1588 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1589 td += (t2*6723 + 4096) >> 13; \
1590 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1591 t2 -= (td*16069 + 8192) >> 14; \
1592 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1593 td += (t2*6723 + 4096) >> 13; \
1594 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1595 t5 -= (ta*2485 + 4096) >> 13; \
1596 /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
1597 ta += (t5*18205 + 16384) >> 15; \
1598 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
1599 t5 -= (ta*2485 + 4096) >> 13; \
1600 t2 += t5; \
1601 t2h = OD_DCT_RSHIFT(t2, 1); \
1602 t5 -= t2h; \
1603 ta = td - ta; \
1604 td -= OD_DCT_RSHIFT(ta, 1); \
1605 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1606 ta -= (t5*13573 + 8192) >> 14; \
1607 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1608 t5 += (ta*11585 + 16384) >> 15; \
1609 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1610 ta -= (t5*13573 + 8192) >> 14; \
1611 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1612 t9 -= (t6*17515 + 16384) >> 15; \
1613 /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
1614 t6 += (t9*13623 + 8192) >> 14; \
1615 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
1616 t9 -= (t6*17515 + 16384) >> 15; \
1617 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1618 t1 -= (te*6723 + 4096) >> 13; \
1619 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
1620 te += (t1*16069 + 8192) >> 14; \
1621 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
1622 t1 -= (te*6723 + 4096) >> 13; \
1623 te += t6; \
1624 teh = OD_DCT_RSHIFT(te, 1); \
1625 t6 = teh - t6; \
1626 t9 += t1; \
1627 t1 -= OD_DCT_RSHIFT(t9, 1); \
1628 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
1629 t9 -= (t6*19195 + 16384) >> 15; \
1630 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
1631 t6 -= (t9*11585 + 8192) >> 14; \
1632 /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
1633 t9 += (t6*7489 + 4096) >> 13; \
1634 tb = tc - tb; \
1635 tc = OD_DCT_RSHIFT(tb, 1) - tc; \
1636 t3 += t4; \
1637 t4 = OD_DCT_RSHIFT(t3, 1) - t4; \
1638 /* TODO: Can we move this into another operation */ \
1639 t3 = -t3; \
1640 t8 += tf; \
1641 tf = OD_DCT_RSHIFT(t8, 1) - tf; \
1642 t0 += t7; \
1643 t0h = OD_DCT_RSHIFT(t0, 1); \
1644 t7 = t0h - t7; \
1645 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1646 t3 += (tc*4161 + 8192) >> 14; \
1647 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1648 tc -= (t3*15137 + 8192) >> 14; \
1649 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1650 t3 += (tc*14341 + 8192) >> 14; \
1651 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
1652 t4 -= (tb*14341 + 8192) >> 14; \
1653 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
1654 tb += (t4*15137 + 8192) >> 14; \
1655 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
1656 t4 -= (tb*4161 + 8192) >> 14; \
1657 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1658 t8 += (t7*13573 + 8192) >> 14; \
1659 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
1660 t7 -= (t8*11585 + 16384) >> 15; \
1661 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
1662 t8 += (t7*13573 + 8192) >> 14; \
1663 /* TODO: Can we move these into another operation */ \
1664 t1 = -t1; \
1665 t5 = -t5; \
1666 t9 = -t9; \
1667 tb = -tb; \
1668 td = -td; \
1669 } \
1670 while (0)
1671
1671
/* OD_FDCT_32: in-place 32-point forward Type-II DCT.

   Arguments: thirty-two lvalues (t0..tv) that are read and written in place.
   The parameter order is (t0, tg, t8, to, ...), not t0..tv.  Every argument
   is expanded several times; pass only side-effect-free lvalues.

   Step 1 - sixteen add/subtract pairs, (t0,tv) (t1,tu) (t2,tt) ... (tf,tg).
   Two forms alternate:
       x = a - x;  a -= OD_DCT_RSHIFT(x, 1);
       x += a;     a = OD_DCT_RSHIFT(x, 1) - a;
   Eleven of the halves are kept in the locals declared below because the
   two sub-transforms take them as arguments.

   Step 2 - one member of every pair goes to OD_FDCT_16_ASYM (with the halves
   tgh, toh, tkh, tsh, tih, tqh, tmh, tuh) and the other to OD_FDST_16_ASYM
   (with the halves tvh, tnh, thh).  OD_FDCT_16_ASYM is defined elsewhere in
   this file. */
1672#define OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
1673 te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
1674 /* Embedded 32-point orthonormal Type-II fDCT. */ \
1675 do { \
1676 int tgh; \
1677 int thh; \
1678 int tih; \
1679 int tkh; \
1680 int tmh; \
1681 int tnh; \
1682 int toh; \
1683 int tqh; \
1684 int tsh; \
1685 int tuh; \
1686 int tvh; \
1687 tv = t0 - tv; \
1688 tvh = OD_DCT_RSHIFT(tv, 1); \
1689 t0 -= tvh; \
1690 tu += t1; \
1691 tuh = OD_DCT_RSHIFT(tu, 1); \
1692 t1 = tuh - t1; \
1693 tt = t2 - tt; \
1694 t2 -= OD_DCT_RSHIFT(tt, 1); \
1695 ts += t3; \
1696 tsh = OD_DCT_RSHIFT(ts, 1); \
1697 t3 = tsh - t3; \
1698 tr = t4 - tr; \
1699 t4 -= OD_DCT_RSHIFT(tr, 1); \
1700 tq += t5; \
1701 tqh = OD_DCT_RSHIFT(tq, 1); \
1702 t5 = tqh - t5; \
1703 tp = t6 - tp; \
1704 t6 -= OD_DCT_RSHIFT(tp, 1); \
1705 to += t7; \
1706 toh = OD_DCT_RSHIFT(to, 1); \
1707 t7 = toh - t7; \
1708 tn = t8 - tn; \
1709 tnh = OD_DCT_RSHIFT(tn, 1); \
1710 t8 -= tnh; \
1711 tm += t9; \
1712 tmh = OD_DCT_RSHIFT(tm, 1); \
1713 t9 = tmh - t9; \
1714 tl = ta - tl; \
1715 ta -= OD_DCT_RSHIFT(tl, 1); \
1716 tk += tb; \
1717 tkh = OD_DCT_RSHIFT(tk, 1); \
1718 tb = tkh - tb; \
1719 tj = tc - tj; \
1720 tc -= OD_DCT_RSHIFT(tj, 1); \
1721 ti += td; \
1722 tih = OD_DCT_RSHIFT(ti, 1); \
1723 td = tih - td; \
1724 th = te - th; \
1725 thh = OD_DCT_RSHIFT(th, 1); \
1726 te -= thh; \
1727 tg += tf; \
1728 tgh = OD_DCT_RSHIFT(tg, 1); \
1729 tf = tgh - tf; \
1730 OD_FDCT_16_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
1731 t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh); \
1732 OD_FDST_16_ASYM(tv, tvh, tf, tn, tnh, t7, tr, tb, tj, t3, \
1733 tt, td, tl, t5, tp, t9, th, thh, t1); \
1734 } \
1735 while (0)
1736
1736
/* OD_IDCT_32: in-place 32-point inverse Type-II DCT.

   Arguments: thirty-two lvalues (t0..tv) that are read and written in place,
   in the same parameter order as OD_FDCT_32.  Every argument is expanded
   several times; pass only side-effect-free lvalues.

   Step 1 - OD_IDST_16_ASYM runs on (tv, tn, tr, tj, tt, tl, tp, th, tu, tm,
   tq, ti, ts, tk, to, tg) and fills the halves tvh, tth and thh;
   OD_IDCT_16_ASYM runs on (t0, t8, ..., tf) and is passed t1h, t9h, t5h,
   tdh, t3h, tbh, t7h and tfh.  The locals are uninitialised on entry;
   OD_IDCT_16_ASYM is defined elsewhere in this file and presumably assigns
   its eight halves before they are read below - confirm there.

   Step 2 - sixteen add/subtract pairs recombine the two halves, using either
   a saved half or OD_DCT_RSHIFT(x, 1) computed on the spot.  The pairs are
   the ones OD_FDCT_32 forms, visited in the opposite order: (t1,tu) first,
   (t0,tv) last. */
1737#define OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
1738 te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
1739 /* Embedded 32-point orthonormal Type-II iDCT. */ \
1740 do { \
1741 int t1h; \
1742 int t3h; \
1743 int t5h; \
1744 int t7h; \
1745 int t9h; \
1746 int tbh; \
1747 int tdh; \
1748 int tfh; \
1749 int thh; \
1750 int tth; \
1751 int tvh; \
1752 OD_IDST_16_ASYM(tv, tvh, tn, tr, tj, tt, tth, tl, tp, th, thh, \
1753 tu, tm, tq, ti, ts, tk, to, tg); \
1754 OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
1755 t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh); \
1756 tu = t1h - tu; \
1757 t1 -= tu; \
1758 te += thh; \
1759 th = te - th; \
1760 tm = t9h - tm; \
1761 t9 -= tm; \
1762 t6 += OD_DCT_RSHIFT(tp, 1); \
1763 tp = t6 - tp; \
1764 tq = t5h - tq; \
1765 t5 -= tq; \
1766 ta += OD_DCT_RSHIFT(tl, 1); \
1767 tl = ta - tl; \
1768 ti = tdh - ti; \
1769 td -= ti; \
1770 t2 += tth; \
1771 tt = t2 - tt; \
1772 ts = t3h - ts; \
1773 t3 -= ts; \
1774 tc += OD_DCT_RSHIFT(tj, 1); \
1775 tj = tc - tj; \
1776 tk = tbh - tk; \
1777 tb -= tk; \
1778 t4 += OD_DCT_RSHIFT(tr, 1); \
1779 tr = t4 - tr; \
1780 to = t7h - to; \
1781 t7 -= to; \
1782 t8 += OD_DCT_RSHIFT(tn, 1); \
1783 tn = t8 - tn; \
1784 tg = tfh - tg; \
1785 tf -= tg; \
1786 t0 += tvh; \
1787 tv = t0 - tv; \
1788 } \
1789 while (0)
1790
1790
Monty Montgomerya4e245a2017-07-22 00:48:31 -04001791#if CONFIG_TX64X64
/* Embedded 32-point asymmetric Type-II fDCT.
 *
 * The 32 coefficient arguments (t0 ... tv) must be distinct int lvalues and
 * are read and overwritten in place. The 16 extra *h arguments (tgh, toh,
 * ... tvh) are only read by this macro: each supplies the halved term of
 * one butterfly, so the caller computes them beforehand.
 *
 * Stage 1 runs sixteen add/subtract butterflies pairing t0/tv, t1/tu, ...
 * tf/tg. Stage 2 passes the (t0, tg, t8, ...) half to OD_FDCT_16 and the
 * (tv, tf, tn, ...) half to OD_FDST_16.
 *
 * Requires OD_FDCT_16 and OD_FDST_16 to be defined at the point of
 * expansion. */
#define OD_FDCT_32_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
  t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh, t1, th, thh, \
  t9, tp, tph, t5, tl, tlh, td, tt, tth, t3, tj, tjh, tb, tr, trh, \
  t7, tn, tnh, tf, tv, tvh) \
  /* Embedded 32-point asymmetric Type-II fDCT. */ \
  do { \
    /* Stage 1: butterflies using the caller-supplied halved terms. */ \
    t0 += tvh; \
    tv = t0 - tv; \
    t1 = tuh - t1; \
    tu -= t1; \
    t2 += tth; \
    tt = t2 - tt; \
    t3 = tsh - t3; \
    ts -= t3; \
    t4 += trh; \
    tr = t4 - tr; \
    t5 = tqh - t5; \
    tq -= t5; \
    t6 += tph; \
    tp = t6 - tp; \
    t7 = toh - t7; \
    to -= t7; \
    t8 += tnh; \
    tn = t8 - tn; \
    t9 = tmh - t9; \
    tm -= t9; \
    ta += tlh; \
    tl = ta - tl; \
    tb = tkh - tb; \
    tk -= tb; \
    tc += tjh; \
    tj = tc - tj; \
    td = tih - td; \
    ti -= td; \
    te += thh; \
    th = te - th; \
    tf = tgh - tf; \
    tg -= tf; \
    /* Stage 2: 16-point DCT on one half, 16-point DST on the other. */ \
    OD_FDCT_16(t0, tg, t8, to, t4, tk, tc, ts, \
      t2, ti, ta, tq, t6, tm, te, tu); \
    OD_FDST_16(tv, tf, tn, t7, tr, tb, tj, t3, \
      tt, td, tl, t5, tp, t9, th, t1); \
  } \
  while (0)
1836
/* Embedded 32-point asymmetric Type-II iDCT.
 *
 * The 32 coefficient arguments (t0 ... tv) must be distinct int lvalues and
 * are read and overwritten in place. The 16 extra *h arguments (t1h, thh,
 * ... tvh) must also be int lvalues: this macro assigns each of them the
 * OD_DCT_RSHIFT(x, 1) half of one butterfly result and never reads their
 * incoming value, so they act as additional outputs.
 *
 * Stage 1 expands OD_IDST_16 on the (tv, tn, tr, ...) half and OD_IDCT_16
 * on the (t0, t8, t4, ...) half. Stage 2 runs sixteen add/subtract
 * butterflies pairing t0/tv, t1/tu, ... tf/tg.
 *
 * Requires OD_DCT_RSHIFT, OD_IDST_16 and OD_IDCT_16 to be defined at the
 * point of expansion. */
#define OD_IDCT_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, \
  t6, tm, te, tu, t1, t1h, th, thh, t9, t9h, tp, tph, t5, t5h, tl, tlh, \
  td, tdh, tt, tth, t3, t3h, tj, tjh, tb, tbh, tr, trh, t7, t7h, tn, tnh, \
  tf, tfh, tv, tvh) \
  /* Embedded 32-point asymmetric Type-II iDCT. */ \
  do { \
    /* Stage 1: 16-point inverse DST and inverse DCT. */ \
    OD_IDST_16(tv, tn, tr, tj, tt, tl, tp, th, \
      tu, tm, tq, ti, ts, tk, to, tg); \
    OD_IDCT_16(t0, t8, t4, tc, t2, ta, t6, te, \
      t1, t9, t5, td, t3, tb, t7, tf); \
    /* Stage 2: butterflies; every halved term is stored in a *h output. */ \
    tv = t0 - tv; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    t0 -= tvh; \
    t1 += tu; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    tu = t1h - tu; \
    tt = t2 - tt; \
    tth = OD_DCT_RSHIFT(tt, 1); \
    t2 -= tth; \
    t3 += ts; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    ts = t3h - ts; \
    tr = t4 - tr; \
    trh = OD_DCT_RSHIFT(tr, 1); \
    t4 -= trh; \
    t5 += tq; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    tq = t5h - tq; \
    tp = t6 - tp; \
    tph = OD_DCT_RSHIFT(tp, 1); \
    t6 -= tph; \
    t7 += to; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    to = t7h - to; \
    tn = t8 - tn; \
    tnh = OD_DCT_RSHIFT(tn, 1); \
    t8 -= tnh; \
    t9 += tm; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    tm = t9h - tm; \
    tl = ta - tl; \
    tlh = OD_DCT_RSHIFT(tl, 1); \
    ta -= tlh; \
    tb += tk; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    tk = tbh - tk; \
    tj = tc - tj; \
    tjh = OD_DCT_RSHIFT(tj, 1); \
    tc -= tjh; \
    td += ti; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    ti = tdh - ti; \
    th = te - th; \
    thh = OD_DCT_RSHIFT(th, 1); \
    te -= thh; \
    tf += tg; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    tg = tfh - tg; \
  } \
  while (0)
1897
/* Embedded 32-point asymmetric Type-IV fDST.
 *
 * All 32 arguments must be distinct int lvalues; each one is read and then
 * overwritten in place. The macro is straight-line integer code built from
 * three kinds of step, applied in a fixed order that must not be changed:
 *
 *  - sign flips (x = -x);
 *  - add/subtract butterflies whose halved term comes from
 *    OD_DCT_RSHIFT(x, 1) (eight halves are kept in block-local *h
 *    variables and reused by a later butterfly stage);
 *  - three-step lifting sequences of the form
 *    a op= (b*C + R) >> S, where each step is annotated with the rational
 *    constant C/2^S it approximates and R = 2^(S-1) is the rounding offset.
 *
 * Every multiply is preceded by OD_DCT_OVERFLOW_CHECK(value, C, R, id) with
 * a unique id (271..387).
 *
 * Requires OD_DCT_RSHIFT and OD_DCT_OVERFLOW_CHECK to be defined at the
 * point of expansion. */
#define OD_FDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
  tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
  /* Embedded 32-point asymmetric Type-IV fDST. */ \
  do { \
    /* Halved butterfly terms saved in one stage and reused in a later one. */ \
    int t0h; \
    int t1h; \
    int t4h; \
    int t5h; \
    int tqh; \
    int trh; \
    int tuh; \
    int tvh; \
    \
    tu = -tu; \
    \
    /* Lifting stage 1: eleven three-step sequences. */ \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 271); \
    t5 -= (tq*13573 + 8192) >> 14; \
    /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 272); \
    tq += (t5*11585 + 16384) >> 15; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 273); \
    t5 -= (tq*13573 + 8192) >> 14; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 29957, 16384, 274); \
    tp += (t6*29957 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(tp, 11585, 8192, 275); \
    t6 -= (tp*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 276); \
    tp -= (t6*19195 + 16384) >> 15; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 29957, 16384, 277); \
    tu += (t1*29957 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(tu, 11585, 8192, 278); \
    t1 -= (tu*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 279); \
    tu -= (t1*19195 + 16384) >> 15; \
    /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 28681, 16384, 280); \
    tt += (t2*28681 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(tt, 15137, 8192, 281); \
    t2 -= (tt*15137 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 4161, 8192, 282); \
    tt += (t2*4161 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(ts, 4161, 8192, 283); \
    t3 += (ts*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 15137, 8192, 284); \
    ts -= (t3*15137 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(ts, 14341, 8192, 285); \
    t3 += (ts*14341 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 19195, 16384, 286); \
    t9 -= (tm*19195 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 287); \
    tm -= (t9*11585 + 8192) >> 14; \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 7489, 4096, 288); \
    t9 += (tm*7489 + 4096) >> 13; \
    /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 289); \
    ta += (tl*3259 + 4096) >> 13; \
    /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
    OD_DCT_OVERFLOW_CHECK(ta, 3135, 8192, 290); \
    tl -= (ta*3135 + 8192) >> 14; \
    /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 291); \
    ta += (tl*3259 + 4096) >> 13; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(tk, 4161, 8192, 292); \
    tb += (tk*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 15137, 8192, 293); \
    tk -= (tb*15137 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(tk, 14341, 8192, 294); \
    tb += (tk*14341 + 8192) >> 14; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(te, 29957, 16384, 295); \
    th += (te*29957 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(th, 11585, 8192, 296); \
    te -= (th*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(te, 19195, 16384, 297); \
    th -= (te*19195 + 16384) >> 15; \
    /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 28681, 16384, 298); \
    tj += (tc*28681 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(tj, 15137, 8192, 299); \
    tc -= (tj*15137 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 4161, 8192, 300); \
    tj += (tc*4161 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 4161, 8192, 301); \
    td += (ti*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(td, 15137, 8192, 302); \
    ti -= (td*15137 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 14341, 8192, 303); \
    td += (ti*14341 + 8192) >> 14; \
    \
    t1 = -t1; \
    t2 = -t2; \
    t3 = -t3; \
    td = -td; \
    tg = -tg; \
    to = -to; \
    ts = -ts; \
    \
    /* Butterfly stage 1: sixteen pairs, halves not retained. */ \
    tr -= OD_DCT_RSHIFT(t5, 1); \
    t5 += tr; \
    tq -= OD_DCT_RSHIFT(t4, 1); /* pass */ \
    t4 += tq; \
    t6 -= OD_DCT_RSHIFT(t7, 1); \
    t7 += t6; \
    to -= OD_DCT_RSHIFT(tp, 1); /* pass */ \
    tp += to; \
    t1 += OD_DCT_RSHIFT(t0, 1); /* pass */ \
    t0 -= t1; \
    tv -= OD_DCT_RSHIFT(tu, 1); \
    tu += tv; \
    t3 -= OD_DCT_RSHIFT(tt, 1); \
    tt += t3; \
    t2 += OD_DCT_RSHIFT(ts, 1); \
    ts -= t2; \
    t9 -= OD_DCT_RSHIFT(t8, 1); /* pass */ \
    t8 += t9; \
    tn += OD_DCT_RSHIFT(tm, 1); \
    tm -= tn; \
    tb += OD_DCT_RSHIFT(ta, 1); \
    ta -= tb; \
    tl -= OD_DCT_RSHIFT(tk, 1); \
    tk += tl; \
    te -= OD_DCT_RSHIFT(tf, 1); /* pass */ \
    tf += te; \
    tg -= OD_DCT_RSHIFT(th, 1); \
    th += tg; \
    tc -= OD_DCT_RSHIFT(ti, 1); \
    ti += tc; \
    td += OD_DCT_RSHIFT(tj, 1); \
    tj -= td; \
    \
    t4 = -t4; \
    \
    /* Lifting stage 2: four three-step sequences. */ \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
    OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 304); \
    t4 += (tr*6723 + 4096) >> 13; \
    /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 16069, 8192, 305); \
    tr -= (t4*16069 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
    OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 306); \
    t4 += (tr*6723 + 4096) >> 13; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 307); \
    t5 += (tq*17515 + 16384) >> 15; \
    /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 13623, 8192, 308); \
    tq -= (t5*13623 + 8192) >> 14; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 309); \
    t5 += (tq*17515 + 16384) >> 15; \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 310); \
    t7 += (to*3227 + 16384) >> 15; \
    /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 6393, 16384, 311); \
    to -= (t7*6393 + 16384) >> 15; \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 312); \
    t7 += (to*3227 + 16384) >> 15; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 313); \
    t6 += (tp*2485 + 4096) >> 13; \
    /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 18205, 16384, 314); \
    tp -= (t6*18205 + 16384) >> 15; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 315); \
    t6 += (tp*2485 + 4096) >> 13; \
    \
    t5 = -t5; \
    \
    /* Butterfly stage 2: the first eight halves are kept for stage 3. */ \
    tr += to; \
    trh = OD_DCT_RSHIFT(tr, 1); \
    to -= trh; \
    t4 += t7; \
    t4h = OD_DCT_RSHIFT(t4, 1); \
    t7 -= t4h; \
    t5 += tp; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    tp -= t5h; \
    tq += t6; \
    tqh = OD_DCT_RSHIFT(tq, 1); \
    t6 -= tqh; \
    t0 -= t3; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t3 += t0h; \
    tv -= ts; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    ts += tvh; \
    tu += tt; \
    tuh = OD_DCT_RSHIFT(tu, 1); \
    tt -= tuh; \
    t1 -= t2; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    t2 += t1h; \
    t8 += tb; \
    tb -= OD_DCT_RSHIFT(t8, 1); \
    tn += tk; \
    tk -= OD_DCT_RSHIFT(tn, 1); \
    t9 += tl; \
    tl -= OD_DCT_RSHIFT(t9, 1); \
    tm -= ta; \
    ta += OD_DCT_RSHIFT(tm, 1); \
    tc -= tf; \
    tf += OD_DCT_RSHIFT(tc, 1); \
    tj += tg; \
    tg -= OD_DCT_RSHIFT(tj, 1); \
    td -= te; \
    te += OD_DCT_RSHIFT(td, 1); \
    ti += th; \
    th -= OD_DCT_RSHIFT(ti, 1); \
    \
    t9 = -t9; \
    tl = -tl; \
    \
    /* Lifting stage 3: eight three-step sequences. */ \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 316); \
    t8 += (tn*805 + 8192) >> 14; \
    /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
    OD_DCT_OVERFLOW_CHECK(t8, 803, 4096, 317); \
    tn -= (t8*803 + 4096) >> 13; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 318); \
    t8 += (tn*805 + 8192) >> 14; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 319); \
    tk += (tb*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
    OD_DCT_OVERFLOW_CHECK(tk, 5197, 4096, 320); \
    tb -= (tk*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 321); \
    tk += (tb*11725 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 322); \
    ta += (tl*2455 + 2048) >> 12; \
    /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    OD_DCT_OVERFLOW_CHECK(ta, 14449, 8192, 323); \
    tl -= (ta*14449 + 8192) >> 14; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 324); \
    ta += (tl*2455 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 325); \
    t9 += (tm*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
    OD_DCT_OVERFLOW_CHECK(t9, 1189, 2048, 326); \
    tm -= (t9*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 327); \
    t9 += (tm*4861 + 16384) >> 15; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 328); \
    tf += (tg*805 + 8192) >> 14; \
    /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
    OD_DCT_OVERFLOW_CHECK(tf, 803, 4096, 329); \
    tg -= (tf*803 + 4096) >> 13; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 330); \
    tf += (tg*805 + 8192) >> 14; \
    /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 331); \
    tc += (tj*2931 + 4096) >> 13; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 5197, 4096, 332); \
    tj -= (tc*5197 + 4096) >> 13; \
    /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 333); \
    tc += (tj*2931 + 4096) >> 13; \
    /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 334); \
    td += (ti*513 + 1024) >> 11; \
    /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
    OD_DCT_OVERFLOW_CHECK(td, 7723, 8192, 335); \
    ti -= (td*7723 + 8192) >> 14; \
    /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 336); \
    td += (ti*513 + 1024) >> 11; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 337); \
    te += (th*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
    OD_DCT_OVERFLOW_CHECK(te, 1189, 2048, 338); \
    th -= (te*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 339); \
    te += (th*4861 + 16384) >> 15; \
    \
    ta = -ta; \
    tb = -tb; \
    \
    /* Butterfly stage 3: the first eight pairs reuse the stage-2 halves. */ \
    tt += t5h; \
    t5 -= tt; \
    t2 -= tqh; \
    tq += t2; \
    tp += t1h; \
    t1 -= tp; \
    t6 -= tuh; \
    tu += t6; \
    t7 += tvh; \
    tv -= t7; \
    to += t0h; \
    t0 -= to; \
    t3 -= t4h; \
    t4 += t3; \
    ts += trh; \
    tr -= ts; \
    tf -= OD_DCT_RSHIFT(tn, 1); \
    tn += tf; \
    tg -= OD_DCT_RSHIFT(t8, 1); \
    t8 += tg; \
    tk += OD_DCT_RSHIFT(tc, 1); \
    tc -= tk; \
    tb += OD_DCT_RSHIFT(tj, 1); \
    tj -= tb; \
    ta += OD_DCT_RSHIFT(ti, 1); \
    ti -= ta; \
    tl += OD_DCT_RSHIFT(td, 1); \
    td -= tl; \
    te -= OD_DCT_RSHIFT(tm, 1); \
    tm += te; \
    th -= OD_DCT_RSHIFT(t9, 1); \
    t9 += th; \
    ta -= t5; \
    t5 += OD_DCT_RSHIFT(ta, 1); \
    tq -= tl; \
    tl += OD_DCT_RSHIFT(tq, 1); \
    t2 -= ti; \
    ti += OD_DCT_RSHIFT(t2, 1); \
    td -= tt; \
    tt += OD_DCT_RSHIFT(td, 1); \
    tm += tp; \
    tp -= OD_DCT_RSHIFT(tm, 1); \
    t6 += t9; \
    t9 -= OD_DCT_RSHIFT(t6, 1); \
    te -= tu; \
    tu += OD_DCT_RSHIFT(te, 1); \
    t1 -= th; \
    th += OD_DCT_RSHIFT(t1, 1); \
    t0 -= tg; \
    tg += OD_DCT_RSHIFT(t0, 1); \
    tf += tv; \
    tv -= OD_DCT_RSHIFT(tf, 1); \
    t8 -= t7; \
    t7 += OD_DCT_RSHIFT(t8, 1); \
    to -= tn; \
    tn += OD_DCT_RSHIFT(to, 1); \
    t4 -= tk; \
    tk += OD_DCT_RSHIFT(t4, 1); \
    tb -= tr; \
    tr += OD_DCT_RSHIFT(tb, 1); \
    t3 -= tj; \
    tj += OD_DCT_RSHIFT(t3, 1); \
    tc -= ts; \
    ts += OD_DCT_RSHIFT(tc, 1); \
    \
    tr = -tr; \
    ts = -ts; \
    tt = -tt; \
    tu = -tu; \
    \
    /* Lifting stage 4: sixteen three-step sequences, one per output pair. */ \
    /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t0, 2847, 2048, 340); \
    tv += (t0*2847 + 2048) >> 12; \
    /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tv, 5791, 2048, 341); \
    t0 -= (tv*5791 + 2048) >> 12; \
    /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t0, 5593, 4096, 342); \
    tv += (t0*5593 + 4096) >> 13; \
    /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tf, 4099, 4096, 343); \
    tg -= (tf*4099 + 4096) >> 13; \
    /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tg, 1997, 1024, 344); \
    tf += (tg*1997 + 1024) >> 11; \
    /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tf, 815, 16384, 345); \
    tg += (tf*815 + 16384) >> 15; \
    /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t8, 2527, 2048, 346); \
    tn -= (t8*2527 + 2048) >> 12; \
    /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tn, 4695, 4096, 347); \
    t8 += (tn*4695 + 4096) >> 13; \
    /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t8, 4187, 4096, 348); \
    tn += (t8*4187 + 4096) >> 13; \
    /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(to, 5477, 4096, 349); \
    t7 += (to*5477 + 4096) >> 13; \
    /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t7, 4169, 4096, 350); \
    to -= (t7*4169 + 4096) >> 13; \
    /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(to, 2571, 2048, 351); \
    t7 -= (to*2571 + 2048) >> 12; \
    /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t2, 5331, 4096, 352); \
    tt += (t2*5331 + 4096) >> 13; \
    /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tt, 5749, 2048, 353); \
    t2 -= (tt*5749 + 2048) >> 12; \
    /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t2, 2413, 2048, 354); \
    tt += (t2*2413 + 2048) >> 12; \
    /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(td, 4167, 4096, 355); \
    ti -= (td*4167 + 4096) >> 13; \
    /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ti, 891, 512, 356); \
    td += (ti*891 + 512) >> 10; \
    /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(td, 4327, 16384, 357); \
    ti += (td*4327 + 16384) >> 15; \
    /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ta, 2261, 2048, 358); \
    tl -= (ta*2261 + 2048) >> 12; \
    /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tl, 2855, 2048, 359); \
    ta += (tl*2855 + 2048) >> 12; \
    /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ta, 5417, 8192, 360); \
    tl += (ta*5417 + 8192) >> 14; \
    /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tq, 3459, 2048, 361); \
    t5 += (tq*3459 + 2048) >> 12; \
    /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t5, 1545, 2048, 362); \
    tq -= (t5*1545 + 2048) >> 12; \
    /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tq, 1971, 1024, 363); \
    t5 -= (tq*1971 + 1024) >> 11; \
    /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t3, 323, 256, 364); \
    ts += (t3*323 + 256) >> 9; \
    /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ts, 5707, 2048, 365); \
    t3 -= (ts*5707 + 2048) >> 12; \
    /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t3, 2229, 2048, 366); \
    ts += (t3*2229 + 2048) >> 12; \
    /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tc, 1061, 1024, 367); \
    tj -= (tc*1061 + 1024) >> 11; \
    /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tj, 6671, 4096, 368); \
    tc += (tj*6671 + 4096) >> 13; \
    /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tc, 6287, 16384, 369); \
    tj += (tc*6287 + 16384) >> 15; \
    /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tb, 4359, 4096, 370); \
    tk -= (tb*4359 + 4096) >> 13; \
    /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tk, 3099, 2048, 371); \
    tb += (tk*3099 + 2048) >> 12; \
    /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tb, 2109, 4096, 372); \
    tk += (tb*2109 + 4096) >> 13; \
    /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t4, 5017, 4096, 373); \
    tr += (t4*5017 + 4096) >> 13; \
    /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tr, 1413, 512, 374); \
    t4 -= (tr*1413 + 512) >> 10; \
    /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t4, 8195, 8192, 375); \
    tr += (t4*8195 + 8192) >> 14; \
    /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tm, 2373, 2048, 376); \
    t9 += (tm*2373 + 2048) >> 12; \
    /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t9, 5209, 4096, 377); \
    tm -= (t9*5209 + 4096) >> 13; \
    /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tm, 3391, 4096, 378); \
    t9 -= (tm*3391 + 4096) >> 13; \
    /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t6, 1517, 1024, 379); \
    tp -= (t6*1517 + 1024) >> 11; \
    /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tp, 1817, 2048, 380); \
    t6 += (tp*1817 + 2048) >> 12; \
    /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t6, 6331, 4096, 381); \
    tp += (t6*6331 + 4096) >> 13; \
    /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(te, 515, 512, 382); \
    th -= (te*515 + 512) >> 10; \
    /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(th, 7567, 4096, 383); \
    te += (th*7567 + 4096) >> 13; \
    /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(te, 2513, 16384, 384); \
    th += (te*2513 + 16384) >> 15; \
    /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t1, 2753, 2048, 385); \
    tu += (t1*2753 + 2048) >> 12; \
    /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tu, 5777, 2048, 386); \
    t1 -= (tu*5777 + 2048) >> 12; \
    /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t1, 1301, 1024, 387); \
    tu += (t1*1301 + 1024) >> 11; \
  } \
  while (0)
2431
2432#define OD_IDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
2433 tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
2434 /* Embedded 32-point asymmetric Type-IV iDST. */ \
2435 do { \
2436 int t0h; \
2437 int t4h; \
2438 int tbh; \
2439 int tfh; \
2440 int tgh; \
2441 int tkh; \
2442 int trh; \
2443 int tvh; \
2444 /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
2445 tf -= (tg*1301 + 1024) >> 11; \
2446 /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
2447 tg += (tf*5777 + 2048) >> 12; \
2448 /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
2449 tf -= (tg*2753 + 2048) >> 12; \
2450 /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
2451 th -= (te*2513 + 16384) >> 15; \
2452 /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
2453 te -= (th*7567 + 4096) >> 13; \
2454 /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
2455 th += (te*515 + 512) >> 10; \
2456 /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
2457 tj -= (tc*6331 + 4096) >> 13; \
2458 /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
2459 tc -= (tj*1817 + 2048) >> 12; \
2460 /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
2461 tj += (tc*1517 + 1024) >> 11; \
2462 /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
2463 ti += (td*3391 + 4096) >> 13; \
2464 /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
2465 td += (ti*5209 + 4096) >> 13; \
2466 /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
2467 ti -= (td*2373 + 2048) >> 12; \
2468 /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
2469 tr -= (t4*8195 + 8192) >> 14; \
2470 /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
2471 t4 += (tr*1413 + 512) >> 10; \
2472 /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
2473 tr -= (t4*5017 + 4096) >> 13; \
2474 /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
2475 t5 -= (tq*2109 + 4096) >> 13; \
2476 /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
2477 tq -= (t5*3099 + 2048) >> 12; \
2478 /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
2479 t5 += (tq*4359 + 4096) >> 13; \
2480 /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
2481 tp -= (t6*6287 + 16384) >> 15; \
2482 /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
2483 t6 -= (tp*6671 + 4096) >> 13; \
2484 /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
2485 tp += (t6*1061 + 1024) >> 11; \
2486 /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
2487 t7 -= (to*2229 + 2048) >> 12; \
2488 /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
2489 to += (t7*5707 + 2048) >> 12; \
2490 /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
2491 t7 -= (to*323 + 256) >> 9; \
2492 /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
2493 tk += (tb*1971 + 1024) >> 11; \
2494 /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
2495 tb += (tk*1545 + 2048) >> 12; \
2496 /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
2497 tk -= (tb*3459 + 2048) >> 12; \
2498 /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
2499 tl -= (ta*5417 + 8192) >> 14; \
2500 /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
2501 ta -= (tl*2855 + 2048) >> 12; \
2502 /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
2503 tl += (ta*2261 + 2048) >> 12; \
2504 /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
2505 t9 -= (tm*4327 + 16384) >> 15; \
2506 /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
2507 tm -= (t9*891 + 512) >> 10; \
2508 /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
2509 t9 += (tm*4167 + 4096) >> 13; \
2510 /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
2511 tn -= (t8*2413 + 2048) >> 12; \
2512 /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
2513 t8 += (tn*5749 + 2048) >> 12; \
2514 /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
2515 tn -= (t8*5331 + 4096) >> 13; \
2516 /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
2517 ts += (t3*2571 + 2048) >> 12; \
2518 /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
2519 t3 += (ts*4169 + 4096) >> 13; \
2520 /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
2521 ts -= (t3*5477 + 4096) >> 13; \
2522 /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
2523 tt -= (t2*4187 + 4096) >> 13; \
2524 /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
2525 t2 -= (tt*4695 + 4096) >> 13; \
2526 /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
2527 tt += (t2*2527 + 2048) >> 12; \
2528 /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
2529 t1 -= (tu*815 + 16384) >> 15; \
2530 /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
2531 tu -= (t1*1997 + 1024) >> 11; \
2532 /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
2533 t1 += (tu*4099 + 4096) >> 13; \
2534 /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
2535 tv -= (t0*5593 + 4096) >> 13; \
2536 /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
2537 t0 += (tv*5791 + 2048) >> 12; \
2538 /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
2539 tv -= (t0*2847 + 2048) >> 12; \
2540 \
2541 t7 = -t7; \
2542 tf = -tf; \
2543 tn = -tn; \
2544 tr = -tr; \
2545 \
2546 t7 -= OD_DCT_RSHIFT(t6, 1); \
2547 t6 += t7; \
2548 tp -= OD_DCT_RSHIFT(to, 1); \
2549 to += tp; \
2550 tr -= OD_DCT_RSHIFT(tq, 1); \
2551 tq += tr; \
2552 t5 -= OD_DCT_RSHIFT(t4, 1); \
2553 t4 += t5; \
2554 tt -= OD_DCT_RSHIFT(t3, 1); \
2555 t3 += tt; \
2556 ts -= OD_DCT_RSHIFT(t2, 1); \
2557 t2 += ts; \
2558 tv += OD_DCT_RSHIFT(tu, 1); \
2559 tu -= tv; \
2560 t1 -= OD_DCT_RSHIFT(t0, 1); \
2561 t0 += t1; \
2562 th -= OD_DCT_RSHIFT(tg, 1); \
2563 tg += th; \
2564 tf -= OD_DCT_RSHIFT(te, 1); \
2565 te += tf; \
2566 ti += OD_DCT_RSHIFT(tc, 1); \
2567 tc -= ti; \
2568 tj += OD_DCT_RSHIFT(td, 1); \
2569 td -= tj; \
2570 tn -= OD_DCT_RSHIFT(tm, 1); \
2571 tm += tn; \
2572 t9 -= OD_DCT_RSHIFT(t8, 1); \
2573 t8 += t9; \
2574 tl -= OD_DCT_RSHIFT(tb, 1); \
2575 tb += tl; \
2576 tk -= OD_DCT_RSHIFT(ta, 1); \
2577 ta += tk; \
2578 \
2579 ti -= th; \
2580 th += OD_DCT_RSHIFT(ti, 1); \
2581 td -= te; \
2582 te += OD_DCT_RSHIFT(td, 1); \
2583 tm += tl; \
2584 tl -= OD_DCT_RSHIFT(tm, 1); \
2585 t9 += ta; \
2586 ta -= OD_DCT_RSHIFT(t9, 1); \
2587 tp += tq; \
2588 tq -= OD_DCT_RSHIFT(tp, 1); \
2589 t6 += t5; \
2590 t5 -= OD_DCT_RSHIFT(t6, 1); \
2591 t2 -= t1; \
2592 t1 += OD_DCT_RSHIFT(t2, 1); \
2593 tt -= tu; \
2594 tu += OD_DCT_RSHIFT(tt, 1); \
2595 tr += t7; \
2596 trh = OD_DCT_RSHIFT(tr, 1); \
2597 t7 -= trh; \
2598 t4 -= to; \
2599 t4h = OD_DCT_RSHIFT(t4, 1); \
2600 to += t4h; \
2601 t0 += t3; \
2602 t0h = OD_DCT_RSHIFT(t0, 1); \
2603 t3 -= t0h; \
2604 tv += ts; \
2605 tvh = OD_DCT_RSHIFT(tv, 1); \
2606 ts -= tvh; \
2607 tf -= tc; \
2608 tfh = OD_DCT_RSHIFT(tf, 1); \
2609 tc += tfh; \
2610 tg += tj; \
2611 tgh = OD_DCT_RSHIFT(tg, 1); \
2612 tj -= tgh; \
2613 tb -= t8; \
2614 tbh = OD_DCT_RSHIFT(tb, 1); \
2615 t8 += tbh; \
2616 tk += tn; \
2617 tkh = OD_DCT_RSHIFT(tk, 1); \
2618 tn -= tkh; \
2619 \
2620 ta = -ta; \
2621 tq = -tq; \
2622 \
2623 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2624 te -= (th*4861 + 16384) >> 15; \
2625 /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
2626 th += (te*1189 + 2048) >> 12; \
2627 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2628 te -= (th*4861 + 16384) >> 15; \
2629 /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
2630 tm -= (t9*513 + 1024) >> 11; \
2631 /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
2632 t9 += (tm*7723 + 8192) >> 14; \
2633 /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
2634 tm -= (t9*513 + 1024) >> 11; \
2635 /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2636 t6 -= (tp*2931 + 4096) >> 13; \
2637 /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
2638 tp += (t6*5197 + 4096) >> 13; \
2639 /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2640 t6 -= (tp*2931 + 4096) >> 13; \
2641 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2642 tu -= (t1*805 + 8192) >> 14; \
2643 /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
2644 t1 += (tu*803 + 4096) >> 13; \
2645 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2646 tu -= (t1*805 + 8192) >> 14; \
2647 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2648 ti -= (td*4861 + 16384) >> 15; \
2649 /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
2650 td += (ti*1189 + 2048) >> 12; \
2651 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2652 ti -= (td*4861 + 16384) >> 15; \
2653 /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
2654 ta -= (tl*2455 + 2048) >> 12; \
2655 /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
2656 tl += (ta*14449 + 8192) >> 14; \
2657 /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
2658 ta -= (tl*2455 + 2048) >> 12; \
2659 /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2660 t5 -= (tq*11725 + 16384) >> 15; \
2661 /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
2662 tq += (t5*5197 + 4096) >> 13; \
2663 /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2664 t5 -= (tq*11725 + 16384) >> 15; \
2665 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2666 t2 -= (tt*805 + 8192) >> 14; \
2667 /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
2668 tt += (t2*803 + 4096) >> 13; \
2669 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2670 t2 -= (tt*805 + 8192) >> 14; \
2671 \
2672 tl = -tl; \
2673 ti = -ti; \
2674 \
2675 th += OD_DCT_RSHIFT(t9, 1); \
2676 t9 -= th; \
2677 te -= OD_DCT_RSHIFT(tm, 1); \
2678 tm += te; \
2679 t1 += OD_DCT_RSHIFT(tp, 1); \
2680 tp -= t1; \
2681 tu -= OD_DCT_RSHIFT(t6, 1); \
2682 t6 += tu; \
2683 ta -= OD_DCT_RSHIFT(td, 1); \
2684 td += ta; \
2685 tl += OD_DCT_RSHIFT(ti, 1); \
2686 ti -= tl; \
2687 t5 += OD_DCT_RSHIFT(tt, 1); \
2688 tt -= t5; \
2689 tq += OD_DCT_RSHIFT(t2, 1); \
2690 t2 -= tq; \
2691 \
2692 t8 -= tgh; \
2693 tg += t8; \
2694 tn += tfh; \
2695 tf -= tn; \
2696 t7 -= tvh; \
2697 tv += t7; \
2698 to -= t0h; \
2699 t0 += to; \
2700 tc += tbh; \
2701 tb -= tc; \
2702 tj += tkh; \
2703 tk -= tj; \
2704 ts += t4h; \
2705 t4 -= ts; \
2706 t3 += trh; \
2707 tr -= t3; \
2708 \
2709 tk = -tk; \
2710 \
2711 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
2712 tc -= (tj*2485 + 4096) >> 13; \
2713 /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
2714 tj += (tc*18205 + 16384) >> 15; \
2715 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
2716 tc -= (tj*2485 + 4096) >> 13; \
2717 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
2718 ts -= (t3*3227 + 16384) >> 15; \
2719 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
2720 t3 += (ts*6393 + 16384) >> 15; \
2721 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
2722 ts -= (t3*3227 + 16384) >> 15; \
2723 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
2724 tk -= (tb*17515 + 16384) >> 15; \
2725 /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
2726 tb += (tk*13623 + 8192) >> 14; \
2727 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
2728 tk -= (tb*17515 + 16384) >> 15; \
2729 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
2730 t4 -= (tr*6723 + 4096) >> 13; \
2731 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
2732 tr += (t4*16069 + 8192) >> 14; \
2733 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
2734 t4 -= (tr*6723 + 4096) >> 13; \
2735 \
2736 t4 = -t4; \
2737 \
2738 tp += tm; \
2739 tm -= OD_DCT_RSHIFT(tp, 1); \
2740 t9 -= t6; \
2741 t6 += OD_DCT_RSHIFT(t9, 1); \
2742 th -= t1; \
2743 t1 += OD_DCT_RSHIFT(th, 1); \
2744 tu -= te; \
2745 te += OD_DCT_RSHIFT(tu, 1); /* pass */ \
2746 t5 -= tl; \
2747 tl += OD_DCT_RSHIFT(t5, 1); \
2748 ta += tq; \
2749 tq -= OD_DCT_RSHIFT(ta, 1); \
2750 td += tt; \
2751 tt -= OD_DCT_RSHIFT(td, 1); \
2752 t2 -= ti; \
2753 ti += OD_DCT_RSHIFT(t2, 1); /* pass */ \
2754 t7 += t8; \
2755 t8 -= OD_DCT_RSHIFT(t7, 1); \
2756 tn -= to; \
2757 to += OD_DCT_RSHIFT(tn, 1); \
2758 tf -= tv; \
2759 tv += OD_DCT_RSHIFT(tf, 1); \
2760 t0 += tg; \
2761 tg -= OD_DCT_RSHIFT(t0, 1); /* pass */ \
2762 tj -= t3; \
2763 t3 += OD_DCT_RSHIFT(tj, 1); /* pass */ \
2764 ts -= tc; \
2765 tc += OD_DCT_RSHIFT(ts, 1); \
2766 t4 -= tb; \
2767 tb += OD_DCT_RSHIFT(t4, 1); /* pass */ \
2768 tk -= tr; \
2769 tr += OD_DCT_RSHIFT(tk, 1); \
2770 \
2771 t1 = -t1; \
2772 t3 = -t3; \
2773 t7 = -t7; \
2774 t8 = -t8; \
2775 tg = -tg; \
2776 tm = -tm; \
2777 to = -to; \
2778 \
2779 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2780 tm -= (t9*14341 + 8192) >> 14; \
2781 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2782 t9 += (tm*15137 + 8192) >> 14; \
2783 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2784 tm -= (t9*4161 + 8192) >> 14; \
2785 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2786 tp -= (t6*4161 + 8192) >> 14; \
2787 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2788 t6 += (tp*15137 + 8192) >> 14; \
2789 /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2790 tp -= (t6*28681 + 16384) >> 15; \
2791 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2792 th += (te*19195 + 16384) >> 15; \
2793 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2794 te += (th*11585 + 8192) >> 14; \
2795 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2796 th -= (te*29957 + 16384) >> 15; \
2797 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2798 tq -= (t5*14341 + 8192) >> 14; \
2799 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2800 t5 += (tq*15137 + 8192) >> 14; \
2801 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2802 tq -= (t5*4161 + 8192) >> 14; \
2803 /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
2804 ta -= (tl*3259 + 4096) >> 13; \
2805 /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
2806 tl += (ta*3135 + 8192) >> 14; \
2807 /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
2808 ta -= (tl*3259 + 4096) >> 13; \
2809 /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2810 ti -= (td*7489 + 4096) >> 13; \
2811 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2812 td += (ti*11585 + 8192) >> 14; \
2813 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2814 ti += (td*19195 + 16384) >> 15; \
2815 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2816 to -= (t7*14341 + 8192) >> 14; \
2817 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2818 t7 += (to*15137 + 8192) >> 14; \
2819 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2820 to -= (t7*4161 + 8192) >> 14; \
2821 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2822 tn -= (t8*4161 + 8192) >> 14; \
2823 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2824 t8 += (tn*15137 + 8192) >> 14; \
2825 /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2826 tn -= (t8*28681 + 16384) >> 15; \
2827 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2828 tf += (tg*19195 + 16384) >> 15; \
2829 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2830 tg += (tf*11585 + 8192) >> 14; \
2831 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2832 tf -= (tg*29957 + 16384) >> 15; \
2833 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2834 tj += (tc*19195 + 16384) >> 15; \
2835 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2836 tc += (tj*11585 + 8192) >> 14; \
2837 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2838 tj -= (tc*29957 + 16384) >> 15; \
2839 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
2840 tk += (tb*13573 + 8192) >> 14; \
2841 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
2842 tb -= (tk*11585 + 16384) >> 15; \
2843 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
2844 tk += (tb*13573 + 8192) >> 14; \
2845 \
2846 tf = -tf; \
2847 \
2848 } \
2849 while (0)
2850
/* OD_FDCT_64: embedded 64-point orthonormal Type-II forward DCT, in place.
 *
 * Every argument is both read and assigned, so each must be a distinct
 * modifiable integer lvalue.  The arguments are paired off (u0 with u,
 * u1 with u_, u2 with uZ, ... uv with uw) and each pair goes through one
 * lifting butterfly that leaves a sum-type term in one member and a
 * difference-type term in the other; the halved value produced by each
 * butterfly is kept in a block-local temporary.  The sum-type terms (plus
 * the halves of the full sums) are then passed to OD_FDCT_32_ASYM and the
 * difference-type terms to OD_FDST_32_ASYM. */
#define OD_FDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
  us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
  ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
  ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
  do { \
    /* One halved-term temporary per butterfly. */ \
    int uwh, uxh, uyh, uzh, uAh, uBh, uCh, uDh, uEh, uFh, uGh, uHh, uIh, uJh; \
    int uKh, uLh, uMh, uNh, uOh, uPh, uQh, uRh, uSh, uTh, uUh, uVh, uWh, uXh; \
    int uYh, uZh, u_h, uh_; \
    /* Butterflies alternate between two forms: \
         hi = lo - hi; h = hi/2; lo -= h;      (difference kept whole) \
         hi += lo;     h = hi/2; lo = h - lo;  (sum kept whole) */ \
    u = u0 - u;   uh_ = OD_DCT_RSHIFT(u, 1);  u0 -= uh_; \
    u_ += u1;     u_h = OD_DCT_RSHIFT(u_, 1); u1 = u_h - u1; \
    uZ = u2 - uZ; uZh = OD_DCT_RSHIFT(uZ, 1); u2 -= uZh; \
    uY += u3;     uYh = OD_DCT_RSHIFT(uY, 1); u3 = uYh - u3; \
    uX = u4 - uX; uXh = OD_DCT_RSHIFT(uX, 1); u4 -= uXh; \
    uW += u5;     uWh = OD_DCT_RSHIFT(uW, 1); u5 = uWh - u5; \
    uV = u6 - uV; uVh = OD_DCT_RSHIFT(uV, 1); u6 -= uVh; \
    uU += u7;     uUh = OD_DCT_RSHIFT(uU, 1); u7 = uUh - u7; \
    uT = u8 - uT; uTh = OD_DCT_RSHIFT(uT, 1); u8 -= uTh; \
    uS += u9;     uSh = OD_DCT_RSHIFT(uS, 1); u9 = uSh - u9; \
    uR = ua - uR; uRh = OD_DCT_RSHIFT(uR, 1); ua -= uRh; \
    uQ += ub;     uQh = OD_DCT_RSHIFT(uQ, 1); ub = uQh - ub; \
    uP = uc - uP; uPh = OD_DCT_RSHIFT(uP, 1); uc -= uPh; \
    uO += ud;     uOh = OD_DCT_RSHIFT(uO, 1); ud = uOh - ud; \
    uN = ue - uN; uNh = OD_DCT_RSHIFT(uN, 1); ue -= uNh; \
    uM += uf;     uMh = OD_DCT_RSHIFT(uM, 1); uf = uMh - uf; \
    uL = ug - uL; uLh = OD_DCT_RSHIFT(uL, 1); ug -= uLh; \
    uK += uh;     uKh = OD_DCT_RSHIFT(uK, 1); uh = uKh - uh; \
    uJ = ui - uJ; uJh = OD_DCT_RSHIFT(uJ, 1); ui -= uJh; \
    uI += uj;     uIh = OD_DCT_RSHIFT(uI, 1); uj = uIh - uj; \
    uH = uk - uH; uHh = OD_DCT_RSHIFT(uH, 1); uk -= uHh; \
    uG += ul;     uGh = OD_DCT_RSHIFT(uG, 1); ul = uGh - ul; \
    uF = um - uF; uFh = OD_DCT_RSHIFT(uF, 1); um -= uFh; \
    uE += un;     uEh = OD_DCT_RSHIFT(uE, 1); un = uEh - un; \
    uD = uo - uD; uDh = OD_DCT_RSHIFT(uD, 1); uo -= uDh; \
    uC += up;     uCh = OD_DCT_RSHIFT(uC, 1); up = uCh - up; \
    uB = uq - uB; uBh = OD_DCT_RSHIFT(uB, 1); uq -= uBh; \
    uA += ur;     uAh = OD_DCT_RSHIFT(uA, 1); ur = uAh - ur; \
    uz = us - uz; uzh = OD_DCT_RSHIFT(uz, 1); us -= uzh; \
    uy += ut;     uyh = OD_DCT_RSHIFT(uy, 1); ut = uyh - ut; \
    ux = uu - ux; uxh = OD_DCT_RSHIFT(ux, 1); uu -= uxh; \
    uw += uv;     uwh = OD_DCT_RSHIFT(uw, 1); uv = uwh - uv; \
    /* Sum-type half: 32-point asymmetric forward DCT. */ \
    OD_FDCT_32_ASYM(u0, uw, uwh, ug, uM, uMh, u8, uE, uEh, uo, uU, uUh, \
      u4, uA, uAh, uk, uQ, uQh, uc, uI, uIh, us, uY, uYh, u2, uy, uyh, \
      ui, uO, uOh, ua, uG, uGh, uq, uW, uWh, u6, uC, uCh, um, uS, uSh, \
      ue, uK, uKh, uu, u_, u_h); \
    /* Difference-type half: 32-point asymmetric forward DST. */ \
    OD_FDST_32_ASYM(u, uv, uL, uf, uT, un, uD, u7, uX, ur, uH, ub, uP, uj, \
      uz, u3, uZ, ut, uJ, ud, uR, ul, uB, u5, uV, up, uF, u9, uN, uh, ux, u1); \
  } \
  while (0)
2993
/* OD_IDCT_64: embedded 64-point orthonormal Type-II inverse DCT, in place.
 *
 * Every argument is both read and assigned, so each must be a distinct
 * modifiable integer lvalue.  The macro first runs OD_IDST_32_ASYM and
 * OD_IDCT_32_ASYM on the two 32-term halves, then recombines the halves
 * with one lifting butterfly per pair (u0 with u, u1 with u_, u2 with uZ,
 * ... uv with uw).  The u1h, u3h, ... uvh temporaries are uninitialised
 * when handed to OD_IDCT_32_ASYM, so that macro is expected to assign them
 * before they are read by the odd-numbered butterflies below. */
#define OD_IDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
  us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
  ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
  ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
  do { \
    /* Halves supplied through OD_IDCT_32_ASYM's argument list. */ \
    int u1h, u3h, u5h, u7h, u9h, ubh, udh, ufh; \
    int uhh, ujh, ulh, unh, uph, urh, uth, uvh; \
    /* Halves computed locally by the even-numbered butterflies. */ \
    int uxh, uzh, uBh, uDh, uFh, uHh, uJh, uLh; \
    int uNh, uPh, uRh, uTh, uVh, uXh, uZh, uh_; \
    OD_IDST_32_ASYM(u, uL, uT, uD, uX, uH, uP, uz, uZ, uJ, uR, uB, uV, uF, \
      uN, ux, u_, uK, uS, uC, uW, uG, uO, uy, uY, uI, uQ, uA, uU, uE, uM, uw); \
    OD_IDCT_32_ASYM(u0, ug, u8, uo, u4, uk, uc, us, u2, ui, ua, uq, u6, um, \
      ue, uu, u1, u1h, uh, uhh, u9, u9h, up, uph, u5, u5h, ul, ulh, ud, udh, \
      ut, uth, u3, u3h, uj, ujh, ub, ubh, ur, urh, u7, u7h, un, unh, uf, ufh, \
      uv, uvh); \
    /* Butterflies alternate between two forms: \
         h = hi/2; lo += h; hi = lo - hi;   (half computed here) \
         hi = loh - hi; lo -= hi;           (half taken from loh) */ \
    uh_ = OD_DCT_RSHIFT(u, 1);  u0 += uh_; u = u0 - u; \
    u_ = u1h - u_;              u1 -= u_; \
    uZh = OD_DCT_RSHIFT(uZ, 1); u2 += uZh; uZ = u2 - uZ; \
    uY = u3h - uY;              u3 -= uY; \
    uXh = OD_DCT_RSHIFT(uX, 1); u4 += uXh; uX = u4 - uX; \
    uW = u5h - uW;              u5 -= uW; \
    uVh = OD_DCT_RSHIFT(uV, 1); u6 += uVh; uV = u6 - uV; \
    uU = u7h - uU;              u7 -= uU; \
    uTh = OD_DCT_RSHIFT(uT, 1); u8 += uTh; uT = u8 - uT; \
    uS = u9h - uS;              u9 -= uS; \
    uRh = OD_DCT_RSHIFT(uR, 1); ua += uRh; uR = ua - uR; \
    uQ = ubh - uQ;              ub -= uQ; \
    uPh = OD_DCT_RSHIFT(uP, 1); uc += uPh; uP = uc - uP; \
    uO = udh - uO;              ud -= uO; \
    uNh = OD_DCT_RSHIFT(uN, 1); ue += uNh; uN = ue - uN; \
    uM = ufh - uM;              uf -= uM; \
    uLh = OD_DCT_RSHIFT(uL, 1); ug += uLh; uL = ug - uL; \
    uK = uhh - uK;              uh -= uK; \
    uJh = OD_DCT_RSHIFT(uJ, 1); ui += uJh; uJ = ui - uJ; \
    uI = ujh - uI;              uj -= uI; \
    uHh = OD_DCT_RSHIFT(uH, 1); uk += uHh; uH = uk - uH; \
    uG = ulh - uG;              ul -= uG; \
    uFh = OD_DCT_RSHIFT(uF, 1); um += uFh; uF = um - uF; \
    uE = unh - uE;              un -= uE; \
    uDh = OD_DCT_RSHIFT(uD, 1); uo += uDh; uD = uo - uD; \
    uC = uph - uC;              up -= uC; \
    uBh = OD_DCT_RSHIFT(uB, 1); uq += uBh; uB = uq - uB; \
    uA = urh - uA;              ur -= uA; \
    uzh = OD_DCT_RSHIFT(uz, 1); us += uzh; uz = us - uz; \
    uy = uth - uy;              ut -= uy; \
    uxh = OD_DCT_RSHIFT(ux, 1); uu += uxh; ux = uu - ux; \
    uw = uvh - uw;              uv -= uw; \
  } while (0)
3119#endif
3120
Monty Montgomery02078a32017-07-11 21:22:29 -04003121void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) {
3122 int q0;
3123 int q1;
3124 int q2;
3125 int q3;
3126 q0 = x[0*xstride];
3127 q2 = x[1*xstride];
3128 q1 = x[2*xstride];
3129 q3 = x[3*xstride];
3130 OD_FDCT_4(q0, q2, q1, q3);
3131 y[0] = (od_coeff)q0;
3132 y[1] = (od_coeff)q1;
3133 y[2] = (od_coeff)q2;
3134 y[3] = (od_coeff)q3;
3135}
3136
3137void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) {
3138 int q0;
3139 int q1;
3140 int q2;
3141 int q3;
3142 q0 = y[0];
3143 q2 = y[1];
3144 q1 = y[2];
3145 q3 = y[3];
3146 OD_IDCT_4(q0, q2, q1, q3);
3147 x[0*xstride] = q0;
3148 x[1*xstride] = q1;
3149 x[2*xstride] = q2;
3150 x[3*xstride] = q3;
3151}
Monty Montgomerycf18fe42017-07-11 21:33:25 -04003152
3153void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride) {
3154 int r0;
3155 int r1;
3156 int r2;
3157 int r3;
3158 int r4;
3159 int r5;
3160 int r6;
3161 int r7;
3162 r0 = x[0*xstride];
3163 r4 = x[1*xstride];
3164 r2 = x[2*xstride];
3165 r6 = x[3*xstride];
3166 r1 = x[4*xstride];
3167 r5 = x[5*xstride];
3168 r3 = x[6*xstride];
3169 r7 = x[7*xstride];
3170 OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7);
3171 y[0] = (od_coeff)r0;
3172 y[1] = (od_coeff)r1;
3173 y[2] = (od_coeff)r2;
3174 y[3] = (od_coeff)r3;
3175 y[4] = (od_coeff)r4;
3176 y[5] = (od_coeff)r5;
3177 y[6] = (od_coeff)r6;
3178 y[7] = (od_coeff)r7;
3179}
3180
3181void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]) {
3182 int r0;
3183 int r1;
3184 int r2;
3185 int r3;
3186 int r4;
3187 int r5;
3188 int r6;
3189 int r7;
3190 r0 = y[0];
3191 r4 = y[1];
3192 r2 = y[2];
3193 r6 = y[3];
3194 r1 = y[4];
3195 r5 = y[5];
3196 r3 = y[6];
3197 r7 = y[7];
3198 OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7);
3199 x[0*xstride] = (od_coeff)r0;
3200 x[1*xstride] = (od_coeff)r1;
3201 x[2*xstride] = (od_coeff)r2;
3202 x[3*xstride] = (od_coeff)r3;
3203 x[4*xstride] = (od_coeff)r4;
3204 x[5*xstride] = (od_coeff)r5;
3205 x[6*xstride] = (od_coeff)r6;
3206 x[7*xstride] = (od_coeff)r7;
3207}
3208
3209void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride) {
3210 int r0;
3211 int r1;
3212 int r2;
3213 int r3;
3214 int r4;
3215 int r5;
3216 int r6;
3217 int r7;
3218 r0 = x[0*xstride];
3219 r4 = x[1*xstride];
3220 r2 = x[2*xstride];
3221 r6 = x[3*xstride];
3222 r1 = x[4*xstride];
3223 r5 = x[5*xstride];
3224 r3 = x[6*xstride];
3225 r7 = x[7*xstride];
3226 OD_FDST_8(r0, r4, r2, r6, r1, r5, r3, r7);
3227 y[0] = (od_coeff)r0;
3228 y[1] = (od_coeff)r1;
3229 y[2] = (od_coeff)r2;
3230 y[3] = (od_coeff)r3;
3231 y[4] = (od_coeff)r4;
3232 y[5] = (od_coeff)r5;
3233 y[6] = (od_coeff)r6;
3234 y[7] = (od_coeff)r7;
3235}
3236
3237void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]) {
3238 int r0;
3239 int r1;
3240 int r2;
3241 int r3;
3242 int r4;
3243 int r5;
3244 int r6;
3245 int r7;
3246 r0 = y[0];
3247 r4 = y[1];
3248 r2 = y[2];
3249 r6 = y[3];
3250 r1 = y[4];
3251 r5 = y[5];
3252 r3 = y[6];
3253 r7 = y[7];
3254 OD_IDST_8(r0, r4, r2, r6, r1, r5, r3, r7);
3255 x[0*xstride] = (od_coeff)r0;
3256 x[1*xstride] = (od_coeff)r1;
3257 x[2*xstride] = (od_coeff)r2;
3258 x[3*xstride] = (od_coeff)r3;
3259 x[4*xstride] = (od_coeff)r4;
3260 x[5*xstride] = (od_coeff)r5;
3261 x[6*xstride] = (od_coeff)r6;
3262 x[7*xstride] = (od_coeff)r7;
3263}
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04003264
3265void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride) {
3266 int s0;
3267 int s1;
3268 int s2;
3269 int s3;
3270 int s4;
3271 int s5;
3272 int s6;
3273 int s7;
3274 int s8;
3275 int s9;
3276 int sa;
3277 int sb;
3278 int sc;
3279 int sd;
3280 int se;
3281 int sf;
3282 s0 = x[0*xstride];
3283 s8 = x[1*xstride];
3284 s4 = x[2*xstride];
3285 sc = x[3*xstride];
3286 s2 = x[4*xstride];
3287 sa = x[5*xstride];
3288 s6 = x[6*xstride];
3289 se = x[7*xstride];
3290 s1 = x[8*xstride];
3291 s9 = x[9*xstride];
3292 s5 = x[10*xstride];
3293 sd = x[11*xstride];
3294 s3 = x[12*xstride];
3295 sb = x[13*xstride];
3296 s7 = x[14*xstride];
3297 sf = x[15*xstride];
3298 OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3299 y[0] = (od_coeff)s0;
3300 y[1] = (od_coeff)s1;
3301 y[2] = (od_coeff)s2;
3302 y[3] = (od_coeff)s3;
3303 y[4] = (od_coeff)s4;
3304 y[5] = (od_coeff)s5;
3305 y[6] = (od_coeff)s6;
3306 y[7] = (od_coeff)s7;
3307 y[8] = (od_coeff)s8;
3308 y[9] = (od_coeff)s9;
3309 y[10] = (od_coeff)sa;
3310 y[11] = (od_coeff)sb;
3311 y[12] = (od_coeff)sc;
3312 y[13] = (od_coeff)sd;
3313 y[14] = (od_coeff)se;
3314 y[15] = (od_coeff)sf;
3315}
3316
3317void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]) {
3318 int s0;
3319 int s1;
3320 int s2;
3321 int s3;
3322 int s4;
3323 int s5;
3324 int s6;
3325 int s7;
3326 int s8;
3327 int s9;
3328 int sa;
3329 int sb;
3330 int sc;
3331 int sd;
3332 int se;
3333 int sf;
3334 s0 = y[0];
3335 s8 = y[1];
3336 s4 = y[2];
3337 sc = y[3];
3338 s2 = y[4];
3339 sa = y[5];
3340 s6 = y[6];
3341 se = y[7];
3342 s1 = y[8];
3343 s9 = y[9];
3344 s5 = y[10];
3345 sd = y[11];
3346 s3 = y[12];
3347 sb = y[13];
3348 s7 = y[14];
3349 sf = y[15];
3350 OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3351 x[0*xstride] = (od_coeff)s0;
3352 x[1*xstride] = (od_coeff)s1;
3353 x[2*xstride] = (od_coeff)s2;
3354 x[3*xstride] = (od_coeff)s3;
3355 x[4*xstride] = (od_coeff)s4;
3356 x[5*xstride] = (od_coeff)s5;
3357 x[6*xstride] = (od_coeff)s6;
3358 x[7*xstride] = (od_coeff)s7;
3359 x[8*xstride] = (od_coeff)s8;
3360 x[9*xstride] = (od_coeff)s9;
3361 x[10*xstride] = (od_coeff)sa;
3362 x[11*xstride] = (od_coeff)sb;
3363 x[12*xstride] = (od_coeff)sc;
3364 x[13*xstride] = (od_coeff)sd;
3365 x[14*xstride] = (od_coeff)se;
3366 x[15*xstride] = (od_coeff)sf;
3367}
3368
3369void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride) {
3370 int s0;
3371 int s1;
3372 int s2;
3373 int s3;
3374 int s4;
3375 int s5;
3376 int s6;
3377 int s7;
3378 int s8;
3379 int s9;
3380 int sa;
3381 int sb;
3382 int sc;
3383 int sd;
3384 int se;
3385 int sf;
3386 s0 = x[15*xstride];
3387 s8 = x[14*xstride];
3388 s4 = x[13*xstride];
3389 sc = x[12*xstride];
3390 s2 = x[11*xstride];
3391 sa = x[10*xstride];
3392 s6 = x[9*xstride];
3393 se = x[8*xstride];
3394 s1 = x[7*xstride];
3395 s9 = x[6*xstride];
3396 s5 = x[5*xstride];
3397 sd = x[4*xstride];
3398 s3 = x[3*xstride];
3399 sb = x[2*xstride];
3400 s7 = x[1*xstride];
3401 sf = x[0*xstride];
3402 OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3403 y[0] = (od_coeff)sf;
3404 y[1] = (od_coeff)se;
3405 y[2] = (od_coeff)sd;
3406 y[3] = (od_coeff)sc;
3407 y[4] = (od_coeff)sb;
3408 y[5] = (od_coeff)sa;
3409 y[6] = (od_coeff)s9;
3410 y[7] = (od_coeff)s8;
3411 y[8] = (od_coeff)s7;
3412 y[9] = (od_coeff)s6;
3413 y[10] = (od_coeff)s5;
3414 y[11] = (od_coeff)s4;
3415 y[12] = (od_coeff)s3;
3416 y[13] = (od_coeff)s2;
3417 y[14] = (od_coeff)s1;
3418 y[15] = (od_coeff)s0;
3419}
3420
3421void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]) {
3422 int s0;
3423 int s1;
3424 int s2;
3425 int s3;
3426 int s4;
3427 int s5;
3428 int s6;
3429 int s7;
3430 int s8;
3431 int s9;
3432 int sa;
3433 int sb;
3434 int sc;
3435 int sd;
3436 int se;
3437 int sf;
3438 s0 = y[15];
3439 s8 = y[14];
3440 s4 = y[13];
3441 sc = y[12];
3442 s2 = y[11];
3443 sa = y[10];
3444 s6 = y[9];
3445 se = y[8];
3446 s1 = y[7];
3447 s9 = y[6];
3448 s5 = y[5];
3449 sd = y[4];
3450 s3 = y[3];
3451 sb = y[2];
3452 s7 = y[1];
3453 sf = y[0];
3454 OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
3455 x[0*xstride] = (od_coeff)sf;
3456 x[1*xstride] = (od_coeff)se;
3457 x[2*xstride] = (od_coeff)sd;
3458 x[3*xstride] = (od_coeff)sc;
3459 x[4*xstride] = (od_coeff)sb;
3460 x[5*xstride] = (od_coeff)sa;
3461 x[6*xstride] = (od_coeff)s9;
3462 x[7*xstride] = (od_coeff)s8;
3463 x[8*xstride] = (od_coeff)s7;
3464 x[9*xstride] = (od_coeff)s6;
3465 x[10*xstride] = (od_coeff)s5;
3466 x[11*xstride] = (od_coeff)s4;
3467 x[12*xstride] = (od_coeff)s3;
3468 x[13*xstride] = (od_coeff)s2;
3469 x[14*xstride] = (od_coeff)s1;
3470 x[15*xstride] = (od_coeff)s0;
3471}
Monty Montgomery2cb52ba2017-07-17 18:27:27 -04003472
3473void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride) {
3474 /*215 adds, 38 shifts, 87 "muls".*/
3475 int t0;
3476 int t1;
3477 int t2;
3478 int t3;
3479 int t4;
3480 int t5;
3481 int t6;
3482 int t7;
3483 int t8;
3484 int t9;
3485 int ta;
3486 int tb;
3487 int tc;
3488 int td;
3489 int te;
3490 int tf;
3491 int tg;
3492 int th;
3493 int ti;
3494 int tj;
3495 int tk;
3496 int tl;
3497 int tm;
3498 int tn;
3499 int to;
3500 int tp;
3501 int tq;
3502 int tr;
3503 int ts;
3504 int tt;
3505 int tu;
3506 int tv;
3507 t0 = x[0*xstride];
3508 tg = x[1*xstride];
3509 t8 = x[2*xstride];
3510 to = x[3*xstride];
3511 t4 = x[4*xstride];
3512 tk = x[5*xstride];
3513 tc = x[6*xstride];
3514 ts = x[7*xstride];
3515 t2 = x[8*xstride];
3516 ti = x[9*xstride];
3517 ta = x[10*xstride];
3518 tq = x[11*xstride];
3519 t6 = x[12*xstride];
3520 tm = x[13*xstride];
3521 te = x[14*xstride];
3522 tu = x[15*xstride];
3523 t1 = x[16*xstride];
3524 th = x[17*xstride];
3525 t9 = x[18*xstride];
3526 tp = x[19*xstride];
3527 t5 = x[20*xstride];
3528 tl = x[21*xstride];
3529 td = x[22*xstride];
3530 tt = x[23*xstride];
3531 t3 = x[24*xstride];
3532 tj = x[25*xstride];
3533 tb = x[26*xstride];
3534 tr = x[27*xstride];
3535 t7 = x[28*xstride];
3536 tn = x[29*xstride];
3537 tf = x[30*xstride];
3538 tv = x[31*xstride];
3539 OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
3540 t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
3541 y[0] = (od_coeff)t0;
3542 y[1] = (od_coeff)t1;
3543 y[2] = (od_coeff)t2;
3544 y[3] = (od_coeff)t3;
3545 y[4] = (od_coeff)t4;
3546 y[5] = (od_coeff)t5;
3547 y[6] = (od_coeff)t6;
3548 y[7] = (od_coeff)t7;
3549 y[8] = (od_coeff)t8;
3550 y[9] = (od_coeff)t9;
3551 y[10] = (od_coeff)ta;
3552 y[11] = (od_coeff)tb;
3553 y[12] = (od_coeff)tc;
3554 y[13] = (od_coeff)td;
3555 y[14] = (od_coeff)te;
3556 y[15] = (od_coeff)tf;
3557 y[16] = (od_coeff)tg;
3558 y[17] = (od_coeff)th;
3559 y[18] = (od_coeff)ti;
3560 y[19] = (od_coeff)tj;
3561 y[20] = (od_coeff)tk;
3562 y[21] = (od_coeff)tl;
3563 y[22] = (od_coeff)tm;
3564 y[23] = (od_coeff)tn;
3565 y[24] = (od_coeff)to;
3566 y[25] = (od_coeff)tp;
3567 y[26] = (od_coeff)tq;
3568 y[27] = (od_coeff)tr;
3569 y[28] = (od_coeff)ts;
3570 y[29] = (od_coeff)tt;
3571 y[30] = (od_coeff)tu;
3572 y[31] = (od_coeff)tv;
3573}
3574
3575void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]) {
3576 int t0;
3577 int t1;
3578 int t2;
3579 int t3;
3580 int t4;
3581 int t5;
3582 int t6;
3583 int t7;
3584 int t8;
3585 int t9;
3586 int ta;
3587 int tb;
3588 int tc;
3589 int td;
3590 int te;
3591 int tf;
3592 int tg;
3593 int th;
3594 int ti;
3595 int tj;
3596 int tk;
3597 int tl;
3598 int tm;
3599 int tn;
3600 int to;
3601 int tp;
3602 int tq;
3603 int tr;
3604 int ts;
3605 int tt;
3606 int tu;
3607 int tv;
3608 t0 = y[0];
3609 tg = y[1];
3610 t8 = y[2];
3611 to = y[3];
3612 t4 = y[4];
3613 tk = y[5];
3614 tc = y[6];
3615 ts = y[7];
3616 t2 = y[8];
3617 ti = y[9];
3618 ta = y[10];
3619 tq = y[11];
3620 t6 = y[12];
3621 tm = y[13];
3622 te = y[14];
3623 tu = y[15];
3624 t1 = y[16];
3625 th = y[17];
3626 t9 = y[18];
3627 tp = y[19];
3628 t5 = y[20];
3629 tl = y[21];
3630 td = y[22];
3631 tt = y[23];
3632 t3 = y[24];
3633 tj = y[25];
3634 tb = y[26];
3635 tr = y[27];
3636 t7 = y[28];
3637 tn = y[29];
3638 tf = y[30];
3639 tv = y[31];
3640 OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
3641 t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
3642 x[0*xstride] = (od_coeff)t0;
3643 x[1*xstride] = (od_coeff)t1;
3644 x[2*xstride] = (od_coeff)t2;
3645 x[3*xstride] = (od_coeff)t3;
3646 x[4*xstride] = (od_coeff)t4;
3647 x[5*xstride] = (od_coeff)t5;
3648 x[6*xstride] = (od_coeff)t6;
3649 x[7*xstride] = (od_coeff)t7;
3650 x[8*xstride] = (od_coeff)t8;
3651 x[9*xstride] = (od_coeff)t9;
3652 x[10*xstride] = (od_coeff)ta;
3653 x[11*xstride] = (od_coeff)tb;
3654 x[12*xstride] = (od_coeff)tc;
3655 x[13*xstride] = (od_coeff)td;
3656 x[14*xstride] = (od_coeff)te;
3657 x[15*xstride] = (od_coeff)tf;
3658 x[16*xstride] = (od_coeff)tg;
3659 x[17*xstride] = (od_coeff)th;
3660 x[18*xstride] = (od_coeff)ti;
3661 x[19*xstride] = (od_coeff)tj;
3662 x[20*xstride] = (od_coeff)tk;
3663 x[21*xstride] = (od_coeff)tl;
3664 x[22*xstride] = (od_coeff)tm;
3665 x[23*xstride] = (od_coeff)tn;
3666 x[24*xstride] = (od_coeff)to;
3667 x[25*xstride] = (od_coeff)tp;
3668 x[26*xstride] = (od_coeff)tq;
3669 x[27*xstride] = (od_coeff)tr;
3670 x[28*xstride] = (od_coeff)ts;
3671 x[29*xstride] = (od_coeff)tt;
3672 x[30*xstride] = (od_coeff)tu;
3673 x[31*xstride] = (od_coeff)tv;
3674}
Monty Montgomerya4e245a2017-07-22 00:48:31 -04003675
3676#if CONFIG_TX64X64
3677void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride) {
3678 int t0;
3679 int t1;
3680 int t2;
3681 int t3;
3682 int t4;
3683 int t5;
3684 int t6;
3685 int t7;
3686 int t8;
3687 int t9;
3688 int ta;
3689 int tb;
3690 int tc;
3691 int td;
3692 int te;
3693 int tf;
3694 int tg;
3695 int th;
3696 int ti;
3697 int tj;
3698 int tk;
3699 int tl;
3700 int tm;
3701 int tn;
3702 int to;
3703 int tp;
3704 int tq;
3705 int tr;
3706 int ts;
3707 int tt;
3708 int tu;
3709 int tv;
3710 int tw;
3711 int tx;
3712 int ty;
3713 int tz;
3714 int tA;
3715 int tB;
3716 int tC;
3717 int tD;
3718 int tE;
3719 int tF;
3720 int tG;
3721 int tH;
3722 int tI;
3723 int tJ;
3724 int tK;
3725 int tL;
3726 int tM;
3727 int tN;
3728 int tO;
3729 int tP;
3730 int tQ;
3731 int tR;
3732 int tS;
3733 int tT;
3734 int tU;
3735 int tV;
3736 int tW;
3737 int tX;
3738 int tY;
3739 int tZ;
3740 int t_;
3741 int t;
3742 t0 = x[0*xstride];
3743 tw = x[1*xstride];
3744 tg = x[2*xstride];
3745 tM = x[3*xstride];
3746 t8 = x[4*xstride];
3747 tE = x[5*xstride];
3748 to = x[6*xstride];
3749 tU = x[7*xstride];
3750 t4 = x[8*xstride];
3751 tA = x[9*xstride];
3752 tk = x[10*xstride];
3753 tQ = x[11*xstride];
3754 tc = x[12*xstride];
3755 tI = x[13*xstride];
3756 ts = x[14*xstride];
3757 tY = x[15*xstride];
3758 t2 = x[16*xstride];
3759 ty = x[17*xstride];
3760 ti = x[18*xstride];
3761 tO = x[19*xstride];
3762 ta = x[20*xstride];
3763 tG = x[21*xstride];
3764 tq = x[22*xstride];
3765 tW = x[23*xstride];
3766 t6 = x[24*xstride];
3767 tC = x[25*xstride];
3768 tm = x[26*xstride];
3769 tS = x[27*xstride];
3770 te = x[28*xstride];
3771 tK = x[29*xstride];
3772 tu = x[30*xstride];
3773 t_ = x[31*xstride];
3774 t1 = x[32*xstride];
3775 tx = x[33*xstride];
3776 th = x[34*xstride];
3777 tN = x[35*xstride];
3778 t9 = x[36*xstride];
3779 tF = x[37*xstride];
3780 tp = x[38*xstride];
3781 tV = x[39*xstride];
3782 t5 = x[40*xstride];
3783 tB = x[41*xstride];
3784 tl = x[42*xstride];
3785 tR = x[43*xstride];
3786 td = x[44*xstride];
3787 tJ = x[45*xstride];
3788 tt = x[46*xstride];
3789 tZ = x[47*xstride];
3790 t3 = x[48*xstride];
3791 tz = x[49*xstride];
3792 tj = x[50*xstride];
3793 tP = x[51*xstride];
3794 tb = x[52*xstride];
3795 tH = x[53*xstride];
3796 tr = x[54*xstride];
3797 tX = x[55*xstride];
3798 t7 = x[56*xstride];
3799 tD = x[57*xstride];
3800 tn = x[58*xstride];
3801 tT = x[59*xstride];
3802 tf = x[60*xstride];
3803 tL = x[61*xstride];
3804 tv = x[62*xstride];
3805 t = x[63*xstride];
3806 OD_FDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
3807 t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
3808 th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
3809 tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
3810 y[0] = (od_coeff)t0;
3811 y[1] = (od_coeff)t1;
3812 y[2] = (od_coeff)t2;
3813 y[3] = (od_coeff)t3;
3814 y[4] = (od_coeff)t4;
3815 y[5] = (od_coeff)t5;
3816 y[6] = (od_coeff)t6;
3817 y[7] = (od_coeff)t7;
3818 y[8] = (od_coeff)t8;
3819 y[9] = (od_coeff)t9;
3820 y[10] = (od_coeff)ta;
3821 y[11] = (od_coeff)tb;
3822 y[12] = (od_coeff)tc;
3823 y[13] = (od_coeff)td;
3824 y[14] = (od_coeff)te;
3825 y[15] = (od_coeff)tf;
3826 y[16] = (od_coeff)tg;
3827 y[17] = (od_coeff)th;
3828 y[18] = (od_coeff)ti;
3829 y[19] = (od_coeff)tj;
3830 y[20] = (od_coeff)tk;
3831 y[21] = (od_coeff)tl;
3832 y[22] = (od_coeff)tm;
3833 y[23] = (od_coeff)tn;
3834 y[24] = (od_coeff)to;
3835 y[25] = (od_coeff)tp;
3836 y[26] = (od_coeff)tq;
3837 y[27] = (od_coeff)tr;
3838 y[28] = (od_coeff)ts;
3839 y[29] = (od_coeff)tt;
3840 y[30] = (od_coeff)tu;
3841 y[31] = (od_coeff)tv;
3842 y[32] = (od_coeff)tw;
3843 y[33] = (od_coeff)tx;
3844 y[34] = (od_coeff)ty;
3845 y[35] = (od_coeff)tz;
3846 y[36] = (od_coeff)tA;
3847 y[37] = (od_coeff)tB;
3848 y[38] = (od_coeff)tC;
3849 y[39] = (od_coeff)tD;
3850 y[40] = (od_coeff)tE;
3851 y[41] = (od_coeff)tF;
3852 y[41] = (od_coeff)tF;
3853 y[42] = (od_coeff)tG;
3854 y[43] = (od_coeff)tH;
3855 y[44] = (od_coeff)tI;
3856 y[45] = (od_coeff)tJ;
3857 y[46] = (od_coeff)tK;
3858 y[47] = (od_coeff)tL;
3859 y[48] = (od_coeff)tM;
3860 y[49] = (od_coeff)tN;
3861 y[50] = (od_coeff)tO;
3862 y[51] = (od_coeff)tP;
3863 y[52] = (od_coeff)tQ;
3864 y[53] = (od_coeff)tR;
3865 y[54] = (od_coeff)tS;
3866 y[55] = (od_coeff)tT;
3867 y[56] = (od_coeff)tU;
3868 y[57] = (od_coeff)tV;
3869 y[58] = (od_coeff)tW;
3870 y[59] = (od_coeff)tX;
3871 y[60] = (od_coeff)tY;
3872 y[61] = (od_coeff)tZ;
3873 y[62] = (od_coeff)t_;
3874 y[63] = (od_coeff)t;
3875}
3876
3877void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]) {
3878 int t0;
3879 int t1;
3880 int t2;
3881 int t3;
3882 int t4;
3883 int t5;
3884 int t6;
3885 int t7;
3886 int t8;
3887 int t9;
3888 int ta;
3889 int tb;
3890 int tc;
3891 int td;
3892 int te;
3893 int tf;
3894 int tg;
3895 int th;
3896 int ti;
3897 int tj;
3898 int tk;
3899 int tl;
3900 int tm;
3901 int tn;
3902 int to;
3903 int tp;
3904 int tq;
3905 int tr;
3906 int ts;
3907 int tt;
3908 int tu;
3909 int tv;
3910 int tw;
3911 int tx;
3912 int ty;
3913 int tz;
3914 int tA;
3915 int tB;
3916 int tC;
3917 int tD;
3918 int tE;
3919 int tF;
3920 int tG;
3921 int tH;
3922 int tI;
3923 int tJ;
3924 int tK;
3925 int tL;
3926 int tM;
3927 int tN;
3928 int tO;
3929 int tP;
3930 int tQ;
3931 int tR;
3932 int tS;
3933 int tT;
3934 int tU;
3935 int tV;
3936 int tW;
3937 int tX;
3938 int tY;
3939 int tZ;
3940 int t_;
3941 int t;
3942 t0 = y[0];
3943 tw = y[1];
3944 tg = y[2];
3945 tM = y[3];
3946 t8 = y[4];
3947 tE = y[5];
3948 to = y[6];
3949 tU = y[7];
3950 t4 = y[8];
3951 tA = y[9];
3952 tk = y[10];
3953 tQ = y[11];
3954 tc = y[12];
3955 tI = y[13];
3956 ts = y[14];
3957 tY = y[15];
3958 t2 = y[16];
3959 ty = y[17];
3960 ti = y[18];
3961 tO = y[19];
3962 ta = y[20];
3963 tG = y[21];
3964 tq = y[22];
3965 tW = y[23];
3966 t6 = y[24];
3967 tC = y[25];
3968 tm = y[26];
3969 tS = y[27];
3970 te = y[28];
3971 tK = y[29];
3972 tu = y[30];
3973 t_ = y[31];
3974 t1 = y[32];
3975 tx = y[33];
3976 th = y[34];
3977 tN = y[35];
3978 t9 = y[36];
3979 tF = y[37];
3980 tp = y[38];
3981 tV = y[39];
3982 t5 = y[40];
3983 tB = y[41];
3984 tl = y[42];
3985 tR = y[43];
3986 td = y[44];
3987 tJ = y[45];
3988 tt = y[46];
3989 tZ = y[47];
3990 t3 = y[48];
3991 tz = y[49];
3992 tj = y[50];
3993 tP = y[51];
3994 tb = y[52];
3995 tH = y[53];
3996 tr = y[54];
3997 tX = y[55];
3998 t7 = y[56];
3999 tD = y[57];
4000 tn = y[58];
4001 tT = y[59];
4002 tf = y[60];
4003 tL = y[61];
4004 tv = y[62];
4005 t = y[63];
4006 OD_IDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
4007 t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
4008 th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
4009 tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
4010 x[0*xstride] = (od_coeff)t0;
4011 x[1*xstride] = (od_coeff)t1;
4012 x[2*xstride] = (od_coeff)t2;
4013 x[3*xstride] = (od_coeff)t3;
4014 x[4*xstride] = (od_coeff)t4;
4015 x[5*xstride] = (od_coeff)t5;
4016 x[6*xstride] = (od_coeff)t6;
4017 x[7*xstride] = (od_coeff)t7;
4018 x[8*xstride] = (od_coeff)t8;
4019 x[9*xstride] = (od_coeff)t9;
4020 x[10*xstride] = (od_coeff)ta;
4021 x[11*xstride] = (od_coeff)tb;
4022 x[12*xstride] = (od_coeff)tc;
4023 x[13*xstride] = (od_coeff)td;
4024 x[14*xstride] = (od_coeff)te;
4025 x[15*xstride] = (od_coeff)tf;
4026 x[16*xstride] = (od_coeff)tg;
4027 x[17*xstride] = (od_coeff)th;
4028 x[18*xstride] = (od_coeff)ti;
4029 x[19*xstride] = (od_coeff)tj;
4030 x[20*xstride] = (od_coeff)tk;
4031 x[21*xstride] = (od_coeff)tl;
4032 x[22*xstride] = (od_coeff)tm;
4033 x[23*xstride] = (od_coeff)tn;
4034 x[24*xstride] = (od_coeff)to;
4035 x[25*xstride] = (od_coeff)tp;
4036 x[26*xstride] = (od_coeff)tq;
4037 x[27*xstride] = (od_coeff)tr;
4038 x[28*xstride] = (od_coeff)ts;
4039 x[29*xstride] = (od_coeff)tt;
4040 x[30*xstride] = (od_coeff)tu;
4041 x[31*xstride] = (od_coeff)tv;
4042 x[32*xstride] = (od_coeff)tw;
4043 x[33*xstride] = (od_coeff)tx;
4044 x[34*xstride] = (od_coeff)ty;
4045 x[35*xstride] = (od_coeff)tz;
4046 x[36*xstride] = (od_coeff)tA;
4047 x[37*xstride] = (od_coeff)tB;
4048 x[38*xstride] = (od_coeff)tC;
4049 x[39*xstride] = (od_coeff)tD;
4050 x[40*xstride] = (od_coeff)tE;
4051 x[41*xstride] = (od_coeff)tF;
4052 x[41*xstride] = (od_coeff)tF;
4053 x[42*xstride] = (od_coeff)tG;
4054 x[43*xstride] = (od_coeff)tH;
4055 x[44*xstride] = (od_coeff)tI;
4056 x[45*xstride] = (od_coeff)tJ;
4057 x[46*xstride] = (od_coeff)tK;
4058 x[47*xstride] = (od_coeff)tL;
4059 x[48*xstride] = (od_coeff)tM;
4060 x[49*xstride] = (od_coeff)tN;
4061 x[50*xstride] = (od_coeff)tO;
4062 x[51*xstride] = (od_coeff)tP;
4063 x[52*xstride] = (od_coeff)tQ;
4064 x[53*xstride] = (od_coeff)tR;
4065 x[54*xstride] = (od_coeff)tS;
4066 x[55*xstride] = (od_coeff)tT;
4067 x[56*xstride] = (od_coeff)tU;
4068 x[57*xstride] = (od_coeff)tV;
4069 x[58*xstride] = (od_coeff)tW;
4070 x[59*xstride] = (od_coeff)tX;
4071 x[60*xstride] = (od_coeff)tY;
4072 x[61*xstride] = (od_coeff)tZ;
4073 x[62*xstride] = (od_coeff)t_;
4074 x[63*xstride] = (od_coeff)t;
4075}
4076#endif