blob: 166f6f962ff4d79e366e1f428a1309a7c2f54826 [file] [log] [blame]
Monty Montgomery02078a32017-07-11 21:22:29 -04001#include "av1/common/daala_tx.h"
2#include "av1/common/odintrin.h"
3
4/* clang-format off */
5
6# define OD_DCT_RSHIFT(_a, _b) OD_UNBIASED_RSHIFT32(_a, _b)
7
8/* TODO: Daala DCT overflow checks need to be ported as a later test */
9# if defined(OD_DCT_CHECK_OVERFLOW)
10# else
11# define OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx)
12# endif
13
/* OD_FDCT_2(p0, p1): in-place 2-point forward DCT built from three integer
   lifting steps (multiply, add rounding offset, right shift).
   p0 and p1 must be modifiable integer lvalues without side effects; each is
   read and written more than once.  Expands to a single do/while(0)
   statement.  OD_DCT_OVERFLOW_CHECK is invoked before every multiply. */
#define OD_FDCT_2(p0, p1) \
  /* Embedded 2-point orthonormal Type-II fDCT. */ \
  do { \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 13573, 16384, 100); \
    p0 -= (p1*13573 + 16384) >> 15; \
    /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 5793, 4096, 101); \
    p1 += (p0*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 3393, 4096, 102); \
    p0 -= (p1*3393 + 4096) >> 13; \
  } \
  while (0)
28
/* OD_IDCT_2(p0, p1): in-place 2-point inverse DCT.  Applies the lifting
   steps of OD_FDCT_2 in reverse order with opposite signs.
   p0 and p1 must be modifiable integer lvalues without side effects; each is
   read and written more than once.  Expands to a single do/while(0)
   statement. */
#define OD_IDCT_2(p0, p1) \
  /* Embedded 2-point orthonormal Type-II iDCT. */ \
  do { \
    /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    p0 += (p1*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    p1 -= (p0*5793 + 4096) >> 13; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    p0 += (p1*13573 + 16384) >> 15; \
  } \
  while (0)
40
/* OD_FDCT_2_ASYM(p0, p1, p1h): in-place 2-point asymmetric forward DCT.
   Adds p1h to p0, then replaces p1 with the difference p0 - p1.
   p0 and p1 must be modifiable integer lvalues; p1h is only read.
   OD_FDCT_4 passes OD_DCT_RSHIFT(p1, 1) as p1h.
   Expands to a single do/while(0) statement. */
#define OD_FDCT_2_ASYM(p0, p1, p1h) \
  /* Embedded 2-point asymmetric Type-II fDCT. */ \
  do { \
    p0 += p1h; \
    p1 = p0 - p1; \
  } \
  while (0)
48
/* OD_IDCT_2_ASYM(p0, p1, p1h): in-place 2-point asymmetric inverse DCT.
   Replaces p1 with p0 - p1, stores OD_DCT_RSHIFT(p1, 1) of that new value in
   p1h, and subtracts p1h from p0.
   All three arguments must be modifiable integer lvalues; p1h is an output
   that the caller may reuse (OD_IDCT_4 does).
   Expands to a single do/while(0) statement. */
#define OD_IDCT_2_ASYM(p0, p1, p1h) \
  /* Embedded 2-point asymmetric Type-II iDCT. */ \
  do { \
    p1 = p0 - p1; \
    p1h = OD_DCT_RSHIFT(p1, 1); \
    p0 -= p1h; \
  } \
  while (0)
57
/* OD_FDST_2(p0, p1): in-place 2-point forward DST built from three integer
   lifting steps (multiply, add rounding offset, right shift).
   p0 and p1 must be modifiable integer lvalues without side effects; each is
   read and written more than once.  Expands to a single do/while(0)
   statement.  OD_DCT_OVERFLOW_CHECK is invoked before every multiply. */
#define OD_FDST_2(p0, p1) \
  /* Embedded 2-point orthonormal Type-IV fDST. */ \
  do { \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 103); \
    p0 -= (p1*10947 + 8192) >> 14; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 473, 256, 104); \
    p1 += (p0*473 + 256) >> 9; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 105); \
    p0 -= (p1*10947 + 8192) >> 14; \
  } \
  while (0)
72
/* OD_IDST_2(p0, p1): in-place 2-point inverse DST.  Applies the lifting
   steps of OD_FDST_2 in reverse order with opposite signs.
   p0 and p1 must be modifiable integer lvalues without side effects; each is
   read and written more than once.  Expands to a single do/while(0)
   statement. */
#define OD_IDST_2(p0, p1) \
  /* Embedded 2-point orthonormal Type-IV iDST. */ \
  do { \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    p0 += (p1*10947 + 8192) >> 14; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    p1 -= (p0*473 + 256) >> 9; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    p0 += (p1*10947 + 8192) >> 14; \
  } \
  while (0)
84
/* OD_FDST_2_ASYM(p0, p1): in-place 2-point asymmetric forward DST built from
   three integer lifting steps (multiply, add rounding offset, right shift).
   p0 and p1 must be modifiable integer lvalues without side effects; each is
   read and written more than once.  Expands to a single do/while(0)
   statement.  OD_DCT_OVERFLOW_CHECK is invoked before every multiply. */
#define OD_FDST_2_ASYM(p0, p1) \
  /* Embedded 2-point asymmetric Type-IV fDST. */ \
  do { \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 11507, 8192, 187); \
    p0 -= (p1*11507 + 8192) >> 14; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 669, 512, 188); \
    p1 += (p0*669 + 512) >> 10; \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 4573, 2048, 189); \
    p0 -= (p1*4573 + 2048) >> 12; \
  } \
  while (0)
99
/* OD_IDST_2_ASYM(p0, p1): in-place 2-point asymmetric inverse DST.  Applies
   the lifting steps of OD_FDST_2_ASYM in reverse order with opposite signs.
   p0 and p1 must be modifiable integer lvalues without side effects; each is
   read and written more than once.  Expands to a single do/while(0)
   statement. */
#define OD_IDST_2_ASYM(p0, p1) \
  /* Embedded 2-point asymmetric Type-IV iDST. */ \
  do { \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    p0 += (p1*4573 + 2048) >> 12; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    p1 -= (p0*669 + 512) >> 10; \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    p0 += (p1*11507 + 8192) >> 14; \
  } \
  while (0)
111
/* OD_FDCT_4(q0, q2, q1, q3): in-place 4-point forward DCT.
   Runs a butterfly stage on (q0, q3) and (q2, q1), then finishes with
   OD_FDCT_2_ASYM on (q0, q2) and OD_FDST_2_ASYM on (q3, q1).
   All four arguments must be modifiable integer lvalues without side
   effects.  Note the parameter order (q0, q2, q1, q3).
   Declares block-local ints q2h and q3h; arguments must not use those
   names.  Expands to a single do/while(0) statement. */
#define OD_FDCT_4(q0, q2, q1, q3) \
  /* Embedded 4-point orthonormal Type-II fDCT. */ \
  do { \
    int q2h; \
    int q3h; \
    q3 = q0 - q3; \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 -= q3h; \
    q2 += q1; \
    q2h = OD_DCT_RSHIFT(q2, 1); \
    q1 = q2h - q1; \
    OD_FDCT_2_ASYM(q0, q2, q2h); \
    OD_FDST_2_ASYM(q3, q1); \
  } \
  while (0)
127
/* OD_IDCT_4(q0, q2, q1, q3): in-place 4-point inverse DCT.
   Runs OD_IDST_2_ASYM on (q3, q2) and OD_IDCT_2_ASYM on (q0, q1), then a
   butterfly stage that recombines the two halves.
   All four arguments must be modifiable integer lvalues without side
   effects.  Note the parameter order (q0, q2, q1, q3).
   Declares block-local ints q1h and q3h; q1h is filled in by
   OD_IDCT_2_ASYM.  Arguments must not use those names.
   Expands to a single do/while(0) statement. */
#define OD_IDCT_4(q0, q2, q1, q3) \
  /* Embedded 4-point orthonormal Type-II iDCT. */ \
  do { \
    int q1h; \
    int q3h; \
    OD_IDST_2_ASYM(q3, q2); \
    OD_IDCT_2_ASYM(q0, q1, q1h); \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 += q3h; \
    q3 = q0 - q3; \
    q2 = q1h - q2; \
    q1 -= q2; \
  } \
  while (0)
142
/* OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h): in-place 4-point asymmetric
   forward DCT.
   q0, q2, q1 and q3 must be modifiable integer lvalues without side effects;
   q2h and q3h are only read.  OD_FDCT_8 passes OD_DCT_RSHIFT(x, 1) of the
   corresponding value for each "h" argument.
   Finishes with OD_FDCT_2 on (q0, q2) and OD_FDST_2 on (q3, q1).
   Expands to a single do/while(0) statement. */
#define OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h) \
  /* Embedded 4-point asymmetric Type-II fDCT. */ \
  do { \
    q0 += q3h; \
    q3 = q0 - q3; \
    q1 = q2h - q1; \
    q2 = q1 - q2; \
    OD_FDCT_2(q0, q2); \
    OD_FDST_2(q3, q1); \
  } \
  while (0)
154
/* OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h): in-place 4-point asymmetric
   inverse DCT.
   Runs OD_IDST_2 on (q3, q2) and OD_IDCT_2 on (q0, q1), then a butterfly
   stage.  All six arguments must be modifiable integer lvalues without side
   effects.  q1h and q3h are outputs: each receives OD_DCT_RSHIFT(x, 1) of
   the matching value as it stands at that point in the butterfly stage.
   Expands to a single do/while(0) statement. */
#define OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h) \
  /* Embedded 4-point asymmetric Type-II iDCT. */ \
  do { \
    OD_IDST_2(q3, q2); \
    OD_IDCT_2(q0, q1); \
    q1 = q2 - q1; \
    q1h = OD_DCT_RSHIFT(q1, 1); \
    q2 = q1h - q2; \
    q3 = q0 - q3; \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 -= q3h; \
  } \
  while (0)
168
/* OD_FDST_4(q0, q2, q1, q3): in-place 4-point forward DST.
   Three lifting steps on (q2, q1), a butterfly stage, then three lifting
   steps each on (q2, q1) and (q3, q0).
   All four arguments must be modifiable integer lvalues without side
   effects.  Note the parameter order (q0, q2, q1, q3).
   Declares block-local ints q0h and q1h; arguments must not use those
   names.  OD_DCT_OVERFLOW_CHECK is invoked before every multiply.
   Expands to a single do/while(0) statement. */
#define OD_FDST_4(q0, q2, q1, q3) \
  /* Embedded 4-point orthonormal Type-IV fDST. */ \
  do { \
    int q0h; \
    int q1h; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 13573, 16384, 190); \
    q2 += (q1*13573 + 16384) >> 15; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(q2, 5793, 4096, 191); \
    q1 -= (q2*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 3393, 4096, 192); \
    q2 += (q1*3393 + 4096) >> 13; \
    q0 += q2; \
    q0h = OD_DCT_RSHIFT(q0, 1); \
    q2 = q0h - q2; \
    q1 += q3; \
    q1h = OD_DCT_RSHIFT(q1, 1); \
    q3 -= q1h; \
    /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~=
        0.524455699240090 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 537, 512, 193); \
    q2 -= (q1*537 + 512) >> 10; \
    /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
    OD_DCT_OVERFLOW_CHECK(q2, 1609, 1024, 194); \
    q1 += (q2*1609 + 1024) >> 11; \
    /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~=
        0.223847182092655 */ \
    OD_DCT_OVERFLOW_CHECK(q1, 7335, 16384, 195); \
    q2 += (q1*7335 + 16384) >> 15; \
    /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~=
        0.6215036383171189 */ \
    OD_DCT_OVERFLOW_CHECK(q0, 5091, 4096, 196); \
    q3 += (q0*5091 + 4096) >> 13; \
    /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
    OD_DCT_OVERFLOW_CHECK(q3, 5681, 2048, 197); \
    q0 -= (q3*5681 + 2048) >> 12; \
    /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~=
        0.52204745462729 */ \
    OD_DCT_OVERFLOW_CHECK(q0, 4277, 4096, 198); \
    q3 += (q0*4277 + 4096) >> 13; \
  } \
  while (0)
213
/* OD_IDST_4(q0, q2, q1, q3): in-place 4-point inverse DST.
   Three lifting steps each on (q3, q0) and (q1, q2), a butterfly stage, then
   three lifting steps on (q1, q2).
   All four arguments must be modifiable integer lvalues without side
   effects.  Note the parameter order (q0, q2, q1, q3).
   Declares block-local ints q0h and q2h; arguments must not use those
   names.  Expands to a single do/while(0) statement. */
#define OD_IDST_4(q0, q2, q1, q3) \
  /* Embedded 4-point orthonormal Type-IV iDST. */ \
  do { \
    int q0h; \
    int q2h; \
    /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~=
        0.52204745462729 */ \
    q3 -= (q0*4277 + 4096) >> 13; \
    /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
    q0 += (q3*5681 + 2048) >> 12; \
    /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~=
        0.6215036383171189 */ \
    q3 -= (q0*5091 + 4096) >> 13; \
    /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~=
        0.223847182092655 */ \
    q1 -= (q2*7335 + 16384) >> 15; \
    /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
    q2 -= (q1*1609 + 1024) >> 11; \
    /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~=
        0.524455699240090 */ \
    q1 += (q2*537 + 512) >> 10; \
    q2h = OD_DCT_RSHIFT(q2, 1); \
    q3 += q2h; \
    q2 -= q3; \
    q0h = OD_DCT_RSHIFT(q0, 1); \
    q1 = q0h - q1; \
    q0 -= q1; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    q1 -= (q2*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    q2 += (q1*5793 + 4096) >> 13; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    q1 -= (q2*13573 + 16384) >> 15; \
  } \
  while (0)
249
/* OD_FDST_4_ASYM(t0, t0h, t2, t1, t3): in-place 4-point asymmetric forward
   DST.
   t0, t2, t1 and t3 must be modifiable integer lvalues without side effects;
   t0h is only read (OD_FDCT_8 passes OD_DCT_RSHIFT(t0, 1) for it).
   Three lifting steps on (t2, t1), a butterfly stage, then three lifting
   steps each on (t3, t0) and (t2, t1).
   OD_DCT_OVERFLOW_CHECK is invoked before every multiply, naming the value
   that is about to be multiplied.
   Expands to a single do/while(0) statement. */
#define OD_FDST_4_ASYM(t0, t0h, t2, t1, t3) \
  /* Embedded 4-point asymmetric Type-IV fDST. */ \
  do { \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 7489, 4096, 106); \
    t2 -= (t1*7489 + 4096) >> 13; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    /* The multiplicand in this step is t2, so t2 is the value checked. */ \
    OD_DCT_OVERFLOW_CHECK(t2, 11585, 8192, 107); \
    t1 += (t2*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 108); \
    t2 += (t1*19195 + 16384) >> 15; \
    t3 += OD_DCT_RSHIFT(t2, 1); \
    t2 -= t3; \
    t1 = t0h - t1; \
    t0 -= t1; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 109); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 8035, 4096, 110); \
    t0 -= (t3*8035 + 4096) >> 13; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 111); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 112); \
    t2 += (t1*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 6811, 4096, 113); \
    t1 -= (t2*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 114); \
    t2 += (t1*8757 + 8192) >> 14; \
  } \
  while (0)
286
/* OD_IDST_4_ASYM(t0, t0h, t2, t1, t3): in-place 4-point asymmetric inverse
   DST.
   All five arguments must be modifiable integer lvalues without side
   effects.  t0h is an output: it receives OD_DCT_RSHIFT(t0, 1) taken in the
   butterfly stage, and OD_IDCT_8 reuses it afterwards.
   Three lifting steps each on (t1, t2) and (t3, t0), a butterfly stage, then
   three lifting steps on (t1, t2).
   Expands to a single do/while(0) statement. */
#define OD_IDST_4_ASYM(t0, t0h, t2, t1, t3) \
  /* Embedded 4-point asymmetric Type-IV iDST. */ \
  do { \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    t1 -= (t2*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    t2 += (t1*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    t1 -= (t2*8757 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    t3 -= (t0*6723 + 4096) >> 13; \
    /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    t0 += (t3*8035 + 4096) >> 13; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    t3 -= (t0*6723 + 4096) >> 13; \
    t0 += t2; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t2 = t0h - t2; \
    t1 += t3; \
    t3 -= OD_DCT_RSHIFT(t1, 1); \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    t1 -= (t2*19195 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    t2 -= (t1*11585 + 8192) >> 14; \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    t1 += (t2*7489 + 4096) >> 13; \
  } \
  while (0)
315
/* OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7): in-place 8-point forward DCT.
   Runs a butterfly stage pairing (r0, r7), (r6, r1), (r2, r5) and (r4, r3),
   then OD_FDCT_4_ASYM on one half and OD_FDST_4_ASYM on the other.
   All eight arguments must be modifiable integer lvalues without side
   effects.  Note the permuted parameter order.
   Declares block-local ints r4h, r5h, r6h and r7h; arguments must not use
   those names.  Expands to a single do/while(0) statement. */
#define OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
  /* Embedded 8-point orthonormal Type-II fDCT. */ \
  do { \
    int r4h; \
    int r5h; \
    int r6h; \
    int r7h; \
    r7 = r0 - r7; \
    r7h = OD_DCT_RSHIFT(r7, 1); \
    r0 -= r7h; \
    r6 += r1; \
    r6h = OD_DCT_RSHIFT(r6, 1); \
    r1 = r6h - r1; \
    r5 = r2 - r5; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 -= r5h; \
    r4 += r3; \
    r4h = OD_DCT_RSHIFT(r4, 1); \
    r3 = r4h - r3; \
    OD_FDCT_4_ASYM(r0, r4, r4h, r2, r6, r6h); \
    OD_FDST_4_ASYM(r7, r7h, r3, r5, r1); \
  } \
  while (0)
339
/* OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7): in-place 8-point inverse DCT.
   Runs OD_IDST_4_ASYM and OD_IDCT_4_ASYM on the two halves, then a butterfly
   stage that recombines them.
   All eight arguments must be modifiable integer lvalues without side
   effects.  Note the permuted parameter order.
   Declares block-local ints r1h, r3h, r5h and r7h; r7h, r1h and r3h are
   filled in by the two sub-transforms.  Arguments must not use those names.
   Expands to a single do/while(0) statement. */
#define OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
  /* Embedded 8-point orthonormal Type-II iDCT. */ \
  do { \
    int r1h; \
    int r3h; \
    int r5h; \
    int r7h; \
    OD_IDST_4_ASYM(r7, r7h, r5, r6, r4); \
    OD_IDCT_4_ASYM(r0, r2, r1, r1h, r3, r3h); \
    r0 += r7h; \
    r7 = r0 - r7; \
    r6 = r1h - r6; \
    r1 -= r6; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 += r5h; \
    r5 = r2 - r5; \
    r4 = r3h - r4; \
    r3 -= r4; \
  } \
  while (0)
360
/* OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h):
   in-place 8-point asymmetric forward DCT.
   r0..r7 must be modifiable integer lvalues without side effects; r4h, r6h,
   r5h and r7h are only read.  OD_FDCT_16 passes OD_DCT_RSHIFT(x, 1) of the
   corresponding value for r4h, r6h and r5h.
   NOTE(review): OD_FDCT_16 passes its seh for r7h, which is the half of a
   different value than the one it passes for r7; confirm this is intended.
   Runs a butterfly stage, then OD_FDCT_4 on one half and OD_FDST_4 on the
   other.  Expands to a single do/while(0) statement. */
#define OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h) \
  /* Embedded 8-point asymmetric Type-II fDCT. */ \
  do { \
    r0 += r7h; \
    r7 = r0 - r7; \
    r1 = r6h - r1; \
    r6 -= r1; \
    r2 += r5h; \
    r5 = r2 - r5; \
    r3 = r4h - r3; \
    r4 -= r3; \
    OD_FDCT_4(r0, r4, r2, r6); \
    OD_FDST_4(r7, r3, r5, r1); \
  } \
  while (0)
376
/* OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h):
   in-place 8-point asymmetric inverse DCT.
   Runs OD_IDST_4 and OD_IDCT_4 on the two halves, then a butterfly stage.
   All twelve arguments must be modifiable integer lvalues without side
   effects.  r1h, r5h, r3h and r7h are outputs: each receives
   OD_DCT_RSHIFT(x, 1) of the matching value as computed in the butterfly
   stage.  Expands to a single do/while(0) statement. */
#define OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h) \
  /* Embedded 8-point asymmetric Type-II iDCT. */ \
  do { \
    OD_IDST_4(r7, r5, r6, r4); \
    OD_IDCT_4(r0, r2, r1, r3); \
    r7 = r0 - r7; \
    r7h = OD_DCT_RSHIFT(r7, 1); \
    r0 -= r7h; \
    r1 += r6; \
    r1h = OD_DCT_RSHIFT(r1, 1); \
    r6 = r1h - r6; \
    r5 = r2 - r5; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 -= r5h; \
    r3 += r4; \
    r3h = OD_DCT_RSHIFT(r3, 1); \
    r4 = r3h - r4; \
  } \
  while (0)
396
/* OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7): in-place 8-point forward DST.
   Three groups of lifting steps, a two-level butterfly stage, a negation of
   t7, then four more groups of lifting steps.
   All eight arguments must be modifiable integer lvalues without side
   effects.  Note the permuted parameter order.
   Declares block-local ints t0h, t2h, t5h and t7h; arguments must not use
   those names.  OD_DCT_OVERFLOW_CHECK is invoked before every multiply.
   Expands to a single do/while(0) statement. */
#define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
  /* Embedded 8-point orthonormal Type-IV fDST. */ \
  do { \
    int t0h; \
    int t2h; \
    int t5h; \
    int t7h; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 115); \
    t6 -= (t1*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 11585, 8192, 116); \
    t1 += (t6*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 117); \
    t6 -= (t1*13573 + 16384) >> 15; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 21895, 16384, 118); \
    t5 -= (t2*21895 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 15137, 8192, 119); \
    t2 += (t5*15137 + 8192) >> 14; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 10947, 8192, 120); \
    t5 -= (t2*10947 + 8192) >> 14; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 121); \
    t4 -= (t3*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3135, 4096, 122); \
    t3 += (t4*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 123); \
    t4 -= (t3*3259 + 8192) >> 14; \
    t7 += t1; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t1 -= t7h; \
    t2 = t3 - t2; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 -= t2h; \
    t0 -= t6; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t6 += t0h; \
    t5 = t4 - t5; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h; \
    t1 += t5h; \
    t5 = t1 - t5; \
    t4 += t0h; \
    t0 -= t4; \
    t6 -= t2h; \
    t2 += t6; \
    t3 -= t7h; \
    t7 += t3; \
    /* TODO: Can we move this into another operation */ \
    t7 = -t7; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 124); \
    t0 -= (t7*7425 + 4096) >> 13; \
    /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 8153, 4096, 125); \
    t7 += (t0*8153 + 4096) >> 13; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 126); \
    t0 -= (t7*7425 + 4096) >> 13; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 127); \
    t6 -= (t1*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 1189, 2048, 128); \
    t1 += (t6*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 129); \
    t6 -= (t1*4861 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 130); \
    t2 -= (t5*2455 + 2048) >> 12; \
    /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 7225, 4096, 131); \
    t5 += (t2*7225 + 4096) >> 13; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 132); \
    t2 -= (t5*2455 + 2048) >> 12; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 133); \
    t4 -= (t3*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 5197, 4096, 134); \
    t3 += (t4*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 135); \
    t4 -= (t3*11725 + 16384) >> 15; \
  } \
  while (0)
491
/* OD_IDST_8(t0, t4, t2, t6, t1, t5, t3, t7): in-place 8-point inverse DST.
   Four groups of lifting steps, a negation of t7, a two-level butterfly
   stage, then three more groups of lifting steps.
   All eight arguments must be modifiable integer lvalues without side
   effects.  Note the permuted parameter order.
   Declares block-local ints t0h, t2h, t5h_ and t7h_; arguments must not use
   those names.  The trailing underscores presumably keep the locals distinct
   from the t5h/t7h parameters of OD_IDCT_16_ASYM, which expands this macro.
   Expands to a single do/while(0) statement. */
#define OD_IDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
  /* Embedded 8-point orthonormal Type-IV iDST. */ \
  do { \
    int t0h; \
    int t2h; \
    int t5h_; \
    int t7h_; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    t1 += (t6*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
    t6 -= (t1*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
    t1 += (t6*11725 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    t2 += (t5*2455 + 2048) >> 12; \
    /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    t5 -= (t2*7225 + 4096) >> 13; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
    t2 += (t5*2455 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    t3 += (t4*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
    t4 -= (t3*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
    t3 += (t4*4861 + 16384) >> 15; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    t0 += (t7*7425 + 4096) >> 13; \
    /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
    t7 -= (t0*8153 + 4096) >> 13; \
    /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
    t0 += (t7*7425 + 4096) >> 13; \
    /* TODO: Can we move this into another operation */ \
    t7 = -t7; \
    t7 -= t6; \
    t7h_ = OD_DCT_RSHIFT(t7, 1); \
    t6 += t7h_; \
    t2 -= t3; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 += t2h; \
    t0 += t1; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t1 -= t0h; \
    t5 = t4 - t5; \
    t5h_ = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h_; \
    t1 += t5h_; \
    t5 = t1 - t5; \
    t3 -= t0h; \
    t0 += t3; \
    t6 += t2h; \
    t2 = t6 - t2; \
    t4 += t7h_; \
    t7 -= t4; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    t6 -= (t1*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 += (t2*10947 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t2 -= (t5*15137 + 8192) >> 14; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 += (t2*21895 + 16384) >> 15; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t3 += (t4*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    t4 -= (t3*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t3 += (t4*13573 + 16384) >> 15; \
  } \
  while (0)
565
/* OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7): in-place 8-point
   asymmetric forward DST.
   Four groups of lifting steps, a two-level butterfly stage, then three more
   groups of lifting steps.
   All eight arguments must be modifiable integer lvalues without side
   effects.  Note the permuted parameter order.
   Declares block-local ints t0h, t2h, t5h and t7h; arguments must not use
   those names.  OD_DCT_OVERFLOW_CHECK is invoked before every multiply.
   Expands to a single do/while(0) statement.
   TODO: Rewrite this so that t0h can be passed in. */
#define OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
  /* Embedded 8-point asymmetric Type-IV fDST. */ \
  do { \
    int t0h; \
    int t2h; \
    int t5h; \
    int t7h; \
    /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t1, 1035, 1024, 199); \
    t6 += (t1*1035 + 1024) >> 11; \
    /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3675, 2048, 200); \
    t1 -= (t6*3675 + 2048) >> 12; \
    /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t1, 851, 4096, 201); \
    t6 -= (t1*851 + 4096) >> 13; \
    /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t2, 4379, 4096, 202); \
    t5 += (t2*4379 + 4096) >> 13; \
    /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t5, 10217, 4096, 203); \
    t2 -= (t5*10217 + 4096) >> 13; \
    /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t2, 4379, 8192, 204); \
    t5 += (t2*4379 + 8192) >> 14; \
    /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t3, 12905, 8192, 205); \
    t4 += (t3*12905 + 8192) >> 14; \
    /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3363, 4096, 206); \
    t3 -= (t4*3363 + 4096) >> 13; \
    /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t3, 3525, 2048, 207); \
    t4 -= (t3*3525 + 2048) >> 12; \
    /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
    OD_DCT_OVERFLOW_CHECK(t0, 5417, 4096, 208); \
    t7 += (t0*5417 + 4096) >> 13; \
    /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t7, 5765, 2048, 209); \
    t0 -= (t7*5765 + 2048) >> 12; \
    /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
    OD_DCT_OVERFLOW_CHECK(t0, 2507, 2048, 210); \
    t7 += (t0*2507 + 2048) >> 12; \
    t0 += t1; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t1 -= t0h; \
    t2 -= t3; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 += t2h; \
    t5 -= t4; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    t4 += t5h; \
    t7 += t6; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t6 = t7h - t6; \
    t4 = t7h - t4; \
    t7 -= t4; \
    t1 += t5h; \
    t5 = t1 - t5; \
    t6 += t2h; \
    t2 = t6 - t2; \
    t3 -= t0h; \
    t0 += t3; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 211); \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 3135, 4096, 212); \
    t6 -= (t1*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 213); \
    t1 += (t6*3259 + 8192) >> 14; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 214); \
    t5 += (t2*2737 + 2048) >> 12; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 473, 256, 215); \
    t2 -= (t5*473 + 256) >> 9; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 216); \
    t5 += (t2*2737 + 2048) >> 12; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 217); \
    t3 += (t4*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 5793, 4096, 218); \
    t4 -= (t3*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 219); \
    t3 += (t4*3393 + 4096) >> 13; \
  } \
  while (0)
659
/* OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7): in-place 8-point
   asymmetric inverse DST.
   Three groups of lifting steps, a two-level butterfly stage, then four more
   groups of lifting steps.
   All eight arguments must be modifiable integer lvalues without side
   effects.  Note the permuted parameter order.
   Declares block-local ints t0h, t2h, t5h__ and t7h__; arguments must not
   use those names.  Expands to a single do/while(0) statement. */
#define OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
  /* Embedded 8-point asymmetric Type-IV iDST. */ \
  do { \
    int t0h; \
    int t2h; \
    int t5h__; \
    int t7h__; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t6 -= (t1*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    t1 += (t6*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    t6 -= (t1*3393 + 4096) >> 13; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 -= (t2*2737 + 2048) >> 12; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t2 += (t5*473 + 256) >> 9; \
    /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    t5 -= (t2*2737 + 2048) >> 12; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t4 -= (t3*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    t3 += (t4*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    t4 -= (t3*3259 + 8192) >> 14; \
    t0 -= t6; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t6 += t0h; \
    t2 = t3 - t2; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t3 -= t2h; \
    t5 = t4 - t5; \
    t5h__ = OD_DCT_RSHIFT(t5, 1); \
    t4 -= t5h__; \
    t7 += t1; \
    t7h__ = OD_DCT_RSHIFT(t7, 1); \
    t1 = t7h__ - t1; \
    t3 = t7h__ - t3; \
    t7 -= t3; \
    t1 -= t5h__; \
    t5 += t1; \
    t6 -= t2h; \
    t2 += t6; \
    t4 += t0h; \
    t0 -= t4; \
    /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
    t7 -= (t0*2507 + 2048) >> 12; \
    /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
    t0 += (t7*5765 + 2048) >> 12; \
    /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
    t7 -= (t0*5417 + 4096) >> 13; \
    /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
    t1 += (t6*3525 + 2048) >> 12; \
    /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
    t6 += (t1*3363 + 4096) >> 13; \
    /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
    t1 -= (t6*12905 + 8192) >> 14; \
    /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
    t5 -= (t2*4379 + 8192) >> 14; \
    /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
    t2 += (t5*10217 + 4096) >> 13; \
    /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
    t5 -= (t2*4379 + 4096) >> 13; \
    /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
    t3 += (t4*851 + 4096) >> 13; \
    /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
    t4 += (t3*3675 + 2048) >> 12; \
    /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
    t3 -= (t4*1035 + 1024) >> 11; \
  } \
  while (0)
731
/* OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se,
              s1, s9, s5, sd, s3, sb, s7, sf):
   in-place 16-point forward DCT.
   Runs a butterfly stage pairing s0/sf, se/s1, s2/sd, sc/s3, s4/sb, sa/s5,
   s6/s9 and s8/s7, then OD_FDCT_8_ASYM on one half and OD_FDST_8_ASYM on the
   other.
   All sixteen arguments must be modifiable integer lvalues without side
   effects.  Note the permuted parameter order.
   Declares block-local ints s8h, sah, sch, seh and sfh; arguments must not
   use those names.  Expands to a single do/while(0) statement. */
#define OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
  s1, s9, s5, sd, s3, sb, s7, sf) \
  /* Embedded 16-point orthonormal Type-II fDCT. */ \
  do { \
    int s8h; \
    int sah; \
    int sch; \
    int seh; \
    int sfh; \
    sf = s0 - sf; \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    s0 -= sfh; \
    se += s1; \
    seh = OD_DCT_RSHIFT(se, 1); \
    s1 = seh - s1; \
    sd = s2 - sd; \
    s2 -= OD_DCT_RSHIFT(sd, 1); \
    sc += s3; \
    sch = OD_DCT_RSHIFT(sc, 1); \
    s3 = sch - s3; \
    sb = s4 - sb; \
    s4 -= OD_DCT_RSHIFT(sb, 1); \
    sa += s5; \
    sah = OD_DCT_RSHIFT(sa, 1); \
    s5 = sah - s5; \
    s9 = s6 - s9; \
    s6 -= OD_DCT_RSHIFT(s9, 1); \
    s8 += s7; \
    s8h = OD_DCT_RSHIFT(s8, 1); \
    s7 = s8h - s7; \
    OD_FDCT_8_ASYM(s0, s8, s8h, s4, sc, sch, s2, sa, sah, s6, se, seh); \
    OD_FDST_8_ASYM(sf, s7, sb, s3, sd, s5, s9, s1); \
  } \
  while (0)
766
/* OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se,
              s1, s9, s5, sd, s3, sb, s7, sf):
   in-place 16-point inverse DCT.
   Runs OD_IDST_8_ASYM and OD_IDCT_8_ASYM on the two halves, then a butterfly
   stage that recombines them.
   All sixteen arguments must be modifiable integer lvalues without side
   effects.  Note the permuted parameter order.
   Declares block-local ints s1h, s3h, s5h, s7h and sfh; the first four are
   filled in by OD_IDCT_8_ASYM.  Arguments must not use those names.
   Expands to a single do/while(0) statement. */
#define OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
  s1, s9, s5, sd, s3, sb, s7, sf) \
  /* Embedded 16-point orthonormal Type-II iDCT. */ \
  do { \
    int s1h; \
    int s3h; \
    int s5h; \
    int s7h; \
    int sfh; \
    OD_IDST_8_ASYM(sf, sb, sd, s9, se, sa, sc, s8); \
    OD_IDCT_8_ASYM(s0, s4, s2, s6, s1, s1h, s5, s5h, s3, s3h, s7, s7h); \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    s0 += sfh; \
    sf = s0 - sf; \
    se = s1h - se; \
    s1 -= se; \
    s2 += OD_DCT_RSHIFT(sd, 1); \
    sd = s2 - sd; \
    sc = s3h - sc; \
    s3 -= sc; \
    s4 += OD_DCT_RSHIFT(sb, 1); \
    sb = s4 - sb; \
    sa = s5h - sa; \
    s5 -= sa; \
    s6 += OD_DCT_RSHIFT(s9, 1); \
    s9 = s6 - s9; \
    s8 = s7h - s8; \
    s7 -= s8; \
  } \
  while (0)
797
/* OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh,
                   t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh):
   in-place 16-point asymmetric forward DCT.
   t0..tf must be modifiable integer lvalues without side effects; the eight
   "h" arguments (t8h, tch, tah, teh, t9h, tdh, tbh, tfh) are only read.
   NOTE(review): the "h" arguments are presumably the caller's half values
   (x >> 1) of the matching inputs; no caller is visible in this chunk.
   Runs a butterfly stage, then OD_FDCT_8 on one half and OD_FDST_8 on the
   other.  Expands to a single do/while(0) statement. */
#define OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh, \
  t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh) \
  /* Embedded 16-point asymmetric Type-II fDCT. */ \
  do { \
    t0 += tfh; \
    tf = t0 - tf; \
    t1 -= teh; \
    te += t1; \
    t2 += tdh; \
    td = t2 - td; \
    t3 -= tch; \
    tc += t3; \
    t4 += tbh; \
    tb = t4 - tb; \
    t5 -= tah; \
    ta += t5; \
    t6 += t9h; \
    t9 = t6 - t9; \
    t7 -= t8h; \
    t8 += t7; \
    OD_FDCT_8(t0, t8, t4, tc, t2, ta, t6, te); \
    OD_FDST_8(tf, t7, tb, t3, td, t5, t9, t1); \
  } \
  while (0)
822
/* OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te,
                   t1, t1h, t9, t9h, t5, t5h, td, tdh,
                   t3, t3h, tb, tbh, t7, t7h, tf, tfh):
   in-place 16-point asymmetric inverse DCT.
   Runs OD_IDST_8 and OD_IDCT_8 on the two halves, then a butterfly stage.
   All twenty-four arguments must be modifiable integer lvalues without side
   effects.  The eight "h" arguments are outputs: each receives
   OD_DCT_RSHIFT(x, 1) of the matching value as computed in the butterfly
   stage.  Expands to a single do/while(0) statement. */
#define OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
  t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh) \
  /* Embedded 16-point asymmetric Type-II iDCT. */ \
  do { \
    OD_IDST_8(tf, tb, td, t9, te, ta, tc, t8); \
    OD_IDCT_8(t0, t4, t2, t6, t1, t5, t3, t7); \
    t1 -= te; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    te += t1h; \
    t9 = t6 - t9; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    t6 -= t9h; \
    t5 -= ta; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    ta += t5h; \
    td = t2 - td; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    t2 -= tdh; \
    t3 -= tc; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    tc += t3h; \
    tb = t4 - tb; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    t4 -= tbh; \
    t7 -= t8; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    t8 += t7h; \
    tf = t0 - tf; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    t0 -= tfh; \
  } \
  while (0)
855
Monty Montgomerycb9c1c52017-07-17 18:15:30 -0400856#define OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
857 s1, s9, s5, sd, s3, sb, s7, sf) \
858 /* Embedded 16-point orthonormal Type-IV fDST. */ \
859 do { \
860 int s0h; \
861 int s2h; \
862 int sdh; \
863 int sfh; \
864 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
865 OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 220); \
866 s1 += (se*13573 + 16384) >> 15; \
867 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
868 OD_DCT_OVERFLOW_CHECK(s1, 11585, 8192, 221); \
869 se -= (s1*11585 + 8192) >> 14; \
870 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
871 OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 222); \
872 s1 += (se*13573 + 16384) >> 15; \
873 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
874 OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 223); \
875 sd += (s2*21895 + 16384) >> 15; \
876 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
877 OD_DCT_OVERFLOW_CHECK(sd, 15137, 16384, 224); \
878 s2 -= (sd*15137 + 8192) >> 14; \
879 /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
880 OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 225); \
881 sd += (s2*21895 + 16384) >> 15; \
882 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
883 OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 226); \
884 sc += (s3*3259 + 8192) >> 14; \
885 /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
886 OD_DCT_OVERFLOW_CHECK(sc, 3135, 4096, 227); \
887 s3 -= (sc*3135 + 4096) >> 13; \
888 /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
889 OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 228); \
890 sc += (s3*3259 + 8192) >> 14; \
891 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
892 OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 229); \
893 sa += (s5*13573 + 16384) >> 15; \
894 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
895 OD_DCT_OVERFLOW_CHECK(sa, 11585, 8192, 230); \
896 s5 -= (sa*11585 + 8192) >> 14; \
897 /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
898 OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 231); \
899 sa += (s5*13573 + 16384) >> 15; \
900 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
901 OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 232); \
902 s6 += (s9*13573 + 16384) >> 15; \
903 /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
904 OD_DCT_OVERFLOW_CHECK(s6, 11585, 8192, 233); \
905 s9 -= (s6*11585 + 8192) >> 14; \
906 /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
907 OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 234); \
908 s6 += (s9*13573 + 16384) >> 15; \
909 sf += se; \
910 sfh = OD_DCT_RSHIFT(sf, 1); \
911 se = sfh - se; \
912 s0 += s1; \
913 s0h = OD_DCT_RSHIFT(s0, 1); \
914 s1 = s0h - s1; \
915 s2 = s3 - s2; \
916 s2h = OD_DCT_RSHIFT(s2, 1); \
917 s3 -= s2h; \
918 sd -= sc; \
919 sdh = OD_DCT_RSHIFT(sd, 1); \
920 sc += sdh; \
921 sa = s4 - sa; \
922 s4 -= OD_DCT_RSHIFT(sa, 1); \
923 s5 += sb; \
924 sb = OD_DCT_RSHIFT(s5, 1) - sb; \
925 s8 += s6; \
926 s6 -= OD_DCT_RSHIFT(s8, 1); \
927 s7 = s9 - s7; \
928 s9 -= OD_DCT_RSHIFT(s7, 1); \
929 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
930 OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 235); \
931 s4 += (sb*6723 + 4096) >> 13; \
932 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
933 OD_DCT_OVERFLOW_CHECK(s4, 16069, 8192, 236); \
934 sb -= (s4*16069 + 8192) >> 14; \
935 /* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \
936 OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 237); \
937 s4 += (sb*6723 + 4096) >> 13; \
938 /* 8757/16384 ~= Tan[5*Pi/32]) ~= 0.534511135950792 */ \
939 OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 238); \
940 sa += (s5*8757 + 8192) >> 14; \
941 /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
942 OD_DCT_OVERFLOW_CHECK(sa, 6811, 4096, 239); \
943 s5 -= (sa*6811 + 4096) >> 13; \
944 /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
945 OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 240); \
946 sa += (s5*8757 + 8192) >> 14; \
947 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
948 OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 241); \
949 s6 += (s9*2485 + 4096) >> 13; \
950 /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
951 OD_DCT_OVERFLOW_CHECK(s6, 4551, 4096, 242); \
952 s9 -= (s6*4551 + 4096) >> 13; \
953 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
954 OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 243); \
955 s6 += (s9*2485 + 4096) >> 13; \
956 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
957 OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 244); \
958 s7 += (s8*3227 + 16384) >> 15; \
959 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
960 OD_DCT_OVERFLOW_CHECK(s7, 6393, 16384, 245); \
961 s8 -= (s7*6393 + 16384) >> 15; \
962 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
963 OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 246); \
964 s7 += (s8*3227 + 16384) >> 15; \
965 s1 -= s2h; \
966 s2 += s1; \
967 se += sdh; \
968 sd = se - sd; \
969 s3 += sfh; \
970 sf -= s3; \
971 sc = s0h - sc; \
972 s0 -= sc; \
973 sb += OD_DCT_RSHIFT(s8, 1); \
974 s8 = sb - s8; \
975 s4 += OD_DCT_RSHIFT(s7, 1); \
976 s7 -= s4; \
977 s6 += OD_DCT_RSHIFT(s5, 1); \
978 s5 = s6 - s5; \
979 s9 -= OD_DCT_RSHIFT(sa, 1); \
980 sa += s9; \
981 s8 += s0; \
982 s0 -= OD_DCT_RSHIFT(s8, 1); \
983 sf += s7; \
984 s7 = OD_DCT_RSHIFT(sf, 1) - s7; \
985 s1 -= s6; \
986 s6 += OD_DCT_RSHIFT(s1, 1); \
987 s9 += se; \
988 se = OD_DCT_RSHIFT(s9, 1) - se; \
989 s2 += sa; \
990 sa = OD_DCT_RSHIFT(s2, 1) - sa; \
991 s5 += sd; \
992 sd -= OD_DCT_RSHIFT(s5, 1); \
993 s4 = sc - s4; \
994 sc -= OD_DCT_RSHIFT(s4, 1); \
995 s3 -= sb; \
996 sb += OD_DCT_RSHIFT(s3, 1); \
997 /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
998 OD_DCT_OVERFLOW_CHECK(sf, 2799, 2048, 247); \
999 s0 -= (sf*2799 + 2048) >> 12; \
1000 /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
1001 OD_DCT_OVERFLOW_CHECK(s0, 2893, 1024, 248); \
1002 sf += (s0*2893 + 1024) >> 11; \
1003 /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
1004 OD_DCT_OVERFLOW_CHECK(sf, 5397, 4096, 249); \
1005 s0 -= (sf*5397 + 4096) >> 13; \
1006 /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
1007 OD_DCT_OVERFLOW_CHECK(s1, 41, 32, 250); \
1008 se += (s1*41 + 32) >> 6; \
1009 /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
1010 OD_DCT_OVERFLOW_CHECK(se, 2865, 1024, 251); \
1011 s1 -= (se*2865 + 1024) >> 11; \
1012 /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
1013 OD_DCT_OVERFLOW_CHECK(s1, 4641, 4096, 252); \
1014 se += (s1*4641 + 4096) >> 13; \
1015 /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
1016 OD_DCT_OVERFLOW_CHECK(s2, 2473, 2048, 253); \
1017 sd += (s2*2473 + 2048) >> 12; \
1018 /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
1019 OD_DCT_OVERFLOW_CHECK(sd, 5619, 2048, 254); \
1020 s2 -= (sd*5619 + 2048) >> 12; \
1021 /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
1022 OD_DCT_OVERFLOW_CHECK(s2, 7839, 8192, 255); \
1023 sd += (s2*7839 + 8192) >> 14; \
1024 /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
1025 OD_DCT_OVERFLOW_CHECK(s3, 5747, 4096, 256); \
1026 sc -= (s3*5747 + 4096) >> 13; \
1027 /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] ~= */ \
1028 OD_DCT_OVERFLOW_CHECK(sc, 3903, 4096, 257); \
1029 s3 += (sc*3903 + 4096) >> 13; \
1030 /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
1031 OD_DCT_OVERFLOW_CHECK(s3, 5701, 4096, 258); \
1032 sc += (s3*5701 + 4096) >> 13; \
1033 /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
1034 OD_DCT_OVERFLOW_CHECK(s4, 4471, 4096, 259); \
1035 sb += (s4*4471 + 4096) >> 13; \
1036 /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
1037 OD_DCT_OVERFLOW_CHECK(sb, 1309, 512, 260); \
1038 s4 -= (sb*1309 + 512) >> 10; \
1039 /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
1040 OD_DCT_OVERFLOW_CHECK(s4, 5067, 8192, 261); \
1041 sb += (s4*5067 + 8192) >> 14; \
1042 /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
1043 OD_DCT_OVERFLOW_CHECK(s5, 2217, 2048, 262); \
1044 sa -= (s5*2217 + 2048) >> 12; \
1045 /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] ~= 0.72705107329128 */ \
1046 OD_DCT_OVERFLOW_CHECK(sa, 1489, 1024, 263); \
1047 s5 += (sa*1489 + 1024) >> 11; \
1048 /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
1049 OD_DCT_OVERFLOW_CHECK(s5, 75, 128, 264); \
1050 sa += (s5*75 + 128) >> 8; \
1051 /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
1052 OD_DCT_OVERFLOW_CHECK(s9, 2087, 2048, 265); \
1053 s6 -= (s9*2087 + 2048) >> 12; \
1054 /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
1055 OD_DCT_OVERFLOW_CHECK(s6, 4653, 2048, 266); \
1056 s9 += (s6*4653 + 2048) >> 12; \
1057 /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
1058 OD_DCT_OVERFLOW_CHECK(s9, 4545, 16384, 267); \
1059 s6 -= (s9*4545 + 16384) >> 15; \
1060 /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
1061 OD_DCT_OVERFLOW_CHECK(s8, 2053, 2048, 268); \
1062 s7 += (s8*2053 + 2048) >> 12; \
1063 /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
1064 OD_DCT_OVERFLOW_CHECK(s7, 1945, 1024, 269); \
1065 s8 -= (s7*1945 + 1024) >> 11; \
1066 /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
1067 OD_DCT_OVERFLOW_CHECK(s8, 1651, 16384, 270); \
1068 s7 -= (s8*1651 + 16384) >> 15; \
1069 } \
1070 while (0)
1071
/* Embedded 16-point orthonormal Type-IV iDST, computed in place.
 *
 * Usage contract:
 *  - All sixteen arguments must be side-effect-free integer lvalues; each one
 *    is read and assigned several times.
 *  - The scratch values s0h, s4h, sbh and sfh are declared inside the macro,
 *    so argument expressions must not use those names.
 *  - OD_DCT_RSHIFT must be defined at the point of expansion.
 *
 * The body is a fixed sequence of integer lifting steps of the form
 *   x += (y*C + R) >> S   (R is half of 1 << S, i.e. round to nearest)
 * and of butterflies that add or subtract a halved partner. Every step uses
 * values produced by earlier ones, so statement order must not be changed.
 * The closing stage of OD_FDST_16 applies the same constants in the opposite
 * order with opposite signs. */
#define OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
 s1, s9, s5, sd, s3, sb, s7, sf) \
  /* Embedded 16-point orthonormal Type-IV iDST. */ \
  do { \
    int s0h; \
    int s4h; \
    int sbh; \
    int sfh; \
    /* Stage 1: eight three-step lifting blocks with Sqrt[2]-scaled */ \
    /* constants. */ \
    /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
    se += (s1*1651 + 16384) >> 15; \
    /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
    s1 += (se*1945 + 1024) >> 11; \
    /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
    se -= (s1*2053 + 2048) >> 12; \
    /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
    s6 += (s9*4545 + 16384) >> 15; \
    /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
    s9 -= (s6*4653 + 2048) >> 12; \
    /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
    s6 += (s9*2087 + 2048) >> 12; \
    /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
    s5 -= (sa*75 + 128) >> 8; \
    /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] */ \
    sa -= (s5*1489 + 1024) >> 11; \
    /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
    s5 += (sa*2217 + 2048) >> 12; \
    /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
    sd -= (s2*5067 + 8192) >> 14; \
    /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
    s2 += (sd*1309 + 512) >> 10; \
    /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
    sd -= (s2*4471 + 4096) >> 13; \
    /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
    s3 -= (sc*5701 + 4096) >> 13; \
    /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \
    sc -= (s3*3903 + 4096) >> 13; \
    /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
    s3 += (sc*5747 + 4096) >> 13; \
    /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
    sb -= (s4*7839 + 8192) >> 14; \
    /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
    s4 += (sb*5619 + 2048) >> 12; \
    /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
    sb -= (s4*2473 + 2048) >> 12; \
    /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
    s7 -= (s8*4641 + 4096) >> 13; \
    /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
    s8 += (s7*2865 + 1024) >> 11; \
    /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
    s7 -= (s8*41 + 32) >> 6; \
    /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
    s0 += (sf*5397 + 4096) >> 13; \
    /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
    sf -= (s0*2893 + 1024) >> 11; \
    /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
    s0 += (sf*2799 + 2048) >> 12; \
    /* Stage 2: butterflies against a halved partner. s0h, sfh, sbh and */ \
    /* s4h are saved here and reused by the butterflies after stage 3. */ \
    sd -= OD_DCT_RSHIFT(sc, 1); \
    sc += sd; \
    s3 += OD_DCT_RSHIFT(s2, 1); \
    s2 = s3 - s2; \
    sb += OD_DCT_RSHIFT(sa, 1); \
    sa -= sb; \
    s5 = OD_DCT_RSHIFT(s4, 1) - s5; \
    s4 -= s5; \
    s7 = OD_DCT_RSHIFT(s9, 1) - s7; \
    s9 -= s7; \
    s6 -= OD_DCT_RSHIFT(s8, 1); \
    s8 += s6; \
    se = OD_DCT_RSHIFT(sf, 1) - se; \
    sf -= se; \
    s0 += OD_DCT_RSHIFT(s1, 1); \
    s1 -= s0; \
    s5 -= s9; \
    s9 += OD_DCT_RSHIFT(s5, 1); \
    sa = s6 - sa; \
    s6 -= OD_DCT_RSHIFT(sa, 1); \
    se += s2; \
    s2 -= OD_DCT_RSHIFT(se, 1); \
    s1 = sd - s1; \
    sd -= OD_DCT_RSHIFT(s1, 1); \
    s0 += s3; \
    s0h = OD_DCT_RSHIFT(s0, 1); \
    s3 = s0h - s3; \
    sf += sc; \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    sc -= sfh; \
    sb = s7 - sb; \
    sbh = OD_DCT_RSHIFT(sb, 1); \
    s7 -= sbh; \
    s4 -= s8; \
    s4h = OD_DCT_RSHIFT(s4, 1); \
    s8 += s4h; \
    /* Stage 3: four three-step lifting rotations, then butterflies. */ \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    se -= (s1*3227 + 16384) >> 15; \
    /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
    s1 += (se*6393 + 16384) >> 15; \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    se -= (s1*3227 + 16384) >> 15; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    s6 -= (s9*2485 + 4096) >> 13; \
    /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
    s9 += (s6*4551 + 4096) >> 13; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    s6 -= (s9*2485 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    s5 -= (sa*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    sa += (s5*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    s5 -= (sa*8757 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    s2 -= (sd*6723 + 4096) >> 13; \
    /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    sd += (s2*16069 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    s2 -= (sd*6723 + 4096) >> 13; \
    s9 += OD_DCT_RSHIFT(se, 1); \
    se = s9 - se; \
    s6 += OD_DCT_RSHIFT(s1, 1); \
    s1 -= s6; \
    sd = OD_DCT_RSHIFT(sa, 1) - sd; \
    sa -= sd; \
    s2 += OD_DCT_RSHIFT(s5, 1); \
    s5 = s2 - s5; \
    s3 -= sbh; \
    sb += s3; \
    sc += s4h; \
    s4 = sc - s4; \
    s8 = s0h - s8; \
    s0 -= s8; \
    s7 = sfh - s7; \
    sf -= s7; \
    /* Stage 4: five closing three-step lifting rotations. */ \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    s6 -= (s9*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    s9 += (s6*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    s6 -= (s9*13573 + 16384) >> 15; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    s5 -= (sa*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    sa += (s5*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    s5 -= (sa*13573 + 16384) >> 15; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    s3 -= (sc*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    sc += (s3*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    s3 -= (sc*3259 + 8192) >> 14; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    sb -= (s4*21895 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    s4 += (sb*15137 + 8192) >> 14; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    sb -= (s4*21895 + 16384) >> 15; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    s8 -= (s7*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    s7 += (s8*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    s8 -= (s7*13573 + 16384) >> 15; \
  } \
  while (0)
1236
/* TODO: rewrite this to match OD_FDST_16. */
/* Embedded 16-point asymmetric Type-IV fDST, computed in place.
 *
 * Usage contract:
 *  - t0, t8, t4, tc, t2, ta, t6, te, t1, t9, t5, td, t3, tb, t7 and tf are
 *    the sixteen in/out values; they must be side-effect-free integer
 *    lvalues because each is read and assigned several times.
 *  - t0h and t4h are only read, never assigned. OD_FDCT_32 passes
 *    OD_DCT_RSHIFT(x, 1) of the matching argument for them.
 *  - t7h is read once as supplied and is then overwritten with an
 *    intermediate half value, so the caller's t7h is clobbered.
 *  - t2h, t3h, t6h, t8h, t9h, tch and tdh are declared inside the macro;
 *    argument expressions must not use those names.
 *  - OD_DCT_RSHIFT and OD_DCT_OVERFLOW_CHECK must be defined at the point of
 *    expansion.
 *
 * The body is a fixed sequence of sign flips, rounded integer lifting steps
 * x += (y*C + R) >> S, and half-value butterflies. Every step depends on the
 * results of earlier ones, so statement order must not be changed. */
#define OD_FDST_16_ASYM(t0, t0h, t8, t4, t4h, tc, t2, ta, t6, te, \
 t1, t9, t5, td, t3, tb, t7, t7h, tf) \
  /* Embedded 16-point asymmetric Type-IV fDST. */ \
  do { \
    int t2h; \
    int t3h; \
    int t6h; \
    int t8h; \
    int t9h; \
    int tch; \
    int tdh; \
    /* TODO: Can we move these into another operation */ \
    t8 = -t8; \
    t9 = -t9; \
    ta = -ta; \
    tb = -tb; \
    td = -td; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 136); \
    t1 -= (te*13573 + 8192) >> 14; \
    /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 11585, 16384, 137); \
    te += (t1*11585 + 16384) >> 15; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 138); \
    t1 -= (te*13573 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(td, 4161, 8192, 139); \
    t2 += (td*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 15137, 8192, 140); \
    td -= (t2*15137 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(td, 14341, 8192, 141); \
    t2 += (td*14341 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 14341, 8192, 142); \
    tc -= (t3*14341 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 15137, 8192, 143); \
    t3 += (tc*15137 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 4161, 8192, 144); \
    tc -= (t3*4161 + 8192) >> 14; \
    /* t0h is consumed here exactly as the caller supplied it. */ \
    te = t0h - te; \
    t0 -= te; \
    tf = OD_DCT_RSHIFT(t1, 1) - tf; \
    t1 -= tf; \
    /* TODO: Can we move this into another operation */ \
    tc = -tc; \
    t2 = OD_DCT_RSHIFT(tc, 1) - t2; \
    tc -= t2; \
    t3 = OD_DCT_RSHIFT(td, 1) - t3; \
    td = t3 - td; \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 7489, 4096, 145); \
    t9 -= (t6*7489 + 4096) >> 13; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 146); \
    t6 += (t9*11585 + 8192) >> 14; \
    /* 19195/32768 ~= Tan[Pi/4] - Tan[Pi/8] ~= 0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 147); \
    t9 += (t6*19195 + 16384) >> 15; \
    t8 += OD_DCT_RSHIFT(t9, 1); \
    t9 -= t8; \
    /* t7h is consumed here exactly as the caller supplied it. */ \
    t6 = t7h - t6; \
    t7 -= t6; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 148); \
    t8 += (t7*6723 + 4096) >> 13; \
    /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    OD_DCT_OVERFLOW_CHECK(t8, 16069, 8192, 149); \
    t7 -= (t8*16069 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 150); \
    t8 += (t7*6723 + 4096) >> 13; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 151); \
    t9 += (t6*17515 + 16384) >> 15; \
    /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    OD_DCT_OVERFLOW_CHECK(t9, 13623, 8192, 152); \
    t6 -= (t9*13623 + 8192) >> 14; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 153); \
    t9 += (t6*17515 + 16384) >> 15; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 154); \
    t5 += (ta*13573 + 8192) >> 14; \
    /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 155); \
    ta -= (t5*11585 + 16384) >> 15; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 156); \
    t5 += (ta*13573 + 8192) >> 14; \
    tb += OD_DCT_RSHIFT(t5, 1); \
    t5 = tb - t5; \
    /* t4h is consumed here exactly as the caller supplied it. */ \
    ta += t4h; \
    t4 -= ta; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 157); \
    ta += (t5*2485 + 4096) >> 13; \
    /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
    OD_DCT_OVERFLOW_CHECK(ta, 18205, 16384, 158); \
    t5 -= (ta*18205 + 16384) >> 15; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 159); \
    ta += (t5*2485 + 4096) >> 13; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 160); \
    tb -= (t4*6723 + 4096) >> 13; \
    /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 16069, 8192, 161); \
    t4 += (tb*16069 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 162); \
    tb -= (t4*6723 + 4096) >> 13; \
    /* TODO: Can we move this into another operation */ \
    t5 = -t5; \
    tc -= tf; \
    tch = OD_DCT_RSHIFT(tc, 1); \
    tf += tch; \
    t3 += t0; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    t0 -= t3h; \
    td -= t1; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    t1 += tdh; \
    t2 += te; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    te -= t2h; \
    t8 += t4; \
    t8h = OD_DCT_RSHIFT(t8, 1); \
    t4 = t8h - t4; \
    t7 = tb - t7; \
    /* From here on t7h holds an intermediate half, not the caller's value. */ \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    tb = t7h - tb; \
    t6 -= ta; \
    t6h = OD_DCT_RSHIFT(t6, 1); \
    ta += t6h; \
    t9 = t5 - t9; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    t5 -= t9h; \
    t0 -= t7h; \
    t7 += t0; \
    tf += t8h; \
    t8 -= tf; \
    te -= t6h; \
    t6 += te; \
    t1 += t9h; \
    t9 -= t1; \
    tb -= tch; \
    tc += tb; \
    t4 += t3h; \
    t3 -= t4; \
    ta -= tdh; \
    td += ta; \
    t5 = t2h - t5; \
    t2 -= t5; \
    /* TODO: Can we move these into another operation */ \
    t8 = -t8; \
    t9 = -t9; \
    ta = -ta; \
    tb = -tb; \
    tc = -tc; \
    td = -td; \
    tf = -tf; \
    /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
    OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 163); \
    t0 -= (tf*7799 + 4096) >> 13; \
    /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 4091, 2048, 164); \
    tf += (t0*4091 + 2048) >> 12; \
    /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
    OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 165); \
    t0 -= (tf*7799 + 4096) >> 13; \
    /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
    OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 166); \
    t1 += (te*2417 + 16384) >> 15; \
    /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 601, 2048, 167); \
    te -= (t1*601 + 2048) >> 12; \
    /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
    OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 168); \
    t1 += (te*2417 + 16384) >> 15; \
    /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
    OD_DCT_OVERFLOW_CHECK(t8, 14525, 16384, 169); \
    t7 -= (t8*14525 + 16384) >> 15; \
    /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 3035, 2048, 170); \
    t8 += (t7*3035 + 2048) >> 12; \
    /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
    OD_DCT_OVERFLOW_CHECK(t8, 7263, 8192, 171); \
    t7 -= (t8*7263 + 8192) >> 14; \
    /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
    OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 172); \
    t2 -= (td*6393 + 4096) >> 13; \
    /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 3973, 2048, 173); \
    td += (t2*3973 + 2048) >> 12; \
    /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
    OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 174); \
    t2 -= (td*6393 + 4096) >> 13; \
    /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
    OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 175); \
    t5 -= (ta*9281 + 8192) >> 14; \
    /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 7027, 4096, 176); \
    ta += (t5*7027 + 4096) >> 13; \
    /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
    OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 177); \
    t5 -= (ta*9281 + 8192) >> 14; \
    /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 178); \
    t3 -= (tc*11539 + 8192) >> 14; \
    /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 7713, 4096, 179); \
    tc += (t3*7713 + 4096) >> 13; \
    /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 180); \
    t3 -= (tc*11539 + 8192) >> 14; \
    /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 181); \
    t4 -= (tb*10375 + 8192) >> 14; \
    /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 7405, 4096, 182); \
    tb += (t4*7405 + 4096) >> 13; \
    /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 183); \
    t4 -= (tb*10375 + 8192) >> 14; \
    /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
    OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 184); \
    t6 -= (t9*8247 + 8192) >> 14; \
    /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 1645, 1024, 185); \
    t9 += (t6*1645 + 1024) >> 11; \
    /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
    OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 186); \
    t6 -= (t9*8247 + 8192) >> 14; \
  } \
  while (0)
1478
/* Embedded 16-point asymmetric Type-IV iDST, computed in place.
 *
 * Usage contract:
 *  - t0, t8, t4, tc, t2, ta, t6, te, t1, t9, t5, td, t3, tb, t7 and tf are
 *    the sixteen in/out values; they must be side-effect-free integer
 *    lvalues because each is read and assigned several times.
 *  - t0h, t2h and teh are outputs only: the macro assigns them before it
 *    reads them. On exit each equals OD_DCT_RSHIFT of the final value of
 *    t0, t2 and te respectively, since those three are not modified after
 *    their half is taken.
 *  - t1h_, t3h_, t4h, t6h, t9h_, tbh_ and tch are declared inside the macro;
 *    argument expressions must not use those names.
 *  - OD_DCT_RSHIFT must be defined at the point of expansion.
 *
 * The body is a fixed sequence of rounded integer lifting steps
 * x += (y*C + R) >> S, half-value butterflies and sign flips. It uses the
 * same constants as OD_FDST_16_ASYM. Every step depends on earlier results,
 * so statement order must not be changed. */
#define OD_IDST_16_ASYM(t0, t0h, t8, t4, tc, t2, t2h, ta, t6, te, teh, \
 t1, t9, t5, td, t3, tb, t7, tf) \
  /* Embedded 16-point asymmetric Type-IV iDST. */ \
  do { \
    int t1h_; \
    int t3h_; \
    int t4h; \
    int t6h; \
    int t9h_; \
    int tbh_; \
    int tch; \
    /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
    t6 += (t9*8247 + 8192) >> 14; \
    /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
    t9 -= (t6*1645 + 1024) >> 11; \
    /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
    t6 += (t9*8247 + 8192) >> 14; \
    /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
    t2 += (td*10375 + 8192) >> 14; \
    /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
    td -= (t2*7405 + 4096) >> 13; \
    /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
    t2 += (td*10375 + 8192) >> 14; \
    /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
    tc += (t3*11539 + 8192) >> 14; \
    /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
    t3 -= (tc*7713 + 4096) >> 13; \
    /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
    tc += (t3*11539 + 8192) >> 14; \
    /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
    ta += (t5*9281 + 8192) >> 14; \
    /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
    t5 -= (ta*7027 + 4096) >> 13; \
    /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
    ta += (t5*9281 + 8192) >> 14; \
    /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
    t4 += (tb*6393 + 4096) >> 13; \
    /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
    tb -= (t4*3973 + 2048) >> 12; \
    /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
    t4 += (tb*6393 + 4096) >> 13; \
    /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
    te += (t1*7263 + 8192) >> 14; \
    /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
    t1 -= (te*3035 + 2048) >> 12; \
    /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
    te += (t1*14525 + 16384) >> 15; \
    /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
    t8 -= (t7*2417 + 16384) >> 15; \
    /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
    t7 += (t8*601 + 2048) >> 12; \
    /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
    t8 -= (t7*2417 + 16384) >> 15; \
    /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
    t0 += (tf*7799 + 4096) >> 13; \
    /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
    tf -= (t0*4091 + 2048) >> 12; \
    /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
    t0 += (tf*7799 + 4096) >> 13; \
    /* TODO: Can we move these into another operation */ \
    t1 = -t1; \
    t3 = -t3; \
    t5 = -t5; \
    t9 = -t9; \
    tb = -tb; \
    td = -td; \
    tf = -tf; \
    t4 += ta; \
    t4h = OD_DCT_RSHIFT(t4, 1); \
    ta = t4h - ta; \
    tb -= t5; \
    tbh_ = OD_DCT_RSHIFT(tb, 1); \
    t5 += tbh_; \
    tc += t2; \
    tch = OD_DCT_RSHIFT(tc, 1); \
    t2 -= tch; \
    t3 -= td; \
    t3h_ = OD_DCT_RSHIFT(t3, 1); \
    td += t3h_; \
    t9 += t8; \
    t9h_ = OD_DCT_RSHIFT(t9, 1); \
    t8 -= t9h_; \
    t6 -= t7; \
    t6h = OD_DCT_RSHIFT(t6, 1); \
    t7 += t6h; \
    t1 += tf; \
    t1h_ = OD_DCT_RSHIFT(t1, 1); \
    tf -= t1h_; \
    te -= t0; \
    /* teh is used as scratch here; it is assigned its final value below. */ \
    teh = OD_DCT_RSHIFT(te, 1); \
    t0 += teh; \
    ta += t9h_; \
    t9 = ta - t9; \
    t5 -= t6h; \
    t6 += t5; \
    td = teh - td; \
    te = td - te; \
    t2 = t1h_ - t2; \
    t1 -= t2; \
    t7 += t4h; \
    t4 -= t7; \
    t8 -= tbh_; \
    tb += t8; \
    t0 += tch; \
    tc -= t0; \
    tf -= t3h_; \
    t3 += tf; \
    /* TODO: Can we move this into another operation */ \
    ta = -ta; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    td += (t2*6723 + 4096) >> 13; \
    /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    t2 -= (td*16069 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    td += (t2*6723 + 4096) >> 13; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    t5 -= (ta*2485 + 4096) >> 13; \
    /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
    ta += (t5*18205 + 16384) >> 15; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    t5 -= (ta*2485 + 4096) >> 13; \
    t2 += t5; \
    /* Final value of the t2h output; t2 is not modified after this. */ \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t5 -= t2h; \
    ta = td - ta; \
    td -= OD_DCT_RSHIFT(ta, 1); \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    ta -= (t5*13573 + 8192) >> 14; \
    /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
    t5 += (ta*11585 + 16384) >> 15; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    ta -= (t5*13573 + 8192) >> 14; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    t9 -= (t6*17515 + 16384) >> 15; \
    /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    t6 += (t9*13623 + 8192) >> 14; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    t9 -= (t6*17515 + 16384) >> 15; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    t1 -= (te*6723 + 4096) >> 13; \
    /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    te += (t1*16069 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    t1 -= (te*6723 + 4096) >> 13; \
    te += t6; \
    /* Final value of the teh output; te is not modified after this. */ \
    teh = OD_DCT_RSHIFT(te, 1); \
    t6 = teh - t6; \
    t9 += t1; \
    t1 -= OD_DCT_RSHIFT(t9, 1); \
    /* 19195/32768 ~= Tan[Pi/4] - Tan[Pi/8] ~= 0.585786437626905 */ \
    t9 -= (t6*19195 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    t6 -= (t9*11585 + 8192) >> 14; \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    t9 += (t6*7489 + 4096) >> 13; \
    tb = tc - tb; \
    tc = OD_DCT_RSHIFT(tb, 1) - tc; \
    t3 += t4; \
    t4 = OD_DCT_RSHIFT(t3, 1) - t4; \
    /* TODO: Can we move this into another operation */ \
    t3 = -t3; \
    t8 += tf; \
    tf = OD_DCT_RSHIFT(t8, 1) - tf; \
    t0 += t7; \
    /* Final value of the t0h output; t0 is not modified after this. */ \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t7 = t0h - t7; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    t3 += (tc*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    tc -= (t3*15137 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    t3 += (tc*14341 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    t4 -= (tb*14341 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    tb += (t4*15137 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    t4 -= (tb*4161 + 8192) >> 14; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    t8 += (t7*13573 + 8192) >> 14; \
    /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
    t7 -= (t8*11585 + 16384) >> 15; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    t8 += (t7*13573 + 8192) >> 14; \
    /* TODO: Can we move these into another operation */ \
    t1 = -t1; \
    t5 = -t5; \
    t9 = -t9; \
    tb = -tb; \
    td = -td; \
  } \
  while (0)
1671
/* Embedded 32-point orthonormal Type-II fDCT, computed in place.
 *
 * Usage contract:
 *  - All thirty-two arguments must be side-effect-free integer lvalues; each
 *    is read and assigned more than once.
 *  - The half values tgh, thh, tih, tkh, tmh, tnh, toh, tqh, tsh, tuh and tvh
 *    are declared inside the macro; argument expressions must not use those
 *    names.
 *  - OD_DCT_RSHIFT, OD_FDCT_16_ASYM and OD_FDST_16_ASYM must be defined at
 *    the point of expansion.
 *
 * Structure:
 *  1. Sixteen butterflies, each pairing one of t0..tf with one of tv..tg
 *     (t0 with tv, t1 with tu, ..., tf with tg). Each butterfly overwrites
 *     one member with a sum or difference and the other with a value derived
 *     from half of that result.
 *  2. OD_FDCT_16_ASYM runs on one set of sixteen butterfly outputs and
 *     OD_FDST_16_ASYM on the other sixteen. The halves saved in step 1 are
 *     passed to them so they need not be recomputed. */
#define OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
 te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
  /* Embedded 32-point orthonormal Type-II fDCT. */ \
  do { \
    int tgh; \
    int thh; \
    int tih; \
    int tkh; \
    int tmh; \
    int tnh; \
    int toh; \
    int tqh; \
    int tsh; \
    int tuh; \
    int tvh; \
    /* Stage 1: butterflies. Only the halves needed by the sub-transforms */ \
    /* are stored in named scratch values. */ \
    tv = t0 - tv; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    t0 -= tvh; \
    tu += t1; \
    tuh = OD_DCT_RSHIFT(tu, 1); \
    t1 = tuh - t1; \
    tt = t2 - tt; \
    t2 -= OD_DCT_RSHIFT(tt, 1); \
    ts += t3; \
    tsh = OD_DCT_RSHIFT(ts, 1); \
    t3 = tsh - t3; \
    tr = t4 - tr; \
    t4 -= OD_DCT_RSHIFT(tr, 1); \
    tq += t5; \
    tqh = OD_DCT_RSHIFT(tq, 1); \
    t5 = tqh - t5; \
    tp = t6 - tp; \
    t6 -= OD_DCT_RSHIFT(tp, 1); \
    to += t7; \
    toh = OD_DCT_RSHIFT(to, 1); \
    t7 = toh - t7; \
    tn = t8 - tn; \
    tnh = OD_DCT_RSHIFT(tn, 1); \
    t8 -= tnh; \
    tm += t9; \
    tmh = OD_DCT_RSHIFT(tm, 1); \
    t9 = tmh - t9; \
    tl = ta - tl; \
    ta -= OD_DCT_RSHIFT(tl, 1); \
    tk += tb; \
    tkh = OD_DCT_RSHIFT(tk, 1); \
    tb = tkh - tb; \
    tj = tc - tj; \
    tc -= OD_DCT_RSHIFT(tj, 1); \
    ti += td; \
    tih = OD_DCT_RSHIFT(ti, 1); \
    td = tih - td; \
    th = te - th; \
    thh = OD_DCT_RSHIFT(th, 1); \
    te -= thh; \
    tg += tf; \
    tgh = OD_DCT_RSHIFT(tg, 1); \
    tf = tgh - tf; \
    /* Stage 2: the two 16-point asymmetric sub-transforms. */ \
    OD_FDCT_16_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
     t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh); \
    OD_FDST_16_ASYM(tv, tvh, tf, tn, tnh, t7, tr, tb, tj, t3, \
     tt, td, tl, t5, tp, t9, th, thh, t1); \
  } \
  while (0)
1736
/* Embedded 32-point orthonormal Type-II iDCT, computed in place.
 *
 * Usage contract:
 *  - All thirty-two arguments must be side-effect-free integer lvalues; each
 *    is read and assigned more than once.
 *  - The half values t1h, t3h, t5h, t7h, t9h, tbh, tdh, tfh, thh, tth and tvh
 *    are declared inside the macro; argument expressions must not use those
 *    names.
 *  - OD_DCT_RSHIFT, OD_IDST_16_ASYM and OD_IDCT_16_ASYM must be defined at
 *    the point of expansion.
 *
 * Structure:
 *  1. OD_IDST_16_ASYM and OD_IDCT_16_ASYM each run on sixteen of the
 *     arguments. The scratch halves are declared uninitialized and passed to
 *     these two macros, which are expected to assign every one of them.
 *  2. Sixteen butterflies recombine the two halves. Eleven use the halves
 *     from step 1; the other five recompute the half with OD_DCT_RSHIFT. */
#define OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
 te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
  /* Embedded 32-point orthonormal Type-II iDCT. */ \
  do { \
    int t1h; \
    int t3h; \
    int t5h; \
    int t7h; \
    int t9h; \
    int tbh; \
    int tdh; \
    int tfh; \
    int thh; \
    int tth; \
    int tvh; \
    /* Stage 1: the two 16-point asymmetric sub-transforms. */ \
    OD_IDST_16_ASYM(tv, tvh, tn, tr, tj, tt, tth, tl, tp, th, thh, \
     tu, tm, tq, ti, ts, tk, to, tg); \
    OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
     t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh); \
    /* Stage 2: butterflies recombining the two halves. */ \
    tu = t1h - tu; \
    t1 -= tu; \
    te += thh; \
    th = te - th; \
    tm = t9h - tm; \
    t9 -= tm; \
    t6 += OD_DCT_RSHIFT(tp, 1); \
    tp = t6 - tp; \
    tq = t5h - tq; \
    t5 -= tq; \
    ta += OD_DCT_RSHIFT(tl, 1); \
    tl = ta - tl; \
    ti = tdh - ti; \
    td -= ti; \
    t2 += tth; \
    tt = t2 - tt; \
    ts = t3h - ts; \
    t3 -= ts; \
    tc += OD_DCT_RSHIFT(tj, 1); \
    tj = tc - tj; \
    tk = tbh - tk; \
    tb -= tk; \
    t4 += OD_DCT_RSHIFT(tr, 1); \
    tr = t4 - tr; \
    to = t7h - to; \
    t7 -= to; \
    t8 += OD_DCT_RSHIFT(tn, 1); \
    tn = t8 - tn; \
    tg = tfh - tg; \
    tf -= tg; \
    t0 += tvh; \
    tv = t0 - tv; \
  } \
  while (0)
1790
/* OD_FDST_32: embedded 32-point orthonormal Type-IV forward DST, in place.
 *
 * Every argument is both read and overwritten, and each one is expanded many
 * times, so pass plain modifiable lvalues without side effects.  Unlike
 * OD_IDST_32, the parameters are listed in natural index order
 * (t0..t9, ta..tv).
 *
 * The body is eight stages.  The even stages (0, 3, 5, 7) apply three-step
 * multiply/round/shift updates to disjoint pairs,
 *   x += (y*a + r) >> s;  y -= (x*b + r) >> s;  x += (y*a + r) >> s;
 * and the odd stages (1, 2, 4, 6) are add/subtract butterflies that share
 * "half" values computed with OD_DCT_RSHIFT(x, 1).  OD_IDST_32 undoes these
 * steps one by one in reverse stage order with the signs flipped.
 */
#define OD_FDST_32(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, \
 te, tf, tg, th, ti, tj, tk, tl, tm, tn, to, tp, tq, tr, ts, tt, tu, tv) \
  /* 117 "muls", 117 + 128 = 245 adds, 36 shifts */ \
  do { \
    /* Halves saved by one butterfly stage and consumed by a later one. */ \
    od_coeff t0h; \
    od_coeff t1h; \
    od_coeff t2h; \
    od_coeff t3h; \
    od_coeff t4h; \
    od_coeff t6h; \
    od_coeff t8h; \
    od_coeff t9h; \
    od_coeff tah; \
    od_coeff tbh; \
    od_coeff tch; \
    od_coeff tdh; \
    od_coeff teh; \
    od_coeff tfh; \
    od_coeff tgh; \
    od_coeff thh; \
    od_coeff tih; \
    od_coeff tjh; \
    od_coeff tkh; \
    od_coeff tlh; \
    od_coeff tmh; \
    od_coeff tnh; \
    od_coeff tph; \
    od_coeff trh; \
    od_coeff tsh; \
    od_coeff tth; \
    od_coeff tuh; \
    od_coeff tvh; \
    /* Stage 0 */ \
    /* Sixteen three-step updates, one per disjoint pair of inputs. */ \
    tp += (t6*659 + 2048) >> 12; \
    t6 -= (tp*10279 + 16384) >> 15; \
    tp += (t6*659 + 2048) >> 12; \
    th += (te*3045 + 4096) >> 13; \
    te -= (th*21403 + 16384) >> 15; \
    th += (te*3045 + 4096) >> 13; \
    t9 += (tm*20191 + 16384) >> 15; \
    tm -= (t9*29269 + 16384) >> 15; \
    t9 += (tm*20191 + 16384) >> 15; \
    tu += (t1*1207 + 16384) >> 15; \
    t1 -= (tu*2411 + 16384) >> 15; \
    tu += (t1*1207 + 16384) >> 15; \
    t4 += (tr*13113 + 8192) >> 14; \
    tr -= (t4*7993 + 4096) >> 13; \
    t4 += (tr*13113 + 8192) >> 14; \
    tj += (tc*10381 + 16384) >> 15; \
    tc -= (tj*4717 + 4096) >> 13; \
    tj += (tc*10381 + 16384) >> 15; \
    tb += (tk*18035 + 16384) >> 15; \
    tk -= (tb*6921 + 4096) >> 13; \
    tb += (tk*18035 + 16384) >> 15; \
    ts += (t3*1411 + 8192) >> 14; \
    t3 -= (ts*2801 + 8192) >> 14; \
    ts += (t3*1411 + 8192) >> 14; \
    tq += (t5*2225 + 8192) >> 14; \
    t5 -= (tq*2185 + 4096) >> 13; \
    tq += (t5*2225 + 8192) >> 14; \
    ti += (td*11273 + 16384) >> 15; \
    td -= (ti*315 + 256) >> 9; \
    ti += (td*11273 + 16384) >> 15; \
    tl += (ta*8637 + 16384) >> 15; \
    ta -= (tl*16151 + 16384) >> 15; \
    tl += (ta*8637 + 16384) >> 15; \
    tt += (t2*2013 + 16384) >> 15; \
    t2 -= (tt*4011 + 16384) >> 15; \
    tt += (t2*2013 + 16384) >> 15; \
    to += (t7*6101 + 16384) >> 15; \
    t7 -= (to*11793 + 16384) >> 15; \
    to += (t7*6101 + 16384) >> 15; \
    t8 += (tn*10659 + 8192) >> 14; \
    tn -= (t8*29957 + 16384) >> 15; \
    t8 += (tn*10659 + 8192) >> 14; \
    tg += (tf*819 + 1024) >> 11; \
    tf -= (tg*22595 + 16384) >> 15; \
    tg += (tf*819 + 1024) >> 11; \
    t0 += (tv*31973 + 16384) >> 15; \
    tv -= (t0*16379 + 8192) >> 14; \
    t0 += (tv*31973 + 16384) >> 15; \
    /* Stage 1 */ \
    /* Butterflies; every half is saved for stage 2. */ \
    tj -= ts; \
    tjh = OD_DCT_RSHIFT(tj, 1); \
    ts += tjh; \
    tr = tk - tr; \
    trh = OD_DCT_RSHIFT(tr, 1); \
    tk = trh - tk; \
    tc += t3; \
    tch = OD_DCT_RSHIFT(tc, 1); \
    t3 -= tch; \
    t4 += tb; \
    t4h = OD_DCT_RSHIFT(t4, 1); \
    tb -= t4h; \
    tv += tf; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    tf -= tvh; \
    t8 -= to; \
    t8h = OD_DCT_RSHIFT(t8, 1); \
    to += t8h; \
    t0 += tg; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    tg -= t0h; \
    tn = t7 - tn; \
    tnh = OD_DCT_RSHIFT(tn, 1); \
    t7 -= tnh; \
    th -= tu; \
    thh = OD_DCT_RSHIFT(th, 1); \
    tu += thh; \
    t6 += tm; \
    t6h = OD_DCT_RSHIFT(t6, 1); \
    tm = t6h - tm; \
    te += t1; \
    teh = OD_DCT_RSHIFT(te, 1); \
    t1 -= teh; \
    tp += t9; \
    tph = OD_DCT_RSHIFT(tp, 1); \
    t9 -= tph; \
    t2 -= td; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    td += t2h; \
    tl = tq - tl; \
    tlh = OD_DCT_RSHIFT(tl, 1); \
    tq -= tlh; \
    tt += ti; \
    tth = OD_DCT_RSHIFT(tt, 1); \
    ti -= tth; \
    ta += t5; \
    tah = OD_DCT_RSHIFT(ta, 1); \
    t5 -= tah; \
    /* Stage 2 */ \
    /* Butterflies that reuse the halves saved in stage 1. */ \
    tm -= thh; \
    th += tm; \
    t9 = teh - t9; \
    te -= t9; \
    td = tlh - td; \
    tl -= td; \
    ti += tah; \
    ta -= ti; \
    tk = tjh - tk; \
    tj -= tk; \
    tb -= tch; \
    tc += tb; \
    tg += tnh; \
    tn = tg - tn; \
    tf += t8h; \
    t8 = tf - t8; \
    t3 -= trh; \
    tr += t3; \
    ts += t4h; \
    t4 -= ts; \
    to -= t0h; \
    t0 += to; \
    t7 = tvh - t7; \
    tv = t7 - tv; \
    t1 -= t6h; \
    t6 += t1; \
    tu += tph; \
    tp -= tu; \
    tq -= tth; \
    tt += tq; \
    t5 += t2h; \
    t2 -= t5; \
    /* Stage 3 */ \
    /* Eight three-step updates on the t8..tn values only. */ \
    tj += (tc*11725 + 16384) >> 15; \
    tc -= (tj*5197 + 4096) >> 13; \
    tj += (tc*11725 + 16384) >> 15; \
    td += (ti*513 + 1024) >> 11; \
    ti -= (td*15447 + 16384) >> 15; \
    td += (ti*513 + 1024) >> 11; \
    th += (te*4861 + 16384) >> 15; \
    te -= (th*1189 + 2048) >> 12; \
    th += (te*4861 + 16384) >> 15; \
    tg += (tf*805 + 8192) >> 14; \
    tf -= (tg*803 + 4096) >> 13; \
    tg += (tf*805 + 8192) >> 14; \
    tb += (tk*7749 + 8192) >> 14; \
    tk -= (tb*12665 + 8192) >> 14; \
    tb += (tk*7749 + 8192) >> 14; \
    tl += (ta*2455 + 2048) >> 12; \
    ta -= (tl*28899 + 16384) >> 15; \
    tl += (ta*2455 + 2048) >> 12; \
    t9 += (tm*12151 + 8192) >> 14; \
    tm -= (t9*31357 + 16384) >> 15; \
    t9 += (tm*12151 + 8192) >> 14; \
    tn += (t8*29699 + 16384) >> 15; \
    t8 -= (tn*16305 + 8192) >> 14; \
    tn += (t8*29699 + 16384) >> 15; \
    /* Stage 4 */ \
    /* Butterflies; the first twelve save a half for stage 6, the last */ \
    /* four use the half immediately and do not keep it. */ \
    tf -= tc; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    tc += tfh; \
    ti = th - ti; \
    tih = OD_DCT_RSHIFT(ti, 1); \
    th -= tih; \
    tg += tj; \
    tgh = OD_DCT_RSHIFT(tg, 1); \
    tj = tgh - tj; \
    td -= te; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    te += tdh; \
    tm = ta - tm; \
    tmh = OD_DCT_RSHIFT(tm, 1); \
    ta = tmh - ta; \
    t9 += tl; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    tl -= t9h; \
    tb += t8; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    t8 -= tbh; \
    tk += tn; \
    tkh = OD_DCT_RSHIFT(tk, 1); \
    tn -= tkh; \
    t1 -= t2; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    t2 += t1h; \
    t3 += tv; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    tv -= t3h; \
    tu += tt; \
    tuh = OD_DCT_RSHIFT(tu, 1); \
    tt -= tuh; \
    ts -= t0; \
    tsh = OD_DCT_RSHIFT(ts, 1); \
    t0 += tsh; \
    tq = t6 - tq; \
    t6 -= OD_DCT_RSHIFT(tq, 1); \
    to += tr; \
    tr = OD_DCT_RSHIFT(to, 1) - tr; \
    t7 = t4 - t7; \
    t4 -= OD_DCT_RSHIFT(t7, 1); \
    t5 -= tp; \
    tp += OD_DCT_RSHIFT(t5, 1); \
    /* Stage 5 */ \
    /* 2485/8192 ~= Tan[3*Pi/32], 18205/32768 ~= Sin[3*Pi/16] */ \
    tp += (t6*2485 + 4096) >> 13; \
    t6 -= (tp*18205 + 16384) >> 15; \
    tp += (t6*2485 + 4096) >> 13; \
    /* 3227/32768 ~= Tan[Pi/32], 6393/32768 ~= Sin[Pi/16] */ \
    to += (t7*3227 + 16384) >> 15; \
    t7 -= (to*6393 + 16384) >> 15; \
    to += (t7*3227 + 16384) >> 15; \
    /* 17515/32768 ~= Tan[5*Pi/32], 13623/16384 ~= Sin[5*Pi/16] */ \
    tq += (t5*17515 + 16384) >> 15; \
    t5 -= (tq*13623 + 8192) >> 14; \
    tq += (t5*17515 + 16384) >> 15; \
    /* 6723/8192 ~= Tan[7*Pi/32], 16069/16384 ~= Sin[7*Pi/16] */ \
    t4 += (tr*6723 + 4096) >> 13; \
    tr -= (t4*16069 + 8192) >> 14; \
    t4 += (tr*6723 + 4096) >> 13; \
    /* Stage 6 */ \
    /* Butterflies that reuse the halves saved in stage 4; the last four */ \
    /* recompute their half from the current value. */ \
    tj += tdh; \
    td -= tj; \
    tc -= tih; \
    ti += tc; \
    th = tgh - th; \
    tg -= th; \
    te += tfh; \
    tf -= te; \
    tl = tkh - tl; \
    tk -= tl; \
    ta += tbh; \
    tb -= ta; \
    tn -= tmh; \
    tm += tn; \
    t8 += t9h; \
    t9 = t8 - t9; \
    tt = t3h - tt; \
    t3 -= tt; \
    t2 -= tsh; \
    ts += t2; \
    tv -= t1h; \
    t1 += tv; \
    t0 += tuh; \
    tu -= t0; \
    tp = OD_DCT_RSHIFT(to, 1) - tp; \
    to -= tp; \
    t6 += OD_DCT_RSHIFT(t7, 1); \
    t7 -= t6; \
    t4 = OD_DCT_RSHIFT(tq, 1) - t4; \
    tq -= t4; \
    tr += OD_DCT_RSHIFT(t5, 1); \
    t5 = tr - t5; \
    /* Stage 7 */ \
    /* 13573/32768 ~= Tan[Pi/8], 11585/16384 ~= Sin[Pi/4]; */ \
    /* 3259/16384 ~= Tan[Pi/16], 3135/8192 ~= Sin[Pi/8]; */ \
    /* 2189x/32768 ~= Tan[3*Pi/16], 15137/16384 ~= Sin[3*Pi/8]. */ \
    /* The Tan[3*Pi/16] triples use 21894 first and 21895 last; */ \
    /* OD_IDST_32 stage 0 subtracts them in the mirrored order. */ \
    td += (ti*21894 + 16384) >> 15; \
    ti -= (td*15137 + 8192) >> 14; \
    td += (ti*21895 + 16384) >> 15; \
    tj += (tc*21894 + 16384) >> 15; \
    tc -= (tj*15137 + 8192) >> 14; \
    tj += (tc*21895 + 16384) >> 15; \
    th += (te*13573 + 16384) >> 15; \
    te -= (th*11585 + 8192) >> 14; \
    th += (te*13573 + 16384) >> 15; \
    tb += (tk*21894 + 16384) >> 15; \
    tk -= (tb*15137 + 8192) >> 14; \
    tb += (tk*21895 + 16384) >> 15; \
    ta += (tl*3259 + 8192) >> 14; \
    tl -= (ta*3135 + 4096) >> 13; \
    ta += (tl*3259 + 8192) >> 14; \
    t9 += (tm*13573 + 16384) >> 15; \
    tm -= (t9*11585 + 8192) >> 14; \
    t9 += (tm*13573 + 16384) >> 15; \
    ts += (t3*3259 + 8192) >> 14; \
    t3 -= (ts*3135 + 4096) >> 13; \
    ts += (t3*3259 + 8192) >> 14; \
    t2 += (tt*3259 + 8192) >> 14; \
    tt -= (t2*3135 + 4096) >> 13; \
    t2 += (tt*3259 + 8192) >> 14; \
    tu += (t1*13573 + 16384) >> 15; \
    t1 -= (tu*11585 + 8192) >> 14; \
    tu += (t1*13573 + 16384) >> 15; \
    tp += (t6*13573 + 16384) >> 15; \
    t6 -= (tp*11585 + 8192) >> 14; \
    tp += (t6*13573 + 16384) >> 15; \
    tq += (t5*13573 + 16384) >> 15; \
    t5 -= (tq*11585 + 8192) >> 14; \
    tq += (t5*13573 + 16384) >> 15; \
  } \
  while (0)
2107
/* OD_IDST_32: embedded 32-point orthonormal Type-IV inverse DST, in place.
 *
 * Every argument is both read and overwritten, and each one is expanded many
 * times, so pass plain modifiable lvalues without side effects.  Reading the
 * name suffix as a base-32 digit (t0..t9, ta = 10 .. tv = 31), the parameter
 * list is in 5-bit bit-reversed index order (t0, tg = 16, t8, to = 24, ...),
 * whereas OD_FDST_32 lists its parameters in natural order.
 *
 * The body mirrors OD_FDST_32: stage k here undoes stage (7 - k) of the
 * forward macro, applying the same steps in reverse with the signs flipped.
 * Stages 0, 2, 4 and 7 are three-step multiply/round/shift updates on
 * disjoint pairs; stages 1, 3, 5 and 6 are add/subtract butterflies that
 * share "half" values computed with OD_DCT_RSHIFT(x, 1).
 */
#define OD_IDST_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
 te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
  /* 117 "muls", 117 + 128 = 245 adds, 36 shifts */ \
  do { \
    /* Halves saved by one butterfly stage and consumed by a later one. */ \
    od_coeff t0h; \
    od_coeff t1h; \
    od_coeff t2h; \
    od_coeff t3h; \
    od_coeff t4h; \
    od_coeff t6h; \
    od_coeff t8h; \
    od_coeff t9h; \
    od_coeff tah; \
    od_coeff tbh; \
    od_coeff tch; \
    od_coeff tdh; \
    od_coeff teh; \
    od_coeff tfh; \
    od_coeff tgh; \
    od_coeff thh; \
    od_coeff tih; \
    od_coeff tjh; \
    od_coeff tkh; \
    od_coeff tlh; \
    od_coeff tmh; \
    od_coeff tnh; \
    od_coeff tph; \
    od_coeff trh; \
    od_coeff tsh; \
    od_coeff tth; \
    od_coeff tuh; \
    od_coeff tvh; \
    /* Stage 0 */ \
    /* 13573/32768 ~= Tan[Pi/8], 11585/16384 ~= Sin[Pi/4]; */ \
    /* 3259/16384 ~= Tan[Pi/16], 3135/8192 ~= Sin[Pi/8]; */ \
    /* 2189x/32768 ~= Tan[3*Pi/16], 15137/16384 ~= Sin[3*Pi/8]. */ \
    /* The Tan[3*Pi/16] triples use 21895 first and 21894 last, the */ \
    /* mirror of OD_FDST_32 stage 7. */ \
    tq -= (t5*13573 + 16384) >> 15; \
    t5 += (tq*11585 + 8192) >> 14; \
    tq -= (t5*13573 + 16384) >> 15; \
    tp -= (t6*13573 + 16384) >> 15; \
    t6 += (tp*11585 + 8192) >> 14; \
    tp -= (t6*13573 + 16384) >> 15; \
    tu -= (t1*13573 + 16384) >> 15; \
    t1 += (tu*11585 + 8192) >> 14; \
    tu -= (t1*13573 + 16384) >> 15; \
    t2 -= (tt*3259 + 8192) >> 14; \
    tt += (t2*3135 + 4096) >> 13; \
    t2 -= (tt*3259 + 8192) >> 14; \
    ts -= (t3*3259 + 8192) >> 14; \
    t3 += (ts*3135 + 4096) >> 13; \
    ts -= (t3*3259 + 8192) >> 14; \
    t9 -= (tm*13573 + 16384) >> 15; \
    tm += (t9*11585 + 8192) >> 14; \
    t9 -= (tm*13573 + 16384) >> 15; \
    ta -= (tl*3259 + 8192) >> 14; \
    tl += (ta*3135 + 4096) >> 13; \
    ta -= (tl*3259 + 8192) >> 14; \
    tb -= (tk*21895 + 16384) >> 15; \
    tk += (tb*15137 + 8192) >> 14; \
    tb -= (tk*21894 + 16384) >> 15; \
    th -= (te*13573 + 16384) >> 15; \
    te += (th*11585 + 8192) >> 14; \
    th -= (te*13573 + 16384) >> 15; \
    tj -= (tc*21895 + 16384) >> 15; \
    tc += (tj*15137 + 8192) >> 14; \
    tj -= (tc*21894 + 16384) >> 15; \
    td -= (ti*21895 + 16384) >> 15; \
    ti += (td*15137 + 8192) >> 14; \
    td -= (ti*21894 + 16384) >> 15; \
    /* Stage 1 */ \
    /* Butterflies; the first four use their half immediately, the */ \
    /* remaining twelve save it for stage 3. */ \
    t5 = tr - t5; \
    tr -= OD_DCT_RSHIFT(t5, 1); \
    tq += t4; \
    t4 = OD_DCT_RSHIFT(tq, 1) - t4; \
    t7 += t6; \
    t6 -= OD_DCT_RSHIFT(t7, 1); \
    to += tp; \
    tp = OD_DCT_RSHIFT(to, 1) - tp; \
    tu += t0; \
    tuh = OD_DCT_RSHIFT(tu, 1); \
    t0 -= tuh; \
    t1 -= tv; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    tv += t1h; \
    ts -= t2; \
    tsh = OD_DCT_RSHIFT(ts, 1); \
    t2 += tsh; \
    t3 += tt; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    tt = t3h - tt; \
    t9 = t8 - t9; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    t8 -= t9h; \
    tm -= tn; \
    tmh = OD_DCT_RSHIFT(tm, 1); \
    tn += tmh; \
    tb += ta; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    ta -= tbh; \
    tk += tl; \
    tkh = OD_DCT_RSHIFT(tk, 1); \
    tl = tkh - tl; \
    tf += te; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    te -= tfh; \
    tg += th; \
    tgh = OD_DCT_RSHIFT(tg, 1); \
    th = tgh - th; \
    ti -= tc; \
    tih = OD_DCT_RSHIFT(ti, 1); \
    tc += tih; \
    td += tj; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    tj -= tdh; \
    /* Stage 2 */ \
    /* 6723/8192 ~= Tan[7*Pi/32], 16069/16384 ~= Sin[7*Pi/16] */ \
    t4 -= (tr*6723 + 4096) >> 13; \
    tr += (t4*16069 + 8192) >> 14; \
    t4 -= (tr*6723 + 4096) >> 13; \
    /* 17515/32768 ~= Tan[5*Pi/32], 13623/16384 ~= Sin[5*Pi/16] */ \
    tq -= (t5*17515 + 16384) >> 15; \
    t5 += (tq*13623 + 8192) >> 14; \
    tq -= (t5*17515 + 16384) >> 15; \
    /* 3227/32768 ~= Tan[Pi/32], 6393/32768 ~= Sin[Pi/16] */ \
    to -= (t7*3227 + 16384) >> 15; \
    t7 += (to*6393 + 16384) >> 15; \
    to -= (t7*3227 + 16384) >> 15; \
    /* 2485/8192 ~= Tan[3*Pi/32], 18205/32768 ~= Sin[3*Pi/16] */ \
    tp -= (t6*2485 + 4096) >> 13; \
    t6 += (tp*18205 + 16384) >> 15; \
    tp -= (t6*2485 + 4096) >> 13; \
    /* Stage 3 */ \
    /* Butterflies; the first four recompute their half, the rest reuse */ \
    /* the halves saved in stage 1. */ \
    tp -= OD_DCT_RSHIFT(t5, 1); \
    t5 += tp; \
    t4 += OD_DCT_RSHIFT(t7, 1); \
    t7 = t4 - t7; \
    tr = OD_DCT_RSHIFT(to, 1) - tr; \
    to -= tr; \
    t6 += OD_DCT_RSHIFT(tq, 1); \
    tq = t6 - tq; \
    t0 -= tsh; \
    ts += t0; \
    tt += tuh; \
    tu -= tt; \
    tv += t3h; \
    t3 -= tv; \
    t2 -= t1h; \
    t1 += t2; \
    tn += tkh; \
    tk -= tn; \
    t8 += tbh; \
    tb -= t8; \
    tl += t9h; \
    t9 -= tl; \
    ta = tmh - ta; \
    tm = ta - tm; \
    te -= tdh; \
    td += te; \
    tj = tgh - tj; \
    tg -= tj; \
    th += tih; \
    ti = th - ti; \
    tc -= tfh; \
    tf += tc; \
    /* Stage 4 */ \
    /* Eight three-step updates on the t8..tn values only. */ \
    tn -= (t8*29699 + 16384) >> 15; \
    t8 += (tn*16305 + 8192) >> 14; \
    tn -= (t8*29699 + 16384) >> 15; \
    t9 -= (tm*12151 + 8192) >> 14; \
    tm += (t9*31357 + 16384) >> 15; \
    t9 -= (tm*12151 + 8192) >> 14; \
    tl -= (ta*2455 + 2048) >> 12; \
    ta += (tl*28899 + 16384) >> 15; \
    tl -= (ta*2455 + 2048) >> 12; \
    tb -= (tk*7749 + 8192) >> 14; \
    tk += (tb*12665 + 8192) >> 14; \
    tb -= (tk*7749 + 8192) >> 14; \
    tg -= (tf*805 + 8192) >> 14; \
    tf += (tg*803 + 4096) >> 13; \
    tg -= (tf*805 + 8192) >> 14; \
    th -= (te*4861 + 16384) >> 15; \
    te += (th*1189 + 2048) >> 12; \
    th -= (te*4861 + 16384) >> 15; \
    td -= (ti*513 + 1024) >> 11; \
    ti += (td*15447 + 16384) >> 15; \
    td -= (ti*513 + 1024) >> 11; \
    tj -= (tc*11725 + 16384) >> 15; \
    tc += (tj*5197 + 4096) >> 13; \
    tj -= (tc*11725 + 16384) >> 15; \
    /* Stage 5 */ \
    /* Butterflies; every half is saved for stage 6. */ \
    t2 += t5; \
    t2h = OD_DCT_RSHIFT(t2, 1); \
    t5 -= t2h; \
    tt -= tq; \
    tth = OD_DCT_RSHIFT(tt, 1); \
    tq += tth; \
    tp += tu; \
    tph = OD_DCT_RSHIFT(tp, 1); \
    tu -= tph; \
    t6 -= t1; \
    t6h = OD_DCT_RSHIFT(t6, 1); \
    t1 += t6h; \
    tv = t7 - tv; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    t7 = tvh - t7; \
    t0 -= to; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    to += t0h; \
    t4 += ts; \
    t4h = OD_DCT_RSHIFT(t4, 1); \
    ts -= t4h; \
    tr -= t3; \
    trh = OD_DCT_RSHIFT(tr, 1); \
    t3 += trh; \
    t8 = tf - t8; \
    t8h = OD_DCT_RSHIFT(t8, 1); \
    tf -= t8h; \
    tn = tg - tn; \
    tnh = OD_DCT_RSHIFT(tn, 1); \
    tg -= tnh; \
    tc -= tb; \
    tch = OD_DCT_RSHIFT(tc, 1); \
    tb += tch; \
    tj += tk; \
    tjh = OD_DCT_RSHIFT(tj, 1); \
    tk = tjh - tk; \
    ta += ti; \
    tah = OD_DCT_RSHIFT(ta, 1); \
    ti -= tah; \
    tl += td; \
    tlh = OD_DCT_RSHIFT(tl, 1); \
    td = tlh - td; \
    te += t9; \
    teh = OD_DCT_RSHIFT(te, 1); \
    t9 = teh - t9; \
    th -= tm; \
    thh = OD_DCT_RSHIFT(th, 1); \
    tm += thh; \
    /* Stage 6 */ \
    /* Butterflies that reuse the halves saved in stage 5. */ \
    t5 += tah; \
    ta -= t5; \
    ti += tth; \
    tt -= ti; \
    tq += tlh; \
    tl = tq - tl; \
    td -= t2h; \
    t2 += td; \
    t9 += tph; \
    tp -= t9; \
    t1 += teh; \
    te -= t1; \
    tm = t6h - tm; \
    t6 -= tm; \
    tu -= thh; \
    th += tu; \
    t7 += tnh; \
    tn = t7 - tn; \
    tg += t0h; \
    t0 -= tg; \
    to -= t8h; \
    t8 += to; \
    tf += tvh; \
    tv -= tf; \
    tb += t4h; \
    t4 -= tb; \
    t3 += tch; \
    tc -= t3; \
    tk = trh - tk; \
    tr = tk - tr; \
    ts -= tjh; \
    tj += ts; \
    /* Stage 7 */ \
    /* Sixteen three-step updates, one per disjoint pair of outputs. */ \
    t0 -= (tv*31973 + 16384) >> 15; \
    tv += (t0*16379 + 8192) >> 14; \
    t0 -= (tv*31973 + 16384) >> 15; \
    tg -= (tf*819 + 1024) >> 11; \
    tf += (tg*22595 + 16384) >> 15; \
    tg -= (tf*819 + 1024) >> 11; \
    t8 -= (tn*10659 + 8192) >> 14; \
    tn += (t8*29957 + 16384) >> 15; \
    t8 -= (tn*10659 + 8192) >> 14; \
    to -= (t7*6101 + 16384) >> 15; \
    t7 += (to*11793 + 16384) >> 15; \
    to -= (t7*6101 + 16384) >> 15; \
    tt -= (t2*2013 + 16384) >> 15; \
    t2 += (tt*4011 + 16384) >> 15; \
    tt -= (t2*2013 + 16384) >> 15; \
    tl -= (ta*8637 + 16384) >> 15; \
    ta += (tl*16151 + 16384) >> 15; \
    tl -= (ta*8637 + 16384) >> 15; \
    ti -= (td*11273 + 16384) >> 15; \
    td += (ti*315 + 256) >> 9; \
    ti -= (td*11273 + 16384) >> 15; \
    tq -= (t5*2225 + 8192) >> 14; \
    t5 += (tq*2185 + 4096) >> 13; \
    tq -= (t5*2225 + 8192) >> 14; \
    ts -= (t3*1411 + 8192) >> 14; \
    t3 += (ts*2801 + 8192) >> 14; \
    ts -= (t3*1411 + 8192) >> 14; \
    tb -= (tk*18035 + 16384) >> 15; \
    tk += (tb*6921 + 4096) >> 13; \
    tb -= (tk*18035 + 16384) >> 15; \
    tj -= (tc*10381 + 16384) >> 15; \
    tc += (tj*4717 + 4096) >> 13; \
    tj -= (tc*10381 + 16384) >> 15; \
    t4 -= (tr*13113 + 8192) >> 14; \
    tr += (t4*7993 + 4096) >> 13; \
    t4 -= (tr*13113 + 8192) >> 14; \
    tu -= (t1*1207 + 16384) >> 15; \
    t1 += (tu*2411 + 16384) >> 15; \
    tu -= (t1*1207 + 16384) >> 15; \
    t9 -= (tm*20191 + 16384) >> 15; \
    tm += (t9*29269 + 16384) >> 15; \
    t9 -= (tm*20191 + 16384) >> 15; \
    th -= (te*3045 + 4096) >> 13; \
    te += (th*21403 + 16384) >> 15; \
    th -= (te*3045 + 4096) >> 13; \
    tp -= (t6*659 + 2048) >> 12; \
    t6 += (tp*10279 + 16384) >> 15; \
    tp -= (t6*659 + 2048) >> 12; \
  } \
  while (0)
2424
#if CONFIG_TX64X64
/* OD_FDCT_32_ASYM: embedded 32-point asymmetric Type-II forward DCT, in place.
 *
 * The 32 value arguments are read and overwritten; the sixteen ...h
 * arguments (tgh, toh, ..., tvh) are only read.  Each argument is expanded
 * more than once, so pass plain lvalues without side effects.
 * NOTE(review): each ...h argument is presumably OD_DCT_RSHIFT(x, 1) of the
 * value listed just before it, as the 32-point macros in this file compute
 * for their own halves -- confirm against the caller.
 *
 * The macro applies sixteen butterflies pairing tN with t(31 - N), using the
 * supplied halves instead of computing them, then runs OD_FDCT_16 on the
 * t0, tg, t8, ... group and OD_FDST_16 on the tv, tf, tn, ... group.
 */
#define OD_FDCT_32_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
 t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh, t1, th, thh, \
 t9, tp, tph, t5, tl, tlh, td, tt, tth, t3, tj, tjh, tb, tr, trh, \
 t7, tn, tnh, tf, tv, tvh) \
  /* Embedded 32-point asymmetric Type-II fDCT. */ \
  do { \
    /* Butterflies, even N: low value gains the half, high = low - high. */ \
    /* Butterflies, odd N: low = half - low, high loses the new low. */ \
    t0 += tvh; \
    tv = t0 - tv; \
    t1 = tuh - t1; \
    tu -= t1; \
    t2 += tth; \
    tt = t2 - tt; \
    t3 = tsh - t3; \
    ts -= t3; \
    t4 += trh; \
    tr = t4 - tr; \
    t5 = tqh - t5; \
    tq -= t5; \
    t6 += tph; \
    tp = t6 - tp; \
    t7 = toh - t7; \
    to -= t7; \
    t8 += tnh; \
    tn = t8 - tn; \
    t9 = tmh - t9; \
    tm -= t9; \
    ta += tlh; \
    tl = ta - tl; \
    tb = tkh - tb; \
    tk -= tb; \
    tc += tjh; \
    tj = tc - tj; \
    td = tih - td; \
    ti -= td; \
    te += thh; \
    th = te - th; \
    tf = tgh - tf; \
    tg -= tf; \
    /* 16-point DCT on one half, 16-point DST on the other. */ \
    OD_FDCT_16(t0, tg, t8, to, t4, tk, tc, ts, \
     t2, ti, ta, tq, t6, tm, te, tu); \
    OD_FDST_16(tv, tf, tn, t7, tr, tb, tj, t3, \
     tt, td, tl, t5, tp, t9, th, t1); \
  } \
  while (0)
2470
/* OD_IDCT_32_ASYM: embedded 32-point asymmetric Type-II inverse DCT, in place.
 *
 * The 32 value arguments are read and overwritten; the sixteen ...h
 * arguments (t1h, thh, ..., tvh) are outputs only and receive
 * OD_DCT_RSHIFT(x, 1) of the value listed just before them.  Each argument
 * is expanded more than once, so pass plain modifiable lvalues without side
 * effects.
 *
 * The macro runs OD_IDST_16 and OD_IDCT_16 on the two 16-value groups, then
 * applies sixteen butterflies pairing tN with t(31 - N), storing the half
 * produced by each butterfly in the matching ...h argument.
 */
#define OD_IDCT_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, \
 t6, tm, te, tu, t1, t1h, th, thh, t9, t9h, tp, tph, t5, t5h, tl, tlh, \
 td, tdh, tt, tth, t3, t3h, tj, tjh, tb, tbh, tr, trh, t7, t7h, tn, tnh, \
 tf, tfh, tv, tvh) \
  /* Embedded 32-point asymmetric Type-II iDCT. */ \
  do { \
    OD_IDST_16(tv, tn, tr, tj, tt, tl, tp, th, \
     tu, tm, tq, ti, ts, tk, to, tg); \
    OD_IDCT_16(t0, t8, t4, tc, t2, ta, t6, te, \
     t1, t9, t5, td, t3, tb, t7, tf); \
    /* Butterfly t0/tv. */ \
    tv = t0 - tv; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    t0 -= tvh; \
    /* Butterfly t1/tu. */ \
    t1 += tu; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    tu = t1h - tu; \
    /* Butterfly t2/tt. */ \
    tt = t2 - tt; \
    tth = OD_DCT_RSHIFT(tt, 1); \
    t2 -= tth; \
    /* Butterfly t3/ts. */ \
    t3 += ts; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    ts = t3h - ts; \
    /* Butterfly t4/tr. */ \
    tr = t4 - tr; \
    trh = OD_DCT_RSHIFT(tr, 1); \
    t4 -= trh; \
    /* Butterfly t5/tq. */ \
    t5 += tq; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    tq = t5h - tq; \
    /* Butterfly t6/tp. */ \
    tp = t6 - tp; \
    tph = OD_DCT_RSHIFT(tp, 1); \
    t6 -= tph; \
    /* Butterfly t7/to. */ \
    t7 += to; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    to = t7h - to; \
    /* Butterfly t8/tn. */ \
    tn = t8 - tn; \
    tnh = OD_DCT_RSHIFT(tn, 1); \
    t8 -= tnh; \
    /* Butterfly t9/tm. */ \
    t9 += tm; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    tm = t9h - tm; \
    /* Butterfly ta/tl. */ \
    tl = ta - tl; \
    tlh = OD_DCT_RSHIFT(tl, 1); \
    ta -= tlh; \
    /* Butterfly tb/tk. */ \
    tb += tk; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    tk = tbh - tk; \
    /* Butterfly tc/tj. */ \
    tj = tc - tj; \
    tjh = OD_DCT_RSHIFT(tj, 1); \
    tc -= tjh; \
    /* Butterfly td/ti. */ \
    td += ti; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    ti = tdh - ti; \
    /* Butterfly te/th. */ \
    th = te - th; \
    thh = OD_DCT_RSHIFT(th, 1); \
    te -= thh; \
    /* Butterfly tf/tg. */ \
    tf += tg; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    tg = tfh - tg; \
  } \
  while (0)
2531
2532#define OD_FDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
2533 tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
2534 /* Embedded 32-point asymmetric Type-IV fDST. */ \
2535 do { \
2536 int t0h; \
2537 int t1h; \
2538 int t4h; \
2539 int t5h; \
2540 int tqh; \
2541 int trh; \
2542 int tuh; \
2543 int tvh; \
2544 \
2545 tu = -tu; \
2546 \
2547 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
2548 OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 271); \
2549 t5 -= (tq*13573 + 8192) >> 14; \
2550 /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
2551 OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 272); \
2552 tq += (t5*11585 + 16384) >> 15; \
2553 /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
2554 OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 273); \
2555 t5 -= (tq*13573 + 8192) >> 14; \
2556 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2557 OD_DCT_OVERFLOW_CHECK(t6, 29957, 16384, 274); \
2558 tp += (t6*29957 + 16384) >> 15; \
2559 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2560 OD_DCT_OVERFLOW_CHECK(tp, 11585, 8192, 275); \
2561 t6 -= (tp*11585 + 8192) >> 14; \
2562 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2563 OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 276); \
2564 tp -= (t6*19195 + 16384) >> 15; \
2565 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2566 OD_DCT_OVERFLOW_CHECK(t1, 29957, 16384, 277); \
2567 tu += (t1*29957 + 16384) >> 15; \
2568 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2569 OD_DCT_OVERFLOW_CHECK(tu, 11585, 8192, 278); \
2570 t1 -= (tu*11585 + 8192) >> 14; \
2571 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2572 OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 279); \
2573 tu -= (t1*19195 + 16384) >> 15; \
2574 /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2575 OD_DCT_OVERFLOW_CHECK(t2, 28681, 16384, 280); \
2576 tt += (t2*28681 + 16384) >> 15; \
2577 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2578 OD_DCT_OVERFLOW_CHECK(tt, 15137, 8192, 281); \
2579 t2 -= (tt*15137 + 8192) >> 14; \
2580 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2581 OD_DCT_OVERFLOW_CHECK(t2, 4161, 8192, 282); \
2582 tt += (t2*4161 + 8192) >> 14; \
2583 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2584 OD_DCT_OVERFLOW_CHECK(ts, 4161, 8192, 283); \
2585 t3 += (ts*4161 + 8192) >> 14; \
2586 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2587 OD_DCT_OVERFLOW_CHECK(t3, 15137, 8192, 284); \
2588 ts -= (t3*15137 + 8192) >> 14; \
2589 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2590 OD_DCT_OVERFLOW_CHECK(ts, 14341, 8192, 285); \
2591 t3 += (ts*14341 + 8192) >> 14; \
2592 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2593 OD_DCT_OVERFLOW_CHECK(tm, 19195, 16384, 286); \
2594 t9 -= (tm*19195 + 16384) >> 15; \
2595 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2596 OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 287); \
2597 tm -= (t9*11585 + 8192) >> 14; \
2598 /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2599 OD_DCT_OVERFLOW_CHECK(tm, 7489, 4096, 288); \
2600 t9 += (tm*7489 + 4096) >> 13; \
2601 /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
2602 OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 289); \
2603 ta += (tl*3259 + 4096) >> 13; \
2604 /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
2605 OD_DCT_OVERFLOW_CHECK(ta, 3135, 8192, 290); \
2606 tl -= (ta*3135 + 8192) >> 14; \
2607 /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
2608 OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 291); \
2609 ta += (tl*3259 + 4096) >> 13; \
2610 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2611 OD_DCT_OVERFLOW_CHECK(tk, 4161, 8192, 292); \
2612 tb += (tk*4161 + 8192) >> 14; \
2613 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2614 OD_DCT_OVERFLOW_CHECK(tb, 15137, 8192, 293); \
2615 tk -= (tb*15137 + 8192) >> 14; \
2616 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2617 OD_DCT_OVERFLOW_CHECK(tk, 14341, 8192, 294); \
2618 tb += (tk*14341 + 8192) >> 14; \
2619 /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
2620 OD_DCT_OVERFLOW_CHECK(te, 29957, 16384, 295); \
2621 th += (te*29957 + 16384) >> 15; \
2622 /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
2623 OD_DCT_OVERFLOW_CHECK(th, 11585, 8192, 296); \
2624 te -= (th*11585 + 8192) >> 14; \
2625 /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
2626 OD_DCT_OVERFLOW_CHECK(te, 19195, 16384, 297); \
2627 th -= (te*19195 + 16384) >> 15; \
2628 /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2629 OD_DCT_OVERFLOW_CHECK(tc, 28681, 16384, 298); \
2630 tj += (tc*28681 + 16384) >> 15; \
2631 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2632 OD_DCT_OVERFLOW_CHECK(tj, 15137, 8192, 299); \
2633 tc -= (tj*15137 + 8192) >> 14; \
2634 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2635 OD_DCT_OVERFLOW_CHECK(tc, 4161, 8192, 300); \
2636 tj += (tc*4161 + 8192) >> 14; \
2637 /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
2638 OD_DCT_OVERFLOW_CHECK(ti, 4161, 8192, 301); \
2639 td += (ti*4161 + 8192) >> 14; \
2640 /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
2641 OD_DCT_OVERFLOW_CHECK(td, 15137, 8192, 302); \
2642 ti -= (td*15137 + 8192) >> 14; \
2643 /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
2644 OD_DCT_OVERFLOW_CHECK(ti, 14341, 8192, 303); \
2645 td += (ti*14341 + 8192) >> 14; \
2646 \
2647 t1 = -t1; \
2648 t2 = -t2; \
2649 t3 = -t3; \
2650 td = -td; \
2651 tg = -tg; \
2652 to = -to; \
2653 ts = -ts; \
2654 \
2655 tr -= OD_DCT_RSHIFT(t5, 1); \
2656 t5 += tr; \
2657 tq -= OD_DCT_RSHIFT(t4, 1); /* pass */ \
2658 t4 += tq; \
2659 t6 -= OD_DCT_RSHIFT(t7, 1); \
2660 t7 += t6; \
2661 to -= OD_DCT_RSHIFT(tp, 1); /* pass */ \
2662 tp += to; \
2663 t1 += OD_DCT_RSHIFT(t0, 1); /* pass */ \
2664 t0 -= t1; \
2665 tv -= OD_DCT_RSHIFT(tu, 1); \
2666 tu += tv; \
2667 t3 -= OD_DCT_RSHIFT(tt, 1); \
2668 tt += t3; \
2669 t2 += OD_DCT_RSHIFT(ts, 1); \
2670 ts -= t2; \
2671 t9 -= OD_DCT_RSHIFT(t8, 1); /* pass */ \
2672 t8 += t9; \
2673 tn += OD_DCT_RSHIFT(tm, 1); \
2674 tm -= tn; \
2675 tb += OD_DCT_RSHIFT(ta, 1); \
2676 ta -= tb; \
2677 tl -= OD_DCT_RSHIFT(tk, 1); \
2678 tk += tl; \
2679 te -= OD_DCT_RSHIFT(tf, 1); /* pass */ \
2680 tf += te; \
2681 tg -= OD_DCT_RSHIFT(th, 1); \
2682 th += tg; \
2683 tc -= OD_DCT_RSHIFT(ti, 1); \
2684 ti += tc; \
2685 td += OD_DCT_RSHIFT(tj, 1); \
2686 tj -= td; \
2687 \
2688 t4 = -t4; \
2689 \
2690 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
2691 OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 304); \
2692 t4 += (tr*6723 + 4096) >> 13; \
2693 /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
2694 OD_DCT_OVERFLOW_CHECK(t4, 16069, 8192, 305); \
2695 tr -= (t4*16069 + 8192) >> 14; \
2696 /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
2697 OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 306); \
2698 t4 += (tr*6723 + 4096) >> 13; \
2699 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
2700 OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 307); \
2701 t5 += (tq*17515 + 16384) >> 15; \
2702 /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
2703 OD_DCT_OVERFLOW_CHECK(t5, 13623, 8192, 308); \
2704 tq -= (t5*13623 + 8192) >> 14; \
2705 /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
2706 OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 309); \
2707 t5 += (tq*17515 + 16384) >> 15; \
2708 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
2709 OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 310); \
2710 t7 += (to*3227 + 16384) >> 15; \
2711 /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
2712 OD_DCT_OVERFLOW_CHECK(t7, 6393, 16384, 311); \
2713 to -= (t7*6393 + 16384) >> 15; \
2714 /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
2715 OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 312); \
2716 t7 += (to*3227 + 16384) >> 15; \
2717 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
2718 OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 313); \
2719 t6 += (tp*2485 + 4096) >> 13; \
2720 /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
2721 OD_DCT_OVERFLOW_CHECK(t6, 18205, 16384, 314); \
2722 tp -= (t6*18205 + 16384) >> 15; \
2723 /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
2724 OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 315); \
2725 t6 += (tp*2485 + 4096) >> 13; \
2726 \
2727 t5 = -t5; \
2728 \
2729 tr += to; \
2730 trh = OD_DCT_RSHIFT(tr, 1); \
2731 to -= trh; \
2732 t4 += t7; \
2733 t4h = OD_DCT_RSHIFT(t4, 1); \
2734 t7 -= t4h; \
2735 t5 += tp; \
2736 t5h = OD_DCT_RSHIFT(t5, 1); \
2737 tp -= t5h; \
2738 tq += t6; \
2739 tqh = OD_DCT_RSHIFT(tq, 1); \
2740 t6 -= tqh; \
2741 t0 -= t3; \
2742 t0h = OD_DCT_RSHIFT(t0, 1); \
2743 t3 += t0h; \
2744 tv -= ts; \
2745 tvh = OD_DCT_RSHIFT(tv, 1); \
2746 ts += tvh; \
2747 tu += tt; \
2748 tuh = OD_DCT_RSHIFT(tu, 1); \
2749 tt -= tuh; \
2750 t1 -= t2; \
2751 t1h = OD_DCT_RSHIFT(t1, 1); \
2752 t2 += t1h; \
2753 t8 += tb; \
2754 tb -= OD_DCT_RSHIFT(t8, 1); \
2755 tn += tk; \
2756 tk -= OD_DCT_RSHIFT(tn, 1); \
2757 t9 += tl; \
2758 tl -= OD_DCT_RSHIFT(t9, 1); \
2759 tm -= ta; \
2760 ta += OD_DCT_RSHIFT(tm, 1); \
2761 tc -= tf; \
2762 tf += OD_DCT_RSHIFT(tc, 1); \
2763 tj += tg; \
2764 tg -= OD_DCT_RSHIFT(tj, 1); \
2765 td -= te; \
2766 te += OD_DCT_RSHIFT(td, 1); \
2767 ti += th; \
2768 th -= OD_DCT_RSHIFT(ti, 1); \
2769 \
2770 t9 = -t9; \
2771 tl = -tl; \
2772 \
2773 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2774 OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 316); \
2775 t8 += (tn*805 + 8192) >> 14; \
2776 /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
2777 OD_DCT_OVERFLOW_CHECK(t8, 803, 4096, 317); \
2778 tn -= (t8*803 + 4096) >> 13; \
2779 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2780 OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 318); \
2781 t8 += (tn*805 + 8192) >> 14; \
2782 /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2783 OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 319); \
2784 tk += (tb*11725 + 16384) >> 15; \
2785 /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
2786 OD_DCT_OVERFLOW_CHECK(tk, 5197, 4096, 320); \
2787 tb -= (tk*5197 + 4096) >> 13; \
2788 /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2789 OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 321); \
2790 tk += (tb*11725 + 16384) >> 15; \
2791 /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
2792 OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 322); \
2793 ta += (tl*2455 + 2048) >> 12; \
2794 /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
2795 OD_DCT_OVERFLOW_CHECK(ta, 14449, 8192, 323); \
2796 tl -= (ta*14449 + 8192) >> 14; \
2797 /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
2798 OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 324); \
2799 ta += (tl*2455 + 2048) >> 12; \
2800 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2801 OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 325); \
2802 t9 += (tm*4861 + 16384) >> 15; \
2803 /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
2804 OD_DCT_OVERFLOW_CHECK(t9, 1189, 2048, 326); \
2805 tm -= (t9*1189 + 2048) >> 12; \
2806 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2807 OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 327); \
2808 t9 += (tm*4861 + 16384) >> 15; \
2809 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2810 OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 328); \
2811 tf += (tg*805 + 8192) >> 14; \
2812 /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
2813 OD_DCT_OVERFLOW_CHECK(tf, 803, 4096, 329); \
2814 tg -= (tf*803 + 4096) >> 13; \
2815 /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
2816 OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 330); \
2817 tf += (tg*805 + 8192) >> 14; \
2818 /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2819 OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 331); \
2820 tc += (tj*2931 + 4096) >> 13; \
2821 /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
2822 OD_DCT_OVERFLOW_CHECK(tc, 5197, 4096, 332); \
2823 tj -= (tc*5197 + 4096) >> 13; \
2824 /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
2825 OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 333); \
2826 tc += (tj*2931 + 4096) >> 13; \
2827 /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
2828 OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 334); \
2829 td += (ti*513 + 1024) >> 11; \
2830 /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
2831 OD_DCT_OVERFLOW_CHECK(td, 7723, 8192, 335); \
2832 ti -= (td*7723 + 8192) >> 14; \
2833 /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
2834 OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 336); \
2835 td += (ti*513 + 1024) >> 11; \
2836 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2837 OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 337); \
2838 te += (th*4861 + 16384) >> 15; \
2839 /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
2840 OD_DCT_OVERFLOW_CHECK(te, 1189, 2048, 338); \
2841 th -= (te*1189 + 2048) >> 12; \
2842 /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
2843 OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 339); \
2844 te += (th*4861 + 16384) >> 15; \
2845 \
2846 ta = -ta; \
2847 tb = -tb; \
2848 \
2849 tt += t5h; \
2850 t5 -= tt; \
2851 t2 -= tqh; \
2852 tq += t2; \
2853 tp += t1h; \
2854 t1 -= tp; \
2855 t6 -= tuh; \
2856 tu += t6; \
2857 t7 += tvh; \
2858 tv -= t7; \
2859 to += t0h; \
2860 t0 -= to; \
2861 t3 -= t4h; \
2862 t4 += t3; \
2863 ts += trh; \
2864 tr -= ts; \
2865 tf -= OD_DCT_RSHIFT(tn, 1); \
2866 tn += tf; \
2867 tg -= OD_DCT_RSHIFT(t8, 1); \
2868 t8 += tg; \
2869 tk += OD_DCT_RSHIFT(tc, 1); \
2870 tc -= tk; \
2871 tb += OD_DCT_RSHIFT(tj, 1); \
2872 tj -= tb; \
2873 ta += OD_DCT_RSHIFT(ti, 1); \
2874 ti -= ta; \
2875 tl += OD_DCT_RSHIFT(td, 1); \
2876 td -= tl; \
2877 te -= OD_DCT_RSHIFT(tm, 1); \
2878 tm += te; \
2879 th -= OD_DCT_RSHIFT(t9, 1); \
2880 t9 += th; \
2881 ta -= t5; \
2882 t5 += OD_DCT_RSHIFT(ta, 1); \
2883 tq -= tl; \
2884 tl += OD_DCT_RSHIFT(tq, 1); \
2885 t2 -= ti; \
2886 ti += OD_DCT_RSHIFT(t2, 1); \
2887 td -= tt; \
2888 tt += OD_DCT_RSHIFT(td, 1); \
2889 tm += tp; \
2890 tp -= OD_DCT_RSHIFT(tm, 1); \
2891 t6 += t9; \
2892 t9 -= OD_DCT_RSHIFT(t6, 1); \
2893 te -= tu; \
2894 tu += OD_DCT_RSHIFT(te, 1); \
2895 t1 -= th; \
2896 th += OD_DCT_RSHIFT(t1, 1); \
2897 t0 -= tg; \
2898 tg += OD_DCT_RSHIFT(t0, 1); \
2899 tf += tv; \
2900 tv -= OD_DCT_RSHIFT(tf, 1); \
2901 t8 -= t7; \
2902 t7 += OD_DCT_RSHIFT(t8, 1); \
2903 to -= tn; \
2904 tn += OD_DCT_RSHIFT(to, 1); \
2905 t4 -= tk; \
2906 tk += OD_DCT_RSHIFT(t4, 1); \
2907 tb -= tr; \
2908 tr += OD_DCT_RSHIFT(tb, 1); \
2909 t3 -= tj; \
2910 tj += OD_DCT_RSHIFT(t3, 1); \
2911 tc -= ts; \
2912 ts += OD_DCT_RSHIFT(tc, 1); \
2913 \
2914 tr = -tr; \
2915 ts = -ts; \
2916 tt = -tt; \
2917 tu = -tu; \
2918 \
2919 /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
2920 OD_DCT_OVERFLOW_CHECK(t0, 2847, 2048, 340); \
2921 tv += (t0*2847 + 2048) >> 12; \
2922 /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
2923 OD_DCT_OVERFLOW_CHECK(tv, 5791, 2048, 341); \
2924 t0 -= (tv*5791 + 2048) >> 12; \
2925 /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
2926 OD_DCT_OVERFLOW_CHECK(t0, 5593, 4096, 342); \
2927 tv += (t0*5593 + 4096) >> 13; \
2928 /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
2929 OD_DCT_OVERFLOW_CHECK(tf, 4099, 4096, 343); \
2930 tg -= (tf*4099 + 4096) >> 13; \
2931 /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
2932 OD_DCT_OVERFLOW_CHECK(tg, 1997, 1024, 344); \
2933 tf += (tg*1997 + 1024) >> 11; \
2934 /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
2935 OD_DCT_OVERFLOW_CHECK(tf, 815, 16384, 345); \
2936 tg += (tf*815 + 16384) >> 15; \
2937 /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
2938 OD_DCT_OVERFLOW_CHECK(t8, 2527, 2048, 346); \
2939 tn -= (t8*2527 + 2048) >> 12; \
2940 /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
2941 OD_DCT_OVERFLOW_CHECK(tn, 4695, 4096, 347); \
2942 t8 += (tn*4695 + 4096) >> 13; \
2943 /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
2944 OD_DCT_OVERFLOW_CHECK(t8, 4187, 4096, 348); \
2945 tn += (t8*4187 + 4096) >> 13; \
2946 /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
2947 OD_DCT_OVERFLOW_CHECK(to, 5477, 4096, 349); \
2948 t7 += (to*5477 + 4096) >> 13; \
2949 /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
2950 OD_DCT_OVERFLOW_CHECK(t7, 4169, 4096, 350); \
2951 to -= (t7*4169 + 4096) >> 13; \
2952 /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
2953 OD_DCT_OVERFLOW_CHECK(to, 2571, 2048, 351); \
2954 t7 -= (to*2571 + 2048) >> 12; \
2955 /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
2956 OD_DCT_OVERFLOW_CHECK(t2, 5331, 4096, 352); \
2957 tt += (t2*5331 + 4096) >> 13; \
2958 /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
2959 OD_DCT_OVERFLOW_CHECK(tt, 5749, 2048, 353); \
2960 t2 -= (tt*5749 + 2048) >> 12; \
2961 /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
2962 OD_DCT_OVERFLOW_CHECK(t2, 2413, 2048, 354); \
2963 tt += (t2*2413 + 2048) >> 12; \
2964 /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
2965 OD_DCT_OVERFLOW_CHECK(td, 4167, 4096, 355); \
2966 ti -= (td*4167 + 4096) >> 13; \
2967 /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
2968 OD_DCT_OVERFLOW_CHECK(ti, 891, 512, 356); \
2969 td += (ti*891 + 512) >> 10; \
2970 /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
2971 OD_DCT_OVERFLOW_CHECK(td, 4327, 16384, 357); \
2972 ti += (td*4327 + 16384) >> 15; \
2973 /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
2974 OD_DCT_OVERFLOW_CHECK(ta, 2261, 2048, 358); \
2975 tl -= (ta*2261 + 2048) >> 12; \
2976 /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
2977 OD_DCT_OVERFLOW_CHECK(tl, 2855, 2048, 359); \
2978 ta += (tl*2855 + 2048) >> 12; \
2979 /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
2980 OD_DCT_OVERFLOW_CHECK(ta, 5417, 8192, 360); \
2981 tl += (ta*5417 + 8192) >> 14; \
2982 /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
2983 OD_DCT_OVERFLOW_CHECK(tq, 3459, 2048, 361); \
2984 t5 += (tq*3459 + 2048) >> 12; \
2985 /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
2986 OD_DCT_OVERFLOW_CHECK(t5, 1545, 2048, 362); \
2987 tq -= (t5*1545 + 2048) >> 12; \
2988 /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
2989 OD_DCT_OVERFLOW_CHECK(tq, 1971, 1024, 363); \
2990 t5 -= (tq*1971 + 1024) >> 11; \
2991 /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
2992 OD_DCT_OVERFLOW_CHECK(t3, 323, 256, 364); \
2993 ts += (t3*323 + 256) >> 9; \
2994 /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
2995 OD_DCT_OVERFLOW_CHECK(ts, 5707, 2048, 365); \
2996 t3 -= (ts*5707 + 2048) >> 12; \
2997 /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
2998 OD_DCT_OVERFLOW_CHECK(t3, 2229, 2048, 366); \
2999 ts += (t3*2229 + 2048) >> 12; \
3000 /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
3001 OD_DCT_OVERFLOW_CHECK(tc, 1061, 1024, 367); \
3002 tj -= (tc*1061 + 1024) >> 11; \
3003 /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
3004 OD_DCT_OVERFLOW_CHECK(tj, 6671, 4096, 368); \
3005 tc += (tj*6671 + 4096) >> 13; \
3006 /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
3007 OD_DCT_OVERFLOW_CHECK(tc, 6287, 16384, 369); \
3008 tj += (tc*6287 + 16384) >> 15; \
3009 /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
3010 OD_DCT_OVERFLOW_CHECK(tb, 4359, 4096, 370); \
3011 tk -= (tb*4359 + 4096) >> 13; \
3012 /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
3013 OD_DCT_OVERFLOW_CHECK(tk, 3099, 2048, 371); \
3014 tb += (tk*3099 + 2048) >> 12; \
3015 /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
3016 OD_DCT_OVERFLOW_CHECK(tb, 2109, 4096, 372); \
3017 tk += (tb*2109 + 4096) >> 13; \
3018 /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
3019 OD_DCT_OVERFLOW_CHECK(t4, 5017, 4096, 373); \
3020 tr += (t4*5017 + 4096) >> 13; \
3021 /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
3022 OD_DCT_OVERFLOW_CHECK(tr, 1413, 512, 374); \
3023 t4 -= (tr*1413 + 512) >> 10; \
3024 /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
3025 OD_DCT_OVERFLOW_CHECK(t4, 8195, 8192, 375); \
3026 tr += (t4*8195 + 8192) >> 14; \
3027 /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
3028 OD_DCT_OVERFLOW_CHECK(tm, 2373, 2048, 376); \
3029 t9 += (tm*2373 + 2048) >> 12; \
3030 /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
3031 OD_DCT_OVERFLOW_CHECK(t9, 5209, 4096, 377); \
3032 tm -= (t9*5209 + 4096) >> 13; \
3033 /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
3034 OD_DCT_OVERFLOW_CHECK(tm, 3391, 4096, 378); \
3035 t9 -= (tm*3391 + 4096) >> 13; \
3036 /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
3037 OD_DCT_OVERFLOW_CHECK(t6, 1517, 1024, 379); \
3038 tp -= (t6*1517 + 1024) >> 11; \
3039 /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
3040 OD_DCT_OVERFLOW_CHECK(tp, 1817, 2048, 380); \
3041 t6 += (tp*1817 + 2048) >> 12; \
3042 /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
3043 OD_DCT_OVERFLOW_CHECK(t6, 6331, 4096, 381); \
3044 tp += (t6*6331 + 4096) >> 13; \
3045 /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
3046 OD_DCT_OVERFLOW_CHECK(te, 515, 512, 382); \
3047 th -= (te*515 + 512) >> 10; \
3048 /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
3049 OD_DCT_OVERFLOW_CHECK(th, 7567, 4096, 383); \
3050 te += (th*7567 + 4096) >> 13; \
3051 /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
3052 OD_DCT_OVERFLOW_CHECK(te, 2513, 16384, 384); \
3053 th += (te*2513 + 16384) >> 15; \
3054 /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
3055 OD_DCT_OVERFLOW_CHECK(t1, 2753, 2048, 385); \
3056 tu += (t1*2753 + 2048) >> 12; \
3057 /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
3058 OD_DCT_OVERFLOW_CHECK(tu, 5777, 2048, 386); \
3059 t1 -= (tu*5777 + 2048) >> 12; \
3060 /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
3061 OD_DCT_OVERFLOW_CHECK(t1, 1301, 1024, 387); \
3062 tu += (t1*1301 + 1024) >> 11; \
3063 } \
3064 while (0)
3065
/* OD_IDST_32_ASYM: embedded 32-point asymmetric Type-IV inverse DST.
 *
 * All 32 arguments are modifiable integer lvalues that are transformed in
 * place.  Each argument is named several times in the expansion, so callers
 * must pass side-effect-free expressions, and must not pass variables whose
 * names collide with the temporaries declared below (t0h, t4h, tbh, tfh, tgh,
 * tkh, trh, tvh).
 *
 * The body is a fixed sequence of integer lifting steps:
 *   - three-step rotations of the form a -= (b*C + R) >> S, where R is always
 *     half of 1 << S,
 *   - butterflies that combine a sum/difference with a halving done through
 *     OD_DCT_RSHIFT(x, 1),
 *   - plain sign flips.
 * Every step only adds a function of other values to one value (or negates
 * it), so the exact statement order matters and must not be rearranged.
 *
 * The opening rotation stage uses the same constants as the closing stage of
 * OD_FDST_32_ASYM, applied in reverse order with opposite signs.
 *
 * The *h temporaries hold halves produced in the second butterfly stage; they
 * are consumed again by a later butterfly stage after the values they were
 * derived from have been left untouched in between.
 */
#define OD_IDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
 tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
  /* Embedded 32-point asymmetric Type-IV iDST. */ \
  do { \
    int t0h; \
    int t4h; \
    int tbh; \
    int tfh; \
    int tgh; \
    int tkh; \
    int trh; \
    int tvh; \
    /* Stage 1: sixteen three-step lifting rotations. */ \
    /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
    tf -= (tg*1301 + 1024) >> 11; \
    /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
    tg += (tf*5777 + 2048) >> 12; \
    /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
    tf -= (tg*2753 + 2048) >> 12; \
    /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
    th -= (te*2513 + 16384) >> 15; \
    /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
    te -= (th*7567 + 4096) >> 13; \
    /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
    th += (te*515 + 512) >> 10; \
    /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
    tj -= (tc*6331 + 4096) >> 13; \
    /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
    tc -= (tj*1817 + 2048) >> 12; \
    /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
    tj += (tc*1517 + 1024) >> 11; \
    /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
    ti += (td*3391 + 4096) >> 13; \
    /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
    td += (ti*5209 + 4096) >> 13; \
    /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
    ti -= (td*2373 + 2048) >> 12; \
    /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
    tr -= (t4*8195 + 8192) >> 14; \
    /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
    t4 += (tr*1413 + 512) >> 10; \
    /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
    tr -= (t4*5017 + 4096) >> 13; \
    /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
    t5 -= (tq*2109 + 4096) >> 13; \
    /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
    tq -= (t5*3099 + 2048) >> 12; \
    /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
    t5 += (tq*4359 + 4096) >> 13; \
    /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
    tp -= (t6*6287 + 16384) >> 15; \
    /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
    t6 -= (tp*6671 + 4096) >> 13; \
    /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
    tp += (t6*1061 + 1024) >> 11; \
    /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
    t7 -= (to*2229 + 2048) >> 12; \
    /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
    to += (t7*5707 + 2048) >> 12; \
    /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
    t7 -= (to*323 + 256) >> 9; \
    /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
    tk += (tb*1971 + 1024) >> 11; \
    /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
    tb += (tk*1545 + 2048) >> 12; \
    /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
    tk -= (tb*3459 + 2048) >> 12; \
    /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
    tl -= (ta*5417 + 8192) >> 14; \
    /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
    ta -= (tl*2855 + 2048) >> 12; \
    /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
    tl += (ta*2261 + 2048) >> 12; \
    /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
    t9 -= (tm*4327 + 16384) >> 15; \
    /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
    tm -= (t9*891 + 512) >> 10; \
    /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
    t9 += (tm*4167 + 4096) >> 13; \
    /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
    tn -= (t8*2413 + 2048) >> 12; \
    /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
    t8 += (tn*5749 + 2048) >> 12; \
    /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
    tn -= (t8*5331 + 4096) >> 13; \
    /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
    ts += (t3*2571 + 2048) >> 12; \
    /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
    t3 += (ts*4169 + 4096) >> 13; \
    /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
    ts -= (t3*5477 + 4096) >> 13; \
    /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
    tt -= (t2*4187 + 4096) >> 13; \
    /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
    t2 -= (tt*4695 + 4096) >> 13; \
    /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
    tt += (t2*2527 + 2048) >> 12; \
    /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
    t1 -= (tu*815 + 16384) >> 15; \
    /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
    tu -= (t1*1997 + 1024) >> 11; \
    /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
    t1 += (tu*4099 + 4096) >> 13; \
    /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
    tv -= (t0*5593 + 4096) >> 13; \
    /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
    t0 += (tv*5791 + 2048) >> 12; \
    /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
    tv -= (t0*2847 + 2048) >> 12; \
    \
    /* Sign flips between stage 1 and the first butterfly stage. */ \
    t7 = -t7; \
    tf = -tf; \
    tn = -tn; \
    tr = -tr; \
    \
    /* Stage 2: butterflies (halve first, then accumulate). */ \
    t7 -= OD_DCT_RSHIFT(t6, 1); \
    t6 += t7; \
    tp -= OD_DCT_RSHIFT(to, 1); \
    to += tp; \
    tr -= OD_DCT_RSHIFT(tq, 1); \
    tq += tr; \
    t5 -= OD_DCT_RSHIFT(t4, 1); \
    t4 += t5; \
    tt -= OD_DCT_RSHIFT(t3, 1); \
    t3 += tt; \
    ts -= OD_DCT_RSHIFT(t2, 1); \
    t2 += ts; \
    tv += OD_DCT_RSHIFT(tu, 1); \
    tu -= tv; \
    t1 -= OD_DCT_RSHIFT(t0, 1); \
    t0 += t1; \
    th -= OD_DCT_RSHIFT(tg, 1); \
    tg += th; \
    tf -= OD_DCT_RSHIFT(te, 1); \
    te += tf; \
    ti += OD_DCT_RSHIFT(tc, 1); \
    tc -= ti; \
    tj += OD_DCT_RSHIFT(td, 1); \
    td -= tj; \
    tn -= OD_DCT_RSHIFT(tm, 1); \
    tm += tn; \
    t9 -= OD_DCT_RSHIFT(t8, 1); \
    t8 += t9; \
    tl -= OD_DCT_RSHIFT(tb, 1); \
    tb += tl; \
    tk -= OD_DCT_RSHIFT(ta, 1); \
    ta += tk; \
    \
    /* Stage 3: butterflies (accumulate first, then halve).  The last eight \
       keep their halves in the *h temporaries for reuse in stage 6. */ \
    ti -= th; \
    th += OD_DCT_RSHIFT(ti, 1); \
    td -= te; \
    te += OD_DCT_RSHIFT(td, 1); \
    tm += tl; \
    tl -= OD_DCT_RSHIFT(tm, 1); \
    t9 += ta; \
    ta -= OD_DCT_RSHIFT(t9, 1); \
    tp += tq; \
    tq -= OD_DCT_RSHIFT(tp, 1); \
    t6 += t5; \
    t5 -= OD_DCT_RSHIFT(t6, 1); \
    t2 -= t1; \
    t1 += OD_DCT_RSHIFT(t2, 1); \
    tt -= tu; \
    tu += OD_DCT_RSHIFT(tt, 1); \
    tr += t7; \
    trh = OD_DCT_RSHIFT(tr, 1); \
    t7 -= trh; \
    t4 -= to; \
    t4h = OD_DCT_RSHIFT(t4, 1); \
    to += t4h; \
    t0 += t3; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t3 -= t0h; \
    tv += ts; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    ts -= tvh; \
    tf -= tc; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    tc += tfh; \
    tg += tj; \
    tgh = OD_DCT_RSHIFT(tg, 1); \
    tj -= tgh; \
    tb -= t8; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    t8 += tbh; \
    tk += tn; \
    tkh = OD_DCT_RSHIFT(tk, 1); \
    tn -= tkh; \
    \
    ta = -ta; \
    tq = -tq; \
    \
    /* Stage 4: eight three-step lifting rotations. */ \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    te -= (th*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
    th += (te*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    te -= (th*4861 + 16384) >> 15; \
    /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
    tm -= (t9*513 + 1024) >> 11; \
    /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
    t9 += (tm*7723 + 8192) >> 14; \
    /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
    tm -= (t9*513 + 1024) >> 11; \
    /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    t6 -= (tp*2931 + 4096) >> 13; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
    tp += (t6*5197 + 4096) >> 13; \
    /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    t6 -= (tp*2931 + 4096) >> 13; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    tu -= (t1*805 + 8192) >> 14; \
    /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
    t1 += (tu*803 + 4096) >> 13; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    tu -= (t1*805 + 8192) >> 14; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    ti -= (td*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
    td += (ti*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    ti -= (td*4861 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
    ta -= (tl*2455 + 2048) >> 12; \
    /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    tl += (ta*14449 + 8192) >> 14; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
    ta -= (tl*2455 + 2048) >> 12; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    t5 -= (tq*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
    tq += (t5*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    t5 -= (tq*11725 + 16384) >> 15; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    t2 -= (tt*805 + 8192) >> 14; \
    /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
    tt += (t2*803 + 4096) >> 13; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    t2 -= (tt*805 + 8192) >> 14; \
    \
    tl = -tl; \
    ti = -ti; \
    \
    /* Stage 5: butterflies (halve first, then accumulate). */ \
    th += OD_DCT_RSHIFT(t9, 1); \
    t9 -= th; \
    te -= OD_DCT_RSHIFT(tm, 1); \
    tm += te; \
    t1 += OD_DCT_RSHIFT(tp, 1); \
    tp -= t1; \
    tu -= OD_DCT_RSHIFT(t6, 1); \
    t6 += tu; \
    ta -= OD_DCT_RSHIFT(td, 1); \
    td += ta; \
    tl += OD_DCT_RSHIFT(ti, 1); \
    ti -= tl; \
    t5 += OD_DCT_RSHIFT(tt, 1); \
    tt -= t5; \
    tq += OD_DCT_RSHIFT(t2, 1); \
    t2 -= tq; \
    \
    /* Stage 6: butterflies that reuse the halves saved in stage 3. */ \
    t8 -= tgh; \
    tg += t8; \
    tn += tfh; \
    tf -= tn; \
    t7 -= tvh; \
    tv += t7; \
    to -= t0h; \
    t0 += to; \
    tc += tbh; \
    tb -= tc; \
    tj += tkh; \
    tk -= tj; \
    ts += t4h; \
    t4 -= ts; \
    t3 += trh; \
    tr -= t3; \
    \
    tk = -tk; \
    \
    /* Stage 7: four three-step lifting rotations. */ \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    tc -= (tj*2485 + 4096) >> 13; \
    /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
    tj += (tc*18205 + 16384) >> 15; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    tc -= (tj*2485 + 4096) >> 13; \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    ts -= (t3*3227 + 16384) >> 15; \
    /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
    t3 += (ts*6393 + 16384) >> 15; \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    ts -= (t3*3227 + 16384) >> 15; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
    tk -= (tb*17515 + 16384) >> 15; \
    /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
    tb += (tk*13623 + 8192) >> 14; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
    tk -= (tb*17515 + 16384) >> 15; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
    t4 -= (tr*6723 + 4096) >> 13; \
    /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
    tr += (t4*16069 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
    t4 -= (tr*6723 + 4096) >> 13; \
    \
    t4 = -t4; \
    \
    /* Stage 8: butterflies (accumulate first, then halve). */ \
    tp += tm; \
    tm -= OD_DCT_RSHIFT(tp, 1); \
    t9 -= t6; \
    t6 += OD_DCT_RSHIFT(t9, 1); \
    th -= t1; \
    t1 += OD_DCT_RSHIFT(th, 1); \
    tu -= te; \
    te += OD_DCT_RSHIFT(tu, 1); \
    t5 -= tl; \
    tl += OD_DCT_RSHIFT(t5, 1); \
    ta += tq; \
    tq -= OD_DCT_RSHIFT(ta, 1); \
    td += tt; \
    tt -= OD_DCT_RSHIFT(td, 1); \
    t2 -= ti; \
    ti += OD_DCT_RSHIFT(t2, 1); \
    t7 += t8; \
    t8 -= OD_DCT_RSHIFT(t7, 1); \
    tn -= to; \
    to += OD_DCT_RSHIFT(tn, 1); \
    tf -= tv; \
    tv += OD_DCT_RSHIFT(tf, 1); \
    t0 += tg; \
    tg -= OD_DCT_RSHIFT(t0, 1); \
    tj -= t3; \
    t3 += OD_DCT_RSHIFT(tj, 1); \
    ts -= tc; \
    tc += OD_DCT_RSHIFT(ts, 1); \
    t4 -= tb; \
    tb += OD_DCT_RSHIFT(t4, 1); \
    tk -= tr; \
    tr += OD_DCT_RSHIFT(tk, 1); \
    \
    t1 = -t1; \
    t3 = -t3; \
    t7 = -t7; \
    t8 = -t8; \
    tg = -tg; \
    tm = -tm; \
    to = -to; \
    \
    /* Stage 9: eleven three-step lifting rotations. */ \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    tm -= (t9*14341 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t9 += (tm*15137 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    tm -= (t9*4161 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    tp -= (t6*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t6 += (tp*15137 + 8192) >> 14; \
    /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    tp -= (t6*28681 + 16384) >> 15; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    th += (te*19195 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    te += (th*11585 + 8192) >> 14; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    th -= (te*29957 + 16384) >> 15; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    tq -= (t5*14341 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t5 += (tq*15137 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    tq -= (t5*4161 + 8192) >> 14; \
    /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
    ta -= (tl*3259 + 4096) >> 13; \
    /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
    tl += (ta*3135 + 8192) >> 14; \
    /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
    ta -= (tl*3259 + 4096) >> 13; \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    ti -= (td*7489 + 4096) >> 13; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    td += (ti*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    ti += (td*19195 + 16384) >> 15; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    to -= (t7*14341 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t7 += (to*15137 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    to -= (t7*4161 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    tn -= (t8*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    t8 += (tn*15137 + 8192) >> 14; \
    /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    tn -= (t8*28681 + 16384) >> 15; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    tf += (tg*19195 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    tg += (tf*11585 + 8192) >> 14; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    tf -= (tg*29957 + 16384) >> 15; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    tj += (tc*19195 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    tc += (tj*11585 + 8192) >> 14; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    tj -= (tc*29957 + 16384) >> 15; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    tk += (tb*13573 + 8192) >> 14; \
    /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
    tb -= (tk*11585 + 16384) >> 15; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    tk += (tb*13573 + 8192) >> 14; \
    \
    tf = -tf; \
  } \
  while (0)
3484
/* OD_FDCT_64: embedded 64-point orthonormal Type-II forward DCT.
 *
 * All 64 arguments are modifiable integer lvalues that are transformed in
 * place.  Each argument is named several times in the expansion, so callers
 * must pass side-effect-free expressions, and must not pass variables whose
 * names collide with the temporaries declared below (uwh ... u_h, uh_).
 *
 * The macro works in two steps:
 *   1. A butterfly stage over 32 independent pairs.  Pairs alternate between
 *        hi = lo - hi;  hih = OD_DCT_RSHIFT(hi, 1);  lo -= hih;
 *      and
 *        hi += lo;      hih = OD_DCT_RSHIFT(hi, 1);  lo = hih - lo;
 *   2. OD_FDCT_32_ASYM is applied to the first 32 parameters (in parameter
 *      order, with the saved half following every second one), and
 *      OD_FDST_32_ASYM is applied to the last 32 parameters in reverse
 *      parameter order.
 *
 * The half of the final parameter `u` is stored in `uh_` rather than `uh`
 * because `uh` is itself a parameter name.
 */
#define OD_FDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
 us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
 ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
 ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
  /* Embedded 64-point orthonormal Type-II fDCT. */ \
  do { \
    int uwh; \
    int uxh; \
    int uyh; \
    int uzh; \
    int uAh; \
    int uBh; \
    int uCh; \
    int uDh; \
    int uEh; \
    int uFh; \
    int uGh; \
    int uHh; \
    int uIh; \
    int uJh; \
    int uKh; \
    int uLh; \
    int uMh; \
    int uNh; \
    int uOh; \
    int uPh; \
    int uQh; \
    int uRh; \
    int uSh; \
    int uTh; \
    int uUh; \
    int uVh; \
    int uWh; \
    int uXh; \
    int uYh; \
    int uZh; \
    int u_h; \
    int uh_; \
    /* Butterfly stage: 32 independent (lo, hi) pairs. */ \
    u = u0 - u; \
    uh_ = OD_DCT_RSHIFT(u, 1); \
    u0 -= uh_; \
    u_ += u1; \
    u_h = OD_DCT_RSHIFT(u_, 1); \
    u1 = u_h - u1; \
    uZ = u2 - uZ; \
    uZh = OD_DCT_RSHIFT(uZ, 1); \
    u2 -= uZh; \
    uY += u3; \
    uYh = OD_DCT_RSHIFT(uY, 1); \
    u3 = uYh - u3; \
    uX = u4 - uX; \
    uXh = OD_DCT_RSHIFT(uX, 1); \
    u4 -= uXh; \
    uW += u5; \
    uWh = OD_DCT_RSHIFT(uW, 1); \
    u5 = uWh - u5; \
    uV = u6 - uV; \
    uVh = OD_DCT_RSHIFT(uV, 1); \
    u6 -= uVh; \
    uU += u7; \
    uUh = OD_DCT_RSHIFT(uU, 1); \
    u7 = uUh - u7; \
    uT = u8 - uT; \
    uTh = OD_DCT_RSHIFT(uT, 1); \
    u8 -= uTh; \
    uS += u9; \
    uSh = OD_DCT_RSHIFT(uS, 1); \
    u9 = uSh - u9; \
    uR = ua - uR; \
    uRh = OD_DCT_RSHIFT(uR, 1); \
    ua -= uRh; \
    uQ += ub; \
    uQh = OD_DCT_RSHIFT(uQ, 1); \
    ub = uQh - ub; \
    uP = uc - uP; \
    uPh = OD_DCT_RSHIFT(uP, 1); \
    uc -= uPh; \
    uO += ud; \
    uOh = OD_DCT_RSHIFT(uO, 1); \
    ud = uOh - ud; \
    uN = ue - uN; \
    uNh = OD_DCT_RSHIFT(uN, 1); \
    ue -= uNh; \
    uM += uf; \
    uMh = OD_DCT_RSHIFT(uM, 1); \
    uf = uMh - uf; \
    uL = ug - uL; \
    uLh = OD_DCT_RSHIFT(uL, 1); \
    ug -= uLh; \
    uK += uh; \
    uKh = OD_DCT_RSHIFT(uK, 1); \
    uh = uKh - uh; \
    uJ = ui - uJ; \
    uJh = OD_DCT_RSHIFT(uJ, 1); \
    ui -= uJh; \
    uI += uj; \
    uIh = OD_DCT_RSHIFT(uI, 1); \
    uj = uIh - uj; \
    uH = uk - uH; \
    uHh = OD_DCT_RSHIFT(uH, 1); \
    uk -= uHh; \
    uG += ul; \
    uGh = OD_DCT_RSHIFT(uG, 1); \
    ul = uGh - ul; \
    uF = um - uF; \
    uFh = OD_DCT_RSHIFT(uF, 1); \
    um -= uFh; \
    uE += un; \
    uEh = OD_DCT_RSHIFT(uE, 1); \
    un = uEh - un; \
    uD = uo - uD; \
    uDh = OD_DCT_RSHIFT(uD, 1); \
    uo -= uDh; \
    uC += up; \
    uCh = OD_DCT_RSHIFT(uC, 1); \
    up = uCh - up; \
    uB = uq - uB; \
    uBh = OD_DCT_RSHIFT(uB, 1); \
    uq -= uBh; \
    uA += ur; \
    uAh = OD_DCT_RSHIFT(uA, 1); \
    ur = uAh - ur; \
    uz = us - uz; \
    uzh = OD_DCT_RSHIFT(uz, 1); \
    us -= uzh; \
    uy += ut; \
    uyh = OD_DCT_RSHIFT(uy, 1); \
    ut = uyh - ut; \
    ux = uu - ux; \
    uxh = OD_DCT_RSHIFT(ux, 1); \
    uu -= uxh; \
    uw += uv; \
    uwh = OD_DCT_RSHIFT(uw, 1); \
    uv = uwh - uv; \
    /* First 32 parameters, with the saved halves interleaved. */ \
    OD_FDCT_32_ASYM(u0, uw, uwh, ug, uM, uMh, u8, uE, uEh, uo, uU, uUh, \
     u4, uA, uAh, uk, uQ, uQh, uc, uI, uIh, us, uY, uYh, u2, uy, uyh, \
     ui, uO, uOh, ua, uG, uGh, uq, uW, uWh, u6, uC, uCh, um, uS, uSh, \
     ue, uK, uKh, uu, u_, u_h); \
    /* Last 32 parameters, in reverse parameter order. */ \
    OD_FDST_32_ASYM(u, uv, uL, uf, uT, un, uD, u7, uX, ur, uH, ub, uP, uj, \
     uz, u3, uZ, ut, uJ, ud, uR, ul, uB, u5, uV, up, uF, u9, uN, uh, ux, u1); \
  } \
  while (0)
3627
/* 64-point inverse DCT macro operating in place on its 64 lvalue arguments.
   It first runs OD_IDST_32_ASYM on one half of the arguments and
   OD_IDCT_32_ASYM on the other half, then combines the two halves pairwise
   with add/subtract butterflies. */
3628#define OD_IDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
3629 us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
3630 ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
3631 ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
3632 /* Embedded 64-point orthonormal Type-II iDCT. */ \
3633 do { \
 /* Half-value temporaries. The sixteen passed to OD_IDCT_32_ASYM below */ \
 /* (u1h, uhh, ..., uvh) are read afterwards without being assigned */ \
 /* here, so that macro is expected to set them; the rest are computed */ \
 /* in this macro with OD_DCT_RSHIFT(..., 1). */ \
3634 int u1h; \
3635 int u3h; \
3636 int u5h; \
3637 int u7h; \
3638 int u9h; \
3639 int ubh; \
3640 int udh; \
3641 int ufh; \
3642 int uhh; \
3643 int ujh; \
3644 int ulh; \
3645 int unh; \
3646 int uph; \
3647 int urh; \
3648 int uth; \
3649 int uvh; \
3650 int uxh; \
3651 int uzh; \
3652 int uBh; \
3653 int uDh; \
3654 int uFh; \
3655 int uHh; \
3656 int uJh; \
3657 int uLh; \
3658 int uNh; \
3659 int uPh; \
3660 int uRh; \
3661 int uTh; \
3662 int uVh; \
3663 int uXh; \
3664 int uZh; \
3665 int uh_; \
 /* Transform the two 32-element halves independently. */ \
3666 OD_IDST_32_ASYM(u, uL, uT, uD, uX, uH, uP, uz, uZ, uJ, uR, uB, uV, uF, \
3667 uN, ux, u_, uK, uS, uC, uW, uG, uO, uy, uY, uI, uQ, uA, uU, uE, uM, uw); \
3668 OD_IDCT_32_ASYM(u0, ug, u8, uo, u4, uk, uc, us, u2, ui, ua, uq, u6, um, \
3669 ue, uu, u1, u1h, uh, uhh, u9, u9h, up, uph, u5, u5h, ul, ulh, ud, udh, \
3670 ut, uth, u3, u3h, uj, ujh, ub, ubh, ur, urh, u7, u7h, un, unh, uf, ufh, \
3671 uv, uvh); \
 /* Butterflies: each group below pairs one variable from the */ \
 /* OD_IDCT_32_ASYM argument list with one from the OD_IDST_32_ASYM */ \
 /* list. Two forms alternate: one halves the IDST-side value with */ \
 /* OD_DCT_RSHIFT before adding it, the other reuses a half value */ \
 /* supplied through the OD_IDCT_32_ASYM arguments. */ \
3672 uh_ = OD_DCT_RSHIFT(u, 1); \
3673 u0 += uh_; \
3674 u = u0 - u; \
3675 u_ = u1h - u_; \
3676 u1 -= u_; \
3677 uZh = OD_DCT_RSHIFT(uZ, 1); \
3678 u2 += uZh; \
3679 uZ = u2 - uZ; \
3680 uY = u3h - uY; \
3681 u3 -= uY; \
3682 uXh = OD_DCT_RSHIFT(uX, 1); \
3683 u4 += uXh; \
3684 uX = u4 - uX; \
3685 uW = u5h - uW; \
3686 u5 -= uW; \
3687 uVh = OD_DCT_RSHIFT(uV, 1); \
3688 u6 += uVh; \
3689 uV = u6 - uV; \
3690 uU = u7h - uU; \
3691 u7 -= uU; \
3692 uTh = OD_DCT_RSHIFT(uT, 1); \
3693 u8 += uTh; \
3694 uT = u8 - uT; \
3695 uS = u9h - uS; \
3696 u9 -= uS; \
3697 uRh = OD_DCT_RSHIFT(uR, 1); \
3698 ua += uRh; \
3699 uR = ua - uR; \
3700 uQ = ubh - uQ; \
3701 ub -= uQ; \
3702 uPh = OD_DCT_RSHIFT(uP, 1); \
3703 uc += uPh; \
3704 uP = uc - uP; \
3705 uO = udh - uO; \
3706 ud -= uO; \
3707 uNh = OD_DCT_RSHIFT(uN, 1); \
3708 ue += uNh; \
3709 uN = ue - uN; \
3710 uM = ufh - uM; \
3711 uf -= uM; \
3712 uLh = OD_DCT_RSHIFT(uL, 1); \
3713 ug += uLh; \
3714 uL = ug - uL; \
3715 uK = uhh - uK; \
3716 uh -= uK; \
3717 uJh = OD_DCT_RSHIFT(uJ, 1); \
3718 ui += uJh; \
3719 uJ = ui - uJ; \
3720 uI = ujh - uI; \
3721 uj -= uI; \
3722 uHh = OD_DCT_RSHIFT(uH, 1); \
3723 uk += uHh; \
3724 uH = uk - uH; \
3725 uG = ulh - uG; \
3726 ul -= uG; \
3727 uFh = OD_DCT_RSHIFT(uF, 1); \
3728 um += uFh; \
3729 uF = um - uF; \
3730 uE = unh - uE; \
3731 un -= uE; \
3732 uDh = OD_DCT_RSHIFT(uD, 1); \
3733 uo += uDh; \
3734 uD = uo - uD; \
3735 uC = uph - uC; \
3736 up -= uC; \
3737 uBh = OD_DCT_RSHIFT(uB, 1); \
3738 uq += uBh; \
3739 uB = uq - uB; \
3740 uA = urh - uA; \
3741 ur -= uA; \
3742 uzh = OD_DCT_RSHIFT(uz, 1); \
3743 us += uzh; \
3744 uz = us - uz; \
3745 uy = uth - uy; \
3746 ut -= uy; \
3747 uxh = OD_DCT_RSHIFT(ux, 1); \
3748 uu += uxh; \
3749 ux = uu - ux; \
3750 uw = uvh - uw; \
3751 uv -= uw; \
3752 } while (0)
3753#endif
3754
Nathan E. Egge945176a2017-10-20 21:37:58 -04003755/* 4-point orthonormal Type-II fDCT. */
Monty Montgomery02078a32017-07-11 21:22:29 -04003756void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) {
Nathan E. Egge945176a2017-10-20 21:37:58 -04003757 /* 4 "muls", 8 adds, 2 shifts */
Monty Montgomery02078a32017-07-11 21:22:29 -04003758 int q0;
3759 int q1;
3760 int q2;
3761 int q3;
Nathan E. Egge945176a2017-10-20 21:37:58 -04003762 int u1;
3763 int t0;
3764 int t1;
3765 int t2;
3766 int t3;
Monty Montgomery02078a32017-07-11 21:22:29 -04003767 q0 = x[0*xstride];
Nathan E. Egge945176a2017-10-20 21:37:58 -04003768 q1 = x[1*xstride];
3769 q2 = x[2*xstride];
Monty Montgomery02078a32017-07-11 21:22:29 -04003770 q3 = x[3*xstride];
Nathan E. Egge945176a2017-10-20 21:37:58 -04003771 q3 = q0 - q3;
3772 q0 -= OD_DCT_RSHIFT(q3, 1);
3773 u1 = q1 + q2;
3774 q2 = q1 - q2;
3775 /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
3776 t0 = (q3*8867 + 16384) >> 15;
3777 /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
3778 t1 = (q2*21407 + 16384) >> 15;
3779 /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
3780 t2 = (q3*21407 + 16384) >> 15;
3781 /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
3782 t3 = (q2*8867 + 16384) >> 15;
3783 q0 += OD_DCT_RSHIFT(u1, 1);
3784 q1 = q0 - u1;
3785 q2 = t3 + t2;
3786 q3 = t0 - t1;
3787 y[0] = q0;
3788 y[1] = q2;
3789 y[2] = q1;
3790 y[3] = q3;
Monty Montgomery02078a32017-07-11 21:22:29 -04003791}
3792
Nathan E. Egge945176a2017-10-20 21:37:58 -04003793/* 4-point orthonormal Type-II iDCT. */
Monty Montgomery02078a32017-07-11 21:22:29 -04003794void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) {
Nathan E. Egge945176a2017-10-20 21:37:58 -04003795 /* 4 "muls", 8 adds, 1 shift */
Monty Montgomery02078a32017-07-11 21:22:29 -04003796 int q0;
3797 int q1;
3798 int q2;
3799 int q3;
Nathan E. Egge945176a2017-10-20 21:37:58 -04003800 int q1h;
3801 int u0;
3802 int t0;
3803 int t1;
3804 int t2;
3805 int t3;
Monty Montgomery02078a32017-07-11 21:22:29 -04003806 q0 = y[0];
3807 q2 = y[1];
3808 q1 = y[2];
3809 q3 = y[3];
Nathan E. Egge945176a2017-10-20 21:37:58 -04003810 /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
3811 t0 = (q3*8867 + 16384) >> 15;
3812 /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
3813 t1 = (q2*21407 + 16384) >> 15;
3814 /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
3815 t2 = (q3*21407 + 16384) >> 15;
3816 /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
3817 t3 = (q2*8867 + 16384) >> 15;
3818 q3 = t0 + t1;
3819 q2 = t3 - t2;
3820 q1 = q0 - q1;
3821 q1h = OD_DCT_RSHIFT(q1, 1);
3822 q0 -= q1h;
3823 u0 = q0 + q3;
3824 q3 = q0 - q3;
3825 q2 = q1h - q2;
3826 q1 -= q2;
3827 x[0*xstride] = u0;
Monty Montgomery02078a32017-07-11 21:22:29 -04003828 x[1*xstride] = q1;
3829 x[2*xstride] = q2;
3830 x[3*xstride] = q3;
3831}
Monty Montgomerycf18fe42017-07-11 21:33:25 -04003832
Nathan E. Egge14a9cb12017-08-21 17:35:04 -04003833/* 4-point orthonormal Type-VII fDST. */
Monty Montgomery573cf252017-08-02 05:45:14 -04003834void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride) {
Nathan E. Egge14a9cb12017-08-21 17:35:04 -04003835 /* 11 adds, 5 "muls".*/
Monty Montgomery573cf252017-08-02 05:45:14 -04003836 int q0;
3837 int q1;
3838 int q2;
3839 int q3;
Nathan E. Egge14a9cb12017-08-21 17:35:04 -04003840 int t0;
3841 int t1;
3842 int t2;
3843 int t3;
3844 int t4;
3845 q0 = x[0*xstride];
Nathan Egge5a5e1ad2017-09-12 12:33:48 +00003846 q1 = x[1*xstride];
Nathan E. Egge14a9cb12017-08-21 17:35:04 -04003847 q2 = x[2*xstride];
3848 q3 = x[3*xstride];
3849 t0 = q1 + q3;
3850 t1 = q0 + q1 - q3;
3851 t2 = q0 - q1;
3852 t3 = q2;
3853 t4 = q0 + q3;
3854 /* 7021/16384 ~= 2*Sin[2*Pi/9]/3 ~= 0.428525073124360 */
3855 t0 = (t0*7021 + 8192) >> 14;
3856 /* 18919/32768 ~= 2*Sin[3*Pi/9]/3 ~= 0.577350269189626 */
3857 t1 = (t1*18919 + 16384) >> 15;
3858 /* 21513/32768 ~= 2*Sin[4*Pi/9]/3 ~= 0.656538502008139 */
3859 t2 = (t2*21513 + 16384) >> 15;
3860 /* 18919/32768 ~= 2*Sin[3*Pi/9]/3 ~= 0.577350269189626 */
3861 t3 = (t3*18919 + 16384) >> 15;
3862 /* 467/2048 ~= 2*Sin[1*Pi/9]/3 ~= 0.228013428883779 */
3863 t4 = (t4*467 + 1024) >> 11;
3864 q0 = t0 + t3 + t4;
3865 q1 = t1;
3866 q2 = t0 + t2 - t3;
3867 q3 = t2 + t3 - t4;
3868 y[0] = (od_coeff)q0;
3869 y[1] = (od_coeff)q1;
3870 y[2] = (od_coeff)q2;
3871 y[3] = (od_coeff)q3;
Monty Montgomery573cf252017-08-02 05:45:14 -04003872}
3873
Nathan E. Egge14a9cb12017-08-21 17:35:04 -04003874/* 4-point orthonormal Type-VII iDST. */
Monty Montgomery573cf252017-08-02 05:45:14 -04003875void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]) {
Nathan E. Egge14a9cb12017-08-21 17:35:04 -04003876 /* 11 adds, 5 "muls".*/
Monty Montgomery573cf252017-08-02 05:45:14 -04003877 int q0;
3878 int q1;
3879 int q2;
3880 int q3;
Nathan E. Egge14a9cb12017-08-21 17:35:04 -04003881 int t0;
3882 int t1;
3883 int t2;
3884 int t3;
3885 int t4;
3886 q0 = y[0];
3887 q1 = y[1];
Nathan E. Egge72c99e12017-08-21 17:35:04 -04003888 q2 = y[2];
Nathan E. Egge14a9cb12017-08-21 17:35:04 -04003889 q3 = y[3];
3890 t0 = q0 - q3;
3891 t1 = q0 + q2;
3892 t2 = q0 - q2 + q3;
3893 t3 = q1;
3894 t4 = q2 + q3;
3895 /* 467/2048 ~= 2*Sin[1*Pi/9]/3 ~= 0.228013428883779 */
3896 t0 = (t0*467 + 1024) >> 11;
3897 /* 7021/16384 ~= 2*Sin[2*Pi/9]/3 ~= 0.428525073124360 */
3898 t1 = (t1*7021 + 8192) >> 14;
3899 /* 18919/32768 ~= 2*Sin[3*Pi/9]/3 ~= 0.577350269189626 */
3900 t2 = (t2*18919 + 16384) >> 15;
3901 /* 18919/32768 ~= 2*Sin[3*Pi/9]/3 ~= 0.577350269189626 */
3902 t3 = (t3*18919 + 16384) >> 15;
3903 /* 21513/32768 ~= 2*Sin[4*Pi/9]/3 ~= 0.656538502008139 */
3904 t4 = (t4*21513 + 16384) >> 15;
3905 q0 = t0 + t3 + t4;
3906 q1 = t1 + t3 - t4;
3907 q2 = t2;
3908 q3 = t0 + t1 - t3;
3909 x[0*xstride] = q0;
3910 x[1*xstride] = q1;
3911 x[2*xstride] = q2;
3912 x[3*xstride] = q3;
Monty Montgomery573cf252017-08-02 05:45:14 -04003913}
3914
Monty Montgomerycf18fe42017-07-11 21:33:25 -04003915void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride) {
3916 int r0;
3917 int r1;
3918 int r2;
3919 int r3;
3920 int r4;
3921 int r5;
3922 int r6;
3923 int r7;
3924 r0 = x[0*xstride];
3925 r4 = x[1*xstride];
3926 r2 = x[2*xstride];
3927 r6 = x[3*xstride];
3928 r1 = x[4*xstride];
3929 r5 = x[5*xstride];
3930 r3 = x[6*xstride];
3931 r7 = x[7*xstride];
3932 OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7);
3933 y[0] = (od_coeff)r0;
3934 y[1] = (od_coeff)r1;
3935 y[2] = (od_coeff)r2;
3936 y[3] = (od_coeff)r3;
3937 y[4] = (od_coeff)r4;
3938 y[5] = (od_coeff)r5;
3939 y[6] = (od_coeff)r6;
3940 y[7] = (od_coeff)r7;
3941}
3942
3943void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]) {
3944 int r0;
3945 int r1;
3946 int r2;
3947 int r3;
3948 int r4;
3949 int r5;
3950 int r6;
3951 int r7;
3952 r0 = y[0];
3953 r4 = y[1];
3954 r2 = y[2];
3955 r6 = y[3];
3956 r1 = y[4];
3957 r5 = y[5];
3958 r3 = y[6];
3959 r7 = y[7];
3960 OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7);
3961 x[0*xstride] = (od_coeff)r0;
3962 x[1*xstride] = (od_coeff)r1;
3963 x[2*xstride] = (od_coeff)r2;
3964 x[3*xstride] = (od_coeff)r3;
3965 x[4*xstride] = (od_coeff)r4;
3966 x[5*xstride] = (od_coeff)r5;
3967 x[6*xstride] = (od_coeff)r6;
3968 x[7*xstride] = (od_coeff)r7;
3969}
3970
/* Index permutation used by the 8-point DST wrappers: entry i is the
   coefficient index in y that corresponds to slot i of the buffer passed to
   or returned from od_poly_prod_8. */
const int OD_DST_8_PERM[8] = {
  0, 7, 1, 6,
  2, 5, 3, 4
};
3972
3973/* Computes the Polynomial Product Y(z) ≡ X(z)*H(z) modulo (z^8 + 1) using
3974 Nussbaumer's "short" algorithm [1].
3975 The monomial coefficients in Y(z) are exactly the values of an acyclic
3976 convolution of the monomial coefficients of X(z) and H(z).
3977 Since H(z) is fixed, the multiplication terms are constant and precomputed.
3978
3979 [1] Nussbaumer, Henri J. "Fast Fourier Transform and Convolution Algorithms"
3980 Springer-Verlag: Berlin, Heidelberg, New York (1981) pages 76-78. */
3981static void od_poly_prod_8(od_coeff y[8], const od_coeff x[8]) {
3982 /* 21 "muls", 75 adds, 18 shifts */
3983 od_coeff q0;
3984 od_coeff q1;
3985 od_coeff q2;
3986 od_coeff q3;
3987 od_coeff q4;
3988 od_coeff q5;
3989 od_coeff q6;
3990 od_coeff q7;
3991 od_coeff q8;
3992 od_coeff q9;
3993 od_coeff q10;
3994 od_coeff q11;
3995 od_coeff q12;
3996 od_coeff q13;
3997 od_coeff q14;
3998 od_coeff q15;
3999 od_coeff q16;
4000 od_coeff q17;
4001 od_coeff q18;
4002 od_coeff q19;
4003 od_coeff q20;
4004 od_coeff t0;
4005 od_coeff t1;
4006 od_coeff t2;
4007 od_coeff t3;
4008 od_coeff t4;
4009 od_coeff t5;
4010 od_coeff t6;
4011 od_coeff t7;
4012 od_coeff u0;
4013 od_coeff u1;
4014 od_coeff u1h;
4015 od_coeff u2;
4016 od_coeff u2h;
4017 od_coeff u3;
4018 od_coeff u4;
4019 od_coeff u4h;
4020 od_coeff u5;
4021 od_coeff u6;
4022 od_coeff u7;
4023 od_coeff u7h;
4024 od_coeff u8;
4025 od_coeff u9;
4026 od_coeff u10;
4027 od_coeff u11;
4028 od_coeff u12;
4029 od_coeff u13;
4030 od_coeff u14;
4031 od_coeff u15;
4032 od_coeff u16;
4033 od_coeff u17;
4034 od_coeff u18;
4035 od_coeff u19;
4036 od_coeff u20;
4037 od_coeff u21;
4038 od_coeff u22;
4039 od_coeff u23;
4040 od_coeff u24;
4041 od_coeff u25;
4042 od_coeff u26;
4043 od_coeff u27;
4044 t0 = x[0];
4045 t1 = x[1];
4046 t2 = x[2];
4047 t3 = x[3];
4048 t4 = x[4];
4049 t5 = x[5];
4050 t6 = x[6];
4051 t7 = x[7];
4052 /* Stage 0 Butterfly */
4053 u7 = t0 - t7;
4054 u7h = OD_DCT_RSHIFT(u7, 1);
4055 u0 = t0 - u7h;
4056 u2 = t2 - t6;
4057 u2h = OD_DCT_RSHIFT(u2, 1);
4058 u6 = t2 - u2h;
4059 u4 = t4 + t5;
4060 u4h = OD_DCT_RSHIFT(u4, 1);
4061 u5 = t4 - u4h;
4062 u1 = t3 - t1;
4063 u1h = OD_DCT_RSHIFT(u1, 1);
4064 u3 = t3 - u1h;
4065 /* Stage 1 Butterfly */
4066 q0 = u0 + u2h;
4067 q1 = q0 - u2;
4068 q4 = u3 + u4h;
4069 q5 = q4 - u4;
4070 q2 = u7h + u5;
4071 q7 = u7 - q2;
4072 q6 = u1h + u6;
4073 q3 = u1 - q6;
4074 /* Stage 2 Half-Butterfly */
4075 /*The intermediate sums can overflow 16 bits, but all SIMD instruction sets
4076 should be able to compute them without issue (i.e., using PAVGW or
4077 V{R}HADD.S16).*/
4078 q8 = (q0 + q4 + 1) >> 1;
4079 q9 = (q1 + q5) >> 1;
4080 q10 = (q2 + q3 + 1) >> 1;
4081 q11 = (q7 + q6) >> 1;
4082 /* Stage 3 */
4083 q12 = t0 + t3;
4084 q13 = t0;
4085 q14 = t3;
4086 q15 = t5 - t6;
4087 q16 = t6;
4088 q17 = t5;
4089 q18 = ((q6 + ((t0 + t6 + 1) >> 1)) - (q4 + (t5 >> 1))) >> 1;
4090 q19 = ((q7 + ((t5 + t6 + 1) >> 1)) - (q0 + (t3 >> 1))) >> 1;
4091 q20 = (q18 - q19) >> 1;
4092 /* Stage 4 */
4093 q0 = (-5995*q0 + 8192) >> 14;
4094 q1 = (-1373*q1 + 4096) >> 13;
4095 q2 = (22891*q2 + 16384) >> 15;
4096 q3 = (-217*q3 + 512) >> 10;
4097 q4 = (13427*q4 + 16384) >> 15;
4098 q5 = (-11013*q5 + 8192) >> 14;
4099 q6 = (1373*q6 + 1024) >> 11;
4100 q7 = (-14077*q7 + 16384) >> 15;
4101 q8 = (-1437*q8 + 16384) >> 15;
4102 q9 = (27519*q9 + 16384) >> 15;
4103 q10 = (-15947*q10 + 16384) >> 15;
4104 q11 = (-7891*q11 + 16384) >> 15;
4105 q12 = (4897*q12 + 16384) >> 15;
4106 q13 = (-5079*q13 + 8192) >> 14;
4107 q14 = (365*q14 + 16384) >> 15;
4108 q15 = (3325*q15 + 8192) >> 14;
4109 q16 = (-5225*q16 + 8192) >> 14;
4110 q17 = (-1425*q17 + 8192) >> 14;
4111 q18 = (3453*q18 + 16384) >> 15;
4112 q19 = (-8421*q19 + 8192) >> 14;
4113 q20 = (-20295*q20 + 16384) >> 15;
4114 /* Stage 5 */
4115 u0 = q0 + q8;
4116 u1 = q1 + q9;
4117 u2 = q2 + q10;
4118 u3 = q3 + q10;
4119 u4 = q4 + q8;
4120 u5 = q5 + q9;
4121 u6 = q6 + q11;
4122 u7 = q7 + q11;
4123 /* Stage 6 */
4124 u10 = u0 + u1;
4125 u11 = u0 - u1;
4126 u12 = u2 + u7;
4127 u13 = u2 - u7;
4128 u14 = u3 + u6;
4129 u15 = u3 - u6;
4130 u16 = u5 + u4;
4131 u17 = u5 - u4;
4132 /* Stage 7 */
4133 u8 = q19 + q20;
4134 u9 = q19 - q18;
4135 u18 = q12 + u8;
4136 u19 = u18 + q13;
4137 u20 = u18 + q14;
4138 u21 = u9 << 1;
4139 u22 = q15 + u21;
4140 u23 = q16 - u22;
4141 u24 = u22 + q17;
4142 u25 = u8 << 1;
4143 u26 = u25 << 1;
4144 u27 = u25 - u9;
4145 /* Stage 8 */
4146 y[0] = u14 + u16 + u20;
4147 y[1] = u12 - u10 - u25;
4148 y[2] = u9 + u13 - u17;
4149 y[3] = u9 - u10 - u12 - u19;
4150 y[4] = u15 - u11 - u27;
4151 y[5] = u23 - u11 - u15;
4152 y[6] = u13 + u17 - u24 + u26;
4153 y[7] = u16 - u14 + u21 - u25;
4154}
4155
Monty Montgomerycf18fe42017-07-11 21:33:25 -04004156void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride) {
Nathan E. Eggeefb44bb2017-10-22 05:42:06 -04004157 int i;
4158 od_coeff xp[8];
4159 od_coeff yp[8];
4160 for (i = 0; i < 8; i++) xp[i] = x[i*xstride];
4161 od_poly_prod_8(yp, xp);
4162 for (i = 0; i < 8; i++) y[OD_DST_8_PERM[i]] = yp[i];
Monty Montgomerycf18fe42017-07-11 21:33:25 -04004163}
4164
4165void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]) {
Nathan E. Eggeefb44bb2017-10-22 05:42:06 -04004166 int i;
4167 od_coeff xp[8];
4168 od_coeff yp[8];
4169 for (i = 0; i < 8; i++) yp[i] = y[OD_DST_8_PERM[i]];
4170 od_poly_prod_8(xp, yp);
4171 for (i = 0; i < 8; i++) x[i*xstride] = xp[i];
Monty Montgomerycf18fe42017-07-11 21:33:25 -04004172}
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004173
4174void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride) {
4175 int s0;
4176 int s1;
4177 int s2;
4178 int s3;
4179 int s4;
4180 int s5;
4181 int s6;
4182 int s7;
4183 int s8;
4184 int s9;
4185 int sa;
4186 int sb;
4187 int sc;
4188 int sd;
4189 int se;
4190 int sf;
4191 s0 = x[0*xstride];
4192 s8 = x[1*xstride];
4193 s4 = x[2*xstride];
4194 sc = x[3*xstride];
4195 s2 = x[4*xstride];
4196 sa = x[5*xstride];
4197 s6 = x[6*xstride];
4198 se = x[7*xstride];
4199 s1 = x[8*xstride];
4200 s9 = x[9*xstride];
4201 s5 = x[10*xstride];
4202 sd = x[11*xstride];
4203 s3 = x[12*xstride];
4204 sb = x[13*xstride];
4205 s7 = x[14*xstride];
4206 sf = x[15*xstride];
4207 OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
4208 y[0] = (od_coeff)s0;
4209 y[1] = (od_coeff)s1;
4210 y[2] = (od_coeff)s2;
4211 y[3] = (od_coeff)s3;
4212 y[4] = (od_coeff)s4;
4213 y[5] = (od_coeff)s5;
4214 y[6] = (od_coeff)s6;
4215 y[7] = (od_coeff)s7;
4216 y[8] = (od_coeff)s8;
4217 y[9] = (od_coeff)s9;
4218 y[10] = (od_coeff)sa;
4219 y[11] = (od_coeff)sb;
4220 y[12] = (od_coeff)sc;
4221 y[13] = (od_coeff)sd;
4222 y[14] = (od_coeff)se;
4223 y[15] = (od_coeff)sf;
4224}
4225
4226void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]) {
4227 int s0;
4228 int s1;
4229 int s2;
4230 int s3;
4231 int s4;
4232 int s5;
4233 int s6;
4234 int s7;
4235 int s8;
4236 int s9;
4237 int sa;
4238 int sb;
4239 int sc;
4240 int sd;
4241 int se;
4242 int sf;
4243 s0 = y[0];
4244 s8 = y[1];
4245 s4 = y[2];
4246 sc = y[3];
4247 s2 = y[4];
4248 sa = y[5];
4249 s6 = y[6];
4250 se = y[7];
4251 s1 = y[8];
4252 s9 = y[9];
4253 s5 = y[10];
4254 sd = y[11];
4255 s3 = y[12];
4256 sb = y[13];
4257 s7 = y[14];
4258 sf = y[15];
4259 OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
4260 x[0*xstride] = (od_coeff)s0;
4261 x[1*xstride] = (od_coeff)s1;
4262 x[2*xstride] = (od_coeff)s2;
4263 x[3*xstride] = (od_coeff)s3;
4264 x[4*xstride] = (od_coeff)s4;
4265 x[5*xstride] = (od_coeff)s5;
4266 x[6*xstride] = (od_coeff)s6;
4267 x[7*xstride] = (od_coeff)s7;
4268 x[8*xstride] = (od_coeff)s8;
4269 x[9*xstride] = (od_coeff)s9;
4270 x[10*xstride] = (od_coeff)sa;
4271 x[11*xstride] = (od_coeff)sb;
4272 x[12*xstride] = (od_coeff)sc;
4273 x[13*xstride] = (od_coeff)sd;
4274 x[14*xstride] = (od_coeff)se;
4275 x[15*xstride] = (od_coeff)sf;
4276}
4277
4278void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride) {
4279 int s0;
4280 int s1;
4281 int s2;
4282 int s3;
4283 int s4;
4284 int s5;
4285 int s6;
4286 int s7;
4287 int s8;
4288 int s9;
4289 int sa;
4290 int sb;
4291 int sc;
4292 int sd;
4293 int se;
4294 int sf;
4295 s0 = x[15*xstride];
4296 s8 = x[14*xstride];
4297 s4 = x[13*xstride];
4298 sc = x[12*xstride];
4299 s2 = x[11*xstride];
4300 sa = x[10*xstride];
4301 s6 = x[9*xstride];
4302 se = x[8*xstride];
4303 s1 = x[7*xstride];
4304 s9 = x[6*xstride];
4305 s5 = x[5*xstride];
4306 sd = x[4*xstride];
4307 s3 = x[3*xstride];
4308 sb = x[2*xstride];
4309 s7 = x[1*xstride];
4310 sf = x[0*xstride];
4311 OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
4312 y[0] = (od_coeff)sf;
Nathan E. Egge69a16432017-10-18 12:50:28 -04004313 y[1] = (od_coeff)-se;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004314 y[2] = (od_coeff)sd;
Nathan E. Egge69a16432017-10-18 12:50:28 -04004315 y[3] = (od_coeff)-sc;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004316 y[4] = (od_coeff)sb;
Nathan E. Egge69a16432017-10-18 12:50:28 -04004317 y[5] = (od_coeff)-sa;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004318 y[6] = (od_coeff)s9;
Nathan E. Egge69a16432017-10-18 12:50:28 -04004319 y[7] = (od_coeff)-s8;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004320 y[8] = (od_coeff)s7;
Nathan E. Egge69a16432017-10-18 12:50:28 -04004321 y[9] = (od_coeff)-s6;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004322 y[10] = (od_coeff)s5;
Nathan E. Egge69a16432017-10-18 12:50:28 -04004323 y[11] = (od_coeff)-s4;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004324 y[12] = (od_coeff)s3;
Nathan E. Egge69a16432017-10-18 12:50:28 -04004325 y[13] = (od_coeff)-s2;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004326 y[14] = (od_coeff)s1;
Nathan E. Egge69a16432017-10-18 12:50:28 -04004327 y[15] = (od_coeff)-s0;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004328}
4329
4330void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]) {
4331 int s0;
4332 int s1;
4333 int s2;
4334 int s3;
4335 int s4;
4336 int s5;
4337 int s6;
4338 int s7;
4339 int s8;
4340 int s9;
4341 int sa;
4342 int sb;
4343 int sc;
4344 int sd;
4345 int se;
4346 int sf;
Nathan E. Egge69a16432017-10-18 12:50:28 -04004347 s0 = -y[15];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004348 s8 = y[14];
Nathan E. Egge69a16432017-10-18 12:50:28 -04004349 s4 = -y[13];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004350 sc = y[12];
Nathan E. Egge69a16432017-10-18 12:50:28 -04004351 s2 = -y[11];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004352 sa = y[10];
Nathan E. Egge69a16432017-10-18 12:50:28 -04004353 s6 = -y[9];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004354 se = y[8];
Nathan E. Egge69a16432017-10-18 12:50:28 -04004355 s1 = -y[7];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004356 s9 = y[6];
Nathan E. Egge69a16432017-10-18 12:50:28 -04004357 s5 = -y[5];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004358 sd = y[4];
Nathan E. Egge69a16432017-10-18 12:50:28 -04004359 s3 = -y[3];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004360 sb = y[2];
Nathan E. Egge69a16432017-10-18 12:50:28 -04004361 s7 = -y[1];
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04004362 sf = y[0];
4363 OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
4364 x[0*xstride] = (od_coeff)sf;
4365 x[1*xstride] = (od_coeff)se;
4366 x[2*xstride] = (od_coeff)sd;
4367 x[3*xstride] = (od_coeff)sc;
4368 x[4*xstride] = (od_coeff)sb;
4369 x[5*xstride] = (od_coeff)sa;
4370 x[6*xstride] = (od_coeff)s9;
4371 x[7*xstride] = (od_coeff)s8;
4372 x[8*xstride] = (od_coeff)s7;
4373 x[9*xstride] = (od_coeff)s6;
4374 x[10*xstride] = (od_coeff)s5;
4375 x[11*xstride] = (od_coeff)s4;
4376 x[12*xstride] = (od_coeff)s3;
4377 x[13*xstride] = (od_coeff)s2;
4378 x[14*xstride] = (od_coeff)s1;
4379 x[15*xstride] = (od_coeff)s0;
4380}
Monty Montgomery2cb52ba2017-07-17 18:27:27 -04004381
4382void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride) {
4383 /*215 adds, 38 shifts, 87 "muls".*/
4384 int t0;
4385 int t1;
4386 int t2;
4387 int t3;
4388 int t4;
4389 int t5;
4390 int t6;
4391 int t7;
4392 int t8;
4393 int t9;
4394 int ta;
4395 int tb;
4396 int tc;
4397 int td;
4398 int te;
4399 int tf;
4400 int tg;
4401 int th;
4402 int ti;
4403 int tj;
4404 int tk;
4405 int tl;
4406 int tm;
4407 int tn;
4408 int to;
4409 int tp;
4410 int tq;
4411 int tr;
4412 int ts;
4413 int tt;
4414 int tu;
4415 int tv;
4416 t0 = x[0*xstride];
4417 tg = x[1*xstride];
4418 t8 = x[2*xstride];
4419 to = x[3*xstride];
4420 t4 = x[4*xstride];
4421 tk = x[5*xstride];
4422 tc = x[6*xstride];
4423 ts = x[7*xstride];
4424 t2 = x[8*xstride];
4425 ti = x[9*xstride];
4426 ta = x[10*xstride];
4427 tq = x[11*xstride];
4428 t6 = x[12*xstride];
4429 tm = x[13*xstride];
4430 te = x[14*xstride];
4431 tu = x[15*xstride];
4432 t1 = x[16*xstride];
4433 th = x[17*xstride];
4434 t9 = x[18*xstride];
4435 tp = x[19*xstride];
4436 t5 = x[20*xstride];
4437 tl = x[21*xstride];
4438 td = x[22*xstride];
4439 tt = x[23*xstride];
4440 t3 = x[24*xstride];
4441 tj = x[25*xstride];
4442 tb = x[26*xstride];
4443 tr = x[27*xstride];
4444 t7 = x[28*xstride];
4445 tn = x[29*xstride];
4446 tf = x[30*xstride];
4447 tv = x[31*xstride];
4448 OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
4449 t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
4450 y[0] = (od_coeff)t0;
4451 y[1] = (od_coeff)t1;
4452 y[2] = (od_coeff)t2;
4453 y[3] = (od_coeff)t3;
4454 y[4] = (od_coeff)t4;
4455 y[5] = (od_coeff)t5;
4456 y[6] = (od_coeff)t6;
4457 y[7] = (od_coeff)t7;
4458 y[8] = (od_coeff)t8;
4459 y[9] = (od_coeff)t9;
4460 y[10] = (od_coeff)ta;
4461 y[11] = (od_coeff)tb;
4462 y[12] = (od_coeff)tc;
4463 y[13] = (od_coeff)td;
4464 y[14] = (od_coeff)te;
4465 y[15] = (od_coeff)tf;
4466 y[16] = (od_coeff)tg;
4467 y[17] = (od_coeff)th;
4468 y[18] = (od_coeff)ti;
4469 y[19] = (od_coeff)tj;
4470 y[20] = (od_coeff)tk;
4471 y[21] = (od_coeff)tl;
4472 y[22] = (od_coeff)tm;
4473 y[23] = (od_coeff)tn;
4474 y[24] = (od_coeff)to;
4475 y[25] = (od_coeff)tp;
4476 y[26] = (od_coeff)tq;
4477 y[27] = (od_coeff)tr;
4478 y[28] = (od_coeff)ts;
4479 y[29] = (od_coeff)tt;
4480 y[30] = (od_coeff)tu;
4481 y[31] = (od_coeff)tv;
4482}
4483
4484void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]) {
4485 int t0;
4486 int t1;
4487 int t2;
4488 int t3;
4489 int t4;
4490 int t5;
4491 int t6;
4492 int t7;
4493 int t8;
4494 int t9;
4495 int ta;
4496 int tb;
4497 int tc;
4498 int td;
4499 int te;
4500 int tf;
4501 int tg;
4502 int th;
4503 int ti;
4504 int tj;
4505 int tk;
4506 int tl;
4507 int tm;
4508 int tn;
4509 int to;
4510 int tp;
4511 int tq;
4512 int tr;
4513 int ts;
4514 int tt;
4515 int tu;
4516 int tv;
4517 t0 = y[0];
4518 tg = y[1];
4519 t8 = y[2];
4520 to = y[3];
4521 t4 = y[4];
4522 tk = y[5];
4523 tc = y[6];
4524 ts = y[7];
4525 t2 = y[8];
4526 ti = y[9];
4527 ta = y[10];
4528 tq = y[11];
4529 t6 = y[12];
4530 tm = y[13];
4531 te = y[14];
4532 tu = y[15];
4533 t1 = y[16];
4534 th = y[17];
4535 t9 = y[18];
4536 tp = y[19];
4537 t5 = y[20];
4538 tl = y[21];
4539 td = y[22];
4540 tt = y[23];
4541 t3 = y[24];
4542 tj = y[25];
4543 tb = y[26];
4544 tr = y[27];
4545 t7 = y[28];
4546 tn = y[29];
4547 tf = y[30];
4548 tv = y[31];
4549 OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
4550 t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
4551 x[0*xstride] = (od_coeff)t0;
4552 x[1*xstride] = (od_coeff)t1;
4553 x[2*xstride] = (od_coeff)t2;
4554 x[3*xstride] = (od_coeff)t3;
4555 x[4*xstride] = (od_coeff)t4;
4556 x[5*xstride] = (od_coeff)t5;
4557 x[6*xstride] = (od_coeff)t6;
4558 x[7*xstride] = (od_coeff)t7;
4559 x[8*xstride] = (od_coeff)t8;
4560 x[9*xstride] = (od_coeff)t9;
4561 x[10*xstride] = (od_coeff)ta;
4562 x[11*xstride] = (od_coeff)tb;
4563 x[12*xstride] = (od_coeff)tc;
4564 x[13*xstride] = (od_coeff)td;
4565 x[14*xstride] = (od_coeff)te;
4566 x[15*xstride] = (od_coeff)tf;
4567 x[16*xstride] = (od_coeff)tg;
4568 x[17*xstride] = (od_coeff)th;
4569 x[18*xstride] = (od_coeff)ti;
4570 x[19*xstride] = (od_coeff)tj;
4571 x[20*xstride] = (od_coeff)tk;
4572 x[21*xstride] = (od_coeff)tl;
4573 x[22*xstride] = (od_coeff)tm;
4574 x[23*xstride] = (od_coeff)tn;
4575 x[24*xstride] = (od_coeff)to;
4576 x[25*xstride] = (od_coeff)tp;
4577 x[26*xstride] = (od_coeff)tq;
4578 x[27*xstride] = (od_coeff)tr;
4579 x[28*xstride] = (od_coeff)ts;
4580 x[29*xstride] = (od_coeff)tt;
4581 x[30*xstride] = (od_coeff)tu;
4582 x[31*xstride] = (od_coeff)tv;
4583}
Monty Montgomerya4e245a2017-07-22 00:48:31 -04004584
Nathan E. Eggef73e47e2017-10-22 06:41:55 -04004585void od_bin_fdst32(od_coeff y[32], const od_coeff *x, int xstride) {
4586 od_coeff t0;
4587 od_coeff t1;
4588 od_coeff t2;
4589 od_coeff t3;
4590 od_coeff t4;
4591 od_coeff t5;
4592 od_coeff t6;
4593 od_coeff t7;
4594 od_coeff t8;
4595 od_coeff t9;
4596 od_coeff ta;
4597 od_coeff tb;
4598 od_coeff tc;
4599 od_coeff td;
4600 od_coeff te;
4601 od_coeff tf;
4602 od_coeff tg;
4603 od_coeff th;
4604 od_coeff ti;
4605 od_coeff tj;
4606 od_coeff tk;
4607 od_coeff tl;
4608 od_coeff tm;
4609 od_coeff tn;
4610 od_coeff to;
4611 od_coeff tp;
4612 od_coeff tq;
4613 od_coeff tr;
4614 od_coeff ts;
4615 od_coeff tt;
4616 od_coeff tu;
4617 od_coeff tv;
4618 t0 = x[0*xstride];
4619 t1 = x[1*xstride];
4620 t2 = x[2*xstride];
4621 t3 = x[3*xstride];
4622 t4 = x[4*xstride];
4623 t5 = x[5*xstride];
4624 t6 = x[6*xstride];
4625 t7 = x[7*xstride];
4626 t8 = x[8*xstride];
4627 t9 = x[9*xstride];
4628 ta = x[10*xstride];
4629 tb = x[11*xstride];
4630 tc = x[12*xstride];
4631 td = x[13*xstride];
4632 te = x[14*xstride];
4633 tf = x[15*xstride];
4634 tg = x[16*xstride];
4635 th = x[17*xstride];
4636 ti = x[18*xstride];
4637 tj = x[19*xstride];
4638 tk = x[20*xstride];
4639 tl = x[21*xstride];
4640 tm = x[22*xstride];
4641 tn = x[23*xstride];
4642 to = x[24*xstride];
4643 tp = x[25*xstride];
4644 tq = x[26*xstride];
4645 tr = x[27*xstride];
4646 ts = x[28*xstride];
4647 tt = x[29*xstride];
4648 tu = x[30*xstride];
4649 tv = x[31*xstride];
4650 OD_FDST_32(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, te, tf,
4651 tg, th, ti, tj, tk, tl, tm, tn, to, tp, tq, tr, ts, tt, tu, tv);
4652 y[0] = t0;
4653 y[1] = tg;
4654 y[2] = t8;
4655 y[3] = to;
4656 y[4] = t4;
4657 y[5] = tk;
4658 y[6] = tc;
4659 y[7] = ts;
4660 y[8] = t2;
4661 y[9] = ti;
4662 y[10] = ta;
4663 y[11] = tq;
4664 y[12] = t6;
4665 y[13] = tm;
4666 y[14] = te;
4667 y[15] = tu;
4668 y[16] = t1;
4669 y[17] = th;
4670 y[18] = t9;
4671 y[19] = tp;
4672 y[20] = t5;
4673 y[21] = tl;
4674 y[22] = td;
4675 y[23] = tt;
4676 y[24] = t3;
4677 y[25] = tj;
4678 y[26] = tb;
4679 y[27] = tr;
4680 y[28] = t7;
4681 y[29] = tn;
4682 y[30] = tf;
4683 y[31] = tv;
4684}
4685
4686void od_bin_idst32(od_coeff *x, int xstride, const od_coeff y[32]) {
4687 od_coeff t0;
4688 od_coeff t1;
4689 od_coeff t2;
4690 od_coeff t3;
4691 od_coeff t4;
4692 od_coeff t5;
4693 od_coeff t6;
4694 od_coeff t7;
4695 od_coeff t8;
4696 od_coeff t9;
4697 od_coeff ta;
4698 od_coeff tb;
4699 od_coeff tc;
4700 od_coeff td;
4701 od_coeff te;
4702 od_coeff tf;
4703 od_coeff tg;
4704 od_coeff th;
4705 od_coeff ti;
4706 od_coeff tj;
4707 od_coeff tk;
4708 od_coeff tl;
4709 od_coeff tm;
4710 od_coeff tn;
4711 od_coeff to;
4712 od_coeff tp;
4713 od_coeff tq;
4714 od_coeff tr;
4715 od_coeff ts;
4716 od_coeff tt;
4717 od_coeff tu;
4718 od_coeff tv;
4719 t0 = y[0];
4720 tg = y[1];
4721 t8 = y[2];
4722 to = y[3];
4723 t4 = y[4];
4724 tk = y[5];
4725 tc = y[6];
4726 ts = y[7];
4727 t2 = y[8];
4728 ti = y[9];
4729 ta = y[10];
4730 tq = y[11];
4731 t6 = y[12];
4732 tm = y[13];
4733 te = y[14];
4734 tu = y[15];
4735 t1 = y[16];
4736 th = y[17];
4737 t9 = y[18];
4738 tp = y[19];
4739 t5 = y[20];
4740 tl = y[21];
4741 td = y[22];
4742 tt = y[23];
4743 t3 = y[24];
4744 tj = y[25];
4745 tb = y[26];
4746 tr = y[27];
4747 t7 = y[28];
4748 tn = y[29];
4749 tf = y[30];
4750 tv = y[31];
4751 OD_IDST_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
4752 t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
4753 x[0*xstride] = t0;
4754 x[1*xstride] = t1;
4755 x[2*xstride] = t2;
4756 x[3*xstride] = t3;
4757 x[4*xstride] = t4;
4758 x[5*xstride] = t5;
4759 x[6*xstride] = t6;
4760 x[7*xstride] = t7;
4761 x[8*xstride] = t8;
4762 x[9*xstride] = t9;
4763 x[10*xstride] = ta;
4764 x[11*xstride] = tb;
4765 x[12*xstride] = tc;
4766 x[13*xstride] = td;
4767 x[14*xstride] = te;
4768 x[15*xstride] = tf;
4769 x[16*xstride] = tg;
4770 x[17*xstride] = th;
4771 x[18*xstride] = ti;
4772 x[19*xstride] = tj;
4773 x[20*xstride] = tk;
4774 x[21*xstride] = tl;
4775 x[22*xstride] = tm;
4776 x[23*xstride] = tn;
4777 x[24*xstride] = to;
4778 x[25*xstride] = tp;
4779 x[26*xstride] = tq;
4780 x[27*xstride] = tr;
4781 x[28*xstride] = ts;
4782 x[29*xstride] = tt;
4783 x[30*xstride] = tu;
4784 x[31*xstride] = tv;
4785}
4786
Monty Montgomerya4e245a2017-07-22 00:48:31 -04004787#if CONFIG_TX64X64
4788void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride) {
4789 int t0;
4790 int t1;
4791 int t2;
4792 int t3;
4793 int t4;
4794 int t5;
4795 int t6;
4796 int t7;
4797 int t8;
4798 int t9;
4799 int ta;
4800 int tb;
4801 int tc;
4802 int td;
4803 int te;
4804 int tf;
4805 int tg;
4806 int th;
4807 int ti;
4808 int tj;
4809 int tk;
4810 int tl;
4811 int tm;
4812 int tn;
4813 int to;
4814 int tp;
4815 int tq;
4816 int tr;
4817 int ts;
4818 int tt;
4819 int tu;
4820 int tv;
4821 int tw;
4822 int tx;
4823 int ty;
4824 int tz;
4825 int tA;
4826 int tB;
4827 int tC;
4828 int tD;
4829 int tE;
4830 int tF;
4831 int tG;
4832 int tH;
4833 int tI;
4834 int tJ;
4835 int tK;
4836 int tL;
4837 int tM;
4838 int tN;
4839 int tO;
4840 int tP;
4841 int tQ;
4842 int tR;
4843 int tS;
4844 int tT;
4845 int tU;
4846 int tV;
4847 int tW;
4848 int tX;
4849 int tY;
4850 int tZ;
4851 int t_;
4852 int t;
4853 t0 = x[0*xstride];
4854 tw = x[1*xstride];
4855 tg = x[2*xstride];
4856 tM = x[3*xstride];
4857 t8 = x[4*xstride];
4858 tE = x[5*xstride];
4859 to = x[6*xstride];
4860 tU = x[7*xstride];
4861 t4 = x[8*xstride];
4862 tA = x[9*xstride];
4863 tk = x[10*xstride];
4864 tQ = x[11*xstride];
4865 tc = x[12*xstride];
4866 tI = x[13*xstride];
4867 ts = x[14*xstride];
4868 tY = x[15*xstride];
4869 t2 = x[16*xstride];
4870 ty = x[17*xstride];
4871 ti = x[18*xstride];
4872 tO = x[19*xstride];
4873 ta = x[20*xstride];
4874 tG = x[21*xstride];
4875 tq = x[22*xstride];
4876 tW = x[23*xstride];
4877 t6 = x[24*xstride];
4878 tC = x[25*xstride];
4879 tm = x[26*xstride];
4880 tS = x[27*xstride];
4881 te = x[28*xstride];
4882 tK = x[29*xstride];
4883 tu = x[30*xstride];
4884 t_ = x[31*xstride];
4885 t1 = x[32*xstride];
4886 tx = x[33*xstride];
4887 th = x[34*xstride];
4888 tN = x[35*xstride];
4889 t9 = x[36*xstride];
4890 tF = x[37*xstride];
4891 tp = x[38*xstride];
4892 tV = x[39*xstride];
4893 t5 = x[40*xstride];
4894 tB = x[41*xstride];
4895 tl = x[42*xstride];
4896 tR = x[43*xstride];
4897 td = x[44*xstride];
4898 tJ = x[45*xstride];
4899 tt = x[46*xstride];
4900 tZ = x[47*xstride];
4901 t3 = x[48*xstride];
4902 tz = x[49*xstride];
4903 tj = x[50*xstride];
4904 tP = x[51*xstride];
4905 tb = x[52*xstride];
4906 tH = x[53*xstride];
4907 tr = x[54*xstride];
4908 tX = x[55*xstride];
4909 t7 = x[56*xstride];
4910 tD = x[57*xstride];
4911 tn = x[58*xstride];
4912 tT = x[59*xstride];
4913 tf = x[60*xstride];
4914 tL = x[61*xstride];
4915 tv = x[62*xstride];
4916 t = x[63*xstride];
4917 OD_FDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
4918 t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
4919 th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
4920 tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
4921 y[0] = (od_coeff)t0;
4922 y[1] = (od_coeff)t1;
4923 y[2] = (od_coeff)t2;
4924 y[3] = (od_coeff)t3;
4925 y[4] = (od_coeff)t4;
4926 y[5] = (od_coeff)t5;
4927 y[6] = (od_coeff)t6;
4928 y[7] = (od_coeff)t7;
4929 y[8] = (od_coeff)t8;
4930 y[9] = (od_coeff)t9;
4931 y[10] = (od_coeff)ta;
4932 y[11] = (od_coeff)tb;
4933 y[12] = (od_coeff)tc;
4934 y[13] = (od_coeff)td;
4935 y[14] = (od_coeff)te;
4936 y[15] = (od_coeff)tf;
4937 y[16] = (od_coeff)tg;
4938 y[17] = (od_coeff)th;
4939 y[18] = (od_coeff)ti;
4940 y[19] = (od_coeff)tj;
4941 y[20] = (od_coeff)tk;
4942 y[21] = (od_coeff)tl;
4943 y[22] = (od_coeff)tm;
4944 y[23] = (od_coeff)tn;
4945 y[24] = (od_coeff)to;
4946 y[25] = (od_coeff)tp;
4947 y[26] = (od_coeff)tq;
4948 y[27] = (od_coeff)tr;
4949 y[28] = (od_coeff)ts;
4950 y[29] = (od_coeff)tt;
4951 y[30] = (od_coeff)tu;
4952 y[31] = (od_coeff)tv;
4953 y[32] = (od_coeff)tw;
4954 y[33] = (od_coeff)tx;
4955 y[34] = (od_coeff)ty;
4956 y[35] = (od_coeff)tz;
4957 y[36] = (od_coeff)tA;
4958 y[37] = (od_coeff)tB;
4959 y[38] = (od_coeff)tC;
4960 y[39] = (od_coeff)tD;
4961 y[40] = (od_coeff)tE;
4962 y[41] = (od_coeff)tF;
4963 y[41] = (od_coeff)tF;
4964 y[42] = (od_coeff)tG;
4965 y[43] = (od_coeff)tH;
4966 y[44] = (od_coeff)tI;
4967 y[45] = (od_coeff)tJ;
4968 y[46] = (od_coeff)tK;
4969 y[47] = (od_coeff)tL;
4970 y[48] = (od_coeff)tM;
4971 y[49] = (od_coeff)tN;
4972 y[50] = (od_coeff)tO;
4973 y[51] = (od_coeff)tP;
4974 y[52] = (od_coeff)tQ;
4975 y[53] = (od_coeff)tR;
4976 y[54] = (od_coeff)tS;
4977 y[55] = (od_coeff)tT;
4978 y[56] = (od_coeff)tU;
4979 y[57] = (od_coeff)tV;
4980 y[58] = (od_coeff)tW;
4981 y[59] = (od_coeff)tX;
4982 y[60] = (od_coeff)tY;
4983 y[61] = (od_coeff)tZ;
4984 y[62] = (od_coeff)t_;
4985 y[63] = (od_coeff)t;
4986}
4987
4988void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]) {
4989 int t0;
4990 int t1;
4991 int t2;
4992 int t3;
4993 int t4;
4994 int t5;
4995 int t6;
4996 int t7;
4997 int t8;
4998 int t9;
4999 int ta;
5000 int tb;
5001 int tc;
5002 int td;
5003 int te;
5004 int tf;
5005 int tg;
5006 int th;
5007 int ti;
5008 int tj;
5009 int tk;
5010 int tl;
5011 int tm;
5012 int tn;
5013 int to;
5014 int tp;
5015 int tq;
5016 int tr;
5017 int ts;
5018 int tt;
5019 int tu;
5020 int tv;
5021 int tw;
5022 int tx;
5023 int ty;
5024 int tz;
5025 int tA;
5026 int tB;
5027 int tC;
5028 int tD;
5029 int tE;
5030 int tF;
5031 int tG;
5032 int tH;
5033 int tI;
5034 int tJ;
5035 int tK;
5036 int tL;
5037 int tM;
5038 int tN;
5039 int tO;
5040 int tP;
5041 int tQ;
5042 int tR;
5043 int tS;
5044 int tT;
5045 int tU;
5046 int tV;
5047 int tW;
5048 int tX;
5049 int tY;
5050 int tZ;
5051 int t_;
5052 int t;
5053 t0 = y[0];
5054 tw = y[1];
5055 tg = y[2];
5056 tM = y[3];
5057 t8 = y[4];
5058 tE = y[5];
5059 to = y[6];
5060 tU = y[7];
5061 t4 = y[8];
5062 tA = y[9];
5063 tk = y[10];
5064 tQ = y[11];
5065 tc = y[12];
5066 tI = y[13];
5067 ts = y[14];
5068 tY = y[15];
5069 t2 = y[16];
5070 ty = y[17];
5071 ti = y[18];
5072 tO = y[19];
5073 ta = y[20];
5074 tG = y[21];
5075 tq = y[22];
5076 tW = y[23];
5077 t6 = y[24];
5078 tC = y[25];
5079 tm = y[26];
5080 tS = y[27];
5081 te = y[28];
5082 tK = y[29];
5083 tu = y[30];
5084 t_ = y[31];
5085 t1 = y[32];
5086 tx = y[33];
5087 th = y[34];
5088 tN = y[35];
5089 t9 = y[36];
5090 tF = y[37];
5091 tp = y[38];
5092 tV = y[39];
5093 t5 = y[40];
5094 tB = y[41];
5095 tl = y[42];
5096 tR = y[43];
5097 td = y[44];
5098 tJ = y[45];
5099 tt = y[46];
5100 tZ = y[47];
5101 t3 = y[48];
5102 tz = y[49];
5103 tj = y[50];
5104 tP = y[51];
5105 tb = y[52];
5106 tH = y[53];
5107 tr = y[54];
5108 tX = y[55];
5109 t7 = y[56];
5110 tD = y[57];
5111 tn = y[58];
5112 tT = y[59];
5113 tf = y[60];
5114 tL = y[61];
5115 tv = y[62];
5116 t = y[63];
5117 OD_IDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
5118 t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
5119 th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
5120 tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
5121 x[0*xstride] = (od_coeff)t0;
5122 x[1*xstride] = (od_coeff)t1;
5123 x[2*xstride] = (od_coeff)t2;
5124 x[3*xstride] = (od_coeff)t3;
5125 x[4*xstride] = (od_coeff)t4;
5126 x[5*xstride] = (od_coeff)t5;
5127 x[6*xstride] = (od_coeff)t6;
5128 x[7*xstride] = (od_coeff)t7;
5129 x[8*xstride] = (od_coeff)t8;
5130 x[9*xstride] = (od_coeff)t9;
5131 x[10*xstride] = (od_coeff)ta;
5132 x[11*xstride] = (od_coeff)tb;
5133 x[12*xstride] = (od_coeff)tc;
5134 x[13*xstride] = (od_coeff)td;
5135 x[14*xstride] = (od_coeff)te;
5136 x[15*xstride] = (od_coeff)tf;
5137 x[16*xstride] = (od_coeff)tg;
5138 x[17*xstride] = (od_coeff)th;
5139 x[18*xstride] = (od_coeff)ti;
5140 x[19*xstride] = (od_coeff)tj;
5141 x[20*xstride] = (od_coeff)tk;
5142 x[21*xstride] = (od_coeff)tl;
5143 x[22*xstride] = (od_coeff)tm;
5144 x[23*xstride] = (od_coeff)tn;
5145 x[24*xstride] = (od_coeff)to;
5146 x[25*xstride] = (od_coeff)tp;
5147 x[26*xstride] = (od_coeff)tq;
5148 x[27*xstride] = (od_coeff)tr;
5149 x[28*xstride] = (od_coeff)ts;
5150 x[29*xstride] = (od_coeff)tt;
5151 x[30*xstride] = (od_coeff)tu;
5152 x[31*xstride] = (od_coeff)tv;
5153 x[32*xstride] = (od_coeff)tw;
5154 x[33*xstride] = (od_coeff)tx;
5155 x[34*xstride] = (od_coeff)ty;
5156 x[35*xstride] = (od_coeff)tz;
5157 x[36*xstride] = (od_coeff)tA;
5158 x[37*xstride] = (od_coeff)tB;
5159 x[38*xstride] = (od_coeff)tC;
5160 x[39*xstride] = (od_coeff)tD;
5161 x[40*xstride] = (od_coeff)tE;
5162 x[41*xstride] = (od_coeff)tF;
5163 x[41*xstride] = (od_coeff)tF;
5164 x[42*xstride] = (od_coeff)tG;
5165 x[43*xstride] = (od_coeff)tH;
5166 x[44*xstride] = (od_coeff)tI;
5167 x[45*xstride] = (od_coeff)tJ;
5168 x[46*xstride] = (od_coeff)tK;
5169 x[47*xstride] = (od_coeff)tL;
5170 x[48*xstride] = (od_coeff)tM;
5171 x[49*xstride] = (od_coeff)tN;
5172 x[50*xstride] = (od_coeff)tO;
5173 x[51*xstride] = (od_coeff)tP;
5174 x[52*xstride] = (od_coeff)tQ;
5175 x[53*xstride] = (od_coeff)tR;
5176 x[54*xstride] = (od_coeff)tS;
5177 x[55*xstride] = (od_coeff)tT;
5178 x[56*xstride] = (od_coeff)tU;
5179 x[57*xstride] = (od_coeff)tV;
5180 x[58*xstride] = (od_coeff)tW;
5181 x[59*xstride] = (od_coeff)tX;
5182 x[60*xstride] = (od_coeff)tY;
5183 x[61*xstride] = (od_coeff)tZ;
5184 x[62*xstride] = (od_coeff)t_;
5185 x[63*xstride] = (od_coeff)t;
5186}
5187#endif
Nathan E. Egge5e6bda82017-09-16 10:13:51 -04005188
5189void daala_fdct4(const tran_low_t *input, tran_low_t *output) {
5190 int i;
5191 od_coeff x[4];
5192 od_coeff y[4];
5193 for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
5194 od_bin_fdct4(y, x, 1);
5195 for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
5196}
5197
5198void daala_idct4(const tran_low_t *input, tran_low_t *output) {
5199 int i;
5200 od_coeff x[4];
5201 od_coeff y[4];
5202 for (i = 0; i < 4; i++) y[i] = input[i];
5203 od_bin_idct4(x, 1, y);
5204 for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i];
5205}
Nathan E. Egge1aefb5e2017-09-16 11:28:41 -04005206
5207void daala_fdst4(const tran_low_t *input, tran_low_t *output) {
5208 int i;
5209 od_coeff x[4];
5210 od_coeff y[4];
5211 for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
5212 od_bin_fdst4(y, x, 1);
5213 for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
5214}
5215
5216void daala_idst4(const tran_low_t *input, tran_low_t *output) {
5217 int i;
5218 od_coeff x[4];
5219 od_coeff y[4];
5220 for (i = 0; i < 4; i++) y[i] = input[i];
5221 od_bin_idst4(x, 1, y);
5222 for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i];
5223}
Nathan E. Egge75bfeb82017-09-16 20:41:24 -04005224
Nathan E. Egge31f24ee2017-09-18 11:25:26 -04005225void daala_idtx4(const tran_low_t *input, tran_low_t *output) {
5226 int i;
5227 for (i = 0; i < 4; i++) output[i] = input[i];
5228}
5229
Nathan E. Egge75bfeb82017-09-16 20:41:24 -04005230void daala_fdct8(const tran_low_t *input, tran_low_t *output) {
5231 int i;
5232 od_coeff x[8];
5233 od_coeff y[8];
5234 for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
5235 od_bin_fdct8(y, x, 1);
5236 for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
5237}
5238
5239void daala_idct8(const tran_low_t *input, tran_low_t *output) {
5240 int i;
5241 od_coeff x[8];
5242 od_coeff y[8];
5243 for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i];
5244 od_bin_idct8(x, 1, y);
5245 for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i];
5246}
Nathan E. Egge8a873db2017-09-16 20:55:20 -04005247
5248void daala_fdst8(const tran_low_t *input, tran_low_t *output) {
5249 int i;
5250 od_coeff x[8];
5251 od_coeff y[8];
5252 for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
5253 od_bin_fdst8(y, x, 1);
5254 for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
5255}
5256
5257void daala_idst8(const tran_low_t *input, tran_low_t *output) {
5258 int i;
5259 od_coeff x[8];
5260 od_coeff y[8];
5261 for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i];
5262 od_bin_idst8(x, 1, y);
5263 for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i];
5264}
Nathan E. Eggec5c1e562017-09-16 22:18:18 -04005265
Nathan E. Egge3f45fb32017-09-18 11:34:48 -04005266void daala_idtx8(const tran_low_t *input, tran_low_t *output) {
5267 int i;
5268 for (i = 0; i < 8; i++) output[i] = input[i];
5269}
5270
Nathan E. Eggec5c1e562017-09-16 22:18:18 -04005271void daala_fdct16(const tran_low_t *input, tran_low_t *output) {
5272 int i;
5273 od_coeff x[16];
5274 od_coeff y[16];
5275 for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i];
5276 od_bin_fdct16(y, x, 1);
5277 for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i];
5278}
5279
5280void daala_idct16(const tran_low_t *input, tran_low_t *output) {
5281 int i;
5282 od_coeff x[16];
5283 od_coeff y[16];
5284 for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i];
5285 od_bin_idct16(x, 1, y);
5286 for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i];
5287}
Nathan E. Eggecbcff062017-09-16 22:32:19 -04005288
5289void daala_fdst16(const tran_low_t *input, tran_low_t *output) {
5290 int i;
5291 od_coeff x[16];
5292 od_coeff y[16];
5293 for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i];
5294 od_bin_fdst16(y, x, 1);
5295 for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i];
5296}
5297
5298void daala_idst16(const tran_low_t *input, tran_low_t *output) {
5299 int i;
5300 od_coeff x[16];
5301 od_coeff y[16];
5302 for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i];
5303 od_bin_idst16(x, 1, y);
5304 for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i];
5305}
Nathan E. Eggedfd1a922017-09-16 23:35:30 -04005306
Nathan E. Egge74e7fd02017-09-18 11:40:31 -04005307void daala_idtx16(const tran_low_t *input, tran_low_t *output) {
5308 int i;
5309 for (i = 0; i < 16; i++) output[i] = input[i];
5310}
5311
Nathan E. Eggedfd1a922017-09-16 23:35:30 -04005312void daala_fdct32(const tran_low_t *input, tran_low_t *output) {
5313 int i;
5314 od_coeff x[32];
5315 od_coeff y[32];
5316 for (i = 0; i < 32; i++) x[i] = (od_coeff)input[i];
5317 od_bin_fdct32(y, x, 1);
5318 for (i = 0; i < 32; i++) output[i] = (tran_low_t)y[i];
5319}
5320
5321void daala_idct32(const tran_low_t *input, tran_low_t *output) {
5322 int i;
5323 od_coeff x[32];
5324 od_coeff y[32];
5325 for (i = 0; i < 32; i++) y[i] = (od_coeff)input[i];
5326 od_bin_idct32(x, 1, y);
5327 for (i = 0; i < 32; i++) output[i] = (tran_low_t)x[i];
5328}
Nathan E. Egged8661142017-09-16 23:57:51 -04005329
Nathan E. Eggef6d3ba62017-09-18 15:40:08 -04005330void daala_fdst32(const tran_low_t *input, tran_low_t *output) {
5331 int i;
Nathan E. Eggef73e47e2017-10-22 06:41:55 -04005332 od_coeff x[32];
5333 od_coeff y[32];
5334 for (i = 0; i < 32; i++) x[i] = (od_coeff)input[i];
5335 od_bin_fdst32(y, x, 1);
5336 for (i = 0; i < 32; i++) output[i] = (tran_low_t)y[i];
Nathan E. Eggef6d3ba62017-09-18 15:40:08 -04005337}
5338
Nathan E. Eggef6d3ba62017-09-18 15:40:08 -04005339void daala_idst32(const tran_low_t *input, tran_low_t *output) {
5340 int i;
Nathan E. Eggef73e47e2017-10-22 06:41:55 -04005341 od_coeff x[32];
5342 od_coeff y[32];
5343 for (i = 0; i < 32; i++) y[i] = input[i];
5344 od_bin_idst32(x, 1, y);
5345 for (i = 0; i < 32; i++) output[i] = (tran_low_t)x[i];
Nathan E. Eggef6d3ba62017-09-18 15:40:08 -04005346}
5347
Nathan E. Egge4c77fc02017-09-18 11:47:52 -04005348void daala_idtx32(const tran_low_t *input, tran_low_t *output) {
5349 int i;
5350 for (i = 0; i < 32; i++) output[i] = input[i];
5351}
5352
Nathan E. Egged8661142017-09-16 23:57:51 -04005353#if CONFIG_TX64X64
5354void daala_fdct64(const tran_low_t *input, tran_low_t *output) {
5355 int i;
5356 od_coeff x[64];
5357 od_coeff y[64];
5358 for (i = 0; i < 64; i++) x[i] = (od_coeff)input[i];
5359 od_bin_fdct64(y, x, 1);
5360 for (i = 0; i < 64; i++) output[i] = (tran_low_t)y[i];
5361}
5362
5363void daala_idct64(const tran_low_t *input, tran_low_t *output) {
5364 int i;
5365 od_coeff x[64];
5366 od_coeff y[64];
5367 for (i = 0; i < 64; i++) y[i] = (od_coeff)input[i];
5368 od_bin_idct64(x, 1, y);
5369 for (i = 0; i < 64; i++) output[i] = (tran_low_t)x[i];
5370}
Nathan E. Egge01b1d912017-09-18 12:02:22 -04005371
Nathan E. Egge2496a852017-09-18 15:59:54 -04005372/* Preserve the "half-right" transform behavior. */
5373void daala_fdst64(const tran_low_t *input, tran_low_t *output) {
5374 int i;
5375 tran_low_t inputhalf[32];
5376 for (i = 0; i < 32; ++i) {
5377 output[32 + i] = input[i];
5378 }
5379 for (i = 0; i < 32; ++i) {
5380 inputhalf[i] = input[i + 32];
5381 }
5382 daala_fdct32(inputhalf, output);
5383}
5384
5385/* Preserve the "half-right" transform behavior. */
5386void daala_idst64(const tran_low_t *input, tran_low_t *output) {
5387 int i;
5388 tran_low_t inputhalf[32];
5389 for (i = 0; i < 32; ++i) {
5390 inputhalf[i] = input[i];
5391 }
5392 for (i = 0; i < 32; ++i) {
5393 output[i] = input[32 + i];
5394 }
5395 daala_idct32(inputhalf, output + 32);
5396}
5397
Nathan E. Egge01b1d912017-09-18 12:02:22 -04005398void daala_idtx64(const tran_low_t *input, tran_low_t *output) {
5399 int i;
5400 for (i = 0; i < 64; i++) output[i] = input[i];
5401}
Nathan E. Egged8661142017-09-16 23:57:51 -04005402#endif