#include "av1/common/daala_tx.h"
#include "av1/common/odintrin.h"

/* clang-format off */

/* Right shift used by the butterfly steps of the transform macros in this
   file. It simply forwards to OD_UNBIASED_RSHIFT32, which is not defined
   here (presumably it comes from av1/common/odintrin.h -- confirm). */
# define OD_DCT_RSHIFT(_a, _b) OD_UNBIASED_RSHIFT32(_a, _b)

/* TODO: Daala DCT overflow checks need to be ported as a later test */
/* NOTE: when OD_DCT_CHECK_OVERFLOW is defined, nothing is defined here, so
   OD_DCT_OVERFLOW_CHECK has to be provided from somewhere else. Otherwise
   the check expands to nothing and every call site reduces to an empty
   statement. */
# if defined(OD_DCT_CHECK_OVERFLOW)
# else
#  define OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx)
# endif

/* OD_FDCT_2(p0, p1): in-place 2-point forward transform built from three
   rounded fixed-point steps, each updating one argument from the other.
   Both arguments must be modifiable integer lvalues; each is read and
   written several times, so they must not have side effects.
   OD_IDCT_2 applies the same steps in reverse order with opposite signs. */
#define OD_FDCT_2(p0, p1) \
  /* Embedded 2-point orthonormal Type-II fDCT. */ \
  do { \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 13573, 16384, 100); \
    p0 -= (p1*13573 + 16384) >> 15; \
    /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 5793, 4096, 101); \
    p1 += (p0*5793 + 4096) >> 13; \
    /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 3393, 4096, 102); \
    p0 -= (p1*3393 + 4096) >> 13; \
  } \
  while (0)

/* OD_IDCT_2(p0, p1): in-place 2-point inverse transform. It runs the three
   steps of OD_FDCT_2 in reverse order with opposite signs, so it undoes that
   macro exactly. Both arguments must be modifiable integer lvalues without
   side effects (each is evaluated several times). */
#define OD_IDCT_2(p0, p1) \
  /* Embedded 2-point orthonormal Type-II iDCT. */ \
  do { \
    /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    p0 += (p1*3393 + 4096) >> 13; \
    /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    p1 -= (p0*5793 + 4096) >> 13; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    p0 += (p1*13573 + 16384) >> 15; \
  } \
  while (0)

/* OD_FDCT_2_ASYM(p0, p1, p1h): in-place 2-point butterfly.
   p1h is only read; callers in this file pass the already-halved value of
   p1. p0 and p1 must be modifiable integer lvalues without side effects. */
#define OD_FDCT_2_ASYM(p0, p1, p1h) \
  /* Embedded 2-point asymmetric Type-II fDCT. */ \
  do { \
    p0 += p1h; \
    p1 = p0 - p1; \
  } \
  while (0)

/* OD_IDCT_2_ASYM(p0, p1, p1h): in-place inverse of OD_FDCT_2_ASYM.
   p1h is an output: it receives OD_DCT_RSHIFT(p1, 1) of the updated p1 so
   the caller can reuse the halved value. All three arguments must be
   modifiable integer lvalues without side effects. */
#define OD_IDCT_2_ASYM(p0, p1, p1h) \
  /* Embedded 2-point asymmetric Type-II iDCT. */ \
  do { \
    p1 = p0 - p1; \
    p1h = OD_DCT_RSHIFT(p1, 1); \
    p0 -= p1h; \
  } \
  while (0)

/* OD_FDST_2(p0, p1): in-place 2-point forward transform built from three
   rounded fixed-point steps. Both arguments must be modifiable integer
   lvalues without side effects (each is evaluated several times).
   OD_IDST_2 applies the same steps in reverse order with opposite signs. */
#define OD_FDST_2(p0, p1) \
  /* Embedded 2-point orthonormal Type-IV fDST. */ \
  do { \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 103); \
    p0 -= (p1*10947 + 8192) >> 14; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 473, 256, 104); \
    p1 += (p0*473 + 256) >> 9; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 105); \
    p0 -= (p1*10947 + 8192) >> 14; \
  } \
  while (0)

/* OD_IDST_2(p0, p1): in-place 2-point inverse transform. It runs the three
   steps of OD_FDST_2 in reverse order with opposite signs. Both arguments
   must be modifiable integer lvalues without side effects. */
#define OD_IDST_2(p0, p1) \
  /* Embedded 2-point orthonormal Type-IV iDST. */ \
  do { \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    p0 += (p1*10947 + 8192) >> 14; \
    /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    p1 -= (p0*473 + 256) >> 9; \
    /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    p0 += (p1*10947 + 8192) >> 14; \
  } \
  while (0)

/* OD_FDST_2_ASYM(p0, p1): in-place 2-point forward transform built from
   three rounded fixed-point steps. Both arguments must be modifiable integer
   lvalues without side effects (each is evaluated several times).
   OD_IDST_2_ASYM applies the same steps in reverse with opposite signs. */
#define OD_FDST_2_ASYM(p0, p1) \
  /* Embedded 2-point asymmetric Type-IV fDST. */ \
  do { \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 11507, 8192, 187); \
    p0 -= (p1*11507 + 8192) >> 14; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 669, 512, 188); \
    p1 += (p0*669 + 512) >> 10; \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 4573, 2048, 189); \
    p0 -= (p1*4573 + 2048) >> 12; \
  } \
  while (0)

/* OD_IDST_2_ASYM(p0, p1): in-place 2-point inverse transform. It runs the
   three steps of OD_FDST_2_ASYM in reverse order with opposite signs. Both
   arguments must be modifiable integer lvalues without side effects. */
#define OD_IDST_2_ASYM(p0, p1) \
  /* Embedded 2-point asymmetric Type-IV iDST. */ \
  do { \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    p0 += (p1*4573 + 2048) >> 12; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    p1 -= (p0*669 + 512) >> 10; \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    p0 += (p1*11507 + 8192) >> 14; \
  } \
  while (0)

/* OD_FDCT_4(q0, q2, q1, q3): in-place 4-point forward transform.
   It first folds (q0, q3) and (q2, q1) into butterfly pairs using halved
   values from OD_DCT_RSHIFT, then runs OD_FDCT_2_ASYM on (q0, q2) and
   OD_FDST_2_ASYM on (q3, q1). Note the parameter order (q0, q2, q1, q3).
   All arguments must be modifiable integer lvalues without side effects. */
#define OD_FDCT_4(q0, q2, q1, q3) \
  /* Embedded 4-point orthonormal Type-II fDCT. */ \
  do { \
    int q2h; \
    int q3h; \
    q3 = q0 - q3; \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 -= q3h; \
    q2 += q1; \
    q2h = OD_DCT_RSHIFT(q2, 1); \
    q1 = q2h - q1; \
    OD_FDCT_2_ASYM(q0, q2, q2h); \
    OD_FDST_2_ASYM(q3, q1); \
  } \
  while (0)

/* OD_IDCT_4(q0, q2, q1, q3): in-place 4-point inverse transform.
   It runs OD_IDST_2_ASYM on (q3, q2) and OD_IDCT_2_ASYM on (q0, q1), which
   also yields the halved value q1h, then undoes the butterflies. Note the
   parameter order (q0, q2, q1, q3). All arguments must be modifiable integer
   lvalues without side effects. */
#define OD_IDCT_4(q0, q2, q1, q3) \
  /* Embedded 4-point orthonormal Type-II iDCT. */ \
  do { \
    int q1h; \
    int q3h; \
    OD_IDST_2_ASYM(q3, q2); \
    OD_IDCT_2_ASYM(q0, q1, q1h); \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 += q3h; \
    q3 = q0 - q3; \
    q2 = q1h - q2; \
    q1 -= q2; \
  } \
  while (0)

/* OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h): in-place 4-point forward
   transform. q2h and q3h are only read; callers in this file pass the
   already-halved values of q2 and q3. After the butterflies it runs
   OD_FDCT_2 on (q0, q2) and OD_FDST_2 on (q3, q1). q0..q3 must be
   modifiable integer lvalues without side effects. */
#define OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h) \
  /* Embedded 4-point asymmetric Type-II fDCT. */ \
  do { \
    q0 += q3h; \
    q3 = q0 - q3; \
    q1 = q2h - q1; \
    q2 = q1 - q2; \
    OD_FDCT_2(q0, q2); \
    OD_FDST_2(q3, q1); \
  } \
  while (0)

/* OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h): in-place 4-point inverse
   transform. It runs OD_IDST_2 on (q3, q2) and OD_IDCT_2 on (q0, q1), then
   undoes the butterflies. q1h and q3h are outputs: they receive the halved
   values of the updated q1 and q3 for reuse by the caller. All arguments
   must be modifiable integer lvalues without side effects. */
#define OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h) \
  /* Embedded 4-point asymmetric Type-II iDCT. */ \
  do { \
    OD_IDST_2(q3, q2); \
    OD_IDCT_2(q0, q1); \
    q1 = q2 - q1; \
    q1h = OD_DCT_RSHIFT(q1, 1); \
    q2 = q1h - q2; \
    q3 = q0 - q3; \
    q3h = OD_DCT_RSHIFT(q3, 1); \
    q0 -= q3h; \
  } \
  while (0)

Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 169 | #define OD_FDST_4(q0, q2, q1, q3) \ |
| 170 | /* Embedded 4-point orthonormal Type-IV fDST. */ \ |
| 171 | do { \ |
| 172 | int q0h; \ |
| 173 | int q1h; \ |
| 174 | /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 175 | OD_DCT_OVERFLOW_CHECK(q1, 13573, 16384, 190); \ |
| 176 | q2 += (q1*13573 + 16384) >> 15; \ |
| 177 | /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ |
| 178 | OD_DCT_OVERFLOW_CHECK(q2, 5793, 4096, 191); \ |
| 179 | q1 -= (q2*5793 + 4096) >> 13; \ |
| 180 | /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 181 | OD_DCT_OVERFLOW_CHECK(q1, 3393, 4096, 192); \ |
| 182 | q2 += (q1*3393 + 4096) >> 13; \ |
| 183 | q0 += q2; \ |
| 184 | q0h = OD_DCT_RSHIFT(q0, 1); \ |
| 185 | q2 = q0h - q2; \ |
| 186 | q1 += q3; \ |
| 187 | q1h = OD_DCT_RSHIFT(q1, 1); \ |
| 188 | q3 -= q1h; \ |
| 189 | /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~= |
| 190 | 0.524455699240090 */ \ |
| 191 | OD_DCT_OVERFLOW_CHECK(q1, 537, 512, 193); \ |
| 192 | q2 -= (q1*537 + 512) >> 10; \ |
| 193 | /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \ |
| 194 | OD_DCT_OVERFLOW_CHECK(q2, 1609, 1024, 194); \ |
| 195 | q1 += (q2*1609 + 1024) >> 11; \ |
| 196 | /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~= |
| 197 | 0.223847182092655 */ \ |
| 198 | OD_DCT_OVERFLOW_CHECK(q1, 7335, 16384, 195); \ |
| 199 | q2 += (q1*7335 + 16384) >> 15; \ |
| 200 | /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~= |
| 201 | 0.6215036383171189 */ \ |
| 202 | OD_DCT_OVERFLOW_CHECK(q0, 5091, 4096, 196); \ |
| 203 | q3 += (q0*5091 + 4096) >> 13; \ |
| 204 | /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \ |
| 205 | OD_DCT_OVERFLOW_CHECK(q3, 5681, 2048, 197); \ |
| 206 | q0 -= (q3*5681 + 2048) >> 12; \ |
| 207 | /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~= |
| 208 | 0.52204745462729 */ \ |
| 209 | OD_DCT_OVERFLOW_CHECK(q0, 4277, 4096, 198); \ |
| 210 | q3 += (q0*4277 + 4096) >> 13; \ |
| 211 | } \ |
| 212 | while (0) |
| 213 | |
| 214 | #define OD_IDST_4(q0, q2, q1, q3) \ |
| 215 | /* Embedded 4-point orthonormal Type-IV iDST. */ \ |
| 216 | do { \ |
| 217 | int q0h; \ |
| 218 | int q2h; \ |
| 219 | /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~= |
| 220 | 0.52204745462729 */ \ |
| 221 | q3 -= (q0*4277 + 4096) >> 13; \ |
| 222 | /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \ |
| 223 | q0 += (q3*5681 + 2048) >> 12; \ |
| 224 | /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~= |
| 225 | 0.6215036383171189 */ \ |
| 226 | q3 -= (q0*5091 + 4096) >> 13; \ |
| 227 | /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~= |
| 228 | 0.223847182092655 */ \ |
| 229 | q1 -= (q2*7335 + 16384) >> 15; \ |
| 230 | /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \ |
| 231 | q2 -= (q1*1609 + 1024) >> 11; \ |
| 232 | /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~= |
| 233 | 0.524455699240090 */ \ |
| 234 | q1 += (q2*537 + 512) >> 10; \ |
| 235 | q2h = OD_DCT_RSHIFT(q2, 1); \ |
| 236 | q3 += q2h; \ |
| 237 | q2 -= q3; \ |
| 238 | q0h = OD_DCT_RSHIFT(q0, 1); \ |
| 239 | q1 = q0h - q1; \ |
| 240 | q0 -= q1; \ |
| 241 | /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 242 | q1 -= (q2*3393 + 4096) >> 13; \ |
| 243 | /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ |
| 244 | q2 += (q1*5793 + 4096) >> 13; \ |
| 245 | /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 246 | q1 -= (q2*13573 + 16384) >> 15; \ |
| 247 | } \ |
| 248 | while (0) |
| 249 | |
/* OD_FDST_4_ASYM(t0, t0h, t2, t1, t3): in-place 4-point forward transform.
   t0h is only read; callers in this file pass the already-halved value of
   t0. Sequence: three rounded steps on (t2, t1), two butterflies, then
   three rounded steps on (t3, t0) and three on (t2, t1). t0, t1, t2 and t3
   must be modifiable integer lvalues without side effects.
   Overflow check 107 inspects t2, the value multiplied by 11585 in the step
   that follows it. */
#define OD_FDST_4_ASYM(t0, t0h, t2, t1, t3) \
  /* Embedded 4-point asymmetric Type-IV fDST. */ \
  do { \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 7489, 4096, 106); \
    t2 -= (t1*7489 + 4096) >> 13; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 11585, 8192, 107); \
    t1 += (t2*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 108); \
    t2 += (t1*19195 + 16384) >> 15; \
    t3 += OD_DCT_RSHIFT(t2, 1); \
    t2 -= t3; \
    t1 = t0h - t1; \
    t0 -= t1; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 109); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 8035, 4096, 110); \
    t0 -= (t3*8035 + 4096) >> 13; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 111); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 112); \
    t2 += (t1*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 6811, 4096, 113); \
    t1 -= (t2*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 114); \
    t2 += (t1*8757 + 8192) >> 14; \
  } \
  while (0)

| 287 | #define OD_IDST_4_ASYM(t0, t0h, t2, t1, t3) \ |
| 288 | /* Embedded 4-point asymmetric Type-IV iDST. */ \ |
| 289 | do { \ |
| 290 | /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ |
| 291 | t1 -= (t2*8757 + 8192) >> 14; \ |
| 292 | /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \ |
| 293 | t2 += (t1*6811 + 4096) >> 13; \ |
| 294 | /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ |
| 295 | t1 -= (t2*8757 + 8192) >> 14; \ |
| 296 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 297 | t3 -= (t0*6723 + 4096) >> 13; \ |
| 298 | /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ |
| 299 | t0 += (t3*8035 + 4096) >> 13; \ |
| 300 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 301 | t3 -= (t0*6723 + 4096) >> 13; \ |
| 302 | t0 += t2; \ |
| 303 | t0h = OD_DCT_RSHIFT(t0, 1); \ |
| 304 | t2 = t0h - t2; \ |
| 305 | t1 += t3; \ |
| 306 | t3 -= OD_DCT_RSHIFT(t1, 1); \ |
| 307 | /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ |
| 308 | t1 -= (t2*19195 + 16384) >> 15; \ |
| 309 | /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ |
| 310 | t2 -= (t1*11585 + 8192) >> 14; \ |
| 311 | /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ |
| 312 | t1 += (t2*7489 + 4096) >> 13; \ |
| 313 | } \ |
| 314 | while (0) |
| 315 | |
/* OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7): in-place 8-point forward
   transform. Four halving butterflies pair r0/r7, r6/r1, r2/r5 and r4/r3.
   The results then go through OD_FDCT_4_ASYM on (r0, r4, r2, r6) and
   OD_FDST_4_ASYM on (r7, r3, r5, r1), which reuse the halved values
   computed here. Note the parameter order. All arguments must be modifiable
   integer lvalues without side effects. */
#define OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
  /* Embedded 8-point orthonormal Type-II fDCT. */ \
  do { \
    int r4h; \
    int r5h; \
    int r6h; \
    int r7h; \
    r7 = r0 - r7; \
    r7h = OD_DCT_RSHIFT(r7, 1); \
    r0 -= r7h; \
    r6 += r1; \
    r6h = OD_DCT_RSHIFT(r6, 1); \
    r1 = r6h - r1; \
    r5 = r2 - r5; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 -= r5h; \
    r4 += r3; \
    r4h = OD_DCT_RSHIFT(r4, 1); \
    r3 = r4h - r3; \
    OD_FDCT_4_ASYM(r0, r4, r4h, r2, r6, r6h); \
    OD_FDST_4_ASYM(r7, r7h, r3, r5, r1); \
  } \
  while (0)

/* OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7): in-place 8-point inverse
   transform. It runs OD_IDST_4_ASYM on (r7, r5, r6, r4) and OD_IDCT_4_ASYM
   on (r0, r2, r1, r3); those macros also fill the halved values r7h, r1h
   and r3h. Four butterflies then recombine the halves. Note the parameter
   order. All arguments must be modifiable integer lvalues without side
   effects. */
#define OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
  /* Embedded 8-point orthonormal Type-II iDCT. */ \
  do { \
    int r1h; \
    int r3h; \
    int r5h; \
    int r7h; \
    OD_IDST_4_ASYM(r7, r7h, r5, r6, r4); \
    OD_IDCT_4_ASYM(r0, r2, r1, r1h, r3, r3h); \
    r0 += r7h; \
    r7 = r0 - r7; \
    r6 = r1h - r6; \
    r1 -= r6; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 += r5h; \
    r5 = r2 - r5; \
    r4 = r3h - r4; \
    r3 -= r4; \
  } \
  while (0)

/* OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h):
   in-place 8-point forward transform. The *h arguments are only read;
   callers in this file pass the already-halved values of r4, r6, r5 and r7.
   After four butterflies it runs OD_FDCT_4 on (r0, r4, r2, r6) and
   OD_FDST_4 on (r7, r3, r5, r1). r0..r7 must be modifiable integer lvalues
   without side effects. */
#define OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h) \
  /* Embedded 8-point asymmetric Type-II fDCT. */ \
  do { \
    r0 += r7h; \
    r7 = r0 - r7; \
    r1 = r6h - r1; \
    r6 -= r1; \
    r2 += r5h; \
    r5 = r2 - r5; \
    r3 = r4h - r3; \
    r4 -= r3; \
    OD_FDCT_4(r0, r4, r2, r6); \
    OD_FDST_4(r7, r3, r5, r1); \
  } \
  while (0)

/* OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h):
   in-place 8-point inverse transform. It runs OD_IDST_4 on (r7, r5, r6, r4)
   and OD_IDCT_4 on (r0, r2, r1, r3), then four halving butterflies. The *h
   arguments are outputs: each receives the halved value of its updated
   partner (r1, r5, r3, r7). All arguments must be modifiable integer lvalues
   without side effects. */
#define OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h) \
  /* Embedded 8-point asymmetric Type-II iDCT. */ \
  do { \
    OD_IDST_4(r7, r5, r6, r4); \
    OD_IDCT_4(r0, r2, r1, r3); \
    r7 = r0 - r7; \
    r7h = OD_DCT_RSHIFT(r7, 1); \
    r0 -= r7h; \
    r1 += r6; \
    r1h = OD_DCT_RSHIFT(r1, 1); \
    r6 = r1h - r6; \
    r5 = r2 - r5; \
    r5h = OD_DCT_RSHIFT(r5, 1); \
    r2 -= r5h; \
    r3 += r4; \
    r3h = OD_DCT_RSHIFT(r3, 1); \
    r4 = r3h - r4; \
  } \
  while (0)

| 397 | #define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \ |
Monty Montgomery | cf18fe4 | 2017-07-11 21:33:25 -0400 | [diff] [blame] | 398 | /* Embedded 8-point orthonormal Type-IV fDST. */ \ |
| 399 | do { \ |
| 400 | int t0h; \ |
| 401 | int t2h; \ |
| 402 | int t5h; \ |
| 403 | int t7h; \ |
| 404 | /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 405 | OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 115); \ |
| 406 | t6 -= (t1*13573 + 16384) >> 15; \ |
| 407 | /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ |
| 408 | OD_DCT_OVERFLOW_CHECK(t6, 11585, 8192, 116); \ |
| 409 | t1 += (t6*11585 + 8192) >> 14; \ |
| 410 | /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 411 | OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 117); \ |
| 412 | t6 -= (t1*13573 + 16384) >> 15; \ |
| 413 | /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 414 | OD_DCT_OVERFLOW_CHECK(t2, 21895, 16384, 118); \ |
| 415 | t5 -= (t2*21895 + 16384) >> 15; \ |
| 416 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 417 | OD_DCT_OVERFLOW_CHECK(t5, 15137, 8192, 119); \ |
| 418 | t2 += (t5*15137 + 8192) >> 14; \ |
| 419 | /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 420 | OD_DCT_OVERFLOW_CHECK(t2, 10947, 8192, 120); \ |
| 421 | t5 -= (t2*10947 + 8192) >> 14; \ |
| 422 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 423 | OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 121); \ |
| 424 | t4 -= (t3*3259 + 8192) >> 14; \ |
| 425 | /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ |
| 426 | OD_DCT_OVERFLOW_CHECK(t4, 3135, 4096, 122); \ |
| 427 | t3 += (t4*3135 + 4096) >> 13; \ |
| 428 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 429 | OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 123); \ |
| 430 | t4 -= (t3*3259 + 8192) >> 14; \ |
| 431 | t7 += t1; \ |
| 432 | t7h = OD_DCT_RSHIFT(t7, 1); \ |
| 433 | t1 -= t7h; \ |
| 434 | t2 = t3 - t2; \ |
| 435 | t2h = OD_DCT_RSHIFT(t2, 1); \ |
| 436 | t3 -= t2h; \ |
| 437 | t0 -= t6; \ |
| 438 | t0h = OD_DCT_RSHIFT(t0, 1); \ |
| 439 | t6 += t0h; \ |
| 440 | t5 = t4 - t5; \ |
| 441 | t5h = OD_DCT_RSHIFT(t5, 1); \ |
| 442 | t4 -= t5h; \ |
| 443 | t1 += t5h; \ |
| 444 | t5 = t1 - t5; \ |
| 445 | t4 += t0h; \ |
| 446 | t0 -= t4; \ |
| 447 | t6 -= t2h; \ |
| 448 | t2 += t6; \ |
| 449 | t3 -= t7h; \ |
| 450 | t7 += t3; \ |
| 451 | /* TODO: Can we move this into another operation */ \ |
| 452 | t7 = -t7; \ |
| 453 | /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \ |
| 454 | OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 124); \ |
| 455 | t0 -= (t7*7425 + 4096) >> 13; \ |
| 456 | /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \ |
| 457 | OD_DCT_OVERFLOW_CHECK(t0, 8153, 4096, 125); \ |
| 458 | t7 += (t0*8153 + 4096) >> 13; \ |
| 459 | /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \ |
| 460 | OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 126); \ |
| 461 | t0 -= (t7*7425 + 4096) >> 13; \ |
| 462 | /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \ |
| 463 | OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 127); \ |
| 464 | t6 -= (t1*4861 + 16384) >> 15; \ |
| 465 | /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \ |
| 466 | OD_DCT_OVERFLOW_CHECK(t6, 1189, 2048, 128); \ |
| 467 | t1 += (t6*1189 + 2048) >> 12; \ |
| 468 | /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \ |
| 469 | OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 129); \ |
| 470 | t6 -= (t1*4861 + 16384) >> 15; \ |
| 471 | /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \ |
| 472 | OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 130); \ |
| 473 | t2 -= (t5*2455 + 2048) >> 12; \ |
| 474 | /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \ |
| 475 | OD_DCT_OVERFLOW_CHECK(t2, 7225, 4096, 131); \ |
| 476 | t5 += (t2*7225 + 4096) >> 13; \ |
| 477 | /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \ |
| 478 | OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 132); \ |
| 479 | t2 -= (t5*2455 + 2048) >> 12; \ |
| 480 | /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \ |
| 481 | OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 133); \ |
| 482 | t4 -= (t3*11725 + 16384) >> 15; \ |
| 483 | /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \ |
| 484 | OD_DCT_OVERFLOW_CHECK(t4, 5197, 4096, 134); \ |
| 485 | t3 += (t4*5197 + 4096) >> 13; \ |
| 486 | /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \ |
| 487 | OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 135); \ |
| 488 | t4 -= (t3*11725 + 16384) >> 15; \ |
| 489 | } \ |
| 490 | while (0) |
| 491 | |
| 492 | #define OD_IDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \ |
| 493 | /* Embedded 8-point orthonormal Type-IV iDST. */ \ |
| 494 | do { \ |
| 495 | int t0h; \ |
| 496 | int t2h; \ |
| 497 | int t5h_; \ |
| 498 | int t7h_; \ |
| 499 | /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \ |
| 500 | t1 += (t6*11725 + 16384) >> 15; \ |
| 501 | /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \ |
| 502 | t6 -= (t1*5197 + 4096) >> 13; \ |
| 503 | /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \ |
| 504 | t1 += (t6*11725 + 16384) >> 15; \ |
| 505 | /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \ |
| 506 | t2 += (t5*2455 + 2048) >> 12; \ |
| 507 | /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \ |
| 508 | t5 -= (t2*7225 + 4096) >> 13; \ |
| 509 | /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \ |
| 510 | t2 += (t5*2455 + 2048) >> 12; \ |
| 511 | /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \ |
| 512 | t3 += (t4*4861 + 16384) >> 15; \ |
| 513 | /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \ |
| 514 | t4 -= (t3*1189 + 2048) >> 12; \ |
| 515 | /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \ |
| 516 | t3 += (t4*4861 + 16384) >> 15; \ |
| 517 | /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \ |
| 518 | t0 += (t7*7425 + 4096) >> 13; \ |
| 519 | /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \ |
| 520 | t7 -= (t0*8153 + 4096) >> 13; \ |
| 521 | /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \ |
| 522 | t0 += (t7*7425 + 4096) >> 13; \ |
| 523 | /* TODO: Can we move this into another operation */ \ |
| 524 | t7 = -t7; \ |
| 525 | t7 -= t6; \ |
| 526 | t7h_ = OD_DCT_RSHIFT(t7, 1); \ |
| 527 | t6 += t7h_; \ |
| 528 | t2 -= t3; \ |
| 529 | t2h = OD_DCT_RSHIFT(t2, 1); \ |
| 530 | t3 += t2h; \ |
| 531 | t0 += t1; \ |
| 532 | t0h = OD_DCT_RSHIFT(t0, 1); \ |
| 533 | t1 -= t0h; \ |
| 534 | t5 = t4 - t5; \ |
| 535 | t5h_ = OD_DCT_RSHIFT(t5, 1); \ |
| 536 | t4 -= t5h_; \ |
| 537 | t1 += t5h_; \ |
| 538 | t5 = t1 - t5; \ |
| 539 | t3 -= t0h; \ |
| 540 | t0 += t3; \ |
| 541 | t6 += t2h; \ |
| 542 | t2 = t6 - t2; \ |
| 543 | t4 += t7h_; \ |
| 544 | t7 -= t4; \ |
| 545 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 546 | t1 += (t6*3259 + 8192) >> 14; \ |
| 547 | /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ |
| 548 | t6 -= (t1*3135 + 4096) >> 13; \ |
| 549 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 550 | t1 += (t6*3259 + 8192) >> 14; \ |
| 551 | /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 552 | t5 += (t2*10947 + 8192) >> 14; \ |
| 553 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 554 | t2 -= (t5*15137 + 8192) >> 14; \ |
| 555 | /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 556 | t5 += (t2*21895 + 16384) >> 15; \ |
| 557 | /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 558 | t3 += (t4*13573 + 16384) >> 15; \ |
| 559 | /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ |
| 560 | t4 -= (t3*11585 + 8192) >> 14; \ |
| 561 | /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 562 | t3 += (t4*13573 + 16384) >> 15; \ |
| 563 | } \ |
| 564 | while (0) |
| 565 | |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 566 | /* Rewrite this so that t0h can be passed in. */ |
| 567 | #define OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \ |
| 568 | /* Embedded 8-point asymmetric Type-IV fDST. */ \ |
| 569 | do { \ |
| 570 | int t0h; \ |
| 571 | int t2h; \ |
| 572 | int t5h; \ |
| 573 | int t7h; \ |
| 574 | /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \ |
| 575 | OD_DCT_OVERFLOW_CHECK(t1, 1035, 1024, 199); \ |
| 576 | t6 += (t1*1035 + 1024) >> 11; \ |
| 577 | /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \ |
| 578 | OD_DCT_OVERFLOW_CHECK(t6, 3675, 2048, 200); \ |
| 579 | t1 -= (t6*3675 + 2048) >> 12; \ |
| 580 | /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \ |
| 581 | OD_DCT_OVERFLOW_CHECK(t1, 851, 4096, 201); \ |
| 582 | t6 -= (t1*851 + 4096) >> 13; \ |
| 583 | /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \ |
| 584 | OD_DCT_OVERFLOW_CHECK(t2, 4379, 4096, 202); \ |
| 585 | t5 += (t2*4379 + 4096) >> 13; \ |
| 586 | /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \ |
| 587 | OD_DCT_OVERFLOW_CHECK(t5, 10217, 4096, 203); \ |
| 588 | t2 -= (t5*10217 + 4096) >> 13; \ |
| 589 | /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \ |
| 590 | OD_DCT_OVERFLOW_CHECK(t2, 4379, 8192, 204); \ |
| 591 | t5 += (t2*4379 + 8192) >> 14; \ |
| 592 | /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \ |
| 593 | OD_DCT_OVERFLOW_CHECK(t3, 12905, 8192, 205); \ |
| 594 | t4 += (t3*12905 + 8192) >> 14; \ |
| 595 | /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \ |
| 596 | OD_DCT_OVERFLOW_CHECK(t4, 3363, 4096, 206); \ |
| 597 | t3 -= (t4*3363 + 4096) >> 13; \ |
| 598 | /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \ |
| 599 | OD_DCT_OVERFLOW_CHECK(t3, 3525, 2048, 207); \ |
| 600 | t4 -= (t3*3525 + 2048) >> 12; \ |
| 601 | /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \ |
| 602 | OD_DCT_OVERFLOW_CHECK(t0, 5417, 4096, 208); \ |
| 603 | t7 += (t0*5417 + 4096) >> 13; \ |
| 604 | /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \ |
| 605 | OD_DCT_OVERFLOW_CHECK(t7, 5765, 2048, 209); \ |
| 606 | t0 -= (t7*5765 + 2048) >> 12; \ |
| 607 | /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \ |
| 608 | OD_DCT_OVERFLOW_CHECK(t0, 2507, 2048, 210); \ |
| 609 | t7 += (t0*2507 + 2048) >> 12; \ |
| 610 | t0 += t1; \ |
| 611 | t0h = OD_DCT_RSHIFT(t0, 1); \ |
| 612 | t1 -= t0h; \ |
| 613 | t2 -= t3; \ |
| 614 | t2h = OD_DCT_RSHIFT(t2, 1); \ |
| 615 | t3 += t2h; \ |
| 616 | t5 -= t4; \ |
| 617 | t5h = OD_DCT_RSHIFT(t5, 1); \ |
| 618 | t4 += t5h; \ |
| 619 | t7 += t6; \ |
| 620 | t7h = OD_DCT_RSHIFT(t7, 1); \ |
| 621 | t6 = t7h - t6; \ |
| 622 | t4 = t7h - t4; \ |
| 623 | t7 -= t4; \ |
| 624 | t1 += t5h; \ |
| 625 | t5 = t1 - t5; \ |
| 626 | t6 += t2h; \ |
| 627 | t2 = t6 - t2; \ |
| 628 | t3 -= t0h; \ |
| 629 | t0 += t3; \ |
| 630 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 631 | OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 211); \ |
| 632 | t1 += (t6*3259 + 8192) >> 14; \ |
| 633 | /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ |
| 634 | OD_DCT_OVERFLOW_CHECK(t1, 3135, 4096, 212); \ |
| 635 | t6 -= (t1*3135 + 4096) >> 13; \ |
| 636 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 637 | OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 213); \ |
| 638 | t1 += (t6*3259 + 8192) >> 14; \ |
| 639 | /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 640 | OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 214); \ |
| 641 | t5 += (t2*2737 + 2048) >> 12; \ |
| 642 | /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 643 | OD_DCT_OVERFLOW_CHECK(t5, 473, 256, 215); \ |
| 644 | t2 -= (t5*473 + 256) >> 9; \ |
| 645 | /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 646 | OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 216); \ |
| 647 | t5 += (t2*2737 + 2048) >> 12; \ |
| 648 | /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 649 | OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 217); \ |
| 650 | t3 += (t4*3393 + 4096) >> 13; \ |
| 651 | /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ |
| 652 | OD_DCT_OVERFLOW_CHECK(t3, 5793, 4096, 218); \ |
| 653 | t4 -= (t3*5793 + 4096) >> 13; \ |
| 654 | /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 655 | OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 219); \ |
| 656 | t3 += (t4*3393 + 4096) >> 13; \ |
| 657 | } \ |
| 658 | while (0) |
| 659 | |
| 660 | #define OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \ |
| 661 | /* Embedded 8-point asymmetric Type-IV iDST. */ \ |
| 662 | do { \ |
| 663 | int t0h; \ |
| 664 | int t2h; \ |
| 665 | int t5h__; \ |
| 666 | int t7h__; \ |
| 667 | /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 668 | t6 -= (t1*3393 + 4096) >> 13; \ |
| 669 | /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ |
| 670 | t1 += (t6*5793 + 4096) >> 13; \ |
| 671 | /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 672 | t6 -= (t1*3393 + 4096) >> 13; \ |
| 673 | /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 674 | t5 -= (t2*2737 + 2048) >> 12; \ |
| 675 | /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 676 | t2 += (t5*473 + 256) >> 9; \ |
| 677 | /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 678 | t5 -= (t2*2737 + 2048) >> 12; \ |
| 679 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 680 | t4 -= (t3*3259 + 8192) >> 14; \ |
| 681 | /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ |
| 682 | t3 += (t4*3135 + 4096) >> 13; \ |
| 683 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 684 | t4 -= (t3*3259 + 8192) >> 14; \ |
| 685 | t0 -= t6; \ |
| 686 | t0h = OD_DCT_RSHIFT(t0, 1); \ |
| 687 | t6 += t0h; \ |
| 688 | t2 = t3 - t2; \ |
| 689 | t2h = OD_DCT_RSHIFT(t2, 1); \ |
| 690 | t3 -= t2h; \ |
| 691 | t5 = t4 - t5; \ |
| 692 | t5h__ = OD_DCT_RSHIFT(t5, 1); \ |
| 693 | t4 -= t5h__; \ |
| 694 | t7 += t1; \ |
| 695 | t7h__ = OD_DCT_RSHIFT(t7, 1); \ |
| 696 | t1 = t7h__ - t1; \ |
| 697 | t3 = t7h__ - t3; \ |
| 698 | t7 -= t3; \ |
| 699 | t1 -= t5h__; \ |
| 700 | t5 += t1; \ |
| 701 | t6 -= t2h; \ |
| 702 | t2 += t6; \ |
| 703 | t4 += t0h; \ |
| 704 | t0 -= t4; \ |
| 705 | /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \ |
| 706 | t7 -= (t0*2507 + 2048) >> 12; \ |
| 707 | /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \ |
| 708 | t0 += (t7*5765 + 2048) >> 12; \ |
| 709 | /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \ |
| 710 | t7 -= (t0*5417 + 4096) >> 13; \ |
| 711 | /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \ |
| 712 | t1 += (t6*3525 + 2048) >> 12; \ |
| 713 | /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \ |
| 714 | t6 += (t1*3363 + 4096) >> 13; \ |
| 715 | /* 12905/16384 ~= (1/Sqrt[2] - Cos[3*Pi/32]/1)/Sin[3*Pi/32] */ \ |
| 716 | t1 -= (t6*12905 + 8192) >> 14; \ |
| 717 | /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \ |
| 718 | t5 -= (t2*4379 + 8192) >> 14; \ |
| 719 | /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \ |
| 720 | t2 += (t5*10217 + 4096) >> 13; \ |
| 721 | /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \ |
| 722 | t5 -= (t2*4379 + 4096) >> 13; \ |
| 723 | /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \ |
| 724 | t3 += (t4*851 + 4096) >> 13; \ |
| 725 | /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \ |
| 726 | t4 += (t3*3675 + 2048) >> 12; \ |
| 727 | /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \ |
| 728 | t3 -= (t4*1035 + 1024) >> 11; \ |
| 729 | } \ |
| 730 | while (0) |
| 731 | |
/* OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf):
   in-place 16-point forward transform. Eight halving butterflies pair
   s0/sf, se/s1, s2/sd, sc/s3, s4/sb, sa/s5, s6/s9 and s8/s7. The results
   then go through OD_FDCT_8_ASYM, which reuses s8h, sch, sah and seh, and
   OD_FDST_8_ASYM. sfh is computed but not passed on to either macro. Note
   the parameter order. All arguments must be modifiable integer lvalues
   without side effects. */
#define OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
  s1, s9, s5, sd, s3, sb, s7, sf) \
  /* Embedded 16-point orthonormal Type-II fDCT. */ \
  do { \
    int s8h; \
    int sah; \
    int sch; \
    int seh; \
    int sfh; \
    sf = s0 - sf; \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    s0 -= sfh; \
    se += s1; \
    seh = OD_DCT_RSHIFT(se, 1); \
    s1 = seh - s1; \
    sd = s2 - sd; \
    s2 -= OD_DCT_RSHIFT(sd, 1); \
    sc += s3; \
    sch = OD_DCT_RSHIFT(sc, 1); \
    s3 = sch - s3; \
    sb = s4 - sb; \
    s4 -= OD_DCT_RSHIFT(sb, 1); \
    sa += s5; \
    sah = OD_DCT_RSHIFT(sa, 1); \
    s5 = sah - s5; \
    s9 = s6 - s9; \
    s6 -= OD_DCT_RSHIFT(s9, 1); \
    s8 += s7; \
    s8h = OD_DCT_RSHIFT(s8, 1); \
    s7 = s8h - s7; \
    OD_FDCT_8_ASYM(s0, s8, s8h, s4, sc, sch, s2, sa, sah, s6, se, seh); \
    OD_FDST_8_ASYM(sf, s7, sb, s3, sd, s5, s9, s1); \
  } \
  while (0)

/* OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf):
   in-place 16-point inverse transform. It runs OD_IDST_8_ASYM and then
   OD_IDCT_8_ASYM, which fills the halved values s1h, s5h, s3h and s7h.
   Eight butterflies then recombine the halves. Note the parameter order.
   All arguments must be modifiable integer lvalues without side effects. */
#define OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
  s1, s9, s5, sd, s3, sb, s7, sf) \
  /* Embedded 16-point orthonormal Type-II iDCT. */ \
  do { \
    int s1h; \
    int s3h; \
    int s5h; \
    int s7h; \
    int sfh; \
    OD_IDST_8_ASYM(sf, sb, sd, s9, se, sa, sc, s8); \
    OD_IDCT_8_ASYM(s0, s4, s2, s6, s1, s1h, s5, s5h, s3, s3h, s7, s7h); \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    s0 += sfh; \
    sf = s0 - sf; \
    se = s1h - se; \
    s1 -= se; \
    s2 += OD_DCT_RSHIFT(sd, 1); \
    sd = s2 - sd; \
    sc = s3h - sc; \
    s3 -= sc; \
    s4 += OD_DCT_RSHIFT(sb, 1); \
    sb = s4 - sb; \
    sa = s5h - sa; \
    s5 -= sa; \
    s6 += OD_DCT_RSHIFT(s9, 1); \
    s9 = s6 - s9; \
    s8 = s7h - s8; \
    s7 -= s8; \
  } \
  while (0)

Monty Montgomery | 2cb52ba | 2017-07-17 18:27:27 -0400 | [diff] [blame] | 798 | #define OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh, \ |
| 799 | t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh) \ |
| 800 | /* In-place 16-point asymmetric Type-II forward DCT. The *h arguments are only read here; the other sixteen are updated by eight butterflies and then passed to OD_FDCT_8 and OD_FDST_8. */ \ |
| 801 | do { \ |
| 802 | t0 = t0 + tfh; /* Pair (t0, tf). */ \ |
| 803 | tf = t0 - tf; \ |
| 804 | t1 = t1 - teh; /* Pair (t1, te). */ \ |
| 805 | te = te + t1; \ |
| 806 | t2 = t2 + tdh; /* Pair (t2, td). */ \ |
| 807 | td = t2 - td; \ |
| 808 | t3 = t3 - tch; /* Pair (t3, tc). */ \ |
| 809 | tc = tc + t3; \ |
| 810 | t4 = t4 + tbh; /* Pair (t4, tb). */ \ |
| 811 | tb = t4 - tb; \ |
| 812 | t5 = t5 - tah; /* Pair (t5, ta). */ \ |
| 813 | ta = ta + t5; \ |
| 814 | t6 = t6 + t9h; /* Pair (t6, t9). */ \ |
| 815 | t9 = t6 - t9; \ |
| 816 | t7 = t7 - t8h; /* Pair (t7, t8). */ \ |
| 817 | t8 = t8 + t7; \ |
| 818 | OD_FDCT_8(t0, t8, t4, tc, t2, ta, t6, te); \ |
| 819 | OD_FDST_8(tf, t7, tb, t3, td, t5, t9, t1); \ |
| 820 | } \ |
| 821 | while (0) |
| 822 | |
| 823 | #define OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \ |
| 824 | t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh) \ |
| 825 | /* In-place 16-point asymmetric Type-II inverse DCT: OD_IDST_8 and OD_IDCT_8 first, then eight butterflies. Each *h argument is an output that receives OD_DCT_RSHIFT(x, 1) of its updated partner. */ \ |
| 826 | do { \ |
| 827 | OD_IDST_8(tf, tb, td, t9, te, ta, tc, t8); \ |
| 828 | OD_IDCT_8(t0, t4, t2, t6, t1, t5, t3, t7); \ |
| 829 | t1 = t1 - te; /* Pair (t1, te). */ \ |
| 830 | t1h = OD_DCT_RSHIFT(t1, 1); \ |
| 831 | te = te + t1h; \ |
| 832 | t9 = t6 - t9; /* Pair (t6, t9). */ \ |
| 833 | t9h = OD_DCT_RSHIFT(t9, 1); \ |
| 834 | t6 = t6 - t9h; \ |
| 835 | t5 = t5 - ta; /* Pair (t5, ta). */ \ |
| 836 | t5h = OD_DCT_RSHIFT(t5, 1); \ |
| 837 | ta = ta + t5h; \ |
| 838 | td = t2 - td; /* Pair (t2, td). */ \ |
| 839 | tdh = OD_DCT_RSHIFT(td, 1); \ |
| 840 | t2 = t2 - tdh; \ |
| 841 | t3 = t3 - tc; /* Pair (t3, tc). */ \ |
| 842 | t3h = OD_DCT_RSHIFT(t3, 1); \ |
| 843 | tc = tc + t3h; \ |
| 844 | tb = t4 - tb; /* Pair (t4, tb). */ \ |
| 845 | tbh = OD_DCT_RSHIFT(tb, 1); \ |
| 846 | t4 = t4 - tbh; \ |
| 847 | t7 = t7 - t8; /* Pair (t7, t8). */ \ |
| 848 | t7h = OD_DCT_RSHIFT(t7, 1); \ |
| 849 | t8 = t8 + t7h; \ |
| 850 | tf = t0 - tf; /* Pair (t0, tf). */ \ |
| 851 | tfh = OD_DCT_RSHIFT(tf, 1); \ |
| 852 | t0 = t0 - tfh; \ |
| 853 | } \ |
| 854 | while (0) |
| 855 | |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 856 | #define OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, \ |
| 857 | s1, s9, s5, sd, s3, sb, s7, sf) \ |
| 858 | /* Embedded 16-point orthonormal Type-IV fDST, computed in place with integer lifting steps of the form x += (y*c + half) >> shift. Every argument must be a modifiable lvalue and is evaluated many times. Each OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx) names the operand, multiplier and rounding offset of the multiply that follows it. */ \ |
| 859 | do { \ |
| 860 | int s0h; /* s0h, s2h, sdh and sfh keep halved butterfly values for the second butterfly stage. */ \ |
| 861 | int s2h; \ |
| 862 | int sdh; \ |
| 863 | int sfh; \ |
| 864 | /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 865 | OD_DCT_OVERFLOW_CHECK(se, 13573, 16384, 220); \ |
| 866 | s1 += (se*13573 + 16384) >> 15; \ |
| 867 | /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ |
| 868 | OD_DCT_OVERFLOW_CHECK(s1, 11585, 8192, 221); \ |
| 869 | se -= (s1*11585 + 8192) >> 14; \ |
| 870 | /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 871 | OD_DCT_OVERFLOW_CHECK(se, 13573, 16384, 222); \ |
| 872 | s1 += (se*13573 + 16384) >> 15; \ |
| 873 | /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 874 | OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 223); \ |
| 875 | sd += (s2*21895 + 16384) >> 15; \ |
| 876 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 877 | OD_DCT_OVERFLOW_CHECK(sd, 15137, 8192, 224); \ |
| 878 | s2 -= (sd*15137 + 8192) >> 14; \ |
| 879 | /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 880 | OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 225); \ |
| 881 | sd += (s2*21895 + 16384) >> 15; \ |
| 882 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 883 | OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 226); \ |
| 884 | sc += (s3*3259 + 8192) >> 14; \ |
| 885 | /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ |
| 886 | OD_DCT_OVERFLOW_CHECK(sc, 3135, 4096, 227); \ |
| 887 | s3 -= (sc*3135 + 4096) >> 13; \ |
| 888 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 889 | OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 228); \ |
| 890 | sc += (s3*3259 + 8192) >> 14; \ |
| 891 | /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 892 | OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 229); \ |
| 893 | sa += (s5*13573 + 16384) >> 15; \ |
| 894 | /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ |
| 895 | OD_DCT_OVERFLOW_CHECK(sa, 11585, 8192, 230); \ |
| 896 | s5 -= (sa*11585 + 8192) >> 14; \ |
| 897 | /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ |
| 898 | OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 231); \ |
| 899 | sa += (s5*13573 + 16384) >> 15; \ |
| 900 | /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ |
| 901 | OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 232); \ |
| 902 | s6 += (s9*13573 + 16384) >> 15; \ |
| 903 | /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \ |
| 904 | OD_DCT_OVERFLOW_CHECK(s6, 11585, 8192, 233); \ |
| 905 | s9 -= (s6*11585 + 8192) >> 14; \ |
| 906 | /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ |
| 907 | OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 234); \ |
| 908 | s6 += (s9*13573 + 16384) >> 15; \ |
| 909 | sf += se; /* First butterfly stage; sfh, s0h, s2h and sdh are saved for reuse below. */ \ |
| 910 | sfh = OD_DCT_RSHIFT(sf, 1); \ |
| 911 | se = sfh - se; \ |
| 912 | s0 += s1; \ |
| 913 | s0h = OD_DCT_RSHIFT(s0, 1); \ |
| 914 | s1 = s0h - s1; \ |
| 915 | s2 = s3 - s2; \ |
| 916 | s2h = OD_DCT_RSHIFT(s2, 1); \ |
| 917 | s3 -= s2h; \ |
| 918 | sd -= sc; \ |
| 919 | sdh = OD_DCT_RSHIFT(sd, 1); \ |
| 920 | sc += sdh; \ |
| 921 | sa = s4 - sa; \ |
| 922 | s4 -= OD_DCT_RSHIFT(sa, 1); \ |
| 923 | s5 += sb; \ |
| 924 | sb = OD_DCT_RSHIFT(s5, 1) - sb; \ |
| 925 | s8 += s6; \ |
| 926 | s6 -= OD_DCT_RSHIFT(s8, 1); \ |
| 927 | s7 = s9 - s7; \ |
| 928 | s9 -= OD_DCT_RSHIFT(s7, 1); \ |
| 929 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 930 | OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 235); \ |
| 931 | s4 += (sb*6723 + 4096) >> 13; \ |
| 932 | /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ |
| 933 | OD_DCT_OVERFLOW_CHECK(s4, 16069, 8192, 236); \ |
| 934 | sb -= (s4*16069 + 8192) >> 14; \ |
| 935 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 936 | OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 237); \ |
| 937 | s4 += (sb*6723 + 4096) >> 13; \ |
| 938 | /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ |
| 939 | OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 238); \ |
| 940 | sa += (s5*8757 + 8192) >> 14; \ |
| 941 | /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \ |
| 942 | OD_DCT_OVERFLOW_CHECK(sa, 6811, 4096, 239); \ |
| 943 | s5 -= (sa*6811 + 4096) >> 13; \ |
| 944 | /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ |
| 945 | OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 240); \ |
| 946 | sa += (s5*8757 + 8192) >> 14; \ |
| 947 | /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ |
| 948 | OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 241); \ |
| 949 | s6 += (s9*2485 + 4096) >> 13; \ |
| 950 | /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \ |
| 951 | OD_DCT_OVERFLOW_CHECK(s6, 4551, 4096, 242); \ |
| 952 | s9 -= (s6*4551 + 4096) >> 13; \ |
| 953 | /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ |
| 954 | OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 243); \ |
| 955 | s6 += (s9*2485 + 4096) >> 13; \ |
| 956 | /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ |
| 957 | OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 244); \ |
| 958 | s7 += (s8*3227 + 16384) >> 15; \ |
| 959 | /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \ |
| 960 | OD_DCT_OVERFLOW_CHECK(s7, 6393, 16384, 245); \ |
| 961 | s8 -= (s7*6393 + 16384) >> 15; \ |
| 962 | /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ |
| 963 | OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 246); \ |
| 964 | s7 += (s8*3227 + 16384) >> 15; \ |
| 965 | s1 -= s2h; /* Second butterfly stage; the first four pairs consume the halves saved earlier. */ \ |
| 966 | s2 += s1; \ |
| 967 | se += sdh; \ |
| 968 | sd = se - sd; \ |
| 969 | s3 += sfh; \ |
| 970 | sf -= s3; \ |
| 971 | sc = s0h - sc; \ |
| 972 | s0 -= sc; \ |
| 973 | sb += OD_DCT_RSHIFT(s8, 1); \ |
| 974 | s8 = sb - s8; \ |
| 975 | s4 += OD_DCT_RSHIFT(s7, 1); \ |
| 976 | s7 -= s4; \ |
| 977 | s6 += OD_DCT_RSHIFT(s5, 1); \ |
| 978 | s5 = s6 - s5; \ |
| 979 | s9 -= OD_DCT_RSHIFT(sa, 1); \ |
| 980 | sa += s9; \ |
| 981 | s8 += s0; /* Third butterfly stage. */ \ |
| 982 | s0 -= OD_DCT_RSHIFT(s8, 1); \ |
| 983 | sf += s7; \ |
| 984 | s7 = OD_DCT_RSHIFT(sf, 1) - s7; \ |
| 985 | s1 -= s6; \ |
| 986 | s6 += OD_DCT_RSHIFT(s1, 1); \ |
| 987 | s9 += se; \ |
| 988 | se = OD_DCT_RSHIFT(s9, 1) - se; \ |
| 989 | s2 += sa; \ |
| 990 | sa = OD_DCT_RSHIFT(s2, 1) - sa; \ |
| 991 | s5 += sd; \ |
| 992 | sd -= OD_DCT_RSHIFT(s5, 1); \ |
| 993 | s4 = sc - s4; \ |
| 994 | sc -= OD_DCT_RSHIFT(s4, 1); \ |
| 995 | s3 -= sb; \ |
| 996 | sb += OD_DCT_RSHIFT(s3, 1); \ |
| 997 | /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \ |
| 998 | OD_DCT_OVERFLOW_CHECK(sf, 2799, 2048, 247); \ |
| 999 | s0 -= (sf*2799 + 2048) >> 12; \ |
| 1000 | /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \ |
| 1001 | OD_DCT_OVERFLOW_CHECK(s0, 2893, 1024, 248); \ |
| 1002 | sf += (s0*2893 + 1024) >> 11; \ |
| 1003 | /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \ |
| 1004 | OD_DCT_OVERFLOW_CHECK(sf, 5397, 4096, 249); \ |
| 1005 | s0 -= (sf*5397 + 4096) >> 13; \ |
| 1006 | /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \ |
| 1007 | OD_DCT_OVERFLOW_CHECK(s1, 41, 32, 250); \ |
| 1008 | se += (s1*41 + 32) >> 6; \ |
| 1009 | /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \ |
| 1010 | OD_DCT_OVERFLOW_CHECK(se, 2865, 1024, 251); \ |
| 1011 | s1 -= (se*2865 + 1024) >> 11; \ |
| 1012 | /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \ |
| 1013 | OD_DCT_OVERFLOW_CHECK(s1, 4641, 4096, 252); \ |
| 1014 | se += (s1*4641 + 4096) >> 13; \ |
| 1015 | /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \ |
| 1016 | OD_DCT_OVERFLOW_CHECK(s2, 2473, 2048, 253); \ |
| 1017 | sd += (s2*2473 + 2048) >> 12; \ |
| 1018 | /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \ |
| 1019 | OD_DCT_OVERFLOW_CHECK(sd, 5619, 2048, 254); \ |
| 1020 | s2 -= (sd*5619 + 2048) >> 12; \ |
| 1021 | /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \ |
| 1022 | OD_DCT_OVERFLOW_CHECK(s2, 7839, 8192, 255); \ |
| 1023 | sd += (s2*7839 + 8192) >> 14; \ |
| 1024 | /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \ |
| 1025 | OD_DCT_OVERFLOW_CHECK(s3, 5747, 4096, 256); \ |
| 1026 | sc -= (s3*5747 + 4096) >> 13; \ |
| 1027 | /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \ |
| 1028 | OD_DCT_OVERFLOW_CHECK(sc, 3903, 4096, 257); \ |
| 1029 | s3 += (sc*3903 + 4096) >> 13; \ |
| 1030 | /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \ |
| 1031 | OD_DCT_OVERFLOW_CHECK(s3, 5701, 4096, 258); \ |
| 1032 | sc += (s3*5701 + 4096) >> 13; \ |
| 1033 | /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \ |
| 1034 | OD_DCT_OVERFLOW_CHECK(s4, 4471, 4096, 259); \ |
| 1035 | sb += (s4*4471 + 4096) >> 13; \ |
| 1036 | /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \ |
| 1037 | OD_DCT_OVERFLOW_CHECK(sb, 1309, 512, 260); \ |
| 1038 | s4 -= (sb*1309 + 512) >> 10; \ |
| 1039 | /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \ |
| 1040 | OD_DCT_OVERFLOW_CHECK(s4, 5067, 8192, 261); \ |
| 1041 | sb += (s4*5067 + 8192) >> 14; \ |
| 1042 | /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \ |
| 1043 | OD_DCT_OVERFLOW_CHECK(s5, 2217, 2048, 262); \ |
| 1044 | sa -= (s5*2217 + 2048) >> 12; \ |
| 1045 | /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] ~= 0.72705107329128 */ \ |
| 1046 | OD_DCT_OVERFLOW_CHECK(sa, 1489, 1024, 263); \ |
| 1047 | s5 += (sa*1489 + 1024) >> 11; \ |
| 1048 | /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \ |
| 1049 | OD_DCT_OVERFLOW_CHECK(s5, 75, 128, 264); \ |
| 1050 | sa += (s5*75 + 128) >> 8; \ |
| 1051 | /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \ |
| 1052 | OD_DCT_OVERFLOW_CHECK(s9, 2087, 2048, 265); \ |
| 1053 | s6 -= (s9*2087 + 2048) >> 12; \ |
| 1054 | /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \ |
| 1055 | OD_DCT_OVERFLOW_CHECK(s6, 4653, 2048, 266); \ |
| 1056 | s9 += (s6*4653 + 2048) >> 12; \ |
| 1057 | /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \ |
| 1058 | OD_DCT_OVERFLOW_CHECK(s9, 4545, 16384, 267); \ |
| 1059 | s6 -= (s9*4545 + 16384) >> 15; \ |
| 1060 | /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \ |
| 1061 | OD_DCT_OVERFLOW_CHECK(s8, 2053, 2048, 268); \ |
| 1062 | s7 += (s8*2053 + 2048) >> 12; \ |
| 1063 | /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \ |
| 1064 | OD_DCT_OVERFLOW_CHECK(s7, 1945, 1024, 269); \ |
| 1065 | s8 -= (s7*1945 + 1024) >> 11; \ |
| 1066 | /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \ |
| 1067 | OD_DCT_OVERFLOW_CHECK(s8, 1651, 16384, 270); \ |
| 1068 | s7 -= (s8*1651 + 16384) >> 15; \ |
| 1069 | } \ |
| 1070 | while (0) |
| 1071 | |
| 1072 | #define OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, \ |
| 1073 | s1, s9, s5, sd, s3, sb, s7, sf) \ |
| 1074 | /* Embedded 16-point orthonormal Type-IV iDST, computed in place with integer lifting steps of the form x += (y*c + half) >> shift. The rotation constants are those of OD_FDST_16 taken in reverse order with opposite signs, applied to a permuted set of argument names. Every argument must be a modifiable lvalue and is evaluated many times. */ \ |
| 1075 | do { \ |
| 1076 | int s0h; /* s0h, s4h, sbh and sfh keep halved butterfly values for the last butterfly stage. */ \ |
| 1077 | int s4h; \ |
| 1078 | int sbh; \ |
| 1079 | int sfh; \ |
| 1080 | /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \ |
| 1081 | se += (s1*1651 + 16384) >> 15; \ |
| 1082 | /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \ |
| 1083 | s1 += (se*1945 + 1024) >> 11; \ |
| 1084 | /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \ |
| 1085 | se -= (s1*2053 + 2048) >> 12; \ |
| 1086 | /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \ |
| 1087 | s6 += (s9*4545 + 16384) >> 15; \ |
| 1088 | /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \ |
| 1089 | s9 -= (s6*4653 + 2048) >> 12; \ |
| 1090 | /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \ |
| 1091 | s6 += (s9*2087 + 2048) >> 12; \ |
| 1092 | /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \ |
| 1093 | s5 -= (sa*75 + 128) >> 8; \ |
| 1094 | /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] */ \ |
| 1095 | sa -= (s5*1489 + 1024) >> 11; \ |
| 1096 | /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \ |
| 1097 | s5 += (sa*2217 + 2048) >> 12; \ |
| 1098 | /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \ |
| 1099 | sd -= (s2*5067 + 8192) >> 14; \ |
| 1100 | /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \ |
| 1101 | s2 += (sd*1309 + 512) >> 10; \ |
| 1102 | /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \ |
| 1103 | sd -= (s2*4471 + 4096) >> 13; \ |
| 1104 | /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \ |
| 1105 | s3 -= (sc*5701 + 4096) >> 13; \ |
| 1106 | /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \ |
| 1107 | sc -= (s3*3903 + 4096) >> 13; \ |
| 1108 | /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \ |
| 1109 | s3 += (sc*5747 + 4096) >> 13; \ |
| 1110 | /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \ |
| 1111 | sb -= (s4*7839 + 8192) >> 14; \ |
| 1112 | /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \ |
| 1113 | s4 += (sb*5619 + 2048) >> 12; \ |
| 1114 | /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \ |
| 1115 | sb -= (s4*2473 + 2048) >> 12; \ |
| 1116 | /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \ |
| 1117 | s7 -= (s8*4641 + 4096) >> 13; \ |
| 1118 | /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \ |
| 1119 | s8 += (s7*2865 + 1024) >> 11; \ |
| 1120 | /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \ |
| 1121 | s7 -= (s8*41 + 32) >> 6; \ |
| 1122 | /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \ |
| 1123 | s0 += (sf*5397 + 4096) >> 13; \ |
| 1124 | /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \ |
| 1125 | sf -= (s0*2893 + 1024) >> 11; \ |
| 1126 | /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \ |
| 1127 | s0 += (sf*2799 + 2048) >> 12; \ |
| 1128 | sd -= OD_DCT_RSHIFT(sc, 1); /* First butterfly stage. */ \ |
| 1129 | sc += sd; \ |
| 1130 | s3 += OD_DCT_RSHIFT(s2, 1); \ |
| 1131 | s2 = s3 - s2; \ |
| 1132 | sb += OD_DCT_RSHIFT(sa, 1); \ |
| 1133 | sa -= sb; \ |
| 1134 | s5 = OD_DCT_RSHIFT(s4, 1) - s5; \ |
| 1135 | s4 -= s5; \ |
| 1136 | s7 = OD_DCT_RSHIFT(s9, 1) - s7; \ |
| 1137 | s9 -= s7; \ |
| 1138 | s6 -= OD_DCT_RSHIFT(s8, 1); \ |
| 1139 | s8 += s6; \ |
| 1140 | se = OD_DCT_RSHIFT(sf, 1) - se; \ |
| 1141 | sf -= se; \ |
| 1142 | s0 += OD_DCT_RSHIFT(s1, 1); \ |
| 1143 | s1 -= s0; \ |
| 1144 | s5 -= s9; /* Second butterfly stage; s0h, sfh, sbh and s4h are saved for reuse below. */ \ |
| 1145 | s9 += OD_DCT_RSHIFT(s5, 1); \ |
| 1146 | sa = s6 - sa; \ |
| 1147 | s6 -= OD_DCT_RSHIFT(sa, 1); \ |
| 1148 | se += s2; \ |
| 1149 | s2 -= OD_DCT_RSHIFT(se, 1); \ |
| 1150 | s1 = sd - s1; \ |
| 1151 | sd -= OD_DCT_RSHIFT(s1, 1); \ |
| 1152 | s0 += s3; \ |
| 1153 | s0h = OD_DCT_RSHIFT(s0, 1); \ |
| 1154 | s3 = s0h - s3; \ |
| 1155 | sf += sc; \ |
| 1156 | sfh = OD_DCT_RSHIFT(sf, 1); \ |
| 1157 | sc -= sfh; \ |
| 1158 | sb = s7 - sb; \ |
| 1159 | sbh = OD_DCT_RSHIFT(sb, 1); \ |
| 1160 | s7 -= sbh; \ |
| 1161 | s4 -= s8; \ |
| 1162 | s4h = OD_DCT_RSHIFT(s4, 1); \ |
| 1163 | s8 += s4h; \ |
| 1164 | /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ |
| 1165 | se -= (s1*3227 + 16384) >> 15; \ |
| 1166 | /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \ |
| 1167 | s1 += (se*6393 + 16384) >> 15; \ |
| 1168 | /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ |
| 1169 | se -= (s1*3227 + 16384) >> 15; \ |
| 1170 | /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ |
| 1171 | s6 -= (s9*2485 + 4096) >> 13; \ |
| 1172 | /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \ |
| 1173 | s9 += (s6*4551 + 4096) >> 13; \ |
| 1174 | /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ |
| 1175 | s6 -= (s9*2485 + 4096) >> 13; \ |
| 1176 | /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ |
| 1177 | s5 -= (sa*8757 + 8192) >> 14; \ |
| 1178 | /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \ |
| 1179 | sa += (s5*6811 + 4096) >> 13; \ |
| 1180 | /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ |
| 1181 | s5 -= (sa*8757 + 8192) >> 14; \ |
| 1182 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 1183 | s2 -= (sd*6723 + 4096) >> 13; \ |
| 1184 | /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ |
| 1185 | sd += (s2*16069 + 8192) >> 14; \ |
| 1186 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 1187 | s2 -= (sd*6723 + 4096) >> 13; \ |
| 1188 | s9 += OD_DCT_RSHIFT(se, 1); /* Third butterfly stage; the last four pairs consume the halves saved earlier. */ \ |
| 1189 | se = s9 - se; \ |
| 1190 | s6 += OD_DCT_RSHIFT(s1, 1); \ |
| 1191 | s1 -= s6; \ |
| 1192 | sd = OD_DCT_RSHIFT(sa, 1) - sd; \ |
| 1193 | sa -= sd; \ |
| 1194 | s2 += OD_DCT_RSHIFT(s5, 1); \ |
| 1195 | s5 = s2 - s5; \ |
| 1196 | s3 -= sbh; \ |
| 1197 | sb += s3; \ |
| 1198 | sc += s4h; \ |
| 1199 | s4 = sc - s4; \ |
| 1200 | s8 = s0h - s8; \ |
| 1201 | s0 -= s8; \ |
| 1202 | s7 = sfh - s7; \ |
| 1203 | sf -= s7; \ |
| 1204 | /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ |
| 1205 | s6 -= (s9*13573 + 16384) >> 15; \ |
| 1206 | /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \ |
| 1207 | s9 += (s6*11585 + 8192) >> 14; \ |
| 1208 | /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ |
| 1209 | s6 -= (s9*13573 + 16384) >> 15; \ |
| 1210 | /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ |
| 1211 | s5 -= (sa*13573 + 16384) >> 15; \ |
| 1212 | /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \ |
| 1213 | sa += (s5*11585 + 8192) >> 14; \ |
| 1214 | /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ |
| 1215 | s5 -= (sa*13573 + 16384) >> 15; \ |
| 1216 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 1217 | s3 -= (sc*3259 + 8192) >> 14; \ |
| 1218 | /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ |
| 1219 | sc += (s3*3135 + 4096) >> 13; \ |
| 1220 | /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ |
| 1221 | s3 -= (sc*3259 + 8192) >> 14; \ |
| 1222 | /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 1223 | sb -= (s4*21895 + 16384) >> 15; \ |
| 1224 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 1225 | s4 += (sb*15137 + 8192) >> 14; \ |
| 1226 | /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ |
| 1227 | sb -= (s4*21895 + 16384) >> 15; \ |
| 1228 | /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ |
| 1229 | s8 -= (s7*13573 + 16384) >> 15; \ |
| 1230 | /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \ |
| 1231 | s7 += (s8*11585 + 8192) >> 14; \ |
| 1232 | /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ |
| 1233 | s8 -= (s7*13573 + 16384) >> 15; \ |
| 1234 | } \ |
| 1235 | while (0) |
| 1236 | |
Monty Montgomery | 2cb52ba | 2017-07-17 18:27:27 -0400 | [diff] [blame] | 1237 | /* TODO: rewrite this to match OD_FDST_16. */ |
| 1238 | #define OD_FDST_16_ASYM(t0, t0h, t8, t4, t4h, tc, t2, ta, t6, te, \ |
| 1239 | t1, t9, t5, td, t3, tb, t7, t7h, tf) \ |
| 1240 | /* Embedded 16-point asymmetric Type-IV fDST, computed in place with integer lifting steps of the form x += (y*c + half) >> shift. t0h, t4h and t7h are read as inputs (te = t0h - te, ta += t4h, t6 = t7h - t6); t7h is later overwritten with OD_DCT_RSHIFT(t7, 1) of the updated t7. Every argument must be a modifiable lvalue and is evaluated many times. */ \ |
| 1241 | do { \ |
| 1242 | int t2h; /* Locals holding halved butterfly values that are reused by the following butterfly stage. */ \ |
| 1243 | int t3h; \ |
| 1244 | int t6h; \ |
| 1245 | int t8h; \ |
| 1246 | int t9h; \ |
| 1247 | int tch; \ |
| 1248 | int tdh; \ |
| 1249 | /* TODO: Can we move these into another operation */ \ |
| 1250 | t8 = -t8; \ |
| 1251 | t9 = -t9; \ |
| 1252 | ta = -ta; \ |
| 1253 | tb = -tb; \ |
| 1254 | td = -td; \ |
| 1255 | /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ |
| 1256 | OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 136); \ |
| 1257 | t1 -= (te*13573 + 8192) >> 14; \ |
| 1258 | /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \ |
| 1259 | OD_DCT_OVERFLOW_CHECK(t1, 11585, 16384, 137); \ |
| 1260 | te += (t1*11585 + 16384) >> 15; \ |
| 1261 | /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ |
| 1262 | OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 138); \ |
| 1263 | t1 -= (te*13573 + 8192) >> 14; \ |
| 1264 | /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ |
| 1265 | OD_DCT_OVERFLOW_CHECK(td, 4161, 8192, 139); \ |
| 1266 | t2 += (td*4161 + 8192) >> 14; \ |
| 1267 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 1268 | OD_DCT_OVERFLOW_CHECK(t2, 15137, 8192, 140); \ |
| 1269 | td -= (t2*15137 + 8192) >> 14; \ |
| 1270 | /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ |
| 1271 | OD_DCT_OVERFLOW_CHECK(td, 14341, 8192, 141); \ |
| 1272 | t2 += (td*14341 + 8192) >> 14; \ |
| 1273 | /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ |
| 1274 | OD_DCT_OVERFLOW_CHECK(t3, 14341, 8192, 142); \ |
| 1275 | tc -= (t3*14341 + 8192) >> 14; \ |
| 1276 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 1277 | OD_DCT_OVERFLOW_CHECK(tc, 15137, 8192, 143); \ |
| 1278 | t3 += (tc*15137 + 8192) >> 14; \ |
| 1279 | /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ |
| 1280 | OD_DCT_OVERFLOW_CHECK(t3, 4161, 8192, 144); \ |
| 1281 | tc -= (t3*4161 + 8192) >> 14; \ |
| 1282 | te = t0h - te; /* Uses the caller-supplied t0h. */ \ |
| 1283 | t0 -= te; \ |
| 1284 | tf = OD_DCT_RSHIFT(t1, 1) - tf; \ |
| 1285 | t1 -= tf; \ |
| 1286 | /* TODO: Can we move this into another operation */ \ |
| 1287 | tc = -tc; \ |
| 1288 | t2 = OD_DCT_RSHIFT(tc, 1) - t2; \ |
| 1289 | tc -= t2; \ |
| 1290 | t3 = OD_DCT_RSHIFT(td, 1) - t3; \ |
| 1291 | td = t3 - td; \ |
| 1292 | /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ |
| 1293 | OD_DCT_OVERFLOW_CHECK(t6, 7489, 4096, 145); \ |
| 1294 | t9 -= (t6*7489 + 4096) >> 13; \ |
| 1295 | /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ |
| 1296 | OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 146); \ |
| 1297 | t6 += (t9*11585 + 8192) >> 14; \ |
| 1298 | /* 19195/32768 ~= Tan[Pi/4] - Tan[Pi/8] ~= 0.585786437626905 */ \ |
| 1299 | OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 147); \ |
| 1300 | t9 += (t6*19195 + 16384) >> 15; \ |
| 1301 | t8 += OD_DCT_RSHIFT(t9, 1); \ |
| 1302 | t9 -= t8; \ |
| 1303 | t6 = t7h - t6; /* Uses the caller-supplied t7h. */ \ |
| 1304 | t7 -= t6; \ |
| 1305 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 1306 | OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 148); \ |
| 1307 | t8 += (t7*6723 + 4096) >> 13; \ |
| 1308 | /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ |
| 1309 | OD_DCT_OVERFLOW_CHECK(t8, 16069, 8192, 149); \ |
| 1310 | t7 -= (t8*16069 + 8192) >> 14; \ |
| 1311 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 1312 | OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 150); \ |
| 1313 | t8 += (t7*6723 + 4096) >> 13; \ |
| 1314 | /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ |
| 1315 | OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 151); \ |
| 1316 | t9 += (t6*17515 + 16384) >> 15; \ |
| 1317 | /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \ |
| 1318 | OD_DCT_OVERFLOW_CHECK(t9, 13623, 8192, 152); \ |
| 1319 | t6 -= (t9*13623 + 8192) >> 14; \ |
| 1320 | /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ |
| 1321 | OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 153); \ |
| 1322 | t9 += (t6*17515 + 16384) >> 15; \ |
| 1323 | /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ |
| 1324 | OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 154); \ |
| 1325 | t5 += (ta*13573 + 8192) >> 14; \ |
| 1326 | /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \ |
| 1327 | OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 155); \ |
| 1328 | ta -= (t5*11585 + 16384) >> 15; \ |
| 1329 | /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ |
| 1330 | OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 156); \ |
| 1331 | t5 += (ta*13573 + 8192) >> 14; \ |
| 1332 | tb += OD_DCT_RSHIFT(t5, 1); \ |
| 1333 | t5 = tb - t5; \ |
| 1334 | ta += t4h; /* Uses the caller-supplied t4h. */ \ |
| 1335 | t4 -= ta; \ |
| 1336 | /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ |
| 1337 | OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 157); \ |
| 1338 | ta += (t5*2485 + 4096) >> 13; \ |
| 1339 | /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \ |
| 1340 | OD_DCT_OVERFLOW_CHECK(ta, 18205, 16384, 158); \ |
| 1341 | t5 -= (ta*18205 + 16384) >> 15; \ |
| 1342 | /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ |
| 1343 | OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 159); \ |
| 1344 | ta += (t5*2485 + 4096) >> 13; \ |
| 1345 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 1346 | OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 160); \ |
| 1347 | tb -= (t4*6723 + 4096) >> 13; \ |
| 1348 | /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ |
| 1349 | OD_DCT_OVERFLOW_CHECK(tb, 16069, 8192, 161); \ |
| 1350 | t4 += (tb*16069 + 8192) >> 14; \ |
| 1351 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 1352 | OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 162); \ |
| 1353 | tb -= (t4*6723 + 4096) >> 13; \ |
| 1354 | /* TODO: Can we move this into another operation */ \ |
| 1355 | t5 = -t5; \ |
| 1356 | tc -= tf; /* Butterfly stage that fills tch, t3h, tdh, t2h, t8h, t7h, t6h and t9h. */ \ |
| 1357 | tch = OD_DCT_RSHIFT(tc, 1); \ |
| 1358 | tf += tch; \ |
| 1359 | t3 += t0; \ |
| 1360 | t3h = OD_DCT_RSHIFT(t3, 1); \ |
| 1361 | t0 -= t3h; \ |
| 1362 | td -= t1; \ |
| 1363 | tdh = OD_DCT_RSHIFT(td, 1); \ |
| 1364 | t1 += tdh; \ |
| 1365 | t2 += te; \ |
| 1366 | t2h = OD_DCT_RSHIFT(t2, 1); \ |
| 1367 | te -= t2h; \ |
| 1368 | t8 += t4; \ |
| 1369 | t8h = OD_DCT_RSHIFT(t8, 1); \ |
| 1370 | t4 = t8h - t4; \ |
| 1371 | t7 = tb - t7; \ |
| 1372 | t7h = OD_DCT_RSHIFT(t7, 1); /* Overwrites the t7h argument. */ \ |
| 1373 | tb = t7h - tb; \ |
| 1374 | t6 -= ta; \ |
| 1375 | t6h = OD_DCT_RSHIFT(t6, 1); \ |
| 1376 | ta += t6h; \ |
| 1377 | t9 = t5 - t9; \ |
| 1378 | t9h = OD_DCT_RSHIFT(t9, 1); \ |
| 1379 | t5 -= t9h; \ |
| 1380 | t0 -= t7h; /* Butterfly stage that consumes the halves saved by the previous stage. */ \ |
| 1381 | t7 += t0; \ |
| 1382 | tf += t8h; \ |
| 1383 | t8 -= tf; \ |
| 1384 | te -= t6h; \ |
| 1385 | t6 += te; \ |
| 1386 | t1 += t9h; \ |
| 1387 | t9 -= t1; \ |
| 1388 | tb -= tch; \ |
| 1389 | tc += tb; \ |
| 1390 | t4 += t3h; \ |
| 1391 | t3 -= t4; \ |
| 1392 | ta -= tdh; \ |
| 1393 | td += ta; \ |
| 1394 | t5 = t2h - t5; \ |
| 1395 | t2 -= t5; \ |
| 1396 | /* TODO: Can we move these into another operation */ \ |
| 1397 | t8 = -t8; \ |
| 1398 | t9 = -t9; \ |
| 1399 | ta = -ta; \ |
| 1400 | tb = -tb; \ |
| 1401 | tc = -tc; \ |
| 1402 | td = -td; \ |
| 1403 | tf = -tf; \ |
| 1404 | /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \ |
| 1405 | OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 163); \ |
| 1406 | t0 -= (tf*7799 + 4096) >> 13; \ |
| 1407 | /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \ |
| 1408 | OD_DCT_OVERFLOW_CHECK(t0, 4091, 2048, 164); \ |
| 1409 | tf += (t0*4091 + 2048) >> 12; \ |
| 1410 | /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \ |
| 1411 | OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 165); \ |
| 1412 | t0 -= (tf*7799 + 4096) >> 13; \ |
| 1413 | /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \ |
| 1414 | OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 166); \ |
| 1415 | t1 += (te*2417 + 16384) >> 15; \ |
| 1416 | /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \ |
| 1417 | OD_DCT_OVERFLOW_CHECK(t1, 601, 2048, 167); \ |
| 1418 | te -= (t1*601 + 2048) >> 12; \ |
| 1419 | /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \ |
| 1420 | OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 168); \ |
| 1421 | t1 += (te*2417 + 16384) >> 15; \ |
| 1422 | /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \ |
| 1423 | OD_DCT_OVERFLOW_CHECK(t8, 14525, 16384, 169); \ |
| 1424 | t7 -= (t8*14525 + 16384) >> 15; \ |
| 1425 | /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \ |
| 1426 | OD_DCT_OVERFLOW_CHECK(t7, 3035, 2048, 170); \ |
| 1427 | t8 += (t7*3035 + 2048) >> 12; \ |
| 1428 | /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \ |
| 1429 | OD_DCT_OVERFLOW_CHECK(t8, 7263, 8192, 171); \ |
| 1430 | t7 -= (t8*7263 + 8192) >> 14; \ |
| 1431 | /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \ |
| 1432 | OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 172); \ |
| 1433 | t2 -= (td*6393 + 4096) >> 13; \ |
| 1434 | /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \ |
| 1435 | OD_DCT_OVERFLOW_CHECK(t2, 3973, 2048, 173); \ |
| 1436 | td += (t2*3973 + 2048) >> 12; \ |
| 1437 | /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \ |
| 1438 | OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 174); \ |
| 1439 | t2 -= (td*6393 + 4096) >> 13; \ |
| 1440 | /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \ |
| 1441 | OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 175); \ |
| 1442 | t5 -= (ta*9281 + 8192) >> 14; \ |
| 1443 | /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \ |
| 1444 | OD_DCT_OVERFLOW_CHECK(t5, 7027, 4096, 176); \ |
| 1445 | ta += (t5*7027 + 4096) >> 13; \ |
| 1446 | /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \ |
| 1447 | OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 177); \ |
| 1448 | t5 -= (ta*9281 + 8192) >> 14; \ |
| 1449 | /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \ |
| 1450 | OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 178); \ |
| 1451 | t3 -= (tc*11539 + 8192) >> 14; \ |
| 1452 | /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \ |
| 1453 | OD_DCT_OVERFLOW_CHECK(t3, 7713, 4096, 179); \ |
| 1454 | tc += (t3*7713 + 4096) >> 13; \ |
| 1455 | /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \ |
| 1456 | OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 180); \ |
| 1457 | t3 -= (tc*11539 + 8192) >> 14; \ |
| 1458 | /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \ |
| 1459 | OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 181); \ |
| 1460 | t4 -= (tb*10375 + 8192) >> 14; \ |
| 1461 | /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \ |
| 1462 | OD_DCT_OVERFLOW_CHECK(t4, 7405, 4096, 182); \ |
| 1463 | tb += (t4*7405 + 4096) >> 13; \ |
| 1464 | /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \ |
| 1465 | OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 183); \ |
| 1466 | t4 -= (tb*10375 + 8192) >> 14; \ |
| 1467 | /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \ |
| 1468 | OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 184); \ |
| 1469 | t6 -= (t9*8247 + 8192) >> 14; \ |
| 1470 | /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \ |
| 1471 | OD_DCT_OVERFLOW_CHECK(t6, 1645, 1024, 185); \ |
| 1472 | t9 += (t6*1645 + 1024) >> 11; \ |
| 1473 | /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \ |
| 1474 | OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 186); \ |
| 1475 | t6 -= (t9*8247 + 8192) >> 14; \ |
| 1476 | } \ |
| 1477 | while (0) |
| 1478 | |
| 1479 | #define OD_IDST_16_ASYM(t0, t0h, t8, t4, tc, t2, t2h, ta, t6, te, teh, \ |
| 1480 | t1, t9, t5, td, t3, tb, t7, tf) \ |
| 1481 | /* Embedded 16-point asymmetric Type-IV iDST. Updates its 16 coefficient arguments in place; t0h, t2h and teh are outputs that end up holding OD_DCT_RSHIFT(x, 1) of the final t0, t2 and te (teh is also used as scratch earlier on). Statement order matters: every step reads values written by earlier steps. */ \ |
| 1482 | do { \ |
| 1483 | int t1h_; \ |
| 1484 | int t3h_; \ |
| 1485 | int t4h; \ |
| 1486 | int t6h; \ |
| 1487 | int t9h_; \ |
| 1488 | int tbh_; \ |
| 1489 | int tch; \ |
| 1490 | /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \ |
| 1491 | t6 += (t9*8247 + 8192) >> 14; \ |
| 1492 | /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \ |
| 1493 | t9 -= (t6*1645 + 1024) >> 11; \ |
| 1494 | /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \ |
| 1495 | t6 += (t9*8247 + 8192) >> 14; \ |
| 1496 | /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \ |
| 1497 | t2 += (td*10375 + 8192) >> 14; \ |
| 1498 | /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \ |
| 1499 | td -= (t2*7405 + 4096) >> 13; \ |
| 1500 | /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \ |
| 1501 | t2 += (td*10375 + 8192) >> 14; \ |
| 1502 | /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \ |
| 1503 | tc += (t3*11539 + 8192) >> 14; \ |
| 1504 | /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \ |
| 1505 | t3 -= (tc*7713 + 4096) >> 13; \ |
| 1506 | /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \ |
| 1507 | tc += (t3*11539 + 8192) >> 14; \ |
| 1508 | /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \ |
| 1509 | ta += (t5*9281 + 8192) >> 14; \ |
| 1510 | /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \ |
| 1511 | t5 -= (ta*7027 + 4096) >> 13; \ |
| 1512 | /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \ |
| 1513 | ta += (t5*9281 + 8192) >> 14; \ |
| 1514 | /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \ |
| 1515 | t4 += (tb*6393 + 4096) >> 13; \ |
| 1516 | /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \ |
| 1517 | tb -= (t4*3973 + 2048) >> 12; \ |
| 1518 | /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \ |
| 1519 | t4 += (tb*6393 + 4096) >> 13; \ |
| 1520 | /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \ |
| 1521 | te += (t1*7263 + 8192) >> 14; \ |
| 1522 | /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \ |
| 1523 | t1 -= (te*3035 + 2048) >> 12; \ |
| 1524 | /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 (a different fixed-point approximation than the 7263/16384 used two steps earlier; NOTE(review): presumably mirrors the forward transform - confirm before changing) */ \ |
| 1525 | te += (t1*14525 + 16384) >> 15; \ |
| 1526 | /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \ |
| 1527 | t8 -= (t7*2417 + 16384) >> 15; \ |
| 1528 | /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \ |
| 1529 | t7 += (t8*601 + 2048) >> 12; \ |
| 1530 | /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \ |
| 1531 | t8 -= (t7*2417 + 16384) >> 15; \ |
| 1532 | /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \ |
| 1533 | t0 += (tf*7799 + 4096) >> 13; \ |
| 1534 | /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \ |
| 1535 | tf -= (t0*4091 + 2048) >> 12; \ |
| 1536 | /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \ |
| 1537 | t0 += (tf*7799 + 4096) >> 13; \ |
| 1538 | /* TODO: Can we move these sign flips into another operation? */ \ |
| 1539 | t1 = -t1; \ |
| 1540 | t3 = -t3; \ |
| 1541 | t5 = -t5; \ |
| 1542 | t9 = -t9; \ |
| 1543 | tb = -tb; \ |
| 1544 | td = -td; \ |
| 1545 | tf = -tf; \ |
| 1546 | t4 += ta; /* 8 add/subtract butterflies follow; each saves an OD_DCT_RSHIFT(x, 1) half */ \ |
| 1547 | t4h = OD_DCT_RSHIFT(t4, 1); \ |
| 1548 | ta = t4h - ta; \ |
| 1549 | tb -= t5; \ |
| 1550 | tbh_ = OD_DCT_RSHIFT(tb, 1); \ |
| 1551 | t5 += tbh_; \ |
| 1552 | tc += t2; \ |
| 1553 | tch = OD_DCT_RSHIFT(tc, 1); \ |
| 1554 | t2 -= tch; \ |
| 1555 | t3 -= td; \ |
| 1556 | t3h_ = OD_DCT_RSHIFT(t3, 1); \ |
| 1557 | td += t3h_; \ |
| 1558 | t9 += t8; \ |
| 1559 | t9h_ = OD_DCT_RSHIFT(t9, 1); \ |
| 1560 | t8 -= t9h_; \ |
| 1561 | t6 -= t7; \ |
| 1562 | t6h = OD_DCT_RSHIFT(t6, 1); \ |
| 1563 | t7 += t6h; \ |
| 1564 | t1 += tf; \ |
| 1565 | t1h_ = OD_DCT_RSHIFT(t1, 1); \ |
| 1566 | tf -= t1h_; \ |
| 1567 | te -= t0; \ |
| 1568 | teh = OD_DCT_RSHIFT(te, 1); /* scratch use of the teh output; it is overwritten again further down */ \ |
| 1569 | t0 += teh; \ |
| 1570 | ta += t9h_; /* 8 add/subtract pairs follow, consuming the halves saved above */ \ |
| 1571 | t9 = ta - t9; \ |
| 1572 | t5 -= t6h; \ |
| 1573 | t6 += t5; \ |
| 1574 | td = teh - td; \ |
| 1575 | te = td - te; \ |
| 1576 | t2 = t1h_ - t2; \ |
| 1577 | t1 -= t2; \ |
| 1578 | t7 += t4h; \ |
| 1579 | t4 -= t7; \ |
| 1580 | t8 -= tbh_; \ |
| 1581 | tb += t8; \ |
| 1582 | t0 += tch; \ |
| 1583 | tc -= t0; \ |
| 1584 | tf -= t3h_; \ |
| 1585 | t3 += tf; \ |
| 1586 | /* TODO: Can we move this sign flip into another operation? */ \ |
| 1587 | ta = -ta; \ |
| 1588 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 1589 | td += (t2*6723 + 4096) >> 13; \ |
| 1590 | /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ |
| 1591 | t2 -= (td*16069 + 8192) >> 14; \ |
| 1592 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 1593 | td += (t2*6723 + 4096) >> 13; \ |
| 1594 | /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ |
| 1595 | t5 -= (ta*2485 + 4096) >> 13; \ |
| 1596 | /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \ |
| 1597 | ta += (t5*18205 + 16384) >> 15; \ |
| 1598 | /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ |
| 1599 | t5 -= (ta*2485 + 4096) >> 13; \ |
| 1600 | t2 += t5; \ |
| 1601 | t2h = OD_DCT_RSHIFT(t2, 1); /* t2 is not modified after this point, so t2h is the half of the final t2 */ \ |
| 1602 | t5 -= t2h; \ |
| 1603 | ta = td - ta; \ |
| 1604 | td -= OD_DCT_RSHIFT(ta, 1); \ |
| 1605 | /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ |
| 1606 | ta -= (t5*13573 + 8192) >> 14; \ |
| 1607 | /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \ |
| 1608 | t5 += (ta*11585 + 16384) >> 15; \ |
| 1609 | /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ |
| 1610 | ta -= (t5*13573 + 8192) >> 14; \ |
| 1611 | /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ |
| 1612 | t9 -= (t6*17515 + 16384) >> 15; \ |
| 1613 | /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \ |
| 1614 | t6 += (t9*13623 + 8192) >> 14; \ |
| 1615 | /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ |
| 1616 | t9 -= (t6*17515 + 16384) >> 15; \ |
| 1617 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 1618 | t1 -= (te*6723 + 4096) >> 13; \ |
| 1619 | /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ |
| 1620 | te += (t1*16069 + 8192) >> 14; \ |
| 1621 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ |
| 1622 | t1 -= (te*6723 + 4096) >> 13; \ |
| 1623 | te += t6; \ |
| 1624 | teh = OD_DCT_RSHIFT(te, 1); /* te is not modified after this point, so teh is the half of the final te */ \ |
| 1625 | t6 = teh - t6; \ |
| 1626 | t9 += t1; \ |
| 1627 | t1 -= OD_DCT_RSHIFT(t9, 1); \ |
| 1628 | /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 (applied below by subtracting the positive product) */ \ |
| 1629 | t9 -= (t6*19195 + 16384) >> 15; \ |
| 1630 | /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ |
| 1631 | t6 -= (t9*11585 + 8192) >> 14; \ |
| 1632 | /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ |
| 1633 | t9 += (t6*7489 + 4096) >> 13; \ |
| 1634 | tb = tc - tb; \ |
| 1635 | tc = OD_DCT_RSHIFT(tb, 1) - tc; \ |
| 1636 | t3 += t4; \ |
| 1637 | t4 = OD_DCT_RSHIFT(t3, 1) - t4; \ |
| 1638 | /* TODO: Can we move this sign flip into another operation? */ \ |
| 1639 | t3 = -t3; \ |
| 1640 | t8 += tf; \ |
| 1641 | tf = OD_DCT_RSHIFT(t8, 1) - tf; \ |
| 1642 | t0 += t7; \ |
| 1643 | t0h = OD_DCT_RSHIFT(t0, 1); /* t0 is not modified after this point, so t0h is the half of the final t0 */ \ |
| 1644 | t7 = t0h - t7; \ |
| 1645 | /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ |
| 1646 | t3 += (tc*4161 + 8192) >> 14; \ |
| 1647 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 1648 | tc -= (t3*15137 + 8192) >> 14; \ |
| 1649 | /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ |
| 1650 | t3 += (tc*14341 + 8192) >> 14; \ |
| 1651 | /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ |
| 1652 | t4 -= (tb*14341 + 8192) >> 14; \ |
| 1653 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 1654 | tb += (t4*15137 + 8192) >> 14; \ |
| 1655 | /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ |
| 1656 | t4 -= (tb*4161 + 8192) >> 14; \ |
| 1657 | /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ |
| 1658 | t8 += (t7*13573 + 8192) >> 14; \ |
| 1659 | /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \ |
| 1660 | t7 -= (t8*11585 + 16384) >> 15; \ |
| 1661 | /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ |
| 1662 | t8 += (t7*13573 + 8192) >> 14; \ |
| 1663 | /* TODO: Can we move these sign flips into another operation? */ \ |
| 1664 | t1 = -t1; \ |
| 1665 | t5 = -t5; \ |
| 1666 | t9 = -t9; \ |
| 1667 | tb = -tb; \ |
| 1668 | td = -td; \ |
| 1669 | } \ |
| 1670 | while (0) |
| 1671 | |
| 1672 | #define OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \ |
| 1673 | te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \ |
| 1674 | /* Embedded 32-point orthonormal Type-II fDCT. Works in place on its 32 arguments: 16 add/subtract butterflies first pair t0..tf with tv..tg (t0 with tv, t1 with tu, ... tf with tg), then OD_FDCT_16_ASYM and OD_FDST_16_ASYM are each applied to a disjoint set of 16 of the results. The *h locals hold OD_DCT_RSHIFT(x, 1) halves computed by the butterflies and are passed on to those two macros. */ \ |
| 1675 | do { \ |
| 1676 | int tgh; \ |
| 1677 | int thh; \ |
| 1678 | int tih; \ |
| 1679 | int tkh; \ |
| 1680 | int tmh; \ |
| 1681 | int tnh; \ |
| 1682 | int toh; \ |
| 1683 | int tqh; \ |
| 1684 | int tsh; \ |
| 1685 | int tuh; \ |
| 1686 | int tvh; \ |
| 1687 | tv = t0 - tv; \ |
| 1688 | tvh = OD_DCT_RSHIFT(tv, 1); \ |
| 1689 | t0 -= tvh; \ |
| 1690 | tu += t1; \ |
| 1691 | tuh = OD_DCT_RSHIFT(tu, 1); \ |
| 1692 | t1 = tuh - t1; \ |
| 1693 | tt = t2 - tt; \ |
| 1694 | t2 -= OD_DCT_RSHIFT(tt, 1); /* half used only here, so it is not kept in a local */ \ |
| 1695 | ts += t3; \ |
| 1696 | tsh = OD_DCT_RSHIFT(ts, 1); \ |
| 1697 | t3 = tsh - t3; \ |
| 1698 | tr = t4 - tr; \ |
| 1699 | t4 -= OD_DCT_RSHIFT(tr, 1); \ |
| 1700 | tq += t5; \ |
| 1701 | tqh = OD_DCT_RSHIFT(tq, 1); \ |
| 1702 | t5 = tqh - t5; \ |
| 1703 | tp = t6 - tp; \ |
| 1704 | t6 -= OD_DCT_RSHIFT(tp, 1); \ |
| 1705 | to += t7; \ |
| 1706 | toh = OD_DCT_RSHIFT(to, 1); \ |
| 1707 | t7 = toh - t7; \ |
| 1708 | tn = t8 - tn; \ |
| 1709 | tnh = OD_DCT_RSHIFT(tn, 1); \ |
| 1710 | t8 -= tnh; \ |
| 1711 | tm += t9; \ |
| 1712 | tmh = OD_DCT_RSHIFT(tm, 1); \ |
| 1713 | t9 = tmh - t9; \ |
| 1714 | tl = ta - tl; \ |
| 1715 | ta -= OD_DCT_RSHIFT(tl, 1); \ |
| 1716 | tk += tb; \ |
| 1717 | tkh = OD_DCT_RSHIFT(tk, 1); \ |
| 1718 | tb = tkh - tb; \ |
| 1719 | tj = tc - tj; \ |
| 1720 | tc -= OD_DCT_RSHIFT(tj, 1); \ |
| 1721 | ti += td; \ |
| 1722 | tih = OD_DCT_RSHIFT(ti, 1); \ |
| 1723 | td = tih - td; \ |
| 1724 | th = te - th; \ |
| 1725 | thh = OD_DCT_RSHIFT(th, 1); \ |
| 1726 | te -= thh; \ |
| 1727 | tg += tf; \ |
| 1728 | tgh = OD_DCT_RSHIFT(tg, 1); \ |
| 1729 | tf = tgh - tf; \ |
| 1730 | OD_FDCT_16_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \ |
| 1731 | t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh); /* takes the first 16 macro parameters plus 8 of the saved halves */ \ |
| 1732 | OD_FDST_16_ASYM(tv, tvh, tf, tn, tnh, t7, tr, tb, tj, t3, \ |
| 1733 | tt, td, tl, t5, tp, t9, th, thh, t1); /* takes the last 16 macro parameters plus the remaining 3 saved halves */ \ |
| 1734 | } \ |
| 1735 | while (0) |
| 1736 | |
| 1737 | #define OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \ |
| 1738 | te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \ |
| 1739 | /* Embedded 32-point orthonormal Type-II iDCT. Works in place on its 32 arguments: OD_IDST_16_ASYM and OD_IDCT_16_ASYM are each applied to a disjoint set of 16 arguments, then 16 add/subtract pairs recombine the two sets. The *h locals are declared uninitialised and handed to those macros, which must write them; 11 of the recombination pairs read them back and the other 5 compute their half inline with OD_DCT_RSHIFT. */ \ |
| 1740 | do { \ |
| 1741 | int t1h; \ |
| 1742 | int t3h; \ |
| 1743 | int t5h; \ |
| 1744 | int t7h; \ |
| 1745 | int t9h; \ |
| 1746 | int tbh; \ |
| 1747 | int tdh; \ |
| 1748 | int tfh; \ |
| 1749 | int thh; \ |
| 1750 | int tth; \ |
| 1751 | int tvh; \ |
| 1752 | OD_IDST_16_ASYM(tv, tvh, tn, tr, tj, tt, tth, tl, tp, th, thh, \ |
| 1753 | tu, tm, tq, ti, ts, tk, to, tg); /* tvh, tth and thh bind to its t0h, t2h and teh outputs */ \ |
| 1754 | OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \ |
| 1755 | t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh); /* receives the remaining 8 *h locals */ \ |
| 1756 | tu = t1h - tu; \ |
| 1757 | t1 -= tu; \ |
| 1758 | te += thh; \ |
| 1759 | th = te - th; \ |
| 1760 | tm = t9h - tm; \ |
| 1761 | t9 -= tm; \ |
| 1762 | t6 += OD_DCT_RSHIFT(tp, 1); \ |
| 1763 | tp = t6 - tp; \ |
| 1764 | tq = t5h - tq; \ |
| 1765 | t5 -= tq; \ |
| 1766 | ta += OD_DCT_RSHIFT(tl, 1); \ |
| 1767 | tl = ta - tl; \ |
| 1768 | ti = tdh - ti; \ |
| 1769 | td -= ti; \ |
| 1770 | t2 += tth; \ |
| 1771 | tt = t2 - tt; \ |
| 1772 | ts = t3h - ts; \ |
| 1773 | t3 -= ts; \ |
| 1774 | tc += OD_DCT_RSHIFT(tj, 1); \ |
| 1775 | tj = tc - tj; \ |
| 1776 | tk = tbh - tk; \ |
| 1777 | tb -= tk; \ |
| 1778 | t4 += OD_DCT_RSHIFT(tr, 1); \ |
| 1779 | tr = t4 - tr; \ |
| 1780 | to = t7h - to; \ |
| 1781 | t7 -= to; \ |
| 1782 | t8 += OD_DCT_RSHIFT(tn, 1); \ |
| 1783 | tn = t8 - tn; \ |
| 1784 | tg = tfh - tg; \ |
| 1785 | tf -= tg; \ |
| 1786 | t0 += tvh; \ |
| 1787 | tv = t0 - tv; \ |
| 1788 | } \ |
| 1789 | while (0) |
| 1790 | |
Nathan E. Egge | f73e47e | 2017-10-22 06:41:55 -0400 | [diff] [blame] | 1791 | /* Embedded 32-point orthonormal Type-IV fDST. */ |
| 1792 | #define OD_FDST_32(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, \ |
| 1793 | te, tf, tg, th, ti, tj, tk, tl, tm, tn, to, tp, tq, tr, ts, tt, tu, tv) \ |
| 1794 | /* 117 "muls", 117 + 128 = 245 adds, 36 shifts. Works in place on its 32 coefficient arguments. The 28 *h locals carry OD_DCT_RSHIFT(x, 1) halves from a butterfly stage (1 or 4) to the stage that consumes them (2 or 6). Statement order matters: every step reads values written by earlier steps. */ \ |
| 1795 | do { \ |
| 1796 | od_coeff t0h; \ |
| 1797 | od_coeff t1h; \ |
| 1798 | od_coeff t2h; \ |
| 1799 | od_coeff t3h; \ |
| 1800 | od_coeff t4h; \ |
| 1801 | od_coeff t6h; \ |
| 1802 | od_coeff t8h; \ |
| 1803 | od_coeff t9h; \ |
| 1804 | od_coeff tah; \ |
| 1805 | od_coeff tbh; \ |
| 1806 | od_coeff tch; \ |
| 1807 | od_coeff tdh; \ |
| 1808 | od_coeff teh; \ |
| 1809 | od_coeff tfh; \ |
| 1810 | od_coeff tgh; \ |
| 1811 | od_coeff thh; \ |
| 1812 | od_coeff tih; \ |
| 1813 | od_coeff tjh; \ |
| 1814 | od_coeff tkh; \ |
| 1815 | od_coeff tlh; \ |
| 1816 | od_coeff tmh; \ |
| 1817 | od_coeff tnh; \ |
| 1818 | od_coeff tph; \ |
| 1819 | od_coeff trh; \ |
| 1820 | od_coeff tsh; \ |
| 1821 | od_coeff tth; \ |
| 1822 | od_coeff tuh; \ |
| 1823 | od_coeff tvh; \ |
| 1824 | /* Stage 0: 16 three-step lifting sequences, each on one pair of coefficients (48 multiplies) */ \ |
| 1825 | tp += (t6*659 + 2048) >> 12; \ |
| 1826 | t6 -= (tp*10279 + 16384) >> 15; \ |
| 1827 | tp += (t6*659 + 2048) >> 12; \ |
| 1828 | th += (te*3045 + 4096) >> 13; \ |
| 1829 | te -= (th*21403 + 16384) >> 15; \ |
| 1830 | th += (te*3045 + 4096) >> 13; \ |
| 1831 | t9 += (tm*20191 + 16384) >> 15; \ |
| 1832 | tm -= (t9*29269 + 16384) >> 15; \ |
| 1833 | t9 += (tm*20191 + 16384) >> 15; \ |
| 1834 | tu += (t1*1207 + 16384) >> 15; \ |
| 1835 | t1 -= (tu*2411 + 16384) >> 15; \ |
| 1836 | tu += (t1*1207 + 16384) >> 15; \ |
| 1837 | t4 += (tr*13113 + 8192) >> 14; \ |
| 1838 | tr -= (t4*7993 + 4096) >> 13; \ |
| 1839 | t4 += (tr*13113 + 8192) >> 14; \ |
| 1840 | tj += (tc*10381 + 16384) >> 15; \ |
| 1841 | tc -= (tj*4717 + 4096) >> 13; \ |
| 1842 | tj += (tc*10381 + 16384) >> 15; \ |
| 1843 | tb += (tk*18035 + 16384) >> 15; \ |
| 1844 | tk -= (tb*6921 + 4096) >> 13; \ |
| 1845 | tb += (tk*18035 + 16384) >> 15; \ |
| 1846 | ts += (t3*1411 + 8192) >> 14; \ |
| 1847 | t3 -= (ts*2801 + 8192) >> 14; \ |
| 1848 | ts += (t3*1411 + 8192) >> 14; \ |
| 1849 | tq += (t5*2225 + 8192) >> 14; \ |
| 1850 | t5 -= (tq*2185 + 4096) >> 13; \ |
| 1851 | tq += (t5*2225 + 8192) >> 14; \ |
| 1852 | ti += (td*11273 + 16384) >> 15; \ |
| 1853 | td -= (ti*315 + 256) >> 9; \ |
| 1854 | ti += (td*11273 + 16384) >> 15; \ |
| 1855 | tl += (ta*8637 + 16384) >> 15; \ |
| 1856 | ta -= (tl*16151 + 16384) >> 15; \ |
| 1857 | tl += (ta*8637 + 16384) >> 15; \ |
| 1858 | tt += (t2*2013 + 16384) >> 15; \ |
| 1859 | t2 -= (tt*4011 + 16384) >> 15; \ |
| 1860 | tt += (t2*2013 + 16384) >> 15; \ |
| 1861 | to += (t7*6101 + 16384) >> 15; \ |
| 1862 | t7 -= (to*11793 + 16384) >> 15; \ |
| 1863 | to += (t7*6101 + 16384) >> 15; \ |
| 1864 | t8 += (tn*10659 + 8192) >> 14; \ |
| 1865 | tn -= (t8*29957 + 16384) >> 15; \ |
| 1866 | t8 += (tn*10659 + 8192) >> 14; \ |
| 1867 | tg += (tf*819 + 1024) >> 11; \ |
| 1868 | tf -= (tg*22595 + 16384) >> 15; \ |
| 1869 | tg += (tf*819 + 1024) >> 11; \ |
| 1870 | t0 += (tv*31973 + 16384) >> 15; \ |
| 1871 | tv -= (t0*16379 + 8192) >> 14; \ |
| 1872 | t0 += (tv*31973 + 16384) >> 15; \ |
| 1873 | /* Stage 1: 16 add/subtract butterflies; each saves an OD_DCT_RSHIFT(x, 1) half in an *h local */ \ |
| 1874 | tj -= ts; \ |
| 1875 | tjh = OD_DCT_RSHIFT(tj, 1); \ |
| 1876 | ts += tjh; \ |
| 1877 | tr = tk - tr; \ |
| 1878 | trh = OD_DCT_RSHIFT(tr, 1); \ |
| 1879 | tk = trh - tk; \ |
| 1880 | tc += t3; \ |
| 1881 | tch = OD_DCT_RSHIFT(tc, 1); \ |
| 1882 | t3 -= tch; \ |
| 1883 | t4 += tb; \ |
| 1884 | t4h = OD_DCT_RSHIFT(t4, 1); \ |
| 1885 | tb -= t4h; \ |
| 1886 | tv += tf; \ |
| 1887 | tvh = OD_DCT_RSHIFT(tv, 1); \ |
| 1888 | tf -= tvh; \ |
| 1889 | t8 -= to; \ |
| 1890 | t8h = OD_DCT_RSHIFT(t8, 1); \ |
| 1891 | to += t8h; \ |
| 1892 | t0 += tg; \ |
| 1893 | t0h = OD_DCT_RSHIFT(t0, 1); \ |
| 1894 | tg -= t0h; \ |
| 1895 | tn = t7 - tn; \ |
| 1896 | tnh = OD_DCT_RSHIFT(tn, 1); \ |
| 1897 | t7 -= tnh; \ |
| 1898 | th -= tu; \ |
| 1899 | thh = OD_DCT_RSHIFT(th, 1); \ |
| 1900 | tu += thh; \ |
| 1901 | t6 += tm; \ |
| 1902 | t6h = OD_DCT_RSHIFT(t6, 1); \ |
| 1903 | tm = t6h - tm; \ |
| 1904 | te += t1; \ |
| 1905 | teh = OD_DCT_RSHIFT(te, 1); \ |
| 1906 | t1 -= teh; \ |
| 1907 | tp += t9; \ |
| 1908 | tph = OD_DCT_RSHIFT(tp, 1); \ |
| 1909 | t9 -= tph; \ |
| 1910 | t2 -= td; \ |
| 1911 | t2h = OD_DCT_RSHIFT(t2, 1); \ |
| 1912 | td += t2h; \ |
| 1913 | tl = tq - tl; \ |
| 1914 | tlh = OD_DCT_RSHIFT(tl, 1); \ |
| 1915 | tq -= tlh; \ |
| 1916 | tt += ti; \ |
| 1917 | tth = OD_DCT_RSHIFT(tt, 1); \ |
| 1918 | ti -= tth; \ |
| 1919 | ta += t5; \ |
| 1920 | tah = OD_DCT_RSHIFT(ta, 1); \ |
| 1921 | t5 -= tah; \ |
| 1922 | /* Stage 2: 16 add/subtract pairs, each consuming one of the halves saved in stage 1 */ \ |
| 1923 | tm -= thh; \ |
| 1924 | th += tm; \ |
| 1925 | t9 = teh - t9; \ |
| 1926 | te -= t9; \ |
| 1927 | td = tlh - td; \ |
| 1928 | tl -= td; \ |
| 1929 | ti += tah; \ |
| 1930 | ta -= ti; \ |
| 1931 | tk = tjh - tk; \ |
| 1932 | tj -= tk; \ |
| 1933 | tb -= tch; \ |
| 1934 | tc += tb; \ |
| 1935 | tg += tnh; \ |
| 1936 | tn = tg - tn; \ |
| 1937 | tf += t8h; \ |
| 1938 | t8 = tf - t8; \ |
| 1939 | t3 -= trh; \ |
| 1940 | tr += t3; \ |
| 1941 | ts += t4h; \ |
| 1942 | t4 -= ts; \ |
| 1943 | to -= t0h; \ |
| 1944 | t0 += to; \ |
| 1945 | t7 = tvh - t7; \ |
| 1946 | tv = t7 - tv; \ |
| 1947 | t1 -= t6h; \ |
| 1948 | t6 += t1; \ |
| 1949 | tu += tph; \ |
| 1950 | tp -= tu; \ |
| 1951 | tq -= tth; \ |
| 1952 | tt += tq; \ |
| 1953 | t5 += t2h; \ |
| 1954 | t2 -= t5; \ |
| 1955 | /* Stage 3: 8 three-step lifting sequences (24 multiplies) */ \ |
| 1956 | tj += (tc*11725 + 16384) >> 15; \ |
| 1957 | tc -= (tj*5197 + 4096) >> 13; \ |
| 1958 | tj += (tc*11725 + 16384) >> 15; \ |
| 1959 | td += (ti*513 + 1024) >> 11; \ |
| 1960 | ti -= (td*15447 + 16384) >> 15; \ |
| 1961 | td += (ti*513 + 1024) >> 11; \ |
| 1962 | th += (te*4861 + 16384) >> 15; \ |
| 1963 | te -= (th*1189 + 2048) >> 12; \ |
| 1964 | th += (te*4861 + 16384) >> 15; \ |
| 1965 | tg += (tf*805 + 8192) >> 14; \ |
| 1966 | tf -= (tg*803 + 4096) >> 13; \ |
| 1967 | tg += (tf*805 + 8192) >> 14; \ |
| 1968 | tb += (tk*7749 + 8192) >> 14; \ |
| 1969 | tk -= (tb*12665 + 8192) >> 14; \ |
| 1970 | tb += (tk*7749 + 8192) >> 14; \ |
| 1971 | tl += (ta*2455 + 2048) >> 12; \ |
| 1972 | ta -= (tl*28899 + 16384) >> 15; \ |
| 1973 | tl += (ta*2455 + 2048) >> 12; \ |
| 1974 | t9 += (tm*12151 + 8192) >> 14; \ |
| 1975 | tm -= (t9*31357 + 16384) >> 15; \ |
| 1976 | t9 += (tm*12151 + 8192) >> 14; \ |
| 1977 | tn += (t8*29699 + 16384) >> 15; \ |
| 1978 | t8 -= (tn*16305 + 8192) >> 14; \ |
| 1979 | tn += (t8*29699 + 16384) >> 15; \ |
| 1980 | /* Stage 4: 16 add/subtract butterflies; the first 12 save their half in an *h local, the last 4 shift inline */ \ |
| 1981 | tf -= tc; \ |
| 1982 | tfh = OD_DCT_RSHIFT(tf, 1); \ |
| 1983 | tc += tfh; \ |
| 1984 | ti = th - ti; \ |
| 1985 | tih = OD_DCT_RSHIFT(ti, 1); \ |
| 1986 | th -= tih; \ |
| 1987 | tg += tj; \ |
| 1988 | tgh = OD_DCT_RSHIFT(tg, 1); \ |
| 1989 | tj = tgh - tj; \ |
| 1990 | td -= te; \ |
| 1991 | tdh = OD_DCT_RSHIFT(td, 1); \ |
| 1992 | te += tdh; \ |
| 1993 | tm = ta - tm; \ |
| 1994 | tmh = OD_DCT_RSHIFT(tm, 1); \ |
| 1995 | ta = tmh - ta; \ |
| 1996 | t9 += tl; \ |
| 1997 | t9h = OD_DCT_RSHIFT(t9, 1); \ |
| 1998 | tl -= t9h; \ |
| 1999 | tb += t8; \ |
| 2000 | tbh = OD_DCT_RSHIFT(tb, 1); \ |
| 2001 | t8 -= tbh; \ |
| 2002 | tk += tn; \ |
| 2003 | tkh = OD_DCT_RSHIFT(tk, 1); \ |
| 2004 | tn -= tkh; \ |
| 2005 | t1 -= t2; \ |
| 2006 | t1h = OD_DCT_RSHIFT(t1, 1); \ |
| 2007 | t2 += t1h; \ |
| 2008 | t3 += tv; \ |
| 2009 | t3h = OD_DCT_RSHIFT(t3, 1); \ |
| 2010 | tv -= t3h; \ |
| 2011 | tu += tt; \ |
| 2012 | tuh = OD_DCT_RSHIFT(tu, 1); \ |
| 2013 | tt -= tuh; \ |
| 2014 | ts -= t0; \ |
| 2015 | tsh = OD_DCT_RSHIFT(ts, 1); \ |
| 2016 | t0 += tsh; \ |
| 2017 | tq = t6 - tq; \ |
| 2018 | t6 -= OD_DCT_RSHIFT(tq, 1); \ |
| 2019 | to += tr; \ |
| 2020 | tr = OD_DCT_RSHIFT(to, 1) - tr; \ |
| 2021 | t7 = t4 - t7; \ |
| 2022 | t4 -= OD_DCT_RSHIFT(t7, 1); \ |
| 2023 | t5 -= tp; \ |
| 2024 | tp += OD_DCT_RSHIFT(t5, 1); \ |
| 2025 | /* Stage 5: 4 three-step lifting sequences (12 multiplies) */ \ |
| 2026 | tp += (t6*2485 + 4096) >> 13; \ |
| 2027 | t6 -= (tp*18205 + 16384) >> 15; \ |
| 2028 | tp += (t6*2485 + 4096) >> 13; \ |
| 2029 | to += (t7*3227 + 16384) >> 15; \ |
| 2030 | t7 -= (to*6393 + 16384) >> 15; \ |
| 2031 | to += (t7*3227 + 16384) >> 15; \ |
| 2032 | tq += (t5*17515 + 16384) >> 15; \ |
| 2033 | t5 -= (tq*13623 + 8192) >> 14; \ |
| 2034 | tq += (t5*17515 + 16384) >> 15; \ |
| 2035 | t4 += (tr*6723 + 4096) >> 13; \ |
| 2036 | tr -= (t4*16069 + 8192) >> 14; \ |
| 2037 | t4 += (tr*6723 + 4096) >> 13; \ |
| 2038 | /* Stage 6: 16 add/subtract pairs; the first 12 consume the halves saved in stage 4, the last 4 shift inline */ \ |
| 2039 | tj += tdh; \ |
| 2040 | td -= tj; \ |
| 2041 | tc -= tih; \ |
| 2042 | ti += tc; \ |
| 2043 | th = tgh - th; \ |
| 2044 | tg -= th; \ |
| 2045 | te += tfh; \ |
| 2046 | tf -= te; \ |
| 2047 | tl = tkh - tl; \ |
| 2048 | tk -= tl; \ |
| 2049 | ta += tbh; \ |
| 2050 | tb -= ta; \ |
| 2051 | tn -= tmh; \ |
| 2052 | tm += tn; \ |
| 2053 | t8 += t9h; \ |
| 2054 | t9 = t8 - t9; \ |
| 2055 | tt = t3h - tt; \ |
| 2056 | t3 -= tt; \ |
| 2057 | t2 -= tsh; \ |
| 2058 | ts += t2; \ |
| 2059 | tv -= t1h; \ |
| 2060 | t1 += tv; \ |
| 2061 | t0 += tuh; \ |
| 2062 | tu -= t0; \ |
| 2063 | tp = OD_DCT_RSHIFT(to, 1) - tp; \ |
| 2064 | to -= tp; \ |
| 2065 | t6 += OD_DCT_RSHIFT(t7, 1); \ |
| 2066 | t7 -= t6; \ |
| 2067 | t4 = OD_DCT_RSHIFT(tq, 1) - t4; \ |
| 2068 | tq -= t4; \ |
| 2069 | tr += OD_DCT_RSHIFT(t5, 1); \ |
| 2070 | t5 = tr - t5; \ |
| 2071 | /* Stage 7: 11 three-step lifting sequences (33 multiplies). NOTE(review): the sequences that use 21894 finish with 21895, so the first and third multipliers differ; OD_IDST_32 stage 0 uses the same two values in the opposite order, so do not "fix" one side alone. */ \ |
| 2072 | td += (ti*21894 + 16384) >> 15; \ |
| 2073 | ti -= (td*15137 + 8192) >> 14; \ |
| 2074 | td += (ti*21895 + 16384) >> 15; \ |
| 2075 | tj += (tc*21894 + 16384) >> 15; \ |
| 2076 | tc -= (tj*15137 + 8192) >> 14; \ |
| 2077 | tj += (tc*21895 + 16384) >> 15; \ |
| 2078 | th += (te*13573 + 16384) >> 15; \ |
| 2079 | te -= (th*11585 + 8192) >> 14; \ |
| 2080 | th += (te*13573 + 16384) >> 15; \ |
| 2081 | tb += (tk*21894 + 16384) >> 15; \ |
| 2082 | tk -= (tb*15137 + 8192) >> 14; \ |
| 2083 | tb += (tk*21895 + 16384) >> 15; \ |
| 2084 | ta += (tl*3259 + 8192) >> 14; \ |
| 2085 | tl -= (ta*3135 + 4096) >> 13; \ |
| 2086 | ta += (tl*3259 + 8192) >> 14; \ |
| 2087 | t9 += (tm*13573 + 16384) >> 15; \ |
| 2088 | tm -= (t9*11585 + 8192) >> 14; \ |
| 2089 | t9 += (tm*13573 + 16384) >> 15; \ |
| 2090 | ts += (t3*3259 + 8192) >> 14; \ |
| 2091 | t3 -= (ts*3135 + 4096) >> 13; \ |
| 2092 | ts += (t3*3259 + 8192) >> 14; \ |
| 2093 | t2 += (tt*3259 + 8192) >> 14; \ |
| 2094 | tt -= (t2*3135 + 4096) >> 13; \ |
| 2095 | t2 += (tt*3259 + 8192) >> 14; \ |
| 2096 | tu += (t1*13573 + 16384) >> 15; \ |
| 2097 | t1 -= (tu*11585 + 8192) >> 14; \ |
| 2098 | tu += (t1*13573 + 16384) >> 15; \ |
| 2099 | tp += (t6*13573 + 16384) >> 15; \ |
| 2100 | t6 -= (tp*11585 + 8192) >> 14; \ |
| 2101 | tp += (t6*13573 + 16384) >> 15; \ |
| 2102 | tq += (t5*13573 + 16384) >> 15; \ |
| 2103 | t5 -= (tq*11585 + 8192) >> 14; \ |
| 2104 | tq += (t5*13573 + 16384) >> 15; \ |
| 2105 | } \ |
| 2106 | while (0) |
| 2107 | |
| 2108 | /* Embedded 32-point orthonormal Type-IV iDST. */ |
| 2109 | #define OD_IDST_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \ |
| 2110 | te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \ |
| 2111 | /* 117 "muls", 117 + 128 = 245 adds, 36 shifts */ \ |
| 2112 | do { \ |
| 2113 | od_coeff t0h; \ |
| 2114 | od_coeff t1h; \ |
| 2115 | od_coeff t2h; \ |
| 2116 | od_coeff t3h; \ |
| 2117 | od_coeff t4h; \ |
| 2118 | od_coeff t6h; \ |
| 2119 | od_coeff t8h; \ |
| 2120 | od_coeff t9h; \ |
| 2121 | od_coeff tah; \ |
| 2122 | od_coeff tbh; \ |
| 2123 | od_coeff tch; \ |
| 2124 | od_coeff tdh; \ |
| 2125 | od_coeff teh; \ |
| 2126 | od_coeff tfh; \ |
| 2127 | od_coeff tgh; \ |
| 2128 | od_coeff thh; \ |
| 2129 | od_coeff tih; \ |
| 2130 | od_coeff tjh; \ |
| 2131 | od_coeff tkh; \ |
| 2132 | od_coeff tlh; \ |
| 2133 | od_coeff tmh; \ |
| 2134 | od_coeff tnh; \ |
| 2135 | od_coeff tph; \ |
| 2136 | od_coeff trh; \ |
| 2137 | od_coeff tsh; \ |
| 2138 | od_coeff tth; \ |
| 2139 | od_coeff tuh; \ |
| 2140 | od_coeff tvh; \ |
| 2141 | /* Stage 0 */ \ |
| 2142 | tq -= (t5*13573 + 16384) >> 15; \ |
| 2143 | t5 += (tq*11585 + 8192) >> 14; \ |
| 2144 | tq -= (t5*13573 + 16384) >> 15; \ |
| 2145 | tp -= (t6*13573 + 16384) >> 15; \ |
| 2146 | t6 += (tp*11585 + 8192) >> 14; \ |
| 2147 | tp -= (t6*13573 + 16384) >> 15; \ |
| 2148 | tu -= (t1*13573 + 16384) >> 15; \ |
| 2149 | t1 += (tu*11585 + 8192) >> 14; \ |
| 2150 | tu -= (t1*13573 + 16384) >> 15; \ |
| 2151 | t2 -= (tt*3259 + 8192) >> 14; \ |
| 2152 | tt += (t2*3135 + 4096) >> 13; \ |
| 2153 | t2 -= (tt*3259 + 8192) >> 14; \ |
| 2154 | ts -= (t3*3259 + 8192) >> 14; \ |
| 2155 | t3 += (ts*3135 + 4096) >> 13; \ |
| 2156 | ts -= (t3*3259 + 8192) >> 14; \ |
| 2157 | t9 -= (tm*13573 + 16384) >> 15; \ |
| 2158 | tm += (t9*11585 + 8192) >> 14; \ |
| 2159 | t9 -= (tm*13573 + 16384) >> 15; \ |
| 2160 | ta -= (tl*3259 + 8192) >> 14; \ |
| 2161 | tl += (ta*3135 + 4096) >> 13; \ |
| 2162 | ta -= (tl*3259 + 8192) >> 14; \ |
| 2163 | tb -= (tk*21895 + 16384) >> 15; \ |
| 2164 | tk += (tb*15137 + 8192) >> 14; \ |
| 2165 | tb -= (tk*21894 + 16384) >> 15; \ |
| 2166 | th -= (te*13573 + 16384) >> 15; \ |
| 2167 | te += (th*11585 + 8192) >> 14; \ |
| 2168 | th -= (te*13573 + 16384) >> 15; \ |
| 2169 | tj -= (tc*21895 + 16384) >> 15; \ |
| 2170 | tc += (tj*15137 + 8192) >> 14; \ |
| 2171 | tj -= (tc*21894 + 16384) >> 15; \ |
| 2172 | td -= (ti*21895 + 16384) >> 15; \ |
| 2173 | ti += (td*15137 + 8192) >> 14; \ |
| 2174 | td -= (ti*21894 + 16384) >> 15; \ |
| 2175 | /* Stage 1 */ \ |
| 2176 | t5 = tr - t5; \ |
| 2177 | tr -= OD_DCT_RSHIFT(t5, 1); \ |
| 2178 | tq += t4; \ |
| 2179 | t4 = OD_DCT_RSHIFT(tq, 1) - t4; \ |
| 2180 | t7 += t6; \ |
| 2181 | t6 -= OD_DCT_RSHIFT(t7, 1); \ |
| 2182 | to += tp; \ |
| 2183 | tp = OD_DCT_RSHIFT(to, 1) - tp; \ |
| 2184 | tu += t0; \ |
| 2185 | tuh = OD_DCT_RSHIFT(tu, 1); \ |
| 2186 | t0 -= tuh; \ |
| 2187 | t1 -= tv; \ |
| 2188 | t1h = OD_DCT_RSHIFT(t1, 1); \ |
| 2189 | tv += t1h; \ |
| 2190 | ts -= t2; \ |
| 2191 | tsh = OD_DCT_RSHIFT(ts, 1); \ |
| 2192 | t2 += tsh; \ |
| 2193 | t3 += tt; \ |
| 2194 | t3h = OD_DCT_RSHIFT(t3, 1); \ |
| 2195 | tt = t3h - tt; \ |
| 2196 | t9 = t8 - t9; \ |
| 2197 | t9h = OD_DCT_RSHIFT(t9, 1); \ |
| 2198 | t8 -= t9h; \ |
| 2199 | tm -= tn; \ |
| 2200 | tmh = OD_DCT_RSHIFT(tm, 1); \ |
| 2201 | tn += tmh; \ |
| 2202 | tb += ta; \ |
| 2203 | tbh = OD_DCT_RSHIFT(tb, 1); \ |
| 2204 | ta -= tbh; \ |
| 2205 | tk += tl; \ |
| 2206 | tkh = OD_DCT_RSHIFT(tk, 1); \ |
| 2207 | tl = tkh - tl; \ |
| 2208 | tf += te; \ |
| 2209 | tfh = OD_DCT_RSHIFT(tf, 1); \ |
| 2210 | te -= tfh; \ |
| 2211 | tg += th; \ |
| 2212 | tgh = OD_DCT_RSHIFT(tg, 1); \ |
| 2213 | th = tgh - th; \ |
| 2214 | ti -= tc; \ |
| 2215 | tih = OD_DCT_RSHIFT(ti, 1); \ |
| 2216 | tc += tih; \ |
| 2217 | td += tj; \ |
| 2218 | tdh = OD_DCT_RSHIFT(td, 1); \ |
| 2219 | tj -= tdh; \ |
| 2220 | /* Stage 2 */ \ |
| 2221 | t4 -= (tr*6723 + 4096) >> 13; \ |
| 2222 | tr += (t4*16069 + 8192) >> 14; \ |
| 2223 | t4 -= (tr*6723 + 4096) >> 13; \ |
| 2224 | tq -= (t5*17515 + 16384) >> 15; \ |
| 2225 | t5 += (tq*13623 + 8192) >> 14; \ |
| 2226 | tq -= (t5*17515 + 16384) >> 15; \ |
| 2227 | to -= (t7*3227 + 16384) >> 15; \ |
| 2228 | t7 += (to*6393 + 16384) >> 15; \ |
| 2229 | to -= (t7*3227 + 16384) >> 15; \ |
| 2230 | tp -= (t6*2485 + 4096) >> 13; \ |
| 2231 | t6 += (tp*18205 + 16384) >> 15; \ |
| 2232 | tp -= (t6*2485 + 4096) >> 13; \ |
| 2233 | /* Stage 3 */ \ |
| 2234 | tp -= OD_DCT_RSHIFT(t5, 1); \ |
| 2235 | t5 += tp; \ |
| 2236 | t4 += OD_DCT_RSHIFT(t7, 1); \ |
| 2237 | t7 = t4 - t7; \ |
| 2238 | tr = OD_DCT_RSHIFT(to, 1) - tr; \ |
| 2239 | to -= tr; \ |
| 2240 | t6 += OD_DCT_RSHIFT(tq, 1); \ |
| 2241 | tq = t6 - tq; \ |
| 2242 | t0 -= tsh; \ |
| 2243 | ts += t0; \ |
| 2244 | tt += tuh; \ |
| 2245 | tu -= tt; \ |
| 2246 | tv += t3h; \ |
| 2247 | t3 -= tv; \ |
| 2248 | t2 -= t1h; \ |
| 2249 | t1 += t2; \ |
| 2250 | tn += tkh; \ |
| 2251 | tk -= tn; \ |
| 2252 | t8 += tbh; \ |
| 2253 | tb -= t8; \ |
| 2254 | tl += t9h; \ |
| 2255 | t9 -= tl; \ |
| 2256 | ta = tmh - ta; \ |
| 2257 | tm = ta - tm; \ |
| 2258 | te -= tdh; \ |
| 2259 | td += te; \ |
| 2260 | tj = tgh - tj; \ |
| 2261 | tg -= tj; \ |
| 2262 | th += tih; \ |
| 2263 | ti = th - ti; \ |
| 2264 | tc -= tfh; \ |
| 2265 | tf += tc; \ |
| 2266 | /* Stage 4 */ \ |
| 2267 | tn -= (t8*29699 + 16384) >> 15; \ |
| 2268 | t8 += (tn*16305 + 8192) >> 14; \ |
| 2269 | tn -= (t8*29699 + 16384) >> 15; \ |
| 2270 | t9 -= (tm*12151 + 8192) >> 14; \ |
| 2271 | tm += (t9*31357 + 16384) >> 15; \ |
| 2272 | t9 -= (tm*12151 + 8192) >> 14; \ |
| 2273 | tl -= (ta*2455 + 2048) >> 12; \ |
| 2274 | ta += (tl*28899 + 16384) >> 15; \ |
| 2275 | tl -= (ta*2455 + 2048) >> 12; \ |
| 2276 | tb -= (tk*7749 + 8192) >> 14; \ |
| 2277 | tk += (tb*12665 + 8192) >> 14; \ |
| 2278 | tb -= (tk*7749 + 8192) >> 14; \ |
| 2279 | tg -= (tf*805 + 8192) >> 14; \ |
| 2280 | tf += (tg*803 + 4096) >> 13; \ |
| 2281 | tg -= (tf*805 + 8192) >> 14; \ |
| 2282 | th -= (te*4861 + 16384) >> 15; \ |
| 2283 | te += (th*1189 + 2048) >> 12; \ |
| 2284 | th -= (te*4861 + 16384) >> 15; \ |
| 2285 | td -= (ti*513 + 1024) >> 11; \ |
| 2286 | ti += (td*15447 + 16384) >> 15; \ |
| 2287 | td -= (ti*513 + 1024) >> 11; \ |
| 2288 | tj -= (tc*11725 + 16384) >> 15; \ |
| 2289 | tc += (tj*5197 + 4096) >> 13; \ |
| 2290 | tj -= (tc*11725 + 16384) >> 15; \ |
| 2291 | /* Stage 5 */ \ |
| 2292 | t2 += t5; \ |
| 2293 | t2h = OD_DCT_RSHIFT(t2, 1); \ |
| 2294 | t5 -= t2h; \ |
| 2295 | tt -= tq; \ |
| 2296 | tth = OD_DCT_RSHIFT(tt, 1); \ |
| 2297 | tq += tth; \ |
| 2298 | tp += tu; \ |
| 2299 | tph = OD_DCT_RSHIFT(tp, 1); \ |
| 2300 | tu -= tph; \ |
| 2301 | t6 -= t1; \ |
| 2302 | t6h = OD_DCT_RSHIFT(t6, 1); \ |
| 2303 | t1 += t6h; \ |
| 2304 | tv = t7 - tv; \ |
| 2305 | tvh = OD_DCT_RSHIFT(tv, 1); \ |
| 2306 | t7 = tvh - t7; \ |
| 2307 | t0 -= to; \ |
| 2308 | t0h = OD_DCT_RSHIFT(t0, 1); \ |
| 2309 | to += t0h; \ |
| 2310 | t4 += ts; \ |
| 2311 | t4h = OD_DCT_RSHIFT(t4, 1); \ |
| 2312 | ts -= t4h; \ |
| 2313 | tr -= t3; \ |
| 2314 | trh = OD_DCT_RSHIFT(tr, 1); \ |
| 2315 | t3 += trh; \ |
| 2316 | t8 = tf - t8; \ |
| 2317 | t8h = OD_DCT_RSHIFT(t8, 1); \ |
| 2318 | tf -= t8h; \ |
| 2319 | tn = tg - tn; \ |
| 2320 | tnh = OD_DCT_RSHIFT(tn, 1); \ |
| 2321 | tg -= tnh; \ |
| 2322 | tc -= tb; \ |
| 2323 | tch = OD_DCT_RSHIFT(tc, 1); \ |
| 2324 | tb += tch; \ |
| 2325 | tj += tk; \ |
| 2326 | tjh = OD_DCT_RSHIFT(tj, 1); \ |
| 2327 | tk = tjh - tk; \ |
| 2328 | ta += ti; \ |
| 2329 | tah = OD_DCT_RSHIFT(ta, 1); \ |
| 2330 | ti -= tah; \ |
| 2331 | tl += td; \ |
| 2332 | tlh = OD_DCT_RSHIFT(tl, 1); \ |
| 2333 | td = tlh - td; \ |
| 2334 | te += t9; \ |
| 2335 | teh = OD_DCT_RSHIFT(te, 1); \ |
| 2336 | t9 = teh - t9; \ |
| 2337 | th -= tm; \ |
| 2338 | thh = OD_DCT_RSHIFT(th, 1); \ |
| 2339 | tm += thh; \ |
| 2340 | /* Stage 6 */ \ |
| 2341 | t5 += tah; \ |
| 2342 | ta -= t5; \ |
| 2343 | ti += tth; \ |
| 2344 | tt -= ti; \ |
| 2345 | tq += tlh; \ |
| 2346 | tl = tq - tl; \ |
| 2347 | td -= t2h; \ |
| 2348 | t2 += td; \ |
| 2349 | t9 += tph; \ |
| 2350 | tp -= t9; \ |
| 2351 | t1 += teh; \ |
| 2352 | te -= t1; \ |
| 2353 | tm = t6h - tm; \ |
| 2354 | t6 -= tm; \ |
| 2355 | tu -= thh; \ |
| 2356 | th += tu; \ |
| 2357 | t7 += tnh; \ |
| 2358 | tn = t7 - tn; \ |
| 2359 | tg += t0h; \ |
| 2360 | t0 -= tg; \ |
| 2361 | to -= t8h; \ |
| 2362 | t8 += to; \ |
| 2363 | tf += tvh; \ |
| 2364 | tv -= tf; \ |
| 2365 | tb += t4h; \ |
| 2366 | t4 -= tb; \ |
| 2367 | t3 += tch; \ |
| 2368 | tc -= t3; \ |
| 2369 | tk = trh - tk; \ |
| 2370 | tr = tk - tr; \ |
| 2371 | ts -= tjh; \ |
| 2372 | tj += ts; \ |
| 2373 | /* Stage 7 */ \ |
| 2374 | t0 -= (tv*31973 + 16384) >> 15; \ |
| 2375 | tv += (t0*16379 + 8192) >> 14; \ |
| 2376 | t0 -= (tv*31973 + 16384) >> 15; \ |
| 2377 | tg -= (tf*819 + 1024) >> 11; \ |
| 2378 | tf += (tg*22595 + 16384) >> 15; \ |
| 2379 | tg -= (tf*819 + 1024) >> 11; \ |
| 2380 | t8 -= (tn*10659 + 8192) >> 14; \ |
| 2381 | tn += (t8*29957 + 16384) >> 15; \ |
| 2382 | t8 -= (tn*10659 + 8192) >> 14; \ |
| 2383 | to -= (t7*6101 + 16384) >> 15; \ |
| 2384 | t7 += (to*11793 + 16384) >> 15; \ |
| 2385 | to -= (t7*6101 + 16384) >> 15; \ |
| 2386 | tt -= (t2*2013 + 16384) >> 15; \ |
| 2387 | t2 += (tt*4011 + 16384) >> 15; \ |
| 2388 | tt -= (t2*2013 + 16384) >> 15; \ |
| 2389 | tl -= (ta*8637 + 16384) >> 15; \ |
| 2390 | ta += (tl*16151 + 16384) >> 15; \ |
| 2391 | tl -= (ta*8637 + 16384) >> 15; \ |
| 2392 | ti -= (td*11273 + 16384) >> 15; \ |
| 2393 | td += (ti*315 + 256) >> 9; \ |
| 2394 | ti -= (td*11273 + 16384) >> 15; \ |
| 2395 | tq -= (t5*2225 + 8192) >> 14; \ |
| 2396 | t5 += (tq*2185 + 4096) >> 13; \ |
| 2397 | tq -= (t5*2225 + 8192) >> 14; \ |
| 2398 | ts -= (t3*1411 + 8192) >> 14; \ |
| 2399 | t3 += (ts*2801 + 8192) >> 14; \ |
| 2400 | ts -= (t3*1411 + 8192) >> 14; \ |
| 2401 | tb -= (tk*18035 + 16384) >> 15; \ |
| 2402 | tk += (tb*6921 + 4096) >> 13; \ |
| 2403 | tb -= (tk*18035 + 16384) >> 15; \ |
| 2404 | tj -= (tc*10381 + 16384) >> 15; \ |
| 2405 | tc += (tj*4717 + 4096) >> 13; \ |
| 2406 | tj -= (tc*10381 + 16384) >> 15; \ |
| 2407 | t4 -= (tr*13113 + 8192) >> 14; \ |
| 2408 | tr += (t4*7993 + 4096) >> 13; \ |
| 2409 | t4 -= (tr*13113 + 8192) >> 14; \ |
| 2410 | tu -= (t1*1207 + 16384) >> 15; \ |
| 2411 | t1 += (tu*2411 + 16384) >> 15; \ |
| 2412 | tu -= (t1*1207 + 16384) >> 15; \ |
| 2413 | t9 -= (tm*20191 + 16384) >> 15; \ |
| 2414 | tm += (t9*29269 + 16384) >> 15; \ |
| 2415 | t9 -= (tm*20191 + 16384) >> 15; \ |
| 2416 | th -= (te*3045 + 4096) >> 13; \ |
| 2417 | te += (th*21403 + 16384) >> 15; \ |
| 2418 | th -= (te*3045 + 4096) >> 13; \ |
| 2419 | tp -= (t6*659 + 2048) >> 12; \ |
| 2420 | t6 += (tp*10279 + 16384) >> 15; \ |
| 2421 | tp -= (t6*659 + 2048) >> 12; \ |
| 2422 | } \ |
| 2423 | while (0) |
| 2424 | |
Monty Montgomery | a4e245a | 2017-07-22 00:48:31 -0400 | [diff] [blame] | 2425 | #if CONFIG_TX64X64 |
/* Embedded 32-point asymmetric Type-II fDCT, computed in place.
 *
 * The 32 `tX` arguments must be modifiable lvalues. The 16 `tXh` arguments
 * are only read by this macro (presumably each is the halved value of the
 * matching `tX` input -- confirm against the callers).
 *
 * The macro first runs 16 butterflies pairing (t0,tv), (t1,tu), (t2,tt), ...
 * (tf,tg), each of which consumes one `tXh` value. It then hands one half of
 * the results to OD_FDCT_16 and the other half to OD_FDST_16. */
#define OD_FDCT_32_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
  t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh, t1, th, thh, \
  t9, tp, tph, t5, tl, tlh, td, tt, tth, t3, tj, tjh, tb, tr, trh, \
  t7, tn, tnh, tf, tv, tvh) \
  /* Embedded 32-point asymmetric Type-II fDCT. */ \
  do { \
    /* Butterfly stage: even-numbered pairs add the half value first, */ \
    /* odd-numbered pairs subtract from the half value first. */ \
    t0 += tvh; \
    tv = t0 - tv; \
    t1 = tuh - t1; \
    tu -= t1; \
    t2 += tth; \
    tt = t2 - tt; \
    t3 = tsh - t3; \
    ts -= t3; \
    t4 += trh; \
    tr = t4 - tr; \
    t5 = tqh - t5; \
    tq -= t5; \
    t6 += tph; \
    tp = t6 - tp; \
    t7 = toh - t7; \
    to -= t7; \
    t8 += tnh; \
    tn = t8 - tn; \
    t9 = tmh - t9; \
    tm -= t9; \
    ta += tlh; \
    tl = ta - tl; \
    tb = tkh - tb; \
    tk -= tb; \
    tc += tjh; \
    tj = tc - tj; \
    td = tih - td; \
    ti -= td; \
    te += thh; \
    th = te - th; \
    tf = tgh - tf; \
    tg -= tf; \
    /* 16-point sub-transforms on the two halves. */ \
    OD_FDCT_16(t0, tg, t8, to, t4, tk, tc, ts, \
     t2, ti, ta, tq, t6, tm, te, tu); \
    OD_FDST_16(tv, tf, tn, t7, tr, tb, tj, t3, \
     tt, td, tl, t5, tp, t9, th, t1); \
  } \
  while (0)
| 2470 | |
/* Embedded 32-point asymmetric Type-II iDCT, computed in place.
 *
 * All 48 arguments must be modifiable lvalues. The 32 `tX` arguments carry
 * the data. The 16 `tXh` arguments are outputs: each is overwritten with
 * OD_DCT_RSHIFT(tX, 1) of the matching butterfly result.
 *
 * The macro first applies OD_IDST_16 and OD_IDCT_16 to the two halves of the
 * input, then runs 16 butterflies pairing (t0,tv), (t1,tu), (t2,tt), ...
 * (tf,tg). */
#define OD_IDCT_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, \
  t6, tm, te, tu, t1, t1h, th, thh, t9, t9h, tp, tph, t5, t5h, tl, tlh, \
  td, tdh, tt, tth, t3, t3h, tj, tjh, tb, tbh, tr, trh, t7, t7h, tn, tnh, \
  tf, tfh, tv, tvh) \
  /* Embedded 32-point asymmetric Type-II iDCT. */ \
  do { \
    /* 16-point sub-transforms on the two halves. */ \
    OD_IDST_16(tv, tn, tr, tj, tt, tl, tp, th, \
     tu, tm, tq, ti, ts, tk, to, tg); \
    OD_IDCT_16(t0, t8, t4, tc, t2, ta, t6, te, \
     t1, t9, t5, td, t3, tb, t7, tf); \
    /* Butterfly stage; each step also stores the halved value in tXh. */ \
    tv = t0 - tv; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    t0 -= tvh; \
    t1 += tu; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    tu = t1h - tu; \
    tt = t2 - tt; \
    tth = OD_DCT_RSHIFT(tt, 1); \
    t2 -= tth; \
    t3 += ts; \
    t3h = OD_DCT_RSHIFT(t3, 1); \
    ts = t3h - ts; \
    tr = t4 - tr; \
    trh = OD_DCT_RSHIFT(tr, 1); \
    t4 -= trh; \
    t5 += tq; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    tq = t5h - tq; \
    tp = t6 - tp; \
    tph = OD_DCT_RSHIFT(tp, 1); \
    t6 -= tph; \
    t7 += to; \
    t7h = OD_DCT_RSHIFT(t7, 1); \
    to = t7h - to; \
    tn = t8 - tn; \
    tnh = OD_DCT_RSHIFT(tn, 1); \
    t8 -= tnh; \
    t9 += tm; \
    t9h = OD_DCT_RSHIFT(t9, 1); \
    tm = t9h - tm; \
    tl = ta - tl; \
    tlh = OD_DCT_RSHIFT(tl, 1); \
    ta -= tlh; \
    tb += tk; \
    tbh = OD_DCT_RSHIFT(tb, 1); \
    tk = tbh - tk; \
    tj = tc - tj; \
    tjh = OD_DCT_RSHIFT(tj, 1); \
    tc -= tjh; \
    td += ti; \
    tdh = OD_DCT_RSHIFT(td, 1); \
    ti = tdh - ti; \
    th = te - th; \
    thh = OD_DCT_RSHIFT(th, 1); \
    te -= thh; \
    tf += tg; \
    tfh = OD_DCT_RSHIFT(tf, 1); \
    tg = tfh - tg; \
  } \
  while (0)
| 2531 | |
/* Embedded 32-point asymmetric Type-IV fDST, computed in place.
 *
 * All 32 arguments must be modifiable lvalues; each is both read and
 * written. The macro declares eight block-local ints (t0h, t1h, t4h, t5h,
 * tqh, trh, tuh, tvh) that hold OD_DCT_RSHIFT(x, 1) of an intermediate value
 * so it can be reused by a later butterfly.
 *
 * The body alternates between:
 *  - three-step multiply/round/shift updates on a pair of values, each
 *    preceded by OD_DCT_OVERFLOW_CHECK on the value being multiplied (the
 *    comment above each step gives the constant being approximated), and
 *  - add/subtract butterflies that use OD_DCT_RSHIFT(x, 1) half values,
 * with sign flips between stages. Statement order is significant. */
#define OD_FDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
  tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
  /* Embedded 32-point asymmetric Type-IV fDST. */ \
  do { \
    int t0h; \
    int t1h; \
    int t4h; \
    int t5h; \
    int tqh; \
    int trh; \
    int tuh; \
    int tvh; \
    \
    tu = -tu; \
    \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 271); \
    t5 -= (tq*13573 + 8192) >> 14; \
    /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 272); \
    tq += (t5*11585 + 16384) >> 15; \
    /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 273); \
    t5 -= (tq*13573 + 8192) >> 14; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 29957, 16384, 274); \
    tp += (t6*29957 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(tp, 11585, 8192, 275); \
    t6 -= (tp*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 276); \
    tp -= (t6*19195 + 16384) >> 15; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 29957, 16384, 277); \
    tu += (t1*29957 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(tu, 11585, 8192, 278); \
    t1 -= (tu*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 279); \
    tu -= (t1*19195 + 16384) >> 15; \
    /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 28681, 16384, 280); \
    tt += (t2*28681 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(tt, 15137, 8192, 281); \
    t2 -= (tt*15137 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 4161, 8192, 282); \
    tt += (t2*4161 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(ts, 4161, 8192, 283); \
    t3 += (ts*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 15137, 8192, 284); \
    ts -= (t3*15137 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(ts, 14341, 8192, 285); \
    t3 += (ts*14341 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 19195, 16384, 286); \
    t9 -= (tm*19195 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 287); \
    tm -= (t9*11585 + 8192) >> 14; \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 7489, 4096, 288); \
    t9 += (tm*7489 + 4096) >> 13; \
    /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 289); \
    ta += (tl*3259 + 4096) >> 13; \
    /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
    OD_DCT_OVERFLOW_CHECK(ta, 3135, 8192, 290); \
    tl -= (ta*3135 + 8192) >> 14; \
    /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 291); \
    ta += (tl*3259 + 4096) >> 13; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(tk, 4161, 8192, 292); \
    tb += (tk*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 15137, 8192, 293); \
    tk -= (tb*15137 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(tk, 14341, 8192, 294); \
    tb += (tk*14341 + 8192) >> 14; \
    /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(te, 29957, 16384, 295); \
    th += (te*29957 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    OD_DCT_OVERFLOW_CHECK(th, 11585, 8192, 296); \
    te -= (th*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(te, 19195, 16384, 297); \
    th -= (te*19195 + 16384) >> 15; \
    /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 28681, 16384, 298); \
    tj += (tc*28681 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(tj, 15137, 8192, 299); \
    tc -= (tj*15137 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 4161, 8192, 300); \
    tj += (tc*4161 + 8192) >> 14; \
    /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 4161, 8192, 301); \
    td += (ti*4161 + 8192) >> 14; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(td, 15137, 8192, 302); \
    ti -= (td*15137 + 8192) >> 14; \
    /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 14341, 8192, 303); \
    td += (ti*14341 + 8192) >> 14; \
    \
    t1 = -t1; \
    t2 = -t2; \
    t3 = -t3; \
    td = -td; \
    tg = -tg; \
    to = -to; \
    ts = -ts; \
    \
    tr -= OD_DCT_RSHIFT(t5, 1); \
    t5 += tr; \
    tq -= OD_DCT_RSHIFT(t4, 1); /* pass */ \
    t4 += tq; \
    t6 -= OD_DCT_RSHIFT(t7, 1); \
    t7 += t6; \
    to -= OD_DCT_RSHIFT(tp, 1); /* pass */ \
    tp += to; \
    t1 += OD_DCT_RSHIFT(t0, 1); /* pass */ \
    t0 -= t1; \
    tv -= OD_DCT_RSHIFT(tu, 1); \
    tu += tv; \
    t3 -= OD_DCT_RSHIFT(tt, 1); \
    tt += t3; \
    t2 += OD_DCT_RSHIFT(ts, 1); \
    ts -= t2; \
    t9 -= OD_DCT_RSHIFT(t8, 1); /* pass */ \
    t8 += t9; \
    tn += OD_DCT_RSHIFT(tm, 1); \
    tm -= tn; \
    tb += OD_DCT_RSHIFT(ta, 1); \
    ta -= tb; \
    tl -= OD_DCT_RSHIFT(tk, 1); \
    tk += tl; \
    te -= OD_DCT_RSHIFT(tf, 1); /* pass */ \
    tf += te; \
    tg -= OD_DCT_RSHIFT(th, 1); \
    th += tg; \
    tc -= OD_DCT_RSHIFT(ti, 1); \
    ti += tc; \
    td += OD_DCT_RSHIFT(tj, 1); \
    tj -= td; \
    \
    t4 = -t4; \
    \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
    OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 304); \
    t4 += (tr*6723 + 4096) >> 13; \
    /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
    OD_DCT_OVERFLOW_CHECK(t4, 16069, 8192, 305); \
    tr -= (t4*16069 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
    OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 306); \
    t4 += (tr*6723 + 4096) >> 13; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 307); \
    t5 += (tq*17515 + 16384) >> 15; \
    /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
    OD_DCT_OVERFLOW_CHECK(t5, 13623, 8192, 308); \
    tq -= (t5*13623 + 8192) >> 14; \
    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
    OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 309); \
    t5 += (tq*17515 + 16384) >> 15; \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 310); \
    t7 += (to*3227 + 16384) >> 15; \
    /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
    OD_DCT_OVERFLOW_CHECK(t7, 6393, 16384, 311); \
    to -= (t7*6393 + 16384) >> 15; \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 312); \
    t7 += (to*3227 + 16384) >> 15; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 313); \
    t6 += (tp*2485 + 4096) >> 13; \
    /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
    OD_DCT_OVERFLOW_CHECK(t6, 18205, 16384, 314); \
    tp -= (t6*18205 + 16384) >> 15; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 315); \
    t6 += (tp*2485 + 4096) >> 13; \
    \
    t5 = -t5; \
    \
    /* The half values stored in the tXh locals here are reused below. */ \
    tr += to; \
    trh = OD_DCT_RSHIFT(tr, 1); \
    to -= trh; \
    t4 += t7; \
    t4h = OD_DCT_RSHIFT(t4, 1); \
    t7 -= t4h; \
    t5 += tp; \
    t5h = OD_DCT_RSHIFT(t5, 1); \
    tp -= t5h; \
    tq += t6; \
    tqh = OD_DCT_RSHIFT(tq, 1); \
    t6 -= tqh; \
    t0 -= t3; \
    t0h = OD_DCT_RSHIFT(t0, 1); \
    t3 += t0h; \
    tv -= ts; \
    tvh = OD_DCT_RSHIFT(tv, 1); \
    ts += tvh; \
    tu += tt; \
    tuh = OD_DCT_RSHIFT(tu, 1); \
    tt -= tuh; \
    t1 -= t2; \
    t1h = OD_DCT_RSHIFT(t1, 1); \
    t2 += t1h; \
    t8 += tb; \
    tb -= OD_DCT_RSHIFT(t8, 1); \
    tn += tk; \
    tk -= OD_DCT_RSHIFT(tn, 1); \
    t9 += tl; \
    tl -= OD_DCT_RSHIFT(t9, 1); \
    tm -= ta; \
    ta += OD_DCT_RSHIFT(tm, 1); \
    tc -= tf; \
    tf += OD_DCT_RSHIFT(tc, 1); \
    tj += tg; \
    tg -= OD_DCT_RSHIFT(tj, 1); \
    td -= te; \
    te += OD_DCT_RSHIFT(td, 1); \
    ti += th; \
    th -= OD_DCT_RSHIFT(ti, 1); \
    \
    t9 = -t9; \
    tl = -tl; \
    \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 316); \
    t8 += (tn*805 + 8192) >> 14; \
    /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
    OD_DCT_OVERFLOW_CHECK(t8, 803, 4096, 317); \
    tn -= (t8*803 + 4096) >> 13; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 318); \
    t8 += (tn*805 + 8192) >> 14; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 319); \
    tk += (tb*11725 + 16384) >> 15; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
    OD_DCT_OVERFLOW_CHECK(tk, 5197, 4096, 320); \
    tb -= (tk*5197 + 4096) >> 13; \
    /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 321); \
    tk += (tb*11725 + 16384) >> 15; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 322); \
    ta += (tl*2455 + 2048) >> 12; \
    /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
    OD_DCT_OVERFLOW_CHECK(ta, 14449, 8192, 323); \
    tl -= (ta*14449 + 8192) >> 14; \
    /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
    OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 324); \
    ta += (tl*2455 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 325); \
    t9 += (tm*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
    OD_DCT_OVERFLOW_CHECK(t9, 1189, 2048, 326); \
    tm -= (t9*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 327); \
    t9 += (tm*4861 + 16384) >> 15; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 328); \
    tf += (tg*805 + 8192) >> 14; \
    /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
    OD_DCT_OVERFLOW_CHECK(tf, 803, 4096, 329); \
    tg -= (tf*803 + 4096) >> 13; \
    /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
    OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 330); \
    tf += (tg*805 + 8192) >> 14; \
    /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 331); \
    tc += (tj*2931 + 4096) >> 13; \
    /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
    OD_DCT_OVERFLOW_CHECK(tc, 5197, 4096, 332); \
    tj -= (tc*5197 + 4096) >> 13; \
    /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
    OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 333); \
    tc += (tj*2931 + 4096) >> 13; \
    /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 334); \
    td += (ti*513 + 1024) >> 11; \
    /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
    OD_DCT_OVERFLOW_CHECK(td, 7723, 8192, 335); \
    ti -= (td*7723 + 8192) >> 14; \
    /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
    OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 336); \
    td += (ti*513 + 1024) >> 11; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 337); \
    te += (th*4861 + 16384) >> 15; \
    /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
    OD_DCT_OVERFLOW_CHECK(te, 1189, 2048, 338); \
    th -= (te*1189 + 2048) >> 12; \
    /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
    OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 339); \
    te += (th*4861 + 16384) >> 15; \
    \
    ta = -ta; \
    tb = -tb; \
    \
    /* Butterflies that consume the tXh half values saved above. */ \
    tt += t5h; \
    t5 -= tt; \
    t2 -= tqh; \
    tq += t2; \
    tp += t1h; \
    t1 -= tp; \
    t6 -= tuh; \
    tu += t6; \
    t7 += tvh; \
    tv -= t7; \
    to += t0h; \
    t0 -= to; \
    t3 -= t4h; \
    t4 += t3; \
    ts += trh; \
    tr -= ts; \
    tf -= OD_DCT_RSHIFT(tn, 1); \
    tn += tf; \
    tg -= OD_DCT_RSHIFT(t8, 1); \
    t8 += tg; \
    tk += OD_DCT_RSHIFT(tc, 1); \
    tc -= tk; \
    tb += OD_DCT_RSHIFT(tj, 1); \
    tj -= tb; \
    ta += OD_DCT_RSHIFT(ti, 1); \
    ti -= ta; \
    tl += OD_DCT_RSHIFT(td, 1); \
    td -= tl; \
    te -= OD_DCT_RSHIFT(tm, 1); \
    tm += te; \
    th -= OD_DCT_RSHIFT(t9, 1); \
    t9 += th; \
    ta -= t5; \
    t5 += OD_DCT_RSHIFT(ta, 1); \
    tq -= tl; \
    tl += OD_DCT_RSHIFT(tq, 1); \
    t2 -= ti; \
    ti += OD_DCT_RSHIFT(t2, 1); \
    td -= tt; \
    tt += OD_DCT_RSHIFT(td, 1); \
    tm += tp; \
    tp -= OD_DCT_RSHIFT(tm, 1); \
    t6 += t9; \
    t9 -= OD_DCT_RSHIFT(t6, 1); \
    te -= tu; \
    tu += OD_DCT_RSHIFT(te, 1); \
    t1 -= th; \
    th += OD_DCT_RSHIFT(t1, 1); \
    t0 -= tg; \
    tg += OD_DCT_RSHIFT(t0, 1); \
    tf += tv; \
    tv -= OD_DCT_RSHIFT(tf, 1); \
    t8 -= t7; \
    t7 += OD_DCT_RSHIFT(t8, 1); \
    to -= tn; \
    tn += OD_DCT_RSHIFT(to, 1); \
    t4 -= tk; \
    tk += OD_DCT_RSHIFT(t4, 1); \
    tb -= tr; \
    tr += OD_DCT_RSHIFT(tb, 1); \
    t3 -= tj; \
    tj += OD_DCT_RSHIFT(t3, 1); \
    tc -= ts; \
    ts += OD_DCT_RSHIFT(tc, 1); \
    \
    tr = -tr; \
    ts = -ts; \
    tt = -tt; \
    tu = -tu; \
    \
    /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t0, 2847, 2048, 340); \
    tv += (t0*2847 + 2048) >> 12; \
    /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tv, 5791, 2048, 341); \
    t0 -= (tv*5791 + 2048) >> 12; \
    /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t0, 5593, 4096, 342); \
    tv += (t0*5593 + 4096) >> 13; \
    /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tf, 4099, 4096, 343); \
    tg -= (tf*4099 + 4096) >> 13; \
    /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tg, 1997, 1024, 344); \
    tf += (tg*1997 + 1024) >> 11; \
    /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tf, 815, 16384, 345); \
    tg += (tf*815 + 16384) >> 15; \
    /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t8, 2527, 2048, 346); \
    tn -= (t8*2527 + 2048) >> 12; \
    /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tn, 4695, 4096, 347); \
    t8 += (tn*4695 + 4096) >> 13; \
    /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t8, 4187, 4096, 348); \
    tn += (t8*4187 + 4096) >> 13; \
    /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(to, 5477, 4096, 349); \
    t7 += (to*5477 + 4096) >> 13; \
    /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t7, 4169, 4096, 350); \
    to -= (t7*4169 + 4096) >> 13; \
    /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(to, 2571, 2048, 351); \
    t7 -= (to*2571 + 2048) >> 12; \
    /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t2, 5331, 4096, 352); \
    tt += (t2*5331 + 4096) >> 13; \
    /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tt, 5749, 2048, 353); \
    t2 -= (tt*5749 + 2048) >> 12; \
    /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t2, 2413, 2048, 354); \
    tt += (t2*2413 + 2048) >> 12; \
    /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(td, 4167, 4096, 355); \
    ti -= (td*4167 + 4096) >> 13; \
    /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ti, 891, 512, 356); \
    td += (ti*891 + 512) >> 10; \
    /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(td, 4327, 16384, 357); \
    ti += (td*4327 + 16384) >> 15; \
    /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ta, 2261, 2048, 358); \
    tl -= (ta*2261 + 2048) >> 12; \
    /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tl, 2855, 2048, 359); \
    ta += (tl*2855 + 2048) >> 12; \
    /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ta, 5417, 8192, 360); \
    tl += (ta*5417 + 8192) >> 14; \
    /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tq, 3459, 2048, 361); \
    t5 += (tq*3459 + 2048) >> 12; \
    /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t5, 1545, 2048, 362); \
    tq -= (t5*1545 + 2048) >> 12; \
    /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tq, 1971, 1024, 363); \
    t5 -= (tq*1971 + 1024) >> 11; \
    /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t3, 323, 256, 364); \
    ts += (t3*323 + 256) >> 9; \
    /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(ts, 5707, 2048, 365); \
    t3 -= (ts*5707 + 2048) >> 12; \
    /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t3, 2229, 2048, 366); \
    ts += (t3*2229 + 2048) >> 12; \
    /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tc, 1061, 1024, 367); \
    tj -= (tc*1061 + 1024) >> 11; \
    /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tj, 6671, 4096, 368); \
    tc += (tj*6671 + 4096) >> 13; \
    /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tc, 6287, 16384, 369); \
    tj += (tc*6287 + 16384) >> 15; \
    /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tb, 4359, 4096, 370); \
    tk -= (tb*4359 + 4096) >> 13; \
    /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tk, 3099, 2048, 371); \
    tb += (tk*3099 + 2048) >> 12; \
    /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tb, 2109, 4096, 372); \
    tk += (tb*2109 + 4096) >> 13; \
    /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t4, 5017, 4096, 373); \
    tr += (t4*5017 + 4096) >> 13; \
    /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tr, 1413, 512, 374); \
    t4 -= (tr*1413 + 512) >> 10; \
    /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t4, 8195, 8192, 375); \
    tr += (t4*8195 + 8192) >> 14; \
    /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tm, 2373, 2048, 376); \
    t9 += (tm*2373 + 2048) >> 12; \
    /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t9, 5209, 4096, 377); \
    tm -= (t9*5209 + 4096) >> 13; \
    /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tm, 3391, 4096, 378); \
    t9 -= (tm*3391 + 4096) >> 13; \
    /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t6, 1517, 1024, 379); \
    tp -= (t6*1517 + 1024) >> 11; \
    /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tp, 1817, 2048, 380); \
    t6 += (tp*1817 + 2048) >> 12; \
    /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t6, 6331, 4096, 381); \
    tp += (t6*6331 + 4096) >> 13; \
    /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(te, 515, 512, 382); \
    th -= (te*515 + 512) >> 10; \
    /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(th, 7567, 4096, 383); \
    te += (th*7567 + 4096) >> 13; \
    /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(te, 2513, 16384, 384); \
    th += (te*2513 + 16384) >> 15; \
    /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t1, 2753, 2048, 385); \
    tu += (t1*2753 + 2048) >> 12; \
    /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(tu, 5777, 2048, 386); \
    t1 -= (tu*5777 + 2048) >> 12; \
    /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
    OD_DCT_OVERFLOW_CHECK(t1, 1301, 1024, 387); \
    tu += (t1*1301 + 1024) >> 11; \
  } \
  while (0)
| 3065 | |
| 3066 | #define OD_IDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \ |
| 3067 | tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \ |
| 3068 | /* Embedded 32-point asymmetric Type-IV iDST. Every one of the 32 arguments is read and overwritten in place, and each is expanded many times, so pass plain modifiable lvalues with no side effects. The body is a fixed, order-dependent sequence of integer lifting steps (multiply, add rounding offset, shift) and butterflies; do not reorder statements. */ \ |
| 3069 | do { /* The *h temporaries hold OD_DCT_RSHIFT(x, 1) results saved by one butterfly stage and reused by a later one. */ \ |
| 3070 | int t0h; \ |
| 3071 | int t4h; \ |
| 3072 | int tbh; \ |
| 3073 | int tfh; \ |
| 3074 | int tgh; \ |
| 3075 | int tkh; \ |
| 3076 | int trh; \ |
| 3077 | int tvh; \ |
| 3078 | /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \ |
| 3079 | tf -= (tg*1301 + 1024) >> 11; \ |
| 3080 | /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \ |
| 3081 | tg += (tf*5777 + 2048) >> 12; \ |
| 3082 | /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \ |
| 3083 | tf -= (tg*2753 + 2048) >> 12; \ |
| 3084 | /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \ |
| 3085 | th -= (te*2513 + 16384) >> 15; \ |
| 3086 | /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \ |
| 3087 | te -= (th*7567 + 4096) >> 13; \ |
| 3088 | /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \ |
| 3089 | th += (te*515 + 512) >> 10; \ |
| 3090 | /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \ |
| 3091 | tj -= (tc*6331 + 4096) >> 13; \ |
| 3092 | /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \ |
| 3093 | tc -= (tj*1817 + 2048) >> 12; \ |
| 3094 | /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \ |
| 3095 | tj += (tc*1517 + 1024) >> 11; \ |
| 3096 | /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \ |
| 3097 | ti += (td*3391 + 4096) >> 13; \ |
| 3098 | /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \ |
| 3099 | td += (ti*5209 + 4096) >> 13; \ |
| 3100 | /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \ |
| 3101 | ti -= (td*2373 + 2048) >> 12; \ |
| 3102 | /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \ |
| 3103 | tr -= (t4*8195 + 8192) >> 14; \ |
| 3104 | /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \ |
| 3105 | t4 += (tr*1413 + 512) >> 10; \ |
| 3106 | /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \ |
| 3107 | tr -= (t4*5017 + 4096) >> 13; \ |
| 3108 | /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \ |
| 3109 | t5 -= (tq*2109 + 4096) >> 13; \ |
| 3110 | /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \ |
| 3111 | tq -= (t5*3099 + 2048) >> 12; \ |
| 3112 | /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \ |
| 3113 | t5 += (tq*4359 + 4096) >> 13; \ |
| 3114 | /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \ |
| 3115 | tp -= (t6*6287 + 16384) >> 15; \ |
| 3116 | /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \ |
| 3117 | t6 -= (tp*6671 + 4096) >> 13; \ |
| 3118 | /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \ |
| 3119 | tp += (t6*1061 + 1024) >> 11; \ |
| 3120 | /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \ |
| 3121 | t7 -= (to*2229 + 2048) >> 12; \ |
| 3122 | /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \ |
| 3123 | to += (t7*5707 + 2048) >> 12; \ |
| 3124 | /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \ |
| 3125 | t7 -= (to*323 + 256) >> 9; \ |
| 3126 | /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \ |
| 3127 | tk += (tb*1971 + 1024) >> 11; \ |
| 3128 | /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \ |
| 3129 | tb += (tk*1545 + 2048) >> 12; \ |
| 3130 | /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \ |
| 3131 | tk -= (tb*3459 + 2048) >> 12; \ |
| 3132 | /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \ |
| 3133 | tl -= (ta*5417 + 8192) >> 14; \ |
| 3134 | /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \ |
| 3135 | ta -= (tl*2855 + 2048) >> 12; \ |
| 3136 | /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \ |
| 3137 | tl += (ta*2261 + 2048) >> 12; \ |
| 3138 | /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \ |
| 3139 | t9 -= (tm*4327 + 16384) >> 15; \ |
| 3140 | /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \ |
| 3141 | tm -= (t9*891 + 512) >> 10; \ |
| 3142 | /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \ |
| 3143 | t9 += (tm*4167 + 4096) >> 13; \ |
| 3144 | /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \ |
| 3145 | tn -= (t8*2413 + 2048) >> 12; \ |
| 3146 | /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \ |
| 3147 | t8 += (tn*5749 + 2048) >> 12; \ |
| 3148 | /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \ |
| 3149 | tn -= (t8*5331 + 4096) >> 13; \ |
| 3150 | /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \ |
| 3151 | ts += (t3*2571 + 2048) >> 12; \ |
| 3152 | /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \ |
| 3153 | t3 += (ts*4169 + 4096) >> 13; \ |
| 3154 | /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \ |
| 3155 | ts -= (t3*5477 + 4096) >> 13; \ |
| 3156 | /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \ |
| 3157 | tt -= (t2*4187 + 4096) >> 13; \ |
| 3158 | /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \ |
| 3159 | t2 -= (tt*4695 + 4096) >> 13; \ |
| 3160 | /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \ |
| 3161 | tt += (t2*2527 + 2048) >> 12; \ |
| 3162 | /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \ |
| 3163 | t1 -= (tu*815 + 16384) >> 15; \ |
| 3164 | /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \ |
| 3165 | tu -= (t1*1997 + 1024) >> 11; \ |
| 3166 | /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \ |
| 3167 | t1 += (tu*4099 + 4096) >> 13; \ |
| 3168 | /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \ |
| 3169 | tv -= (t0*5593 + 4096) >> 13; \ |
| 3170 | /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \ |
| 3171 | t0 += (tv*5791 + 2048) >> 12; \ |
| 3172 | /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \ |
| 3173 | tv -= (t0*2847 + 2048) >> 12; \ |
| 3174 | /* Negate four of the rotated values before the butterfly stages begin. */ \ |
| 3175 | t7 = -t7; \ |
| 3176 | tf = -tf; \ |
| 3177 | tn = -tn; \ |
| 3178 | tr = -tr; \ |
| 3179 | \ |
| 3180 | t7 -= OD_DCT_RSHIFT(t6, 1); \ |
| 3181 | t6 += t7; \ |
| 3182 | tp -= OD_DCT_RSHIFT(to, 1); \ |
| 3183 | to += tp; \ |
| 3184 | tr -= OD_DCT_RSHIFT(tq, 1); \ |
| 3185 | tq += tr; \ |
| 3186 | t5 -= OD_DCT_RSHIFT(t4, 1); \ |
| 3187 | t4 += t5; \ |
| 3188 | tt -= OD_DCT_RSHIFT(t3, 1); \ |
| 3189 | t3 += tt; \ |
| 3190 | ts -= OD_DCT_RSHIFT(t2, 1); \ |
| 3191 | t2 += ts; \ |
| 3192 | tv += OD_DCT_RSHIFT(tu, 1); \ |
| 3193 | tu -= tv; \ |
| 3194 | t1 -= OD_DCT_RSHIFT(t0, 1); \ |
| 3195 | t0 += t1; \ |
| 3196 | th -= OD_DCT_RSHIFT(tg, 1); \ |
| 3197 | tg += th; \ |
| 3198 | tf -= OD_DCT_RSHIFT(te, 1); \ |
| 3199 | te += tf; \ |
| 3200 | ti += OD_DCT_RSHIFT(tc, 1); \ |
| 3201 | tc -= ti; \ |
| 3202 | tj += OD_DCT_RSHIFT(td, 1); \ |
| 3203 | td -= tj; \ |
| 3204 | tn -= OD_DCT_RSHIFT(tm, 1); \ |
| 3205 | tm += tn; \ |
| 3206 | t9 -= OD_DCT_RSHIFT(t8, 1); \ |
| 3207 | t8 += t9; \ |
| 3208 | tl -= OD_DCT_RSHIFT(tb, 1); \ |
| 3209 | tb += tl; \ |
| 3210 | tk -= OD_DCT_RSHIFT(ta, 1); \ |
| 3211 | ta += tk; \ |
| 3212 | \ |
| 3213 | ti -= th; \ |
| 3214 | th += OD_DCT_RSHIFT(ti, 1); \ |
| 3215 | td -= te; \ |
| 3216 | te += OD_DCT_RSHIFT(td, 1); \ |
| 3217 | tm += tl; \ |
| 3218 | tl -= OD_DCT_RSHIFT(tm, 1); \ |
| 3219 | t9 += ta; \ |
| 3220 | ta -= OD_DCT_RSHIFT(t9, 1); \ |
| 3221 | tp += tq; \ |
| 3222 | tq -= OD_DCT_RSHIFT(tp, 1); \ |
| 3223 | t6 += t5; \ |
| 3224 | t5 -= OD_DCT_RSHIFT(t6, 1); \ |
| 3225 | t2 -= t1; \ |
| 3226 | t1 += OD_DCT_RSHIFT(t2, 1); \ |
| 3227 | tt -= tu; \ |
| 3228 | tu += OD_DCT_RSHIFT(tt, 1); \ |
| 3229 | tr += t7; \ |
| 3230 | trh = OD_DCT_RSHIFT(tr, 1); \ |
| 3231 | t7 -= trh; \ |
| 3232 | t4 -= to; \ |
| 3233 | t4h = OD_DCT_RSHIFT(t4, 1); \ |
| 3234 | to += t4h; \ |
| 3235 | t0 += t3; \ |
| 3236 | t0h = OD_DCT_RSHIFT(t0, 1); \ |
| 3237 | t3 -= t0h; \ |
| 3238 | tv += ts; \ |
| 3239 | tvh = OD_DCT_RSHIFT(tv, 1); \ |
| 3240 | ts -= tvh; \ |
| 3241 | tf -= tc; \ |
| 3242 | tfh = OD_DCT_RSHIFT(tf, 1); \ |
| 3243 | tc += tfh; \ |
| 3244 | tg += tj; \ |
| 3245 | tgh = OD_DCT_RSHIFT(tg, 1); \ |
| 3246 | tj -= tgh; \ |
| 3247 | tb -= t8; \ |
| 3248 | tbh = OD_DCT_RSHIFT(tb, 1); \ |
| 3249 | t8 += tbh; \ |
| 3250 | tk += tn; \ |
| 3251 | tkh = OD_DCT_RSHIFT(tk, 1); \ |
| 3252 | tn -= tkh; \ |
| 3253 | \ |
| 3254 | ta = -ta; \ |
| 3255 | tq = -tq; \ |
| 3256 | \ |
| 3257 | /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ |
| 3258 | te -= (th*4861 + 16384) >> 15; \ |
| 3259 | /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \ |
| 3260 | th += (te*1189 + 2048) >> 12; \ |
| 3261 | /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ |
| 3262 | te -= (th*4861 + 16384) >> 15; \ |
| 3263 | /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \ |
| 3264 | tm -= (t9*513 + 1024) >> 11; \ |
| 3265 | /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \ |
| 3266 | t9 += (tm*7723 + 8192) >> 14; \ |
| 3267 | /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \ |
| 3268 | tm -= (t9*513 + 1024) >> 11; \ |
| 3269 | /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ |
| 3270 | t6 -= (tp*2931 + 4096) >> 13; \ |
| 3271 | /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \ |
| 3272 | tp += (t6*5197 + 4096) >> 13; \ |
| 3273 | /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ |
| 3274 | t6 -= (tp*2931 + 4096) >> 13; \ |
| 3275 | /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ |
| 3276 | tu -= (t1*805 + 8192) >> 14; \ |
| 3277 | /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \ |
| 3278 | t1 += (tu*803 + 4096) >> 13; \ |
| 3279 | /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ |
| 3280 | tu -= (t1*805 + 8192) >> 14; \ |
| 3281 | /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ |
| 3282 | ti -= (td*4861 + 16384) >> 15; \ |
| 3283 | /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \ |
| 3284 | td += (ti*1189 + 2048) >> 12; \ |
| 3285 | /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ |
| 3286 | ti -= (td*4861 + 16384) >> 15; \ |
| 3287 | /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \ |
| 3288 | ta -= (tl*2455 + 2048) >> 12; \ |
| 3289 | /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \ |
| 3290 | tl += (ta*14449 + 8192) >> 14; \ |
| 3291 | /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \ |
| 3292 | ta -= (tl*2455 + 2048) >> 12; \ |
| 3293 | /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ |
| 3294 | t5 -= (tq*11725 + 16384) >> 15; \ |
| 3295 | /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \ |
| 3296 | tq += (t5*5197 + 4096) >> 13; \ |
| 3297 | /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ |
| 3298 | t5 -= (tq*11725 + 16384) >> 15; \ |
| 3299 | /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ |
| 3300 | t2 -= (tt*805 + 8192) >> 14; \ |
| 3301 | /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \ |
| 3302 | tt += (t2*803 + 4096) >> 13; \ |
| 3303 | /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ |
| 3304 | t2 -= (tt*805 + 8192) >> 14; \ |
| 3305 | \ |
| 3306 | tl = -tl; \ |
| 3307 | ti = -ti; \ |
| 3308 | \ |
| 3309 | th += OD_DCT_RSHIFT(t9, 1); \ |
| 3310 | t9 -= th; \ |
| 3311 | te -= OD_DCT_RSHIFT(tm, 1); \ |
| 3312 | tm += te; \ |
| 3313 | t1 += OD_DCT_RSHIFT(tp, 1); \ |
| 3314 | tp -= t1; \ |
| 3315 | tu -= OD_DCT_RSHIFT(t6, 1); \ |
| 3316 | t6 += tu; \ |
| 3317 | ta -= OD_DCT_RSHIFT(td, 1); \ |
| 3318 | td += ta; \ |
| 3319 | tl += OD_DCT_RSHIFT(ti, 1); \ |
| 3320 | ti -= tl; \ |
| 3321 | t5 += OD_DCT_RSHIFT(tt, 1); \ |
| 3322 | tt -= t5; \ |
| 3323 | tq += OD_DCT_RSHIFT(t2, 1); \ |
| 3324 | t2 -= tq; \ |
| 3325 | /* The butterflies below consume the *h values saved earlier instead of recomputing the shift. */ \ |
| 3326 | t8 -= tgh; \ |
| 3327 | tg += t8; \ |
| 3328 | tn += tfh; \ |
| 3329 | tf -= tn; \ |
| 3330 | t7 -= tvh; \ |
| 3331 | tv += t7; \ |
| 3332 | to -= t0h; \ |
| 3333 | t0 += to; \ |
| 3334 | tc += tbh; \ |
| 3335 | tb -= tc; \ |
| 3336 | tj += tkh; \ |
| 3337 | tk -= tj; \ |
| 3338 | ts += t4h; \ |
| 3339 | t4 -= ts; \ |
| 3340 | t3 += trh; \ |
| 3341 | tr -= t3; \ |
| 3342 | \ |
| 3343 | tk = -tk; \ |
| 3344 | \ |
| 3345 | /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ |
| 3346 | tc -= (tj*2485 + 4096) >> 13; \ |
| 3347 | /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \ |
| 3348 | tj += (tc*18205 + 16384) >> 15; \ |
| 3349 | /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ |
| 3350 | tc -= (tj*2485 + 4096) >> 13; \ |
| 3351 | /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ |
| 3352 | ts -= (t3*3227 + 16384) >> 15; \ |
| 3353 | /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \ |
| 3354 | t3 += (ts*6393 + 16384) >> 15; \ |
| 3355 | /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ |
| 3356 | ts -= (t3*3227 + 16384) >> 15; \ |
| 3357 | /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \ |
| 3358 | tk -= (tb*17515 + 16384) >> 15; \ |
| 3359 | /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \ |
| 3360 | tb += (tk*13623 + 8192) >> 14; \ |
| 3361 | /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \ |
| 3362 | tk -= (tb*17515 + 16384) >> 15; \ |
| 3363 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \ |
| 3364 | t4 -= (tr*6723 + 4096) >> 13; \ |
| 3365 | /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \ |
| 3366 | tr += (t4*16069 + 8192) >> 14; \ |
| 3367 | /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \ |
| 3368 | t4 -= (tr*6723 + 4096) >> 13; \ |
| 3369 | \ |
| 3370 | t4 = -t4; \ |
| 3371 | \ |
| 3372 | tp += tm; \ |
| 3373 | tm -= OD_DCT_RSHIFT(tp, 1); \ |
| 3374 | t9 -= t6; \ |
| 3375 | t6 += OD_DCT_RSHIFT(t9, 1); \ |
| 3376 | th -= t1; \ |
| 3377 | t1 += OD_DCT_RSHIFT(th, 1); \ |
| 3378 | tu -= te; \ |
| 3379 | te += OD_DCT_RSHIFT(tu, 1); /* pass */ \ |
| 3380 | t5 -= tl; \ |
| 3381 | tl += OD_DCT_RSHIFT(t5, 1); \ |
| 3382 | ta += tq; \ |
| 3383 | tq -= OD_DCT_RSHIFT(ta, 1); \ |
| 3384 | td += tt; \ |
| 3385 | tt -= OD_DCT_RSHIFT(td, 1); \ |
| 3386 | t2 -= ti; \ |
| 3387 | ti += OD_DCT_RSHIFT(t2, 1); /* pass */ \ |
| 3388 | t7 += t8; \ |
| 3389 | t8 -= OD_DCT_RSHIFT(t7, 1); \ |
| 3390 | tn -= to; \ |
| 3391 | to += OD_DCT_RSHIFT(tn, 1); \ |
| 3392 | tf -= tv; \ |
| 3393 | tv += OD_DCT_RSHIFT(tf, 1); \ |
| 3394 | t0 += tg; \ |
| 3395 | tg -= OD_DCT_RSHIFT(t0, 1); /* pass */ \ |
| 3396 | tj -= t3; \ |
| 3397 | t3 += OD_DCT_RSHIFT(tj, 1); /* pass */ \ |
| 3398 | ts -= tc; \ |
| 3399 | tc += OD_DCT_RSHIFT(ts, 1); \ |
| 3400 | t4 -= tb; \ |
| 3401 | tb += OD_DCT_RSHIFT(t4, 1); /* pass */ \ |
| 3402 | tk -= tr; \ |
| 3403 | tr += OD_DCT_RSHIFT(tk, 1); \ |
| 3404 | \ |
| 3405 | t1 = -t1; \ |
| 3406 | t3 = -t3; \ |
| 3407 | t7 = -t7; \ |
| 3408 | t8 = -t8; \ |
| 3409 | tg = -tg; \ |
| 3410 | tm = -tm; \ |
| 3411 | to = -to; \ |
| 3412 | \ |
| 3413 | /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ |
| 3414 | tm -= (t9*14341 + 8192) >> 14; \ |
| 3415 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 3416 | t9 += (tm*15137 + 8192) >> 14; \ |
| 3417 | /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ |
| 3418 | tm -= (t9*4161 + 8192) >> 14; \ |
| 3419 | /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ |
| 3420 | tp -= (t6*4161 + 8192) >> 14; \ |
| 3421 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 3422 | t6 += (tp*15137 + 8192) >> 14; \ |
| 3423 | /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ |
| 3424 | tp -= (t6*28681 + 16384) >> 15; \ |
| 3425 | /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ |
| 3426 | th += (te*19195 + 16384) >> 15; \ |
| 3427 | /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ |
| 3428 | te += (th*11585 + 8192) >> 14; \ |
| 3429 | /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ |
| 3430 | th -= (te*29957 + 16384) >> 15; \ |
| 3431 | /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ |
| 3432 | tq -= (t5*14341 + 8192) >> 14; \ |
| 3433 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 3434 | t5 += (tq*15137 + 8192) >> 14; \ |
| 3435 | /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ |
| 3436 | tq -= (t5*4161 + 8192) >> 14; \ |
| 3437 | /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \ |
| 3438 | ta -= (tl*3259 + 4096) >> 13; \ |
| 3439 | /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \ |
| 3440 | tl += (ta*3135 + 8192) >> 14; \ |
| 3441 | /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \ |
| 3442 | ta -= (tl*3259 + 4096) >> 13; \ |
| 3443 | /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ |
| 3444 | ti -= (td*7489 + 4096) >> 13; \ |
| 3445 | /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ |
| 3446 | td += (ti*11585 + 8192) >> 14; \ |
| 3447 | /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ |
| 3448 | ti += (td*19195 + 16384) >> 15; \ |
| 3449 | /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ |
| 3450 | to -= (t7*14341 + 8192) >> 14; \ |
| 3451 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 3452 | t7 += (to*15137 + 8192) >> 14; \ |
| 3453 | /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ |
| 3454 | to -= (t7*4161 + 8192) >> 14; \ |
| 3455 | /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ |
| 3456 | tn -= (t8*4161 + 8192) >> 14; \ |
| 3457 | /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ |
| 3458 | t8 += (tn*15137 + 8192) >> 14; \ |
| 3459 | /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ |
| 3460 | tn -= (t8*28681 + 16384) >> 15; \ |
| 3461 | /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ |
| 3462 | tf += (tg*19195 + 16384) >> 15; \ |
| 3463 | /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ |
| 3464 | tg += (tf*11585 + 8192) >> 14; \ |
| 3465 | /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ |
| 3466 | tf -= (tg*29957 + 16384) >> 15; \ |
| 3467 | /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ |
| 3468 | tj += (tc*19195 + 16384) >> 15; \ |
| 3469 | /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ |
| 3470 | tc += (tj*11585 + 8192) >> 14; \ |
| 3471 | /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ |
| 3472 | tj -= (tc*29957 + 16384) >> 15; \ |
| 3473 | /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ |
| 3474 | tk += (tb*13573 + 8192) >> 14; \ |
| 3475 | /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \ |
| 3476 | tb -= (tk*11585 + 16384) >> 15; \ |
| 3477 | /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ |
| 3478 | tk += (tb*13573 + 8192) >> 14; \ |
| 3479 | \ |
| 3480 | tf = -tf; \ |
| 3481 | \ |
| 3482 | } \ |
| 3483 | while (0) |
| 3484 | |
| 3485 | #define OD_FDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \ |
| 3486 | us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \ |
| 3487 | ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \ |
| 3488 | ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \ |
| 3489 | /* Embedded 64-point orthonormal Type-II fDCT. All 64 arguments are updated in place and each is expanded more than once, so pass plain modifiable lvalues. Stage 1 is 32 butterflies pairing (u0, u), (u1, u_), (u2, uZ), ... (uv, uw), each saving its OD_DCT_RSHIFT(x, 1) value in a *h temporary. Stage 2 hands one set of 32 values (plus sixteen of the saved halves) to OD_FDCT_32_ASYM and the other 32 to OD_FDST_32_ASYM. */ \ |
| 3490 | do { \ |
| 3491 | int uwh; \ |
| 3492 | int uxh; \ |
| 3493 | int uyh; \ |
| 3494 | int uzh; \ |
| 3495 | int uAh; \ |
| 3496 | int uBh; \ |
| 3497 | int uCh; \ |
| 3498 | int uDh; \ |
| 3499 | int uEh; \ |
| 3500 | int uFh; \ |
| 3501 | int uGh; \ |
| 3502 | int uHh; \ |
| 3503 | int uIh; \ |
| 3504 | int uJh; \ |
| 3505 | int uKh; \ |
| 3506 | int uLh; \ |
| 3507 | int uMh; \ |
| 3508 | int uNh; \ |
| 3509 | int uOh; \ |
| 3510 | int uPh; \ |
| 3511 | int uQh; \ |
| 3512 | int uRh; \ |
| 3513 | int uSh; \ |
| 3514 | int uTh; \ |
| 3515 | int uUh; \ |
| 3516 | int uVh; \ |
| 3517 | int uWh; \ |
| 3518 | int uXh; \ |
| 3519 | int uYh; \ |
| 3520 | int uZh; \ |
| 3521 | int u_h; \ |
| 3522 | int uh_; \ |
| 3523 | u = u0 - u; \ |
| 3524 | uh_ = OD_DCT_RSHIFT(u, 1); \ |
| 3525 | u0 -= uh_; \ |
| 3526 | u_ += u1; \ |
| 3527 | u_h = OD_DCT_RSHIFT(u_, 1); \ |
| 3528 | u1 = u_h - u1; \ |
| 3529 | uZ = u2 - uZ; \ |
| 3530 | uZh = OD_DCT_RSHIFT(uZ, 1); \ |
| 3531 | u2 -= uZh; \ |
| 3532 | uY += u3; \ |
| 3533 | uYh = OD_DCT_RSHIFT(uY, 1); \ |
| 3534 | u3 = uYh - u3; \ |
| 3535 | uX = u4 - uX; \ |
| 3536 | uXh = OD_DCT_RSHIFT(uX, 1); \ |
| 3537 | u4 -= uXh; \ |
| 3538 | uW += u5; \ |
| 3539 | uWh = OD_DCT_RSHIFT(uW, 1); \ |
| 3540 | u5 = uWh - u5; \ |
| 3541 | uV = u6 - uV; \ |
| 3542 | uVh = OD_DCT_RSHIFT(uV, 1); \ |
| 3543 | u6 -= uVh; \ |
| 3544 | uU += u7; \ |
| 3545 | uUh = OD_DCT_RSHIFT(uU, 1); \ |
| 3546 | u7 = uUh - u7; \ |
| 3547 | uT = u8 - uT; \ |
| 3548 | uTh = OD_DCT_RSHIFT(uT, 1); \ |
| 3549 | u8 -= uTh; \ |
| 3550 | uS += u9; \ |
| 3551 | uSh = OD_DCT_RSHIFT(uS, 1); \ |
| 3552 | u9 = uSh - u9; \ |
| 3553 | uR = ua - uR; \ |
| 3554 | uRh = OD_DCT_RSHIFT(uR, 1); \ |
| 3555 | ua -= uRh; \ |
| 3556 | uQ += ub; \ |
| 3557 | uQh = OD_DCT_RSHIFT(uQ, 1); \ |
| 3558 | ub = uQh - ub; \ |
| 3559 | uP = uc - uP; \ |
| 3560 | uPh = OD_DCT_RSHIFT(uP, 1); \ |
| 3561 | uc -= uPh; \ |
| 3562 | uO += ud; \ |
| 3563 | uOh = OD_DCT_RSHIFT(uO, 1); \ |
| 3564 | ud = uOh - ud; \ |
| 3565 | uN = ue - uN; \ |
| 3566 | uNh = OD_DCT_RSHIFT(uN, 1); \ |
| 3567 | ue -= uNh; \ |
| 3568 | uM += uf; \ |
| 3569 | uMh = OD_DCT_RSHIFT(uM, 1); \ |
| 3570 | uf = uMh - uf; \ |
| 3571 | uL = ug - uL; \ |
| 3572 | uLh = OD_DCT_RSHIFT(uL, 1); \ |
| 3573 | ug -= uLh; \ |
| 3574 | uK += uh; \ |
| 3575 | uKh = OD_DCT_RSHIFT(uK, 1); \ |
| 3576 | uh = uKh - uh; \ |
| 3577 | uJ = ui - uJ; \ |
| 3578 | uJh = OD_DCT_RSHIFT(uJ, 1); \ |
| 3579 | ui -= uJh; \ |
| 3580 | uI += uj; \ |
| 3581 | uIh = OD_DCT_RSHIFT(uI, 1); \ |
| 3582 | uj = uIh - uj; \ |
| 3583 | uH = uk - uH; \ |
| 3584 | uHh = OD_DCT_RSHIFT(uH, 1); \ |
| 3585 | uk -= uHh; \ |
| 3586 | uG += ul; \ |
| 3587 | uGh = OD_DCT_RSHIFT(uG, 1); \ |
| 3588 | ul = uGh - ul; \ |
| 3589 | uF = um - uF; \ |
| 3590 | uFh = OD_DCT_RSHIFT(uF, 1); \ |
| 3591 | um -= uFh; \ |
| 3592 | uE += un; \ |
| 3593 | uEh = OD_DCT_RSHIFT(uE, 1); \ |
| 3594 | un = uEh - un; \ |
| 3595 | uD = uo - uD; \ |
| 3596 | uDh = OD_DCT_RSHIFT(uD, 1); \ |
| 3597 | uo -= uDh; \ |
| 3598 | uC += up; \ |
| 3599 | uCh = OD_DCT_RSHIFT(uC, 1); \ |
| 3600 | up = uCh - up; \ |
| 3601 | uB = uq - uB; \ |
| 3602 | uBh = OD_DCT_RSHIFT(uB, 1); \ |
| 3603 | uq -= uBh; \ |
| 3604 | uA += ur; \ |
| 3605 | uAh = OD_DCT_RSHIFT(uA, 1); \ |
| 3606 | ur = uAh - ur; \ |
| 3607 | uz = us - uz; \ |
| 3608 | uzh = OD_DCT_RSHIFT(uz, 1); \ |
| 3609 | us -= uzh; \ |
| 3610 | uy += ut; \ |
| 3611 | uyh = OD_DCT_RSHIFT(uy, 1); \ |
| 3612 | ut = uyh - ut; \ |
| 3613 | ux = uu - ux; \ |
| 3614 | uxh = OD_DCT_RSHIFT(ux, 1); \ |
| 3615 | uu -= uxh; \ |
| 3616 | uw += uv; \ |
| 3617 | uwh = OD_DCT_RSHIFT(uw, 1); \ |
| 3618 | uv = uwh - uv; \ |
| 3619 | OD_FDCT_32_ASYM(u0, uw, uwh, ug, uM, uMh, u8, uE, uEh, uo, uU, uUh, \ |
| 3620 | u4, uA, uAh, uk, uQ, uQh, uc, uI, uIh, us, uY, uYh, u2, uy, uyh, \ |
| 3621 | ui, uO, uOh, ua, uG, uGh, uq, uW, uWh, u6, uC, uCh, um, uS, uSh, \ |
| 3622 | ue, uK, uKh, uu, u_, u_h); \ |
| 3623 | OD_FDST_32_ASYM(u, uv, uL, uf, uT, un, uD, u7, uX, ur, uH, ub, uP, uj, \ |
| 3624 | uz, u3, uZ, ut, uJ, ud, uR, ul, uB, u5, uV, up, uF, u9, uN, uh, ux, u1); \ |
| 3625 | } \ |
| 3626 | while (0) |
| 3627 | |
| 3628 | #define OD_IDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \ |
| 3629 | us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \ |
| 3630 | ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \ |
| 3631 | ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \ |
| 3632 | /* Embedded 64-point orthonormal Type-II iDCT. All 64 arguments are updated in place and each is expanded more than once, so pass plain modifiable lvalues. Runs OD_IDST_32_ASYM and OD_IDCT_32_ASYM first, then 32 butterflies pairing (u0, u), (u1, u_), (u2, uZ), ... (uv, uw). NOTE(review): u1h, u3h, ... uvh are passed to OD_IDCT_32_ASYM and read afterwards without being assigned here, so that macro presumably sets them; confirm against its definition. */ \ |
| 3633 | do { \ |
| 3634 | int u1h; \ |
| 3635 | int u3h; \ |
| 3636 | int u5h; \ |
| 3637 | int u7h; \ |
| 3638 | int u9h; \ |
| 3639 | int ubh; \ |
| 3640 | int udh; \ |
| 3641 | int ufh; \ |
| 3642 | int uhh; \ |
| 3643 | int ujh; \ |
| 3644 | int ulh; \ |
| 3645 | int unh; \ |
| 3646 | int uph; \ |
| 3647 | int urh; \ |
| 3648 | int uth; \ |
| 3649 | int uvh; \ |
| 3650 | int uxh; \ |
| 3651 | int uzh; \ |
| 3652 | int uBh; \ |
| 3653 | int uDh; \ |
| 3654 | int uFh; \ |
| 3655 | int uHh; \ |
| 3656 | int uJh; \ |
| 3657 | int uLh; \ |
| 3658 | int uNh; \ |
| 3659 | int uPh; \ |
| 3660 | int uRh; \ |
| 3661 | int uTh; \ |
| 3662 | int uVh; \ |
| 3663 | int uXh; \ |
| 3664 | int uZh; \ |
| 3665 | int uh_; \ |
| 3666 | OD_IDST_32_ASYM(u, uL, uT, uD, uX, uH, uP, uz, uZ, uJ, uR, uB, uV, uF, \ |
| 3667 | uN, ux, u_, uK, uS, uC, uW, uG, uO, uy, uY, uI, uQ, uA, uU, uE, uM, uw); \ |
| 3668 | OD_IDCT_32_ASYM(u0, ug, u8, uo, u4, uk, uc, us, u2, ui, ua, uq, u6, um, \ |
| 3669 | ue, uu, u1, u1h, uh, uhh, u9, u9h, up, uph, u5, u5h, ul, ulh, ud, udh, \ |
| 3670 | ut, uth, u3, u3h, uj, ujh, ub, ubh, ur, urh, u7, u7h, un, unh, uf, ufh, \ |
| 3671 | uv, uvh); \ |
| 3672 | uh_ = OD_DCT_RSHIFT(u, 1); \ |
| 3673 | u0 += uh_; \ |
| 3674 | u = u0 - u; \ |
| 3675 | u_ = u1h - u_; \ |
| 3676 | u1 -= u_; \ |
| 3677 | uZh = OD_DCT_RSHIFT(uZ, 1); \ |
| 3678 | u2 += uZh; \ |
| 3679 | uZ = u2 - uZ; \ |
| 3680 | uY = u3h - uY; \ |
| 3681 | u3 -= uY; \ |
| 3682 | uXh = OD_DCT_RSHIFT(uX, 1); \ |
| 3683 | u4 += uXh; \ |
| 3684 | uX = u4 - uX; \ |
| 3685 | uW = u5h - uW; \ |
| 3686 | u5 -= uW; \ |
| 3687 | uVh = OD_DCT_RSHIFT(uV, 1); \ |
| 3688 | u6 += uVh; \ |
| 3689 | uV = u6 - uV; \ |
| 3690 | uU = u7h - uU; \ |
| 3691 | u7 -= uU; \ |
| 3692 | uTh = OD_DCT_RSHIFT(uT, 1); \ |
| 3693 | u8 += uTh; \ |
| 3694 | uT = u8 - uT; \ |
| 3695 | uS = u9h - uS; \ |
| 3696 | u9 -= uS; \ |
| 3697 | uRh = OD_DCT_RSHIFT(uR, 1); \ |
| 3698 | ua += uRh; \ |
| 3699 | uR = ua - uR; \ |
| 3700 | uQ = ubh - uQ; \ |
| 3701 | ub -= uQ; \ |
| 3702 | uPh = OD_DCT_RSHIFT(uP, 1); \ |
| 3703 | uc += uPh; \ |
| 3704 | uP = uc - uP; \ |
| 3705 | uO = udh - uO; \ |
| 3706 | ud -= uO; \ |
| 3707 | uNh = OD_DCT_RSHIFT(uN, 1); \ |
| 3708 | ue += uNh; \ |
| 3709 | uN = ue - uN; \ |
| 3710 | uM = ufh - uM; \ |
| 3711 | uf -= uM; \ |
| 3712 | uLh = OD_DCT_RSHIFT(uL, 1); \ |
| 3713 | ug += uLh; \ |
| 3714 | uL = ug - uL; \ |
| 3715 | uK = uhh - uK; \ |
| 3716 | uh -= uK; \ |
| 3717 | uJh = OD_DCT_RSHIFT(uJ, 1); \ |
| 3718 | ui += uJh; \ |
| 3719 | uJ = ui - uJ; \ |
| 3720 | uI = ujh - uI; \ |
| 3721 | uj -= uI; \ |
| 3722 | uHh = OD_DCT_RSHIFT(uH, 1); \ |
| 3723 | uk += uHh; \ |
| 3724 | uH = uk - uH; \ |
| 3725 | uG = ulh - uG; \ |
| 3726 | ul -= uG; \ |
| 3727 | uFh = OD_DCT_RSHIFT(uF, 1); \ |
| 3728 | um += uFh; \ |
| 3729 | uF = um - uF; \ |
| 3730 | uE = unh - uE; \ |
| 3731 | un -= uE; \ |
| 3732 | uDh = OD_DCT_RSHIFT(uD, 1); \ |
| 3733 | uo += uDh; \ |
| 3734 | uD = uo - uD; \ |
| 3735 | uC = uph - uC; \ |
| 3736 | up -= uC; \ |
| 3737 | uBh = OD_DCT_RSHIFT(uB, 1); \ |
| 3738 | uq += uBh; \ |
| 3739 | uB = uq - uB; \ |
| 3740 | uA = urh - uA; \ |
| 3741 | ur -= uA; \ |
| 3742 | uzh = OD_DCT_RSHIFT(uz, 1); \ |
| 3743 | us += uzh; \ |
| 3744 | uz = us - uz; \ |
| 3745 | uy = uth - uy; \ |
| 3746 | ut -= uy; \ |
| 3747 | uxh = OD_DCT_RSHIFT(ux, 1); \ |
| 3748 | uu += uxh; \ |
| 3749 | ux = uu - ux; \ |
| 3750 | uw = uvh - uw; \ |
| 3751 | uv -= uw; \ |
| 3752 | } while (0) |
| 3753 | #endif |
| 3754 | |
Nathan E. Egge | 945176a | 2017-10-20 21:37:58 -0400 | [diff] [blame] | 3755 | /* 4-point orthonormal Type-II fDCT. */ |
Monty Montgomery | 02078a3 | 2017-07-11 21:22:29 -0400 | [diff] [blame] | 3756 | void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) { |
Nathan E. Egge | 945176a | 2017-10-20 21:37:58 -0400 | [diff] [blame] | 3757 | /* 4 "muls", 8 adds, 2 shifts */ |
Monty Montgomery | 02078a3 | 2017-07-11 21:22:29 -0400 | [diff] [blame] | 3758 | int q0; |
| 3759 | int q1; |
| 3760 | int q2; |
| 3761 | int q3; |
Nathan E. Egge | 945176a | 2017-10-20 21:37:58 -0400 | [diff] [blame] | 3762 | int u1; |
| 3763 | int t0; |
| 3764 | int t1; |
| 3765 | int t2; |
| 3766 | int t3; |
Monty Montgomery | 02078a3 | 2017-07-11 21:22:29 -0400 | [diff] [blame] | 3767 | q0 = x[0*xstride]; |
Nathan E. Egge | 945176a | 2017-10-20 21:37:58 -0400 | [diff] [blame] | 3768 | q1 = x[1*xstride]; |
| 3769 | q2 = x[2*xstride]; |
Monty Montgomery | 02078a3 | 2017-07-11 21:22:29 -0400 | [diff] [blame] | 3770 | q3 = x[3*xstride]; |
Nathan E. Egge | 945176a | 2017-10-20 21:37:58 -0400 | [diff] [blame] | 3771 | q3 = q0 - q3; |
| 3772 | q0 -= OD_DCT_RSHIFT(q3, 1); |
| 3773 | u1 = q1 + q2; |
| 3774 | q2 = q1 - q2; |
| 3775 | /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */ |
| 3776 | t0 = (q3*8867 + 16384) >> 15; |
| 3777 | /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */ |
| 3778 | t1 = (q2*21407 + 16384) >> 15; |
| 3779 | /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */ |
| 3780 | t2 = (q3*21407 + 16384) >> 15; |
| 3781 | /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */ |
| 3782 | t3 = (q2*8867 + 16384) >> 15; |
| 3783 | q0 += OD_DCT_RSHIFT(u1, 1); |
| 3784 | q1 = q0 - u1; |
| 3785 | q2 = t3 + t2; |
| 3786 | q3 = t0 - t1; |
| 3787 | y[0] = q0; |
| 3788 | y[1] = q2; |
| 3789 | y[2] = q1; |
| 3790 | y[3] = q3; |
Monty Montgomery | 02078a3 | 2017-07-11 21:22:29 -0400 | [diff] [blame] | 3791 | } |
| 3792 | |
Nathan E. Egge | 945176a | 2017-10-20 21:37:58 -0400 | [diff] [blame] | 3793 | /* 4-point orthonormal Type-II iDCT. */ |
Monty Montgomery | 02078a3 | 2017-07-11 21:22:29 -0400 | [diff] [blame] | 3794 | void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) { |
Nathan E. Egge | 945176a | 2017-10-20 21:37:58 -0400 | [diff] [blame] | 3795 | /* 4 "muls", 8 adds, 1 shift */ |
Monty Montgomery | 02078a3 | 2017-07-11 21:22:29 -0400 | [diff] [blame] | 3796 | int q0; |
| 3797 | int q1; |
| 3798 | int q2; |
| 3799 | int q3; |
Nathan E. Egge | 945176a | 2017-10-20 21:37:58 -0400 | [diff] [blame] | 3800 | int q1h; |
| 3801 | int u0; |
| 3802 | int t0; |
| 3803 | int t1; |
| 3804 | int t2; |
| 3805 | int t3; |
Monty Montgomery | 02078a3 | 2017-07-11 21:22:29 -0400 | [diff] [blame] | 3806 | q0 = y[0]; |
| 3807 | q2 = y[1]; |
| 3808 | q1 = y[2]; |
| 3809 | q3 = y[3]; |
Nathan E. Egge | 945176a | 2017-10-20 21:37:58 -0400 | [diff] [blame] | 3810 | /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */ |
| 3811 | t0 = (q3*8867 + 16384) >> 15; |
| 3812 | /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */ |
| 3813 | t1 = (q2*21407 + 16384) >> 15; |
| 3814 | /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */ |
| 3815 | t2 = (q3*21407 + 16384) >> 15; |
| 3816 | /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */ |
| 3817 | t3 = (q2*8867 + 16384) >> 15; |
| 3818 | q3 = t0 + t1; |
| 3819 | q2 = t3 - t2; |
| 3820 | q1 = q0 - q1; |
| 3821 | q1h = OD_DCT_RSHIFT(q1, 1); |
| 3822 | q0 -= q1h; |
| 3823 | u0 = q0 + q3; |
| 3824 | q3 = q0 - q3; |
| 3825 | q2 = q1h - q2; |
| 3826 | q1 -= q2; |
| 3827 | x[0*xstride] = u0; |
Monty Montgomery | 02078a3 | 2017-07-11 21:22:29 -0400 | [diff] [blame] | 3828 | x[1*xstride] = q1; |
| 3829 | x[2*xstride] = q2; |
| 3830 | x[3*xstride] = q3; |
| 3831 | } |
Monty Montgomery | cf18fe4 | 2017-07-11 21:33:25 -0400 | [diff] [blame] | 3832 | |
Nathan E. Egge | 14a9cb1 | 2017-08-21 17:35:04 -0400 | [diff] [blame] | 3833 | /* 4-point orthonormal Type-VII fDST. */ |
Monty Montgomery | 573cf25 | 2017-08-02 05:45:14 -0400 | [diff] [blame] | 3834 | void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride) { |
Nathan E. Egge | 14a9cb1 | 2017-08-21 17:35:04 -0400 | [diff] [blame] | 3835 | /* 11 adds, 5 "muls".*/ |
Monty Montgomery | 573cf25 | 2017-08-02 05:45:14 -0400 | [diff] [blame] | 3836 | int q0; |
| 3837 | int q1; |
| 3838 | int q2; |
| 3839 | int q3; |
Nathan E. Egge | 14a9cb1 | 2017-08-21 17:35:04 -0400 | [diff] [blame] | 3840 | int t0; |
| 3841 | int t1; |
| 3842 | int t2; |
| 3843 | int t3; |
| 3844 | int t4; |
| 3845 | q0 = x[0*xstride]; |
Nathan Egge | 5a5e1ad | 2017-09-12 12:33:48 +0000 | [diff] [blame] | 3846 | q1 = x[1*xstride]; |
Nathan E. Egge | 14a9cb1 | 2017-08-21 17:35:04 -0400 | [diff] [blame] | 3847 | q2 = x[2*xstride]; |
| 3848 | q3 = x[3*xstride]; |
| 3849 | t0 = q1 + q3; |
| 3850 | t1 = q0 + q1 - q3; |
| 3851 | t2 = q0 - q1; |
| 3852 | t3 = q2; |
| 3853 | t4 = q0 + q3; |
| 3854 | /* 7021/16384 ~= 2*Sin[2*Pi/9]/3 ~= 0.428525073124360 */ |
| 3855 | t0 = (t0*7021 + 8192) >> 14; |
| 3856 | /* 18919/32768 ~= 2*Sin[3*Pi/9]/3 ~= 0.577350269189626 */ |
| 3857 | t1 = (t1*18919 + 16384) >> 15; |
| 3858 | /* 21513/32768 ~= 2*Sin[4*Pi/9]/3 ~= 0.656538502008139 */ |
| 3859 | t2 = (t2*21513 + 16384) >> 15; |
| 3860 | /* 18919/32768 ~= 2*Sin[3*Pi/9]/3 ~= 0.577350269189626 */ |
| 3861 | t3 = (t3*18919 + 16384) >> 15; |
| 3862 | /* 467/2048 ~= 2*Sin[1*Pi/9]/3 ~= 0.228013428883779 */ |
| 3863 | t4 = (t4*467 + 1024) >> 11; |
| 3864 | q0 = t0 + t3 + t4; |
| 3865 | q1 = t1; |
| 3866 | q2 = t0 + t2 - t3; |
| 3867 | q3 = t2 + t3 - t4; |
| 3868 | y[0] = (od_coeff)q0; |
| 3869 | y[1] = (od_coeff)q1; |
| 3870 | y[2] = (od_coeff)q2; |
| 3871 | y[3] = (od_coeff)q3; |
Monty Montgomery | 573cf25 | 2017-08-02 05:45:14 -0400 | [diff] [blame] | 3872 | } |
| 3873 | |
Nathan E. Egge | 14a9cb1 | 2017-08-21 17:35:04 -0400 | [diff] [blame] | 3874 | /* 4-point orthonormal Type-VII iDST. */ |
Monty Montgomery | 573cf25 | 2017-08-02 05:45:14 -0400 | [diff] [blame] | 3875 | void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]) { |
Nathan E. Egge | 14a9cb1 | 2017-08-21 17:35:04 -0400 | [diff] [blame] | 3876 | /* 11 adds, 5 "muls".*/ |
Monty Montgomery | 573cf25 | 2017-08-02 05:45:14 -0400 | [diff] [blame] | 3877 | int q0; |
| 3878 | int q1; |
| 3879 | int q2; |
| 3880 | int q3; |
Nathan E. Egge | 14a9cb1 | 2017-08-21 17:35:04 -0400 | [diff] [blame] | 3881 | int t0; |
| 3882 | int t1; |
| 3883 | int t2; |
| 3884 | int t3; |
| 3885 | int t4; |
| 3886 | q0 = y[0]; |
| 3887 | q1 = y[1]; |
Nathan E. Egge | 72c99e1 | 2017-08-21 17:35:04 -0400 | [diff] [blame] | 3888 | q2 = y[2]; |
Nathan E. Egge | 14a9cb1 | 2017-08-21 17:35:04 -0400 | [diff] [blame] | 3889 | q3 = y[3]; |
| 3890 | t0 = q0 - q3; |
| 3891 | t1 = q0 + q2; |
| 3892 | t2 = q0 - q2 + q3; |
| 3893 | t3 = q1; |
| 3894 | t4 = q2 + q3; |
| 3895 | /* 467/2048 ~= 2*Sin[1*Pi/9]/3 ~= 0.228013428883779 */ |
| 3896 | t0 = (t0*467 + 1024) >> 11; |
| 3897 | /* 7021/16384 ~= 2*Sin[2*Pi/9]/3 ~= 0.428525073124360 */ |
| 3898 | t1 = (t1*7021 + 8192) >> 14; |
| 3899 | /* 18919/32768 ~= 2*Sin[3*Pi/9]/3 ~= 0.577350269189626 */ |
| 3900 | t2 = (t2*18919 + 16384) >> 15; |
| 3901 | /* 18919/32768 ~= 2*Sin[3*Pi/9]/3 ~= 0.577350269189626 */ |
| 3902 | t3 = (t3*18919 + 16384) >> 15; |
| 3903 | /* 21513/32768 ~= 2*Sin[4*Pi/9]/3 ~= 0.656538502008139 */ |
| 3904 | t4 = (t4*21513 + 16384) >> 15; |
| 3905 | q0 = t0 + t3 + t4; |
| 3906 | q1 = t1 + t3 - t4; |
| 3907 | q2 = t2; |
| 3908 | q3 = t0 + t1 - t3; |
| 3909 | x[0*xstride] = q0; |
| 3910 | x[1*xstride] = q1; |
| 3911 | x[2*xstride] = q2; |
| 3912 | x[3*xstride] = q3; |
Monty Montgomery | 573cf25 | 2017-08-02 05:45:14 -0400 | [diff] [blame] | 3913 | } |
| 3914 | |
Monty Montgomery | cf18fe4 | 2017-07-11 21:33:25 -0400 | [diff] [blame] | 3915 | void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride) { |
| 3916 | int r0; |
| 3917 | int r1; |
| 3918 | int r2; |
| 3919 | int r3; |
| 3920 | int r4; |
| 3921 | int r5; |
| 3922 | int r6; |
| 3923 | int r7; |
| 3924 | r0 = x[0*xstride]; |
| 3925 | r4 = x[1*xstride]; |
| 3926 | r2 = x[2*xstride]; |
| 3927 | r6 = x[3*xstride]; |
| 3928 | r1 = x[4*xstride]; |
| 3929 | r5 = x[5*xstride]; |
| 3930 | r3 = x[6*xstride]; |
| 3931 | r7 = x[7*xstride]; |
| 3932 | OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7); |
| 3933 | y[0] = (od_coeff)r0; |
| 3934 | y[1] = (od_coeff)r1; |
| 3935 | y[2] = (od_coeff)r2; |
| 3936 | y[3] = (od_coeff)r3; |
| 3937 | y[4] = (od_coeff)r4; |
| 3938 | y[5] = (od_coeff)r5; |
| 3939 | y[6] = (od_coeff)r6; |
| 3940 | y[7] = (od_coeff)r7; |
| 3941 | } |
| 3942 | |
| 3943 | void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]) { |
| 3944 | int r0; |
| 3945 | int r1; |
| 3946 | int r2; |
| 3947 | int r3; |
| 3948 | int r4; |
| 3949 | int r5; |
| 3950 | int r6; |
| 3951 | int r7; |
| 3952 | r0 = y[0]; |
| 3953 | r4 = y[1]; |
| 3954 | r2 = y[2]; |
| 3955 | r6 = y[3]; |
| 3956 | r1 = y[4]; |
| 3957 | r5 = y[5]; |
| 3958 | r3 = y[6]; |
| 3959 | r7 = y[7]; |
| 3960 | OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7); |
| 3961 | x[0*xstride] = (od_coeff)r0; |
| 3962 | x[1*xstride] = (od_coeff)r1; |
| 3963 | x[2*xstride] = (od_coeff)r2; |
| 3964 | x[3*xstride] = (od_coeff)r3; |
| 3965 | x[4*xstride] = (od_coeff)r4; |
| 3966 | x[5*xstride] = (od_coeff)r5; |
| 3967 | x[6*xstride] = (od_coeff)r6; |
| 3968 | x[7*xstride] = (od_coeff)r7; |
| 3969 | } |
| 3970 | |
/* Index permutation used by the 8-point DST wrappers: element i of the
   od_poly_prod_8() vector corresponds to coefficient OD_DST_8_PERM[i].
   od_bin_fdst8() scatters through it and od_bin_idst8() gathers through it. */
const int OD_DST_8_PERM[8] = {
  0, 7, 1, 6,
  2, 5, 3, 4
};
| 3972 | |
| 3973 | /* Computes the Polynomial Product Y(z) ≡ X(z)*H(z) modulo (z^8 + 1) using |
| 3974 | Nussbaumer's "short" algorithm [1]. |
| 3975 | The monomial coefficients in Y(z) are exactly the values of an acyclic |
| 3976 | convolution of the monomial coefficients of X(z) and H(z). |
| 3977 | Since H(z) is fixed, the multiplication terms are constant and precomputed. |
| 3978 | |
| 3979 | [1] Nussbaumer, Henri J. "Fast Fourier Transform and Convolution Algorithms" |
| 3980 | Springer-Verlag: Berlin, Heidelberg, New York (1981) pages 76-78. */ |
| 3981 | static void od_poly_prod_8(od_coeff y[8], const od_coeff x[8]) { |
| 3982 | /* 21 "muls", 75 adds, 18 shifts */ |
| 3983 | od_coeff q0; |
| 3984 | od_coeff q1; |
| 3985 | od_coeff q2; |
| 3986 | od_coeff q3; |
| 3987 | od_coeff q4; |
| 3988 | od_coeff q5; |
| 3989 | od_coeff q6; |
| 3990 | od_coeff q7; |
| 3991 | od_coeff q8; |
| 3992 | od_coeff q9; |
| 3993 | od_coeff q10; |
| 3994 | od_coeff q11; |
| 3995 | od_coeff q12; |
| 3996 | od_coeff q13; |
| 3997 | od_coeff q14; |
| 3998 | od_coeff q15; |
| 3999 | od_coeff q16; |
| 4000 | od_coeff q17; |
| 4001 | od_coeff q18; |
| 4002 | od_coeff q19; |
| 4003 | od_coeff q20; |
| 4004 | od_coeff t0; |
| 4005 | od_coeff t1; |
| 4006 | od_coeff t2; |
| 4007 | od_coeff t3; |
| 4008 | od_coeff t4; |
| 4009 | od_coeff t5; |
| 4010 | od_coeff t6; |
| 4011 | od_coeff t7; |
| 4012 | od_coeff u0; |
| 4013 | od_coeff u1; |
| 4014 | od_coeff u1h; |
| 4015 | od_coeff u2; |
| 4016 | od_coeff u2h; |
| 4017 | od_coeff u3; |
| 4018 | od_coeff u4; |
| 4019 | od_coeff u4h; |
| 4020 | od_coeff u5; |
| 4021 | od_coeff u6; |
| 4022 | od_coeff u7; |
| 4023 | od_coeff u7h; |
| 4024 | od_coeff u8; |
| 4025 | od_coeff u9; |
| 4026 | od_coeff u10; |
| 4027 | od_coeff u11; |
| 4028 | od_coeff u12; |
| 4029 | od_coeff u13; |
| 4030 | od_coeff u14; |
| 4031 | od_coeff u15; |
| 4032 | od_coeff u16; |
| 4033 | od_coeff u17; |
| 4034 | od_coeff u18; |
| 4035 | od_coeff u19; |
| 4036 | od_coeff u20; |
| 4037 | od_coeff u21; |
| 4038 | od_coeff u22; |
| 4039 | od_coeff u23; |
| 4040 | od_coeff u24; |
| 4041 | od_coeff u25; |
| 4042 | od_coeff u26; |
| 4043 | od_coeff u27; |
| 4044 | t0 = x[0]; |
| 4045 | t1 = x[1]; |
| 4046 | t2 = x[2]; |
| 4047 | t3 = x[3]; |
| 4048 | t4 = x[4]; |
| 4049 | t5 = x[5]; |
| 4050 | t6 = x[6]; |
| 4051 | t7 = x[7]; |
| 4052 | /* Stage 0 Butterfly */ |
| 4053 | u7 = t0 - t7; |
| 4054 | u7h = OD_DCT_RSHIFT(u7, 1); |
| 4055 | u0 = t0 - u7h; |
| 4056 | u2 = t2 - t6; |
| 4057 | u2h = OD_DCT_RSHIFT(u2, 1); |
| 4058 | u6 = t2 - u2h; |
| 4059 | u4 = t4 + t5; |
| 4060 | u4h = OD_DCT_RSHIFT(u4, 1); |
| 4061 | u5 = t4 - u4h; |
| 4062 | u1 = t3 - t1; |
| 4063 | u1h = OD_DCT_RSHIFT(u1, 1); |
| 4064 | u3 = t3 - u1h; |
| 4065 | /* Stage 1 Butterfly */ |
| 4066 | q0 = u0 + u2h; |
| 4067 | q1 = q0 - u2; |
| 4068 | q4 = u3 + u4h; |
| 4069 | q5 = q4 - u4; |
| 4070 | q2 = u7h + u5; |
| 4071 | q7 = u7 - q2; |
| 4072 | q6 = u1h + u6; |
| 4073 | q3 = u1 - q6; |
| 4074 | /* Stage 2 Half-Butterfly */ |
| 4075 | /*The intermediate sums can overflow 16 bits, but all SIMD instruction sets |
| 4076 | should be able to compute them without issue (i.e., using PAVGW or |
| 4077 | V{R}HADD.S16).*/ |
| 4078 | q8 = (q0 + q4 + 1) >> 1; |
| 4079 | q9 = (q1 + q5) >> 1; |
| 4080 | q10 = (q2 + q3 + 1) >> 1; |
| 4081 | q11 = (q7 + q6) >> 1; |
| 4082 | /* Stage 3 */ |
| 4083 | q12 = t0 + t3; |
| 4084 | q13 = t0; |
| 4085 | q14 = t3; |
| 4086 | q15 = t5 - t6; |
| 4087 | q16 = t6; |
| 4088 | q17 = t5; |
| 4089 | q18 = ((q6 + ((t0 + t6 + 1) >> 1)) - (q4 + (t5 >> 1))) >> 1; |
| 4090 | q19 = ((q7 + ((t5 + t6 + 1) >> 1)) - (q0 + (t3 >> 1))) >> 1; |
| 4091 | q20 = (q18 - q19) >> 1; |
| 4092 | /* Stage 4 */ |
| 4093 | q0 = (-5995*q0 + 8192) >> 14; |
| 4094 | q1 = (-1373*q1 + 4096) >> 13; |
| 4095 | q2 = (22891*q2 + 16384) >> 15; |
| 4096 | q3 = (-217*q3 + 512) >> 10; |
| 4097 | q4 = (13427*q4 + 16384) >> 15; |
| 4098 | q5 = (-11013*q5 + 8192) >> 14; |
| 4099 | q6 = (1373*q6 + 1024) >> 11; |
| 4100 | q7 = (-14077*q7 + 16384) >> 15; |
| 4101 | q8 = (-1437*q8 + 16384) >> 15; |
| 4102 | q9 = (27519*q9 + 16384) >> 15; |
| 4103 | q10 = (-15947*q10 + 16384) >> 15; |
| 4104 | q11 = (-7891*q11 + 16384) >> 15; |
| 4105 | q12 = (4897*q12 + 16384) >> 15; |
| 4106 | q13 = (-5079*q13 + 8192) >> 14; |
| 4107 | q14 = (365*q14 + 16384) >> 15; |
| 4108 | q15 = (3325*q15 + 8192) >> 14; |
| 4109 | q16 = (-5225*q16 + 8192) >> 14; |
| 4110 | q17 = (-1425*q17 + 8192) >> 14; |
| 4111 | q18 = (3453*q18 + 16384) >> 15; |
| 4112 | q19 = (-8421*q19 + 8192) >> 14; |
| 4113 | q20 = (-20295*q20 + 16384) >> 15; |
| 4114 | /* Stage 5 */ |
| 4115 | u0 = q0 + q8; |
| 4116 | u1 = q1 + q9; |
| 4117 | u2 = q2 + q10; |
| 4118 | u3 = q3 + q10; |
| 4119 | u4 = q4 + q8; |
| 4120 | u5 = q5 + q9; |
| 4121 | u6 = q6 + q11; |
| 4122 | u7 = q7 + q11; |
| 4123 | /* Stage 6 */ |
| 4124 | u10 = u0 + u1; |
| 4125 | u11 = u0 - u1; |
| 4126 | u12 = u2 + u7; |
| 4127 | u13 = u2 - u7; |
| 4128 | u14 = u3 + u6; |
| 4129 | u15 = u3 - u6; |
| 4130 | u16 = u5 + u4; |
| 4131 | u17 = u5 - u4; |
| 4132 | /* Stage 7 */ |
| 4133 | u8 = q19 + q20; |
| 4134 | u9 = q19 - q18; |
| 4135 | u18 = q12 + u8; |
| 4136 | u19 = u18 + q13; |
| 4137 | u20 = u18 + q14; |
| 4138 | u21 = u9 << 1; |
| 4139 | u22 = q15 + u21; |
| 4140 | u23 = q16 - u22; |
| 4141 | u24 = u22 + q17; |
| 4142 | u25 = u8 << 1; |
| 4143 | u26 = u25 << 1; |
| 4144 | u27 = u25 - u9; |
| 4145 | /* Stage 8 */ |
| 4146 | y[0] = u14 + u16 + u20; |
| 4147 | y[1] = u12 - u10 - u25; |
| 4148 | y[2] = u9 + u13 - u17; |
| 4149 | y[3] = u9 - u10 - u12 - u19; |
| 4150 | y[4] = u15 - u11 - u27; |
| 4151 | y[5] = u23 - u11 - u15; |
| 4152 | y[6] = u13 + u17 - u24 + u26; |
| 4153 | y[7] = u16 - u14 + u21 - u25; |
| 4154 | } |
| 4155 | |
Monty Montgomery | cf18fe4 | 2017-07-11 21:33:25 -0400 | [diff] [blame] | 4156 | void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride) { |
Nathan E. Egge | efb44bb | 2017-10-22 05:42:06 -0400 | [diff] [blame] | 4157 | int i; |
| 4158 | od_coeff xp[8]; |
| 4159 | od_coeff yp[8]; |
| 4160 | for (i = 0; i < 8; i++) xp[i] = x[i*xstride]; |
| 4161 | od_poly_prod_8(yp, xp); |
| 4162 | for (i = 0; i < 8; i++) y[OD_DST_8_PERM[i]] = yp[i]; |
Monty Montgomery | cf18fe4 | 2017-07-11 21:33:25 -0400 | [diff] [blame] | 4163 | } |
| 4164 | |
| 4165 | void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]) { |
Nathan E. Egge | efb44bb | 2017-10-22 05:42:06 -0400 | [diff] [blame] | 4166 | int i; |
| 4167 | od_coeff xp[8]; |
| 4168 | od_coeff yp[8]; |
| 4169 | for (i = 0; i < 8; i++) yp[i] = y[OD_DST_8_PERM[i]]; |
| 4170 | od_poly_prod_8(xp, yp); |
| 4171 | for (i = 0; i < 8; i++) x[i*xstride] = xp[i]; |
Monty Montgomery | cf18fe4 | 2017-07-11 21:33:25 -0400 | [diff] [blame] | 4172 | } |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4173 | |
| 4174 | void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride) { |
| 4175 | int s0; |
| 4176 | int s1; |
| 4177 | int s2; |
| 4178 | int s3; |
| 4179 | int s4; |
| 4180 | int s5; |
| 4181 | int s6; |
| 4182 | int s7; |
| 4183 | int s8; |
| 4184 | int s9; |
| 4185 | int sa; |
| 4186 | int sb; |
| 4187 | int sc; |
| 4188 | int sd; |
| 4189 | int se; |
| 4190 | int sf; |
| 4191 | s0 = x[0*xstride]; |
| 4192 | s8 = x[1*xstride]; |
| 4193 | s4 = x[2*xstride]; |
| 4194 | sc = x[3*xstride]; |
| 4195 | s2 = x[4*xstride]; |
| 4196 | sa = x[5*xstride]; |
| 4197 | s6 = x[6*xstride]; |
| 4198 | se = x[7*xstride]; |
| 4199 | s1 = x[8*xstride]; |
| 4200 | s9 = x[9*xstride]; |
| 4201 | s5 = x[10*xstride]; |
| 4202 | sd = x[11*xstride]; |
| 4203 | s3 = x[12*xstride]; |
| 4204 | sb = x[13*xstride]; |
| 4205 | s7 = x[14*xstride]; |
| 4206 | sf = x[15*xstride]; |
| 4207 | OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf); |
| 4208 | y[0] = (od_coeff)s0; |
| 4209 | y[1] = (od_coeff)s1; |
| 4210 | y[2] = (od_coeff)s2; |
| 4211 | y[3] = (od_coeff)s3; |
| 4212 | y[4] = (od_coeff)s4; |
| 4213 | y[5] = (od_coeff)s5; |
| 4214 | y[6] = (od_coeff)s6; |
| 4215 | y[7] = (od_coeff)s7; |
| 4216 | y[8] = (od_coeff)s8; |
| 4217 | y[9] = (od_coeff)s9; |
| 4218 | y[10] = (od_coeff)sa; |
| 4219 | y[11] = (od_coeff)sb; |
| 4220 | y[12] = (od_coeff)sc; |
| 4221 | y[13] = (od_coeff)sd; |
| 4222 | y[14] = (od_coeff)se; |
| 4223 | y[15] = (od_coeff)sf; |
| 4224 | } |
| 4225 | |
| 4226 | void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]) { |
| 4227 | int s0; |
| 4228 | int s1; |
| 4229 | int s2; |
| 4230 | int s3; |
| 4231 | int s4; |
| 4232 | int s5; |
| 4233 | int s6; |
| 4234 | int s7; |
| 4235 | int s8; |
| 4236 | int s9; |
| 4237 | int sa; |
| 4238 | int sb; |
| 4239 | int sc; |
| 4240 | int sd; |
| 4241 | int se; |
| 4242 | int sf; |
| 4243 | s0 = y[0]; |
| 4244 | s8 = y[1]; |
| 4245 | s4 = y[2]; |
| 4246 | sc = y[3]; |
| 4247 | s2 = y[4]; |
| 4248 | sa = y[5]; |
| 4249 | s6 = y[6]; |
| 4250 | se = y[7]; |
| 4251 | s1 = y[8]; |
| 4252 | s9 = y[9]; |
| 4253 | s5 = y[10]; |
| 4254 | sd = y[11]; |
| 4255 | s3 = y[12]; |
| 4256 | sb = y[13]; |
| 4257 | s7 = y[14]; |
| 4258 | sf = y[15]; |
| 4259 | OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf); |
| 4260 | x[0*xstride] = (od_coeff)s0; |
| 4261 | x[1*xstride] = (od_coeff)s1; |
| 4262 | x[2*xstride] = (od_coeff)s2; |
| 4263 | x[3*xstride] = (od_coeff)s3; |
| 4264 | x[4*xstride] = (od_coeff)s4; |
| 4265 | x[5*xstride] = (od_coeff)s5; |
| 4266 | x[6*xstride] = (od_coeff)s6; |
| 4267 | x[7*xstride] = (od_coeff)s7; |
| 4268 | x[8*xstride] = (od_coeff)s8; |
| 4269 | x[9*xstride] = (od_coeff)s9; |
| 4270 | x[10*xstride] = (od_coeff)sa; |
| 4271 | x[11*xstride] = (od_coeff)sb; |
| 4272 | x[12*xstride] = (od_coeff)sc; |
| 4273 | x[13*xstride] = (od_coeff)sd; |
| 4274 | x[14*xstride] = (od_coeff)se; |
| 4275 | x[15*xstride] = (od_coeff)sf; |
| 4276 | } |
| 4277 | |
| 4278 | void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride) { |
| 4279 | int s0; |
| 4280 | int s1; |
| 4281 | int s2; |
| 4282 | int s3; |
| 4283 | int s4; |
| 4284 | int s5; |
| 4285 | int s6; |
| 4286 | int s7; |
| 4287 | int s8; |
| 4288 | int s9; |
| 4289 | int sa; |
| 4290 | int sb; |
| 4291 | int sc; |
| 4292 | int sd; |
| 4293 | int se; |
| 4294 | int sf; |
| 4295 | s0 = x[15*xstride]; |
| 4296 | s8 = x[14*xstride]; |
| 4297 | s4 = x[13*xstride]; |
| 4298 | sc = x[12*xstride]; |
| 4299 | s2 = x[11*xstride]; |
| 4300 | sa = x[10*xstride]; |
| 4301 | s6 = x[9*xstride]; |
| 4302 | se = x[8*xstride]; |
| 4303 | s1 = x[7*xstride]; |
| 4304 | s9 = x[6*xstride]; |
| 4305 | s5 = x[5*xstride]; |
| 4306 | sd = x[4*xstride]; |
| 4307 | s3 = x[3*xstride]; |
| 4308 | sb = x[2*xstride]; |
| 4309 | s7 = x[1*xstride]; |
| 4310 | sf = x[0*xstride]; |
| 4311 | OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf); |
| 4312 | y[0] = (od_coeff)sf; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4313 | y[1] = (od_coeff)-se; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4314 | y[2] = (od_coeff)sd; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4315 | y[3] = (od_coeff)-sc; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4316 | y[4] = (od_coeff)sb; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4317 | y[5] = (od_coeff)-sa; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4318 | y[6] = (od_coeff)s9; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4319 | y[7] = (od_coeff)-s8; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4320 | y[8] = (od_coeff)s7; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4321 | y[9] = (od_coeff)-s6; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4322 | y[10] = (od_coeff)s5; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4323 | y[11] = (od_coeff)-s4; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4324 | y[12] = (od_coeff)s3; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4325 | y[13] = (od_coeff)-s2; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4326 | y[14] = (od_coeff)s1; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4327 | y[15] = (od_coeff)-s0; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4328 | } |
| 4329 | |
| 4330 | void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]) { |
| 4331 | int s0; |
| 4332 | int s1; |
| 4333 | int s2; |
| 4334 | int s3; |
| 4335 | int s4; |
| 4336 | int s5; |
| 4337 | int s6; |
| 4338 | int s7; |
| 4339 | int s8; |
| 4340 | int s9; |
| 4341 | int sa; |
| 4342 | int sb; |
| 4343 | int sc; |
| 4344 | int sd; |
| 4345 | int se; |
| 4346 | int sf; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4347 | s0 = -y[15]; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4348 | s8 = y[14]; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4349 | s4 = -y[13]; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4350 | sc = y[12]; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4351 | s2 = -y[11]; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4352 | sa = y[10]; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4353 | s6 = -y[9]; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4354 | se = y[8]; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4355 | s1 = -y[7]; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4356 | s9 = y[6]; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4357 | s5 = -y[5]; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4358 | sd = y[4]; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4359 | s3 = -y[3]; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4360 | sb = y[2]; |
Nathan E. Egge | 69a1643 | 2017-10-18 12:50:28 -0400 | [diff] [blame] | 4361 | s7 = -y[1]; |
Monty Montgomery | cb9c1c5 | 2017-07-17 18:15:30 -0400 | [diff] [blame] | 4362 | sf = y[0]; |
| 4363 | OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf); |
| 4364 | x[0*xstride] = (od_coeff)sf; |
| 4365 | x[1*xstride] = (od_coeff)se; |
| 4366 | x[2*xstride] = (od_coeff)sd; |
| 4367 | x[3*xstride] = (od_coeff)sc; |
| 4368 | x[4*xstride] = (od_coeff)sb; |
| 4369 | x[5*xstride] = (od_coeff)sa; |
| 4370 | x[6*xstride] = (od_coeff)s9; |
| 4371 | x[7*xstride] = (od_coeff)s8; |
| 4372 | x[8*xstride] = (od_coeff)s7; |
| 4373 | x[9*xstride] = (od_coeff)s6; |
| 4374 | x[10*xstride] = (od_coeff)s5; |
| 4375 | x[11*xstride] = (od_coeff)s4; |
| 4376 | x[12*xstride] = (od_coeff)s3; |
| 4377 | x[13*xstride] = (od_coeff)s2; |
| 4378 | x[14*xstride] = (od_coeff)s1; |
| 4379 | x[15*xstride] = (od_coeff)s0; |
| 4380 | } |
Monty Montgomery | 2cb52ba | 2017-07-17 18:27:27 -0400 | [diff] [blame] | 4381 | |
| 4382 | void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride) { |
| 4383 | /*215 adds, 38 shifts, 87 "muls".*/ |
| 4384 | int t0; |
| 4385 | int t1; |
| 4386 | int t2; |
| 4387 | int t3; |
| 4388 | int t4; |
| 4389 | int t5; |
| 4390 | int t6; |
| 4391 | int t7; |
| 4392 | int t8; |
| 4393 | int t9; |
| 4394 | int ta; |
| 4395 | int tb; |
| 4396 | int tc; |
| 4397 | int td; |
| 4398 | int te; |
| 4399 | int tf; |
| 4400 | int tg; |
| 4401 | int th; |
| 4402 | int ti; |
| 4403 | int tj; |
| 4404 | int tk; |
| 4405 | int tl; |
| 4406 | int tm; |
| 4407 | int tn; |
| 4408 | int to; |
| 4409 | int tp; |
| 4410 | int tq; |
| 4411 | int tr; |
| 4412 | int ts; |
| 4413 | int tt; |
| 4414 | int tu; |
| 4415 | int tv; |
| 4416 | t0 = x[0*xstride]; |
| 4417 | tg = x[1*xstride]; |
| 4418 | t8 = x[2*xstride]; |
| 4419 | to = x[3*xstride]; |
| 4420 | t4 = x[4*xstride]; |
| 4421 | tk = x[5*xstride]; |
| 4422 | tc = x[6*xstride]; |
| 4423 | ts = x[7*xstride]; |
| 4424 | t2 = x[8*xstride]; |
| 4425 | ti = x[9*xstride]; |
| 4426 | ta = x[10*xstride]; |
| 4427 | tq = x[11*xstride]; |
| 4428 | t6 = x[12*xstride]; |
| 4429 | tm = x[13*xstride]; |
| 4430 | te = x[14*xstride]; |
| 4431 | tu = x[15*xstride]; |
| 4432 | t1 = x[16*xstride]; |
| 4433 | th = x[17*xstride]; |
| 4434 | t9 = x[18*xstride]; |
| 4435 | tp = x[19*xstride]; |
| 4436 | t5 = x[20*xstride]; |
| 4437 | tl = x[21*xstride]; |
| 4438 | td = x[22*xstride]; |
| 4439 | tt = x[23*xstride]; |
| 4440 | t3 = x[24*xstride]; |
| 4441 | tj = x[25*xstride]; |
| 4442 | tb = x[26*xstride]; |
| 4443 | tr = x[27*xstride]; |
| 4444 | t7 = x[28*xstride]; |
| 4445 | tn = x[29*xstride]; |
| 4446 | tf = x[30*xstride]; |
| 4447 | tv = x[31*xstride]; |
| 4448 | OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu, |
| 4449 | t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv); |
| 4450 | y[0] = (od_coeff)t0; |
| 4451 | y[1] = (od_coeff)t1; |
| 4452 | y[2] = (od_coeff)t2; |
| 4453 | y[3] = (od_coeff)t3; |
| 4454 | y[4] = (od_coeff)t4; |
| 4455 | y[5] = (od_coeff)t5; |
| 4456 | y[6] = (od_coeff)t6; |
| 4457 | y[7] = (od_coeff)t7; |
| 4458 | y[8] = (od_coeff)t8; |
| 4459 | y[9] = (od_coeff)t9; |
| 4460 | y[10] = (od_coeff)ta; |
| 4461 | y[11] = (od_coeff)tb; |
| 4462 | y[12] = (od_coeff)tc; |
| 4463 | y[13] = (od_coeff)td; |
| 4464 | y[14] = (od_coeff)te; |
| 4465 | y[15] = (od_coeff)tf; |
| 4466 | y[16] = (od_coeff)tg; |
| 4467 | y[17] = (od_coeff)th; |
| 4468 | y[18] = (od_coeff)ti; |
| 4469 | y[19] = (od_coeff)tj; |
| 4470 | y[20] = (od_coeff)tk; |
| 4471 | y[21] = (od_coeff)tl; |
| 4472 | y[22] = (od_coeff)tm; |
| 4473 | y[23] = (od_coeff)tn; |
| 4474 | y[24] = (od_coeff)to; |
| 4475 | y[25] = (od_coeff)tp; |
| 4476 | y[26] = (od_coeff)tq; |
| 4477 | y[27] = (od_coeff)tr; |
| 4478 | y[28] = (od_coeff)ts; |
| 4479 | y[29] = (od_coeff)tt; |
| 4480 | y[30] = (od_coeff)tu; |
| 4481 | y[31] = (od_coeff)tv; |
| 4482 | } |
| 4483 | |
| 4484 | void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]) { |
| 4485 | int t0; |
| 4486 | int t1; |
| 4487 | int t2; |
| 4488 | int t3; |
| 4489 | int t4; |
| 4490 | int t5; |
| 4491 | int t6; |
| 4492 | int t7; |
| 4493 | int t8; |
| 4494 | int t9; |
| 4495 | int ta; |
| 4496 | int tb; |
| 4497 | int tc; |
| 4498 | int td; |
| 4499 | int te; |
| 4500 | int tf; |
| 4501 | int tg; |
| 4502 | int th; |
| 4503 | int ti; |
| 4504 | int tj; |
| 4505 | int tk; |
| 4506 | int tl; |
| 4507 | int tm; |
| 4508 | int tn; |
| 4509 | int to; |
| 4510 | int tp; |
| 4511 | int tq; |
| 4512 | int tr; |
| 4513 | int ts; |
| 4514 | int tt; |
| 4515 | int tu; |
| 4516 | int tv; |
| 4517 | t0 = y[0]; |
| 4518 | tg = y[1]; |
| 4519 | t8 = y[2]; |
| 4520 | to = y[3]; |
| 4521 | t4 = y[4]; |
| 4522 | tk = y[5]; |
| 4523 | tc = y[6]; |
| 4524 | ts = y[7]; |
| 4525 | t2 = y[8]; |
| 4526 | ti = y[9]; |
| 4527 | ta = y[10]; |
| 4528 | tq = y[11]; |
| 4529 | t6 = y[12]; |
| 4530 | tm = y[13]; |
| 4531 | te = y[14]; |
| 4532 | tu = y[15]; |
| 4533 | t1 = y[16]; |
| 4534 | th = y[17]; |
| 4535 | t9 = y[18]; |
| 4536 | tp = y[19]; |
| 4537 | t5 = y[20]; |
| 4538 | tl = y[21]; |
| 4539 | td = y[22]; |
| 4540 | tt = y[23]; |
| 4541 | t3 = y[24]; |
| 4542 | tj = y[25]; |
| 4543 | tb = y[26]; |
| 4544 | tr = y[27]; |
| 4545 | t7 = y[28]; |
| 4546 | tn = y[29]; |
| 4547 | tf = y[30]; |
| 4548 | tv = y[31]; |
| 4549 | OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu, |
| 4550 | t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv); |
| 4551 | x[0*xstride] = (od_coeff)t0; |
| 4552 | x[1*xstride] = (od_coeff)t1; |
| 4553 | x[2*xstride] = (od_coeff)t2; |
| 4554 | x[3*xstride] = (od_coeff)t3; |
| 4555 | x[4*xstride] = (od_coeff)t4; |
| 4556 | x[5*xstride] = (od_coeff)t5; |
| 4557 | x[6*xstride] = (od_coeff)t6; |
| 4558 | x[7*xstride] = (od_coeff)t7; |
| 4559 | x[8*xstride] = (od_coeff)t8; |
| 4560 | x[9*xstride] = (od_coeff)t9; |
| 4561 | x[10*xstride] = (od_coeff)ta; |
| 4562 | x[11*xstride] = (od_coeff)tb; |
| 4563 | x[12*xstride] = (od_coeff)tc; |
| 4564 | x[13*xstride] = (od_coeff)td; |
| 4565 | x[14*xstride] = (od_coeff)te; |
| 4566 | x[15*xstride] = (od_coeff)tf; |
| 4567 | x[16*xstride] = (od_coeff)tg; |
| 4568 | x[17*xstride] = (od_coeff)th; |
| 4569 | x[18*xstride] = (od_coeff)ti; |
| 4570 | x[19*xstride] = (od_coeff)tj; |
| 4571 | x[20*xstride] = (od_coeff)tk; |
| 4572 | x[21*xstride] = (od_coeff)tl; |
| 4573 | x[22*xstride] = (od_coeff)tm; |
| 4574 | x[23*xstride] = (od_coeff)tn; |
| 4575 | x[24*xstride] = (od_coeff)to; |
| 4576 | x[25*xstride] = (od_coeff)tp; |
| 4577 | x[26*xstride] = (od_coeff)tq; |
| 4578 | x[27*xstride] = (od_coeff)tr; |
| 4579 | x[28*xstride] = (od_coeff)ts; |
| 4580 | x[29*xstride] = (od_coeff)tt; |
| 4581 | x[30*xstride] = (od_coeff)tu; |
| 4582 | x[31*xstride] = (od_coeff)tv; |
| 4583 | } |
Monty Montgomery | a4e245a | 2017-07-22 00:48:31 -0400 | [diff] [blame] | 4584 | |
Nathan E. Egge | f73e47e | 2017-10-22 06:41:55 -0400 | [diff] [blame] | 4585 | void od_bin_fdst32(od_coeff y[32], const od_coeff *x, int xstride) { |
| 4586 | od_coeff t0; |
| 4587 | od_coeff t1; |
| 4588 | od_coeff t2; |
| 4589 | od_coeff t3; |
| 4590 | od_coeff t4; |
| 4591 | od_coeff t5; |
| 4592 | od_coeff t6; |
| 4593 | od_coeff t7; |
| 4594 | od_coeff t8; |
| 4595 | od_coeff t9; |
| 4596 | od_coeff ta; |
| 4597 | od_coeff tb; |
| 4598 | od_coeff tc; |
| 4599 | od_coeff td; |
| 4600 | od_coeff te; |
| 4601 | od_coeff tf; |
| 4602 | od_coeff tg; |
| 4603 | od_coeff th; |
| 4604 | od_coeff ti; |
| 4605 | od_coeff tj; |
| 4606 | od_coeff tk; |
| 4607 | od_coeff tl; |
| 4608 | od_coeff tm; |
| 4609 | od_coeff tn; |
| 4610 | od_coeff to; |
| 4611 | od_coeff tp; |
| 4612 | od_coeff tq; |
| 4613 | od_coeff tr; |
| 4614 | od_coeff ts; |
| 4615 | od_coeff tt; |
| 4616 | od_coeff tu; |
| 4617 | od_coeff tv; |
| 4618 | t0 = x[0*xstride]; |
| 4619 | t1 = x[1*xstride]; |
| 4620 | t2 = x[2*xstride]; |
| 4621 | t3 = x[3*xstride]; |
| 4622 | t4 = x[4*xstride]; |
| 4623 | t5 = x[5*xstride]; |
| 4624 | t6 = x[6*xstride]; |
| 4625 | t7 = x[7*xstride]; |
| 4626 | t8 = x[8*xstride]; |
| 4627 | t9 = x[9*xstride]; |
| 4628 | ta = x[10*xstride]; |
| 4629 | tb = x[11*xstride]; |
| 4630 | tc = x[12*xstride]; |
| 4631 | td = x[13*xstride]; |
| 4632 | te = x[14*xstride]; |
| 4633 | tf = x[15*xstride]; |
| 4634 | tg = x[16*xstride]; |
| 4635 | th = x[17*xstride]; |
| 4636 | ti = x[18*xstride]; |
| 4637 | tj = x[19*xstride]; |
| 4638 | tk = x[20*xstride]; |
| 4639 | tl = x[21*xstride]; |
| 4640 | tm = x[22*xstride]; |
| 4641 | tn = x[23*xstride]; |
| 4642 | to = x[24*xstride]; |
| 4643 | tp = x[25*xstride]; |
| 4644 | tq = x[26*xstride]; |
| 4645 | tr = x[27*xstride]; |
| 4646 | ts = x[28*xstride]; |
| 4647 | tt = x[29*xstride]; |
| 4648 | tu = x[30*xstride]; |
| 4649 | tv = x[31*xstride]; |
| 4650 | OD_FDST_32(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, te, tf, |
| 4651 | tg, th, ti, tj, tk, tl, tm, tn, to, tp, tq, tr, ts, tt, tu, tv); |
| 4652 | y[0] = t0; |
| 4653 | y[1] = tg; |
| 4654 | y[2] = t8; |
| 4655 | y[3] = to; |
| 4656 | y[4] = t4; |
| 4657 | y[5] = tk; |
| 4658 | y[6] = tc; |
| 4659 | y[7] = ts; |
| 4660 | y[8] = t2; |
| 4661 | y[9] = ti; |
| 4662 | y[10] = ta; |
| 4663 | y[11] = tq; |
| 4664 | y[12] = t6; |
| 4665 | y[13] = tm; |
| 4666 | y[14] = te; |
| 4667 | y[15] = tu; |
| 4668 | y[16] = t1; |
| 4669 | y[17] = th; |
| 4670 | y[18] = t9; |
| 4671 | y[19] = tp; |
| 4672 | y[20] = t5; |
| 4673 | y[21] = tl; |
| 4674 | y[22] = td; |
| 4675 | y[23] = tt; |
| 4676 | y[24] = t3; |
| 4677 | y[25] = tj; |
| 4678 | y[26] = tb; |
| 4679 | y[27] = tr; |
| 4680 | y[28] = t7; |
| 4681 | y[29] = tn; |
| 4682 | y[30] = tf; |
| 4683 | y[31] = tv; |
| 4684 | } |
| 4685 | |
| 4686 | void od_bin_idst32(od_coeff *x, int xstride, const od_coeff y[32]) { |
| 4687 | od_coeff t0; |
| 4688 | od_coeff t1; |
| 4689 | od_coeff t2; |
| 4690 | od_coeff t3; |
| 4691 | od_coeff t4; |
| 4692 | od_coeff t5; |
| 4693 | od_coeff t6; |
| 4694 | od_coeff t7; |
| 4695 | od_coeff t8; |
| 4696 | od_coeff t9; |
| 4697 | od_coeff ta; |
| 4698 | od_coeff tb; |
| 4699 | od_coeff tc; |
| 4700 | od_coeff td; |
| 4701 | od_coeff te; |
| 4702 | od_coeff tf; |
| 4703 | od_coeff tg; |
| 4704 | od_coeff th; |
| 4705 | od_coeff ti; |
| 4706 | od_coeff tj; |
| 4707 | od_coeff tk; |
| 4708 | od_coeff tl; |
| 4709 | od_coeff tm; |
| 4710 | od_coeff tn; |
| 4711 | od_coeff to; |
| 4712 | od_coeff tp; |
| 4713 | od_coeff tq; |
| 4714 | od_coeff tr; |
| 4715 | od_coeff ts; |
| 4716 | od_coeff tt; |
| 4717 | od_coeff tu; |
| 4718 | od_coeff tv; |
| 4719 | t0 = y[0]; |
| 4720 | tg = y[1]; |
| 4721 | t8 = y[2]; |
| 4722 | to = y[3]; |
| 4723 | t4 = y[4]; |
| 4724 | tk = y[5]; |
| 4725 | tc = y[6]; |
| 4726 | ts = y[7]; |
| 4727 | t2 = y[8]; |
| 4728 | ti = y[9]; |
| 4729 | ta = y[10]; |
| 4730 | tq = y[11]; |
| 4731 | t6 = y[12]; |
| 4732 | tm = y[13]; |
| 4733 | te = y[14]; |
| 4734 | tu = y[15]; |
| 4735 | t1 = y[16]; |
| 4736 | th = y[17]; |
| 4737 | t9 = y[18]; |
| 4738 | tp = y[19]; |
| 4739 | t5 = y[20]; |
| 4740 | tl = y[21]; |
| 4741 | td = y[22]; |
| 4742 | tt = y[23]; |
| 4743 | t3 = y[24]; |
| 4744 | tj = y[25]; |
| 4745 | tb = y[26]; |
| 4746 | tr = y[27]; |
| 4747 | t7 = y[28]; |
| 4748 | tn = y[29]; |
| 4749 | tf = y[30]; |
| 4750 | tv = y[31]; |
| 4751 | OD_IDST_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu, |
| 4752 | t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv); |
| 4753 | x[0*xstride] = t0; |
| 4754 | x[1*xstride] = t1; |
| 4755 | x[2*xstride] = t2; |
| 4756 | x[3*xstride] = t3; |
| 4757 | x[4*xstride] = t4; |
| 4758 | x[5*xstride] = t5; |
| 4759 | x[6*xstride] = t6; |
| 4760 | x[7*xstride] = t7; |
| 4761 | x[8*xstride] = t8; |
| 4762 | x[9*xstride] = t9; |
| 4763 | x[10*xstride] = ta; |
| 4764 | x[11*xstride] = tb; |
| 4765 | x[12*xstride] = tc; |
| 4766 | x[13*xstride] = td; |
| 4767 | x[14*xstride] = te; |
| 4768 | x[15*xstride] = tf; |
| 4769 | x[16*xstride] = tg; |
| 4770 | x[17*xstride] = th; |
| 4771 | x[18*xstride] = ti; |
| 4772 | x[19*xstride] = tj; |
| 4773 | x[20*xstride] = tk; |
| 4774 | x[21*xstride] = tl; |
| 4775 | x[22*xstride] = tm; |
| 4776 | x[23*xstride] = tn; |
| 4777 | x[24*xstride] = to; |
| 4778 | x[25*xstride] = tp; |
| 4779 | x[26*xstride] = tq; |
| 4780 | x[27*xstride] = tr; |
| 4781 | x[28*xstride] = ts; |
| 4782 | x[29*xstride] = tt; |
| 4783 | x[30*xstride] = tu; |
| 4784 | x[31*xstride] = tv; |
| 4785 | } |
| 4786 | |
Monty Montgomery | a4e245a | 2017-07-22 00:48:31 -0400 | [diff] [blame] | 4787 | #if CONFIG_TX64X64 |
| 4788 | void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride) { |
| 4789 | int t0; |
| 4790 | int t1; |
| 4791 | int t2; |
| 4792 | int t3; |
| 4793 | int t4; |
| 4794 | int t5; |
| 4795 | int t6; |
| 4796 | int t7; |
| 4797 | int t8; |
| 4798 | int t9; |
| 4799 | int ta; |
| 4800 | int tb; |
| 4801 | int tc; |
| 4802 | int td; |
| 4803 | int te; |
| 4804 | int tf; |
| 4805 | int tg; |
| 4806 | int th; |
| 4807 | int ti; |
| 4808 | int tj; |
| 4809 | int tk; |
| 4810 | int tl; |
| 4811 | int tm; |
| 4812 | int tn; |
| 4813 | int to; |
| 4814 | int tp; |
| 4815 | int tq; |
| 4816 | int tr; |
| 4817 | int ts; |
| 4818 | int tt; |
| 4819 | int tu; |
| 4820 | int tv; |
| 4821 | int tw; |
| 4822 | int tx; |
| 4823 | int ty; |
| 4824 | int tz; |
| 4825 | int tA; |
| 4826 | int tB; |
| 4827 | int tC; |
| 4828 | int tD; |
| 4829 | int tE; |
| 4830 | int tF; |
| 4831 | int tG; |
| 4832 | int tH; |
| 4833 | int tI; |
| 4834 | int tJ; |
| 4835 | int tK; |
| 4836 | int tL; |
| 4837 | int tM; |
| 4838 | int tN; |
| 4839 | int tO; |
| 4840 | int tP; |
| 4841 | int tQ; |
| 4842 | int tR; |
| 4843 | int tS; |
| 4844 | int tT; |
| 4845 | int tU; |
| 4846 | int tV; |
| 4847 | int tW; |
| 4848 | int tX; |
| 4849 | int tY; |
| 4850 | int tZ; |
| 4851 | int t_; |
| 4852 | int t; |
| 4853 | t0 = x[0*xstride]; |
| 4854 | tw = x[1*xstride]; |
| 4855 | tg = x[2*xstride]; |
| 4856 | tM = x[3*xstride]; |
| 4857 | t8 = x[4*xstride]; |
| 4858 | tE = x[5*xstride]; |
| 4859 | to = x[6*xstride]; |
| 4860 | tU = x[7*xstride]; |
| 4861 | t4 = x[8*xstride]; |
| 4862 | tA = x[9*xstride]; |
| 4863 | tk = x[10*xstride]; |
| 4864 | tQ = x[11*xstride]; |
| 4865 | tc = x[12*xstride]; |
| 4866 | tI = x[13*xstride]; |
| 4867 | ts = x[14*xstride]; |
| 4868 | tY = x[15*xstride]; |
| 4869 | t2 = x[16*xstride]; |
| 4870 | ty = x[17*xstride]; |
| 4871 | ti = x[18*xstride]; |
| 4872 | tO = x[19*xstride]; |
| 4873 | ta = x[20*xstride]; |
| 4874 | tG = x[21*xstride]; |
| 4875 | tq = x[22*xstride]; |
| 4876 | tW = x[23*xstride]; |
| 4877 | t6 = x[24*xstride]; |
| 4878 | tC = x[25*xstride]; |
| 4879 | tm = x[26*xstride]; |
| 4880 | tS = x[27*xstride]; |
| 4881 | te = x[28*xstride]; |
| 4882 | tK = x[29*xstride]; |
| 4883 | tu = x[30*xstride]; |
| 4884 | t_ = x[31*xstride]; |
| 4885 | t1 = x[32*xstride]; |
| 4886 | tx = x[33*xstride]; |
| 4887 | th = x[34*xstride]; |
| 4888 | tN = x[35*xstride]; |
| 4889 | t9 = x[36*xstride]; |
| 4890 | tF = x[37*xstride]; |
| 4891 | tp = x[38*xstride]; |
| 4892 | tV = x[39*xstride]; |
| 4893 | t5 = x[40*xstride]; |
| 4894 | tB = x[41*xstride]; |
| 4895 | tl = x[42*xstride]; |
| 4896 | tR = x[43*xstride]; |
| 4897 | td = x[44*xstride]; |
| 4898 | tJ = x[45*xstride]; |
| 4899 | tt = x[46*xstride]; |
| 4900 | tZ = x[47*xstride]; |
| 4901 | t3 = x[48*xstride]; |
| 4902 | tz = x[49*xstride]; |
| 4903 | tj = x[50*xstride]; |
| 4904 | tP = x[51*xstride]; |
| 4905 | tb = x[52*xstride]; |
| 4906 | tH = x[53*xstride]; |
| 4907 | tr = x[54*xstride]; |
| 4908 | tX = x[55*xstride]; |
| 4909 | t7 = x[56*xstride]; |
| 4910 | tD = x[57*xstride]; |
| 4911 | tn = x[58*xstride]; |
| 4912 | tT = x[59*xstride]; |
| 4913 | tf = x[60*xstride]; |
| 4914 | tL = x[61*xstride]; |
| 4915 | tv = x[62*xstride]; |
| 4916 | t = x[63*xstride]; |
| 4917 | OD_FDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY, |
| 4918 | t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx, |
| 4919 | th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP, |
| 4920 | tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t); |
| 4921 | y[0] = (od_coeff)t0; |
| 4922 | y[1] = (od_coeff)t1; |
| 4923 | y[2] = (od_coeff)t2; |
| 4924 | y[3] = (od_coeff)t3; |
| 4925 | y[4] = (od_coeff)t4; |
| 4926 | y[5] = (od_coeff)t5; |
| 4927 | y[6] = (od_coeff)t6; |
| 4928 | y[7] = (od_coeff)t7; |
| 4929 | y[8] = (od_coeff)t8; |
| 4930 | y[9] = (od_coeff)t9; |
| 4931 | y[10] = (od_coeff)ta; |
| 4932 | y[11] = (od_coeff)tb; |
| 4933 | y[12] = (od_coeff)tc; |
| 4934 | y[13] = (od_coeff)td; |
| 4935 | y[14] = (od_coeff)te; |
| 4936 | y[15] = (od_coeff)tf; |
| 4937 | y[16] = (od_coeff)tg; |
| 4938 | y[17] = (od_coeff)th; |
| 4939 | y[18] = (od_coeff)ti; |
| 4940 | y[19] = (od_coeff)tj; |
| 4941 | y[20] = (od_coeff)tk; |
| 4942 | y[21] = (od_coeff)tl; |
| 4943 | y[22] = (od_coeff)tm; |
| 4944 | y[23] = (od_coeff)tn; |
| 4945 | y[24] = (od_coeff)to; |
| 4946 | y[25] = (od_coeff)tp; |
| 4947 | y[26] = (od_coeff)tq; |
| 4948 | y[27] = (od_coeff)tr; |
| 4949 | y[28] = (od_coeff)ts; |
| 4950 | y[29] = (od_coeff)tt; |
| 4951 | y[30] = (od_coeff)tu; |
| 4952 | y[31] = (od_coeff)tv; |
| 4953 | y[32] = (od_coeff)tw; |
| 4954 | y[33] = (od_coeff)tx; |
| 4955 | y[34] = (od_coeff)ty; |
| 4956 | y[35] = (od_coeff)tz; |
| 4957 | y[36] = (od_coeff)tA; |
| 4958 | y[37] = (od_coeff)tB; |
| 4959 | y[38] = (od_coeff)tC; |
| 4960 | y[39] = (od_coeff)tD; |
| 4961 | y[40] = (od_coeff)tE; |
| 4962 | y[41] = (od_coeff)tF; |
| 4963 | y[41] = (od_coeff)tF; |
| 4964 | y[42] = (od_coeff)tG; |
| 4965 | y[43] = (od_coeff)tH; |
| 4966 | y[44] = (od_coeff)tI; |
| 4967 | y[45] = (od_coeff)tJ; |
| 4968 | y[46] = (od_coeff)tK; |
| 4969 | y[47] = (od_coeff)tL; |
| 4970 | y[48] = (od_coeff)tM; |
| 4971 | y[49] = (od_coeff)tN; |
| 4972 | y[50] = (od_coeff)tO; |
| 4973 | y[51] = (od_coeff)tP; |
| 4974 | y[52] = (od_coeff)tQ; |
| 4975 | y[53] = (od_coeff)tR; |
| 4976 | y[54] = (od_coeff)tS; |
| 4977 | y[55] = (od_coeff)tT; |
| 4978 | y[56] = (od_coeff)tU; |
| 4979 | y[57] = (od_coeff)tV; |
| 4980 | y[58] = (od_coeff)tW; |
| 4981 | y[59] = (od_coeff)tX; |
| 4982 | y[60] = (od_coeff)tY; |
| 4983 | y[61] = (od_coeff)tZ; |
| 4984 | y[62] = (od_coeff)t_; |
| 4985 | y[63] = (od_coeff)t; |
| 4986 | } |
| 4987 | |
| 4988 | void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]) { |
| 4989 | int t0; |
| 4990 | int t1; |
| 4991 | int t2; |
| 4992 | int t3; |
| 4993 | int t4; |
| 4994 | int t5; |
| 4995 | int t6; |
| 4996 | int t7; |
| 4997 | int t8; |
| 4998 | int t9; |
| 4999 | int ta; |
| 5000 | int tb; |
| 5001 | int tc; |
| 5002 | int td; |
| 5003 | int te; |
| 5004 | int tf; |
| 5005 | int tg; |
| 5006 | int th; |
| 5007 | int ti; |
| 5008 | int tj; |
| 5009 | int tk; |
| 5010 | int tl; |
| 5011 | int tm; |
| 5012 | int tn; |
| 5013 | int to; |
| 5014 | int tp; |
| 5015 | int tq; |
| 5016 | int tr; |
| 5017 | int ts; |
| 5018 | int tt; |
| 5019 | int tu; |
| 5020 | int tv; |
| 5021 | int tw; |
| 5022 | int tx; |
| 5023 | int ty; |
| 5024 | int tz; |
| 5025 | int tA; |
| 5026 | int tB; |
| 5027 | int tC; |
| 5028 | int tD; |
| 5029 | int tE; |
| 5030 | int tF; |
| 5031 | int tG; |
| 5032 | int tH; |
| 5033 | int tI; |
| 5034 | int tJ; |
| 5035 | int tK; |
| 5036 | int tL; |
| 5037 | int tM; |
| 5038 | int tN; |
| 5039 | int tO; |
| 5040 | int tP; |
| 5041 | int tQ; |
| 5042 | int tR; |
| 5043 | int tS; |
| 5044 | int tT; |
| 5045 | int tU; |
| 5046 | int tV; |
| 5047 | int tW; |
| 5048 | int tX; |
| 5049 | int tY; |
| 5050 | int tZ; |
| 5051 | int t_; |
| 5052 | int t; |
| 5053 | t0 = y[0]; |
| 5054 | tw = y[1]; |
| 5055 | tg = y[2]; |
| 5056 | tM = y[3]; |
| 5057 | t8 = y[4]; |
| 5058 | tE = y[5]; |
| 5059 | to = y[6]; |
| 5060 | tU = y[7]; |
| 5061 | t4 = y[8]; |
| 5062 | tA = y[9]; |
| 5063 | tk = y[10]; |
| 5064 | tQ = y[11]; |
| 5065 | tc = y[12]; |
| 5066 | tI = y[13]; |
| 5067 | ts = y[14]; |
| 5068 | tY = y[15]; |
| 5069 | t2 = y[16]; |
| 5070 | ty = y[17]; |
| 5071 | ti = y[18]; |
| 5072 | tO = y[19]; |
| 5073 | ta = y[20]; |
| 5074 | tG = y[21]; |
| 5075 | tq = y[22]; |
| 5076 | tW = y[23]; |
| 5077 | t6 = y[24]; |
| 5078 | tC = y[25]; |
| 5079 | tm = y[26]; |
| 5080 | tS = y[27]; |
| 5081 | te = y[28]; |
| 5082 | tK = y[29]; |
| 5083 | tu = y[30]; |
| 5084 | t_ = y[31]; |
| 5085 | t1 = y[32]; |
| 5086 | tx = y[33]; |
| 5087 | th = y[34]; |
| 5088 | tN = y[35]; |
| 5089 | t9 = y[36]; |
| 5090 | tF = y[37]; |
| 5091 | tp = y[38]; |
| 5092 | tV = y[39]; |
| 5093 | t5 = y[40]; |
| 5094 | tB = y[41]; |
| 5095 | tl = y[42]; |
| 5096 | tR = y[43]; |
| 5097 | td = y[44]; |
| 5098 | tJ = y[45]; |
| 5099 | tt = y[46]; |
| 5100 | tZ = y[47]; |
| 5101 | t3 = y[48]; |
| 5102 | tz = y[49]; |
| 5103 | tj = y[50]; |
| 5104 | tP = y[51]; |
| 5105 | tb = y[52]; |
| 5106 | tH = y[53]; |
| 5107 | tr = y[54]; |
| 5108 | tX = y[55]; |
| 5109 | t7 = y[56]; |
| 5110 | tD = y[57]; |
| 5111 | tn = y[58]; |
| 5112 | tT = y[59]; |
| 5113 | tf = y[60]; |
| 5114 | tL = y[61]; |
| 5115 | tv = y[62]; |
| 5116 | t = y[63]; |
| 5117 | OD_IDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY, |
| 5118 | t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx, |
| 5119 | th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP, |
| 5120 | tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t); |
| 5121 | x[0*xstride] = (od_coeff)t0; |
| 5122 | x[1*xstride] = (od_coeff)t1; |
| 5123 | x[2*xstride] = (od_coeff)t2; |
| 5124 | x[3*xstride] = (od_coeff)t3; |
| 5125 | x[4*xstride] = (od_coeff)t4; |
| 5126 | x[5*xstride] = (od_coeff)t5; |
| 5127 | x[6*xstride] = (od_coeff)t6; |
| 5128 | x[7*xstride] = (od_coeff)t7; |
| 5129 | x[8*xstride] = (od_coeff)t8; |
| 5130 | x[9*xstride] = (od_coeff)t9; |
| 5131 | x[10*xstride] = (od_coeff)ta; |
| 5132 | x[11*xstride] = (od_coeff)tb; |
| 5133 | x[12*xstride] = (od_coeff)tc; |
| 5134 | x[13*xstride] = (od_coeff)td; |
| 5135 | x[14*xstride] = (od_coeff)te; |
| 5136 | x[15*xstride] = (od_coeff)tf; |
| 5137 | x[16*xstride] = (od_coeff)tg; |
| 5138 | x[17*xstride] = (od_coeff)th; |
| 5139 | x[18*xstride] = (od_coeff)ti; |
| 5140 | x[19*xstride] = (od_coeff)tj; |
| 5141 | x[20*xstride] = (od_coeff)tk; |
| 5142 | x[21*xstride] = (od_coeff)tl; |
| 5143 | x[22*xstride] = (od_coeff)tm; |
| 5144 | x[23*xstride] = (od_coeff)tn; |
| 5145 | x[24*xstride] = (od_coeff)to; |
| 5146 | x[25*xstride] = (od_coeff)tp; |
| 5147 | x[26*xstride] = (od_coeff)tq; |
| 5148 | x[27*xstride] = (od_coeff)tr; |
| 5149 | x[28*xstride] = (od_coeff)ts; |
| 5150 | x[29*xstride] = (od_coeff)tt; |
| 5151 | x[30*xstride] = (od_coeff)tu; |
| 5152 | x[31*xstride] = (od_coeff)tv; |
| 5153 | x[32*xstride] = (od_coeff)tw; |
| 5154 | x[33*xstride] = (od_coeff)tx; |
| 5155 | x[34*xstride] = (od_coeff)ty; |
| 5156 | x[35*xstride] = (od_coeff)tz; |
| 5157 | x[36*xstride] = (od_coeff)tA; |
| 5158 | x[37*xstride] = (od_coeff)tB; |
| 5159 | x[38*xstride] = (od_coeff)tC; |
| 5160 | x[39*xstride] = (od_coeff)tD; |
| 5161 | x[40*xstride] = (od_coeff)tE; |
| 5162 | x[41*xstride] = (od_coeff)tF; |
| 5163 | x[41*xstride] = (od_coeff)tF; |
| 5164 | x[42*xstride] = (od_coeff)tG; |
| 5165 | x[43*xstride] = (od_coeff)tH; |
| 5166 | x[44*xstride] = (od_coeff)tI; |
| 5167 | x[45*xstride] = (od_coeff)tJ; |
| 5168 | x[46*xstride] = (od_coeff)tK; |
| 5169 | x[47*xstride] = (od_coeff)tL; |
| 5170 | x[48*xstride] = (od_coeff)tM; |
| 5171 | x[49*xstride] = (od_coeff)tN; |
| 5172 | x[50*xstride] = (od_coeff)tO; |
| 5173 | x[51*xstride] = (od_coeff)tP; |
| 5174 | x[52*xstride] = (od_coeff)tQ; |
| 5175 | x[53*xstride] = (od_coeff)tR; |
| 5176 | x[54*xstride] = (od_coeff)tS; |
| 5177 | x[55*xstride] = (od_coeff)tT; |
| 5178 | x[56*xstride] = (od_coeff)tU; |
| 5179 | x[57*xstride] = (od_coeff)tV; |
| 5180 | x[58*xstride] = (od_coeff)tW; |
| 5181 | x[59*xstride] = (od_coeff)tX; |
| 5182 | x[60*xstride] = (od_coeff)tY; |
| 5183 | x[61*xstride] = (od_coeff)tZ; |
| 5184 | x[62*xstride] = (od_coeff)t_; |
| 5185 | x[63*xstride] = (od_coeff)t; |
| 5186 | } |
| 5187 | #endif |
Nathan E. Egge | 5e6bda8 | 2017-09-16 10:13:51 -0400 | [diff] [blame] | 5188 | |
| 5189 | void daala_fdct4(const tran_low_t *input, tran_low_t *output) { |
| 5190 | int i; |
| 5191 | od_coeff x[4]; |
| 5192 | od_coeff y[4]; |
| 5193 | for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i]; |
| 5194 | od_bin_fdct4(y, x, 1); |
| 5195 | for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i]; |
| 5196 | } |
| 5197 | |
| 5198 | void daala_idct4(const tran_low_t *input, tran_low_t *output) { |
| 5199 | int i; |
| 5200 | od_coeff x[4]; |
| 5201 | od_coeff y[4]; |
| 5202 | for (i = 0; i < 4; i++) y[i] = input[i]; |
| 5203 | od_bin_idct4(x, 1, y); |
| 5204 | for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i]; |
| 5205 | } |
Nathan E. Egge | 1aefb5e | 2017-09-16 11:28:41 -0400 | [diff] [blame] | 5206 | |
| 5207 | void daala_fdst4(const tran_low_t *input, tran_low_t *output) { |
| 5208 | int i; |
| 5209 | od_coeff x[4]; |
| 5210 | od_coeff y[4]; |
| 5211 | for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i]; |
| 5212 | od_bin_fdst4(y, x, 1); |
| 5213 | for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i]; |
| 5214 | } |
| 5215 | |
| 5216 | void daala_idst4(const tran_low_t *input, tran_low_t *output) { |
| 5217 | int i; |
| 5218 | od_coeff x[4]; |
| 5219 | od_coeff y[4]; |
| 5220 | for (i = 0; i < 4; i++) y[i] = input[i]; |
| 5221 | od_bin_idst4(x, 1, y); |
| 5222 | for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i]; |
| 5223 | } |
Nathan E. Egge | 75bfeb8 | 2017-09-16 20:41:24 -0400 | [diff] [blame] | 5224 | |
Nathan E. Egge | 31f24ee | 2017-09-18 11:25:26 -0400 | [diff] [blame] | 5225 | void daala_idtx4(const tran_low_t *input, tran_low_t *output) { |
| 5226 | int i; |
| 5227 | for (i = 0; i < 4; i++) output[i] = input[i]; |
| 5228 | } |
| 5229 | |
Nathan E. Egge | 75bfeb8 | 2017-09-16 20:41:24 -0400 | [diff] [blame] | 5230 | void daala_fdct8(const tran_low_t *input, tran_low_t *output) { |
| 5231 | int i; |
| 5232 | od_coeff x[8]; |
| 5233 | od_coeff y[8]; |
| 5234 | for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i]; |
| 5235 | od_bin_fdct8(y, x, 1); |
| 5236 | for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i]; |
| 5237 | } |
| 5238 | |
| 5239 | void daala_idct8(const tran_low_t *input, tran_low_t *output) { |
| 5240 | int i; |
| 5241 | od_coeff x[8]; |
| 5242 | od_coeff y[8]; |
| 5243 | for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i]; |
| 5244 | od_bin_idct8(x, 1, y); |
| 5245 | for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i]; |
| 5246 | } |
Nathan E. Egge | 8a873db | 2017-09-16 20:55:20 -0400 | [diff] [blame] | 5247 | |
| 5248 | void daala_fdst8(const tran_low_t *input, tran_low_t *output) { |
| 5249 | int i; |
| 5250 | od_coeff x[8]; |
| 5251 | od_coeff y[8]; |
| 5252 | for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i]; |
| 5253 | od_bin_fdst8(y, x, 1); |
| 5254 | for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i]; |
| 5255 | } |
| 5256 | |
| 5257 | void daala_idst8(const tran_low_t *input, tran_low_t *output) { |
| 5258 | int i; |
| 5259 | od_coeff x[8]; |
| 5260 | od_coeff y[8]; |
| 5261 | for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i]; |
| 5262 | od_bin_idst8(x, 1, y); |
| 5263 | for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i]; |
| 5264 | } |
Nathan E. Egge | c5c1e56 | 2017-09-16 22:18:18 -0400 | [diff] [blame] | 5265 | |
Nathan E. Egge | 3f45fb3 | 2017-09-18 11:34:48 -0400 | [diff] [blame] | 5266 | void daala_idtx8(const tran_low_t *input, tran_low_t *output) { |
| 5267 | int i; |
| 5268 | for (i = 0; i < 8; i++) output[i] = input[i]; |
| 5269 | } |
| 5270 | |
Nathan E. Egge | c5c1e56 | 2017-09-16 22:18:18 -0400 | [diff] [blame] | 5271 | void daala_fdct16(const tran_low_t *input, tran_low_t *output) { |
| 5272 | int i; |
| 5273 | od_coeff x[16]; |
| 5274 | od_coeff y[16]; |
| 5275 | for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i]; |
| 5276 | od_bin_fdct16(y, x, 1); |
| 5277 | for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i]; |
| 5278 | } |
| 5279 | |
| 5280 | void daala_idct16(const tran_low_t *input, tran_low_t *output) { |
| 5281 | int i; |
| 5282 | od_coeff x[16]; |
| 5283 | od_coeff y[16]; |
| 5284 | for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i]; |
| 5285 | od_bin_idct16(x, 1, y); |
| 5286 | for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i]; |
| 5287 | } |
Nathan E. Egge | cbcff06 | 2017-09-16 22:32:19 -0400 | [diff] [blame] | 5288 | |
| 5289 | void daala_fdst16(const tran_low_t *input, tran_low_t *output) { |
| 5290 | int i; |
| 5291 | od_coeff x[16]; |
| 5292 | od_coeff y[16]; |
| 5293 | for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i]; |
| 5294 | od_bin_fdst16(y, x, 1); |
| 5295 | for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i]; |
| 5296 | } |
| 5297 | |
| 5298 | void daala_idst16(const tran_low_t *input, tran_low_t *output) { |
| 5299 | int i; |
| 5300 | od_coeff x[16]; |
| 5301 | od_coeff y[16]; |
| 5302 | for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i]; |
| 5303 | od_bin_idst16(x, 1, y); |
| 5304 | for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i]; |
| 5305 | } |
Nathan E. Egge | dfd1a92 | 2017-09-16 23:35:30 -0400 | [diff] [blame] | 5306 | |
Nathan E. Egge | 74e7fd0 | 2017-09-18 11:40:31 -0400 | [diff] [blame] | 5307 | void daala_idtx16(const tran_low_t *input, tran_low_t *output) { |
| 5308 | int i; |
| 5309 | for (i = 0; i < 16; i++) output[i] = input[i]; |
| 5310 | } |
| 5311 | |
Nathan E. Egge | dfd1a92 | 2017-09-16 23:35:30 -0400 | [diff] [blame] | 5312 | void daala_fdct32(const tran_low_t *input, tran_low_t *output) { |
| 5313 | int i; |
| 5314 | od_coeff x[32]; |
| 5315 | od_coeff y[32]; |
| 5316 | for (i = 0; i < 32; i++) x[i] = (od_coeff)input[i]; |
| 5317 | od_bin_fdct32(y, x, 1); |
| 5318 | for (i = 0; i < 32; i++) output[i] = (tran_low_t)y[i]; |
| 5319 | } |
| 5320 | |
| 5321 | void daala_idct32(const tran_low_t *input, tran_low_t *output) { |
| 5322 | int i; |
| 5323 | od_coeff x[32]; |
| 5324 | od_coeff y[32]; |
| 5325 | for (i = 0; i < 32; i++) y[i] = (od_coeff)input[i]; |
| 5326 | od_bin_idct32(x, 1, y); |
| 5327 | for (i = 0; i < 32; i++) output[i] = (tran_low_t)x[i]; |
| 5328 | } |
Nathan E. Egge | d866114 | 2017-09-16 23:57:51 -0400 | [diff] [blame] | 5329 | |
Nathan E. Egge | f6d3ba6 | 2017-09-18 15:40:08 -0400 | [diff] [blame] | 5330 | void daala_fdst32(const tran_low_t *input, tran_low_t *output) { |
| 5331 | int i; |
Nathan E. Egge | f73e47e | 2017-10-22 06:41:55 -0400 | [diff] [blame] | 5332 | od_coeff x[32]; |
| 5333 | od_coeff y[32]; |
| 5334 | for (i = 0; i < 32; i++) x[i] = (od_coeff)input[i]; |
| 5335 | od_bin_fdst32(y, x, 1); |
| 5336 | for (i = 0; i < 32; i++) output[i] = (tran_low_t)y[i]; |
Nathan E. Egge | f6d3ba6 | 2017-09-18 15:40:08 -0400 | [diff] [blame] | 5337 | } |
| 5338 | |
Nathan E. Egge | f6d3ba6 | 2017-09-18 15:40:08 -0400 | [diff] [blame] | 5339 | void daala_idst32(const tran_low_t *input, tran_low_t *output) { |
| 5340 | int i; |
Nathan E. Egge | f73e47e | 2017-10-22 06:41:55 -0400 | [diff] [blame] | 5341 | od_coeff x[32]; |
| 5342 | od_coeff y[32]; |
| 5343 | for (i = 0; i < 32; i++) y[i] = input[i]; |
| 5344 | od_bin_idst32(x, 1, y); |
| 5345 | for (i = 0; i < 32; i++) output[i] = (tran_low_t)x[i]; |
Nathan E. Egge | f6d3ba6 | 2017-09-18 15:40:08 -0400 | [diff] [blame] | 5346 | } |
| 5347 | |
Nathan E. Egge | 4c77fc0 | 2017-09-18 11:47:52 -0400 | [diff] [blame] | 5348 | void daala_idtx32(const tran_low_t *input, tran_low_t *output) { |
| 5349 | int i; |
| 5350 | for (i = 0; i < 32; i++) output[i] = input[i]; |
| 5351 | } |
| 5352 | |
Nathan E. Egge | d866114 | 2017-09-16 23:57:51 -0400 | [diff] [blame] | 5353 | #if CONFIG_TX64X64 |
| 5354 | void daala_fdct64(const tran_low_t *input, tran_low_t *output) { |
| 5355 | int i; |
| 5356 | od_coeff x[64]; |
| 5357 | od_coeff y[64]; |
| 5358 | for (i = 0; i < 64; i++) x[i] = (od_coeff)input[i]; |
| 5359 | od_bin_fdct64(y, x, 1); |
| 5360 | for (i = 0; i < 64; i++) output[i] = (tran_low_t)y[i]; |
| 5361 | } |
| 5362 | |
| 5363 | void daala_idct64(const tran_low_t *input, tran_low_t *output) { |
| 5364 | int i; |
| 5365 | od_coeff x[64]; |
| 5366 | od_coeff y[64]; |
| 5367 | for (i = 0; i < 64; i++) y[i] = (od_coeff)input[i]; |
| 5368 | od_bin_idct64(x, 1, y); |
| 5369 | for (i = 0; i < 64; i++) output[i] = (tran_low_t)x[i]; |
| 5370 | } |
Nathan E. Egge | 01b1d91 | 2017-09-18 12:02:22 -0400 | [diff] [blame] | 5371 | |
Nathan E. Egge | 2496a85 | 2017-09-18 15:59:54 -0400 | [diff] [blame] | 5372 | /* Preserve the "half-right" transform behavior. */ |
| 5373 | void daala_fdst64(const tran_low_t *input, tran_low_t *output) { |
| 5374 | int i; |
| 5375 | tran_low_t inputhalf[32]; |
| 5376 | for (i = 0; i < 32; ++i) { |
| 5377 | output[32 + i] = input[i]; |
| 5378 | } |
| 5379 | for (i = 0; i < 32; ++i) { |
| 5380 | inputhalf[i] = input[i + 32]; |
| 5381 | } |
| 5382 | daala_fdct32(inputhalf, output); |
| 5383 | } |
| 5384 | |
| 5385 | /* Preserve the "half-right" transform behavior. */ |
| 5386 | void daala_idst64(const tran_low_t *input, tran_low_t *output) { |
| 5387 | int i; |
| 5388 | tran_low_t inputhalf[32]; |
| 5389 | for (i = 0; i < 32; ++i) { |
| 5390 | inputhalf[i] = input[i]; |
| 5391 | } |
| 5392 | for (i = 0; i < 32; ++i) { |
| 5393 | output[i] = input[32 + i]; |
| 5394 | } |
| 5395 | daala_idct32(inputhalf, output + 32); |
| 5396 | } |
| 5397 | |
Nathan E. Egge | 01b1d91 | 2017-09-18 12:02:22 -0400 | [diff] [blame] | 5398 | void daala_idtx64(const tran_low_t *input, tran_low_t *output) { |
| 5399 | int i; |
| 5400 | for (i = 0; i < 64; i++) output[i] = input[i]; |
| 5401 | } |
Nathan E. Egge | d866114 | 2017-09-16 23:57:51 -0400 | [diff] [blame] | 5402 | #endif |