blob: 166f6f962ff4d79e366e1f428a1309a7c2f54826 [file] [log] [blame]
#include "av1/common/daala_tx.h"
#include "av1/common/odintrin.h"
/* clang-format off */
/* Right shift used by the butterfly half-steps in the transform macros below.
   It simply forwards to OD_UNBIASED_RSHIFT32, which is not defined in this
   file (NOTE(review): presumably provided by av1/common/odintrin.h -- confirm). */
# define OD_DCT_RSHIFT(_a, _b) OD_UNBIASED_RSHIFT32(_a, _b)
/* TODO: Daala DCT overflow checks need to be ported as a later test */
/* NOTE(review): the OD_DCT_CHECK_OVERFLOW branch below is empty, so when that
   symbol is defined nothing here defines OD_DCT_OVERFLOW_CHECK and every
   forward-transform macro that invokes it is left with an unresolved name.
   In the default configuration the check expands to nothing; its arguments
   only name the operand (val), multiplier (scale) and rounding offset
   (offset) of the lifting step that follows, plus a call-site number (idx). */
# if defined(OD_DCT_CHECK_OVERFLOW)
# else
# define OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx)
# endif
/* OD_FDCT_2(p0, p1): embedded 2-point orthonormal Type-II forward DCT.
   Both arguments are integer lvalues updated in place by three lifting
   steps, each of the form  x op= (y*c + rounding) >> shift.
   The arguments are expanded several times, so they must be free of side
   effects. */
#define OD_FDCT_2(p0, p1) \
  do { \
    /* Step 1: 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 13573, 16384, 100); \
    (p0) -= (((p1) * 13573 + 16384) >> 15); \
    /* Step 2: 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 5793, 4096, 101); \
    (p1) += (((p0) * 5793 + 4096) >> 13); \
    /* Step 3: 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 3393, 4096, 102); \
    (p0) -= (((p1) * 3393 + 4096) >> 13); \
  } while (0)
/* OD_IDCT_2(p0, p1): embedded 2-point orthonormal Type-II inverse DCT.
   Applies the three lifting steps of OD_FDCT_2 in reverse order with the
   opposite sign, updating both integer lvalues in place.
   The arguments are expanded several times, so they must be free of side
   effects. */
#define OD_IDCT_2(p0, p1) \
  do { \
    /* Undo step 3: 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    (p0) += (((p1) * 3393 + 4096) >> 13); \
    /* Undo step 2: 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    (p1) -= (((p0) * 5793 + 4096) >> 13); \
    /* Undo step 1: 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    (p0) += (((p1) * 13573 + 16384) >> 15); \
  } while (0)
/* OD_FDCT_2_ASYM(p0, p1, p1h): embedded 2-point asymmetric Type-II forward
   DCT. p0 and p1 are integer lvalues updated in place; p1h is only read
   (OD_FDCT_4 passes OD_DCT_RSHIFT(p1, 1) for it). */
#define OD_FDCT_2_ASYM(p0, p1, p1h) \
  do { \
    /* Fold the pre-halved p1 into p0 ... */ \
    (p0) += (p1h); \
    /* ... then rebuild p1 as the difference against the new p0. */ \
    (p1) = (p0) - (p1); \
  } while (0)
/* OD_IDCT_2_ASYM(p0, p1, p1h): embedded 2-point asymmetric Type-II inverse
   DCT. p0 and p1 are integer lvalues updated in place; p1h is an output
   lvalue that receives OD_DCT_RSHIFT(p1, 1) of the updated p1 so the caller
   can reuse it. */
#define OD_IDCT_2_ASYM(p0, p1, p1h) \
  do { \
    (p1) = (p0) - (p1); \
    /* Export the halved difference before it is removed from p0. */ \
    (p1h) = OD_DCT_RSHIFT(p1, 1); \
    (p0) -= (p1h); \
  } while (0)
/* OD_FDST_2(p0, p1): embedded 2-point orthonormal Type-IV forward DST.
   Both arguments are integer lvalues updated in place by three lifting
   steps of the form  x op= (y*c + rounding) >> shift.
   The arguments are expanded several times, so they must be free of side
   effects. */
#define OD_FDST_2(p0, p1) \
  do { \
    /* Step 1: 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 103); \
    (p0) -= (((p1) * 10947 + 8192) >> 14); \
    /* Step 2: 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 473, 256, 104); \
    (p1) += (((p0) * 473 + 256) >> 9); \
    /* Step 3: 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 105); \
    (p0) -= (((p1) * 10947 + 8192) >> 14); \
  } while (0)
/* OD_IDST_2(p0, p1): embedded 2-point orthonormal Type-IV inverse DST.
   Applies the three lifting steps of OD_FDST_2 in reverse order with the
   opposite sign, updating both integer lvalues in place.
   The arguments are expanded several times, so they must be free of side
   effects. */
#define OD_IDST_2(p0, p1) \
  do { \
    /* Undo step 3: 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    (p0) += (((p1) * 10947 + 8192) >> 14); \
    /* Undo step 2: 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    (p1) -= (((p0) * 473 + 256) >> 9); \
    /* Undo step 1: 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    (p0) += (((p1) * 10947 + 8192) >> 14); \
  } while (0)
/* OD_FDST_2_ASYM(p0, p1): embedded 2-point asymmetric Type-IV forward DST.
   Both arguments are integer lvalues updated in place by three lifting
   steps of the form  x op= (y*c + rounding) >> shift.
   The arguments are expanded several times, so they must be free of side
   effects. */
#define OD_FDST_2_ASYM(p0, p1) \
  do { \
    /* Step 1: 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 11507, 8192, 187); \
    (p0) -= (((p1) * 11507 + 8192) >> 14); \
    /* Step 2: 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 669, 512, 188); \
    (p1) += (((p0) * 669 + 512) >> 10); \
    /* Step 3: 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 4573, 2048, 189); \
    (p0) -= (((p1) * 4573 + 2048) >> 12); \
  } while (0)
/* OD_IDST_2_ASYM(p0, p1): embedded 2-point asymmetric Type-IV inverse DST.
   Applies the three lifting steps of OD_FDST_2_ASYM in reverse order with
   the opposite sign, updating both integer lvalues in place.
   The arguments are expanded several times, so they must be free of side
   effects. */
#define OD_IDST_2_ASYM(p0, p1) \
  do { \
    /* Undo step 3: 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    (p0) += (((p1) * 4573 + 2048) >> 12); \
    /* Undo step 2: 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    (p1) -= (((p0) * 669 + 512) >> 10); \
    /* Undo step 1: 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    (p0) += (((p1) * 11507 + 8192) >> 14); \
  } while (0)
#define OD_FDCT_4(q0, q2, q1, q3) \
/* Embedded 4-point orthonormal Type-II fDCT. */ \
do { \
int q2h; \
int q3h; \
q3 = q0 - q3; \
q3h = OD_DCT_RSHIFT(q3, 1); \
q0 -= q3h; \
q2 += q1; \
q2h = OD_DCT_RSHIFT(q2, 1); \
q1 = q2h - q1; \
OD_FDCT_2_ASYM(q0, q2, q2h); \
OD_FDST_2_ASYM(q3, q1); \
} \
while (0)
#define OD_IDCT_4(q0, q2, q1, q3) \
/* Embedded 4-point orthonormal Type-II iDCT. */ \
do { \
int q1h; \
int q3h; \
OD_IDST_2_ASYM(q3, q2); \
OD_IDCT_2_ASYM(q0, q1, q1h); \
q3h = OD_DCT_RSHIFT(q3, 1); \
q0 += q3h; \
q3 = q0 - q3; \
q2 = q1h - q2; \
q1 -= q2; \
} \
while (0)
/* OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h): in-place 4-point asymmetric
   forward DCT. q0, q2, q1 and q3 are integer lvalues that are updated;
   q2h and q3h are only read (OD_FDCT_8 passes OD_DCT_RSHIFT(q2, 1) and
   OD_DCT_RSHIFT(q3, 1) for them).
   After the two butterflies the pairs go through OD_FDCT_2(q0, q2) and
   OD_FDST_2(q3, q1). Arguments are expanded several times, so they must be
   free of side effects. */
#define OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h) \
/* Embedded 4-point asymmetric Type-II fDCT. */ \
do { \
q0 += q3h; \
q3 = q0 - q3; \
q1 = q2h - q1; \
q2 = q1 - q2; \
OD_FDCT_2(q0, q2); \
OD_FDST_2(q3, q1); \
} \
while (0)
/* OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h): in-place 4-point asymmetric
   inverse DCT. q0, q2, q1 and q3 are integer lvalues that are updated;
   q1h and q3h are output lvalues that receive OD_DCT_RSHIFT(q1, 1) and
   OD_DCT_RSHIFT(q3, 1) of the updated values for reuse by the caller.
   Runs OD_IDST_2(q3, q2) and OD_IDCT_2(q0, q1) before the butterflies.
   Arguments are expanded several times, so they must be free of side
   effects. */
#define OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h) \
/* Embedded 4-point asymmetric Type-II iDCT. */ \
do { \
OD_IDST_2(q3, q2); \
OD_IDCT_2(q0, q1); \
q1 = q2 - q1; \
q1h = OD_DCT_RSHIFT(q1, 1); \
q2 = q1h - q2; \
q3 = q0 - q3; \
q3h = OD_DCT_RSHIFT(q3, 1); \
q0 -= q3h; \
} \
while (0)
/* OD_FDST_4(q0, q2, q1, q3): in-place 4-point forward DST on four integer
   lvalues (note the parameter order q0, q2, q1, q3).
   Structure: a three-step lifting rotation on (q2, q1), two butterflies
   using the halved values q0h and q1h, then one three-step lifting rotation
   on (q2, q1) and one on (q3, q0). Every lifting step has the form
   x op= (y*c + rounding) >> shift, preceded by an OD_DCT_OVERFLOW_CHECK that
   names the multiplied operand, the multiplier and the rounding offset.
   The block declares locals q0h and q1h; arguments are expanded several
   times, so they must be free of side effects.
   Some constant comments below span two source lines; the comment keeps the
   macro definition going across that line break. */
#define OD_FDST_4(q0, q2, q1, q3) \
/* Embedded 4-point orthonormal Type-IV fDST. */ \
do { \
int q0h; \
int q1h; \
/* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
OD_DCT_OVERFLOW_CHECK(q1, 13573, 16384, 190); \
q2 += (q1*13573 + 16384) >> 15; \
/* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
OD_DCT_OVERFLOW_CHECK(q2, 5793, 4096, 191); \
q1 -= (q2*5793 + 4096) >> 13; \
/* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
OD_DCT_OVERFLOW_CHECK(q1, 3393, 4096, 192); \
q2 += (q1*3393 + 4096) >> 13; \
q0 += q2; \
q0h = OD_DCT_RSHIFT(q0, 1); \
q2 = q0h - q2; \
q1 += q3; \
q1h = OD_DCT_RSHIFT(q1, 1); \
q3 -= q1h; \
/* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~=
0.524455699240090 */ \
OD_DCT_OVERFLOW_CHECK(q1, 537, 512, 193); \
q2 -= (q1*537 + 512) >> 10; \
/* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
OD_DCT_OVERFLOW_CHECK(q2, 1609, 1024, 194); \
q1 += (q2*1609 + 1024) >> 11; \
/* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~=
0.223847182092655 */ \
OD_DCT_OVERFLOW_CHECK(q1, 7335, 16384, 195); \
q2 += (q1*7335 + 16384) >> 15; \
/* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~=
0.6215036383171189 */ \
OD_DCT_OVERFLOW_CHECK(q0, 5091, 4096, 196); \
q3 += (q0*5091 + 4096) >> 13; \
/* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
OD_DCT_OVERFLOW_CHECK(q3, 5681, 2048, 197); \
q0 -= (q3*5681 + 2048) >> 12; \
/* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~=
0.52204745462729 */ \
OD_DCT_OVERFLOW_CHECK(q0, 4277, 4096, 198); \
q3 += (q0*4277 + 4096) >> 13; \
} \
while (0)
/* OD_IDST_4(q0, q2, q1, q3): in-place 4-point inverse DST on four integer
   lvalues (note the parameter order q0, q2, q1, q3).
   Structure: one three-step lifting rotation on (q3, q0) and one on
   (q1, q2), two butterflies using the halved values q2h and q0h, then a
   final three-step lifting rotation on (q1, q2). Every lifting step has the
   form x op= (y*c + rounding) >> shift.
   The block declares locals q0h and q2h; arguments are expanded several
   times, so they must be free of side effects.
   Some constant comments below span two source lines; the comment keeps the
   macro definition going across that line break. */
#define OD_IDST_4(q0, q2, q1, q3) \
/* Embedded 4-point orthonormal Type-IV iDST. */ \
do { \
int q0h; \
int q2h; \
/* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~=
0.52204745462729 */ \
q3 -= (q0*4277 + 4096) >> 13; \
/* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
q0 += (q3*5681 + 2048) >> 12; \
/* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~=
0.6215036383171189 */ \
q3 -= (q0*5091 + 4096) >> 13; \
/* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~=
0.223847182092655 */ \
q1 -= (q2*7335 + 16384) >> 15; \
/* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
q2 -= (q1*1609 + 1024) >> 11; \
/* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~=
0.524455699240090 */ \
q1 += (q2*537 + 512) >> 10; \
q2h = OD_DCT_RSHIFT(q2, 1); \
q3 += q2h; \
q2 -= q3; \
q0h = OD_DCT_RSHIFT(q0, 1); \
q1 = q0h - q1; \
q0 -= q1; \
/* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
q1 -= (q2*3393 + 4096) >> 13; \
/* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
q2 += (q1*5793 + 4096) >> 13; \
/* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
q1 -= (q2*13573 + 16384) >> 15; \
} \
while (0)
/* OD_FDST_4_ASYM(t0, t0h, t2, t1, t3): in-place 4-point asymmetric forward
   DST. t0, t2, t1 and t3 are integer lvalues that are updated; t0h is only
   read (OD_FDCT_8 passes OD_DCT_RSHIFT(t0, 1) for it).
   Structure: a three-step lifting rotation on (t2, t1), two butterflies,
   then one three-step lifting rotation on (t3, t0) and one on (t2, t1).
   Each OD_DCT_OVERFLOW_CHECK names the operand that the following lifting
   step multiplies, together with that step's multiplier and rounding offset.
   Arguments are expanded several times, so they must be free of side
   effects. */
#define OD_FDST_4_ASYM(t0, t0h, t2, t1, t3) \
  /* Embedded 4-point asymmetric Type-IV fDST. */ \
  do { \
    /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 7489, 4096, 106); \
    t2 -= (t1*7489 + 4096) >> 13; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
    /* The multiplied operand of this step is t2, so t2 is what is checked. */ \
    OD_DCT_OVERFLOW_CHECK(t2, 11585, 8192, 107); \
    t1 += (t2*11585 + 8192) >> 14; \
    /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 108); \
    t2 += (t1*19195 + 16384) >> 15; \
    t3 += OD_DCT_RSHIFT(t2, 1); \
    t2 -= t3; \
    t1 = t0h - t1; \
    t0 -= t1; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 109); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    OD_DCT_OVERFLOW_CHECK(t3, 8035, 4096, 110); \
    t0 -= (t3*8035 + 4096) >> 13; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 111); \
    t3 += (t0*6723 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 112); \
    t2 += (t1*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    OD_DCT_OVERFLOW_CHECK(t2, 6811, 4096, 113); \
    t1 -= (t2*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 114); \
    t2 += (t1*8757 + 8192) >> 14; \
  } \
  while (0)
#define OD_IDST_4_ASYM(t0, t0h, t2, t1, t3) \
/* Embedded 4-point asymmetric Type-IV iDST. */ \
do { \
/* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
t1 -= (t2*8757 + 8192) >> 14; \
/* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
t2 += (t1*6811 + 4096) >> 13; \
/* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
t1 -= (t2*8757 + 8192) >> 14; \
/* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
t3 -= (t0*6723 + 4096) >> 13; \
/* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
t0 += (t3*8035 + 4096) >> 13; \
/* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
t3 -= (t0*6723 + 4096) >> 13; \
t0 += t2; \
t0h = OD_DCT_RSHIFT(t0, 1); \
t2 = t0h - t2; \
t1 += t3; \
t3 -= OD_DCT_RSHIFT(t1, 1); \
/* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
t1 -= (t2*19195 + 16384) >> 15; \
/* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
t2 -= (t1*11585 + 8192) >> 14; \
/* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
t1 += (t2*7489 + 4096) >> 13; \
} \
while (0)
/* OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7): in-place 8-point forward DCT on
   eight integer lvalues (note the permuted parameter order).
   Four butterflies pair (r0, r7), (r6, r1), (r2, r5) and (r4, r3), keeping
   the halved values in locals r7h, r6h, r5h and r4h. The two halves are then
   processed by OD_FDCT_4_ASYM and OD_FDST_4_ASYM, which reuse r4h, r6h and
   r7h.
   Arguments are expanded several times, so they must be free of side
   effects and must not be named like the locals declared here. */
#define OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
/* Embedded 8-point orthonormal Type-II fDCT. */ \
do { \
int r4h; \
int r5h; \
int r6h; \
int r7h; \
r7 = r0 - r7; \
r7h = OD_DCT_RSHIFT(r7, 1); \
r0 -= r7h; \
r6 += r1; \
r6h = OD_DCT_RSHIFT(r6, 1); \
r1 = r6h - r1; \
r5 = r2 - r5; \
r5h = OD_DCT_RSHIFT(r5, 1); \
r2 -= r5h; \
r4 += r3; \
r4h = OD_DCT_RSHIFT(r4, 1); \
r3 = r4h - r3; \
OD_FDCT_4_ASYM(r0, r4, r4h, r2, r6, r6h); \
OD_FDST_4_ASYM(r7, r7h, r3, r5, r1); \
} \
while (0)
/* OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7): in-place 8-point inverse DCT on
   eight integer lvalues (note the permuted parameter order).
   Runs OD_IDST_4_ASYM and OD_IDCT_4_ASYM first; those macros fill the locals
   r7h, r1h and r3h, which the four closing butterflies consume together
   with r5h computed here.
   Arguments are expanded several times, so they must be free of side
   effects and must not be named like the locals declared here. */
#define OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \
/* Embedded 8-point orthonormal Type-II iDCT. */ \
do { \
int r1h; \
int r3h; \
int r5h; \
int r7h; \
OD_IDST_4_ASYM(r7, r7h, r5, r6, r4); \
OD_IDCT_4_ASYM(r0, r2, r1, r1h, r3, r3h); \
r0 += r7h; \
r7 = r0 - r7; \
r6 = r1h - r6; \
r1 -= r6; \
r5h = OD_DCT_RSHIFT(r5, 1); \
r2 += r5h; \
r5 = r2 - r5; \
r4 = r3h - r4; \
r3 -= r4; \
} \
while (0)
/* OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h):
   in-place 8-point asymmetric forward DCT. r0..r7 are integer lvalues that
   are updated; r4h, r5h, r6h and r7h are only read (OD_FDCT_16 passes the
   OD_DCT_RSHIFT(x, 1) halves of the corresponding arguments).
   Four butterflies are followed by OD_FDCT_4 on (r0, r4, r2, r6) and
   OD_FDST_4 on (r7, r3, r5, r1).
   Arguments are expanded several times, so they must be free of side
   effects. */
#define OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h) \
/* Embedded 8-point asymmetric Type-II fDCT. */ \
do { \
r0 += r7h; \
r7 = r0 - r7; \
r1 = r6h - r1; \
r6 -= r1; \
r2 += r5h; \
r5 = r2 - r5; \
r3 = r4h - r3; \
r4 -= r3; \
OD_FDCT_4(r0, r4, r2, r6); \
OD_FDST_4(r7, r3, r5, r1); \
} \
while (0)
/* OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h):
   in-place 8-point asymmetric inverse DCT. r0..r7 are integer lvalues that
   are updated; r1h, r3h, r5h and r7h are output lvalues that receive
   OD_DCT_RSHIFT(x, 1) of the updated r1, r3, r5 and r7 for reuse by the
   caller.
   Runs OD_IDST_4 on (r7, r5, r6, r4) and OD_IDCT_4 on (r0, r2, r1, r3)
   before the four butterflies.
   Arguments are expanded several times, so they must be free of side
   effects. */
#define OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h) \
/* Embedded 8-point asymmetric Type-II iDCT. */ \
do { \
OD_IDST_4(r7, r5, r6, r4); \
OD_IDCT_4(r0, r2, r1, r3); \
r7 = r0 - r7; \
r7h = OD_DCT_RSHIFT(r7, 1); \
r0 -= r7h; \
r1 += r6; \
r1h = OD_DCT_RSHIFT(r1, 1); \
r6 = r1h - r6; \
r5 = r2 - r5; \
r5h = OD_DCT_RSHIFT(r5, 1); \
r2 -= r5h; \
r3 += r4; \
r3h = OD_DCT_RSHIFT(r3, 1); \
r4 = r3h - r4; \
} \
while (0)
/* OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7): in-place 8-point forward DST on
   eight integer lvalues (note the permuted parameter order).
   Structure: three three-step lifting rotations on (t6, t1), (t5, t2) and
   (t4, t3); two layers of butterflies that use the halved values t7h, t2h,
   t0h and t5h; a negation of t7; then four three-step lifting rotations on
   (t0, t7), (t6, t1), (t2, t5) and (t4, t3).
   Each lifting step has the form x op= (y*c + rounding) >> shift and is
   preceded by an OD_DCT_OVERFLOW_CHECK naming the multiplied operand, the
   multiplier and the rounding offset.
   The block declares locals t0h, t2h, t5h and t7h; arguments are expanded
   several times, so they must be free of side effects and must not be named
   like those locals. */
#define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
/* Embedded 8-point orthonormal Type-IV fDST. */ \
do { \
int t0h; \
int t2h; \
int t5h; \
int t7h; \
/* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 115); \
t6 -= (t1*13573 + 16384) >> 15; \
/* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
OD_DCT_OVERFLOW_CHECK(t6, 11585, 8192, 116); \
t1 += (t6*11585 + 8192) >> 14; \
/* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 117); \
t6 -= (t1*13573 + 16384) >> 15; \
/* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
OD_DCT_OVERFLOW_CHECK(t2, 21895, 16384, 118); \
t5 -= (t2*21895 + 16384) >> 15; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
OD_DCT_OVERFLOW_CHECK(t5, 15137, 8192, 119); \
t2 += (t5*15137 + 8192) >> 14; \
/* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
OD_DCT_OVERFLOW_CHECK(t2, 10947, 8192, 120); \
t5 -= (t2*10947 + 8192) >> 14; \
/* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 121); \
t4 -= (t3*3259 + 8192) >> 14; \
/* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
OD_DCT_OVERFLOW_CHECK(t4, 3135, 4096, 122); \
t3 += (t4*3135 + 4096) >> 13; \
/* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 123); \
t4 -= (t3*3259 + 8192) >> 14; \
t7 += t1; \
t7h = OD_DCT_RSHIFT(t7, 1); \
t1 -= t7h; \
t2 = t3 - t2; \
t2h = OD_DCT_RSHIFT(t2, 1); \
t3 -= t2h; \
t0 -= t6; \
t0h = OD_DCT_RSHIFT(t0, 1); \
t6 += t0h; \
t5 = t4 - t5; \
t5h = OD_DCT_RSHIFT(t5, 1); \
t4 -= t5h; \
t1 += t5h; \
t5 = t1 - t5; \
t4 += t0h; \
t0 -= t4; \
t6 -= t2h; \
t2 += t6; \
t3 -= t7h; \
t7 += t3; \
/* TODO: Can we move this into another operation */ \
t7 = -t7; \
/* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 124); \
t0 -= (t7*7425 + 4096) >> 13; \
/* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
OD_DCT_OVERFLOW_CHECK(t0, 8153, 4096, 125); \
t7 += (t0*8153 + 4096) >> 13; \
/* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 126); \
t0 -= (t7*7425 + 4096) >> 13; \
/* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 127); \
t6 -= (t1*4861 + 16384) >> 15; \
/* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
OD_DCT_OVERFLOW_CHECK(t6, 1189, 2048, 128); \
t1 += (t6*1189 + 2048) >> 12; \
/* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 129); \
t6 -= (t1*4861 + 16384) >> 15; \
/* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 130); \
t2 -= (t5*2455 + 2048) >> 12; \
/* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
OD_DCT_OVERFLOW_CHECK(t2, 7225, 4096, 131); \
t5 += (t2*7225 + 4096) >> 13; \
/* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 132); \
t2 -= (t5*2455 + 2048) >> 12; \
/* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 133); \
t4 -= (t3*11725 + 16384) >> 15; \
/* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
OD_DCT_OVERFLOW_CHECK(t4, 5197, 4096, 134); \
t3 += (t4*5197 + 4096) >> 13; \
/* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 135); \
t4 -= (t3*11725 + 16384) >> 15; \
} \
while (0)
/* OD_IDST_8(t0, t4, t2, t6, t1, t5, t3, t7): in-place 8-point inverse DST on
   eight integer lvalues (note the permuted parameter order).
   Structure: four three-step lifting rotations on (t1, t6), (t2, t5),
   (t3, t4) and (t0, t7); a negation of t7; two layers of butterflies using
   the halved values t7h_, t2h, t0h and t5h_; then three three-step lifting
   rotations on (t1, t6), (t5, t2) and (t3, t4).
   The block declares locals t0h, t2h, t5h_ and t7h_ (NOTE(review): the
   trailing underscores presumably avoid clashing with caller variable
   names -- confirm). Arguments are expanded several times, so they must be
   free of side effects and must not be named like those locals. */
#define OD_IDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
/* Embedded 8-point orthonormal Type-IV iDST. */ \
do { \
int t0h; \
int t2h; \
int t5h_; \
int t7h_; \
/* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
t1 += (t6*11725 + 16384) >> 15; \
/* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \
t6 -= (t1*5197 + 4096) >> 13; \
/* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \
t1 += (t6*11725 + 16384) >> 15; \
/* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
t2 += (t5*2455 + 2048) >> 12; \
/* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
t5 -= (t2*7225 + 4096) >> 13; \
/* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \
t2 += (t5*2455 + 2048) >> 12; \
/* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
t3 += (t4*4861 + 16384) >> 15; \
/* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \
t4 -= (t3*1189 + 2048) >> 12; \
/* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \
t3 += (t4*4861 + 16384) >> 15; \
/* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
t0 += (t7*7425 + 4096) >> 13; \
/* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \
t7 -= (t0*8153 + 4096) >> 13; \
/* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \
t0 += (t7*7425 + 4096) >> 13; \
/* TODO: Can we move this into another operation */ \
t7 = -t7; \
t7 -= t6; \
t7h_ = OD_DCT_RSHIFT(t7, 1); \
t6 += t7h_; \
t2 -= t3; \
t2h = OD_DCT_RSHIFT(t2, 1); \
t3 += t2h; \
t0 += t1; \
t0h = OD_DCT_RSHIFT(t0, 1); \
t1 -= t0h; \
t5 = t4 - t5; \
t5h_ = OD_DCT_RSHIFT(t5, 1); \
t4 -= t5h_; \
t1 += t5h_; \
t5 = t1 - t5; \
t3 -= t0h; \
t0 += t3; \
t6 += t2h; \
t2 = t6 - t2; \
t4 += t7h_; \
t7 -= t4; \
/* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
t1 += (t6*3259 + 8192) >> 14; \
/* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
t6 -= (t1*3135 + 4096) >> 13; \
/* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
t1 += (t6*3259 + 8192) >> 14; \
/* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
t5 += (t2*10947 + 8192) >> 14; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
t2 -= (t5*15137 + 8192) >> 14; \
/* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
t5 += (t2*21895 + 16384) >> 15; \
/* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
t3 += (t4*13573 + 16384) >> 15; \
/* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
t4 -= (t3*11585 + 8192) >> 14; \
/* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
t3 += (t4*13573 + 16384) >> 15; \
} \
while (0)
/* Rewrite this so that t0h can be passed in. */
/* OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7): in-place 8-point
   asymmetric forward DST on eight integer lvalues (note the permuted
   parameter order).
   Structure: four three-step lifting rotations on (t6, t1), (t5, t2),
   (t4, t3) and (t7, t0); two layers of butterflies using the halved values
   t0h, t2h, t5h and t7h; then three three-step lifting rotations on
   (t1, t6), (t5, t2) and (t3, t4).
   Each lifting step has the form x op= (y*c + rounding) >> shift and is
   preceded by an OD_DCT_OVERFLOW_CHECK naming the multiplied operand, the
   multiplier and the rounding offset.
   The block declares locals t0h, t2h, t5h and t7h; arguments are expanded
   several times, so they must be free of side effects and must not be named
   like those locals. */
#define OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
/* Embedded 8-point asymmetric Type-IV fDST. */ \
do { \
int t0h; \
int t2h; \
int t5h; \
int t7h; \
/* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
OD_DCT_OVERFLOW_CHECK(t1, 1035, 1024, 199); \
t6 += (t1*1035 + 1024) >> 11; \
/* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
OD_DCT_OVERFLOW_CHECK(t6, 3675, 2048, 200); \
t1 -= (t6*3675 + 2048) >> 12; \
/* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
OD_DCT_OVERFLOW_CHECK(t1, 851, 4096, 201); \
t6 -= (t1*851 + 4096) >> 13; \
/* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
OD_DCT_OVERFLOW_CHECK(t2, 4379, 4096, 202); \
t5 += (t2*4379 + 4096) >> 13; \
/* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
OD_DCT_OVERFLOW_CHECK(t5, 10217, 4096, 203); \
t2 -= (t5*10217 + 4096) >> 13; \
/* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
OD_DCT_OVERFLOW_CHECK(t2, 4379, 8192, 204); \
t5 += (t2*4379 + 8192) >> 14; \
/* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
OD_DCT_OVERFLOW_CHECK(t3, 12905, 8192, 205); \
t4 += (t3*12905 + 8192) >> 14; \
/* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
OD_DCT_OVERFLOW_CHECK(t4, 3363, 4096, 206); \
t3 -= (t4*3363 + 4096) >> 13; \
/* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
OD_DCT_OVERFLOW_CHECK(t3, 3525, 2048, 207); \
t4 -= (t3*3525 + 2048) >> 12; \
/* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
OD_DCT_OVERFLOW_CHECK(t0, 5417, 4096, 208); \
t7 += (t0*5417 + 4096) >> 13; \
/* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
OD_DCT_OVERFLOW_CHECK(t7, 5765, 2048, 209); \
t0 -= (t7*5765 + 2048) >> 12; \
/* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
OD_DCT_OVERFLOW_CHECK(t0, 2507, 2048, 210); \
t7 += (t0*2507 + 2048) >> 12; \
t0 += t1; \
t0h = OD_DCT_RSHIFT(t0, 1); \
t1 -= t0h; \
t2 -= t3; \
t2h = OD_DCT_RSHIFT(t2, 1); \
t3 += t2h; \
t5 -= t4; \
t5h = OD_DCT_RSHIFT(t5, 1); \
t4 += t5h; \
t7 += t6; \
t7h = OD_DCT_RSHIFT(t7, 1); \
t6 = t7h - t6; \
t4 = t7h - t4; \
t7 -= t4; \
t1 += t5h; \
t5 = t1 - t5; \
t6 += t2h; \
t2 = t6 - t2; \
t3 -= t0h; \
t0 += t3; \
/* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 211); \
t1 += (t6*3259 + 8192) >> 14; \
/* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
OD_DCT_OVERFLOW_CHECK(t1, 3135, 4096, 212); \
t6 -= (t1*3135 + 4096) >> 13; \
/* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 213); \
t1 += (t6*3259 + 8192) >> 14; \
/* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 214); \
t5 += (t2*2737 + 2048) >> 12; \
/* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
OD_DCT_OVERFLOW_CHECK(t5, 473, 256, 215); \
t2 -= (t5*473 + 256) >> 9; \
/* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 216); \
t5 += (t2*2737 + 2048) >> 12; \
/* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 217); \
t3 += (t4*3393 + 4096) >> 13; \
/* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
OD_DCT_OVERFLOW_CHECK(t3, 5793, 4096, 218); \
t4 -= (t3*5793 + 4096) >> 13; \
/* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 219); \
t3 += (t4*3393 + 4096) >> 13; \
} \
while (0)
/* OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7): in-place 8-point
   asymmetric inverse DST on eight integer lvalues (note the permuted
   parameter order).
   Structure: three three-step lifting rotations on (t6, t1), (t5, t2) and
   (t4, t3); two layers of butterflies using the halved values t0h, t2h,
   t5h__ and t7h__; then four three-step lifting rotations on (t7, t0),
   (t1, t6), (t5, t2) and (t3, t4).
   The block declares locals t0h, t2h, t5h__ and t7h__ (NOTE(review): the
   trailing underscores presumably avoid clashing with caller variable
   names -- confirm). Arguments are expanded several times, so they must be
   free of side effects and must not be named like those locals. */
#define OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
/* Embedded 8-point asymmetric Type-IV iDST. */ \
do { \
int t0h; \
int t2h; \
int t5h__; \
int t7h__; \
/* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
t6 -= (t1*3393 + 4096) >> 13; \
/* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
t1 += (t6*5793 + 4096) >> 13; \
/* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
t6 -= (t1*3393 + 4096) >> 13; \
/* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
t5 -= (t2*2737 + 2048) >> 12; \
/* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
t2 += (t5*473 + 256) >> 9; \
/* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
t5 -= (t2*2737 + 2048) >> 12; \
/* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
t4 -= (t3*3259 + 8192) >> 14; \
/* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
t3 += (t4*3135 + 4096) >> 13; \
/* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
t4 -= (t3*3259 + 8192) >> 14; \
t0 -= t6; \
t0h = OD_DCT_RSHIFT(t0, 1); \
t6 += t0h; \
t2 = t3 - t2; \
t2h = OD_DCT_RSHIFT(t2, 1); \
t3 -= t2h; \
t5 = t4 - t5; \
t5h__ = OD_DCT_RSHIFT(t5, 1); \
t4 -= t5h__; \
t7 += t1; \
t7h__ = OD_DCT_RSHIFT(t7, 1); \
t1 = t7h__ - t1; \
t3 = t7h__ - t3; \
t7 -= t3; \
t1 -= t5h__; \
t5 += t1; \
t6 -= t2h; \
t2 += t6; \
t4 += t0h; \
t0 -= t4; \
/* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
t7 -= (t0*2507 + 2048) >> 12; \
/* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
t0 += (t7*5765 + 2048) >> 12; \
/* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
t7 -= (t0*5417 + 4096) >> 13; \
/* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
t1 += (t6*3525 + 2048) >> 12; \
/* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
t6 += (t1*3363 + 4096) >> 13; \
/* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
t1 -= (t6*12905 + 8192) >> 14; \
/* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
t5 -= (t2*4379 + 8192) >> 14; \
/* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
t2 += (t5*10217 + 4096) >> 13; \
/* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
t5 -= (t2*4379 + 4096) >> 13; \
/* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
t3 += (t4*851 + 4096) >> 13; \
/* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
t4 += (t3*3675 + 2048) >> 12; \
/* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
t3 -= (t4*1035 + 1024) >> 11; \
} \
while (0)
/* OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf):
   in-place 16-point forward DCT on sixteen integer lvalues (note the
   permuted parameter order; the suffixes 0..f are hexadecimal indices).
   Eight butterflies pair (s0, sf), (se, s1), (s2, sd), (sc, s3), (s4, sb),
   (sa, s5), (s6, s9) and (s8, s7). The halves that OD_FDCT_8_ASYM needs are
   kept in locals s8h, sah, sch and seh; sfh is only used inside its own
   butterfly. The two halves are then processed by OD_FDCT_8_ASYM and
   OD_FDST_8_ASYM.
   Arguments are expanded several times, so they must be free of side
   effects and must not be named like the locals declared here. */
#define OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
s1, s9, s5, sd, s3, sb, s7, sf) \
/* Embedded 16-point orthonormal Type-II fDCT. */ \
do { \
int s8h; \
int sah; \
int sch; \
int seh; \
int sfh; \
sf = s0 - sf; \
sfh = OD_DCT_RSHIFT(sf, 1); \
s0 -= sfh; \
se += s1; \
seh = OD_DCT_RSHIFT(se, 1); \
s1 = seh - s1; \
sd = s2 - sd; \
s2 -= OD_DCT_RSHIFT(sd, 1); \
sc += s3; \
sch = OD_DCT_RSHIFT(sc, 1); \
s3 = sch - s3; \
sb = s4 - sb; \
s4 -= OD_DCT_RSHIFT(sb, 1); \
sa += s5; \
sah = OD_DCT_RSHIFT(sa, 1); \
s5 = sah - s5; \
s9 = s6 - s9; \
s6 -= OD_DCT_RSHIFT(s9, 1); \
s8 += s7; \
s8h = OD_DCT_RSHIFT(s8, 1); \
s7 = s8h - s7; \
OD_FDCT_8_ASYM(s0, s8, s8h, s4, sc, sch, s2, sa, sah, s6, se, seh); \
OD_FDST_8_ASYM(sf, s7, sb, s3, sd, s5, s9, s1); \
} \
while (0)
/* OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf):
   in-place 16-point inverse DCT on sixteen integer lvalues (note the
   permuted parameter order; the suffixes 0..f are hexadecimal indices).
   Runs OD_IDST_8_ASYM and OD_IDCT_8_ASYM first; OD_IDCT_8_ASYM fills the
   locals s1h, s3h, s5h and s7h, which the eight closing butterflies consume
   together with sfh computed here.
   Arguments are expanded several times, so they must be free of side
   effects and must not be named like the locals declared here. */
#define OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
s1, s9, s5, sd, s3, sb, s7, sf) \
/* Embedded 16-point orthonormal Type-II iDCT. */ \
do { \
int s1h; \
int s3h; \
int s5h; \
int s7h; \
int sfh; \
OD_IDST_8_ASYM(sf, sb, sd, s9, se, sa, sc, s8); \
OD_IDCT_8_ASYM(s0, s4, s2, s6, s1, s1h, s5, s5h, s3, s3h, s7, s7h); \
sfh = OD_DCT_RSHIFT(sf, 1); \
s0 += sfh; \
sf = s0 - sf; \
se = s1h - se; \
s1 -= se; \
s2 += OD_DCT_RSHIFT(sd, 1); \
sd = s2 - sd; \
sc = s3h - sc; \
s3 -= sc; \
s4 += OD_DCT_RSHIFT(sb, 1); \
sb = s4 - sb; \
sa = s5h - sa; \
s5 -= sa; \
s6 += OD_DCT_RSHIFT(s9, 1); \
s9 = s6 - s9; \
s8 = s7h - s8; \
s7 -= s8; \
} \
while (0)
/* OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh,
                   t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh):
   in-place 16-point asymmetric forward DCT. t0..tf (hexadecimal suffixes)
   are integer lvalues that are updated; the eight *h arguments are only
   read (NOTE(review): presumably the OD_DCT_RSHIFT(x, 1) halves of the
   matching arguments, as in the smaller _ASYM macros -- no caller is
   visible here to confirm).
   Eight butterflies are followed by OD_FDCT_8 on the first half and
   OD_FDST_8 on the second half.
   Arguments are expanded several times, so they must be free of side
   effects. */
#define OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh, \
t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh) \
/* Embedded 16-point asymmetric Type-II fDCT. */ \
do { \
t0 += tfh; \
tf = t0 - tf; \
t1 -= teh; \
te += t1; \
t2 += tdh; \
td = t2 - td; \
t3 -= tch; \
tc += t3; \
t4 += tbh; \
tb = t4 - tb; \
t5 -= tah; \
ta += t5; \
t6 += t9h; \
t9 = t6 - t9; \
t7 -= t8h; \
t8 += t7; \
OD_FDCT_8(t0, t8, t4, tc, t2, ta, t6, te); \
OD_FDST_8(tf, t7, tb, t3, td, t5, t9, t1); \
} \
while (0)
/* OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te,
                   t1, t1h, t9, t9h, t5, t5h, td, tdh,
                   t3, t3h, tb, tbh, t7, t7h, tf, tfh):
   in-place 16-point asymmetric inverse DCT. t0..tf (hexadecimal suffixes)
   are integer lvalues that are updated; the eight *h arguments are output
   lvalues that receive OD_DCT_RSHIFT(x, 1) of the updated odd-indexed
   values for reuse by the caller.
   Runs OD_IDST_8 on the second half and OD_IDCT_8 on the first half before
   the eight butterflies.
   Arguments are expanded several times, so they must be free of side
   effects. */
#define OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh) \
/* Embedded 16-point asymmetric Type-II iDCT. */ \
do { \
OD_IDST_8(tf, tb, td, t9, te, ta, tc, t8); \
OD_IDCT_8(t0, t4, t2, t6, t1, t5, t3, t7); \
t1 -= te; \
t1h = OD_DCT_RSHIFT(t1, 1); \
te += t1h; \
t9 = t6 - t9; \
t9h = OD_DCT_RSHIFT(t9, 1); \
t6 -= t9h; \
t5 -= ta; \
t5h = OD_DCT_RSHIFT(t5, 1); \
ta += t5h; \
td = t2 - td; \
tdh = OD_DCT_RSHIFT(td, 1); \
t2 -= tdh; \
t3 -= tc; \
t3h = OD_DCT_RSHIFT(t3, 1); \
tc += t3h; \
tb = t4 - tb; \
tbh = OD_DCT_RSHIFT(tb, 1); \
t4 -= tbh; \
t7 -= t8; \
t7h = OD_DCT_RSHIFT(t7, 1); \
t8 += t7h; \
tf = t0 - tf; \
tfh = OD_DCT_RSHIFT(tf, 1); \
t0 -= tfh; \
} \
while (0)
/* OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf):
   in-place 16-point forward DST on sixteen integer lvalues (note the
   permuted parameter order; the suffixes 0..f are hexadecimal indices).
   Structure: five three-step lifting rotations, a layer of butterflies,
   four more three-step rotations, two further butterfly layers, and finally
   eight three-step rotations.
   Each lifting step has the form x op= (y*c + rounding) >> shift and is
   preceded by an OD_DCT_OVERFLOW_CHECK that names the operand the step
   multiplies, the multiplier and the rounding offset the step adds.
   The block declares locals s0h, s2h, sdh and sfh; arguments are expanded
   several times, so they must be free of side effects and must not be named
   like those locals. */
#define OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
  s1, s9, s5, sd, s3, sb, s7, sf) \
  /* Embedded 16-point orthonormal Type-IV fDST. */ \
  do { \
    int s0h; \
    int s2h; \
    int sdh; \
    int sfh; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    /* The multiplied operand of this step is se, so se is what is checked. */ \
    OD_DCT_OVERFLOW_CHECK(se, 13573, 16384, 220); \
    s1 += (se*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(s1, 11585, 8192, 221); \
    se -= (s1*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(se, 13573, 16384, 222); \
    s1 += (se*13573 + 16384) >> 15; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 223); \
    sd += (s2*21895 + 16384) >> 15; \
    /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
    /* The rounding offset of this step is 8192 (shift by 14). */ \
    OD_DCT_OVERFLOW_CHECK(sd, 15137, 8192, 224); \
    s2 -= (sd*15137 + 8192) >> 14; \
    /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
    OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 225); \
    sd += (s2*21895 + 16384) >> 15; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 226); \
    sc += (s3*3259 + 8192) >> 14; \
    /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
    OD_DCT_OVERFLOW_CHECK(sc, 3135, 4096, 227); \
    s3 -= (sc*3135 + 4096) >> 13; \
    /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
    OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 228); \
    sc += (s3*3259 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 229); \
    sa += (s5*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(sa, 11585, 8192, 230); \
    s5 -= (sa*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 231); \
    sa += (s5*13573 + 16384) >> 15; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 232); \
    s6 += (s9*13573 + 16384) >> 15; \
    /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
    OD_DCT_OVERFLOW_CHECK(s6, 11585, 8192, 233); \
    s9 -= (s6*11585 + 8192) >> 14; \
    /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
    OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 234); \
    s6 += (s9*13573 + 16384) >> 15; \
    sf += se; \
    sfh = OD_DCT_RSHIFT(sf, 1); \
    se = sfh - se; \
    s0 += s1; \
    s0h = OD_DCT_RSHIFT(s0, 1); \
    s1 = s0h - s1; \
    s2 = s3 - s2; \
    s2h = OD_DCT_RSHIFT(s2, 1); \
    s3 -= s2h; \
    sd -= sc; \
    sdh = OD_DCT_RSHIFT(sd, 1); \
    sc += sdh; \
    sa = s4 - sa; \
    s4 -= OD_DCT_RSHIFT(sa, 1); \
    s5 += sb; \
    sb = OD_DCT_RSHIFT(s5, 1) - sb; \
    s8 += s6; \
    s6 -= OD_DCT_RSHIFT(s8, 1); \
    s7 = s9 - s7; \
    s9 -= OD_DCT_RSHIFT(s7, 1); \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 235); \
    s4 += (sb*6723 + 4096) >> 13; \
    /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
    OD_DCT_OVERFLOW_CHECK(s4, 16069, 8192, 236); \
    sb -= (s4*16069 + 8192) >> 14; \
    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
    OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 237); \
    s4 += (sb*6723 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 238); \
    sa += (s5*8757 + 8192) >> 14; \
    /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
    OD_DCT_OVERFLOW_CHECK(sa, 6811, 4096, 239); \
    s5 -= (sa*6811 + 4096) >> 13; \
    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
    OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 240); \
    sa += (s5*8757 + 8192) >> 14; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 241); \
    s6 += (s9*2485 + 4096) >> 13; \
    /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
    OD_DCT_OVERFLOW_CHECK(s6, 4551, 4096, 242); \
    s9 -= (s6*4551 + 4096) >> 13; \
    /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
    OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 243); \
    s6 += (s9*2485 + 4096) >> 13; \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 244); \
    s7 += (s8*3227 + 16384) >> 15; \
    /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
    OD_DCT_OVERFLOW_CHECK(s7, 6393, 16384, 245); \
    s8 -= (s7*6393 + 16384) >> 15; \
    /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
    OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 246); \
    s7 += (s8*3227 + 16384) >> 15; \
    s1 -= s2h; \
    s2 += s1; \
    se += sdh; \
    sd = se - sd; \
    s3 += sfh; \
    sf -= s3; \
    sc = s0h - sc; \
    s0 -= sc; \
    sb += OD_DCT_RSHIFT(s8, 1); \
    s8 = sb - s8; \
    s4 += OD_DCT_RSHIFT(s7, 1); \
    s7 -= s4; \
    s6 += OD_DCT_RSHIFT(s5, 1); \
    s5 = s6 - s5; \
    s9 -= OD_DCT_RSHIFT(sa, 1); \
    sa += s9; \
    s8 += s0; \
    s0 -= OD_DCT_RSHIFT(s8, 1); \
    sf += s7; \
    s7 = OD_DCT_RSHIFT(sf, 1) - s7; \
    s1 -= s6; \
    s6 += OD_DCT_RSHIFT(s1, 1); \
    s9 += se; \
    se = OD_DCT_RSHIFT(s9, 1) - se; \
    s2 += sa; \
    sa = OD_DCT_RSHIFT(s2, 1) - sa; \
    s5 += sd; \
    sd -= OD_DCT_RSHIFT(s5, 1); \
    s4 = sc - s4; \
    sc -= OD_DCT_RSHIFT(s4, 1); \
    s3 -= sb; \
    sb += OD_DCT_RSHIFT(s3, 1); \
    /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(sf, 2799, 2048, 247); \
    s0 -= (sf*2799 + 2048) >> 12; \
    /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s0, 2893, 1024, 248); \
    sf += (s0*2893 + 1024) >> 11; \
    /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(sf, 5397, 4096, 249); \
    s0 -= (sf*5397 + 4096) >> 13; \
    /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s1, 41, 32, 250); \
    se += (s1*41 + 32) >> 6; \
    /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(se, 2865, 1024, 251); \
    s1 -= (se*2865 + 1024) >> 11; \
    /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s1, 4641, 4096, 252); \
    se += (s1*4641 + 4096) >> 13; \
    /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s2, 2473, 2048, 253); \
    sd += (s2*2473 + 2048) >> 12; \
    /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(sd, 5619, 2048, 254); \
    s2 -= (sd*5619 + 2048) >> 12; \
    /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s2, 7839, 8192, 255); \
    sd += (s2*7839 + 8192) >> 14; \
    /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s3, 5747, 4096, 256); \
    sc -= (s3*5747 + 4096) >> 13; \
    /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(sc, 3903, 4096, 257); \
    s3 += (sc*3903 + 4096) >> 13; \
    /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s3, 5701, 4096, 258); \
    sc += (s3*5701 + 4096) >> 13; \
    /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s4, 4471, 4096, 259); \
    sb += (s4*4471 + 4096) >> 13; \
    /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(sb, 1309, 512, 260); \
    s4 -= (sb*1309 + 512) >> 10; \
    /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s4, 5067, 8192, 261); \
    sb += (s4*5067 + 8192) >> 14; \
    /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s5, 2217, 2048, 262); \
    sa -= (s5*2217 + 2048) >> 12; \
    /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] ~= 0.72705107329128 */ \
    OD_DCT_OVERFLOW_CHECK(sa, 1489, 1024, 263); \
    s5 += (sa*1489 + 1024) >> 11; \
    /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s5, 75, 128, 264); \
    sa += (s5*75 + 128) >> 8; \
    /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s9, 2087, 2048, 265); \
    s6 -= (s9*2087 + 2048) >> 12; \
    /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s6, 4653, 2048, 266); \
    s9 += (s6*4653 + 2048) >> 12; \
    /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s9, 4545, 16384, 267); \
    s6 -= (s9*4545 + 16384) >> 15; \
    /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s8, 2053, 2048, 268); \
    s7 += (s8*2053 + 2048) >> 12; \
    /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s7, 1945, 1024, 269); \
    s8 -= (s7*1945 + 1024) >> 11; \
    /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
    OD_DCT_OVERFLOW_CHECK(s8, 1651, 16384, 270); \
    s7 -= (s8*1651 + 16384) >> 15; \
  } \
  while (0)
#define OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
s1, s9, s5, sd, s3, sb, s7, sf) \
/* Embedded 16-point orthonormal Type-IV iDST. */ \
do { \
int s0h; \
int s4h; \
int sbh; \
int sfh; \
/* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
se += (s1*1651 + 16384) >> 15; \
/* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
s1 += (se*1945 + 1024) >> 11; \
/* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
se -= (s1*2053 + 2048) >> 12; \
/* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
s6 += (s9*4545 + 16384) >> 15; \
/* 4653/32768 ~= Sqrt[2]*Sin[19*Pi/64] */ \
s9 -= (s6*4653 + 2048) >> 12; \
/* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
s6 += (s9*2087 + 2048) >> 12; \
/* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
s5 -= (sa*75 + 128) >> 8; \
/* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] */ \
sa -= (s5*1489 + 1024) >> 11; \
/* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
s5 += (sa*2217 + 2048) >> 12; \
/* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
sd -= (s2*5067 + 8192) >> 14; \
/* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
s2 += (sd*1309 + 512) >> 10; \
/* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
sd -= (s2*4471 + 4096) >> 13; \
/* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
s3 -= (sc*5701 + 4096) >> 13; \
/* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \
sc -= (s3*3903 + 4096) >> 13; \
/* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
s3 += (sc*5747 + 4096) >> 13; \
/* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
sb -= (s4*7839 + 8192) >> 14; \
/* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
s4 += (sb*5619 + 2048) >> 12; \
/* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
sb -= (s4*2473 + 2048) >> 12; \
/* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
s7 -= (s8*4641 + 4096) >> 13; \
/* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
s8 += (s7*2865 + 1024) >> 11; \
/* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
s7 -= (s8*41 + 32) >> 6; \
/* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
s0 += (sf*5397 + 4096) >> 13; \
/* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
sf -= (s0*2893 + 1024) >> 11; \
/* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
s0 += (sf*2799 + 2048) >> 12; \
sd -= OD_DCT_RSHIFT(sc, 1); \
sc += sd; \
s3 += OD_DCT_RSHIFT(s2, 1); \
s2 = s3 - s2; \
sb += OD_DCT_RSHIFT(sa, 1); \
sa -= sb; \
s5 = OD_DCT_RSHIFT(s4, 1) - s5; \
s4 -= s5; \
s7 = OD_DCT_RSHIFT(s9, 1) - s7; \
s9 -= s7; \
s6 -= OD_DCT_RSHIFT(s8, 1); \
s8 += s6; \
se = OD_DCT_RSHIFT(sf, 1) - se; \
sf -= se; \
s0 += OD_DCT_RSHIFT(s1, 1); \
s1 -= s0; \
s5 -= s9; \
s9 += OD_DCT_RSHIFT(s5, 1); \
sa = s6 - sa; \
s6 -= OD_DCT_RSHIFT(sa, 1); \
se += s2; \
s2 -= OD_DCT_RSHIFT(se, 1); \
s1 = sd - s1; \
sd -= OD_DCT_RSHIFT(s1, 1); \
s0 += s3; \
s0h = OD_DCT_RSHIFT(s0, 1); \
s3 = s0h - s3; \
sf += sc; \
sfh = OD_DCT_RSHIFT(sf, 1); \
sc -= sfh; \
sb = s7 - sb; \
sbh = OD_DCT_RSHIFT(sb, 1); \
s7 -= sbh; \
s4 -= s8; \
s4h = OD_DCT_RSHIFT(s4, 1); \
s8 += s4h; \
/* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
se -= (s1*3227 + 16384) >> 15; \
/* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
s1 += (se*6393 + 16384) >> 15; \
/* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
se -= (s1*3227 + 16384) >> 15; \
/* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
s6 -= (s9*2485 + 4096) >> 13; \
/* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
s9 += (s6*4551 + 4096) >> 13; \
/* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
s6 -= (s9*2485 + 4096) >> 13; \
/* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
s5 -= (sa*8757 + 8192) >> 14; \
/* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
sa += (s5*6811 + 4096) >> 13; \
/* 8757/16384 ~= Tan[5*Pi/32]) ~= 0.534511135950792 */ \
s5 -= (sa*8757 + 8192) >> 14; \
/* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \
s2 -= (sd*6723 + 4096) >> 13; \
/* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
sd += (s2*16069 + 8192) >> 14; \
/* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
s2 -= (sd*6723 + 4096) >> 13; \
s9 += OD_DCT_RSHIFT(se, 1); \
se = s9 - se; \
s6 += OD_DCT_RSHIFT(s1, 1); \
s1 -= s6; \
sd = OD_DCT_RSHIFT(sa, 1) - sd; \
sa -= sd; \
s2 += OD_DCT_RSHIFT(s5, 1); \
s5 = s2 - s5; \
s3 -= sbh; \
sb += s3; \
sc += s4h; \
s4 = sc - s4; \
s8 = s0h - s8; \
s0 -= s8; \
s7 = sfh - s7; \
sf -= s7; \
/* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
s6 -= (s9*13573 + 16384) >> 15; \
/* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
s9 += (s6*11585 + 8192) >> 14; \
/* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
s6 -= (s9*13573 + 16384) >> 15; \
/* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
s5 -= (sa*13573 + 16384) >> 15; \
/* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
sa += (s5*11585 + 8192) >> 14; \
/* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
s5 -= (sa*13573 + 16384) >> 15; \
/* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
s3 -= (sc*3259 + 8192) >> 14; \
/* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
sc += (s3*3135 + 4096) >> 13; \
/* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
s3 -= (sc*3259 + 8192) >> 14; \
/* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
sb -= (s4*21895 + 16384) >> 15; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
s4 += (sb*15137 + 8192) >> 14; \
/* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
sb -= (s4*21895 + 16384) >> 15; \
/* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
s8 -= (s7*13573 + 16384) >> 15; \
/* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
s7 += (s8*11585 + 8192) >> 14; \
/* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
s8 -= (s7*13573 + 16384) >> 15; \
} \
while (0)
/* TODO: rewrite this to match OD_FDST_16. */
#define OD_FDST_16_ASYM(t0, t0h, t8, t4, t4h, tc, t2, ta, t6, te, \
t1, t9, t5, td, t3, tb, t7, t7h, tf) \
/* Embedded 16-point asymmetric Type-IV fDST. */ \
do { \
int t2h; \
int t3h; \
int t6h; \
int t8h; \
int t9h; \
int tch; \
int tdh; \
/* TODO: Can we move these into another operation */ \
t8 = -t8; \
t9 = -t9; \
ta = -ta; \
tb = -tb; \
td = -td; \
/* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 136); \
t1 -= (te*13573 + 8192) >> 14; \
/* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
OD_DCT_OVERFLOW_CHECK(t1, 11585, 16384, 137); \
te += (t1*11585 + 16384) >> 15; \
/* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 138); \
t1 -= (te*13573 + 8192) >> 14; \
/* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
OD_DCT_OVERFLOW_CHECK(td, 4161, 8192, 139); \
t2 += (td*4161 + 8192) >> 14; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
OD_DCT_OVERFLOW_CHECK(t2, 15137, 8192, 140); \
td -= (t2*15137 + 8192) >> 14; \
/* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
OD_DCT_OVERFLOW_CHECK(td, 14341, 8192, 141); \
t2 += (td*14341 + 8192) >> 14; \
/* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
OD_DCT_OVERFLOW_CHECK(t3, 14341, 8192, 142); \
tc -= (t3*14341 + 8192) >> 14; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
OD_DCT_OVERFLOW_CHECK(tc, 15137, 8192, 143); \
t3 += (tc*15137 + 8192) >> 14; \
/* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
OD_DCT_OVERFLOW_CHECK(t3, 4161, 8192, 144); \
tc -= (t3*4161 + 8192) >> 14; \
te = t0h - te; \
t0 -= te; \
tf = OD_DCT_RSHIFT(t1, 1) - tf; \
t1 -= tf; \
/* TODO: Can we move this into another operation */ \
tc = -tc; \
t2 = OD_DCT_RSHIFT(tc, 1) - t2; \
tc -= t2; \
t3 = OD_DCT_RSHIFT(td, 1) - t3; \
td = t3 - td; \
/* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
OD_DCT_OVERFLOW_CHECK(t6, 7489, 4096, 145); \
t9 -= (t6*7489 + 4096) >> 13; \
/* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 146); \
t6 += (t9*11585 + 8192) >> 14; \
/* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 147); \
t9 += (t6*19195 + 16384) >> 15; \
t8 += OD_DCT_RSHIFT(t9, 1); \
t9 -= t8; \
t6 = t7h - t6; \
t7 -= t6; \
/* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 148); \
t8 += (t7*6723 + 4096) >> 13; \
/* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
OD_DCT_OVERFLOW_CHECK(t8, 16069, 8192, 149); \
t7 -= (t8*16069 + 8192) >> 14; \
/* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \
OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 150); \
t8 += (t7*6723 + 4096) >> 13; \
/* 17515/32768 ~= Tan[5*Pi/32]) ~= 0.534511135950792 */ \
OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 151); \
t9 += (t6*17515 + 16384) >> 15; \
/* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
OD_DCT_OVERFLOW_CHECK(t9, 13623, 8192, 152); \
t6 -= (t9*13623 + 8192) >> 14; \
/* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 153); \
t9 += (t6*17515 + 16384) >> 15; \
/* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 154); \
t5 += (ta*13573 + 8192) >> 14; \
/* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 155); \
ta -= (t5*11585 + 16384) >> 15; \
/* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 156); \
t5 += (ta*13573 + 8192) >> 14; \
tb += OD_DCT_RSHIFT(t5, 1); \
t5 = tb - t5; \
ta += t4h; \
t4 -= ta; \
/* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 157); \
ta += (t5*2485 + 4096) >> 13; \
/* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
OD_DCT_OVERFLOW_CHECK(ta, 18205, 16384, 158); \
t5 -= (ta*18205 + 16384) >> 15; \
/* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 159); \
ta += (t5*2485 + 4096) >> 13; \
/* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 160); \
tb -= (t4*6723 + 4096) >> 13; \
/* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
OD_DCT_OVERFLOW_CHECK(tb, 16069, 8192, 161); \
t4 += (tb*16069 + 8192) >> 14; \
/* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 162); \
tb -= (t4*6723 + 4096) >> 13; \
/* TODO: Can we move this into another operation */ \
t5 = -t5; \
tc -= tf; \
tch = OD_DCT_RSHIFT(tc, 1); \
tf += tch; \
t3 += t0; \
t3h = OD_DCT_RSHIFT(t3, 1); \
t0 -= t3h; \
td -= t1; \
tdh = OD_DCT_RSHIFT(td, 1); \
t1 += tdh; \
t2 += te; \
t2h = OD_DCT_RSHIFT(t2, 1); \
te -= t2h; \
t8 += t4; \
t8h = OD_DCT_RSHIFT(t8, 1); \
t4 = t8h - t4; \
t7 = tb - t7; \
t7h = OD_DCT_RSHIFT(t7, 1); \
tb = t7h - tb; \
t6 -= ta; \
t6h = OD_DCT_RSHIFT(t6, 1); \
ta += t6h; \
t9 = t5 - t9; \
t9h = OD_DCT_RSHIFT(t9, 1); \
t5 -= t9h; \
t0 -= t7h; \
t7 += t0; \
tf += t8h; \
t8 -= tf; \
te -= t6h; \
t6 += te; \
t1 += t9h; \
t9 -= t1; \
tb -= tch; \
tc += tb; \
t4 += t3h; \
t3 -= t4; \
ta -= tdh; \
td += ta; \
t5 = t2h - t5; \
t2 -= t5; \
/* TODO: Can we move these into another operation */ \
t8 = -t8; \
t9 = -t9; \
ta = -ta; \
tb = -tb; \
tc = -tc; \
td = -td; \
tf = -tf; \
/* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 163); \
t0 -= (tf*7799 + 4096) >> 13; \
/* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
OD_DCT_OVERFLOW_CHECK(t0, 4091, 2048, 164); \
tf += (t0*4091 + 2048) >> 12; \
/* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 165); \
t0 -= (tf*7799 + 4096) >> 13; \
/* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 166); \
t1 += (te*2417 + 16384) >> 15; \
/* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
OD_DCT_OVERFLOW_CHECK(t1, 601, 2048, 167); \
te -= (t1*601 + 2048) >> 12; \
/* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 168); \
t1 += (te*2417 + 16384) >> 15; \
/* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
OD_DCT_OVERFLOW_CHECK(t8, 14525, 16384, 169); \
t7 -= (t8*14525 + 16384) >> 15; \
/* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
OD_DCT_OVERFLOW_CHECK(t7, 3035, 2048, 170); \
t8 += (t7*3035 + 2048) >> 12; \
/* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
OD_DCT_OVERFLOW_CHECK(t8, 7263, 8192, 171); \
t7 -= (t8*7263 + 8192) >> 14; \
/* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 172); \
t2 -= (td*6393 + 4096) >> 13; \
/* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
OD_DCT_OVERFLOW_CHECK(t2, 3973, 2048, 173); \
td += (t2*3973 + 2048) >> 12; \
/* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 174); \
t2 -= (td*6393 + 4096) >> 13; \
/* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 175); \
t5 -= (ta*9281 + 8192) >> 14; \
/* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
OD_DCT_OVERFLOW_CHECK(t5, 7027, 4096, 176); \
ta += (t5*7027 + 4096) >> 13; \
/* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 177); \
t5 -= (ta*9281 + 8192) >> 14; \
/* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 178); \
t3 -= (tc*11539 + 8192) >> 14; \
/* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
OD_DCT_OVERFLOW_CHECK(t3, 7713, 4096, 179); \
tc += (t3*7713 + 4096) >> 13; \
/* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 180); \
t3 -= (tc*11539 + 8192) >> 14; \
/* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 181); \
t4 -= (tb*10375 + 8192) >> 14; \
/* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
OD_DCT_OVERFLOW_CHECK(t4, 7405, 4096, 182); \
tb += (t4*7405 + 4096) >> 13; \
/* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 183); \
t4 -= (tb*10375 + 8192) >> 14; \
/* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 184); \
t6 -= (t9*8247 + 8192) >> 14; \
/* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
OD_DCT_OVERFLOW_CHECK(t6, 1645, 1024, 185); \
t9 += (t6*1645 + 1024) >> 11; \
/* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 186); \
t6 -= (t9*8247 + 8192) >> 14; \
} \
while (0)
#define OD_IDST_16_ASYM(t0, t0h, t8, t4, tc, t2, t2h, ta, t6, te, teh, \
t1, t9, t5, td, t3, tb, t7, tf) \
/* Embedded 16-point asymmetric Type-IV iDST. */ \
do { \
int t1h_; \
int t3h_; \
int t4h; \
int t6h; \
int t9h_; \
int tbh_; \
int tch; \
/* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
t6 += (t9*8247 + 8192) >> 14; \
/* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
t9 -= (t6*1645 + 1024) >> 11; \
/* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
t6 += (t9*8247 + 8192) >> 14; \
/* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
t2 += (td*10375 + 8192) >> 14; \
/* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
td -= (t2*7405 + 4096) >> 13; \
/* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
t2 += (td*10375 + 8192) >> 14; \
/* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
tc += (t3*11539 + 8192) >> 14; \
/* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
t3 -= (tc*7713 + 4096) >> 13; \
/* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
tc += (t3*11539 + 8192) >> 14; \
/* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
ta += (t5*9281 + 8192) >> 14; \
/* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
t5 -= (ta*7027 + 4096) >> 13; \
/* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
ta += (t5*9281 + 8192) >> 14; \
/* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
t4 += (tb*6393 + 4096) >> 13; \
/* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
tb -= (t4*3973 + 2048) >> 12; \
/* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
t4 += (tb*6393 + 4096) >> 13; \
/* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
te += (t1*7263 + 8192) >> 14; \
/* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
t1 -= (te*3035 + 2048) >> 12; \
/* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
te += (t1*14525 + 16384) >> 15; \
/* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
t8 -= (t7*2417 + 16384) >> 15; \
/* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
t7 += (t8*601 + 2048) >> 12; \
/* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
t8 -= (t7*2417 + 16384) >> 15; \
/* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
t0 += (tf*7799 + 4096) >> 13; \
/* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
tf -= (t0*4091 + 2048) >> 12; \
/* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
t0 += (tf*7799 + 4096) >> 13; \
/* TODO: Can we move these into another operation */ \
t1 = -t1; \
t3 = -t3; \
t5 = -t5; \
t9 = -t9; \
tb = -tb; \
td = -td; \
tf = -tf; \
t4 += ta; \
t4h = OD_DCT_RSHIFT(t4, 1); \
ta = t4h - ta; \
tb -= t5; \
tbh_ = OD_DCT_RSHIFT(tb, 1); \
t5 += tbh_; \
tc += t2; \
tch = OD_DCT_RSHIFT(tc, 1); \
t2 -= tch; \
t3 -= td; \
t3h_ = OD_DCT_RSHIFT(t3, 1); \
td += t3h_; \
t9 += t8; \
t9h_ = OD_DCT_RSHIFT(t9, 1); \
t8 -= t9h_; \
t6 -= t7; \
t6h = OD_DCT_RSHIFT(t6, 1); \
t7 += t6h; \
t1 += tf; \
t1h_ = OD_DCT_RSHIFT(t1, 1); \
tf -= t1h_; \
te -= t0; \
teh = OD_DCT_RSHIFT(te, 1); \
t0 += teh; \
ta += t9h_; \
t9 = ta - t9; \
t5 -= t6h; \
t6 += t5; \
td = teh - td; \
te = td - te; \
t2 = t1h_ - t2; \
t1 -= t2; \
t7 += t4h; \
t4 -= t7; \
t8 -= tbh_; \
tb += t8; \
t0 += tch; \
tc -= t0; \
tf -= t3h_; \
t3 += tf; \
/* TODO: Can we move this into another operation */ \
ta = -ta; \
/* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
td += (t2*6723 + 4096) >> 13; \
/* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
t2 -= (td*16069 + 8192) >> 14; \
/* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
td += (t2*6723 + 4096) >> 13; \
/* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
t5 -= (ta*2485 + 4096) >> 13; \
/* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
ta += (t5*18205 + 16384) >> 15; \
/* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
t5 -= (ta*2485 + 4096) >> 13; \
t2 += t5; \
t2h = OD_DCT_RSHIFT(t2, 1); \
t5 -= t2h; \
ta = td - ta; \
td -= OD_DCT_RSHIFT(ta, 1); \
/* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
ta -= (t5*13573 + 8192) >> 14; \
/* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
t5 += (ta*11585 + 16384) >> 15; \
/* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
ta -= (t5*13573 + 8192) >> 14; \
/* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
t9 -= (t6*17515 + 16384) >> 15; \
/* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
t6 += (t9*13623 + 8192) >> 14; \
/* 17515/32768 ~= Tan[5*Pi/32]) ~= 0.534511135950792 */ \
t9 -= (t6*17515 + 16384) >> 15; \
/* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \
t1 -= (te*6723 + 4096) >> 13; \
/* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
te += (t1*16069 + 8192) >> 14; \
/* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \
t1 -= (te*6723 + 4096) >> 13; \
te += t6; \
teh = OD_DCT_RSHIFT(te, 1); \
t6 = teh - t6; \
t9 += t1; \
t1 -= OD_DCT_RSHIFT(t9, 1); \
/* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
t9 -= (t6*19195 + 16384) >> 15; \
/* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
t6 -= (t9*11585 + 8192) >> 14; \
/* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
t9 += (t6*7489 + 4096) >> 13; \
tb = tc - tb; \
tc = OD_DCT_RSHIFT(tb, 1) - tc; \
t3 += t4; \
t4 = OD_DCT_RSHIFT(t3, 1) - t4; \
/* TODO: Can we move this into another operation */ \
t3 = -t3; \
t8 += tf; \
tf = OD_DCT_RSHIFT(t8, 1) - tf; \
t0 += t7; \
t0h = OD_DCT_RSHIFT(t0, 1); \
t7 = t0h - t7; \
/* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
t3 += (tc*4161 + 8192) >> 14; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
tc -= (t3*15137 + 8192) >> 14; \
/* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
t3 += (tc*14341 + 8192) >> 14; \
/* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
t4 -= (tb*14341 + 8192) >> 14; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
tb += (t4*15137 + 8192) >> 14; \
/* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
t4 -= (tb*4161 + 8192) >> 14; \
/* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
t8 += (t7*13573 + 8192) >> 14; \
/* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
t7 -= (t8*11585 + 16384) >> 15; \
/* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
t8 += (t7*13573 + 8192) >> 14; \
/* TODO: Can we move these into another operation */ \
t1 = -t1; \
t5 = -t5; \
t9 = -t9; \
tb = -tb; \
td = -td; \
} \
while (0)
#define OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
/* Embedded 32-point orthonormal Type-II fDCT. */ \
do { \
int tgh; \
int thh; \
int tih; \
int tkh; \
int tmh; \
int tnh; \
int toh; \
int tqh; \
int tsh; \
int tuh; \
int tvh; \
tv = t0 - tv; \
tvh = OD_DCT_RSHIFT(tv, 1); \
t0 -= tvh; \
tu += t1; \
tuh = OD_DCT_RSHIFT(tu, 1); \
t1 = tuh - t1; \
tt = t2 - tt; \
t2 -= OD_DCT_RSHIFT(tt, 1); \
ts += t3; \
tsh = OD_DCT_RSHIFT(ts, 1); \
t3 = tsh - t3; \
tr = t4 - tr; \
t4 -= OD_DCT_RSHIFT(tr, 1); \
tq += t5; \
tqh = OD_DCT_RSHIFT(tq, 1); \
t5 = tqh - t5; \
tp = t6 - tp; \
t6 -= OD_DCT_RSHIFT(tp, 1); \
to += t7; \
toh = OD_DCT_RSHIFT(to, 1); \
t7 = toh - t7; \
tn = t8 - tn; \
tnh = OD_DCT_RSHIFT(tn, 1); \
t8 -= tnh; \
tm += t9; \
tmh = OD_DCT_RSHIFT(tm, 1); \
t9 = tmh - t9; \
tl = ta - tl; \
ta -= OD_DCT_RSHIFT(tl, 1); \
tk += tb; \
tkh = OD_DCT_RSHIFT(tk, 1); \
tb = tkh - tb; \
tj = tc - tj; \
tc -= OD_DCT_RSHIFT(tj, 1); \
ti += td; \
tih = OD_DCT_RSHIFT(ti, 1); \
td = tih - td; \
th = te - th; \
thh = OD_DCT_RSHIFT(th, 1); \
te -= thh; \
tg += tf; \
tgh = OD_DCT_RSHIFT(tg, 1); \
tf = tgh - tf; \
OD_FDCT_16_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh); \
OD_FDST_16_ASYM(tv, tvh, tf, tn, tnh, t7, tr, tb, tj, t3, \
tt, td, tl, t5, tp, t9, th, thh, t1); \
} \
while (0)
#define OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
/* Embedded 32-point orthonormal Type-II iDCT. */ \
do { \
int t1h; \
int t3h; \
int t5h; \
int t7h; \
int t9h; \
int tbh; \
int tdh; \
int tfh; \
int thh; \
int tth; \
int tvh; \
OD_IDST_16_ASYM(tv, tvh, tn, tr, tj, tt, tth, tl, tp, th, thh, \
tu, tm, tq, ti, ts, tk, to, tg); \
OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh); \
tu = t1h - tu; \
t1 -= tu; \
te += thh; \
th = te - th; \
tm = t9h - tm; \
t9 -= tm; \
t6 += OD_DCT_RSHIFT(tp, 1); \
tp = t6 - tp; \
tq = t5h - tq; \
t5 -= tq; \
ta += OD_DCT_RSHIFT(tl, 1); \
tl = ta - tl; \
ti = tdh - ti; \
td -= ti; \
t2 += tth; \
tt = t2 - tt; \
ts = t3h - ts; \
t3 -= ts; \
tc += OD_DCT_RSHIFT(tj, 1); \
tj = tc - tj; \
tk = tbh - tk; \
tb -= tk; \
t4 += OD_DCT_RSHIFT(tr, 1); \
tr = t4 - tr; \
to = t7h - to; \
t7 -= to; \
t8 += OD_DCT_RSHIFT(tn, 1); \
tn = t8 - tn; \
tg = tfh - tg; \
tf -= tg; \
t0 += tvh; \
tv = t0 - tv; \
} \
while (0)
/* Embedded 32-point orthonormal Type-IV fDST. */
#define OD_FDST_32(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, ta, tb, tc, td, \
te, tf, tg, th, ti, tj, tk, tl, tm, tn, to, tp, tq, tr, ts, tt, tu, tv) \
/* 117 "muls", 117 + 128 = 245 adds, 36 shifts */ \
do { \
od_coeff t0h; \
od_coeff t1h; \
od_coeff t2h; \
od_coeff t3h; \
od_coeff t4h; \
od_coeff t6h; \
od_coeff t8h; \
od_coeff t9h; \
od_coeff tah; \
od_coeff tbh; \
od_coeff tch; \
od_coeff tdh; \
od_coeff teh; \
od_coeff tfh; \
od_coeff tgh; \
od_coeff thh; \
od_coeff tih; \
od_coeff tjh; \
od_coeff tkh; \
od_coeff tlh; \
od_coeff tmh; \
od_coeff tnh; \
od_coeff tph; \
od_coeff trh; \
od_coeff tsh; \
od_coeff tth; \
od_coeff tuh; \
od_coeff tvh; \
/* Stage 0 */ \
tp += (t6*659 + 2048) >> 12; \
t6 -= (tp*10279 + 16384) >> 15; \
tp += (t6*659 + 2048) >> 12; \
th += (te*3045 + 4096) >> 13; \
te -= (th*21403 + 16384) >> 15; \
th += (te*3045 + 4096) >> 13; \
t9 += (tm*20191 + 16384) >> 15; \
tm -= (t9*29269 + 16384) >> 15; \
t9 += (tm*20191 + 16384) >> 15; \
tu += (t1*1207 + 16384) >> 15; \
t1 -= (tu*2411 + 16384) >> 15; \
tu += (t1*1207 + 16384) >> 15; \
t4 += (tr*13113 + 8192) >> 14; \
tr -= (t4*7993 + 4096) >> 13; \
t4 += (tr*13113 + 8192) >> 14; \
tj += (tc*10381 + 16384) >> 15; \
tc -= (tj*4717 + 4096) >> 13; \
tj += (tc*10381 + 16384) >> 15; \
tb += (tk*18035 + 16384) >> 15; \
tk -= (tb*6921 + 4096) >> 13; \
tb += (tk*18035 + 16384) >> 15; \
ts += (t3*1411 + 8192) >> 14; \
t3 -= (ts*2801 + 8192) >> 14; \
ts += (t3*1411 + 8192) >> 14; \
tq += (t5*2225 + 8192) >> 14; \
t5 -= (tq*2185 + 4096) >> 13; \
tq += (t5*2225 + 8192) >> 14; \
ti += (td*11273 + 16384) >> 15; \
td -= (ti*315 + 256) >> 9; \
ti += (td*11273 + 16384) >> 15; \
tl += (ta*8637 + 16384) >> 15; \
ta -= (tl*16151 + 16384) >> 15; \
tl += (ta*8637 + 16384) >> 15; \
tt += (t2*2013 + 16384) >> 15; \
t2 -= (tt*4011 + 16384) >> 15; \
tt += (t2*2013 + 16384) >> 15; \
to += (t7*6101 + 16384) >> 15; \
t7 -= (to*11793 + 16384) >> 15; \
to += (t7*6101 + 16384) >> 15; \
t8 += (tn*10659 + 8192) >> 14; \
tn -= (t8*29957 + 16384) >> 15; \
t8 += (tn*10659 + 8192) >> 14; \
tg += (tf*819 + 1024) >> 11; \
tf -= (tg*22595 + 16384) >> 15; \
tg += (tf*819 + 1024) >> 11; \
t0 += (tv*31973 + 16384) >> 15; \
tv -= (t0*16379 + 8192) >> 14; \
t0 += (tv*31973 + 16384) >> 15; \
/* Stage 1 */ \
tj -= ts; \
tjh = OD_DCT_RSHIFT(tj, 1); \
ts += tjh; \
tr = tk - tr; \
trh = OD_DCT_RSHIFT(tr, 1); \
tk = trh - tk; \
tc += t3; \
tch = OD_DCT_RSHIFT(tc, 1); \
t3 -= tch; \
t4 += tb; \
t4h = OD_DCT_RSHIFT(t4, 1); \
tb -= t4h; \
tv += tf; \
tvh = OD_DCT_RSHIFT(tv, 1); \
tf -= tvh; \
t8 -= to; \
t8h = OD_DCT_RSHIFT(t8, 1); \
to += t8h; \
t0 += tg; \
t0h = OD_DCT_RSHIFT(t0, 1); \
tg -= t0h; \
tn = t7 - tn; \
tnh = OD_DCT_RSHIFT(tn, 1); \
t7 -= tnh; \
th -= tu; \
thh = OD_DCT_RSHIFT(th, 1); \
tu += thh; \
t6 += tm; \
t6h = OD_DCT_RSHIFT(t6, 1); \
tm = t6h - tm; \
te += t1; \
teh = OD_DCT_RSHIFT(te, 1); \
t1 -= teh; \
tp += t9; \
tph = OD_DCT_RSHIFT(tp, 1); \
t9 -= tph; \
t2 -= td; \
t2h = OD_DCT_RSHIFT(t2, 1); \
td += t2h; \
tl = tq - tl; \
tlh = OD_DCT_RSHIFT(tl, 1); \
tq -= tlh; \
tt += ti; \
tth = OD_DCT_RSHIFT(tt, 1); \
ti -= tth; \
ta += t5; \
tah = OD_DCT_RSHIFT(ta, 1); \
t5 -= tah; \
/* Stage 2 */ \
tm -= thh; \
th += tm; \
t9 = teh - t9; \
te -= t9; \
td = tlh - td; \
tl -= td; \
ti += tah; \
ta -= ti; \
tk = tjh - tk; \
tj -= tk; \
tb -= tch; \
tc += tb; \
tg += tnh; \
tn = tg - tn; \
tf += t8h; \
t8 = tf - t8; \
t3 -= trh; \
tr += t3; \
ts += t4h; \
t4 -= ts; \
to -= t0h; \
t0 += to; \
t7 = tvh - t7; \
tv = t7 - tv; \
t1 -= t6h; \
t6 += t1; \
tu += tph; \
tp -= tu; \
tq -= tth; \
tt += tq; \
t5 += t2h; \
t2 -= t5; \
/* Stage 3 */ \
tj += (tc*11725 + 16384) >> 15; \
tc -= (tj*5197 + 4096) >> 13; \
tj += (tc*11725 + 16384) >> 15; \
td += (ti*513 + 1024) >> 11; \
ti -= (td*15447 + 16384) >> 15; \
td += (ti*513 + 1024) >> 11; \
th += (te*4861 + 16384) >> 15; \
te -= (th*1189 + 2048) >> 12; \
th += (te*4861 + 16384) >> 15; \
tg += (tf*805 + 8192) >> 14; \
tf -= (tg*803 + 4096) >> 13; \
tg += (tf*805 + 8192) >> 14; \
tb += (tk*7749 + 8192) >> 14; \
tk -= (tb*12665 + 8192) >> 14; \
tb += (tk*7749 + 8192) >> 14; \
tl += (ta*2455 + 2048) >> 12; \
ta -= (tl*28899 + 16384) >> 15; \
tl += (ta*2455 + 2048) >> 12; \
t9 += (tm*12151 + 8192) >> 14; \
tm -= (t9*31357 + 16384) >> 15; \
t9 += (tm*12151 + 8192) >> 14; \
tn += (t8*29699 + 16384) >> 15; \
t8 -= (tn*16305 + 8192) >> 14; \
tn += (t8*29699 + 16384) >> 15; \
/* Stage 4 */ \
tf -= tc; \
tfh = OD_DCT_RSHIFT(tf, 1); \
tc += tfh; \
ti = th - ti; \
tih = OD_DCT_RSHIFT(ti, 1); \
th -= tih; \
tg += tj; \
tgh = OD_DCT_RSHIFT(tg, 1); \
tj = tgh - tj; \
td -= te; \
tdh = OD_DCT_RSHIFT(td, 1); \
te += tdh; \
tm = ta - tm; \
tmh = OD_DCT_RSHIFT(tm, 1); \
ta = tmh - ta; \
t9 += tl; \
t9h = OD_DCT_RSHIFT(t9, 1); \
tl -= t9h; \
tb += t8; \
tbh = OD_DCT_RSHIFT(tb, 1); \
t8 -= tbh; \
tk += tn; \
tkh = OD_DCT_RSHIFT(tk, 1); \
tn -= tkh; \
t1 -= t2; \
t1h = OD_DCT_RSHIFT(t1, 1); \
t2 += t1h; \
t3 += tv; \
t3h = OD_DCT_RSHIFT(t3, 1); \
tv -= t3h; \
tu += tt; \
tuh = OD_DCT_RSHIFT(tu, 1); \
tt -= tuh; \
ts -= t0; \
tsh = OD_DCT_RSHIFT(ts, 1); \
t0 += tsh; \
tq = t6 - tq; \
t6 -= OD_DCT_RSHIFT(tq, 1); \
to += tr; \
tr = OD_DCT_RSHIFT(to, 1) - tr; \
t7 = t4 - t7; \
t4 -= OD_DCT_RSHIFT(t7, 1); \
t5 -= tp; \
tp += OD_DCT_RSHIFT(t5, 1); \
/* Stage 5 */ \
tp += (t6*2485 + 4096) >> 13; \
t6 -= (tp*18205 + 16384) >> 15; \
tp += (t6*2485 + 4096) >> 13; \
to += (t7*3227 + 16384) >> 15; \
t7 -= (to*6393 + 16384) >> 15; \
to += (t7*3227 + 16384) >> 15; \
tq += (t5*17515 + 16384) >> 15; \
t5 -= (tq*13623 + 8192) >> 14; \
tq += (t5*17515 + 16384) >> 15; \
t4 += (tr*6723 + 4096) >> 13; \
tr -= (t4*16069 + 8192) >> 14; \
t4 += (tr*6723 + 4096) >> 13; \
/* Stage 6 */ \
tj += tdh; \
td -= tj; \
tc -= tih; \
ti += tc; \
th = tgh - th; \
tg -= th; \
te += tfh; \
tf -= te; \
tl = tkh - tl; \
tk -= tl; \
ta += tbh; \
tb -= ta; \
tn -= tmh; \
tm += tn; \
t8 += t9h; \
t9 = t8 - t9; \
tt = t3h - tt; \
t3 -= tt; \
t2 -= tsh; \
ts += t2; \
tv -= t1h; \
t1 += tv; \
t0 += tuh; \
tu -= t0; \
tp = OD_DCT_RSHIFT(to, 1) - tp; \
to -= tp; \
t6 += OD_DCT_RSHIFT(t7, 1); \
t7 -= t6; \
t4 = OD_DCT_RSHIFT(tq, 1) - t4; \
tq -= t4; \
tr += OD_DCT_RSHIFT(t5, 1); \
t5 = tr - t5; \
/* Stage 7 */ \
td += (ti*21894 + 16384) >> 15; \
ti -= (td*15137 + 8192) >> 14; \
td += (ti*21895 + 16384) >> 15; \
tj += (tc*21894 + 16384) >> 15; \
tc -= (tj*15137 + 8192) >> 14; \
tj += (tc*21895 + 16384) >> 15; \
th += (te*13573 + 16384) >> 15; \
te -= (th*11585 + 8192) >> 14; \
th += (te*13573 + 16384) >> 15; \
tb += (tk*21894 + 16384) >> 15; \
tk -= (tb*15137 + 8192) >> 14; \
tb += (tk*21895 + 16384) >> 15; \
ta += (tl*3259 + 8192) >> 14; \
tl -= (ta*3135 + 4096) >> 13; \
ta += (tl*3259 + 8192) >> 14; \
t9 += (tm*13573 + 16384) >> 15; \
tm -= (t9*11585 + 8192) >> 14; \
t9 += (tm*13573 + 16384) >> 15; \
ts += (t3*3259 + 8192) >> 14; \
t3 -= (ts*3135 + 4096) >> 13; \
ts += (t3*3259 + 8192) >> 14; \
t2 += (tt*3259 + 8192) >> 14; \
tt -= (t2*3135 + 4096) >> 13; \
t2 += (tt*3259 + 8192) >> 14; \
tu += (t1*13573 + 16384) >> 15; \
t1 -= (tu*11585 + 8192) >> 14; \
tu += (t1*13573 + 16384) >> 15; \
tp += (t6*13573 + 16384) >> 15; \
t6 -= (tp*11585 + 8192) >> 14; \
tp += (t6*13573 + 16384) >> 15; \
tq += (t5*13573 + 16384) >> 15; \
t5 -= (tq*11585 + 8192) >> 14; \
tq += (t5*13573 + 16384) >> 15; \
} \
while (0)
/* Embedded 32-point orthonormal Type-IV iDST. */
#define OD_IDST_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
/* 117 "muls", 117 + 128 = 245 adds, 36 shifts */ \
do { \
od_coeff t0h; \
od_coeff t1h; \
od_coeff t2h; \
od_coeff t3h; \
od_coeff t4h; \
od_coeff t6h; \
od_coeff t8h; \
od_coeff t9h; \
od_coeff tah; \
od_coeff tbh; \
od_coeff tch; \
od_coeff tdh; \
od_coeff teh; \
od_coeff tfh; \
od_coeff tgh; \
od_coeff thh; \
od_coeff tih; \
od_coeff tjh; \
od_coeff tkh; \
od_coeff tlh; \
od_coeff tmh; \
od_coeff tnh; \
od_coeff tph; \
od_coeff trh; \
od_coeff tsh; \
od_coeff tth; \
od_coeff tuh; \
od_coeff tvh; \
/* Stage 0 */ \
tq -= (t5*13573 + 16384) >> 15; \
t5 += (tq*11585 + 8192) >> 14; \
tq -= (t5*13573 + 16384) >> 15; \
tp -= (t6*13573 + 16384) >> 15; \
t6 += (tp*11585 + 8192) >> 14; \
tp -= (t6*13573 + 16384) >> 15; \
tu -= (t1*13573 + 16384) >> 15; \
t1 += (tu*11585 + 8192) >> 14; \
tu -= (t1*13573 + 16384) >> 15; \
t2 -= (tt*3259 + 8192) >> 14; \
tt += (t2*3135 + 4096) >> 13; \
t2 -= (tt*3259 + 8192) >> 14; \
ts -= (t3*3259 + 8192) >> 14; \
t3 += (ts*3135 + 4096) >> 13; \
ts -= (t3*3259 + 8192) >> 14; \
t9 -= (tm*13573 + 16384) >> 15; \
tm += (t9*11585 + 8192) >> 14; \
t9 -= (tm*13573 + 16384) >> 15; \
ta -= (tl*3259 + 8192) >> 14; \
tl += (ta*3135 + 4096) >> 13; \
ta -= (tl*3259 + 8192) >> 14; \
tb -= (tk*21895 + 16384) >> 15; \
tk += (tb*15137 + 8192) >> 14; \
tb -= (tk*21894 + 16384) >> 15; \
th -= (te*13573 + 16384) >> 15; \
te += (th*11585 + 8192) >> 14; \
th -= (te*13573 + 16384) >> 15; \
tj -= (tc*21895 + 16384) >> 15; \
tc += (tj*15137 + 8192) >> 14; \
tj -= (tc*21894 + 16384) >> 15; \
td -= (ti*21895 + 16384) >> 15; \
ti += (td*15137 + 8192) >> 14; \
td -= (ti*21894 + 16384) >> 15; \
/* Stage 1 */ \
t5 = tr - t5; \
tr -= OD_DCT_RSHIFT(t5, 1); \
tq += t4; \
t4 = OD_DCT_RSHIFT(tq, 1) - t4; \
t7 += t6; \
t6 -= OD_DCT_RSHIFT(t7, 1); \
to += tp; \
tp = OD_DCT_RSHIFT(to, 1) - tp; \
tu += t0; \
tuh = OD_DCT_RSHIFT(tu, 1); \
t0 -= tuh; \
t1 -= tv; \
t1h = OD_DCT_RSHIFT(t1, 1); \
tv += t1h; \
ts -= t2; \
tsh = OD_DCT_RSHIFT(ts, 1); \
t2 += tsh; \
t3 += tt; \
t3h = OD_DCT_RSHIFT(t3, 1); \
tt = t3h - tt; \
t9 = t8 - t9; \
t9h = OD_DCT_RSHIFT(t9, 1); \
t8 -= t9h; \
tm -= tn; \
tmh = OD_DCT_RSHIFT(tm, 1); \
tn += tmh; \
tb += ta; \
tbh = OD_DCT_RSHIFT(tb, 1); \
ta -= tbh; \
tk += tl; \
tkh = OD_DCT_RSHIFT(tk, 1); \
tl = tkh - tl; \
tf += te; \
tfh = OD_DCT_RSHIFT(tf, 1); \
te -= tfh; \
tg += th; \
tgh = OD_DCT_RSHIFT(tg, 1); \
th = tgh - th; \
ti -= tc; \
tih = OD_DCT_RSHIFT(ti, 1); \
tc += tih; \
td += tj; \
tdh = OD_DCT_RSHIFT(td, 1); \
tj -= tdh; \
/* Stage 2 */ \
t4 -= (tr*6723 + 4096) >> 13; \
tr += (t4*16069 + 8192) >> 14; \
t4 -= (tr*6723 + 4096) >> 13; \
tq -= (t5*17515 + 16384) >> 15; \
t5 += (tq*13623 + 8192) >> 14; \
tq -= (t5*17515 + 16384) >> 15; \
to -= (t7*3227 + 16384) >> 15; \
t7 += (to*6393 + 16384) >> 15; \
to -= (t7*3227 + 16384) >> 15; \
tp -= (t6*2485 + 4096) >> 13; \
t6 += (tp*18205 + 16384) >> 15; \
tp -= (t6*2485 + 4096) >> 13; \
/* Stage 3 */ \
tp -= OD_DCT_RSHIFT(t5, 1); \
t5 += tp; \
t4 += OD_DCT_RSHIFT(t7, 1); \
t7 = t4 - t7; \
tr = OD_DCT_RSHIFT(to, 1) - tr; \
to -= tr; \
t6 += OD_DCT_RSHIFT(tq, 1); \
tq = t6 - tq; \
t0 -= tsh; \
ts += t0; \
tt += tuh; \
tu -= tt; \
tv += t3h; \
t3 -= tv; \
t2 -= t1h; \
t1 += t2; \
tn += tkh; \
tk -= tn; \
t8 += tbh; \
tb -= t8; \
tl += t9h; \
t9 -= tl; \
ta = tmh - ta; \
tm = ta - tm; \
te -= tdh; \
td += te; \
tj = tgh - tj; \
tg -= tj; \
th += tih; \
ti = th - ti; \
tc -= tfh; \
tf += tc; \
/* Stage 4 */ \
tn -= (t8*29699 + 16384) >> 15; \
t8 += (tn*16305 + 8192) >> 14; \
tn -= (t8*29699 + 16384) >> 15; \
t9 -= (tm*12151 + 8192) >> 14; \
tm += (t9*31357 + 16384) >> 15; \
t9 -= (tm*12151 + 8192) >> 14; \
tl -= (ta*2455 + 2048) >> 12; \
ta += (tl*28899 + 16384) >> 15; \
tl -= (ta*2455 + 2048) >> 12; \
tb -= (tk*7749 + 8192) >> 14; \
tk += (tb*12665 + 8192) >> 14; \
tb -= (tk*7749 + 8192) >> 14; \
tg -= (tf*805 + 8192) >> 14; \
tf += (tg*803 + 4096) >> 13; \
tg -= (tf*805 + 8192) >> 14; \
th -= (te*4861 + 16384) >> 15; \
te += (th*1189 + 2048) >> 12; \
th -= (te*4861 + 16384) >> 15; \
td -= (ti*513 + 1024) >> 11; \
ti += (td*15447 + 16384) >> 15; \
td -= (ti*513 + 1024) >> 11; \
tj -= (tc*11725 + 16384) >> 15; \
tc += (tj*5197 + 4096) >> 13; \
tj -= (tc*11725 + 16384) >> 15; \
/* Stage 5 */ \
t2 += t5; \
t2h = OD_DCT_RSHIFT(t2, 1); \
t5 -= t2h; \
tt -= tq; \
tth = OD_DCT_RSHIFT(tt, 1); \
tq += tth; \
tp += tu; \
tph = OD_DCT_RSHIFT(tp, 1); \
tu -= tph; \
t6 -= t1; \
t6h = OD_DCT_RSHIFT(t6, 1); \
t1 += t6h; \
tv = t7 - tv; \
tvh = OD_DCT_RSHIFT(tv, 1); \
t7 = tvh - t7; \
t0 -= to; \
t0h = OD_DCT_RSHIFT(t0, 1); \
to += t0h; \
t4 +=