daala_tx: New flattened 16-point Type-IV DST.
Change-Id: Ic741f269d0bd5e5e295b55f95bfef05050bc31e5
diff --git a/av1/common/daala_tx.c b/av1/common/daala_tx.c
index 45b7057..4c4f435 100644
--- a/av1/common/daala_tx.c
+++ b/av1/common/daala_tx.c
@@ -2119,6 +2119,471 @@
} \
while (0)
+#define OD_FDST_16_FLAT(s0, s8, s4, sc, s2, sa, s6, se, \
+ s1, s9, s5, sd, s3, sb, s7, sf) \
+ /* Embedded 16-point orthonormal Type-IV fDST with flattened rotations. Every argument must be a modifiable lvalue; all sixteen are overwritten. */ \
+ do { \
+ int t_; \
+ int u_; \
+ int s0h; /* s0h/s4h/sbh/sfh cache OD_RSHIFT1() halves reused by a later butterfly. */ \
+ int s4h; \
+ int sbh; \
+ int sfh; \
+ t_ = s1 + se; \
+ /* 32729/32768 ~= (Sin[17*Pi/64] + Cos[17*Pi/64])/Sqrt[2] ~=
+ 0.9987954562051723 */ \
+ u_ = (se*32729 + 16384) >> 15; \
+ /* 201/2048 ~= (Sin[17*Pi/64] - Cos[17*Pi/64])*Sqrt[2] ~=
+ 0.09813534865483615 */ \
+ se = (s1*201 + 1024) >> 11; \
+ /* 31121/32768 ~= Cos[17*Pi/64]*Sqrt[2] ~= 0.9497277818777543 */ \
+ t_ = (t_*31121 + 16384) >> 15; \
+ se += t_; \
+ s1 = u_ - OD_RSHIFT1(t_); \
+ t_ = s6 - s9; \
+ /* 32413/32768 ~= (Sin[19*Pi/64] + Cos[19*Pi/64])/Sqrt[2] ~=
+ 0.9891765099647809 */ \
+ u_ = (s9*32413 + 16384) >> 15; \
+ /* 601/2048 ~= (Sin[19*Pi/64] - Cos[19*Pi/64])*Sqrt[2]
+ ~= 0.29346094891072355 */ \
+ s9 = (s6*601 + 1024) >> 11; \
+ /* 27605/32768 ~= Cos[19*Pi/64]*Sqrt[2] ~= 0.8424460355094193 */ \
+ t_ = (t_*27605 + 16384) >> 15; \
+ s9 += t_; \
+ s6 = u_ + OD_RSHIFT1(t_); \
+ t_ = s5 + sa; \
+ /* 15893/16384 ~= (Sin[21*Pi/64] + Cos[21*Pi/64])/Sqrt[2] ~=
+ 0.970031253194544 */ \
+ u_ = (sa*15893 + 8192) >> 14; \
+ /* 3981/8192 ~= (Sin[21*Pi/64] - Cos[21*Pi/64])*Sqrt[2] ~=
+ 0.48596035980652796 */ \
+ sa = (s5*3981 + 4096) >> 13; \
+ /* 1489/2048 ~= Cos[21*Pi/64]*Sqrt[2] ~= 0.72705107329128 */ \
+ t_ = (t_*1489 + 1024) >> 11; \
+ sa += t_; \
+ s5 = OD_RSHIFT1(t_) - u_; \
+ t_ = sd - s2; \
+ /* 30853/32768 ~= (Sin[23*Pi/64] + Cos[23*Pi/64])/Sqrt[2] ~=
+ 0.9415440651830208 */ \
+ u_ = (sd*30853 + 16384) >> 15; \
+ /* 11039/16384 ~= (Sin[23*Pi/64] - Cos[23*Pi/64])*Sqrt[2] ~=
+ 0.6737797067844402 */ \
+ sd = (s2*11039 + 8192) >> 14; \
+ /* 19813/32768 ~= Cos[23*Pi/64]*Sqrt[2] ~= 0.6046542117908008 */ \
+ t_ = (t_*19813 + 16384) >> 15; \
+ sd -= t_; \
+ s2 = OD_RSHIFT1(t_) - u_; \
+ t_ = s3 + sc; \
+ /* 14811/16384 ~= (Sin[25*Pi/64] + Cos[25*Pi/64])/Sqrt[2] ~=
+ 0.9039892931234433 */ \
+ u_ = (sc*14811 + 8192) >> 14; \
+ /* 7005/8192 ~= (Sin[25*Pi/64] - Cos[25*Pi/64])*Sqrt[2] ~=
+ 0.8551101868605642 */ \
+ sc = (s3*7005 + 4096) >> 13; \
+ /* 3903/8192 ~= Cos[25*Pi/64]*Sqrt[2] ~= 0.47643419969316125 */ \
+ t_ = (t_*3903 + 4096) >> 13; \
+ sc += t_; \
+ s3 = u_ - OD_RSHIFT1(t_); \
+ t_ = sb - s4; \
+ /* 14053/16384 ~= (Sin[27*Pi/64] + Cos[27*Pi/64])/Sqrt[2] ~=
+ 0.857728610000272 */ \
+ u_ = (sb*14053 + 8192) >> 14; \
+ /* 8423/8192 ~= (Sin[27*Pi/64] - Cos[27*Pi/64])*Sqrt[2] ~=
+ 1.0282054883864435 */ \
+ sb = (s4*8423 + 4096) >> 13; \
+ /* 2815/8192 ~= Cos[27*Pi/64]*Sqrt[2] ~= 0.34362586580705035 */ \
+ t_ = (t_*2815 + 4096) >> 13; \
+ sb -= t_; \
+ s4 = OD_RSHIFT1(t_) - u_; \
+ t_ = s7 + s8; \
+ /* 1645/2048 ~= (Sin[29*Pi/64] + Cos[29*Pi/64])/Sqrt[2] ~=
+ 0.8032075314806449 */ \
+ u_ = (s8*1645 + 1024) >> 11; \
+ /* 305/256 ~= (Sin[29*Pi/64] - Cos[29*Pi/64])*Sqrt[2] ~=
+ 1.1913986089848667 */ \
+ s8 = (s7*305 + 128) >> 8; \
+ /* 425/2048 ~= Cos[29*Pi/64]*Sqrt[2] ~= 0.20750822698821159 */ \
+ t_ = (t_*425 + 1024) >> 11; \
+ s8 += t_; \
+ s7 = u_ - OD_RSHIFT1(t_); \
+ t_ = s0 - sf; \
+ /* 24279/32768 ~= (Sin[31*Pi/64] + Cos[31*Pi/64])/Sqrt[2] ~=
+ 0.7409511253549591 */ \
+ u_ = (sf*24279 + 16384) >> 15; \
+ /* 44011/32768 ~= (Sin[31*Pi/64] - Cos[31*Pi/64])*Sqrt[2] ~=
+ 1.3431179096940369 */ \
+ sf = (s0*44011 + 16384) >> 15; \
+ /* 1137/16384 ~= Cos[31*Pi/64]*Sqrt[2] ~= 0.06939217050794069 */ \
+ t_ = (t_*1137 + 8192) >> 14; \
+ s0 = u_ + OD_RSHIFT1(t_); \
+ sf += t_; \
+ s3 -= OD_RSHIFT1(sd); \
+ sd += s3; \
+ s2 += OD_RSHIFT1(sc); \
+ sc -= s2; \
+ s5 -= OD_RSHIFT1(sb); \
+ sb += s5; \
+ s4 -= OD_RSHIFT1(sa); \
+ sa += s4; \
+ s1 += OD_RSHIFT1(sf); \
+ sf -= s1; \
+ s7 -= OD_RSHIFT1(s9); \
+ s9 += s7; \
+ s6 -= OD_RSHIFT1(s8); \
+ s8 += s6; \
+ s0 += OD_RSHIFT1(se); \
+ se -= s0; \
+ sa -= s9; \
+ s9 += OD_RSHIFT1(sa); \
+ s5 += s6; \
+ s6 -= OD_RSHIFT1(s5); \
+ s1 -= s2; \
+ s2 += OD_RSHIFT1(s1); \
+ se += sd; \
+ sd -= OD_RSHIFT1(se); \
+ s0 += sc; \
+ s0h = OD_RSHIFT1(s0); \
+ sc -= s0h; \
+ sf -= s3; \
+ sfh = OD_RSHIFT1(sf); \
+ s3 += sfh; \
+ sb += s7; \
+ sbh = OD_RSHIFT1(sb); \
+ s7 -= sbh; \
+ s4 += s8; \
+ s4h = OD_RSHIFT1(s4); \
+ s8 -= s4h; \
+ t_ = OD_PAVG(s1, se); \
+ /* 9633/8192 ~= Sin[7*Pi/16] + Cos[7*Pi/16] ~= 1.1758756024193586 */ \
+ u_ = (s1*9633 + 4096) >> 13; \
+ /* 12873/16384 ~= Sin[7*Pi/16] - Cos[7*Pi/16] ~= 0.7856949583871022 */ \
+ s1 = (se*12873 + 8192) >> 14; \
+ /* 12785/32768 ~= 2*Cos[7*Pi/16] ~= 0.3901806440322565 */ \
+ t_ = (t_*12785 + 16384) >> 15; \
+ s1 += t_; \
+ se = u_ - t_; \
+ t_ = s6 + s9; \
+ /* 45451/32768 ~= Sin[5*Pi/16] + Cos[5*Pi/16] ~= 1.3870398453221475 */ \
+ u_ = (s9*45451 + 16384) >> 15; \
+ /* 9041/32768 ~= Sin[5*Pi/16] - Cos[5*Pi/16] ~= 0.27589937928294306 */ \
+ s9 = (s6*9041 + 16384) >> 15; \
+ /* 18205/32768 ~= Cos[5*Pi/16] ~= 0.5555702330196022 */ \
+ t_ = (t_*18205 + 16384) >> 15; \
+ s9 += t_; \
+ s6 = u_ - t_; \
+ t_ = OD_PAVG(s5, sa); \
+ /* 22725/16384 ~= Sin[5*Pi/16] + Cos[5*Pi/16] ~= 1.3870398453221475 */ \
+ u_ = (sa*22725 + 8192) >> 14; \
+ /* 9041/32768 ~= Sin[5*Pi/16] - Cos[5*Pi/16] ~= 0.27589937928294306 */ \
+ sa = (s5*9041 + 16384) >> 15; \
+ /* 18205/16384 ~= 2*Cos[5*Pi/16] ~= 1.1111404660392044 */ \
+ t_ = (t_*18205 + 8192) >> 14; \
+ sa += t_; \
+ s5 = t_ - u_; \
+ t_ = s2 + sd; \
+ /* 38531/32768 ~= Sin[7*Pi/16] + Cos[7*Pi/16] ~= 1.1758756024193586 */ \
+ u_ = (s2*38531 + 16384) >> 15; \
+ /* 12873/16384 ~= Sin[7*Pi/16] - Cos[7*Pi/16] ~= 0.7856949583871022 */ \
+ s2 = (sd*12873 + 8192) >> 14; \
+ /* 6393/32768 ~= Cos[7*Pi/16] ~= 0.19509032201612825 */ \
+ t_ = (t_*6393 + 16384) >> 15; \
+ s2 += t_; \
+ sd = u_ - t_; \
+ s3 -= s4h; \
+ s4 += s3; \
+ s8 -= s0h; \
+ s0 += s8; \
+ s7 += sfh; \
+ sf -= s7; \
+ sc += sbh; \
+ sb -= sc; \
+ s6 += OD_RSHIFT1(se); \
+ se -= s6; \
+ s9 -= OD_RSHIFT1(s1); \
+ s1 += s9; \
+ sd -= OD_RSHIFT1(s5); \
+ s5 += sd; \
+ s2 -= OD_RSHIFT1(sa); \
+ sa += s2; \
+ t_ = OD_PAVG(s3, sc); \
+ /* 21407/16384 ~= Sin[3*Pi/8] + Cos[3*Pi/8] ~= 1.3065629648763766 */ \
+ u_ = (s3*21407 + 8192) >> 14; \
+ /* 8867/16384 ~= Sin[3*Pi/8] - Cos[3*Pi/8] ~= 0.5411961001461969 */ \
+ s3 = (sc*8867 + 8192) >> 14; \
+ /* 3135/4096 ~= 2*Cos[3*Pi/8] ~= 0.7653668647301796 */ \
+ t_ = (t_*3135 + 2048) >> 12; \
+ s3 += t_; \
+ sc = u_ - t_; \
+ t_ = OD_PAVG(s4, sb); \
+ /* 21407/16384 ~= Sin[3*Pi/8] + Cos[3*Pi/8] ~= 1.3065629648763766 */ \
+ u_ = (s4*21407 + 8192) >> 14; \
+ /* 8867/16384 ~= Sin[3*Pi/8] - Cos[3*Pi/8] ~= 0.5411961001461969 */ \
+ s4 = (sb*8867 + 8192) >> 14; \
+ /* 3135/4096 ~= 2*Cos[3*Pi/8] ~= 0.7653668647301796 */ \
+ t_ = (t_*3135 + 2048) >> 12; \
+ s4 += t_; \
+ sb = u_ - t_; \
+ t_ = OD_PAVG(s5, sa); \
+ /* 11585/8192 ~= Sin[Pi/4] + Cos[Pi/4] ~= 1.4142135623730951 */ \
+ u_ = (sa*11585 + 4096) >> 13; \
+ /* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
+ sa = (t_*11585 + 4096) >> 13; \
+ s5 = sa - u_; \
+ t_ = OD_PAVG(s6, -s9); \
+ /* 11585/8192 ~= Sin[Pi/4] + Cos[Pi/4] ~= 1.4142135623730951 */ \
+ s6 = (s9*11585 + 4096) >> 13; \
+ /* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
+ s9 = (t_*11585 + 4096) >> 13; \
+ s6 += s9; \
+ t_ = OD_PAVG(s7, -s8); \
+ /* 11585/8192 ~= Sin[Pi/4] + Cos[Pi/4] ~= 1.4142135623730951 */ \
+ s7 = (s8*11585 + 4096) >> 13; \
+ /* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
+ s8 = (t_*11585 + 4096) >> 13; \
+ s7 += s8; \
+ } \
+ while (0)
+
+#define OD_IDST_16_FLAT(s0, s1, s2, s3, s4, s5, s6, s7, \
+ s8, s9, sa, sb, sc, sd, se, sf) \
+ /* Embedded 16-point orthonormal Type-IV iDST with flattened rotations. Every argument must be a modifiable lvalue; all sixteen are overwritten. */ \
+ do { \
+ int t_; \
+ int u_; \
+ int s0h; /* The *h locals cache OD_RSHIFT1() halves that later steps reuse. */ \
+ int s1h; \
+ int s2h; \
+ int s3h; \
+ int s4h; \
+ int s5h; \
+ int s6h; \
+ int s7h; \
+ int sbh; \
+ int sfh; \
+ t_ = OD_PAVG(s6, s9); \
+ /* 11585/8192 ~= Sin[Pi/4] + Cos[Pi/4] ~= 1.4142135623730951 */ \
+ s9 = (s6*11585 + 4096) >> 13; \
+ /* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
+ s6 = (t_*11585 + 4096) >> 13; \
+ s9 -= s6; \
+ t_ = OD_PAVG(s5, sa); \
+ /* 11585/8192 ~= Sin[Pi/4] + Cos[Pi/4] ~= 1.4142135623730951 */ \
+ sa = (s5*11585 + 4096) >> 13; \
+ /* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
+ s5 = (t_*11585 + 4096) >> 13; \
+ sa -= s5; \
+ t_ = OD_PAVG(s7, s8); \
+ /* 11585/8192 ~= Sin[Pi/4] + Cos[Pi/4] ~= 1.4142135623730951 */ \
+ s8 = (s7*11585 + 4096) >> 13; \
+ /* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
+ s7 = (t_*11585 + 4096) >> 13; \
+ s8 -= s7; \
+ t_ = OD_PAVG(s3, sc); \
+ /* 21407/16384 ~= Sin[3*Pi/8] + Cos[3*Pi/8] ~= 1.3065629648763766 */ \
+ u_ = (s3*21407 + 8192) >> 14; \
+ /* 8867/16384 ~= Sin[3*Pi/8] - Cos[3*Pi/8] ~= 0.5411961001461969 */ \
+ s3 = (sc*8867 + 8192) >> 14; \
+ /* 3135/4096 ~= 2*Cos[3*Pi/8] ~= 0.7653668647301796 */ \
+ t_ = (t_*3135 + 2048) >> 12; \
+ s3 += t_; \
+ sc = u_ - t_; \
+ t_ = OD_PAVG(sb, -s4); \
+ /* 21407/16384 ~= Sin[3*Pi/8] + Cos[3*Pi/8] ~= 1.3065629648763766 */ \
+ u_ = (sb*21407 + 8192) >> 14; \
+ /* 8867/16384 ~= Sin[3*Pi/8] - Cos[3*Pi/8] ~= 0.5411961001461969 */ \
+ sb = (s4*8867 + 8192) >> 14; \
+ /* 3135/4096 ~= 2*Cos[3*Pi/8] ~= 0.7653668647301796 */ \
+ t_ = (t_*3135 + 2048) >> 12; \
+ sb -= t_; \
+ s4 = t_ - u_; \
+ sa += s2; \
+ s2 -= OD_RSHIFT1(sa); \
+ s5 -= sd; \
+ sd += OD_RSHIFT1(s5); \
+ s1 -= s9; \
+ s9 += OD_RSHIFT1(s1); \
+ se += s6; \
+ s6 -= OD_RSHIFT1(se); \
+ sb += sc; \
+ sbh = OD_RSHIFT1(sb); \
+ sc -= sbh; \
+ sf += s7; \
+ sfh = OD_RSHIFT1(sf); \
+ s7 -= sfh; \
+ s0 -= s8; \
+ s0h = OD_RSHIFT1(s0); \
+ s8 += s0h; \
+ s4 += s3; \
+ s4h = OD_RSHIFT1(s4); \
+ s3 -= s4h; \
+ t_ = sd - s2; \
+ /* 38531/32768 ~= Sin[7*Pi/16] + Cos[7*Pi/16] ~= 1.1758756024193586 */ \
+ u_ = (sd*38531 + 16384) >> 15; \
+ /* 12873/16384 ~= Sin[7*Pi/16] - Cos[7*Pi/16] ~= 0.7856949583871022 */ \
+ sd = (s2*12873 + 8192) >> 14; \
+ /* 6393/32768 ~= Cos[7*Pi/16] ~= 0.19509032201612825 */ \
+ t_ = (t_*6393 + 16384) >> 15; \
+ sd -= t_; \
+ s2 = t_ - u_; \
+ t_ = OD_PAVG(s5, -sa); \
+ /* 22725/16384 ~= Sin[5*Pi/16] + Cos[5*Pi/16] ~= 1.3870398453221475 */ \
+ u_ = (s5*22725 + 8192) >> 14; \
+ /* 9041/32768 ~= Sin[5*Pi/16] - Cos[5*Pi/16] ~= 0.27589937928294306 */ \
+ s5 = (sa*9041 + 16384) >> 15; \
+ /* 18205/16384 ~= 2*Cos[5*Pi/16] ~= 1.1111404660392044 */ \
+ t_ = (t_*18205 + 8192) >> 14; \
+ s5 -= t_; \
+ sa = t_ - u_; \
+ t_ = s6 + s9; \
+ /* 45451/32768 ~= Sin[5*Pi/16] + Cos[5*Pi/16] ~= 1.3870398453221475 */ \
+ u_ = (s9*45451 + 16384) >> 15; \
+ /* 9041/32768 ~= Sin[5*Pi/16] - Cos[5*Pi/16] ~= 0.27589937928294306 */ \
+ s9 = (s6*9041 + 16384) >> 15; \
+ /* 18205/32768 ~= Cos[5*Pi/16] ~= 0.5555702330196022 */ \
+ t_ = (t_*18205 + 16384) >> 15; \
+ s9 += t_; \
+ s6 = u_ - t_; \
+ t_ = OD_PAVG(s1, se); \
+ /* 9633/8192 ~= Sin[7*Pi/16] + Cos[7*Pi/16] ~= 1.1758756024193586 */ \
+ u_ = (s1*9633 + 4096) >> 13; \
+ /* 12873/16384 ~= Sin[7*Pi/16] - Cos[7*Pi/16] ~= 0.7856949583871022 */ \
+ s1 = (se*12873 + 8192) >> 14; \
+ /* 12785/32768 ~= 2*Cos[7*Pi/16] ~= 0.3901806440322565 */ \
+ t_ = (t_*12785 + 16384) >> 15; \
+ s1 += t_; \
+ se = u_ - t_; \
+ s8 -= s4h; \
+ s4 += s8; \
+ s7 += sbh; \
+ sb -= s7; \
+ s3 -= sfh; \
+ sf += s3; \
+ sc += s0h; \
+ s0 -= sc; \
+ sd += OD_RSHIFT1(se); \
+ se -= sd; \
+ s2 += OD_RSHIFT1(s1); \
+ s1 -= s2; \
+ s6 -= OD_RSHIFT1(s5); \
+ s5 += s6; \
+ s9 -= OD_RSHIFT1(sa); \
+ sa += s9; \
+ s0 -= se; \
+ s0h = OD_RSHIFT1(s0); \
+ se += s0h; \
+ s1 -= sf; \
+ s1h = OD_RSHIFT1(s1); \
+ sf += s1h; \
+ s2 += sc; \
+ s2h = OD_RSHIFT1(s2); \
+ sc -= s2h; \
+ s3 += sd; \
+ s3h = OD_RSHIFT1(s3); \
+ sd -= s3h; \
+ s4 -= sa; \
+ s4h = OD_RSHIFT1(s4); \
+ sa += s4h; \
+ s5 -= sb; \
+ s5h = OD_RSHIFT1(s5); \
+ sb += s5h; \
+ s6 += s8; \
+ s6h = OD_RSHIFT1(s6); \
+ s8 -= s6h; \
+ s7 += s9; \
+ s7h = OD_RSHIFT1(s7); \
+ s9 -= s7h; \
+ t_ = se - s1h; \
+ /* 32729/32768 ~= (Sin[17*Pi/64] + Cos[17*Pi/64])/Sqrt[2] ~=
+ 0.9987954562051723 */ \
+ u_ = (s1*32729 + 16384) >> 15; \
+ /* 201/2048 ~= (Sin[17*Pi/64] - Cos[17*Pi/64])*Sqrt[2] ~=
+ 0.09813534865483615 */ \
+ s1 = (se*201 + 1024) >> 11; \
+ /* 31121/32768 ~= Cos[17*Pi/64]*Sqrt[2] ~=
+ 0.9497277818777543 */ \
+ t_ = (t_*31121 + 16384) >> 15; \
+ s1 += t_; \
+ se = u_ + t_; \
+ t_ = s6h + s9; \
+ /* 32413/32768 ~= (Sin[19*Pi/64] + Cos[19*Pi/64])/Sqrt[2] ~=
+ 0.9891765099647809 */ \
+ u_ = (s6*32413 + 16384) >> 15; \
+ /* 601/2048 ~= (Sin[19*Pi/64] - Cos[19*Pi/64])*Sqrt[2] ~=
+ 0.29346094891072355 */ \
+ s6 = (s9*601 + 1024) >> 11; \
+ /* 27605/32768 ~= Cos[19*Pi/64]*Sqrt[2] ~= 0.8424460355094193 */ \
+ t_ = (t_*27605 + 16384) >> 15; \
+ s6 += t_; \
+ s9 = u_ - t_; \
+ t_ = sa - s5h; \
+ /* 15893/16384 ~= (Sin[21*Pi/64] + Cos[21*Pi/64])/Sqrt[2] ~=
+ 0.970031253194544 */ \
+ u_ = (s5*15893 + 8192) >> 14; \
+ /* 3981/8192 ~= (Sin[21*Pi/64] - Cos[21*Pi/64])*Sqrt[2] ~=
+ 0.48596035980652796 */ \
+ s5 = (sa*3981 + 4096) >> 13; \
+ /* 1489/2048 ~= Cos[21*Pi/64]*Sqrt[2] ~= 0.72705107329128 */ \
+ t_ = (t_*1489 + 1024) >> 11; \
+ s5 += t_; \
+ sa = u_ + t_; \
+ t_ = s2h + sd; \
+ /* 30853/32768 ~= (Sin[23*Pi/64] + Cos[23*Pi/64])/Sqrt[2] ~=
+ 0.9415440651830208 */ \
+ u_ = (s2*30853 + 16384) >> 15; \
+ /* 11039/16384 ~= (Sin[23*Pi/64] - Cos[23*Pi/64])*Sqrt[2] ~=
+ 0.6737797067844402 */ \
+ s2 = (sd*11039 + 8192) >> 14; \
+ /* 19813/32768 ~= Cos[23*Pi/64]*Sqrt[2] ~= 0.6046542117908008 */ \
+ t_ = (t_*19813 + 16384) >> 15; \
+ s2 += t_; \
+ sd = u_ - t_; \
+ t_ = sc - s3h; \
+ /* 14811/16384 ~= (Sin[25*Pi/64] + Cos[25*Pi/64])/Sqrt[2] ~=
+ 0.9039892931234433 */ \
+ u_ = (s3*14811 + 8192) >> 14; \
+ /* 7005/8192 ~= (Sin[25*Pi/64] - Cos[25*Pi/64])*Sqrt[2] ~=
+ 0.8551101868605642 */ \
+ s3 = (sc*7005 + 4096) >> 13; \
+ /* 3903/8192 ~= Cos[25*Pi/64]*Sqrt[2] ~= 0.47643419969316125 */ \
+ t_ = (t_*3903 + 4096) >> 13; \
+ s3 += t_; \
+ sc = u_ + t_; \
+ t_ = s4h + sb; \
+ /* 14053/16384 ~= (Sin[27*Pi/64] + Cos[27*Pi/64])/Sqrt[2] ~=
+ 0.857728610000272 */ \
+ u_ = (s4*14053 + 8192) >> 14; \
+ /* 8423/8192 ~= (Sin[27*Pi/64] - Cos[27*Pi/64])*Sqrt[2] ~=
+ 1.0282054883864435 */ \
+ s4 = (sb*8423 + 4096) >> 13; \
+ /* 2815/8192 ~= Cos[27*Pi/64]*Sqrt[2] ~= 0.34362586580705035 */ \
+ t_ = (t_*2815 + 4096) >> 13; \
+ s4 += t_; \
+ sb = u_ - t_; \
+ t_ = s8 - s7h; \
+ /* 1645/2048 ~= (Sin[29*Pi/64] + Cos[29*Pi/64])/Sqrt[2] ~=
+ 0.8032075314806449 */ \
+ u_ = (s7*1645 + 1024) >> 11; \
+ /* 305/256 ~= (Sin[29*Pi/64] - Cos[29*Pi/64])*Sqrt[2] ~=
+ 1.1913986089848667 */ \
+ s7 = (s8*305 + 128) >> 8; \
+ /* 425/2048 ~= Cos[29*Pi/64]*Sqrt[2] ~= 0.20750822698821159 */ \
+ t_ = (t_*425 + 1024) >> 11; \
+ s7 += t_; \
+ s8 = u_ + t_; \
+ t_ = s0h + sf; \
+ /* 24279/32768 ~= (Sin[31*Pi/64] + Cos[31*Pi/64])/Sqrt[2] ~=
+ 0.7409511253549591 */ \
+ u_ = (s0*24279 + 16384) >> 15; \
+ /* 44011/32768 ~= (Sin[31*Pi/64] - Cos[31*Pi/64])*Sqrt[2] ~=
+ 1.3431179096940369 */ \
+ s0 = (sf*44011 + 16384) >> 15; \
+ /* 1137/16384 ~= Cos[31*Pi/64]*Sqrt[2] ~= 0.06939217050794069 */ \
+ t_ = (t_*1137 + 8192) >> 14; \
+ s0 += t_; \
+ sf = u_ - t_; \
+ } \
+ while (0)
+
/* TODO: rewrite this to match OD_FDST_16. */
#define OD_FDST_16_ASYM_PR(t0, t0h, t8, t4, t4h, tc, t2, ta, t6, te, \
t1, t9, t5, td, t3, tb, t7, t7h, tf) \
@@ -5772,39 +6237,40 @@
int sd;
int se;
int sf;
- s0 = x[15*xstride];
- s8 = x[14*xstride];
- s4 = x[13*xstride];
- sc = x[12*xstride];
- s2 = x[11*xstride];
- sa = x[10*xstride];
- s6 = x[9*xstride];
- se = x[8*xstride];
- s1 = x[7*xstride];
- s9 = x[6*xstride];
- s5 = x[5*xstride];
- sd = x[4*xstride];
- s3 = x[3*xstride];
- sb = x[2*xstride];
- s7 = x[1*xstride];
- sf = x[0*xstride];
- OD_FDST_16_PR(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
- y[0] = (od_coeff)sf;
- y[1] = (od_coeff)-se;
- y[2] = (od_coeff)sd;
- y[3] = (od_coeff)-sc;
- y[4] = (od_coeff)sb;
- y[5] = (od_coeff)-sa;
- y[6] = (od_coeff)s9;
- y[7] = (od_coeff)-s8;
- y[8] = (od_coeff)s7;
- y[9] = (od_coeff)-s6;
- y[10] = (od_coeff)s5;
- y[11] = (od_coeff)-s4;
- y[12] = (od_coeff)s3;
- y[13] = (od_coeff)-s2;
- y[14] = (od_coeff)s1;
- y[15] = (od_coeff)-s0;
+ s0 = x[0*xstride];
+ s8 = x[1*xstride];
+ s4 = x[2*xstride];
+ sc = x[3*xstride];
+ s2 = x[4*xstride];
+ sa = x[5*xstride];
+ s6 = x[6*xstride];
+ se = x[7*xstride];
+ s1 = x[8*xstride];
+ s9 = x[9*xstride];
+ s5 = x[10*xstride];
+ sd = x[11*xstride];
+ s3 = x[12*xstride];
+ sb = x[13*xstride];
+ s7 = x[14*xstride];
+ sf = x[15*xstride];
+ OD_FDST_16_FLAT(s0, s8, s4, sc, s2, sa, s6, se,
+ s1, s9, s5, sd, s3, sb, s7, sf);
+ y[0] = (od_coeff)s0;
+ y[1] = (od_coeff)s1;
+ y[2] = (od_coeff)s2;
+ y[3] = (od_coeff)s3;
+ y[4] = (od_coeff)s4;
+ y[5] = (od_coeff)s5;
+ y[6] = (od_coeff)s6;
+ y[7] = (od_coeff)s7;
+ y[8] = (od_coeff)s8;
+ y[9] = (od_coeff)s9;
+ y[10] = (od_coeff)sa;
+ y[11] = (od_coeff)sb;
+ y[12] = (od_coeff)sc;
+ y[13] = (od_coeff)sd;
+ y[14] = (od_coeff)se;
+ y[15] = (od_coeff)sf;
}
void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]) {
@@ -5824,39 +6290,40 @@
int sd;
int se;
int sf;
- s0 = -y[15];
- s8 = y[14];
- s4 = -y[13];
- sc = y[12];
- s2 = -y[11];
- sa = y[10];
- s6 = -y[9];
- se = y[8];
- s1 = -y[7];
- s9 = y[6];
- s5 = -y[5];
- sd = y[4];
- s3 = -y[3];
- sb = y[2];
- s7 = -y[1];
- sf = y[0];
- OD_IDST_16_PR(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
- x[0*xstride] = (od_coeff)sf;
- x[1*xstride] = (od_coeff)se;
- x[2*xstride] = (od_coeff)sd;
- x[3*xstride] = (od_coeff)sc;
- x[4*xstride] = (od_coeff)sb;
- x[5*xstride] = (od_coeff)sa;
- x[6*xstride] = (od_coeff)s9;
- x[7*xstride] = (od_coeff)s8;
- x[8*xstride] = (od_coeff)s7;
- x[9*xstride] = (od_coeff)s6;
- x[10*xstride] = (od_coeff)s5;
- x[11*xstride] = (od_coeff)s4;
- x[12*xstride] = (od_coeff)s3;
- x[13*xstride] = (od_coeff)s2;
- x[14*xstride] = (od_coeff)s1;
- x[15*xstride] = (od_coeff)s0;
+ s0 = y[0];
+ s8 = y[1];
+ s4 = y[2];
+ sc = y[3];
+ s2 = y[4];
+ sa = y[5];
+ s6 = y[6];
+ se = y[7];
+ s1 = y[8];
+ s9 = y[9];
+ s5 = y[10];
+ sd = y[11];
+ s3 = y[12];
+ sb = y[13];
+ s7 = y[14];
+ sf = y[15];
+ OD_IDST_16_FLAT(s0, s8, s4, sc, s2, sa, s6, se,
+ s1, s9, s5, sd, s3, sb, s7, sf);
+ x[0*xstride] = (od_coeff)s0;
+ x[1*xstride] = (od_coeff)s1;
+ x[2*xstride] = (od_coeff)s2;
+ x[3*xstride] = (od_coeff)s3;
+ x[4*xstride] = (od_coeff)s4;
+ x[5*xstride] = (od_coeff)s5;
+ x[6*xstride] = (od_coeff)s6;
+ x[7*xstride] = (od_coeff)s7;
+ x[8*xstride] = (od_coeff)s8;
+ x[9*xstride] = (od_coeff)s9;
+ x[10*xstride] = (od_coeff)sa;
+ x[11*xstride] = (od_coeff)sb;
+ x[12*xstride] = (od_coeff)sc;
+ x[13*xstride] = (od_coeff)sd;
+ x[14*xstride] = (od_coeff)se;
+ x[15*xstride] = (od_coeff)sf;
}
void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride) {