Monty Montgomery | 02078a3 | 2017-07-11 21:22:29 -0400 | [diff] [blame^] | 1 | #include "av1/common/daala_tx.h" |
| 2 | #include "av1/common/odintrin.h" |
| 3 | |
/* clang-format off */
| 5 | |
/* Right shift used by the DCT/DST butterflies in this file.
   Forwards both arguments unchanged to OD_UNBIASED_RSHIFT32, which is not
   defined in this file (presumably supplied by av1/common/odintrin.h --
   confirm). */
#define OD_DCT_RSHIFT(_a, _b) OD_UNBIASED_RSHIFT32((_a), (_b))
| 7 | |
/* TODO: Daala DCT overflow checks need to be ported as a later test */
/* Until that port happens the check is a no-op: with OD_DCT_CHECK_OVERFLOW
   undefined the macro expands to nothing. When OD_DCT_CHECK_OVERFLOW *is*
   defined, no definition is provided here at all. */
#if !defined(OD_DCT_CHECK_OVERFLOW)
# define OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx)
#endif
| 13 | |
/* Embedded 2-point asymmetric Type-II fDCT.
   p0 and p1 are updated in place and must be modifiable lvalues; p1h is only
   read. OD_FDCT_4 passes OD_DCT_RSHIFT(p1, 1) as p1h.
     p0 <- p0 + p1h
     p1 <- (new p0) - p1 */
#define OD_FDCT_2_ASYM(p0, p1, p1h) \
  do { \
    (p0) += (p1h); \
    (p1) = (p0) - (p1); \
  } \
  while (0)
| 21 | |
/* Embedded 2-point asymmetric Type-II iDCT.
   All three arguments are written and must be modifiable lvalues:
     p1  <- p0 - p1
     p1h <- OD_DCT_RSHIFT(new p1, 1)   (output, made available to the caller)
     p0  <- p0 - p1h
   Requires OD_DCT_RSHIFT to be defined where the macro is expanded. */
#define OD_IDCT_2_ASYM(p0, p1, p1h) \
  do { \
    (p1) = (p0) - (p1); \
    (p1h) = OD_DCT_RSHIFT((p1), 1); \
    (p0) -= (p1h); \
  } \
  while (0)
| 30 | |
/* Embedded 2-point asymmetric Type-IV fDST.
   Three integer lifting steps applied in place to p0 and p1, which must be
   modifiable integer lvalues. Each step adds half of the divisor before the
   right shift, so the scaled product is rounded instead of truncated.
   OD_IDST_2_ASYM runs the same three steps in the opposite order with the
   opposite signs.
   Requires OD_DCT_OVERFLOW_CHECK to be defined where the macro is expanded. */
#define OD_FDST_2_ASYM(p0, p1) \
  do { \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 11507, 8192, 187); \
    (p0) -= ((p1)*11507 + 8192) >> 14; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    OD_DCT_OVERFLOW_CHECK(p0, 669, 512, 188); \
    (p1) += ((p0)*669 + 512) >> 10; \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    OD_DCT_OVERFLOW_CHECK(p1, 4573, 2048, 189); \
    (p0) -= ((p1)*4573 + 2048) >> 12; \
  } \
  while (0)
| 45 | |
/* Embedded 2-point asymmetric Type-IV iDST.
   Three integer lifting steps applied in place to p0 and p1, which must be
   modifiable integer lvalues. The steps use the same constants as
   OD_FDST_2_ASYM, taken in the opposite order and with the opposite signs.
   Each step adds half of the divisor before the right shift. */
#define OD_IDST_2_ASYM(p0, p1) \
  do { \
    /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \
    (p0) += ((p1)*4573 + 2048) >> 12; \
    /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \
    (p1) -= ((p0)*669 + 512) >> 10; \
    /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \
    (p0) += ((p1)*11507 + 8192) >> 14; \
  } \
  while (0)
| 57 | |
/* Embedded 4-point orthonormal Type-II fDCT.
   All four arguments are transformed in place and must be modifiable int
   lvalues. Note the parameter order: (q0, q2, q1, q3).
   The macro declares scratch variables q2h and q3h in its own block scope, so
   an argument that is itself named q2h or q3h would be shadowed.
   Requires OD_DCT_RSHIFT, OD_FDCT_2_ASYM and OD_FDST_2_ASYM. */
#define OD_FDCT_4(q0, q2, q1, q3) \
  do { \
    int q2h; \
    int q3h; \
    /* Difference/sum butterflies; the halves are kept for later steps. */ \
    (q3) = (q0) - (q3); \
    q3h = OD_DCT_RSHIFT((q3), 1); \
    (q0) -= q3h; \
    (q2) += (q1); \
    q2h = OD_DCT_RSHIFT((q2), 1); \
    (q1) = q2h - (q1); \
    /* 2-point DCT on (q0, q2) and 2-point DST on (q3, q1). */ \
    OD_FDCT_2_ASYM((q0), (q2), q2h); \
    OD_FDST_2_ASYM((q3), (q1)); \
  } \
  while (0)
| 73 | |
/* Embedded 4-point orthonormal Type-II iDCT.
   All four arguments are transformed in place and must be modifiable int
   lvalues. Note the parameter order: (q0, q2, q1, q3).
   The macro declares scratch variables q1h and q3h in its own block scope, so
   an argument that is itself named q1h or q3h would be shadowed.
   Requires OD_DCT_RSHIFT, OD_IDST_2_ASYM and OD_IDCT_2_ASYM. */
#define OD_IDCT_4(q0, q2, q1, q3) \
  do { \
    int q1h; \
    int q3h; \
    /* 2-point DST on (q3, q2) and 2-point DCT on (q0, q1); the latter also \
       writes q1h. */ \
    OD_IDST_2_ASYM((q3), (q2)); \
    OD_IDCT_2_ASYM((q0), (q1), q1h); \
    /* Final butterflies. */ \
    q3h = OD_DCT_RSHIFT((q3), 1); \
    (q0) += q3h; \
    (q3) = (q0) - (q3); \
    (q2) = q1h - (q2); \
    (q1) -= (q2); \
  } \
  while (0)
| 88 | |
| 89 | void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) { |
| 90 | int q0; |
| 91 | int q1; |
| 92 | int q2; |
| 93 | int q3; |
| 94 | q0 = x[0*xstride]; |
| 95 | q2 = x[1*xstride]; |
| 96 | q1 = x[2*xstride]; |
| 97 | q3 = x[3*xstride]; |
| 98 | OD_FDCT_4(q0, q2, q1, q3); |
| 99 | y[0] = (od_coeff)q0; |
| 100 | y[1] = (od_coeff)q1; |
| 101 | y[2] = (od_coeff)q2; |
| 102 | y[3] = (od_coeff)q3; |
| 103 | } |
| 104 | |
| 105 | void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) { |
| 106 | int q0; |
| 107 | int q1; |
| 108 | int q2; |
| 109 | int q3; |
| 110 | q0 = y[0]; |
| 111 | q2 = y[1]; |
| 112 | q1 = y[2]; |
| 113 | q3 = y[3]; |
| 114 | OD_IDCT_4(q0, q2, q1, q3); |
| 115 | x[0*xstride] = q0; |
| 116 | x[1*xstride] = q1; |
| 117 | x[2*xstride] = q2; |
| 118 | x[3*xstride] = q3; |
| 119 | } |