Add new 4-point Type-II DCT implementation. subset-1: master@2017-11-27T19:24:03.517Z -> new_dct4@2017-11-27T20:52:07.119Z PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000 -0.0113 | 0.0459 | 0.1285 | 0.0085 | 0.0005 | 0.0093 | 0.0352 Change-Id: I0a76037ea2a08071ca9c4013979cca3ee3efe55c
diff --git a/av1/common/daala_tx.c b/av1/common/daala_tx.c index f77f4af..d0aab49 100644 --- a/av1/common/daala_tx.c +++ b/av1/common/daala_tx.c
@@ -55,6 +55,9 @@ } \ while (0) +#define OD_FDCT_2_ASYM_FLAT OD_FDCT_2_ASYM_PR +#define OD_IDCT_2_ASYM_FLAT OD_IDCT_2_ASYM_PR + #define OD_FDST_2_PR(p0, p1) \ /* Embedded 2-point orthonormal Type-IV fDST. */ \ do { \ @@ -109,6 +112,40 @@ } \ while (0) +#define OD_FDST_2_ASYM_FLAT(p0, p0h, p1) \ + /* Embedded 2-point asymmetric Type-IV fDST with flattened rotations. */ \ + do { \ + int t_; \ + int u_; \ + t_ = p0h - p1; \ + /* 3135/8192 ~= (Cos[3*Pi/8] - Sin[3*Pi/8])/Sqrt[2] = 0.38268343236509 */ \ + u_ = (p0*3135 + 4096) >> 13; \ + /* 60547/32768 ~= (Cos[3*Pi/8] + Sin[3*Pi/8])*Sqrt[2] = 1.847759065023 */ \ + p0 = (p1*60547 + 16384) >> 15; \ + /* 8867/16384 ~= Cos[3*Pi/8]*Sqrt[2] = 0.5411961001461971 */ \ + t_ = (t_*8867 + 8192) >> 14; \ + p0 += t_; \ + p1 = u_ + t_; \ + } \ + while (0) + +#define OD_IDST_2_ASYM_FLAT(p0, p1) \ + /* Embedded 2-point asymmetric Type-IV iDST with flattened rotations. */ \ + do { \ + int t_; \ + int u_; \ + t_ = (p0 + p1 + 1) >> 1; \ + /* 3135/4096 ~= (Cos[Pi/8] - Sin[Pi/8])*Sqrt[2] = 0.7653668647301795 */ \ + u_ = (p1*3135 + 2048) >> 12; \ + /* 15137/16384 ~= (Cos[Pi/8] + Sin[Pi/8])/Sqrt[2] = 0.9238795325112867 */ \ + p1 = (p0*15137 + 8192) >> 14; \ + /* 8867/16384 ~= Cos[3*Pi/8]*2*Sqrt[2] = 1.082392200292394 */ \ + t_ = (t_*8867 + 4096) >> 13; \ + p0 = u_ + t_; \ + p1 -= OD_RSHIFT1(t_); \ + } \ + while (0) + #define OD_FDCT_4_PR(q0, q2, q1, q3) \ /* Embedded 4-point orthonormal Type-II fDCT. */ \ do { \ @@ -166,6 +203,35 @@ } \ while (0) +#define OD_FDCT_4_FLAT(q0, q1, q2, q3) \ + /* Embedded 4-point orthonormal Type-II fDCT with flattened rotations. */ \ + do { \ + int q1h; \ + int q3h; \ + q3 = q0 - q3; \ + q3h = OD_RSHIFT1(q3); \ + q0 -= q3h; \ + q1 += q2; \ + q1h = OD_RSHIFT1(q1); \ + q2 -= q1h; \ + OD_FDCT_2_ASYM_FLAT(q0, q1, q1h); \ + OD_FDST_2_ASYM_FLAT(q3, q3h, q2); \ + } \ + while (0) + +#define OD_IDCT_4_FLAT(q0, q2, q1, q3) \ + /* Embedded 4-point orthonormal Type-II iDCT with flattened rotations. */ \ + do { \ + int q1h; \ + OD_IDST_2_ASYM_FLAT(q3, q2); \ + OD_IDCT_2_ASYM_FLAT(q0, q1, q1h); \ + q2 += q1h; \ + q1 -= q2; \ + q0 += OD_RSHIFT1(q3); \ + q3 = q0 - q3; \ + } \ + while (0) + #define OD_FDST_4_PR(q0, q2, q1, q3) \ /* Embedded 4-point orthonormal Type-IV fDST. */ \ do { \ @@ -3754,80 +3820,36 @@ /* 4-point orthonormal Type-II fDCT. */ void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) { - /* 4 "muls", 8 adds, 2 shifts */ int q0; int q1; int q2; int q3; - int u1; - int t0; - int t1; - int t2; - int t3; q0 = x[0*xstride]; q1 = x[1*xstride]; q2 = x[2*xstride]; q3 = x[3*xstride]; - q3 = q0 - q3; - q0 -= OD_RSHIFT1(q3); - u1 = q1 + q2; - q2 = q1 - q2; - /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */ - t0 = (q3*8867 + 16384) >> 15; - /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */ - t1 = (q2*21407 + 16384) >> 15; - /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */ - t2 = (q3*21407 + 16384) >> 15; - /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */ - t3 = (q2*8867 + 16384) >> 15; - q0 += OD_RSHIFT1(u1); - q1 = q0 - u1; - q2 = t3 + t2; - q3 = t0 - t1; - y[0] = q0; - y[1] = q2; - y[2] = q1; - y[3] = q3; + OD_FDCT_4_FLAT(q0, q1, q2, q3); + y[0] = (od_coeff)q0; + y[1] = (od_coeff)q2; + y[2] = (od_coeff)q1; + y[3] = (od_coeff)q3; } /* 4-point orthonormal Type-II iDCT. */ void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) { - /* 4 "muls", 8 adds, 1 shift */ int q0; int q1; int q2; int q3; - int q1h; - int u0; - int t0; - int t1; - int t2; - int t3; q0 = y[0]; q2 = y[1]; q1 = y[2]; q3 = y[3]; - /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */ - t0 = (q3*8867 + 16384) >> 15; - /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */ - t1 = (q2*21407 + 16384) >> 15; - /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */ - t2 = (q3*21407 + 16384) >> 15; - /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */ - t3 = (q2*8867 + 16384) >> 15; - q3 = t0 + t1; - q2 = t3 - t2; - q1 = q0 - q1; - q1h = OD_RSHIFT1(q1); - q0 -= q1h; - u0 = q0 + q3; - q3 = q0 - q3; - q2 = q1h - q2; - q1 -= q2; - x[0*xstride] = u0; - x[1*xstride] = q1; - x[2*xstride] = q2; - x[3*xstride] = q3; + OD_IDCT_4_FLAT(q0, q2, q1, q3); + x[0*xstride] = (od_coeff)q0; + x[1*xstride] = (od_coeff)q1; + x[2*xstride] = (od_coeff)q2; + x[3*xstride] = (od_coeff)q3; } /* 4-point orthonormal Type-VII fDST. */