Add new 4-point Type-II DCT implementation.
subset-1:
master@2017-11-27T19:24:03.517Z -> new_dct4@2017-11-27T20:52:07.119Z
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
-0.0113 | 0.0459 | 0.1285 | 0.0085 | 0.0005 | 0.0093 | 0.0352
Change-Id: I0a76037ea2a08071ca9c4013979cca3ee3efe55c
diff --git a/av1/common/daala_tx.c b/av1/common/daala_tx.c
index f77f4af..d0aab49 100644
--- a/av1/common/daala_tx.c
+++ b/av1/common/daala_tx.c
@@ -55,6 +55,9 @@
} \
while (0)
+#define OD_FDCT_2_ASYM_FLAT OD_FDCT_2_ASYM_PR
+#define OD_IDCT_2_ASYM_FLAT OD_IDCT_2_ASYM_PR
+
#define OD_FDST_2_PR(p0, p1) \
/* Embedded 2-point orthonormal Type-IV fDST. */ \
do { \
@@ -109,6 +112,40 @@
} \
while (0)
+#define OD_FDST_2_ASYM_FLAT(p0, p0h, p1) \
+ /* Embedded 2-point asymmetric Type-IV fDST with flattened rotations. */ \
+ do { \
+ int t_; \
+ int u_; \
+ t_ = p0h - p1; \
+ /* 3135/8192 ~= (Sin[3*Pi/8] - Cos[3*Pi/8])/Sqrt[2] = 0.38268343236509 */ \
+ u_ = (p0*3135 + 4096) >> 13; \
+ /* 60547/32768 ~= (Cos[3*Pi/8] + Sin[3*Pi/8])*Sqrt[2] = 1.847759065023 */ \
+ p0 = (p1*60547 + 16384) >> 15; \
+ /* 8867/16384 ~= Cos[3*Pi/8]*Sqrt[2] = 0.5411961001461971 */ \
+ t_ = (t_*8867 + 8192) >> 14; \
+ p0 += t_; \
+ p1 = u_ + t_; \
+ } \
+ while (0)
+
+#define OD_IDST_2_ASYM_FLAT(p0, p1) \
+ /* Embedded 2-point asymmetric Type-IV iDST with flattened rotations. */ \
+ do { \
+ int t_; \
+ int u_; \
+ t_ = (p0 + p1 + 1) >> 1; \
+ /* 3135/4096 ~= (Cos[Pi/8] - Sin[Pi/8])*Sqrt[2] = 0.7653668647301795 */ \
+ u_ = (p1*3135 + 2048) >> 12; \
+ /* 15137/16384 ~= (Cos[Pi/8] + Sin[Pi/8])/Sqrt[2] = 0.9238795325112867 */ \
+ p1 = (p0*15137 + 8192) >> 14; \
+ /* 8867/8192 ~= Cos[3*Pi/8]*2*Sqrt[2] = 1.082392200292394 */ \
+ t_ = (t_*8867 + 4096) >> 13; \
+ p0 = u_ + t_; \
+ p1 -= OD_RSHIFT1(t_); \
+ } \
+ while (0)
+
#define OD_FDCT_4_PR(q0, q2, q1, q3) \
/* Embedded 4-point orthonormal Type-II fDCT. */ \
do { \
@@ -166,6 +203,35 @@
} \
while (0)
+#define OD_FDCT_4_FLAT(q0, q1, q2, q3) \
+ /* Embedded 4-point orthonormal Type-II fDCT with flattened rotations; od_bin_fdct4 reads the results back as y[0..3] = q0, q2, q1, q3. */ \
+ do { \
+ int q1h; \
+ int q3h; \
+ q3 = q0 - q3; \
+ q3h = OD_RSHIFT1(q3); \
+ q0 -= q3h; \
+ q1 += q2; \
+ q1h = OD_RSHIFT1(q1); \
+ q2 -= q1h; \
+ OD_FDCT_2_ASYM_FLAT(q0, q1, q1h); \
+ OD_FDST_2_ASYM_FLAT(q3, q3h, q2); \
+ } \
+ while (0)
+
+#define OD_IDCT_4_FLAT(q0, q2, q1, q3) \
+ /* Embedded 4-point orthonormal Type-II iDCT with flattened rotations; od_bin_idct4 passes y[0..3] as q0, q2, q1, q3 and stores q0..q3 to x[0..3]. */ \
+ do { \
+ int q1h; \
+ OD_IDST_2_ASYM_FLAT(q3, q2); \
+ OD_IDCT_2_ASYM_FLAT(q0, q1, q1h); \
+ q2 += q1h; \
+ q1 -= q2; \
+ q0 += OD_RSHIFT1(q3); \
+ q3 = q0 - q3; \
+ } \
+ while (0)
+
#define OD_FDST_4_PR(q0, q2, q1, q3) \
/* Embedded 4-point orthonormal Type-IV fDST. */ \
do { \
@@ -3754,80 +3820,36 @@
/* 4-point orthonormal Type-II fDCT. */
void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) {
- /* 4 "muls", 8 adds, 2 shifts */
int q0;
int q1;
int q2;
int q3;
- int u1;
- int t0;
- int t1;
- int t2;
- int t3;
q0 = x[0*xstride];
q1 = x[1*xstride];
q2 = x[2*xstride];
q3 = x[3*xstride];
- q3 = q0 - q3;
- q0 -= OD_RSHIFT1(q3);
- u1 = q1 + q2;
- q2 = q1 - q2;
- /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
- t0 = (q3*8867 + 16384) >> 15;
- /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
- t1 = (q2*21407 + 16384) >> 15;
- /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
- t2 = (q3*21407 + 16384) >> 15;
- /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
- t3 = (q2*8867 + 16384) >> 15;
- q0 += OD_RSHIFT1(u1);
- q1 = q0 - u1;
- q2 = t3 + t2;
- q3 = t0 - t1;
- y[0] = q0;
- y[1] = q2;
- y[2] = q1;
- y[3] = q3;
+ OD_FDCT_4_FLAT(q0, q1, q2, q3);
+ y[0] = (od_coeff)q0;
+ y[1] = (od_coeff)q2;
+ y[2] = (od_coeff)q1;
+ y[3] = (od_coeff)q3;
}
/* 4-point orthonormal Type-II iDCT. */
void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) {
- /* 4 "muls", 8 adds, 1 shift */
int q0;
int q1;
int q2;
int q3;
- int q1h;
- int u0;
- int t0;
- int t1;
- int t2;
- int t3;
q0 = y[0];
q2 = y[1];
q1 = y[2];
q3 = y[3];
- /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
- t0 = (q3*8867 + 16384) >> 15;
- /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
- t1 = (q2*21407 + 16384) >> 15;
- /* Cos[Pi/8]/Sqrt[2] = 0.65328148243818826392832158671359 */
- t2 = (q3*21407 + 16384) >> 15;
- /* Cos[3*Pi/8]/Sqrt[2] = 0.27059805007309849219986160268319 */
- t3 = (q2*8867 + 16384) >> 15;
- q3 = t0 + t1;
- q2 = t3 - t2;
- q1 = q0 - q1;
- q1h = OD_RSHIFT1(q1);
- q0 -= q1h;
- u0 = q0 + q3;
- q3 = q0 - q3;
- q2 = q1h - q2;
- q1 -= q2;
- x[0*xstride] = u0;
- x[1*xstride] = q1;
- x[2*xstride] = q2;
- x[3*xstride] = q3;
+ OD_IDCT_4_FLAT(q0, q2, q1, q3);
+ x[0*xstride] = (od_coeff)q0;
+ x[1*xstride] = (od_coeff)q1;
+ x[2*xstride] = (od_coeff)q2;
+ x[3*xstride] = (od_coeff)q3;
}
/* 4-point orthonormal Type-VII fDST. */