Add new 8-point Type-II DCT implementation.
subset-1:
new_dct4@2017-11-27T20:52:07.119Z -> new_dct8@2017-11-27T23:57:04.520Z
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
-0.0030 | 0.0566 | -0.1127 | -0.0244 | -0.0078 | -0.0154 | 0.0026
Change-Id: I1fde36a5ed454a50acf81004a618fc0a0c8c9073
diff --git a/av1/common/daala_tx.c b/av1/common/daala_tx.c
index d0aab49..2c9ed20 100644
--- a/av1/common/daala_tx.c
+++ b/av1/common/daala_tx.c
@@ -38,6 +38,32 @@
} \
while (0)
+#define OD_FDCT_2_FLAT(p0, p1) \
+ /* Embedded 2-point orthonormal Type-II fDCT with flattened rotations. */ \
+ do { \
+ int t_; \
+ t_ = (p0 - p1 + 1) >> 1; /* Rounded half-difference of the inputs. */ \
+ /* 46341/32768 ~= 2*Sin[Pi/4] = 1.4142135623730951 */ \
+ p0 = (p1*46341 + 16384) >> 15; \
+ /* 46341/32768 ~= 2*Cos[Pi/4] = 1.4142135623730951 */ \
+ p1 = (t_*46341 + 16384) >> 15; /* ~= (in0 - in1)/Sqrt[2] */ \
+ p0 += p1; /* ~= (in0 + in1)/Sqrt[2] */ \
+ } \
+ while (0)
+
+#define OD_IDCT_2_FLAT(p0, p1) \
+ /* Embedded 2-point orthonormal Type-II iDCT with flattened rotations. */ \
+ do { \
+ int t_; \
+ t_ = p0 + p1; /* Taken before p0 and p1 are overwritten below. */ \
+ /* 11585/8192 ~= 2*Sin[Pi/4] = 1.4142135623730951 */ \
+ p1 = (p0*11585 + 4096) >> 13; \
+ /* 11585/16384 ~= Cos[Pi/4] = 0.7071067811865475 */ \
+ p0 = (t_*11585 + 8192) >> 14; /* ~= (in0 + in1)/Sqrt[2] */ \
+ p1 -= p0; /* ~= (in0 - in1)/Sqrt[2] */ \
+ } \
+ while (0)
+
#define OD_FDCT_2_ASYM_PR(p0, p1, p1h) \
/* Embedded 2-point asymmetric Type-II fDCT. */ \
do { \
@@ -85,6 +111,23 @@
} \
while (0)
+#define OD_FDST_2_FLAT(p0, p1) /* 2-point fDST, flattened rotations. */ \
+ do { \
+ int t_; \
+ t_ = (p0 + p1 + 1) >> 1; /* Rounded half-sum of the inputs. */ \
+ /* 8867/16384 ~= Cos[3*Pi/8]*Sqrt[2] = 0.541196100146197 */ \
+ p0 = (p0*8867 + 8192) >> 14; \
+ /* 21407/16384 ~= Sin[3*Pi/8]*Sqrt[2] = 1.3065629648763766 */ \
+ p1 = (p1*21407 + 8192) >> 14; \
+ /* 15137/8192 ~= 2*Cos[Pi/8] = 1.8477590650225735 */ \
+ t_ = (t_*15137 + 4096) >> 13; \
+ p0 = t_ - p0; /* ~= Sin[Pi/8]*in0 + Cos[Pi/8]*in1 */ \
+ p1 = t_ - p1; /* ~= Cos[Pi/8]*in0 - Sin[Pi/8]*in1 */ \
+ } \
+ while (0)
+
+#define OD_IDST_2_FLAT OD_FDST_2_FLAT /* Map is self-inverse up to rounding. */
+
#define OD_FDST_2_ASYM_PR(p0, p1) \
/* Embedded 2-point asymmetric Type-IV fDST. */ \
do { \
@@ -232,6 +275,32 @@
} \
while (0)
+#define OD_FDCT_4_ASYM_FLAT(q0, q1, q1h, q2, q3, q3h) \
+ /* Embedded 4-point asymmetric Type-II fDCT with flattened rotations. */ \
+ do { \
+ q0 += q3h; /* q1h, q3h: OD_FDCT_8_FLAT passes OD_RSHIFT1 of q1, q3. */ \
+ q3 = q0 - q3; \
+ q2 -= q1h; \
+ q1 += q2; \
+ OD_FDCT_2_FLAT(q0, q1); \
+ OD_FDST_2_FLAT(q3, q2); /* Note the (q3, q2) argument order. */ \
+ } \
+ while (0)
+
+#define OD_IDCT_4_ASYM_FLAT(q0, q2, q1, q1h, q3, q3h) \
+ /* Embedded 4-point asymmetric Type-II iDCT with flattened rotations. */ \
+ do { \
+ OD_IDST_2_FLAT(q3, q2); /* Same (q3, q2) order as the forward macro. */ \
+ OD_IDCT_2_FLAT(q0, q1); \
+ q1 -= q2; \
+ q1h = OD_RSHIFT1(q1); /* Output: read afterwards by OD_IDCT_8_FLAT. */ \
+ q2 += q1h; \
+ q3 = q0 - q3; \
+ q3h = OD_RSHIFT1(q3); /* Output: read afterwards by OD_IDCT_8_FLAT. */ \
+ q0 -= q3h; \
+ } \
+ while (0)
+
#define OD_FDST_4_PR(q0, q2, q1, q3) \
/* Embedded 4-point orthonormal Type-IV fDST. */ \
do { \
@@ -379,6 +448,81 @@
} \
while (0)
+#define OD_FDST_4_ASYM_FLAT(q0, q0h, q1, q2, q2h, q3) \
+ /* Embedded 4-point asymmetric Type-IV fDST with flattened rotations. */ \
+ do { \
+ int t_; \
+ int u_; \
+ t_ = q0h + q3; /* q0h, q2h: OD_FDCT_8_FLAT passes OD_RSHIFT1 of q0, q2. */ \
+ /* 38531/32768 ~= Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586 */ \
+ u_ = (q0*38531 + 16384) >> 15; \
+ /* 12873/16384 ~= Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 */ \
+ q0 = (q3*12873 + 8192) >> 14; \
+ /* 12785/32768 ~= 2*Cos[7*Pi/16] = 0.3901806440322565 */ \
+ t_ = (t_*12785 + 16384) >> 15; \
+ q0 += OD_RSHIFT1(t_); \
+ q3 = u_ - t_; \
+ t_ = q1 + q2h; \
+ /* 45451/32768 ~= Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475 */ \
+ u_ = (q1*45451 + 16384) >> 15; \
+ /* 9041/32768 ~= Sin[5*Pi/16] - Cos[5*Pi/16] = 0.27589937928294306 */ \
+ q1 = (q2*9041 + 16384) >> 15; \
+ /* 18205/16384 ~= 2*Cos[5*Pi/16] = 1.1111404660392044 */ \
+ t_ = (t_*18205 + 8192) >> 14; \
+ q1 += t_; \
+ q2 = OD_RSHIFT1(t_) - u_; \
+ q2 -= OD_RSHIFT1(q3); \
+ q3 += q2; \
+ q0 += OD_RSHIFT1(q1); \
+ q1 -= q0; \
+ t_ = (q1 + q2 + 1) >> 1; \
+ /* 11585/8192 ~= 2*Sin[Pi/4] = 1.4142135623730951 */ \
+ q2 = (q1*11585 + 4096) >> 13; \
+ /* -46341/32768 ~= -2*Cos[Pi/4] = -1.4142135623730951 */ \
+ q1 = (t_*-46341 + 16384) >> 15; \
+ q2 += q1; \
+ } \
+ while (0)
+
+#define OD_IDST_4_ASYM_FLAT(q0, q2, q1, q3) /* 4-point asymmetric iDST. */ \
+ do { \
+ int t_; \
+ int u_; \
+ int q1h; /* Halves are locals here, not macro arguments. */ \
+ int q3h; \
+ t_ = (q1 + q2 + 1) >> 1; \
+ /* 11585/8192 ~= 2*Sin[Pi/4] = 1.4142135623730951 */ \
+ q1 = (q2*11585 + 4096) >> 13; \
+ /* -46341/32768 ~= -2*Cos[Pi/4] = -1.4142135623730951 */ \
+ q2 = (t_*-46341 + 16384) >> 15; \
+ q1 += q2; \
+ q1 += q0; \
+ q1h = OD_RSHIFT1(q1); \
+ q0 -= q1h; \
+ q3 -= q2; \
+ q3h = OD_RSHIFT1(q3); \
+ q2 += q3h; \
+ t_ = q1h + q2; \
+ /* 45451/32768 ~= Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475 */ \
+ u_ = (q2*45451 + 16384) >> 15; \
+ /* 9041/32768 ~= Sin[5*Pi/16] - Cos[5*Pi/16] = 0.27589937928294306 */ \
+ q2 = (q1*9041 + 16384) >> 15; \
+ /* 18205/16384 ~= 2*Cos[5*Pi/16] = 1.1111404660392044 */ \
+ t_ = (t_*18205 + 8192) >> 14; \
+ q1 = OD_RSHIFT1(t_) - u_; \
+ q2 += t_; \
+ t_ = q0 + q3h; \
+ /* 38531/32768 ~= Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586 */ \
+ u_ = (q0*38531 + 16384) >> 15; \
+ /* 12873/16384 ~= Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 */ \
+ q0 = (q3*12873 + 8192) >> 14; \
+ /* 12785/32768 ~= 2*Cos[7*Pi/16] = 0.3901806440322565 */ \
+ t_ = (t_*12785 + 16384) >> 15; \
+ q3 = u_ - OD_RSHIFT1(t_); \
+ q0 += t_; \
+ } \
+ while (0)
+
#define OD_FDCT_8_PR(r0, r4, r2, r6, r1, r5, r3, r7) \
/* Embedded 8-point orthonormal Type-II fDCT. */ \
do { \
@@ -424,6 +568,48 @@
} \
while (0)
+#define OD_FDCT_8_FLAT(r0, r1, r2, r3, r4, r5, r6, r7) \
+ /* Embedded 8-point orthonormal Type-II fDCT with flattened rotations. */ \
+ do { \
+ int r1h; \
+ int r3h; \
+ int r5h; \
+ int r7h; \
+ r7 = r0 - r7; /* Butterfly pairs: (0,7), (1,6), (2,5), (3,4). */ \
+ r7h = OD_RSHIFT1(r7); \
+ r0 -= r7h; \
+ r1 += r6; \
+ r1h = OD_RSHIFT1(r1); \
+ r6 -= r1h; \
+ r5 = r2 - r5; \
+ r5h = OD_RSHIFT1(r5); \
+ r2 -= r5h; \
+ r3 += r4; \
+ r3h = OD_RSHIFT1(r3); \
+ r4 -= r3h; \
+ OD_FDCT_4_ASYM_FLAT(r0, r1, r1h, r2, r3, r3h); \
+ OD_FDST_4_ASYM_FLAT(r7, r7h, r6, r5, r5h, r4); /* Outputs stay permuted. */ \
+ } \
+ while (0)
+
+#define OD_IDCT_8_FLAT(r0, r4, r2, r6, r1, r5, r3, r7) \
+ /* Embedded 8-point orthonormal Type-II iDCT with flattened rotations. */ \
+ do { \
+ int r1h; /* Set by OD_IDCT_4_ASYM_FLAT below. */ \
+ int r3h; /* Set by OD_IDCT_4_ASYM_FLAT below. */ \
+ OD_IDST_4_ASYM_FLAT(r7, r5, r6, r4); \
+ OD_IDCT_4_ASYM_FLAT(r0, r2, r1, r1h, r3, r3h); \
+ r4 += r3h; /* From here on: reverse the forward butterflies. */ \
+ r3 -= r4; \
+ r2 += OD_RSHIFT1(r5); \
+ r5 = r2 - r5; \
+ r6 += r1h; \
+ r1 -= r6; \
+ r0 += OD_RSHIFT1(r7); \
+ r7 = r0 - r7; \
+ } \
+ while (0)
+
#define OD_FDCT_8_ASYM_PR(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h) \
/* Embedded 8-point asymmetric Type-II fDCT. */ \
do { \
@@ -3944,21 +4130,21 @@
int r6;
int r7;
r0 = x[0*xstride];
- r4 = x[1*xstride];
+ r1 = x[1*xstride];
r2 = x[2*xstride];
- r6 = x[3*xstride];
- r1 = x[4*xstride];
+ r3 = x[3*xstride];
+ r4 = x[4*xstride];
r5 = x[5*xstride];
- r3 = x[6*xstride];
+ r6 = x[6*xstride];
r7 = x[7*xstride];
- OD_FDCT_8_PR(r0, r4, r2, r6, r1, r5, r3, r7);
+ OD_FDCT_8_FLAT(r0, r1, r2, r3, r4, r5, r6, r7);
y[0] = (od_coeff)r0;
- y[1] = (od_coeff)r1;
+ y[1] = (od_coeff)r4;
y[2] = (od_coeff)r2;
- y[3] = (od_coeff)r3;
- y[4] = (od_coeff)r4;
+ y[3] = (od_coeff)r6;
+ y[4] = (od_coeff)r1;
y[5] = (od_coeff)r5;
- y[6] = (od_coeff)r6;
+ y[6] = (od_coeff)r3;
y[7] = (od_coeff)r7;
}
@@ -3979,7 +4165,7 @@
r5 = y[5];
r3 = y[6];
r7 = y[7];
- OD_IDCT_8_PR(r0, r4, r2, r6, r1, r5, r3, r7);
+ OD_IDCT_8_FLAT(r0, r4, r2, r6, r1, r5, r3, r7);
x[0*xstride] = (od_coeff)r0;
x[1*xstride] = (od_coeff)r1;
x[2*xstride] = (od_coeff)r2;