Add new 8-point Type-VII DST implementation.
Objective metrics on subset-1, comparing runs
new_dct8@2017-11-27T23:57:04.520Z -> new_dst8@2017-11-29T13:49:40.011Z:
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0034 | 0.0257 | 0.0831 | 0.0270 | 0.0023 | 0.0135 | -0.0235
Change-Id: Ia4873465e7c70e9a0e663d4185cba3ae619c77a0
diff --git a/av1/common/daala_tx.c b/av1/common/daala_tx.c
index 4c4f435..ab5f9ec 100644
--- a/av1/common/daala_tx.c
+++ b/av1/common/daala_tx.c
@@ -5922,7 +5922,7 @@
[1] Nussbaumer, Henri J. "Fast Fourier Transform and Convolution Algorithms"
Springer-Verlag: Berlin, Heidelberg, New York (1981) pages 76-78. */
static void od_poly_prod_8(od_coeff y[8], const od_coeff x[8]) {
- /* 21 "muls", 75 adds, 18 shifts */
+ /* 21 "muls", 76 adds, 21 shifts */
od_coeff q0;
od_coeff q1;
od_coeff q2;
@@ -5944,6 +5944,14 @@
od_coeff q18;
od_coeff q19;
od_coeff q20;
+ od_coeff r0;
+ od_coeff r1;
+ od_coeff r2;
+ od_coeff r3;
+ od_coeff r4;
+ od_coeff r5;
+ od_coeff r6;
+ od_coeff r7;
od_coeff t0;
od_coeff t1;
od_coeff t2;
@@ -6029,8 +6037,20 @@
q15 = t5 - t6;
q16 = t6;
q17 = t5;
- q18 = ((q6 + ((t0 + t6 + 1) >> 1)) - (q4 + (t5 >> 1))) >> 1;
- q19 = ((q7 + ((t5 + t6 + 1) >> 1)) - (q0 + (t3 >> 1))) >> 1;
+ r0 = t2 + t4;
+ r1 = t2 - OD_RSHIFT1(r0);
+ r2 = (r1 - q15 + 1) >> 1;
+ r3 = OD_RSHIFT1(t0);
+ r4 = (r3 - t1 + 1) >> 1;
+ /* q18 = (q6 - q4)/2 + (t0 - q15)/4
+ = (t0 + t2 - t4)/4 - (t1 + t5 - t6)/2 */
+ q18 = r2 + r4;
+ r5 = t5 - (q15 >> 1);
+ r6 = (r0 + t3 + 1) >> 1;
+ r7 = (t7 + r6 + 1) >> 1;
+ /* q19 = (q7 - q0)/2 + (t5 + t6 - t3)/4
+ = (t5 + t6 - t7)/2 - (t2 + t3 + t4)/4 */
+ q19 = r5 - r7;
q20 = (q18 - q19) >> 1;
/* Stage 4 */
q0 = (-5995*q0 + 8192) >> 14;
@@ -6078,12 +6098,12 @@
u18 = q12 + u8;
u19 = u18 + q13;
u20 = u18 + q14;
- u21 = u9 << 1;
+ u21 = 2*u9;
u22 = q15 + u21;
u23 = q16 - u22;
u24 = u22 + q17;
- u25 = u8 << 1;
- u26 = u25 << 1;
+ u25 = 2*u8;
+ u26 = 2*u25;
u27 = u25 - u9;
/* Stage 8 */
y[0] = u14 + u16 + u20;