/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>
#include <string.h>

#include "./aom_dsp_rtcd.h"
#include "aom_dsp/inv_txfm.h"

Yaowu Xuf883b422016-08-30 14:01:10 -070018void aom_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070019 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
20 0.5 shifts per pixel. */
21 int i;
22 tran_low_t output[16];
23 tran_high_t a1, b1, c1, d1, e1;
24 const tran_low_t *ip = input;
25 tran_low_t *op = output;
26
27 for (i = 0; i < 4; i++) {
28 a1 = ip[0] >> UNIT_QUANT_SHIFT;
29 c1 = ip[1] >> UNIT_QUANT_SHIFT;
30 d1 = ip[2] >> UNIT_QUANT_SHIFT;
31 b1 = ip[3] >> UNIT_QUANT_SHIFT;
32 a1 += c1;
33 d1 -= b1;
34 e1 = (a1 - d1) >> 1;
35 b1 = e1 - b1;
36 c1 = e1 - c1;
37 a1 -= b1;
38 d1 += c1;
39 op[0] = WRAPLOW(a1);
40 op[1] = WRAPLOW(b1);
41 op[2] = WRAPLOW(c1);
42 op[3] = WRAPLOW(d1);
43 ip += 4;
44 op += 4;
45 }
46
47 ip = output;
48 for (i = 0; i < 4; i++) {
49 a1 = ip[4 * 0];
50 c1 = ip[4 * 1];
51 d1 = ip[4 * 2];
52 b1 = ip[4 * 3];
53 a1 += c1;
54 d1 -= b1;
55 e1 = (a1 - d1) >> 1;
56 b1 = e1 - b1;
57 c1 = e1 - c1;
58 a1 -= b1;
59 d1 += c1;
60 dest[stride * 0] = clip_pixel_add(dest[stride * 0], WRAPLOW(a1));
61 dest[stride * 1] = clip_pixel_add(dest[stride * 1], WRAPLOW(b1));
62 dest[stride * 2] = clip_pixel_add(dest[stride * 2], WRAPLOW(c1));
63 dest[stride * 3] = clip_pixel_add(dest[stride * 3], WRAPLOW(d1));
64
65 ip++;
66 dest++;
67 }
68}
69
Yaowu Xuf883b422016-08-30 14:01:10 -070070void aom_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070071 int i;
72 tran_high_t a1, e1;
73 tran_low_t tmp[4];
74 const tran_low_t *ip = in;
75 tran_low_t *op = tmp;
76
77 a1 = ip[0] >> UNIT_QUANT_SHIFT;
78 e1 = a1 >> 1;
79 a1 -= e1;
80 op[0] = WRAPLOW(a1);
81 op[1] = op[2] = op[3] = WRAPLOW(e1);
82
83 ip = tmp;
84 for (i = 0; i < 4; i++) {
85 e1 = ip[0] >> 1;
86 a1 = ip[0] - e1;
87 dest[dest_stride * 0] = clip_pixel_add(dest[dest_stride * 0], a1);
88 dest[dest_stride * 1] = clip_pixel_add(dest[dest_stride * 1], e1);
89 dest[dest_stride * 2] = clip_pixel_add(dest[dest_stride * 2], e1);
90 dest[dest_stride * 3] = clip_pixel_add(dest[dest_stride * 3], e1);
91 ip++;
92 dest++;
93 }
94}
95
Luca Barbatof0f98572016-09-03 12:14:15 +020096void aom_idct4_c(const tran_low_t *input, tran_low_t *output) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070097 tran_low_t step[4];
98 tran_high_t temp1, temp2;
99 // stage 1
100 temp1 = (input[0] + input[2]) * cospi_16_64;
101 temp2 = (input[0] - input[2]) * cospi_16_64;
102 step[0] = WRAPLOW(dct_const_round_shift(temp1));
103 step[1] = WRAPLOW(dct_const_round_shift(temp2));
104 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
105 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
106 step[2] = WRAPLOW(dct_const_round_shift(temp1));
107 step[3] = WRAPLOW(dct_const_round_shift(temp2));
108
109 // stage 2
110 output[0] = WRAPLOW(step[0] + step[3]);
111 output[1] = WRAPLOW(step[1] + step[2]);
112 output[2] = WRAPLOW(step[1] - step[2]);
113 output[3] = WRAPLOW(step[0] - step[3]);
114}
115
Yaowu Xuf883b422016-08-30 14:01:10 -0700116void aom_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700117 tran_low_t out[4 * 4];
118 tran_low_t *outptr = out;
119 int i, j;
120 tran_low_t temp_in[4], temp_out[4];
121
122 // Rows
123 for (i = 0; i < 4; ++i) {
Luca Barbatof0f98572016-09-03 12:14:15 +0200124 aom_idct4_c(input, outptr);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700125 input += 4;
126 outptr += 4;
127 }
128
129 // Columns
130 for (i = 0; i < 4; ++i) {
131 for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
Luca Barbatof0f98572016-09-03 12:14:15 +0200132 aom_idct4_c(temp_in, temp_out);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700133 for (j = 0; j < 4; ++j) {
134 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
135 ROUND_POWER_OF_TWO(temp_out[j], 4));
136 }
137 }
138}
139
Yaowu Xuf883b422016-08-30 14:01:10 -0700140void aom_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700141 int dest_stride) {
142 int i;
143 tran_high_t a1;
144 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
145 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
146 a1 = ROUND_POWER_OF_TWO(out, 4);
147
Yushin Cho27acc472017-03-30 11:58:20 -0700148 if (a1 == 0) return;
149
Yaowu Xuc27fc142016-08-22 16:08:15 -0700150 for (i = 0; i < 4; i++) {
151 dest[0] = clip_pixel_add(dest[0], a1);
152 dest[1] = clip_pixel_add(dest[1], a1);
153 dest[2] = clip_pixel_add(dest[2], a1);
154 dest[3] = clip_pixel_add(dest[3], a1);
155 dest += dest_stride;
156 }
157}
158
Luca Barbatof0f98572016-09-03 12:14:15 +0200159void aom_idct8_c(const tran_low_t *input, tran_low_t *output) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700160 tran_low_t step1[8], step2[8];
161 tran_high_t temp1, temp2;
162 // stage 1
163 step1[0] = input[0];
164 step1[2] = input[4];
165 step1[1] = input[2];
166 step1[3] = input[6];
167 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
168 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
169 step1[4] = WRAPLOW(dct_const_round_shift(temp1));
170 step1[7] = WRAPLOW(dct_const_round_shift(temp2));
171 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
172 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
173 step1[5] = WRAPLOW(dct_const_round_shift(temp1));
174 step1[6] = WRAPLOW(dct_const_round_shift(temp2));
175
176 // stage 2
177 temp1 = (step1[0] + step1[2]) * cospi_16_64;
178 temp2 = (step1[0] - step1[2]) * cospi_16_64;
179 step2[0] = WRAPLOW(dct_const_round_shift(temp1));
180 step2[1] = WRAPLOW(dct_const_round_shift(temp2));
181 temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
182 temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
183 step2[2] = WRAPLOW(dct_const_round_shift(temp1));
184 step2[3] = WRAPLOW(dct_const_round_shift(temp2));
185 step2[4] = WRAPLOW(step1[4] + step1[5]);
186 step2[5] = WRAPLOW(step1[4] - step1[5]);
187 step2[6] = WRAPLOW(-step1[6] + step1[7]);
188 step2[7] = WRAPLOW(step1[6] + step1[7]);
189
190 // stage 3
191 step1[0] = WRAPLOW(step2[0] + step2[3]);
192 step1[1] = WRAPLOW(step2[1] + step2[2]);
193 step1[2] = WRAPLOW(step2[1] - step2[2]);
194 step1[3] = WRAPLOW(step2[0] - step2[3]);
195 step1[4] = step2[4];
196 temp1 = (step2[6] - step2[5]) * cospi_16_64;
197 temp2 = (step2[5] + step2[6]) * cospi_16_64;
198 step1[5] = WRAPLOW(dct_const_round_shift(temp1));
199 step1[6] = WRAPLOW(dct_const_round_shift(temp2));
200 step1[7] = step2[7];
201
202 // stage 4
203 output[0] = WRAPLOW(step1[0] + step1[7]);
204 output[1] = WRAPLOW(step1[1] + step1[6]);
205 output[2] = WRAPLOW(step1[2] + step1[5]);
206 output[3] = WRAPLOW(step1[3] + step1[4]);
207 output[4] = WRAPLOW(step1[3] - step1[4]);
208 output[5] = WRAPLOW(step1[2] - step1[5]);
209 output[6] = WRAPLOW(step1[1] - step1[6]);
210 output[7] = WRAPLOW(step1[0] - step1[7]);
211}
212
Luca Barbatof0f98572016-09-03 12:14:15 +0200213void aom_iadst4_c(const tran_low_t *input, tran_low_t *output) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700214 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
215
216 tran_low_t x0 = input[0];
217 tran_low_t x1 = input[1];
218 tran_low_t x2 = input[2];
219 tran_low_t x3 = input[3];
220
221 if (!(x0 | x1 | x2 | x3)) {
222 output[0] = output[1] = output[2] = output[3] = 0;
223 return;
224 }
225
226 s0 = sinpi_1_9 * x0;
227 s1 = sinpi_2_9 * x0;
228 s2 = sinpi_3_9 * x1;
229 s3 = sinpi_4_9 * x2;
230 s4 = sinpi_1_9 * x2;
231 s5 = sinpi_2_9 * x3;
232 s6 = sinpi_4_9 * x3;
233 s7 = WRAPLOW(x0 - x2 + x3);
234
235 s0 = s0 + s3 + s5;
236 s1 = s1 - s4 - s6;
237 s3 = s2;
238 s2 = sinpi_3_9 * s7;
239
240 // 1-D transform scaling factor is sqrt(2).
241 // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
242 // + 1b (addition) = 29b.
243 // Hence the output bit depth is 15b.
244 output[0] = WRAPLOW(dct_const_round_shift(s0 + s3));
245 output[1] = WRAPLOW(dct_const_round_shift(s1 + s3));
246 output[2] = WRAPLOW(dct_const_round_shift(s2));
247 output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3));
248}
249
Luca Barbatof0f98572016-09-03 12:14:15 +0200250void aom_iadst8_c(const tran_low_t *input, tran_low_t *output) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700251 int s0, s1, s2, s3, s4, s5, s6, s7;
252
253 tran_high_t x0 = input[7];
254 tran_high_t x1 = input[0];
255 tran_high_t x2 = input[5];
256 tran_high_t x3 = input[2];
257 tran_high_t x4 = input[3];
258 tran_high_t x5 = input[4];
259 tran_high_t x6 = input[1];
260 tran_high_t x7 = input[6];
261
262 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
263 output[0] = output[1] = output[2] = output[3] = output[4] = output[5] =
264 output[6] = output[7] = 0;
265 return;
266 }
267
268 // stage 1
269 s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1);
270 s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1);
271 s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3);
272 s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3);
273 s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5);
274 s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5);
275 s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7);
276 s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7);
277
278 x0 = WRAPLOW(dct_const_round_shift(s0 + s4));
279 x1 = WRAPLOW(dct_const_round_shift(s1 + s5));
280 x2 = WRAPLOW(dct_const_round_shift(s2 + s6));
281 x3 = WRAPLOW(dct_const_round_shift(s3 + s7));
282 x4 = WRAPLOW(dct_const_round_shift(s0 - s4));
283 x5 = WRAPLOW(dct_const_round_shift(s1 - s5));
284 x6 = WRAPLOW(dct_const_round_shift(s2 - s6));
285 x7 = WRAPLOW(dct_const_round_shift(s3 - s7));
286
287 // stage 2
288 s0 = (int)x0;
289 s1 = (int)x1;
290 s2 = (int)x2;
291 s3 = (int)x3;
292 s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5);
293 s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5);
294 s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7);
295 s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7);
296
297 x0 = WRAPLOW(s0 + s2);
298 x1 = WRAPLOW(s1 + s3);
299 x2 = WRAPLOW(s0 - s2);
300 x3 = WRAPLOW(s1 - s3);
301 x4 = WRAPLOW(dct_const_round_shift(s4 + s6));
302 x5 = WRAPLOW(dct_const_round_shift(s5 + s7));
303 x6 = WRAPLOW(dct_const_round_shift(s4 - s6));
304 x7 = WRAPLOW(dct_const_round_shift(s5 - s7));
305
306 // stage 3
307 s2 = (int)(cospi_16_64 * (x2 + x3));
308 s3 = (int)(cospi_16_64 * (x2 - x3));
309 s6 = (int)(cospi_16_64 * (x6 + x7));
310 s7 = (int)(cospi_16_64 * (x6 - x7));
311
312 x2 = WRAPLOW(dct_const_round_shift(s2));
313 x3 = WRAPLOW(dct_const_round_shift(s3));
314 x6 = WRAPLOW(dct_const_round_shift(s6));
315 x7 = WRAPLOW(dct_const_round_shift(s7));
316
317 output[0] = WRAPLOW(x0);
318 output[1] = WRAPLOW(-x4);
319 output[2] = WRAPLOW(x6);
320 output[3] = WRAPLOW(-x2);
321 output[4] = WRAPLOW(x3);
322 output[5] = WRAPLOW(-x7);
323 output[6] = WRAPLOW(x5);
324 output[7] = WRAPLOW(-x1);
325}
326
Luca Barbatof0f98572016-09-03 12:14:15 +0200327void aom_idct16_c(const tran_low_t *input, tran_low_t *output) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700328 tran_low_t step1[16], step2[16];
329 tran_high_t temp1, temp2;
330
331 // stage 1
332 step1[0] = input[0 / 2];
333 step1[1] = input[16 / 2];
334 step1[2] = input[8 / 2];
335 step1[3] = input[24 / 2];
336 step1[4] = input[4 / 2];
337 step1[5] = input[20 / 2];
338 step1[6] = input[12 / 2];
339 step1[7] = input[28 / 2];
340 step1[8] = input[2 / 2];
341 step1[9] = input[18 / 2];
342 step1[10] = input[10 / 2];
343 step1[11] = input[26 / 2];
344 step1[12] = input[6 / 2];
345 step1[13] = input[22 / 2];
346 step1[14] = input[14 / 2];
347 step1[15] = input[30 / 2];
348
349 // stage 2
350 step2[0] = step1[0];
351 step2[1] = step1[1];
352 step2[2] = step1[2];
353 step2[3] = step1[3];
354 step2[4] = step1[4];
355 step2[5] = step1[5];
356 step2[6] = step1[6];
357 step2[7] = step1[7];
358
359 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
360 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
361 step2[8] = WRAPLOW(dct_const_round_shift(temp1));
362 step2[15] = WRAPLOW(dct_const_round_shift(temp2));
363
364 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
365 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
366 step2[9] = WRAPLOW(dct_const_round_shift(temp1));
367 step2[14] = WRAPLOW(dct_const_round_shift(temp2));
368
369 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
370 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
371 step2[10] = WRAPLOW(dct_const_round_shift(temp1));
372 step2[13] = WRAPLOW(dct_const_round_shift(temp2));
373
374 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
375 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
376 step2[11] = WRAPLOW(dct_const_round_shift(temp1));
377 step2[12] = WRAPLOW(dct_const_round_shift(temp2));
378
379 // stage 3
380 step1[0] = step2[0];
381 step1[1] = step2[1];
382 step1[2] = step2[2];
383 step1[3] = step2[3];
384
385 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
386 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
387 step1[4] = WRAPLOW(dct_const_round_shift(temp1));
388 step1[7] = WRAPLOW(dct_const_round_shift(temp2));
389 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
390 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
391 step1[5] = WRAPLOW(dct_const_round_shift(temp1));
392 step1[6] = WRAPLOW(dct_const_round_shift(temp2));
393
394 step1[8] = WRAPLOW(step2[8] + step2[9]);
395 step1[9] = WRAPLOW(step2[8] - step2[9]);
396 step1[10] = WRAPLOW(-step2[10] + step2[11]);
397 step1[11] = WRAPLOW(step2[10] + step2[11]);
398 step1[12] = WRAPLOW(step2[12] + step2[13]);
399 step1[13] = WRAPLOW(step2[12] - step2[13]);
400 step1[14] = WRAPLOW(-step2[14] + step2[15]);
401 step1[15] = WRAPLOW(step2[14] + step2[15]);
402
403 // stage 4
404 temp1 = (step1[0] + step1[1]) * cospi_16_64;
405 temp2 = (step1[0] - step1[1]) * cospi_16_64;
406 step2[0] = WRAPLOW(dct_const_round_shift(temp1));
407 step2[1] = WRAPLOW(dct_const_round_shift(temp2));
408 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
409 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
410 step2[2] = WRAPLOW(dct_const_round_shift(temp1));
411 step2[3] = WRAPLOW(dct_const_round_shift(temp2));
412 step2[4] = WRAPLOW(step1[4] + step1[5]);
413 step2[5] = WRAPLOW(step1[4] - step1[5]);
414 step2[6] = WRAPLOW(-step1[6] + step1[7]);
415 step2[7] = WRAPLOW(step1[6] + step1[7]);
416
417 step2[8] = step1[8];
418 step2[15] = step1[15];
419 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
420 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
421 step2[9] = WRAPLOW(dct_const_round_shift(temp1));
422 step2[14] = WRAPLOW(dct_const_round_shift(temp2));
423 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
424 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
425 step2[10] = WRAPLOW(dct_const_round_shift(temp1));
426 step2[13] = WRAPLOW(dct_const_round_shift(temp2));
427 step2[11] = step1[11];
428 step2[12] = step1[12];
429
430 // stage 5
431 step1[0] = WRAPLOW(step2[0] + step2[3]);
432 step1[1] = WRAPLOW(step2[1] + step2[2]);
433 step1[2] = WRAPLOW(step2[1] - step2[2]);
434 step1[3] = WRAPLOW(step2[0] - step2[3]);
435 step1[4] = step2[4];
436 temp1 = (step2[6] - step2[5]) * cospi_16_64;
437 temp2 = (step2[5] + step2[6]) * cospi_16_64;
438 step1[5] = WRAPLOW(dct_const_round_shift(temp1));
439 step1[6] = WRAPLOW(dct_const_round_shift(temp2));
440 step1[7] = step2[7];
441
442 step1[8] = WRAPLOW(step2[8] + step2[11]);
443 step1[9] = WRAPLOW(step2[9] + step2[10]);
444 step1[10] = WRAPLOW(step2[9] - step2[10]);
445 step1[11] = WRAPLOW(step2[8] - step2[11]);
446 step1[12] = WRAPLOW(-step2[12] + step2[15]);
447 step1[13] = WRAPLOW(-step2[13] + step2[14]);
448 step1[14] = WRAPLOW(step2[13] + step2[14]);
449 step1[15] = WRAPLOW(step2[12] + step2[15]);
450
451 // stage 6
452 step2[0] = WRAPLOW(step1[0] + step1[7]);
453 step2[1] = WRAPLOW(step1[1] + step1[6]);
454 step2[2] = WRAPLOW(step1[2] + step1[5]);
455 step2[3] = WRAPLOW(step1[3] + step1[4]);
456 step2[4] = WRAPLOW(step1[3] - step1[4]);
457 step2[5] = WRAPLOW(step1[2] - step1[5]);
458 step2[6] = WRAPLOW(step1[1] - step1[6]);
459 step2[7] = WRAPLOW(step1[0] - step1[7]);
460 step2[8] = step1[8];
461 step2[9] = step1[9];
462 temp1 = (-step1[10] + step1[13]) * cospi_16_64;
463 temp2 = (step1[10] + step1[13]) * cospi_16_64;
464 step2[10] = WRAPLOW(dct_const_round_shift(temp1));
465 step2[13] = WRAPLOW(dct_const_round_shift(temp2));
466 temp1 = (-step1[11] + step1[12]) * cospi_16_64;
467 temp2 = (step1[11] + step1[12]) * cospi_16_64;
468 step2[11] = WRAPLOW(dct_const_round_shift(temp1));
469 step2[12] = WRAPLOW(dct_const_round_shift(temp2));
470 step2[14] = step1[14];
471 step2[15] = step1[15];
472
473 // stage 7
474 output[0] = WRAPLOW(step2[0] + step2[15]);
475 output[1] = WRAPLOW(step2[1] + step2[14]);
476 output[2] = WRAPLOW(step2[2] + step2[13]);
477 output[3] = WRAPLOW(step2[3] + step2[12]);
478 output[4] = WRAPLOW(step2[4] + step2[11]);
479 output[5] = WRAPLOW(step2[5] + step2[10]);
480 output[6] = WRAPLOW(step2[6] + step2[9]);
481 output[7] = WRAPLOW(step2[7] + step2[8]);
482 output[8] = WRAPLOW(step2[7] - step2[8]);
483 output[9] = WRAPLOW(step2[6] - step2[9]);
484 output[10] = WRAPLOW(step2[5] - step2[10]);
485 output[11] = WRAPLOW(step2[4] - step2[11]);
486 output[12] = WRAPLOW(step2[3] - step2[12]);
487 output[13] = WRAPLOW(step2[2] - step2[13]);
488 output[14] = WRAPLOW(step2[1] - step2[14]);
489 output[15] = WRAPLOW(step2[0] - step2[15]);
490}
491
Luca Barbatof0f98572016-09-03 12:14:15 +0200492void aom_iadst16_c(const tran_low_t *input, tran_low_t *output) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700493 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
494 tran_high_t s9, s10, s11, s12, s13, s14, s15;
495
496 tran_high_t x0 = input[15];
497 tran_high_t x1 = input[0];
498 tran_high_t x2 = input[13];
499 tran_high_t x3 = input[2];
500 tran_high_t x4 = input[11];
501 tran_high_t x5 = input[4];
502 tran_high_t x6 = input[9];
503 tran_high_t x7 = input[6];
504 tran_high_t x8 = input[7];
505 tran_high_t x9 = input[8];
506 tran_high_t x10 = input[5];
507 tran_high_t x11 = input[10];
508 tran_high_t x12 = input[3];
509 tran_high_t x13 = input[12];
510 tran_high_t x14 = input[1];
511 tran_high_t x15 = input[14];
512
513 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 |
514 x13 | x14 | x15)) {
515 output[0] = output[1] = output[2] = output[3] = output[4] = output[5] =
516 output[6] = output[7] = output[8] = output[9] = output[10] =
517 output[11] = output[12] = output[13] = output[14] = output[15] = 0;
518 return;
519 }
520
521 // stage 1
522 s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
523 s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
524 s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
525 s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
526 s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
527 s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
528 s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
529 s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
530 s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
531 s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
532 s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
533 s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
534 s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
535 s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
536 s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
537 s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
538
539 x0 = WRAPLOW(dct_const_round_shift(s0 + s8));
540 x1 = WRAPLOW(dct_const_round_shift(s1 + s9));
541 x2 = WRAPLOW(dct_const_round_shift(s2 + s10));
542 x3 = WRAPLOW(dct_const_round_shift(s3 + s11));
543 x4 = WRAPLOW(dct_const_round_shift(s4 + s12));
544 x5 = WRAPLOW(dct_const_round_shift(s5 + s13));
545 x6 = WRAPLOW(dct_const_round_shift(s6 + s14));
546 x7 = WRAPLOW(dct_const_round_shift(s7 + s15));
547 x8 = WRAPLOW(dct_const_round_shift(s0 - s8));
548 x9 = WRAPLOW(dct_const_round_shift(s1 - s9));
549 x10 = WRAPLOW(dct_const_round_shift(s2 - s10));
550 x11 = WRAPLOW(dct_const_round_shift(s3 - s11));
551 x12 = WRAPLOW(dct_const_round_shift(s4 - s12));
552 x13 = WRAPLOW(dct_const_round_shift(s5 - s13));
553 x14 = WRAPLOW(dct_const_round_shift(s6 - s14));
554 x15 = WRAPLOW(dct_const_round_shift(s7 - s15));
555
556 // stage 2
557 s0 = x0;
558 s1 = x1;
559 s2 = x2;
560 s3 = x3;
561 s4 = x4;
562 s5 = x5;
563 s6 = x6;
564 s7 = x7;
565 s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
566 s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
567 s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
568 s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
569 s12 = -x12 * cospi_28_64 + x13 * cospi_4_64;
570 s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
571 s14 = -x14 * cospi_12_64 + x15 * cospi_20_64;
572 s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
573
574 x0 = WRAPLOW(s0 + s4);
575 x1 = WRAPLOW(s1 + s5);
576 x2 = WRAPLOW(s2 + s6);
577 x3 = WRAPLOW(s3 + s7);
578 x4 = WRAPLOW(s0 - s4);
579 x5 = WRAPLOW(s1 - s5);
580 x6 = WRAPLOW(s2 - s6);
581 x7 = WRAPLOW(s3 - s7);
582 x8 = WRAPLOW(dct_const_round_shift(s8 + s12));
583 x9 = WRAPLOW(dct_const_round_shift(s9 + s13));
584 x10 = WRAPLOW(dct_const_round_shift(s10 + s14));
585 x11 = WRAPLOW(dct_const_round_shift(s11 + s15));
586 x12 = WRAPLOW(dct_const_round_shift(s8 - s12));
587 x13 = WRAPLOW(dct_const_round_shift(s9 - s13));
588 x14 = WRAPLOW(dct_const_round_shift(s10 - s14));
589 x15 = WRAPLOW(dct_const_round_shift(s11 - s15));
590
591 // stage 3
592 s0 = x0;
593 s1 = x1;
594 s2 = x2;
595 s3 = x3;
596 s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
597 s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
598 s6 = -x6 * cospi_24_64 + x7 * cospi_8_64;
599 s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
600 s8 = x8;
601 s9 = x9;
602 s10 = x10;
603 s11 = x11;
604 s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
605 s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
606 s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
607 s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
608
609 x0 = WRAPLOW(s0 + s2);
610 x1 = WRAPLOW(s1 + s3);
611 x2 = WRAPLOW(s0 - s2);
612 x3 = WRAPLOW(s1 - s3);
613 x4 = WRAPLOW(dct_const_round_shift(s4 + s6));
614 x5 = WRAPLOW(dct_const_round_shift(s5 + s7));
615 x6 = WRAPLOW(dct_const_round_shift(s4 - s6));
616 x7 = WRAPLOW(dct_const_round_shift(s5 - s7));
617 x8 = WRAPLOW(s8 + s10);
618 x9 = WRAPLOW(s9 + s11);
619 x10 = WRAPLOW(s8 - s10);
620 x11 = WRAPLOW(s9 - s11);
621 x12 = WRAPLOW(dct_const_round_shift(s12 + s14));
622 x13 = WRAPLOW(dct_const_round_shift(s13 + s15));
623 x14 = WRAPLOW(dct_const_round_shift(s12 - s14));
624 x15 = WRAPLOW(dct_const_round_shift(s13 - s15));
625
626 // stage 4
627 s2 = (-cospi_16_64) * (x2 + x3);
628 s3 = cospi_16_64 * (x2 - x3);
629 s6 = cospi_16_64 * (x6 + x7);
630 s7 = cospi_16_64 * (-x6 + x7);
631 s10 = cospi_16_64 * (x10 + x11);
632 s11 = cospi_16_64 * (-x10 + x11);
633 s14 = (-cospi_16_64) * (x14 + x15);
634 s15 = cospi_16_64 * (x14 - x15);
635
636 x2 = WRAPLOW(dct_const_round_shift(s2));
637 x3 = WRAPLOW(dct_const_round_shift(s3));
638 x6 = WRAPLOW(dct_const_round_shift(s6));
639 x7 = WRAPLOW(dct_const_round_shift(s7));
640 x10 = WRAPLOW(dct_const_round_shift(s10));
641 x11 = WRAPLOW(dct_const_round_shift(s11));
642 x14 = WRAPLOW(dct_const_round_shift(s14));
643 x15 = WRAPLOW(dct_const_round_shift(s15));
644
645 output[0] = WRAPLOW(x0);
646 output[1] = WRAPLOW(-x8);
647 output[2] = WRAPLOW(x12);
648 output[3] = WRAPLOW(-x4);
649 output[4] = WRAPLOW(x6);
650 output[5] = WRAPLOW(x14);
651 output[6] = WRAPLOW(x10);
652 output[7] = WRAPLOW(x2);
653 output[8] = WRAPLOW(x3);
654 output[9] = WRAPLOW(x11);
655 output[10] = WRAPLOW(x15);
656 output[11] = WRAPLOW(x7);
657 output[12] = WRAPLOW(x5);
658 output[13] = WRAPLOW(-x13);
659 output[14] = WRAPLOW(x9);
660 output[15] = WRAPLOW(-x1);
661}
662
Luca Barbatof0f98572016-09-03 12:14:15 +0200663void aom_idct32_c(const tran_low_t *input, tran_low_t *output) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700664 tran_low_t step1[32], step2[32];
665 tran_high_t temp1, temp2;
666
667 // stage 1
668 step1[0] = input[0];
669 step1[1] = input[16];
670 step1[2] = input[8];
671 step1[3] = input[24];
672 step1[4] = input[4];
673 step1[5] = input[20];
674 step1[6] = input[12];
675 step1[7] = input[28];
676 step1[8] = input[2];
677 step1[9] = input[18];
678 step1[10] = input[10];
679 step1[11] = input[26];
680 step1[12] = input[6];
681 step1[13] = input[22];
682 step1[14] = input[14];
683 step1[15] = input[30];
684
685 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
686 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
687 step1[16] = WRAPLOW(dct_const_round_shift(temp1));
688 step1[31] = WRAPLOW(dct_const_round_shift(temp2));
689
690 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
691 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
692 step1[17] = WRAPLOW(dct_const_round_shift(temp1));
693 step1[30] = WRAPLOW(dct_const_round_shift(temp2));
694
695 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
696 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
697 step1[18] = WRAPLOW(dct_const_round_shift(temp1));
698 step1[29] = WRAPLOW(dct_const_round_shift(temp2));
699
700 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
701 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
702 step1[19] = WRAPLOW(dct_const_round_shift(temp1));
703 step1[28] = WRAPLOW(dct_const_round_shift(temp2));
704
705 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
706 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
707 step1[20] = WRAPLOW(dct_const_round_shift(temp1));
708 step1[27] = WRAPLOW(dct_const_round_shift(temp2));
709
710 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
711 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
712 step1[21] = WRAPLOW(dct_const_round_shift(temp1));
713 step1[26] = WRAPLOW(dct_const_round_shift(temp2));
714
715 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
716 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
717 step1[22] = WRAPLOW(dct_const_round_shift(temp1));
718 step1[25] = WRAPLOW(dct_const_round_shift(temp2));
719
720 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
721 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
722 step1[23] = WRAPLOW(dct_const_round_shift(temp1));
723 step1[24] = WRAPLOW(dct_const_round_shift(temp2));
724
725 // stage 2
726 step2[0] = step1[0];
727 step2[1] = step1[1];
728 step2[2] = step1[2];
729 step2[3] = step1[3];
730 step2[4] = step1[4];
731 step2[5] = step1[5];
732 step2[6] = step1[6];
733 step2[7] = step1[7];
734
735 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
736 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
737 step2[8] = WRAPLOW(dct_const_round_shift(temp1));
738 step2[15] = WRAPLOW(dct_const_round_shift(temp2));
739
740 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
741 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
742 step2[9] = WRAPLOW(dct_const_round_shift(temp1));
743 step2[14] = WRAPLOW(dct_const_round_shift(temp2));
744
745 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
746 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
747 step2[10] = WRAPLOW(dct_const_round_shift(temp1));
748 step2[13] = WRAPLOW(dct_const_round_shift(temp2));
749
750 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
751 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
752 step2[11] = WRAPLOW(dct_const_round_shift(temp1));
753 step2[12] = WRAPLOW(dct_const_round_shift(temp2));
754
755 step2[16] = WRAPLOW(step1[16] + step1[17]);
756 step2[17] = WRAPLOW(step1[16] - step1[17]);
757 step2[18] = WRAPLOW(-step1[18] + step1[19]);
758 step2[19] = WRAPLOW(step1[18] + step1[19]);
759 step2[20] = WRAPLOW(step1[20] + step1[21]);
760 step2[21] = WRAPLOW(step1[20] - step1[21]);
761 step2[22] = WRAPLOW(-step1[22] + step1[23]);
762 step2[23] = WRAPLOW(step1[22] + step1[23]);
763 step2[24] = WRAPLOW(step1[24] + step1[25]);
764 step2[25] = WRAPLOW(step1[24] - step1[25]);
765 step2[26] = WRAPLOW(-step1[26] + step1[27]);
766 step2[27] = WRAPLOW(step1[26] + step1[27]);
767 step2[28] = WRAPLOW(step1[28] + step1[29]);
768 step2[29] = WRAPLOW(step1[28] - step1[29]);
769 step2[30] = WRAPLOW(-step1[30] + step1[31]);
770 step2[31] = WRAPLOW(step1[30] + step1[31]);
771
772 // stage 3
773 step1[0] = step2[0];
774 step1[1] = step2[1];
775 step1[2] = step2[2];
776 step1[3] = step2[3];
777
778 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
779 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
780 step1[4] = WRAPLOW(dct_const_round_shift(temp1));
781 step1[7] = WRAPLOW(dct_const_round_shift(temp2));
782 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
783 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
784 step1[5] = WRAPLOW(dct_const_round_shift(temp1));
785 step1[6] = WRAPLOW(dct_const_round_shift(temp2));
786
787 step1[8] = WRAPLOW(step2[8] + step2[9]);
788 step1[9] = WRAPLOW(step2[8] - step2[9]);
789 step1[10] = WRAPLOW(-step2[10] + step2[11]);
790 step1[11] = WRAPLOW(step2[10] + step2[11]);
791 step1[12] = WRAPLOW(step2[12] + step2[13]);
792 step1[13] = WRAPLOW(step2[12] - step2[13]);
793 step1[14] = WRAPLOW(-step2[14] + step2[15]);
794 step1[15] = WRAPLOW(step2[14] + step2[15]);
795
796 step1[16] = step2[16];
797 step1[31] = step2[31];
798 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
799 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
800 step1[17] = WRAPLOW(dct_const_round_shift(temp1));
801 step1[30] = WRAPLOW(dct_const_round_shift(temp2));
802 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
803 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
804 step1[18] = WRAPLOW(dct_const_round_shift(temp1));
805 step1[29] = WRAPLOW(dct_const_round_shift(temp2));
806 step1[19] = step2[19];
807 step1[20] = step2[20];
808 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
809 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
810 step1[21] = WRAPLOW(dct_const_round_shift(temp1));
811 step1[26] = WRAPLOW(dct_const_round_shift(temp2));
812 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
813 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
814 step1[22] = WRAPLOW(dct_const_round_shift(temp1));
815 step1[25] = WRAPLOW(dct_const_round_shift(temp2));
816 step1[23] = step2[23];
817 step1[24] = step2[24];
818 step1[27] = step2[27];
819 step1[28] = step2[28];
820
821 // stage 4
822 temp1 = (step1[0] + step1[1]) * cospi_16_64;
823 temp2 = (step1[0] - step1[1]) * cospi_16_64;
824 step2[0] = WRAPLOW(dct_const_round_shift(temp1));
825 step2[1] = WRAPLOW(dct_const_round_shift(temp2));
826 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
827 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
828 step2[2] = WRAPLOW(dct_const_round_shift(temp1));
829 step2[3] = WRAPLOW(dct_const_round_shift(temp2));
830 step2[4] = WRAPLOW(step1[4] + step1[5]);
831 step2[5] = WRAPLOW(step1[4] - step1[5]);
832 step2[6] = WRAPLOW(-step1[6] + step1[7]);
833 step2[7] = WRAPLOW(step1[6] + step1[7]);
834
835 step2[8] = step1[8];
836 step2[15] = step1[15];
837 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
838 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
839 step2[9] = WRAPLOW(dct_const_round_shift(temp1));
840 step2[14] = WRAPLOW(dct_const_round_shift(temp2));
841 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
842 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
843 step2[10] = WRAPLOW(dct_const_round_shift(temp1));
844 step2[13] = WRAPLOW(dct_const_round_shift(temp2));
845 step2[11] = step1[11];
846 step2[12] = step1[12];
847
848 step2[16] = WRAPLOW(step1[16] + step1[19]);
849 step2[17] = WRAPLOW(step1[17] + step1[18]);
850 step2[18] = WRAPLOW(step1[17] - step1[18]);
851 step2[19] = WRAPLOW(step1[16] - step1[19]);
852 step2[20] = WRAPLOW(-step1[20] + step1[23]);
853 step2[21] = WRAPLOW(-step1[21] + step1[22]);
854 step2[22] = WRAPLOW(step1[21] + step1[22]);
855 step2[23] = WRAPLOW(step1[20] + step1[23]);
856
857 step2[24] = WRAPLOW(step1[24] + step1[27]);
858 step2[25] = WRAPLOW(step1[25] + step1[26]);
859 step2[26] = WRAPLOW(step1[25] - step1[26]);
860 step2[27] = WRAPLOW(step1[24] - step1[27]);
861 step2[28] = WRAPLOW(-step1[28] + step1[31]);
862 step2[29] = WRAPLOW(-step1[29] + step1[30]);
863 step2[30] = WRAPLOW(step1[29] + step1[30]);
864 step2[31] = WRAPLOW(step1[28] + step1[31]);
865
866 // stage 5
867 step1[0] = WRAPLOW(step2[0] + step2[3]);
868 step1[1] = WRAPLOW(step2[1] + step2[2]);
869 step1[2] = WRAPLOW(step2[1] - step2[2]);
870 step1[3] = WRAPLOW(step2[0] - step2[3]);
871 step1[4] = step2[4];
872 temp1 = (step2[6] - step2[5]) * cospi_16_64;
873 temp2 = (step2[5] + step2[6]) * cospi_16_64;
874 step1[5] = WRAPLOW(dct_const_round_shift(temp1));
875 step1[6] = WRAPLOW(dct_const_round_shift(temp2));
876 step1[7] = step2[7];
877
878 step1[8] = WRAPLOW(step2[8] + step2[11]);
879 step1[9] = WRAPLOW(step2[9] + step2[10]);
880 step1[10] = WRAPLOW(step2[9] - step2[10]);
881 step1[11] = WRAPLOW(step2[8] - step2[11]);
882 step1[12] = WRAPLOW(-step2[12] + step2[15]);
883 step1[13] = WRAPLOW(-step2[13] + step2[14]);
884 step1[14] = WRAPLOW(step2[13] + step2[14]);
885 step1[15] = WRAPLOW(step2[12] + step2[15]);
886
887 step1[16] = step2[16];
888 step1[17] = step2[17];
889 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
890 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
891 step1[18] = WRAPLOW(dct_const_round_shift(temp1));
892 step1[29] = WRAPLOW(dct_const_round_shift(temp2));
893 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
894 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
895 step1[19] = WRAPLOW(dct_const_round_shift(temp1));
896 step1[28] = WRAPLOW(dct_const_round_shift(temp2));
897 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
898 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
899 step1[20] = WRAPLOW(dct_const_round_shift(temp1));
900 step1[27] = WRAPLOW(dct_const_round_shift(temp2));
901 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
902 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
903 step1[21] = WRAPLOW(dct_const_round_shift(temp1));
904 step1[26] = WRAPLOW(dct_const_round_shift(temp2));
905 step1[22] = step2[22];
906 step1[23] = step2[23];
907 step1[24] = step2[24];
908 step1[25] = step2[25];
909 step1[30] = step2[30];
910 step1[31] = step2[31];
911
912 // stage 6
913 step2[0] = WRAPLOW(step1[0] + step1[7]);
914 step2[1] = WRAPLOW(step1[1] + step1[6]);
915 step2[2] = WRAPLOW(step1[2] + step1[5]);
916 step2[3] = WRAPLOW(step1[3] + step1[4]);
917 step2[4] = WRAPLOW(step1[3] - step1[4]);
918 step2[5] = WRAPLOW(step1[2] - step1[5]);
919 step2[6] = WRAPLOW(step1[1] - step1[6]);
920 step2[7] = WRAPLOW(step1[0] - step1[7]);
921 step2[8] = step1[8];
922 step2[9] = step1[9];
923 temp1 = (-step1[10] + step1[13]) * cospi_16_64;
924 temp2 = (step1[10] + step1[13]) * cospi_16_64;
925 step2[10] = WRAPLOW(dct_const_round_shift(temp1));
926 step2[13] = WRAPLOW(dct_const_round_shift(temp2));
927 temp1 = (-step1[11] + step1[12]) * cospi_16_64;
928 temp2 = (step1[11] + step1[12]) * cospi_16_64;
929 step2[11] = WRAPLOW(dct_const_round_shift(temp1));
930 step2[12] = WRAPLOW(dct_const_round_shift(temp2));
931 step2[14] = step1[14];
932 step2[15] = step1[15];
933
934 step2[16] = WRAPLOW(step1[16] + step1[23]);
935 step2[17] = WRAPLOW(step1[17] + step1[22]);
936 step2[18] = WRAPLOW(step1[18] + step1[21]);
937 step2[19] = WRAPLOW(step1[19] + step1[20]);
938 step2[20] = WRAPLOW(step1[19] - step1[20]);
939 step2[21] = WRAPLOW(step1[18] - step1[21]);
940 step2[22] = WRAPLOW(step1[17] - step1[22]);
941 step2[23] = WRAPLOW(step1[16] - step1[23]);
942
943 step2[24] = WRAPLOW(-step1[24] + step1[31]);
944 step2[25] = WRAPLOW(-step1[25] + step1[30]);
945 step2[26] = WRAPLOW(-step1[26] + step1[29]);
946 step2[27] = WRAPLOW(-step1[27] + step1[28]);
947 step2[28] = WRAPLOW(step1[27] + step1[28]);
948 step2[29] = WRAPLOW(step1[26] + step1[29]);
949 step2[30] = WRAPLOW(step1[25] + step1[30]);
950 step2[31] = WRAPLOW(step1[24] + step1[31]);
951
952 // stage 7
953 step1[0] = WRAPLOW(step2[0] + step2[15]);
954 step1[1] = WRAPLOW(step2[1] + step2[14]);
955 step1[2] = WRAPLOW(step2[2] + step2[13]);
956 step1[3] = WRAPLOW(step2[3] + step2[12]);
957 step1[4] = WRAPLOW(step2[4] + step2[11]);
958 step1[5] = WRAPLOW(step2[5] + step2[10]);
959 step1[6] = WRAPLOW(step2[6] + step2[9]);
960 step1[7] = WRAPLOW(step2[7] + step2[8]);
961 step1[8] = WRAPLOW(step2[7] - step2[8]);
962 step1[9] = WRAPLOW(step2[6] - step2[9]);
963 step1[10] = WRAPLOW(step2[5] - step2[10]);
964 step1[11] = WRAPLOW(step2[4] - step2[11]);
965 step1[12] = WRAPLOW(step2[3] - step2[12]);
966 step1[13] = WRAPLOW(step2[2] - step2[13]);
967 step1[14] = WRAPLOW(step2[1] - step2[14]);
968 step1[15] = WRAPLOW(step2[0] - step2[15]);
969
970 step1[16] = step2[16];
971 step1[17] = step2[17];
972 step1[18] = step2[18];
973 step1[19] = step2[19];
974 temp1 = (-step2[20] + step2[27]) * cospi_16_64;
975 temp2 = (step2[20] + step2[27]) * cospi_16_64;
976 step1[20] = WRAPLOW(dct_const_round_shift(temp1));
977 step1[27] = WRAPLOW(dct_const_round_shift(temp2));
978 temp1 = (-step2[21] + step2[26]) * cospi_16_64;
979 temp2 = (step2[21] + step2[26]) * cospi_16_64;
980 step1[21] = WRAPLOW(dct_const_round_shift(temp1));
981 step1[26] = WRAPLOW(dct_const_round_shift(temp2));
982 temp1 = (-step2[22] + step2[25]) * cospi_16_64;
983 temp2 = (step2[22] + step2[25]) * cospi_16_64;
984 step1[22] = WRAPLOW(dct_const_round_shift(temp1));
985 step1[25] = WRAPLOW(dct_const_round_shift(temp2));
986 temp1 = (-step2[23] + step2[24]) * cospi_16_64;
987 temp2 = (step2[23] + step2[24]) * cospi_16_64;
988 step1[23] = WRAPLOW(dct_const_round_shift(temp1));
989 step1[24] = WRAPLOW(dct_const_round_shift(temp2));
990 step1[28] = step2[28];
991 step1[29] = step2[29];
992 step1[30] = step2[30];
993 step1[31] = step2[31];
994
995 // final stage
996 output[0] = WRAPLOW(step1[0] + step1[31]);
997 output[1] = WRAPLOW(step1[1] + step1[30]);
998 output[2] = WRAPLOW(step1[2] + step1[29]);
999 output[3] = WRAPLOW(step1[3] + step1[28]);
1000 output[4] = WRAPLOW(step1[4] + step1[27]);
1001 output[5] = WRAPLOW(step1[5] + step1[26]);
1002 output[6] = WRAPLOW(step1[6] + step1[25]);
1003 output[7] = WRAPLOW(step1[7] + step1[24]);
1004 output[8] = WRAPLOW(step1[8] + step1[23]);
1005 output[9] = WRAPLOW(step1[9] + step1[22]);
1006 output[10] = WRAPLOW(step1[10] + step1[21]);
1007 output[11] = WRAPLOW(step1[11] + step1[20]);
1008 output[12] = WRAPLOW(step1[12] + step1[19]);
1009 output[13] = WRAPLOW(step1[13] + step1[18]);
1010 output[14] = WRAPLOW(step1[14] + step1[17]);
1011 output[15] = WRAPLOW(step1[15] + step1[16]);
1012 output[16] = WRAPLOW(step1[15] - step1[16]);
1013 output[17] = WRAPLOW(step1[14] - step1[17]);
1014 output[18] = WRAPLOW(step1[13] - step1[18]);
1015 output[19] = WRAPLOW(step1[12] - step1[19]);
1016 output[20] = WRAPLOW(step1[11] - step1[20]);
1017 output[21] = WRAPLOW(step1[10] - step1[21]);
1018 output[22] = WRAPLOW(step1[9] - step1[22]);
1019 output[23] = WRAPLOW(step1[8] - step1[23]);
1020 output[24] = WRAPLOW(step1[7] - step1[24]);
1021 output[25] = WRAPLOW(step1[6] - step1[25]);
1022 output[26] = WRAPLOW(step1[5] - step1[26]);
1023 output[27] = WRAPLOW(step1[4] - step1[27]);
1024 output[28] = WRAPLOW(step1[3] - step1[28]);
1025 output[29] = WRAPLOW(step1[2] - step1[29]);
1026 output[30] = WRAPLOW(step1[1] - step1[30]);
1027 output[31] = WRAPLOW(step1[0] - step1[31]);
1028}
Yaowu Xuf883b422016-08-30 14:01:10 -07001029void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001030 int stride, int bd) {
1031 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
1032 0.5 shifts per pixel. */
1033 int i;
1034 tran_low_t output[16];
1035 tran_high_t a1, b1, c1, d1, e1;
1036 const tran_low_t *ip = input;
1037 tran_low_t *op = output;
1038 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1039
1040 for (i = 0; i < 4; i++) {
1041 a1 = ip[0] >> UNIT_QUANT_SHIFT;
1042 c1 = ip[1] >> UNIT_QUANT_SHIFT;
1043 d1 = ip[2] >> UNIT_QUANT_SHIFT;
1044 b1 = ip[3] >> UNIT_QUANT_SHIFT;
1045 a1 += c1;
1046 d1 -= b1;
1047 e1 = (a1 - d1) >> 1;
1048 b1 = e1 - b1;
1049 c1 = e1 - c1;
1050 a1 -= b1;
1051 d1 += c1;
1052 op[0] = HIGHBD_WRAPLOW(a1, bd);
1053 op[1] = HIGHBD_WRAPLOW(b1, bd);
1054 op[2] = HIGHBD_WRAPLOW(c1, bd);
1055 op[3] = HIGHBD_WRAPLOW(d1, bd);
1056 ip += 4;
1057 op += 4;
1058 }
1059
1060 ip = output;
1061 for (i = 0; i < 4; i++) {
1062 a1 = ip[4 * 0];
1063 c1 = ip[4 * 1];
1064 d1 = ip[4 * 2];
1065 b1 = ip[4 * 3];
1066 a1 += c1;
1067 d1 -= b1;
1068 e1 = (a1 - d1) >> 1;
1069 b1 = e1 - b1;
1070 c1 = e1 - c1;
1071 a1 -= b1;
1072 d1 += c1;
1073 dest[stride * 0] =
1074 highbd_clip_pixel_add(dest[stride * 0], HIGHBD_WRAPLOW(a1, bd), bd);
1075 dest[stride * 1] =
1076 highbd_clip_pixel_add(dest[stride * 1], HIGHBD_WRAPLOW(b1, bd), bd);
1077 dest[stride * 2] =
1078 highbd_clip_pixel_add(dest[stride * 2], HIGHBD_WRAPLOW(c1, bd), bd);
1079 dest[stride * 3] =
1080 highbd_clip_pixel_add(dest[stride * 3], HIGHBD_WRAPLOW(d1, bd), bd);
1081
1082 ip++;
1083 dest++;
1084 }
1085}
1086
Yaowu Xuf883b422016-08-30 14:01:10 -07001087void aom_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001088 int dest_stride, int bd) {
1089 int i;
1090 tran_high_t a1, e1;
1091 tran_low_t tmp[4];
1092 const tran_low_t *ip = in;
1093 tran_low_t *op = tmp;
1094 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1095 (void)bd;
1096
1097 a1 = ip[0] >> UNIT_QUANT_SHIFT;
1098 e1 = a1 >> 1;
1099 a1 -= e1;
1100 op[0] = HIGHBD_WRAPLOW(a1, bd);
1101 op[1] = op[2] = op[3] = HIGHBD_WRAPLOW(e1, bd);
1102
1103 ip = tmp;
1104 for (i = 0; i < 4; i++) {
1105 e1 = ip[0] >> 1;
1106 a1 = ip[0] - e1;
1107 dest[dest_stride * 0] =
1108 highbd_clip_pixel_add(dest[dest_stride * 0], a1, bd);
1109 dest[dest_stride * 1] =
1110 highbd_clip_pixel_add(dest[dest_stride * 1], e1, bd);
1111 dest[dest_stride * 2] =
1112 highbd_clip_pixel_add(dest[dest_stride * 2], e1, bd);
1113 dest[dest_stride * 3] =
1114 highbd_clip_pixel_add(dest[dest_stride * 3], e1, bd);
1115 ip++;
1116 dest++;
1117 }
1118}