blob: eedbc79800e963bbbe6e472d0378544464c6ef1f [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xuc27fc142016-08-22 16:08:15 -070010 */
11
12#include <math.h>
13
Yaowu Xuf883b422016-08-30 14:01:10 -070014#include "./aom_dsp_rtcd.h"
Geza Lorea1ddae52016-09-02 09:51:34 +010015#include "./av1_rtcd.h"
16#include "aom_dsp/inv_txfm.h"
17#include "aom_ports/mem.h"
18#include "av1/common/av1_inv_txfm2d_cfg.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070019#include "av1/common/blockd.h"
20#include "av1/common/enums.h"
21#include "av1/common/idct.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070022
// Returns the extra coefficient down-scale (0 or 1) used for this transform:
// 1 when the effective square-size of tx_size is 32x32, 0 otherwise.
// tx_type is currently unused; xd is consulted only for the high-bitdepth
// buffer flag when CONFIG_AOM_HIGHBITDEPTH is enabled.
int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
                 const TX_SIZE tx_size) {
  (void)tx_type;
#if CONFIG_AOM_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    return txsize_sqr_up_map[tx_size] == TX_32X32;
  }
#else
  (void)xd;
#endif
  // Both bitdepth paths currently apply the same rule.
  return txsize_sqr_up_map[tx_size] == TX_32X32;
}
35
Debargha Mukherjeee52816b2016-10-12 10:49:29 -070036// NOTE: The implementation of all inverses need to be aware of the fact
37// that input and output could be the same buffer.
38
Yaowu Xuc27fc142016-08-22 16:08:15 -070039#if CONFIG_EXT_TX
40static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
41 int i;
42 for (i = 0; i < 4; ++i)
43 output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
44}
45
46static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
47 int i;
48 for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
49}
50
51static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
52 int i;
53 for (i = 0; i < 16; ++i)
54 output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
55}
56
57static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
58 int i;
59 for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
60}
61
// For use in lieu of ADST: a 32-point "half right" transform.  The low 16
// inputs feed a sqrt(2)-scaled idct16 whose result lands in output[16..31];
// the high 16 inputs are passed through (x4) to output[0..15].
// NOTE: input and output may alias (see file-level note), so input[0..15]
// must be copied into a local buffer BEFORE output[0..15] is overwritten.
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  idct16_c(inputhalf, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}
76
Yaowu Xuf883b422016-08-30 14:01:10 -070077#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -070078static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
79 int bd) {
80 int i;
81 for (i = 0; i < 4; ++i)
82 output[i] =
83 HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
84}
85
86static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
87 int bd) {
88 int i;
89 (void)bd;
90 for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
91}
92
93static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
94 int bd) {
95 int i;
96 for (i = 0; i < 16; ++i)
97 output[i] =
98 HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * 2 * Sqrt2), bd);
99}
100
101static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
102 int bd) {
103 int i;
104 (void)bd;
105 for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
106}
107
// High-bitdepth 32-point "half right" transform (see ihalfright32_c).
// NOTE: input and output may alias, so input[0..15] is saved to a local
// buffer BEFORE output[0..15] is overwritten.
static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
                                  int bd) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
  }
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  aom_highbd_idct16_c(inputhalf, output + 16, bd);
  // Note overall scaling factor is 4 times orthogonal
}
Yaowu Xuf883b422016-08-30 14:01:10 -0700123#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700124
125// Inverse identity transform and add.
126static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
127 int bs, int tx_type) {
128 int r, c;
129 const int shift = bs < 32 ? 3 : 2;
130 if (tx_type == IDTX) {
131 for (r = 0; r < bs; ++r) {
132 for (c = 0; c < bs; ++c)
133 dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
134 dest += stride;
135 input += bs;
136 }
137 }
138}
139
// Flip a row-major buffer upside-down in place by pointer arithmetic: move
// the base pointer to the last row and negate the stride so that subsequent
// row iteration walks the buffer backwards.
#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)
145
146static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
147 int *sstride, int tx_type, int sizey,
148 int sizex) {
149 // Note that the transpose of src will be added to dst. In order to LR
150 // flip the addends (in dst coordinates), we UD flip the src. To UD flip
151 // the addends, we UD flip the dst.
152 switch (tx_type) {
153 case DCT_DCT:
154 case ADST_DCT:
155 case DCT_ADST:
156 case ADST_ADST:
157 case IDTX:
158 case V_DCT:
159 case H_DCT:
160 case V_ADST:
161 case H_ADST: break;
162 case FLIPADST_DCT:
163 case FLIPADST_ADST:
164 case V_FLIPADST:
165 // flip UD
166 FLIPUD_PTR(*dst, *dstride, sizey);
167 break;
168 case DCT_FLIPADST:
169 case ADST_FLIPADST:
170 case H_FLIPADST:
171 // flip LR
172 FLIPUD_PTR(*src, *sstride, sizex);
173 break;
174 case FLIPADST_FLIPADST:
175 // flip UD
176 FLIPUD_PTR(*dst, *dstride, sizey);
177 // flip LR
178 FLIPUD_PTR(*src, *sstride, sizex);
179 break;
180 default: assert(0); break;
181 }
182}
183
Yaowu Xuf883b422016-08-30 14:01:10 -0700184#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700185static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
186 int stride, int bs, int tx_type, int bd) {
187 int r, c;
188 const int shift = bs < 32 ? 3 : 2;
189 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
190
191 if (tx_type == IDTX) {
192 for (r = 0; r < bs; ++r) {
193 for (c = 0; c < bs; ++c)
194 dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
195 dest += stride;
196 input += bs;
197 }
198 }
199}
200
201static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
202 int *sstride, int tx_type, int sizey,
203 int sizex) {
204 // Note that the transpose of src will be added to dst. In order to LR
205 // flip the addends (in dst coordinates), we UD flip the src. To UD flip
206 // the addends, we UD flip the dst.
207 switch (tx_type) {
208 case DCT_DCT:
209 case ADST_DCT:
210 case DCT_ADST:
211 case ADST_ADST:
212 case IDTX:
213 case V_DCT:
214 case H_DCT:
215 case V_ADST:
216 case H_ADST: break;
217 case FLIPADST_DCT:
218 case FLIPADST_ADST:
219 case V_FLIPADST:
220 // flip UD
221 FLIPUD_PTR(*dst, *dstride, sizey);
222 break;
223 case DCT_FLIPADST:
224 case ADST_FLIPADST:
225 case H_FLIPADST:
226 // flip LR
227 FLIPUD_PTR(*src, *sstride, sizex);
228 break;
229 case FLIPADST_FLIPADST:
230 // flip UD
231 FLIPUD_PTR(*dst, *dstride, sizey);
232 // flip LR
233 FLIPUD_PTR(*src, *sstride, sizex);
234 break;
235 default: assert(0); break;
236 }
237}
Yaowu Xuf883b422016-08-30 14:01:10 -0700238#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700239#endif // CONFIG_EXT_TX
240
// 2-D 4x4 inverse hybrid transform + add: row transforms, explicit transpose,
// column transforms in place, then round (>> 4) and add to dest.
// tx_type indexes IHT_4 in TX_TYPE enum order.
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_4[] = {
    { idct4_c, idct4_c },    // DCT_DCT
    { iadst4_c, idct4_c },   // ADST_DCT
    { idct4_c, iadst4_c },   // DCT_ADST
    { iadst4_c, iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { iadst4_c, idct4_c },   // FLIPADST_DCT
    { idct4_c, iadst4_c },   // DCT_FLIPADST
    { iadst4_c, iadst4_c },  // FLIPADST_FLIPADST
    { iadst4_c, iadst4_c },  // ADST_FLIPADST
    { iadst4_c, iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },  // IDTX
    { idct4_c, iidtx4_c },   // V_DCT
    { iidtx4_c, idct4_c },   // H_DCT
    { iadst4_c, iidtx4_c },  // V_ADST
    { iidtx4_c, iadst4_c },  // H_ADST
    { iadst4_c, iidtx4_c },  // V_FLIPADST
    { iidtx4_c, iadst4_c },  // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp;
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
    IHT_4[tx_type].rows(input, out[i]);
    input += 4;
  }

  // transpose (in place, swapping across the diagonal)
  for (i = 1; i < 4; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
    IHT_4[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
  // Redirect dest/outp walking direction for FLIPADST variants.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
    }
  }
}
303
304#if CONFIG_EXT_TX
// 2-D 4x8 (4 wide, 8 tall) inverse hybrid transform + add.  Rows (width 4)
// are transformed and transposed with an extra sqrt(2) rescale — needed for
// rectangular blocks so row/column gains match the square transforms — then
// columns (height 8) are transformed in place and the result is rounded
// (>> 5) and added to dest.
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_4x8[] = {
    { idct8_c, idct4_c },    // DCT_DCT
    { iadst8_c, idct4_c },   // ADST_DCT
    { idct8_c, iadst4_c },   // DCT_ADST
    { iadst8_c, iadst4_c },  // ADST_ADST
    { iadst8_c, idct4_c },   // FLIPADST_DCT
    { idct8_c, iadst4_c },   // DCT_FLIPADST
    { iadst8_c, iadst4_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst4_c },  // ADST_FLIPADST
    { iadst8_c, iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },  // IDTX
    { idct8_c, iidtx4_c },   // V_DCT
    { iidtx8_c, idct4_c },   // H_DCT
    { iadst8_c, iidtx4_c },  // V_ADST
    { iidtx8_c, iadst4_c },  // H_ADST
    { iadst8_c, iidtx4_c },  // V_FLIPADST
    { iidtx8_c, iadst4_c },  // H_FLIPADST
  };

  const int n = 4;
  const int n2 = 8;
  int i, j;
  tran_low_t out[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_4x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_4x8[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
357
// 2-D 8x4 (8 wide, 4 tall) inverse hybrid transform + add.  Mirror of
// av1_iht4x8_32_add_c: rows are 8 wide, columns 4 tall, with the same
// sqrt(2) rectangular rescale and final >> 5 rounding.
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_8x4[] = {
    { idct4_c, idct8_c },    // DCT_DCT
    { iadst4_c, idct8_c },   // ADST_DCT
    { idct4_c, iadst8_c },   // DCT_ADST
    { iadst4_c, iadst8_c },  // ADST_ADST
    { iadst4_c, idct8_c },   // FLIPADST_DCT
    { idct4_c, iadst8_c },   // DCT_FLIPADST
    { iadst4_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst4_c, iadst8_c },  // ADST_FLIPADST
    { iadst4_c, iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },  // IDTX
    { idct4_c, iidtx8_c },   // V_DCT
    { iidtx4_c, idct8_c },   // H_DCT
    { iadst4_c, iidtx8_c },  // V_ADST
    { iidtx4_c, iadst8_c },  // H_ADST
    { iadst4_c, iidtx8_c },  // V_FLIPADST
    { iidtx4_c, iadst8_c },  // H_FLIPADST
  };
  const int n = 4;
  const int n2 = 8;

  int i, j;
  tran_low_t out[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_8x4[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_8x4[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
410
// 2-D 8x16 (8 wide, 16 tall) inverse hybrid transform + add, with the
// sqrt(2) rectangular rescale on the row pass and final >> 6 rounding.
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  static const transform_2d IHT_8x16[] = {
    { idct16_c, idct8_c },    // DCT_DCT
    { iadst16_c, idct8_c },   // ADST_DCT
    { idct16_c, iadst8_c },   // DCT_ADST
    { iadst16_c, iadst8_c },  // ADST_ADST
    { iadst16_c, idct8_c },   // FLIPADST_DCT
    { idct16_c, iadst8_c },   // DCT_FLIPADST
    { iadst16_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst16_c, iadst8_c },  // ADST_FLIPADST
    { iadst16_c, iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },  // IDTX
    { idct16_c, iidtx8_c },   // V_DCT
    { iidtx16_c, idct8_c },   // H_DCT
    { iadst16_c, iidtx8_c },  // V_ADST
    { iidtx16_c, iadst8_c },  // H_ADST
    { iadst16_c, iidtx8_c },  // V_FLIPADST
    { iidtx16_c, iadst8_c },  // H_FLIPADST
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
  tran_low_t out[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_8x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_8x16[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
463
// 2-D 16x8 (16 wide, 8 tall) inverse hybrid transform + add; mirror of
// av1_iht8x16_128_add_c with the same sqrt(2) rescale and >> 6 rounding.
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  static const transform_2d IHT_16x8[] = {
    { idct8_c, idct16_c },    // DCT_DCT
    { iadst8_c, idct16_c },   // ADST_DCT
    { idct8_c, iadst16_c },   // DCT_ADST
    { iadst8_c, iadst16_c },  // ADST_ADST
    { iadst8_c, idct16_c },   // FLIPADST_DCT
    { idct8_c, iadst16_c },   // DCT_FLIPADST
    { iadst8_c, iadst16_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst16_c },  // ADST_FLIPADST
    { iadst8_c, iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },  // IDTX
    { idct8_c, iidtx16_c },   // V_DCT
    { iidtx8_c, idct16_c },   // H_DCT
    { iadst8_c, iidtx16_c },  // V_ADST
    { iidtx8_c, iadst16_c },  // H_ADST
    { iadst8_c, iidtx16_c },  // V_FLIPADST
    { iidtx8_c, iadst16_c },  // H_FLIPADST
  };
  const int n = 8;
  const int n2 = 16;

  int i, j;
  tran_low_t out[16][8], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_16x8[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
516
// 2-D 16x32 (16 wide, 32 tall) inverse hybrid transform + add.  The 32-pt
// "ADST" is approximated by ihalfright32_c; sqrt(2) rescale on the row pass,
// final >> 6 rounding.
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  static const transform_2d IHT_16x32[] = {
    { idct32_c, idct16_c },         // DCT_DCT
    { ihalfright32_c, idct16_c },   // ADST_DCT
    { idct32_c, iadst16_c },        // DCT_ADST
    { ihalfright32_c, iadst16_c },  // ADST_ADST
    { ihalfright32_c, idct16_c },   // FLIPADST_DCT
    { idct32_c, iadst16_c },        // DCT_FLIPADST
    { ihalfright32_c, iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },       // IDTX
    { idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },  // V_ADST
    { iidtx32_c, iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },  // V_FLIPADST
    { iidtx32_c, iadst16_c },       // H_FLIPADST
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
  tran_low_t out[16][32], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_16x32[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
569
// 2-D 32x16 (32 wide, 16 tall) inverse hybrid transform + add; mirror of
// av1_iht16x32_512_add_c with the same sqrt(2) rescale and >> 6 rounding.
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  static const transform_2d IHT_32x16[] = {
    { idct16_c, idct32_c },         // DCT_DCT
    { iadst16_c, idct32_c },        // ADST_DCT
    { idct16_c, ihalfright32_c },   // DCT_ADST
    { iadst16_c, ihalfright32_c },  // ADST_ADST
    { iadst16_c, idct32_c },        // FLIPADST_DCT
    { idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },       // IDTX
    { idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, idct32_c },        // H_DCT
    { iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },  // H_ADST
    { iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },  // H_FLIPADST
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
  tran_low_t out[32][16], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_32x16[tx_type].cols(out[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
622#endif // CONFIG_EXT_TX
623
// 2-D 8x8 inverse hybrid transform + add: row transforms, explicit transpose,
// column transforms in place, then round (>> 5) and add to dest.
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_8[] = {
    { idct8_c, idct8_c },    // DCT_DCT
    { iadst8_c, idct8_c },   // ADST_DCT
    { idct8_c, iadst8_c },   // DCT_ADST
    { iadst8_c, iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { iadst8_c, idct8_c },   // FLIPADST_DCT
    { idct8_c, iadst8_c },   // DCT_FLIPADST
    { iadst8_c, iadst8_c },  // FLIPADST_FLIPADST
    { iadst8_c, iadst8_c },  // ADST_FLIPADST
    { iadst8_c, iadst8_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx8_c },  // IDTX
    { idct8_c, iidtx8_c },   // V_DCT
    { iidtx8_c, idct8_c },   // H_DCT
    { iadst8_c, iidtx8_c },  // V_ADST
    { iidtx8_c, iadst8_c },  // H_ADST
    { iadst8_c, iidtx8_c },  // V_FLIPADST
    { iidtx8_c, iadst8_c },  // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp;
  tran_low_t out[8][8];
  tran_low_t *outp = &out[0][0];
  int outstride = 8;

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
    IHT_8[tx_type].rows(input, out[i]);
    input += 8;
  }

  // transpose (in place, swapping across the diagonal)
  for (i = 1; i < 8; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 8; ++i) {
    IHT_8[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
  // Redirect dest/outp walking direction for FLIPADST variants.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
#endif

  // Sum with the destination
  for (i = 0; i < 8; ++i) {
    for (j = 0; j < 8; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
686
// 2-D 16x16 inverse hybrid transform + add: row transforms, explicit
// transpose, column transforms in place, then round (>> 6) and add to dest.
void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  static const transform_2d IHT_16[] = {
    { idct16_c, idct16_c },    // DCT_DCT
    { iadst16_c, idct16_c },   // ADST_DCT
    { idct16_c, iadst16_c },   // DCT_ADST
    { iadst16_c, iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { iadst16_c, idct16_c },   // FLIPADST_DCT
    { idct16_c, iadst16_c },   // DCT_FLIPADST
    { iadst16_c, iadst16_c },  // FLIPADST_FLIPADST
    { iadst16_c, iadst16_c },  // ADST_FLIPADST
    { iadst16_c, iadst16_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx16_c },  // IDTX
    { idct16_c, iidtx16_c },   // V_DCT
    { iidtx16_c, idct16_c },   // H_DCT
    { iadst16_c, iidtx16_c },  // V_ADST
    { iidtx16_c, iadst16_c },  // H_ADST
    { iadst16_c, iidtx16_c },  // V_FLIPADST
    { iidtx16_c, iadst16_c },  // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp;
  tran_low_t out[16][16];
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // inverse transform row vectors
  for (i = 0; i < 16; ++i) {
    IHT_16[tx_type].rows(input, out[i]);
    input += 16;
  }

  // transpose (in place, swapping across the diagonal)
  for (i = 1; i < 16; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 16; ++i) {
    IHT_16[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
  // Redirect dest/outp walking direction for FLIPADST variants.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
#endif

  // Sum with the destination
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
749
750#if CONFIG_EXT_TX
Yaowu Xuf883b422016-08-30 14:01:10 -0700751void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
752 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700753 static const transform_2d IHT_32[] = {
754 { idct32_c, idct32_c }, // DCT_DCT
755 { ihalfright32_c, idct32_c }, // ADST_DCT
756 { idct32_c, ihalfright32_c }, // DCT_ADST
757 { ihalfright32_c, ihalfright32_c }, // ADST_ADST
758 { ihalfright32_c, idct32_c }, // FLIPADST_DCT
759 { idct32_c, ihalfright32_c }, // DCT_FLIPADST
760 { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST
761 { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST
762 { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST
763 { iidtx32_c, iidtx32_c }, // IDTX
764 { idct32_c, iidtx32_c }, // V_DCT
765 { iidtx32_c, idct32_c }, // H_DCT
766 { ihalfright32_c, iidtx16_c }, // V_ADST
767 { iidtx16_c, ihalfright32_c }, // H_ADST
768 { ihalfright32_c, iidtx16_c }, // V_FLIPADST
769 { iidtx16_c, ihalfright32_c }, // H_FLIPADST
770 };
771
772 int i, j;
773 tran_low_t tmp;
774 tran_low_t out[32][32];
775 tran_low_t *outp = &out[0][0];
776 int outstride = 32;
777
778 // inverse transform row vectors
779 for (i = 0; i < 32; ++i) {
780 IHT_32[tx_type].rows(input, out[i]);
781 input += 32;
782 }
783
784 // transpose
785 for (i = 1; i < 32; i++) {
786 for (j = 0; j < i; j++) {
787 tmp = out[i][j];
788 out[i][j] = out[j][i];
789 out[j][i] = tmp;
790 }
791 }
792
793 // inverse transform column vectors
794 for (i = 0; i < 32; ++i) {
795 IHT_32[tx_type].cols(out[i], out[i]);
796 }
797
798 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
799
800 // Sum with the destination
801 for (i = 0; i < 32; ++i) {
802 for (j = 0; j < 32; ++j) {
803 int d = i * stride + j;
804 int s = j * outstride + i;
805 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
806 }
807 }
808}
809#endif // CONFIG_EXT_TX
810
811// idct
Yaowu Xuf883b422016-08-30 14:01:10 -0700812void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
813 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700814 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -0700815 aom_idct4x4_16_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700816 else
Yaowu Xuf883b422016-08-30 14:01:10 -0700817 aom_idct4x4_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700818}
819
Yaowu Xuf883b422016-08-30 14:01:10 -0700820void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
821 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700822 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -0700823 aom_iwht4x4_16_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700824 else
Yaowu Xuf883b422016-08-30 14:01:10 -0700825 aom_iwht4x4_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700826}
827
Yaowu Xuf883b422016-08-30 14:01:10 -0700828void av1_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
829 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700830 // If dc is 1, then input[0] is the reconstructed value, do not need
831 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
832
833 // The calculation can be simplified if there are not many non-zero dct
834 // coefficients. Use eobs to decide what to do.
Yaowu Xuf883b422016-08-30 14:01:10 -0700835 // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
Yaowu Xuc27fc142016-08-22 16:08:15 -0700836 // Combine that with code here.
837 if (eob == 1)
838 // DC only DCT coefficient
Yaowu Xuf883b422016-08-30 14:01:10 -0700839 aom_idct8x8_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700840 else if (eob <= 12)
Yaowu Xuf883b422016-08-30 14:01:10 -0700841 aom_idct8x8_12_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700842 else
Yaowu Xuf883b422016-08-30 14:01:10 -0700843 aom_idct8x8_64_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700844}
845
Yaowu Xuf883b422016-08-30 14:01:10 -0700846void av1_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
847 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700848 /* The calculation can be simplified if there are not many non-zero dct
849 * coefficients. Use eobs to separate different cases. */
850 if (eob == 1) /* DC only DCT coefficient. */
Yaowu Xuf883b422016-08-30 14:01:10 -0700851 aom_idct16x16_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700852 else if (eob <= 10)
Yaowu Xuf883b422016-08-30 14:01:10 -0700853 aom_idct16x16_10_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700854 else
Yaowu Xuf883b422016-08-30 14:01:10 -0700855 aom_idct16x16_256_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700856}
857
Yaowu Xuf883b422016-08-30 14:01:10 -0700858void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
859 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700860 if (eob == 1)
Yaowu Xuf883b422016-08-30 14:01:10 -0700861 aom_idct32x32_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700862 else if (eob <= 34)
863 // non-zero coeff only in upper-left 8x8
Yaowu Xuf883b422016-08-30 14:01:10 -0700864 aom_idct32x32_34_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700865 else
Yaowu Xuf883b422016-08-30 14:01:10 -0700866 aom_idct32x32_1024_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700867}
868
// Dispatch a 4x4 inverse transform + add for the given tx_type.  In lossless
// mode only DCT_DCT is valid and the Walsh-Hadamard transform is used.
void av1_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type, int lossless) {
  if (lossless) {
    assert(tx_type == DCT_DCT);
    av1_iwht4x4_add(input, dest, stride, eob);
    return;
  }

  switch (tx_type) {
    case DCT_DCT: av1_idct4x4_add(input, dest, stride, eob); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht4x4_16_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 4, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
902
903#if CONFIG_EXT_TX
// 4x8 inverse transform + add; eob is unused (the full transform always runs).
void av1_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht4x8_32_add(input, dest, stride, tx_type);
}
909
// 8x4 inverse transform + add; eob is unused (the full transform always runs).
void av1_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht8x4_32_add(input, dest, stride, tx_type);
}
915
// 8x16 inverse transform + add; eob is unused (the full transform always runs).
void av1_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest, int stride,
                           int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht8x16_128_add(input, dest, stride, tx_type);
}
921
// 16x8 inverse transform + add; eob is unused (the full transform always runs).
void av1_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest, int stride,
                           int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht16x8_128_add(input, dest, stride, tx_type);
}
927
Yaowu Xuf883b422016-08-30 14:01:10 -0700928void av1_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest, int stride,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700929 int eob, TX_TYPE tx_type) {
930 (void)eob;
Yaowu Xuf883b422016-08-30 14:01:10 -0700931 av1_iht16x32_512_add(input, dest, stride, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700932}
933
Yaowu Xuf883b422016-08-30 14:01:10 -0700934void av1_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest, int stride,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700935 int eob, TX_TYPE tx_type) {
936 (void)eob;
Yaowu Xuf883b422016-08-30 14:01:10 -0700937 av1_iht32x16_512_add(input, dest, stride, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700938}
939#endif // CONFIG_EXT_TX
940
Yaowu Xuf883b422016-08-30 14:01:10 -0700941void av1_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
942 int eob, TX_TYPE tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700943 switch (tx_type) {
Yaowu Xuf883b422016-08-30 14:01:10 -0700944 case DCT_DCT: av1_idct8x8_add(input, dest, stride, eob); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700945 case ADST_DCT:
946 case DCT_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -0700947 case ADST_ADST: av1_iht8x8_64_add(input, dest, stride, tx_type); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700948#if CONFIG_EXT_TX
949 case FLIPADST_DCT:
950 case DCT_FLIPADST:
951 case FLIPADST_FLIPADST:
952 case ADST_FLIPADST:
Yaowu Xuf883b422016-08-30 14:01:10 -0700953 case FLIPADST_ADST: av1_iht8x8_64_add(input, dest, stride, tx_type); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700954 case V_DCT:
955 case H_DCT:
956 case V_ADST:
957 case H_ADST:
958 case V_FLIPADST:
959 case H_FLIPADST:
960 // Use C version since DST only exists in C code
Yaowu Xuf883b422016-08-30 14:01:10 -0700961 av1_iht8x8_64_add_c(input, dest, stride, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700962 break;
963 case IDTX: inv_idtx_add_c(input, dest, stride, 8, tx_type); break;
964#endif // CONFIG_EXT_TX
965 default: assert(0); break;
966 }
967}
968
Yaowu Xuf883b422016-08-30 14:01:10 -0700969void av1_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, int stride,
970 int eob, TX_TYPE tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700971 switch (tx_type) {
Yaowu Xuf883b422016-08-30 14:01:10 -0700972 case DCT_DCT: av1_idct16x16_add(input, dest, stride, eob); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700973 case ADST_DCT:
974 case DCT_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -0700975 case ADST_ADST: av1_iht16x16_256_add(input, dest, stride, tx_type); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700976#if CONFIG_EXT_TX
977 case FLIPADST_DCT:
978 case DCT_FLIPADST:
979 case FLIPADST_FLIPADST:
980 case ADST_FLIPADST:
981 case FLIPADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -0700982 av1_iht16x16_256_add(input, dest, stride, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700983 break;
984 case V_DCT:
985 case H_DCT:
986 case V_ADST:
987 case H_ADST:
988 case V_FLIPADST:
989 case H_FLIPADST:
990 // Use C version since DST only exists in C code
Yaowu Xuf883b422016-08-30 14:01:10 -0700991 av1_iht16x16_256_add_c(input, dest, stride, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700992 break;
993 case IDTX: inv_idtx_add_c(input, dest, stride, 16, tx_type); break;
994#endif // CONFIG_EXT_TX
995 default: assert(0); break;
996 }
997}
998
Yaowu Xuf883b422016-08-30 14:01:10 -0700999void av1_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride,
1000 int eob, TX_TYPE tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001001 switch (tx_type) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001002 case DCT_DCT: av1_idct32x32_add(input, dest, stride, eob); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001003#if CONFIG_EXT_TX
1004 case ADST_DCT:
1005 case DCT_ADST:
1006 case ADST_ADST:
1007 case FLIPADST_DCT:
1008 case DCT_FLIPADST:
1009 case FLIPADST_FLIPADST:
1010 case ADST_FLIPADST:
1011 case FLIPADST_ADST:
1012 case V_DCT:
1013 case H_DCT:
1014 case V_ADST:
1015 case H_ADST:
1016 case V_FLIPADST:
1017 case H_FLIPADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07001018 av1_iht32x32_1024_add_c(input, dest, stride, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001019 break;
1020 case IDTX: inv_idtx_add_c(input, dest, stride, 32, tx_type); break;
1021#endif // CONFIG_EXT_TX
1022 default: assert(0); break;
1023 }
1024}
1025
Yaowu Xuf883b422016-08-30 14:01:10 -07001026#if CONFIG_AOM_HIGHBITDEPTH
// 4x4 high-bitdepth inverse hybrid transform + reconstruction (C reference).
// Runs the tx_type-selected 1-D transform over rows, transposes, runs the
// 1-D column transform in place, then adds the residual (rounded by 4 bits)
// into the bd-bit destination with clipping. dest8 is a
// CONVERT_TO_SHORTPTR-packed pointer to uint16_t pixels.
void av1_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // 1-D transform pairs indexed directly by tx_type, so the ordering here
  // must match the TX_TYPE enum ordering.
  static const highbd_transform_2d HIGH_IHT_4[] = {
    { aom_highbd_idct4_c, aom_highbd_idct4_c },    // DCT_DCT
    { aom_highbd_iadst4_c, aom_highbd_idct4_c },   // ADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst4_c },   // DCT_ADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst4_c, aom_highbd_idct4_c },   // FLIPADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst4_c },   // DCT_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // ADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // FLIPADST_ADST
    { highbd_iidtx4_c, highbd_iidtx4_c },          // IDTX
    { aom_highbd_idct4_c, highbd_iidtx4_c },       // V_DCT
    { highbd_iidtx4_c, aom_highbd_idct4_c },       // H_DCT
    { aom_highbd_iadst4_c, highbd_iidtx4_c },      // V_ADST
    { highbd_iidtx4_c, aom_highbd_iadst4_c },      // H_ADST
    { aom_highbd_iadst4_c, highbd_iidtx4_c },      // V_FLIPADST
    { highbd_iidtx4_c, aom_highbd_iadst4_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[4][4];  // intermediate 2-D result, transformed in place
  tran_low_t *outp = &out[0][0];
  int outstride = 4;

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
    HIGH_IHT_4[tx_type].rows(input, out[i], bd);
    input += 4;
  }

  // transpose (in place), so the column pass can run over contiguous rows
  for (i = 1; i < 4; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
    HIGH_IHT_4[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // Adjust destination/output pointers and strides to realize any flipping
  // implied by tx_type (helper defined earlier in this file).
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
#endif

  // Sum with the destination; note outp is read transposed (s swaps i/j),
  // and the final rounding shift for 4x4 is 4 bits.
  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4), bd);
    }
  }
}
1092
1093#if CONFIG_EXT_TX
// 4x8 (width x height) high-bitdepth inverse hybrid transform +
// reconstruction. Row transforms are length-4, column transforms length-8;
// the row pass also transposes into the output buffer. dest8 is a
// CONVERT_TO_SHORTPTR-packed pointer to uint16_t pixels.
void av1_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // 1-D transform pairs indexed directly by tx_type (order must match the
  // TX_TYPE enum). This table is only built with CONFIG_EXT_TX.
  static const highbd_transform_2d HIGH_IHT_4x8[] = {
    { aom_highbd_idct8_c, aom_highbd_idct4_c },    // DCT_DCT
    { aom_highbd_iadst8_c, aom_highbd_idct4_c },   // ADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst4_c },   // DCT_ADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // ADST_ADST
    { aom_highbd_iadst8_c, aom_highbd_idct4_c },   // FLIPADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst4_c },   // DCT_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // ADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // FLIPADST_ADST
    { highbd_iidtx8_c, highbd_iidtx4_c },          // IDTX
    { aom_highbd_idct8_c, highbd_iidtx4_c },       // V_DCT
    { highbd_iidtx8_c, aom_highbd_idct4_c },       // H_DCT
    { aom_highbd_iadst8_c, highbd_iidtx4_c },      // V_ADST
    { highbd_iidtx8_c, aom_highbd_iadst4_c },      // H_ADST
    { aom_highbd_iadst8_c, highbd_iidtx4_c },      // V_FLIPADST
    { highbd_iidtx8_c, aom_highbd_iadst4_c },      // H_FLIPADST
  };
  const int n = 4;   // block width == row-transform length
  const int n2 = 8;  // block height == column-transform length

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors, and transpose.
  // NOTE(review): the extra Sqrt2 rescale of each row result appears to
  // compensate for the rectangular (non-square) dimensions — confirm
  // against the matching forward transform.
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_4x8[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n; ++j) {
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    }
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    HIGH_IHT_4x8[tx_type].cols(out[i], out[i], bd);
  }

  // Adjust pointers/strides to realize any flipping implied by tx_type.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination; final rounding shift here is 5 bits.
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    }
  }
}
1151
// 8x4 (width x height) high-bitdepth inverse hybrid transform +
// reconstruction: the transpose-mirror of av1_highbd_iht4x8_32_add_c.
// Row transforms are length-8, column transforms length-4. dest8 is a
// CONVERT_TO_SHORTPTR-packed pointer to uint16_t pixels.
void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // 1-D transform pairs indexed directly by tx_type (order must match the
  // TX_TYPE enum). This table is only built with CONFIG_EXT_TX.
  static const highbd_transform_2d HIGH_IHT_8x4[] = {
    { aom_highbd_idct4_c, aom_highbd_idct8_c },    // DCT_DCT
    { aom_highbd_iadst4_c, aom_highbd_idct8_c },   // ADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst8_c },   // DCT_ADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // ADST_ADST
    { aom_highbd_iadst4_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    { highbd_iidtx4_c, highbd_iidtx8_c },          // IDTX
    { aom_highbd_idct4_c, highbd_iidtx8_c },       // V_DCT
    { highbd_iidtx4_c, aom_highbd_idct8_c },       // H_DCT
    { aom_highbd_iadst4_c, highbd_iidtx8_c },      // V_ADST
    { highbd_iidtx4_c, aom_highbd_iadst8_c },      // H_ADST
    { aom_highbd_iadst4_c, highbd_iidtx8_c },      // V_FLIPADST
    { highbd_iidtx4_c, aom_highbd_iadst8_c },      // H_FLIPADST
  };
  const int n = 4;   // block height == column-transform length
  const int n2 = 8;  // block width == row-transform length

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors, and transpose.
  // NOTE(review): the Sqrt2 rescale appears to compensate for the
  // rectangular dimensions — confirm against the forward transform.
  for (i = 0; i < n; ++i) {
    HIGH_IHT_8x4[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n2; ++j) {
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    }
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_8x4[tx_type].cols(out[i], out[i], bd);
  }

  // Adjust pointers/strides to realize any flipping implied by tx_type.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination; final rounding shift here is 5 bits.
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    }
  }
}
1209
// 8x16 (width x height) high-bitdepth inverse hybrid transform +
// reconstruction. Row transforms are length-8, column transforms length-16;
// the row pass also transposes into the output buffer. dest8 is a
// CONVERT_TO_SHORTPTR-packed pointer to uint16_t pixels.
void av1_highbd_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int tx_type, int bd) {
  // 1-D transform pairs indexed directly by tx_type (order must match the
  // TX_TYPE enum). This table is only built with CONFIG_EXT_TX.
  static const highbd_transform_2d HIGH_IHT_8x16[] = {
    { aom_highbd_idct16_c, aom_highbd_idct8_c },    // DCT_DCT
    { aom_highbd_iadst16_c, aom_highbd_idct8_c },   // ADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst8_c },   // DCT_ADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // ADST_ADST
    { aom_highbd_iadst16_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    { highbd_iidtx16_c, highbd_iidtx8_c },          // IDTX
    { aom_highbd_idct16_c, highbd_iidtx8_c },       // V_DCT
    { highbd_iidtx16_c, aom_highbd_idct8_c },       // H_DCT
    { aom_highbd_iadst16_c, highbd_iidtx8_c },      // V_ADST
    { highbd_iidtx16_c, aom_highbd_iadst8_c },      // H_ADST
    { aom_highbd_iadst16_c, highbd_iidtx8_c },      // V_FLIPADST
    { highbd_iidtx16_c, aom_highbd_iadst8_c },      // H_FLIPADST
  };
  const int n = 8;    // block width == row-transform length
  const int n2 = 16;  // block height == column-transform length

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors, and transpose.
  // NOTE(review): the Sqrt2 rescale appears to compensate for the
  // rectangular dimensions — confirm against the forward transform.
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_8x16[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    HIGH_IHT_8x16[tx_type].cols(out[i], out[i], bd);
  }

  // Adjust pointers/strides to realize any flipping implied by tx_type.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination; final rounding shift here is 6 bits.
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1266
// 16x8 (width x height) high-bitdepth inverse hybrid transform +
// reconstruction: the transpose-mirror of av1_highbd_iht8x16_128_add_c.
// Row transforms are length-16, column transforms length-8. dest8 is a
// CONVERT_TO_SHORTPTR-packed pointer to uint16_t pixels.
void av1_highbd_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int tx_type, int bd) {
  // 1-D transform pairs indexed directly by tx_type (order must match the
  // TX_TYPE enum). This table is only built with CONFIG_EXT_TX.
  static const highbd_transform_2d HIGH_IHT_16x8[] = {
    { aom_highbd_idct8_c, aom_highbd_idct16_c },    // DCT_DCT
    { aom_highbd_iadst8_c, aom_highbd_idct16_c },   // ADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst16_c },   // DCT_ADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // ADST_ADST
    { aom_highbd_iadst8_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst16_c },   // DCT_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    { highbd_iidtx8_c, highbd_iidtx16_c },          // IDTX
    { aom_highbd_idct8_c, highbd_iidtx16_c },       // V_DCT
    { highbd_iidtx8_c, aom_highbd_idct16_c },       // H_DCT
    { aom_highbd_iadst8_c, highbd_iidtx16_c },      // V_ADST
    { highbd_iidtx8_c, aom_highbd_iadst16_c },      // H_ADST
    { aom_highbd_iadst8_c, highbd_iidtx16_c },      // V_FLIPADST
    { highbd_iidtx8_c, aom_highbd_iadst16_c },      // H_FLIPADST
  };
  const int n = 8;    // block height == column-transform length
  const int n2 = 16;  // block width == row-transform length

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[16][8], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors, and transpose.
  // NOTE(review): the Sqrt2 rescale appears to compensate for the
  // rectangular dimensions — confirm against the forward transform.
  for (i = 0; i < n; ++i) {
    HIGH_IHT_16x8[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n2; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_16x8[tx_type].cols(out[i], out[i], bd);
  }

  // Adjust pointers/strides to realize any flipping implied by tx_type.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination; final rounding shift here is 6 bits.
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1323
// 16x32 (width x height) high-bitdepth inverse hybrid transform +
// reconstruction. Row transforms are length-16, column transforms length-32
// (ADST at length 32 is realized by highbd_ihalfright32_c). dest8 is a
// CONVERT_TO_SHORTPTR-packed pointer to uint16_t pixels.
void av1_highbd_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest8,
                                   int stride, int tx_type, int bd) {
  // 1-D transform pairs indexed directly by tx_type (order must match the
  // TX_TYPE enum). This table is only built with CONFIG_EXT_TX.
  static const highbd_transform_2d HIGH_IHT_16x32[] = {
    { aom_highbd_idct32_c, aom_highbd_idct16_c },     // DCT_DCT
    { highbd_ihalfright32_c, aom_highbd_idct16_c },   // ADST_DCT
    { aom_highbd_idct32_c, aom_highbd_iadst16_c },    // DCT_ADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // ADST_ADST
    { highbd_ihalfright32_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    { aom_highbd_idct32_c, aom_highbd_iadst16_c },    // DCT_FLIPADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    { highbd_iidtx32_c, highbd_iidtx16_c },           // IDTX
    { aom_highbd_idct32_c, highbd_iidtx16_c },        // V_DCT
    { highbd_iidtx32_c, aom_highbd_idct16_c },        // H_DCT
    { highbd_ihalfright32_c, highbd_iidtx16_c },      // V_ADST
    { highbd_iidtx32_c, aom_highbd_iadst16_c },       // H_ADST
    { highbd_ihalfright32_c, highbd_iidtx16_c },      // V_FLIPADST
    { highbd_iidtx32_c, aom_highbd_iadst16_c },       // H_FLIPADST
  };
  const int n = 16;   // block width == row-transform length
  const int n2 = 32;  // block height == column-transform length

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[16][32], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors, and transpose.
  // NOTE(review): the Sqrt2 rescale appears to compensate for the
  // rectangular dimensions — confirm against the forward transform.
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_16x32[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    HIGH_IHT_16x32[tx_type].cols(out[i], out[i], bd);
  }

  // Adjust pointers/strides to realize any flipping implied by tx_type.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination; final rounding shift here is 6 bits.
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1380
// 32x16 (width x height) high-bitdepth inverse hybrid transform +
// reconstruction: the transpose-mirror of av1_highbd_iht16x32_512_add_c.
// Row transforms are length-32 (ADST via highbd_ihalfright32_c), column
// transforms length-16. dest8 is a CONVERT_TO_SHORTPTR-packed pointer.
void av1_highbd_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest8,
                                   int stride, int tx_type, int bd) {
  // 1-D transform pairs indexed directly by tx_type (order must match the
  // TX_TYPE enum). This table is only built with CONFIG_EXT_TX.
  static const highbd_transform_2d HIGH_IHT_32x16[] = {
    { aom_highbd_idct16_c, aom_highbd_idct32_c },     // DCT_DCT
    { aom_highbd_iadst16_c, aom_highbd_idct32_c },    // ADST_DCT
    { aom_highbd_idct16_c, highbd_ihalfright32_c },   // DCT_ADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // ADST_ADST
    { aom_highbd_iadst16_c, aom_highbd_idct32_c },    // FLIPADST_DCT
    { aom_highbd_idct16_c, highbd_ihalfright32_c },   // DCT_FLIPADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // ADST_FLIPADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // FLIPADST_ADST
    { highbd_iidtx16_c, highbd_iidtx32_c },           // IDTX
    { aom_highbd_idct16_c, highbd_iidtx32_c },        // V_DCT
    { highbd_iidtx16_c, aom_highbd_idct32_c },        // H_DCT
    { aom_highbd_iadst16_c, highbd_iidtx32_c },       // V_ADST
    { highbd_iidtx16_c, highbd_ihalfright32_c },      // H_ADST
    { aom_highbd_iadst16_c, highbd_iidtx32_c },       // V_FLIPADST
    { highbd_iidtx16_c, highbd_ihalfright32_c },      // H_FLIPADST
  };
  const int n = 16;   // block height == column-transform length
  const int n2 = 32;  // block width == row-transform length

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[32][16], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors, and transpose.
  // NOTE(review): the Sqrt2 rescale appears to compensate for the
  // rectangular dimensions — confirm against the forward transform.
  for (i = 0; i < n; ++i) {
    HIGH_IHT_32x16[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n2; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_32x16[tx_type].cols(out[i], out[i], bd);
  }

  // Adjust pointers/strides to realize any flipping implied by tx_type.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination; final rounding shift here is 6 bits.
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1437#endif // CONFIG_EXT_TX
1438
// 8x8 high-bitdepth inverse hybrid transform + reconstruction (C reference).
// Runs the tx_type-selected 1-D transform over rows, transposes, runs the
// 1-D column transform in place, then adds the residual (rounded by 5 bits)
// into the bd-bit destination with clipping. dest8 is a
// CONVERT_TO_SHORTPTR-packed pointer to uint16_t pixels.
void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // 1-D transform pairs indexed directly by tx_type, so the ordering here
  // must match the TX_TYPE enum ordering.
  static const highbd_transform_2d HIGH_IHT_8[] = {
    { aom_highbd_idct8_c, aom_highbd_idct8_c },    // DCT_DCT
    { aom_highbd_iadst8_c, aom_highbd_idct8_c },   // ADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst8_c },   // DCT_ADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst8_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    { highbd_iidtx8_c, highbd_iidtx8_c },          // IDTX
    { aom_highbd_idct8_c, highbd_iidtx8_c },       // V_DCT
    { highbd_iidtx8_c, aom_highbd_idct8_c },       // H_DCT
    { aom_highbd_iadst8_c, highbd_iidtx8_c },      // V_ADST
    { highbd_iidtx8_c, aom_highbd_iadst8_c },      // H_ADST
    { aom_highbd_iadst8_c, highbd_iidtx8_c },      // V_FLIPADST
    { highbd_iidtx8_c, aom_highbd_iadst8_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[8][8];  // intermediate 2-D result, transformed in place
  tran_low_t *outp = &out[0][0];
  int outstride = 8;

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
    HIGH_IHT_8[tx_type].rows(input, out[i], bd);
    input += 8;
  }

  // transpose (in place), so the column pass can run over contiguous rows
  for (i = 1; i < 8; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 8; ++i) {
    HIGH_IHT_8[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // Adjust destination/output pointers and strides to realize any flipping
  // implied by tx_type.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
#endif

  // Sum with the destination; outp is read transposed (s swaps i/j) and
  // the final rounding shift for 8x8 is 5 bits.
  for (i = 0; i < 8; ++i) {
    for (j = 0; j < 8; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    }
  }
}
1504
// 16x16 high-bitdepth inverse hybrid transform + reconstruction (C
// reference). Runs the tx_type-selected 1-D transform over rows,
// transposes, runs the 1-D column transform in place, then adds the
// residual (rounded by 6 bits) into the bd-bit destination with clipping.
// dest8 is a CONVERT_TO_SHORTPTR-packed pointer to uint16_t pixels.
void av1_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
                                   int stride, int tx_type, int bd) {
  // 1-D transform pairs indexed directly by tx_type, so the ordering here
  // must match the TX_TYPE enum ordering.
  static const highbd_transform_2d HIGH_IHT_16[] = {
    { aom_highbd_idct16_c, aom_highbd_idct16_c },    // DCT_DCT
    { aom_highbd_iadst16_c, aom_highbd_idct16_c },   // ADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst16_c },   // DCT_ADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst16_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst16_c },   // DCT_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    { highbd_iidtx16_c, highbd_iidtx16_c },          // IDTX
    { aom_highbd_idct16_c, highbd_iidtx16_c },       // V_DCT
    { highbd_iidtx16_c, aom_highbd_idct16_c },       // H_DCT
    { aom_highbd_iadst16_c, highbd_iidtx16_c },      // V_ADST
    { highbd_iidtx16_c, aom_highbd_iadst16_c },      // H_ADST
    { aom_highbd_iadst16_c, highbd_iidtx16_c },      // V_FLIPADST
    { highbd_iidtx16_c, aom_highbd_iadst16_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[16][16];  // intermediate 2-D result, transformed in place
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // inverse transform row vectors
  for (i = 0; i < 16; ++i) {
    HIGH_IHT_16[tx_type].rows(input, out[i], bd);
    input += 16;
  }

  // transpose (in place), so the column pass can run over contiguous rows
  for (i = 1; i < 16; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 16; ++i) {
    HIGH_IHT_16[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // Adjust destination/output pointers and strides to realize any flipping
  // implied by tx_type.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
#endif

  // Sum with the destination; outp is read transposed (s swaps i/j) and
  // the final rounding shift for 16x16 is 6 bits.
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1570
1571#if CONFIG_EXT_TX
// 32x32 high-bitdepth inverse hybrid transform + reconstruction (C
// reference, CONFIG_EXT_TX only). Length-32 ADST is realized by
// highbd_ihalfright32_c. Row pass, in-place transpose, column pass, then
// the residual (rounded by 6 bits) is added into the bd-bit destination
// with clipping. dest8 is a CONVERT_TO_SHORTPTR-packed pointer.
void av1_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
                                    int stride, int tx_type, int bd) {
  // 1-D transform pairs indexed directly by tx_type, so the ordering here
  // must match the TX_TYPE enum ordering.
  static const highbd_transform_2d HIGH_IHT_32[] = {
    { aom_highbd_idct32_c, aom_highbd_idct32_c },      // DCT_DCT
    { highbd_ihalfright32_c, aom_highbd_idct32_c },    // ADST_DCT
    { aom_highbd_idct32_c, highbd_ihalfright32_c },    // DCT_ADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // ADST_ADST
    { highbd_ihalfright32_c, aom_highbd_idct32_c },    // FLIPADST_DCT
    { aom_highbd_idct32_c, highbd_ihalfright32_c },    // DCT_FLIPADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // FLIPADST_FLIPADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // ADST_FLIPADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // FLIPADST_ADST
    { highbd_iidtx32_c, highbd_iidtx32_c },            // IDTX
    { aom_highbd_idct32_c, highbd_iidtx32_c },         // V_DCT
    { highbd_iidtx32_c, aom_highbd_idct32_c },         // H_DCT
    { highbd_ihalfright32_c, highbd_iidtx32_c },       // V_ADST
    { highbd_iidtx32_c, highbd_ihalfright32_c },       // H_ADST
    { highbd_ihalfright32_c, highbd_iidtx32_c },       // V_FLIPADST
    { highbd_iidtx32_c, highbd_ihalfright32_c },       // H_FLIPADST
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[32][32];  // intermediate 2-D result, transformed in place
  tran_low_t *outp = &out[0][0];
  int outstride = 32;

  // inverse transform row vectors
  for (i = 0; i < 32; ++i) {
    HIGH_IHT_32[tx_type].rows(input, out[i], bd);
    input += 32;
  }

  // transpose (in place), so the column pass can run over contiguous rows
  for (i = 1; i < 32; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 32; ++i) {
    HIGH_IHT_32[tx_type].cols(out[i], out[i], bd);
  }

  // Adjust destination/output pointers and strides to realize any flipping
  // implied by tx_type.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32, 32);

  // Sum with the destination; outp is read transposed (s swaps i/j) and
  // the final rounding shift for 32x32 is 6 bits.
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1633#endif // CONFIG_EXT_TX
1634
1635// idct
Yaowu Xuf883b422016-08-30 14:01:10 -07001636void av1_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
1637 int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001638 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07001639 aom_highbd_idct4x4_16_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001640 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001641 aom_highbd_idct4x4_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001642}
1643
Yaowu Xuf883b422016-08-30 14:01:10 -07001644void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
1645 int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001646 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07001647 aom_highbd_iwht4x4_16_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001648 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001649 aom_highbd_iwht4x4_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001650}
1651
Yaowu Xuf883b422016-08-30 14:01:10 -07001652void av1_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
1653 int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001654 // If dc is 1, then input[0] is the reconstructed value, do not need
1655 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
1656
1657 // The calculation can be simplified if there are not many non-zero dct
1658 // coefficients. Use eobs to decide what to do.
Yaowu Xuf883b422016-08-30 14:01:10 -07001659 // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
Yaowu Xuc27fc142016-08-22 16:08:15 -07001660 // Combine that with code here.
1661 // DC only DCT coefficient
1662 if (eob == 1) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001663 aom_highbd_idct8x8_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001664 } else if (eob <= 10) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001665 aom_highbd_idct8x8_10_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001666 } else {
Yaowu Xuf883b422016-08-30 14:01:10 -07001667 aom_highbd_idct8x8_64_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001668 }
1669}
1670
Yaowu Xuf883b422016-08-30 14:01:10 -07001671void av1_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
1672 int stride, int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001673 // The calculation can be simplified if there are not many non-zero dct
1674 // coefficients. Use eobs to separate different cases.
1675 // DC only DCT coefficient.
1676 if (eob == 1) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001677 aom_highbd_idct16x16_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001678 } else if (eob <= 10) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001679 aom_highbd_idct16x16_10_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001680 } else {
Yaowu Xuf883b422016-08-30 14:01:10 -07001681 aom_highbd_idct16x16_256_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001682 }
1683}
1684
Yaowu Xuf883b422016-08-30 14:01:10 -07001685void av1_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
1686 int stride, int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001687 // Non-zero coeff only in upper-left 8x8
1688 if (eob == 1) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001689 aom_highbd_idct32x32_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001690 } else if (eob <= 34) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001691 aom_highbd_idct32x32_34_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001692 } else {
Yaowu Xuf883b422016-08-30 14:01:10 -07001693 aom_highbd_idct32x32_1024_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001694 }
1695}
1696
Yaowu Xuf883b422016-08-30 14:01:10 -07001697void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
1698 int stride, int eob, int bd, TX_TYPE tx_type,
1699 int lossless) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001700 if (lossless) {
1701 assert(tx_type == DCT_DCT);
Yaowu Xuf883b422016-08-30 14:01:10 -07001702 av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001703 return;
1704 }
1705
1706 switch (tx_type) {
1707 case DCT_DCT:
1708 case ADST_DCT:
1709 case DCT_ADST:
1710 case ADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07001711 av1_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
1712 bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001713 break;
1714#if CONFIG_EXT_TX
1715 case FLIPADST_DCT:
1716 case DCT_FLIPADST:
1717 case FLIPADST_FLIPADST:
1718 case ADST_FLIPADST:
1719 case FLIPADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07001720 av1_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
1721 bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001722 break;
1723 case V_DCT:
1724 case H_DCT:
1725 case V_ADST:
1726 case H_ADST:
1727 case V_FLIPADST:
1728 case H_FLIPADST:
1729 // Use C version since DST only exists in C code
Yaowu Xuf883b422016-08-30 14:01:10 -07001730 av1_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001731 break;
1732 case IDTX:
1733 highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd);
1734 break;
1735#endif // CONFIG_EXT_TX
1736 default: assert(0); break;
1737 }
1738}
1739
#if CONFIG_EXT_TX
// Rectangular high-bitdepth inverse transform + add wrappers. Unlike the
// square sizes, no eob-based shortcut kernels exist for these, so the full
// C transform is always invoked and |eob| is unused.
void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
                                 int stride, int eob, int bd, TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht4x8_32_add_c(input, dest, stride, tx_type, bd);
}

void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
                                 int stride, int eob, int bd, TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht8x4_32_add_c(input, dest, stride, tx_type, bd);
}

void av1_highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
                                  int stride, int eob, int bd,
                                  TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht8x16_128_add_c(input, dest, stride, tx_type, bd);
}

void av1_highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
                                  int stride, int eob, int bd,
                                  TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht16x8_128_add_c(input, dest, stride, tx_type, bd);
}

void av1_highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd,
                                   TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht16x32_512_add_c(input, dest, stride, tx_type, bd);
}

void av1_highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd,
                                   TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht32x16_512_add_c(input, dest, stride, tx_type, bd);
}
#endif  // CONFIG_EXT_TX
1781
Yaowu Xuf883b422016-08-30 14:01:10 -07001782void av1_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
1783 int stride, int eob, int bd, TX_TYPE tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001784 (void)eob;
1785 switch (tx_type) {
1786 case DCT_DCT:
1787 case ADST_DCT:
1788 case DCT_ADST:
1789 case ADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07001790 av1_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
1791 bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001792 break;
1793#if CONFIG_EXT_TX
1794 case FLIPADST_DCT:
1795 case DCT_FLIPADST:
1796 case FLIPADST_FLIPADST:
1797 case ADST_FLIPADST:
1798 case FLIPADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07001799 av1_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
1800 bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001801 break;
1802 case V_DCT:
1803 case H_DCT:
1804 case V_ADST:
1805 case H_ADST:
1806 case V_FLIPADST:
1807 case H_FLIPADST:
1808 // Use C version since DST only exists in C code
Yaowu Xuf883b422016-08-30 14:01:10 -07001809 av1_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001810 break;
1811 case IDTX:
1812 highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd);
1813 break;
1814#endif // CONFIG_EXT_TX
1815 default: assert(0); break;
1816 }
1817}
1818
Yaowu Xuf883b422016-08-30 14:01:10 -07001819void av1_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
1820 int stride, int eob, int bd,
1821 TX_TYPE tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001822 (void)eob;
1823 switch (tx_type) {
1824 case DCT_DCT:
1825 case ADST_DCT:
1826 case DCT_ADST:
1827 case ADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07001828 av1_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
1829 tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001830 break;
1831#if CONFIG_EXT_TX
1832 case FLIPADST_DCT:
1833 case DCT_FLIPADST:
1834 case FLIPADST_FLIPADST:
1835 case ADST_FLIPADST:
1836 case FLIPADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07001837 av1_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
1838 tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001839 break;
1840 case V_DCT:
1841 case H_DCT:
1842 case V_ADST:
1843 case H_ADST:
1844 case V_FLIPADST:
1845 case H_FLIPADST:
1846 // Use C version since DST only exists in C code
Yaowu Xuf883b422016-08-30 14:01:10 -07001847 av1_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001848 break;
1849 case IDTX:
1850 highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd);
1851 break;
1852#endif // CONFIG_EXT_TX
1853 default: assert(0); break;
1854 }
1855}
1856
Yaowu Xuf883b422016-08-30 14:01:10 -07001857void av1_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
1858 int stride, int eob, int bd,
1859 TX_TYPE tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001860 (void)eob;
1861 switch (tx_type) {
1862 case DCT_DCT:
Yaowu Xuf883b422016-08-30 14:01:10 -07001863 av1_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
1864 DCT_DCT, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001865 break;
1866#if CONFIG_EXT_TX
1867 case ADST_DCT:
1868 case DCT_ADST:
1869 case ADST_ADST:
1870 case FLIPADST_DCT:
1871 case DCT_FLIPADST:
1872 case FLIPADST_FLIPADST:
1873 case ADST_FLIPADST:
1874 case FLIPADST_ADST:
1875 case V_DCT:
1876 case H_DCT:
1877 case V_ADST:
1878 case H_ADST:
1879 case V_FLIPADST:
1880 case H_FLIPADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07001881 av1_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001882 break;
1883 case IDTX:
1884 highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd);
1885 break;
1886#endif // CONFIG_EXT_TX
1887 default: assert(0); break;
1888 }
1889}
Yaowu Xuf883b422016-08-30 14:01:10 -07001890#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001891
1892void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
1893 INV_TXFM_PARAM *inv_txfm_param) {
1894 const TX_TYPE tx_type = inv_txfm_param->tx_type;
1895 const TX_SIZE tx_size = inv_txfm_param->tx_size;
1896 const int eob = inv_txfm_param->eob;
1897 const int lossless = inv_txfm_param->lossless;
1898
1899 switch (tx_size) {
1900 case TX_32X32:
Yaowu Xuf883b422016-08-30 14:01:10 -07001901 av1_inv_txfm_add_32x32(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001902 break;
1903 case TX_16X16:
Yaowu Xuf883b422016-08-30 14:01:10 -07001904 av1_inv_txfm_add_16x16(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001905 break;
Yaowu Xuf883b422016-08-30 14:01:10 -07001906 case TX_8X8: av1_inv_txfm_add_8x8(input, dest, stride, eob, tx_type); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001907#if CONFIG_EXT_TX
Yaowu Xuf883b422016-08-30 14:01:10 -07001908 case TX_4X8: av1_inv_txfm_add_4x8(input, dest, stride, eob, tx_type); break;
1909 case TX_8X4: av1_inv_txfm_add_8x4(input, dest, stride, eob, tx_type); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001910 case TX_8X16:
Yaowu Xuf883b422016-08-30 14:01:10 -07001911 av1_inv_txfm_add_8x16(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001912 break;
1913 case TX_16X8:
Yaowu Xuf883b422016-08-30 14:01:10 -07001914 av1_inv_txfm_add_16x8(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001915 break;
1916 case TX_16X32:
Yaowu Xuf883b422016-08-30 14:01:10 -07001917 av1_inv_txfm_add_16x32(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001918 break;
1919 case TX_32X16:
Yaowu Xuf883b422016-08-30 14:01:10 -07001920 av1_inv_txfm_add_32x16(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001921 break;
1922#endif // CONFIG_EXT_TX
1923 case TX_4X4:
Yaowu Xuf883b422016-08-30 14:01:10 -07001924 // this is like av1_short_idct4x4 but has a special case around eob<=1
Yaowu Xuc27fc142016-08-22 16:08:15 -07001925 // which is significant (not just an optimization) for the lossless
1926 // case.
Yaowu Xuf883b422016-08-30 14:01:10 -07001927 av1_inv_txfm_add_4x4(input, dest, stride, eob, tx_type, lossless);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001928 break;
1929 default: assert(0 && "Invalid transform size"); break;
1930 }
1931}
1932
#if CONFIG_AOM_HIGHBITDEPTH
// Dispatch a high-bitdepth inverse transform + reconstruction add based on
// the transform size carried in |inv_txfm_param|.
void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
                         INV_TXFM_PARAM *inv_txfm_param) {
  const TX_TYPE tx_type = inv_txfm_param->tx_type;
  const TX_SIZE tx_size = inv_txfm_param->tx_size;
  const int eob = inv_txfm_param->eob;
  const int bd = inv_txfm_param->bd;
  const int lossless = inv_txfm_param->lossless;

  switch (tx_size) {
    case TX_4X4:
      // Like av1_short_idct4x4, but the special case around eob <= 1 is
      // significant (not just an optimization) for the lossless case.
      av1_highbd_inv_txfm_add_4x4(input, dest, stride, eob, bd, tx_type,
                                  lossless);
      break;
    case TX_8X8:
      av1_highbd_inv_txfm_add_8x8(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_16X16:
      av1_highbd_inv_txfm_add_16x16(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_32X32:
      av1_highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, tx_type);
      break;
#if CONFIG_EXT_TX
    case TX_4X8:
      av1_highbd_inv_txfm_add_4x8(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_8X4:
      av1_highbd_inv_txfm_add_8x4(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_8X16:
      av1_highbd_inv_txfm_add_8x16(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_16X8:
      av1_highbd_inv_txfm_add_16x8(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_16X32:
      av1_highbd_inv_txfm_add_16x32(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_32X16:
      av1_highbd_inv_txfm_add_32x16(input, dest, stride, eob, bd, tx_type);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0 && "Invalid transform size"); break;
  }
}
#endif  // CONFIG_AOM_HIGHBITDEPTH