blob: 2663d2d360262dbe1ca274deb823ad661f07fe62 [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xuc27fc142016-08-22 16:08:15 -070010 */
11
12#include <math.h>
13
Yaowu Xuf883b422016-08-30 14:01:10 -070014#include "./aom_dsp_rtcd.h"
Geza Lorea1ddae52016-09-02 09:51:34 +010015#include "./av1_rtcd.h"
16#include "aom_dsp/inv_txfm.h"
17#include "aom_ports/mem.h"
18#include "av1/common/av1_inv_txfm2d_cfg.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070019#include "av1/common/blockd.h"
20#include "av1/common/enums.h"
21#include "av1/common/idct.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070022
// Returns the extra downshift applied to inverse-transform output for large
// transform sizes: 1 for anything whose square superset is 32x32, 2 for
// 64x64 (when CONFIG_TX64X64 is enabled), and 0 otherwise.
// xd and tx_type are unused; they are kept for signature compatibility.
int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
                 const TX_SIZE tx_size) {
  (void)tx_type;
  (void)xd;
  if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
#if CONFIG_TX64X64
  else if (txsize_sqr_up_map[tx_size] == TX_64X64)
    return 2;
#endif  // CONFIG_TX64X64
  else
    return 0;
}
35
Debargha Mukherjeee52816b2016-10-12 10:49:29 -070036// NOTE: The implementation of all inverses need to be aware of the fact
37// that input and output could be the same buffer.
38
Yaowu Xuc27fc142016-08-22 16:08:15 -070039#if CONFIG_EXT_TX
40static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
41 int i;
42 for (i = 0; i < 4; ++i)
43 output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
44}
45
46static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
47 int i;
48 for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
49}
50
51static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
52 int i;
53 for (i = 0; i < 16; ++i)
54 output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
55}
56
57static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
58 int i;
59 for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
60}
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -070061
62#if CONFIG_TX64X64
63static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
64 int i;
65 for (i = 0; i < 64; ++i)
66 output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
67}
68#endif // CONFIG_TX64X64
Jingning Hanec419e02016-11-01 18:19:30 -070069#endif // CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -070070
Debargha Mukherjeee52816b2016-10-12 10:49:29 -070071// For use in lieu of ADST
Yaowu Xuc27fc142016-08-22 16:08:15 -070072static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
73 int i;
74 tran_low_t inputhalf[16];
Yaowu Xuc27fc142016-08-22 16:08:15 -070075 // Multiply input by sqrt(2)
76 for (i = 0; i < 16; ++i) {
77 inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
78 }
Debargha Mukherjeee52816b2016-10-12 10:49:29 -070079 for (i = 0; i < 16; ++i) {
80 output[i] = input[16 + i] * 4;
81 }
Luca Barbatof0f98572016-09-03 12:14:15 +020082 aom_idct16_c(inputhalf, output + 16);
Yaowu Xuc27fc142016-08-22 16:08:15 -070083 // Note overall scaling factor is 4 times orthogonal
84}
85
Debargha Mukherjee67d13472016-11-01 14:37:39 -070086#if CONFIG_TX64X64
87static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
88 int32_t in[64], out[64];
89 int i;
90 for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
91 av1_idct64_new(in, out, inv_cos_bit_col_dct_dct_64,
92 inv_stage_range_col_dct_dct_64);
93 for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
94}
95
96static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
97 int32_t in[64], out[64];
98 int i;
99 for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
100 av1_idct64_new(in, out, inv_cos_bit_row_dct_dct_64,
101 inv_stage_range_row_dct_dct_64);
102 for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
103}
104
Debargha Mukherjee67d13472016-11-01 14:37:39 -0700105// For use in lieu of ADST
// 64-point "half-right" inverse transform, used in lieu of a 64-point ADST:
// the low 32 coefficients go through a 32-point IDCT (after sqrt(2)
// pre-scaling), the high 32 are passed through scaled by 4*sqrt(2).
// NOTE: input and output may alias (see file-level note); input[0..31] is
// buffered before output[0..31] is overwritten, so the order below matters.
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[32];
  // Multiply input by sqrt(2)
  for (i = 0; i < 32; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  // Pass-through of the high half, scaled by 4*sqrt(2).
  for (i = 0; i < 32; ++i) {
    output[i] = (tran_low_t)dct_const_round_shift(input[32 + i] * 4 * Sqrt2);
  }
  aom_idct32_c(inputhalf, output + 32);
  // Note overall scaling factor is 4 * sqrt(2) times orthogonal
}
119#endif // CONFIG_TX64X64
120
Yaowu Xuf883b422016-08-30 14:01:10 -0700121#if CONFIG_AOM_HIGHBITDEPTH
Jingning Hanec419e02016-11-01 18:19:30 -0700122#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700123static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
124 int bd) {
125 int i;
126 for (i = 0; i < 4; ++i)
127 output[i] =
128 HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
129}
130
131static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
132 int bd) {
133 int i;
134 (void)bd;
135 for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
136}
137
138static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
139 int bd) {
140 int i;
141 for (i = 0; i < 16; ++i)
142 output[i] =
143 HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * 2 * Sqrt2), bd);
144}
145
146static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
147 int bd) {
148 int i;
149 (void)bd;
150 for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
151}
Jingning Han5238e6e2016-11-02 08:53:12 -0700152#endif // CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700153
// High-bitdepth 32-point "half-right" inverse transform (used in lieu of a
// 32-point ADST): low 16 coefficients go through a 16-point high-bitdepth
// IDCT after sqrt(2) pre-scaling; the high 16 are passed through scaled by 4.
// NOTE: input and output may alias (see file-level note); input[0..15] is
// buffered before output[0..15] is overwritten, so the order below matters.
static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
                                  int bd) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
  }
  // Pass-through of the high half, scaled by 4.
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  aom_highbd_idct16_c(inputhalf, output + 16, bd);
  // Note overall scaling factor is 4 times orthogonal
}
Debargha Mukherjee67d13472016-11-01 14:37:39 -0700169
Jingning Han5238e6e2016-11-02 08:53:12 -0700170#if CONFIG_EXT_TX
Debargha Mukherjee67d13472016-11-01 14:37:39 -0700171#if CONFIG_TX64X64
172static void highbd_iidtx64_c(const tran_low_t *input, tran_low_t *output,
173 int bd) {
174 int i;
175 for (i = 0; i < 64; ++i)
176 output[i] =
177 HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * 4 * Sqrt2), bd);
178}
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -0700179#endif // CONFIG_TX64X64
180#endif // CONFIG_EXT_TX
Debargha Mukherjee67d13472016-11-01 14:37:39 -0700181
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -0700182#if CONFIG_TX64X64
Debargha Mukherjee67d13472016-11-01 14:37:39 -0700183// For use in lieu of ADST
// High-bitdepth 64-point "half-right" inverse transform (used in lieu of a
// 64-point ADST): low 32 coefficients go through a 32-point high-bitdepth
// IDCT after sqrt(2) pre-scaling; the high 32 are passed through scaled by
// 4*sqrt(2). NOTE: input and output may alias (see file-level note);
// input[0..31] is buffered before output[0..31] is overwritten.
static void highbd_ihalfright64_c(const tran_low_t *input, tran_low_t *output,
                                  int bd) {
  int i;
  tran_low_t inputhalf[32];
  // Multiply input by sqrt(2)
  for (i = 0; i < 32; ++i) {
    inputhalf[i] =
        HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
  }
  // Pass-through of the high half, scaled by 4*sqrt(2).
  for (i = 0; i < 32; ++i) {
    output[i] = HIGHBD_WRAPLOW(
        highbd_dct_const_round_shift(input[32 + i] * 4 * Sqrt2), bd);
  }
  aom_highbd_idct32_c(inputhalf, output + 32, bd);
  // Note overall scaling factor is 4 * sqrt(2) times orthogonal
}
200
201static void highbd_idct64_col_c(const tran_low_t *input, tran_low_t *output,
202 int bd) {
203 int32_t in[64], out[64];
204 int i;
205 (void)bd;
206 for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
207 av1_idct64_new(in, out, inv_cos_bit_col_dct_dct_64,
208 inv_stage_range_col_dct_dct_64);
209 for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
210}
211
212static void highbd_idct64_row_c(const tran_low_t *input, tran_low_t *output,
213 int bd) {
214 int32_t in[64], out[64];
215 int i;
216 (void)bd;
217 for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
218 av1_idct64_new(in, out, inv_cos_bit_row_dct_dct_64,
219 inv_stage_range_row_dct_dct_64);
220 for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
221}
222#endif // CONFIG_TX64X64
Yaowu Xuf883b422016-08-30 14:01:10 -0700223#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700224
225// Inverse identity transform and add.
Jingning Hanec419e02016-11-01 18:19:30 -0700226#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700227static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
228 int bs, int tx_type) {
229 int r, c;
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -0700230 const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700231 if (tx_type == IDTX) {
232 for (r = 0; r < bs; ++r) {
233 for (c = 0; c < bs; ++c)
234 dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
235 dest += stride;
236 input += bs;
237 }
238 }
239}
Jingning Hanec419e02016-11-01 18:19:30 -0700240#endif // CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700241
// Point dest at the last line of a size-line span and negate the stride, so
// subsequent linear traversal visits the lines in reverse order. Used to
// implement the FLIPADST up/down and left/right flips without copying data.
#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)
247
Jingning Hanec419e02016-11-01 18:19:30 -0700248#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700249static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
250 int *sstride, int tx_type, int sizey,
251 int sizex) {
252 // Note that the transpose of src will be added to dst. In order to LR
253 // flip the addends (in dst coordinates), we UD flip the src. To UD flip
254 // the addends, we UD flip the dst.
255 switch (tx_type) {
256 case DCT_DCT:
257 case ADST_DCT:
258 case DCT_ADST:
259 case ADST_ADST:
260 case IDTX:
261 case V_DCT:
262 case H_DCT:
263 case V_ADST:
264 case H_ADST: break;
265 case FLIPADST_DCT:
266 case FLIPADST_ADST:
267 case V_FLIPADST:
268 // flip UD
269 FLIPUD_PTR(*dst, *dstride, sizey);
270 break;
271 case DCT_FLIPADST:
272 case ADST_FLIPADST:
273 case H_FLIPADST:
274 // flip LR
275 FLIPUD_PTR(*src, *sstride, sizex);
276 break;
277 case FLIPADST_FLIPADST:
278 // flip UD
279 FLIPUD_PTR(*dst, *dstride, sizey);
280 // flip LR
281 FLIPUD_PTR(*src, *sstride, sizex);
282 break;
283 default: assert(0); break;
284 }
285}
Jingning Hanec419e02016-11-01 18:19:30 -0700286#endif // CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700287
Yaowu Xuf883b422016-08-30 14:01:10 -0700288#if CONFIG_AOM_HIGHBITDEPTH
Jingning Hanec419e02016-11-01 18:19:30 -0700289#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700290static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
291 int stride, int bs, int tx_type, int bd) {
292 int r, c;
293 const int shift = bs < 32 ? 3 : 2;
294 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
295
296 if (tx_type == IDTX) {
297 for (r = 0; r < bs; ++r) {
298 for (c = 0; c < bs; ++c)
299 dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
300 dest += stride;
301 input += bs;
302 }
303 }
304}
305
// High-bitdepth counterpart of maybe_flip_strides: identical flip logic, but
// the destination is a uint16_t pixel buffer.
static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
                                 int *sstride, int tx_type, int sizey,
                                 int sizex) {
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;  // no flip needed
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST:
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    case FLIPADST_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    default: assert(0); break;
  }
}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700343#endif // CONFIG_EXT_TX
Jingning Hanec419e02016-11-01 18:19:30 -0700344#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700345
Yaowu Xuf883b422016-08-30 14:01:10 -0700346void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
347 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700348 static const transform_2d IHT_4[] = {
Luca Barbatof0f98572016-09-03 12:14:15 +0200349 { aom_idct4_c, aom_idct4_c }, // DCT_DCT = 0
350 { aom_iadst4_c, aom_idct4_c }, // ADST_DCT = 1
351 { aom_idct4_c, aom_iadst4_c }, // DCT_ADST = 2
352 { aom_iadst4_c, aom_iadst4_c }, // ADST_ADST = 3
Yaowu Xuc27fc142016-08-22 16:08:15 -0700353#if CONFIG_EXT_TX
Luca Barbatof0f98572016-09-03 12:14:15 +0200354 { aom_iadst4_c, aom_idct4_c }, // FLIPADST_DCT
355 { aom_idct4_c, aom_iadst4_c }, // DCT_FLIPADST
356 { aom_iadst4_c, aom_iadst4_c }, // FLIPADST_FLIPADST
357 { aom_iadst4_c, aom_iadst4_c }, // ADST_FLIPADST
358 { aom_iadst4_c, aom_iadst4_c }, // FLIPADST_ADST
359 { iidtx4_c, iidtx4_c }, // IDTX
360 { aom_idct4_c, iidtx4_c }, // V_DCT
361 { iidtx4_c, aom_idct4_c }, // H_DCT
362 { aom_iadst4_c, iidtx4_c }, // V_ADST
363 { iidtx4_c, aom_iadst4_c }, // H_ADST
364 { aom_iadst4_c, iidtx4_c }, // V_FLIPADST
365 { iidtx4_c, aom_iadst4_c }, // H_FLIPADST
366#endif // CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700367 };
368
369 int i, j;
370 tran_low_t tmp;
371 tran_low_t out[4][4];
372 tran_low_t *outp = &out[0][0];
373 int outstride = 4;
374
375 // inverse transform row vectors
376 for (i = 0; i < 4; ++i) {
377 IHT_4[tx_type].rows(input, out[i]);
378 input += 4;
379 }
380
381 // transpose
382 for (i = 1; i < 4; i++) {
383 for (j = 0; j < i; j++) {
384 tmp = out[i][j];
385 out[i][j] = out[j][i];
386 out[j][i] = tmp;
387 }
388 }
389
390 // inverse transform column vectors
391 for (i = 0; i < 4; ++i) {
392 IHT_4[tx_type].cols(out[i], out[i]);
393 }
394
395#if CONFIG_EXT_TX
396 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
397#endif
398
399 // Sum with the destination
400 for (i = 0; i < 4; ++i) {
401 for (j = 0; j < 4; ++j) {
402 int d = i * stride + j;
403 int s = j * outstride + i;
404 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
405 }
406 }
407}
408
Yaowu Xuf883b422016-08-30 14:01:10 -0700409void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
410 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700411 static const transform_2d IHT_4x8[] = {
Luca Barbatof0f98572016-09-03 12:14:15 +0200412 { aom_idct8_c, aom_idct4_c }, // DCT_DCT
413 { aom_iadst8_c, aom_idct4_c }, // ADST_DCT
414 { aom_idct8_c, aom_iadst4_c }, // DCT_ADST
415 { aom_iadst8_c, aom_iadst4_c }, // ADST_ADST
Jingning Hanec419e02016-11-01 18:19:30 -0700416#if CONFIG_EXT_TX
Luca Barbatof0f98572016-09-03 12:14:15 +0200417 { aom_iadst8_c, aom_idct4_c }, // FLIPADST_DCT
418 { aom_idct8_c, aom_iadst4_c }, // DCT_FLIPADST
419 { aom_iadst8_c, aom_iadst4_c }, // FLIPADST_FLIPADST
420 { aom_iadst8_c, aom_iadst4_c }, // ADST_FLIPADST
421 { aom_iadst8_c, aom_iadst4_c }, // FLIPADST_ADST
422 { iidtx8_c, iidtx4_c }, // IDTX
423 { aom_idct8_c, iidtx4_c }, // V_DCT
424 { iidtx8_c, aom_idct4_c }, // H_DCT
425 { aom_iadst8_c, iidtx4_c }, // V_ADST
426 { iidtx8_c, aom_iadst4_c }, // H_ADST
427 { aom_iadst8_c, iidtx4_c }, // V_FLIPADST
428 { iidtx8_c, aom_iadst4_c }, // H_FLIPADST
Jingning Hanec419e02016-11-01 18:19:30 -0700429#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700430 };
431
432 const int n = 4;
433 const int n2 = 8;
434 int i, j;
435 tran_low_t out[4][8], outtmp[4];
436 tran_low_t *outp = &out[0][0];
437 int outstride = n2;
438
439 // inverse transform row vectors and transpose
440 for (i = 0; i < n2; ++i) {
441 IHT_4x8[tx_type].rows(input, outtmp);
442 for (j = 0; j < n; ++j)
443 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
444 input += n;
445 }
446
447 // inverse transform column vectors
448 for (i = 0; i < n; ++i) {
449 IHT_4x8[tx_type].cols(out[i], out[i]);
450 }
451
Jingning Hanec419e02016-11-01 18:19:30 -0700452#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700453 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
Jingning Hanec419e02016-11-01 18:19:30 -0700454#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700455
456 // Sum with the destination
457 for (i = 0; i < n2; ++i) {
458 for (j = 0; j < n; ++j) {
459 int d = i * stride + j;
460 int s = j * outstride + i;
461 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
462 }
463 }
464}
465
Yaowu Xuf883b422016-08-30 14:01:10 -0700466void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
467 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700468 static const transform_2d IHT_8x4[] = {
Luca Barbatof0f98572016-09-03 12:14:15 +0200469 { aom_idct4_c, aom_idct8_c }, // DCT_DCT
470 { aom_iadst4_c, aom_idct8_c }, // ADST_DCT
471 { aom_idct4_c, aom_iadst8_c }, // DCT_ADST
472 { aom_iadst4_c, aom_iadst8_c }, // ADST_ADST
Jingning Hanec419e02016-11-01 18:19:30 -0700473#if CONFIG_EXT_TX
Luca Barbatof0f98572016-09-03 12:14:15 +0200474 { aom_iadst4_c, aom_idct8_c }, // FLIPADST_DCT
475 { aom_idct4_c, aom_iadst8_c }, // DCT_FLIPADST
476 { aom_iadst4_c, aom_iadst8_c }, // FLIPADST_FLIPADST
477 { aom_iadst4_c, aom_iadst8_c }, // ADST_FLIPADST
478 { aom_iadst4_c, aom_iadst8_c }, // FLIPADST_ADST
479 { iidtx4_c, iidtx8_c }, // IDTX
480 { aom_idct4_c, iidtx8_c }, // V_DCT
481 { iidtx4_c, aom_idct8_c }, // H_DCT
482 { aom_iadst4_c, iidtx8_c }, // V_ADST
483 { iidtx4_c, aom_iadst8_c }, // H_ADST
484 { aom_iadst4_c, iidtx8_c }, // V_FLIPADST
485 { iidtx4_c, aom_iadst8_c }, // H_FLIPADST
Jingning Hanec419e02016-11-01 18:19:30 -0700486#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700487 };
488 const int n = 4;
489 const int n2 = 8;
490
491 int i, j;
492 tran_low_t out[8][4], outtmp[8];
493 tran_low_t *outp = &out[0][0];
494 int outstride = n;
495
496 // inverse transform row vectors and transpose
497 for (i = 0; i < n; ++i) {
498 IHT_8x4[tx_type].rows(input, outtmp);
499 for (j = 0; j < n2; ++j)
500 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
501 input += n2;
502 }
503
504 // inverse transform column vectors
505 for (i = 0; i < n2; ++i) {
506 IHT_8x4[tx_type].cols(out[i], out[i]);
507 }
508
Jingning Hanec419e02016-11-01 18:19:30 -0700509#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700510 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
Jingning Hanec419e02016-11-01 18:19:30 -0700511#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700512
513 // Sum with the destination
514 for (i = 0; i < n; ++i) {
515 for (j = 0; j < n2; ++j) {
516 int d = i * stride + j;
517 int s = j * outstride + i;
518 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
519 }
520 }
521}
522
Yaowu Xuf883b422016-08-30 14:01:10 -0700523void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
524 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700525 static const transform_2d IHT_8x16[] = {
Luca Barbatof0f98572016-09-03 12:14:15 +0200526 { aom_idct16_c, aom_idct8_c }, // DCT_DCT
527 { aom_iadst16_c, aom_idct8_c }, // ADST_DCT
528 { aom_idct16_c, aom_iadst8_c }, // DCT_ADST
529 { aom_iadst16_c, aom_iadst8_c }, // ADST_ADST
Jingning Hanec419e02016-11-01 18:19:30 -0700530#if CONFIG_EXT_TX
Luca Barbatof0f98572016-09-03 12:14:15 +0200531 { aom_iadst16_c, aom_idct8_c }, // FLIPADST_DCT
532 { aom_idct16_c, aom_iadst8_c }, // DCT_FLIPADST
533 { aom_iadst16_c, aom_iadst8_c }, // FLIPADST_FLIPADST
534 { aom_iadst16_c, aom_iadst8_c }, // ADST_FLIPADST
535 { aom_iadst16_c, aom_iadst8_c }, // FLIPADST_ADST
536 { iidtx16_c, iidtx8_c }, // IDTX
537 { aom_idct16_c, iidtx8_c }, // V_DCT
538 { iidtx16_c, aom_idct8_c }, // H_DCT
539 { aom_iadst16_c, iidtx8_c }, // V_ADST
540 { iidtx16_c, aom_iadst8_c }, // H_ADST
541 { aom_iadst16_c, iidtx8_c }, // V_FLIPADST
542 { iidtx16_c, aom_iadst8_c }, // H_FLIPADST
Jingning Hanec419e02016-11-01 18:19:30 -0700543#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700544 };
545
546 const int n = 8;
547 const int n2 = 16;
548 int i, j;
549 tran_low_t out[8][16], outtmp[8];
550 tran_low_t *outp = &out[0][0];
551 int outstride = n2;
552
553 // inverse transform row vectors and transpose
554 for (i = 0; i < n2; ++i) {
555 IHT_8x16[tx_type].rows(input, outtmp);
556 for (j = 0; j < n; ++j)
557 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
558 input += n;
559 }
560
561 // inverse transform column vectors
562 for (i = 0; i < n; ++i) {
563 IHT_8x16[tx_type].cols(out[i], out[i]);
564 }
565
Jingning Hanec419e02016-11-01 18:19:30 -0700566#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700567 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
Jingning Hanec419e02016-11-01 18:19:30 -0700568#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700569
570 // Sum with the destination
571 for (i = 0; i < n2; ++i) {
572 for (j = 0; j < n; ++j) {
573 int d = i * stride + j;
574 int s = j * outstride + i;
575 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
576 }
577 }
578}
579
Yaowu Xuf883b422016-08-30 14:01:10 -0700580void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
581 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700582 static const transform_2d IHT_16x8[] = {
Luca Barbatof0f98572016-09-03 12:14:15 +0200583 { aom_idct8_c, aom_idct16_c }, // DCT_DCT
584 { aom_iadst8_c, aom_idct16_c }, // ADST_DCT
585 { aom_idct8_c, aom_iadst16_c }, // DCT_ADST
586 { aom_iadst8_c, aom_iadst16_c }, // ADST_ADST
Jingning Hanec419e02016-11-01 18:19:30 -0700587#if CONFIG_EXT_TX
Luca Barbatof0f98572016-09-03 12:14:15 +0200588 { aom_iadst8_c, aom_idct16_c }, // FLIPADST_DCT
589 { aom_idct8_c, aom_iadst16_c }, // DCT_FLIPADST
590 { aom_iadst8_c, aom_iadst16_c }, // FLIPADST_FLIPADST
591 { aom_iadst8_c, aom_iadst16_c }, // ADST_FLIPADST
592 { aom_iadst8_c, aom_iadst16_c }, // FLIPADST_ADST
593 { iidtx8_c, iidtx16_c }, // IDTX
594 { aom_idct8_c, iidtx16_c }, // V_DCT
595 { iidtx8_c, aom_idct16_c }, // H_DCT
596 { aom_iadst8_c, iidtx16_c }, // V_ADST
597 { iidtx8_c, aom_iadst16_c }, // H_ADST
598 { aom_iadst8_c, iidtx16_c }, // V_FLIPADST
599 { iidtx8_c, aom_iadst16_c }, // H_FLIPADST
Jingning Hanec419e02016-11-01 18:19:30 -0700600#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700601 };
602 const int n = 8;
603 const int n2 = 16;
604
605 int i, j;
606 tran_low_t out[16][8], outtmp[16];
607 tran_low_t *outp = &out[0][0];
608 int outstride = n;
609
610 // inverse transform row vectors and transpose
611 for (i = 0; i < n; ++i) {
612 IHT_16x8[tx_type].rows(input, outtmp);
613 for (j = 0; j < n2; ++j)
614 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
615 input += n2;
616 }
617
618 // inverse transform column vectors
619 for (i = 0; i < n2; ++i) {
620 IHT_16x8[tx_type].cols(out[i], out[i]);
621 }
622
Jingning Hanec419e02016-11-01 18:19:30 -0700623#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700624 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
Jingning Hanec419e02016-11-01 18:19:30 -0700625#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700626
627 // Sum with the destination
628 for (i = 0; i < n; ++i) {
629 for (j = 0; j < n2; ++j) {
630 int d = i * stride + j;
631 int s = j * outstride + i;
632 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
633 }
634 }
635}
636
Yaowu Xuf883b422016-08-30 14:01:10 -0700637void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
638 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700639 static const transform_2d IHT_16x32[] = {
Luca Barbatof0f98572016-09-03 12:14:15 +0200640 { aom_idct32_c, aom_idct16_c }, // DCT_DCT
641 { ihalfright32_c, aom_idct16_c }, // ADST_DCT
642 { aom_idct32_c, aom_iadst16_c }, // DCT_ADST
643 { ihalfright32_c, aom_iadst16_c }, // ADST_ADST
Jingning Hanec419e02016-11-01 18:19:30 -0700644#if CONFIG_EXT_TX
Luca Barbatof0f98572016-09-03 12:14:15 +0200645 { ihalfright32_c, aom_idct16_c }, // FLIPADST_DCT
646 { aom_idct32_c, aom_iadst16_c }, // DCT_FLIPADST
647 { ihalfright32_c, aom_iadst16_c }, // FLIPADST_FLIPADST
648 { ihalfright32_c, aom_iadst16_c }, // ADST_FLIPADST
649 { ihalfright32_c, aom_iadst16_c }, // FLIPADST_ADST
650 { iidtx32_c, iidtx16_c }, // IDTX
651 { aom_idct32_c, iidtx16_c }, // V_DCT
652 { iidtx32_c, aom_idct16_c }, // H_DCT
653 { ihalfright32_c, iidtx16_c }, // V_ADST
654 { iidtx32_c, aom_iadst16_c }, // H_ADST
655 { ihalfright32_c, iidtx16_c }, // V_FLIPADST
656 { iidtx32_c, aom_iadst16_c }, // H_FLIPADST
Jingning Hanec419e02016-11-01 18:19:30 -0700657#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700658 };
659
660 const int n = 16;
661 const int n2 = 32;
662 int i, j;
663 tran_low_t out[16][32], outtmp[16];
664 tran_low_t *outp = &out[0][0];
665 int outstride = n2;
666
667 // inverse transform row vectors and transpose
668 for (i = 0; i < n2; ++i) {
669 IHT_16x32[tx_type].rows(input, outtmp);
670 for (j = 0; j < n; ++j)
671 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
672 input += n;
673 }
674
675 // inverse transform column vectors
676 for (i = 0; i < n; ++i) {
677 IHT_16x32[tx_type].cols(out[i], out[i]);
678 }
679
Jingning Hanec419e02016-11-01 18:19:30 -0700680#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700681 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
Jingning Hanec419e02016-11-01 18:19:30 -0700682#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700683
684 // Sum with the destination
685 for (i = 0; i < n2; ++i) {
686 for (j = 0; j < n; ++j) {
687 int d = i * stride + j;
688 int s = j * outstride + i;
689 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
690 }
691 }
692}
693
Yaowu Xuf883b422016-08-30 14:01:10 -0700694void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
695 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700696 static const transform_2d IHT_32x16[] = {
Luca Barbatof0f98572016-09-03 12:14:15 +0200697 { aom_idct16_c, aom_idct32_c }, // DCT_DCT
698 { aom_iadst16_c, aom_idct32_c }, // ADST_DCT
699 { aom_idct16_c, ihalfright32_c }, // DCT_ADST
700 { aom_iadst16_c, ihalfright32_c }, // ADST_ADST
Jingning Hanec419e02016-11-01 18:19:30 -0700701#if CONFIG_EXT_TX
Luca Barbatof0f98572016-09-03 12:14:15 +0200702 { aom_iadst16_c, aom_idct32_c }, // FLIPADST_DCT
703 { aom_idct16_c, ihalfright32_c }, // DCT_FLIPADST
704 { aom_iadst16_c, ihalfright32_c }, // FLIPADST_FLIPADST
705 { aom_iadst16_c, ihalfright32_c }, // ADST_FLIPADST
706 { aom_iadst16_c, ihalfright32_c }, // FLIPADST_ADST
707 { iidtx16_c, iidtx32_c }, // IDTX
708 { aom_idct16_c, iidtx32_c }, // V_DCT
709 { iidtx16_c, aom_idct32_c }, // H_DCT
710 { aom_iadst16_c, iidtx32_c }, // V_ADST
711 { iidtx16_c, ihalfright32_c }, // H_ADST
712 { aom_iadst16_c, iidtx32_c }, // V_FLIPADST
713 { iidtx16_c, ihalfright32_c }, // H_FLIPADST
Jingning Hanec419e02016-11-01 18:19:30 -0700714#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700715 };
716 const int n = 16;
717 const int n2 = 32;
718
719 int i, j;
720 tran_low_t out[32][16], outtmp[32];
721 tran_low_t *outp = &out[0][0];
722 int outstride = n;
723
724 // inverse transform row vectors and transpose
725 for (i = 0; i < n; ++i) {
726 IHT_32x16[tx_type].rows(input, outtmp);
727 for (j = 0; j < n2; ++j)
728 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
729 input += n2;
730 }
731
732 // inverse transform column vectors
733 for (i = 0; i < n2; ++i) {
734 IHT_32x16[tx_type].cols(out[i], out[i]);
735 }
736
Jingning Hanec419e02016-11-01 18:19:30 -0700737#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700738 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
Jingning Hanec419e02016-11-01 18:19:30 -0700739#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700740
741 // Sum with the destination
742 for (i = 0; i < n; ++i) {
743 for (j = 0; j < n2; ++j) {
744 int d = i * stride + j;
745 int s = j * outstride + i;
746 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
747 }
748 }
749}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700750
Yaowu Xuf883b422016-08-30 14:01:10 -0700751void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
752 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700753 static const transform_2d IHT_8[] = {
Luca Barbatof0f98572016-09-03 12:14:15 +0200754 { aom_idct8_c, aom_idct8_c }, // DCT_DCT = 0
755 { aom_iadst8_c, aom_idct8_c }, // ADST_DCT = 1
756 { aom_idct8_c, aom_iadst8_c }, // DCT_ADST = 2
757 { aom_iadst8_c, aom_iadst8_c }, // ADST_ADST = 3
Yaowu Xuc27fc142016-08-22 16:08:15 -0700758#if CONFIG_EXT_TX
Luca Barbatof0f98572016-09-03 12:14:15 +0200759 { aom_iadst8_c, aom_idct8_c }, // FLIPADST_DCT
760 { aom_idct8_c, aom_iadst8_c }, // DCT_FLIPADST
761 { aom_iadst8_c, aom_iadst8_c }, // FLIPADST_FLIPADST
762 { aom_iadst8_c, aom_iadst8_c }, // ADST_FLIPADST
763 { aom_iadst8_c, aom_iadst8_c }, // FLIPADST_ADST
764 { iidtx8_c, iidtx8_c }, // IDTX
765 { aom_idct8_c, iidtx8_c }, // V_DCT
766 { iidtx8_c, aom_idct8_c }, // H_DCT
767 { aom_iadst8_c, iidtx8_c }, // V_ADST
768 { iidtx8_c, aom_iadst8_c }, // H_ADST
769 { aom_iadst8_c, iidtx8_c }, // V_FLIPADST
770 { iidtx8_c, aom_iadst8_c }, // H_FLIPADST
771#endif // CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700772 };
773
774 int i, j;
775 tran_low_t tmp;
776 tran_low_t out[8][8];
777 tran_low_t *outp = &out[0][0];
778 int outstride = 8;
779
780 // inverse transform row vectors
781 for (i = 0; i < 8; ++i) {
782 IHT_8[tx_type].rows(input, out[i]);
783 input += 8;
784 }
785
786 // transpose
787 for (i = 1; i < 8; i++) {
788 for (j = 0; j < i; j++) {
789 tmp = out[i][j];
790 out[i][j] = out[j][i];
791 out[j][i] = tmp;
792 }
793 }
794
795 // inverse transform column vectors
796 for (i = 0; i < 8; ++i) {
797 IHT_8[tx_type].cols(out[i], out[i]);
798 }
799
800#if CONFIG_EXT_TX
801 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
802#endif
803
804 // Sum with the destination
805 for (i = 0; i < 8; ++i) {
806 for (j = 0; j < 8; ++j) {
807 int d = i * stride + j;
808 int s = j * outstride + i;
809 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
810 }
811 }
812}
813
Yaowu Xuf883b422016-08-30 14:01:10 -0700814void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
815 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700816 static const transform_2d IHT_16[] = {
Luca Barbatof0f98572016-09-03 12:14:15 +0200817 { aom_idct16_c, aom_idct16_c }, // DCT_DCT = 0
818 { aom_iadst16_c, aom_idct16_c }, // ADST_DCT = 1
819 { aom_idct16_c, aom_iadst16_c }, // DCT_ADST = 2
820 { aom_iadst16_c, aom_iadst16_c }, // ADST_ADST = 3
Yaowu Xuc27fc142016-08-22 16:08:15 -0700821#if CONFIG_EXT_TX
Luca Barbatof0f98572016-09-03 12:14:15 +0200822 { aom_iadst16_c, aom_idct16_c }, // FLIPADST_DCT
823 { aom_idct16_c, aom_iadst16_c }, // DCT_FLIPADST
824 { aom_iadst16_c, aom_iadst16_c }, // FLIPADST_FLIPADST
825 { aom_iadst16_c, aom_iadst16_c }, // ADST_FLIPADST
826 { aom_iadst16_c, aom_iadst16_c }, // FLIPADST_ADST
827 { iidtx16_c, iidtx16_c }, // IDTX
828 { aom_idct16_c, iidtx16_c }, // V_DCT
829 { iidtx16_c, aom_idct16_c }, // H_DCT
830 { aom_iadst16_c, iidtx16_c }, // V_ADST
831 { iidtx16_c, aom_iadst16_c }, // H_ADST
832 { aom_iadst16_c, iidtx16_c }, // V_FLIPADST
833 { iidtx16_c, aom_iadst16_c }, // H_FLIPADST
834#endif // CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700835 };
836
837 int i, j;
838 tran_low_t tmp;
839 tran_low_t out[16][16];
840 tran_low_t *outp = &out[0][0];
841 int outstride = 16;
842
843 // inverse transform row vectors
844 for (i = 0; i < 16; ++i) {
845 IHT_16[tx_type].rows(input, out[i]);
846 input += 16;
847 }
848
849 // transpose
850 for (i = 1; i < 16; i++) {
851 for (j = 0; j < i; j++) {
852 tmp = out[i][j];
853 out[i][j] = out[j][i];
854 out[j][i] = tmp;
855 }
856 }
857
858 // inverse transform column vectors
859 for (i = 0; i < 16; ++i) {
860 IHT_16[tx_type].cols(out[i], out[i]);
861 }
862
863#if CONFIG_EXT_TX
864 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
865#endif
866
867 // Sum with the destination
868 for (i = 0; i < 16; ++i) {
869 for (j = 0; j < 16; ++j) {
870 int d = i * stride + j;
871 int s = j * outstride + i;
872 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
873 }
874 }
875}
876
877#if CONFIG_EXT_TX
void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             int tx_type) {
  // 2D inverse hybrid transform for a 32x32 block (EXT_TX only): row
  // transform, transpose, column transform, then add the rounded residual
  // into dest.
  //
  // At this size the ADST is realized by ihalfright32_c; FLIPADST entries
  // reuse the same kernels, with the flipping applied below via
  // maybe_flip_strides().
  static const transform_2d IHT_32[] = {
    { aom_idct32_c, aom_idct32_c },      // DCT_DCT
    { ihalfright32_c, aom_idct32_c },    // ADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_ADST
    { ihalfright32_c, ihalfright32_c },  // ADST_ADST
    { ihalfright32_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // ADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx32_c },            // IDTX
    { aom_idct32_c, iidtx32_c },         // V_DCT
    { iidtx32_c, aom_idct32_c },         // H_DCT
    { ihalfright32_c, iidtx32_c },       // V_ADST
    { iidtx32_c, ihalfright32_c },       // H_ADST
    { ihalfright32_c, iidtx32_c },       // V_FLIPADST
    { iidtx32_c, ihalfright32_c },       // H_FLIPADST
  };

  int i, j;
  tran_low_t tmp;
  tran_low_t out[32][32];  // intermediate (row-, then column-transformed)
  tran_low_t *outp = &out[0][0];
  int outstride = 32;

  // inverse transform row vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].rows(input, out[i]);
    input += 32;
  }

  // transpose in place so the column transform can run on contiguous rows
  for (i = 1; i < 32; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].cols(out[i], out[i]);
  }

  // For FLIPADST variants, mirror the traversal of dest and/or out.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);

  // Sum with the destination; the final rounding shift of 6 removes the
  // transform scaling for this block size.
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -0700936#endif // CONFIG_EXT_TX
Debargha Mukherjee67d13472016-11-01 14:37:39 -0700937
938#if CONFIG_TX64X64
void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             int tx_type) {
  // 2D inverse hybrid transform for a 64x64 block (TX64X64 only): row
  // transform (with an extra 1-bit scale-down), transpose, column
  // transform, then add the rounded residual into dest.
  static const transform_2d IHT_64[] = {
    { idct64_col_c, idct64_row_c },      // DCT_DCT
    { ihalfright64_c, idct64_row_c },    // ADST_DCT
    { idct64_col_c, ihalfright64_c },    // DCT_ADST
    { ihalfright64_c, ihalfright64_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright64_c, idct64_row_c },    // FLIPADST_DCT
    { idct64_col_c, ihalfright64_c },    // DCT_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // FLIPADST_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // ADST_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // FLIPADST_ADST
    { iidtx64_c, iidtx64_c },            // IDTX
    { idct64_col_c, iidtx64_c },         // V_DCT
    { iidtx64_c, idct64_row_c },         // H_DCT
    { ihalfright64_c, iidtx64_c },       // V_ADST
    { iidtx64_c, ihalfright64_c },       // H_ADST
    { ihalfright64_c, iidtx64_c },       // V_FLIPADST
    { iidtx64_c, ihalfright64_c },       // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp;
  tran_low_t out[64][64];  // intermediate (row-, then column-transformed)
  tran_low_t *outp = &out[0][0];
  int outstride = 64;

  // inverse transform row vectors; scale each row result down one bit to
  // keep intermediate precision in range (the final rounding shift below
  // is 5 rather than 6 to compensate)
  for (i = 0; i < 64; ++i) {
    IHT_64[tx_type].rows(input, out[i]);
    for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
    input += 64;
  }

  // transpose in place so the column transform can run on contiguous rows
  for (i = 1; i < 64; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 64; ++i) {
    IHT_64[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, mirror the traversal of dest and/or out.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
#endif  // CONFIG_EXT_TX

  // Sum with the destination (shift of 5: see row-scaling note above).
  for (i = 0; i < 64; ++i) {
    for (j = 0; j < 64; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
1002#endif // CONFIG_TX64X64
Yaowu Xuc27fc142016-08-22 16:08:15 -07001003
1004// idct
Yaowu Xuf883b422016-08-30 14:01:10 -07001005void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
1006 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001007 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07001008 aom_idct4x4_16_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001009 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001010 aom_idct4x4_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001011}
1012
Yaowu Xuf883b422016-08-30 14:01:10 -07001013void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
1014 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001015 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07001016 aom_iwht4x4_16_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001017 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001018 aom_iwht4x4_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001019}
1020
Yaowu Xuf883b422016-08-30 14:01:10 -07001021void av1_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
1022 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001023 // If dc is 1, then input[0] is the reconstructed value, do not need
1024 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
1025
1026 // The calculation can be simplified if there are not many non-zero dct
1027 // coefficients. Use eobs to decide what to do.
Yaowu Xuf883b422016-08-30 14:01:10 -07001028 // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
Yaowu Xuc27fc142016-08-22 16:08:15 -07001029 // Combine that with code here.
1030 if (eob == 1)
1031 // DC only DCT coefficient
Yaowu Xuf883b422016-08-30 14:01:10 -07001032 aom_idct8x8_1_add(input, dest, stride);
Angie Chianged8cd9a2016-10-21 16:44:47 -07001033#if !CONFIG_ADAPT_SCAN
Yaowu Xuc27fc142016-08-22 16:08:15 -07001034 else if (eob <= 12)
Yaowu Xuf883b422016-08-30 14:01:10 -07001035 aom_idct8x8_12_add(input, dest, stride);
Angie Chianged8cd9a2016-10-21 16:44:47 -07001036#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07001037 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001038 aom_idct8x8_64_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001039}
1040
Yaowu Xuf883b422016-08-30 14:01:10 -07001041void av1_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
1042 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001043 /* The calculation can be simplified if there are not many non-zero dct
1044 * coefficients. Use eobs to separate different cases. */
1045 if (eob == 1) /* DC only DCT coefficient. */
Yaowu Xuf883b422016-08-30 14:01:10 -07001046 aom_idct16x16_1_add(input, dest, stride);
Angie Chianged8cd9a2016-10-21 16:44:47 -07001047#if !CONFIG_ADAPT_SCAN
Yaowu Xuc27fc142016-08-22 16:08:15 -07001048 else if (eob <= 10)
Yaowu Xuf883b422016-08-30 14:01:10 -07001049 aom_idct16x16_10_add(input, dest, stride);
Angie Chianged8cd9a2016-10-21 16:44:47 -07001050#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07001051 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001052 aom_idct16x16_256_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001053}
1054
Yaowu Xuf883b422016-08-30 14:01:10 -07001055void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
1056 int eob) {
Angie Chianged8cd9a2016-10-21 16:44:47 -07001057 if (eob == 1) aom_idct32x32_1_add(input, dest, stride);
1058#if !CONFIG_ADAPT_SCAN
Yaowu Xuc27fc142016-08-22 16:08:15 -07001059 else if (eob <= 34)
1060 // non-zero coeff only in upper-left 8x8
Yaowu Xuf883b422016-08-30 14:01:10 -07001061 aom_idct32x32_34_add(input, dest, stride);
Angie Chianged8cd9a2016-10-21 16:44:47 -07001062#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07001063 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001064 aom_idct32x32_1024_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001065}
1066
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -07001067#if CONFIG_TX64X64
// 64x64 inverse DCT + reconstruction. No reduced-coefficient fast path
// exists at this size, so eob is ignored and the full transform always
// runs.
void av1_idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  (void)eob;
  av1_iht64x64_4096_add(input, dest, stride, DCT_DCT);
}
1073#endif // CONFIG_TX64X64
1074
// Dispatch the 4x4 inverse transform for tx_type and add the result to
// dest. In lossless mode the invertible 4x4 Walsh-Hadamard transform is
// used instead, and only DCT_DCT is legal.
void av1_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type, int lossless) {
  if (lossless) {
    assert(tx_type == DCT_DCT);
    av1_iwht4x4_add(input, dest, stride, eob);
    return;
  }

  switch (tx_type) {
    case DCT_DCT: av1_idct4x4_add(input, dest, stride, eob); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht4x4_16_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 4, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
1108
// 4x8 inverse transform + reconstruction; eob is unused (no reduced path
// for rectangular transforms).
void av1_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht4x8_32_add(input, dest, stride, tx_type);
}
1114
// 8x4 inverse transform + reconstruction; eob is unused (no reduced path
// for rectangular transforms).
void av1_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht8x4_32_add(input, dest, stride, tx_type);
}
1120
// 8x16 inverse transform + reconstruction; eob is unused (no reduced path
// for rectangular transforms).
void av1_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest, int stride,
                           int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht8x16_128_add(input, dest, stride, tx_type);
}
1126
// 16x8 inverse transform + reconstruction; eob is unused (no reduced path
// for rectangular transforms).
void av1_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest, int stride,
                           int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht16x8_128_add(input, dest, stride, tx_type);
}
1132
// 16x32 inverse transform + reconstruction; eob is unused (no reduced path
// for rectangular transforms).
void av1_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht16x32_512_add(input, dest, stride, tx_type);
}
1138
// 32x16 inverse transform + reconstruction; eob is unused (no reduced path
// for rectangular transforms).
void av1_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht32x16_512_add(input, dest, stride, tx_type);
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07001144
// Dispatch the 8x8 inverse transform for tx_type and add the result to
// dest. DCT_DCT goes through the eob-aware fast-path selector.
void av1_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type) {
  switch (tx_type) {
    case DCT_DCT: av1_idct8x8_add(input, dest, stride, eob); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: av1_iht8x8_64_add(input, dest, stride, tx_type); break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST: av1_iht8x8_64_add(input, dest, stride, tx_type); break;
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht8x8_64_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 8, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
1172
// Dispatch the 16x16 inverse transform for tx_type and add the result to
// dest. DCT_DCT goes through the eob-aware fast-path selector.
void av1_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, TX_TYPE tx_type) {
  switch (tx_type) {
    case DCT_DCT: av1_idct16x16_add(input, dest, stride, eob); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: av1_iht16x16_256_add(input, dest, stride, tx_type); break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST: av1_iht16x16_256_add(input, dest, stride, tx_type); break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 16, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
1197
// Dispatch the 32x32 inverse transform for tx_type and add the result to
// dest. Without EXT_TX only DCT_DCT is legal at this size; the non-DCT
// types always use the C implementation.
void av1_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, TX_TYPE tx_type) {
  switch (tx_type) {
    case DCT_DCT: av1_idct32x32_add(input, dest, stride, eob); break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_iht32x32_1024_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 32, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
1224
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -07001225#if CONFIG_TX64X64
// Dispatch the 64x64 inverse transform for tx_type and add the result to
// dest. Without EXT_TX only DCT_DCT is legal at this size; the non-DCT
// types always use the C implementation.
void av1_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, TX_TYPE tx_type) {
  switch (tx_type) {
    case DCT_DCT: av1_idct64x64_add(input, dest, stride, eob); break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_iht64x64_4096_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 64, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
1252#endif // CONFIG_TX64X64
1253
Yaowu Xuf883b422016-08-30 14:01:10 -07001254#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // High bit-depth 2D inverse hybrid transform for a 4x4 block: row
  // transform, transpose, column transform, then add the rounded residual
  // into the destination clamped to bit depth bd.
  //
  // FLIPADST entries reuse the plain ADST kernels; the flipping itself is
  // applied below via maybe_flip_strides16().
  static const highbd_transform_2d HIGH_IHT_4[] = {
    { aom_highbd_idct4_c, aom_highbd_idct4_c },    // DCT_DCT
    { aom_highbd_iadst4_c, aom_highbd_idct4_c },   // ADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst4_c },   // DCT_ADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst4_c, aom_highbd_idct4_c },   // FLIPADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst4_c },   // DCT_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // ADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // FLIPADST_ADST
    { highbd_iidtx4_c, highbd_iidtx4_c },          // IDTX
    { aom_highbd_idct4_c, highbd_iidtx4_c },       // V_DCT
    { highbd_iidtx4_c, aom_highbd_idct4_c },       // H_DCT
    { aom_highbd_iadst4_c, highbd_iidtx4_c },      // V_ADST
    { highbd_iidtx4_c, aom_highbd_iadst4_c },      // H_ADST
    { aom_highbd_iadst4_c, highbd_iidtx4_c },      // V_FLIPADST
    { highbd_iidtx4_c, aom_highbd_iadst4_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[4][4];  // intermediate (row-, then column-transformed)
  tran_low_t *outp = &out[0][0];
  int outstride = 4;

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
    HIGH_IHT_4[tx_type].rows(input, out[i], bd);
    input += 4;
  }

  // transpose in place so the column transform can run on contiguous rows
  for (i = 1; i < 4; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
    HIGH_IHT_4[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, mirror the traversal of dest and/or out.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
#endif

  // Sum with the destination; the final rounding shift of 4 removes the
  // transform scaling for this block size.
  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4), bd);
    }
  }
}
1320
void av1_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // High bit-depth 2D inverse hybrid transform for a 4x8 (w=4, h=8)
  // rectangular block. The row pass is fused with the transpose, and each
  // row result is scaled by sqrt(2) (fixed-point Sqrt2) to compensate for
  // the non-square size before the column pass.
  static const highbd_transform_2d HIGH_IHT_4x8[] = {
    { aom_highbd_idct8_c, aom_highbd_idct4_c },    // DCT_DCT
    { aom_highbd_iadst8_c, aom_highbd_idct4_c },   // ADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst4_c },   // DCT_ADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst8_c, aom_highbd_idct4_c },   // FLIPADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst4_c },   // DCT_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // ADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // FLIPADST_ADST
    { highbd_iidtx8_c, highbd_iidtx4_c },          // IDTX
    { aom_highbd_idct8_c, highbd_iidtx4_c },       // V_DCT
    { highbd_iidtx8_c, aom_highbd_idct4_c },       // H_DCT
    { aom_highbd_iadst8_c, highbd_iidtx4_c },      // V_ADST
    { highbd_iidtx8_c, aom_highbd_iadst4_c },      // H_ADST
    { aom_highbd_iadst8_c, highbd_iidtx4_c },      // V_FLIPADST
    { highbd_iidtx8_c, aom_highbd_iadst4_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };
  const int n = 4;   // block width
  const int n2 = 8;  // block height

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors, and transpose
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_4x8[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n; ++j) {
      // scale by sqrt(2) while writing transposed
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    }
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    HIGH_IHT_4x8[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, mirror the traversal of dest and/or out.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif  // CONFIG_EXT_TX

  // Sum with the destination (rounding shift 5 for this block size)
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    }
  }
}
1382
void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // High bit-depth 2D inverse hybrid transform for an 8x4 (w=8, h=4)
  // rectangular block. The row pass is fused with the transpose, and each
  // row result is scaled by sqrt(2) (fixed-point Sqrt2) to compensate for
  // the non-square size before the column pass.
  static const highbd_transform_2d HIGH_IHT_8x4[] = {
    { aom_highbd_idct4_c, aom_highbd_idct8_c },    // DCT_DCT
    { aom_highbd_iadst4_c, aom_highbd_idct8_c },   // ADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst8_c },   // DCT_ADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst4_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    { highbd_iidtx4_c, highbd_iidtx8_c },          // IDTX
    { aom_highbd_idct4_c, highbd_iidtx8_c },       // V_DCT
    { highbd_iidtx4_c, aom_highbd_idct8_c },       // H_DCT
    { aom_highbd_iadst4_c, highbd_iidtx8_c },      // V_ADST
    { highbd_iidtx4_c, aom_highbd_iadst8_c },      // H_ADST
    { aom_highbd_iadst4_c, highbd_iidtx8_c },      // V_FLIPADST
    { highbd_iidtx4_c, aom_highbd_iadst8_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };
  const int n = 4;   // block height
  const int n2 = 8;  // block width

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors, and transpose
  for (i = 0; i < n; ++i) {
    HIGH_IHT_8x4[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n2; ++j) {
      // scale by sqrt(2) while writing transposed
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    }
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_8x4[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, mirror the traversal of dest and/or out.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif  // CONFIG_EXT_TX

  // Sum with the destination (rounding shift 5 for this block size)
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    }
  }
}
1444
void av1_highbd_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int tx_type, int bd) {
  // High bit-depth 2D inverse hybrid transform for an 8x16 (w=8, h=16)
  // rectangular block. The row pass is fused with the transpose, and each
  // row result is scaled by sqrt(2) (fixed-point Sqrt2) to compensate for
  // the non-square size before the column pass.
  static const highbd_transform_2d HIGH_IHT_8x16[] = {
    { aom_highbd_idct16_c, aom_highbd_idct8_c },    // DCT_DCT
    { aom_highbd_iadst16_c, aom_highbd_idct8_c },   // ADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst8_c },   // DCT_ADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst16_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    { highbd_iidtx16_c, highbd_iidtx8_c },          // IDTX
    { aom_highbd_idct16_c, highbd_iidtx8_c },       // V_DCT
    { highbd_iidtx16_c, aom_highbd_idct8_c },       // H_DCT
    { aom_highbd_iadst16_c, highbd_iidtx8_c },      // V_ADST
    { highbd_iidtx16_c, aom_highbd_iadst8_c },      // H_ADST
    { aom_highbd_iadst16_c, highbd_iidtx8_c },      // V_FLIPADST
    { highbd_iidtx16_c, aom_highbd_iadst8_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };
  const int n = 8;    // block width
  const int n2 = 16;  // block height

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors, and transpose (scaling each value by
  // sqrt(2) as it is written)
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_8x16[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    HIGH_IHT_8x16[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, mirror the traversal of dest and/or out.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif  // CONFIG_EXT_TX

  // Sum with the destination (rounding shift 6 for this block size)
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1505
void av1_highbd_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int tx_type, int bd) {
  // High bit-depth 2D inverse hybrid transform for a 16x8 (w=16, h=8)
  // rectangular block. The row pass is fused with the transpose, and each
  // row result is scaled by sqrt(2) (fixed-point Sqrt2) to compensate for
  // the non-square size before the column pass.
  static const highbd_transform_2d HIGH_IHT_16x8[] = {
    { aom_highbd_idct8_c, aom_highbd_idct16_c },    // DCT_DCT
    { aom_highbd_iadst8_c, aom_highbd_idct16_c },   // ADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst16_c },   // DCT_ADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst8_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst16_c },   // DCT_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    { highbd_iidtx8_c, highbd_iidtx16_c },          // IDTX
    { aom_highbd_idct8_c, highbd_iidtx16_c },       // V_DCT
    { highbd_iidtx8_c, aom_highbd_idct16_c },       // H_DCT
    { aom_highbd_iadst8_c, highbd_iidtx16_c },      // V_ADST
    { highbd_iidtx8_c, aom_highbd_iadst16_c },      // H_ADST
    { aom_highbd_iadst8_c, highbd_iidtx16_c },      // V_FLIPADST
    { highbd_iidtx8_c, aom_highbd_iadst16_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };
  const int n = 8;    // block height
  const int n2 = 16;  // block width

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[16][8], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors, and transpose (scaling each value by
  // sqrt(2) as it is written)
  for (i = 0; i < n; ++i) {
    HIGH_IHT_16x8[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n2; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_16x8[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, mirror the traversal of dest and/or out.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif  // CONFIG_EXT_TX

  // Sum with the destination (rounding shift 6 for this block size)
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1566
// High-bitdepth 16x32 (wide x tall) inverse hybrid transform, accumulated
// into the destination: 32 row transforms of length 16, a transpose, 16
// column transforms of length 32, then a rounded, clipped add into dest8.
// Note: the 1-D transforms must tolerate input and output being the same
// buffer (the column pass runs in place).
void av1_highbd_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest8,
                                   int stride, int tx_type, int bd) {
  // Per-tx_type pair of 1-D inverse transforms: { column (len 32), row (len 16) }.
  static const highbd_transform_2d HIGH_IHT_16x32[] = {
    { aom_highbd_idct32_c, aom_highbd_idct16_c },     // DCT_DCT
    { highbd_ihalfright32_c, aom_highbd_idct16_c },   // ADST_DCT
    { aom_highbd_idct32_c, aom_highbd_iadst16_c },    // DCT_ADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { highbd_ihalfright32_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    { aom_highbd_idct32_c, aom_highbd_iadst16_c },    // DCT_FLIPADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    { highbd_iidtx32_c, highbd_iidtx16_c },           // IDTX
    { aom_highbd_idct32_c, highbd_iidtx16_c },        // V_DCT
    { highbd_iidtx32_c, aom_highbd_idct16_c },        // H_DCT
    { highbd_ihalfright32_c, highbd_iidtx16_c },      // V_ADST
    { highbd_iidtx32_c, aom_highbd_iadst16_c },       // H_ADST
    { highbd_ihalfright32_c, highbd_iidtx16_c },      // V_FLIPADST
    { highbd_iidtx32_c, aom_highbd_iadst16_c },       // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };
  const int n = 16;   // block width = row-transform length
  const int n2 = 32;  // block height = column-transform length

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[16][32], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors, and transpose
  // Each row result is scaled by Sqrt2 (with a rounding shift) to
  // compensate for the rectangular (non-square) transform size.
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_16x32[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n;
  }

  // inverse transform column vectors (in place; after the transpose above
  // each out[i] holds one column of the block)
  for (i = 0; i < n; ++i) {
    HIGH_IHT_16x32[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // Redirects dest/outp and their strides so flipped transform types are
  // written mirrored.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif  // CONFIG_EXT_TX

  // Sum with the destination, rounding off the remaining 6 bits of
  // transform precision and clipping to the bit depth.
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1627
// High-bitdepth 32x16 (wide x tall) inverse hybrid transform, accumulated
// into the destination: 16 row transforms of length 32, a transpose, 32
// column transforms of length 16, then a rounded, clipped add into dest8.
// Note: the 1-D transforms must tolerate input and output being the same
// buffer (the column pass runs in place).
void av1_highbd_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest8,
                                   int stride, int tx_type, int bd) {
  // Per-tx_type pair of 1-D inverse transforms: { column (len 16), row (len 32) }.
  static const highbd_transform_2d HIGH_IHT_32x16[] = {
    { aom_highbd_idct16_c, aom_highbd_idct32_c },     // DCT_DCT
    { aom_highbd_iadst16_c, aom_highbd_idct32_c },    // ADST_DCT
    { aom_highbd_idct16_c, highbd_ihalfright32_c },   // DCT_ADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst16_c, aom_highbd_idct32_c },    // FLIPADST_DCT
    { aom_highbd_idct16_c, highbd_ihalfright32_c },   // DCT_FLIPADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // ADST_FLIPADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // FLIPADST_ADST
    { highbd_iidtx16_c, highbd_iidtx32_c },           // IDTX
    { aom_highbd_idct16_c, highbd_iidtx32_c },        // V_DCT
    { highbd_iidtx16_c, aom_highbd_idct32_c },        // H_DCT
    { aom_highbd_iadst16_c, highbd_iidtx32_c },       // V_ADST
    { highbd_iidtx16_c, highbd_ihalfright32_c },      // H_ADST
    { aom_highbd_iadst16_c, highbd_iidtx32_c },       // V_FLIPADST
    { highbd_iidtx16_c, highbd_ihalfright32_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };
  const int n = 16;   // block height = column-transform length
  const int n2 = 32;  // block width = row-transform length

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[32][16], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors, and transpose
  // Each row result is scaled by Sqrt2 (with a rounding shift) to
  // compensate for the rectangular (non-square) transform size.
  for (i = 0; i < n; ++i) {
    HIGH_IHT_32x16[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n2; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n2;
  }

  // inverse transform column vectors (in place; after the transpose above
  // each out[i] holds one column of the block)
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_32x16[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // Redirects dest/outp and their strides so flipped transform types are
  // written mirrored.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif  // CONFIG_EXT_TX

  // Sum with the destination, rounding off the remaining 6 bits of
  // transform precision and clipping to the bit depth.
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07001688
// High-bitdepth 8x8 inverse hybrid transform, accumulated into the
// destination: 8 row transforms, an explicit transpose, 8 in-place column
// transforms, then a rounded, clipped add into dest8.
void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // Per-tx_type pair of length-8 1-D inverse transforms: { column, row }.
  static const highbd_transform_2d HIGH_IHT_8[] = {
    { aom_highbd_idct8_c, aom_highbd_idct8_c },    // DCT_DCT
    { aom_highbd_iadst8_c, aom_highbd_idct8_c },   // ADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst8_c },   // DCT_ADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst8_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    { highbd_iidtx8_c, highbd_iidtx8_c },          // IDTX
    { aom_highbd_idct8_c, highbd_iidtx8_c },       // V_DCT
    { highbd_iidtx8_c, aom_highbd_idct8_c },       // H_DCT
    { aom_highbd_iadst8_c, highbd_iidtx8_c },      // V_ADST
    { highbd_iidtx8_c, aom_highbd_iadst8_c },      // H_ADST
    { aom_highbd_iadst8_c, highbd_iidtx8_c },      // V_FLIPADST
    { highbd_iidtx8_c, aom_highbd_iadst8_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[8][8];
  tran_low_t *outp = &out[0][0];
  int outstride = 8;

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
    HIGH_IHT_8[tx_type].rows(input, out[i], bd);
    input += 8;
  }

  // transpose (in place, swapping across the main diagonal)
  for (i = 1; i < 8; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors (in place)
  for (i = 0; i < 8; ++i) {
    HIGH_IHT_8[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // Redirects dest/outp and their strides so flipped transform types are
  // written mirrored.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
#endif

  // Sum with the destination, rounding off the remaining 5 bits of
  // transform precision and clipping to the bit depth.
  for (i = 0; i < 8; ++i) {
    for (j = 0; j < 8; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    }
  }
}
1754
// High-bitdepth 16x16 inverse hybrid transform, accumulated into the
// destination: 16 row transforms, an explicit transpose, 16 in-place
// column transforms, then a rounded, clipped add into dest8.
void av1_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
                                   int stride, int tx_type, int bd) {
  // Per-tx_type pair of length-16 1-D inverse transforms: { column, row }.
  static const highbd_transform_2d HIGH_IHT_16[] = {
    { aom_highbd_idct16_c, aom_highbd_idct16_c },    // DCT_DCT
    { aom_highbd_iadst16_c, aom_highbd_idct16_c },   // ADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst16_c },   // DCT_ADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst16_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst16_c },   // DCT_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    { highbd_iidtx16_c, highbd_iidtx16_c },          // IDTX
    { aom_highbd_idct16_c, highbd_iidtx16_c },       // V_DCT
    { highbd_iidtx16_c, aom_highbd_idct16_c },       // H_DCT
    { aom_highbd_iadst16_c, highbd_iidtx16_c },      // V_ADST
    { highbd_iidtx16_c, aom_highbd_iadst16_c },      // H_ADST
    { aom_highbd_iadst16_c, highbd_iidtx16_c },      // V_FLIPADST
    { highbd_iidtx16_c, aom_highbd_iadst16_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[16][16];
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // inverse transform row vectors
  for (i = 0; i < 16; ++i) {
    HIGH_IHT_16[tx_type].rows(input, out[i], bd);
    input += 16;
  }

  // transpose (in place, swapping across the main diagonal)
  for (i = 1; i < 16; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors (in place)
  for (i = 0; i < 16; ++i) {
    HIGH_IHT_16[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // Redirects dest/outp and their strides so flipped transform types are
  // written mirrored.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
#endif

  // Sum with the destination, rounding off the remaining 6 bits of
  // transform precision and clipping to the bit depth.
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1820
1821#if CONFIG_EXT_TX
// High-bitdepth 32x32 inverse hybrid transform, accumulated into the
// destination.  Only compiled under CONFIG_EXT_TX (the enclosing #if), so
// the transform table covers the full extended tx_type set unconditionally.
void av1_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
                                    int stride, int tx_type, int bd) {
  // Per-tx_type pair of length-32 1-D inverse transforms: { column, row }.
  static const highbd_transform_2d HIGH_IHT_32[] = {
    { aom_highbd_idct32_c, aom_highbd_idct32_c },      // DCT_DCT
    { highbd_ihalfright32_c, aom_highbd_idct32_c },    // ADST_DCT
    { aom_highbd_idct32_c, highbd_ihalfright32_c },    // DCT_ADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // ADST_ADST
    { highbd_ihalfright32_c, aom_highbd_idct32_c },    // FLIPADST_DCT
    { aom_highbd_idct32_c, highbd_ihalfright32_c },    // DCT_FLIPADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // FLIPADST_FLIPADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // ADST_FLIPADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // FLIPADST_ADST
    { highbd_iidtx32_c, highbd_iidtx32_c },            // IDTX
    { aom_highbd_idct32_c, highbd_iidtx32_c },         // V_DCT
    { highbd_iidtx32_c, aom_highbd_idct32_c },         // H_DCT
    { highbd_ihalfright32_c, highbd_iidtx32_c },       // V_ADST
    { highbd_iidtx32_c, highbd_ihalfright32_c },       // H_ADST
    { highbd_ihalfright32_c, highbd_iidtx32_c },       // V_FLIPADST
    { highbd_iidtx32_c, highbd_ihalfright32_c },       // H_FLIPADST
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[32][32];
  tran_low_t *outp = &out[0][0];
  int outstride = 32;

  // inverse transform row vectors
  for (i = 0; i < 32; ++i) {
    HIGH_IHT_32[tx_type].rows(input, out[i], bd);
    input += 32;
  }

  // transpose (in place, swapping across the main diagonal)
  for (i = 1; i < 32; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors (in place)
  for (i = 0; i < 32; ++i) {
    HIGH_IHT_32[tx_type].cols(out[i], out[i], bd);
  }

  // Redirects dest/outp and their strides so flipped transform types are
  // written mirrored.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32, 32);

  // Sum with the destination, rounding off the remaining 6 bits of
  // transform precision and clipping to the bit depth.
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -07001883#endif // CONFIG_EXT_TX
Debargha Mukherjee67d13472016-11-01 14:37:39 -07001884
1885#if CONFIG_TX64X64
// High-bitdepth 64x64 inverse hybrid transform, accumulated into the
// destination.  Same structure as the smaller sizes, plus an extra
// 1-bit rounding of the row-pass output to keep intermediate values in
// range for the larger transform.
void av1_highbd_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest8,
                                    int stride, int tx_type, int bd) {
  // Per-tx_type pair of length-64 1-D inverse transforms: { column, row }.
  static const highbd_transform_2d HIGH_IHT_64[] = {
    { highbd_idct64_col_c, highbd_idct64_row_c },      // DCT_DCT
    { highbd_ihalfright64_c, highbd_idct64_row_c },    // ADST_DCT
    { highbd_idct64_col_c, highbd_ihalfright64_c },    // DCT_ADST
    { highbd_ihalfright64_c, highbd_ihalfright64_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { highbd_ihalfright64_c, highbd_idct64_row_c },    // FLIPADST_DCT
    { highbd_idct64_col_c, highbd_ihalfright64_c },    // DCT_FLIPADST
    { highbd_ihalfright64_c, highbd_ihalfright64_c },  // FLIPADST_FLIPADST
    { highbd_ihalfright64_c, highbd_ihalfright64_c },  // ADST_FLIPADST
    { highbd_ihalfright64_c, highbd_ihalfright64_c },  // FLIPADST_ADST
    { highbd_iidtx64_c, highbd_iidtx64_c },            // IDTX
    { highbd_idct64_col_c, highbd_iidtx64_c },         // V_DCT
    { highbd_iidtx64_c, highbd_idct64_row_c },         // H_DCT
    { highbd_ihalfright64_c, highbd_iidtx64_c },       // V_ADST
    { highbd_iidtx64_c, highbd_ihalfright64_c },       // H_ADST
    { highbd_ihalfright64_c, highbd_iidtx64_c },       // V_FLIPADST
    { highbd_iidtx64_c, highbd_ihalfright64_c },       // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[64][64];
  tran_low_t *outp = &out[0][0];
  int outstride = 64;

  // inverse transform row vectors, dropping one bit of precision per
  // sample to bound the intermediate dynamic range
  for (i = 0; i < 64; ++i) {
    HIGH_IHT_64[tx_type].rows(input, out[i], bd);
    for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
    input += 64;
  }

  // transpose (in place, swapping across the main diagonal)
  for (i = 1; i < 64; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors (in place)
  for (i = 0; i < 64; ++i) {
    HIGH_IHT_64[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // Redirects dest/outp and their strides so flipped transform types are
  // written mirrored.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
#endif  // CONFIG_EXT_TX

  // Sum with the destination, rounding off the remaining 5 bits of
  // transform precision and clipping to the bit depth.
  for (i = 0; i < 64; ++i) {
    for (j = 0; j < 64; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    }
  }
}
1952#endif // CONFIG_TX64X64
Yaowu Xuc27fc142016-08-22 16:08:15 -07001953
1954// idct
Yaowu Xuf883b422016-08-30 14:01:10 -07001955void av1_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
1956 int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001957 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07001958 aom_highbd_idct4x4_16_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001959 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001960 aom_highbd_idct4x4_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001961}
1962
Yaowu Xuf883b422016-08-30 14:01:10 -07001963void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
1964 int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001965 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07001966 aom_highbd_iwht4x4_16_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001967 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001968 aom_highbd_iwht4x4_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001969}
1970
Yaowu Xuf883b422016-08-30 14:01:10 -07001971void av1_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
1972 int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001973 // If dc is 1, then input[0] is the reconstructed value, do not need
1974 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
1975
1976 // The calculation can be simplified if there are not many non-zero dct
1977 // coefficients. Use eobs to decide what to do.
Yaowu Xuf883b422016-08-30 14:01:10 -07001978 // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
Yaowu Xuc27fc142016-08-22 16:08:15 -07001979 // Combine that with code here.
1980 // DC only DCT coefficient
Angie Chianged8cd9a2016-10-21 16:44:47 -07001981 if (eob == 1) aom_highbd_idct8x8_1_add(input, dest, stride, bd);
1982#if !CONFIG_ADAPT_SCAN
1983 else if (eob <= 10)
Yaowu Xuf883b422016-08-30 14:01:10 -07001984 aom_highbd_idct8x8_10_add(input, dest, stride, bd);
Angie Chianged8cd9a2016-10-21 16:44:47 -07001985#endif
1986 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001987 aom_highbd_idct8x8_64_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001988}
1989
Yaowu Xuf883b422016-08-30 14:01:10 -07001990void av1_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
1991 int stride, int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001992 // The calculation can be simplified if there are not many non-zero dct
1993 // coefficients. Use eobs to separate different cases.
1994 // DC only DCT coefficient.
Angie Chianged8cd9a2016-10-21 16:44:47 -07001995 if (eob == 1) aom_highbd_idct16x16_1_add(input, dest, stride, bd);
1996#if !CONFIG_ADAPT_SCAN
1997 else if (eob <= 10)
Yaowu Xuf883b422016-08-30 14:01:10 -07001998 aom_highbd_idct16x16_10_add(input, dest, stride, bd);
Angie Chianged8cd9a2016-10-21 16:44:47 -07001999#endif
2000 else
Yaowu Xuf883b422016-08-30 14:01:10 -07002001 aom_highbd_idct16x16_256_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002002}
2003
Yaowu Xuf883b422016-08-30 14:01:10 -07002004void av1_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
2005 int stride, int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002006 // Non-zero coeff only in upper-left 8x8
Angie Chianged8cd9a2016-10-21 16:44:47 -07002007 if (eob == 1) aom_highbd_idct32x32_1_add(input, dest, stride, bd);
2008#if !CONFIG_ADAPT_SCAN
2009 else if (eob <= 34)
Yaowu Xuf883b422016-08-30 14:01:10 -07002010 aom_highbd_idct32x32_34_add(input, dest, stride, bd);
Angie Chianged8cd9a2016-10-21 16:44:47 -07002011#endif
2012 else
Yaowu Xuf883b422016-08-30 14:01:10 -07002013 aom_highbd_idct32x32_1024_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002014}
2015
Yaowu Xuf883b422016-08-30 14:01:10 -07002016void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
2017 int stride, int eob, int bd, TX_TYPE tx_type,
2018 int lossless) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002019 if (lossless) {
2020 assert(tx_type == DCT_DCT);
Yaowu Xuf883b422016-08-30 14:01:10 -07002021 av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002022 return;
2023 }
2024
2025 switch (tx_type) {
2026 case DCT_DCT:
2027 case ADST_DCT:
2028 case DCT_ADST:
2029 case ADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07002030 av1_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
2031 bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002032 break;
2033#if CONFIG_EXT_TX
2034 case FLIPADST_DCT:
2035 case DCT_FLIPADST:
2036 case FLIPADST_FLIPADST:
2037 case ADST_FLIPADST:
2038 case FLIPADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07002039 av1_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
2040 bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002041 break;
2042 case V_DCT:
2043 case H_DCT:
2044 case V_ADST:
2045 case H_ADST:
2046 case V_FLIPADST:
2047 case H_FLIPADST:
2048 // Use C version since DST only exists in C code
Yaowu Xuf883b422016-08-30 14:01:10 -07002049 av1_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002050 break;
2051 case IDTX:
2052 highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd);
2053 break;
2054#endif // CONFIG_EXT_TX
2055 default: assert(0); break;
2056 }
2057}
2058
Yaowu Xuf883b422016-08-30 14:01:10 -07002059void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
2060 int stride, int eob, int bd, TX_TYPE tx_type) {
2061 (void)eob;
2062 av1_highbd_iht4x8_32_add_c(input, dest, stride, tx_type, bd);
2063}
2064
2065void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
2066 int stride, int eob, int bd, TX_TYPE tx_type) {
2067 (void)eob;
2068 av1_highbd_iht8x4_32_add_c(input, dest, stride, tx_type, bd);
2069}
2070
2071void av1_highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002072 int stride, int eob, int bd,
2073 TX_TYPE tx_type) {
2074 (void)eob;
Yaowu Xuf883b422016-08-30 14:01:10 -07002075 av1_highbd_iht8x16_128_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002076}
2077
Yaowu Xuf883b422016-08-30 14:01:10 -07002078void av1_highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002079 int stride, int eob, int bd,
2080 TX_TYPE tx_type) {
2081 (void)eob;
Yaowu Xuf883b422016-08-30 14:01:10 -07002082 av1_highbd_iht16x8_128_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002083}
2084
Yaowu Xuf883b422016-08-30 14:01:10 -07002085void av1_highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002086 int stride, int eob, int bd,
2087 TX_TYPE tx_type) {
2088 (void)eob;
Yaowu Xuf883b422016-08-30 14:01:10 -07002089 av1_highbd_iht16x32_512_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002090}
2091
Yaowu Xuf883b422016-08-30 14:01:10 -07002092void av1_highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002093 int stride, int eob, int bd,
2094 TX_TYPE tx_type) {
2095 (void)eob;
Yaowu Xuf883b422016-08-30 14:01:10 -07002096 av1_highbd_iht32x16_512_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002097}
Yaowu Xuc27fc142016-08-22 16:08:15 -07002098
Yaowu Xuf883b422016-08-30 14:01:10 -07002099void av1_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
2100 int stride, int eob, int bd, TX_TYPE tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002101 (void)eob;
2102 switch (tx_type) {
2103 case DCT_DCT:
2104 case ADST_DCT:
2105 case DCT_ADST:
2106 case ADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07002107 av1_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
2108 bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002109 break;
2110#if CONFIG_EXT_TX
2111 case FLIPADST_DCT:
2112 case DCT_FLIPADST:
2113 case FLIPADST_FLIPADST:
2114 case ADST_FLIPADST:
2115 case FLIPADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07002116 av1_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
2117 bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002118 break;
2119 case V_DCT:
2120 case H_DCT:
2121 case V_ADST:
2122 case H_ADST:
2123 case V_FLIPADST:
2124 case H_FLIPADST:
2125 // Use C version since DST only exists in C code
Yaowu Xuf883b422016-08-30 14:01:10 -07002126 av1_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002127 break;
2128 case IDTX:
2129 highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd);
2130 break;
2131#endif // CONFIG_EXT_TX
2132 default: assert(0); break;
2133 }
2134}
2135
Yaowu Xuf883b422016-08-30 14:01:10 -07002136void av1_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
2137 int stride, int eob, int bd,
2138 TX_TYPE tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002139 (void)eob;
2140 switch (tx_type) {
2141 case DCT_DCT:
2142 case ADST_DCT:
2143 case DCT_ADST:
2144 case ADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07002145 av1_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
2146 tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002147 break;
2148#if CONFIG_EXT_TX
2149 case FLIPADST_DCT:
2150 case DCT_FLIPADST:
2151 case FLIPADST_FLIPADST:
2152 case ADST_FLIPADST:
2153 case FLIPADST_ADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07002154 av1_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
2155 tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002156 break;
2157 case V_DCT:
2158 case H_DCT:
2159 case V_ADST:
2160 case H_ADST:
2161 case V_FLIPADST:
2162 case H_FLIPADST:
2163 // Use C version since DST only exists in C code
Yaowu Xuf883b422016-08-30 14:01:10 -07002164 av1_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002165 break;
2166 case IDTX:
2167 highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd);
2168 break;
2169#endif // CONFIG_EXT_TX
2170 default: assert(0); break;
2171 }
2172}
2173
Yaowu Xuf883b422016-08-30 14:01:10 -07002174void av1_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
2175 int stride, int eob, int bd,
2176 TX_TYPE tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002177 (void)eob;
2178 switch (tx_type) {
2179 case DCT_DCT:
Yaowu Xuf883b422016-08-30 14:01:10 -07002180 av1_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
2181 DCT_DCT, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002182 break;
2183#if CONFIG_EXT_TX
2184 case ADST_DCT:
2185 case DCT_ADST:
2186 case ADST_ADST:
2187 case FLIPADST_DCT:
2188 case DCT_FLIPADST:
2189 case FLIPADST_FLIPADST:
2190 case ADST_FLIPADST:
2191 case FLIPADST_ADST:
2192 case V_DCT:
2193 case H_DCT:
2194 case V_ADST:
2195 case H_ADST:
2196 case V_FLIPADST:
2197 case H_FLIPADST:
Yaowu Xuf883b422016-08-30 14:01:10 -07002198 av1_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002199 break;
2200 case IDTX:
2201 highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd);
2202 break;
2203#endif // CONFIG_EXT_TX
2204 default: assert(0); break;
2205 }
2206}
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -07002207
2208#if CONFIG_TX64X64
2209void av1_highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
2210 int stride, int eob, int bd,
2211 TX_TYPE tx_type) {
2212 (void)eob;
2213 switch (tx_type) {
2214 case DCT_DCT:
2215 av1_inv_txfm2d_add_64x64(input, CONVERT_TO_SHORTPTR(dest), stride,
2216 DCT_DCT, bd);
2217 break;
2218#if CONFIG_EXT_TX
2219 case ADST_DCT:
2220 case DCT_ADST:
2221 case ADST_ADST:
2222 case FLIPADST_DCT:
2223 case DCT_FLIPADST:
2224 case FLIPADST_FLIPADST:
2225 case ADST_FLIPADST:
2226 case FLIPADST_ADST:
2227 case V_DCT:
2228 case H_DCT:
2229 case V_ADST:
2230 case H_ADST:
2231 case V_FLIPADST:
2232 case H_FLIPADST:
2233 av1_highbd_iht64x64_4096_add_c(input, dest, stride, tx_type, bd);
2234 break;
2235 case IDTX:
2236 highbd_inv_idtx_add_c(input, dest, stride, 64, tx_type, bd);
2237 break;
2238#endif // CONFIG_EXT_TX
2239 default: assert(0); break;
2240 }
2241}
2242#endif // CONFIG_TX64X64
Yaowu Xuf883b422016-08-30 14:01:10 -07002243#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07002244
2245void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
2246 INV_TXFM_PARAM *inv_txfm_param) {
2247 const TX_TYPE tx_type = inv_txfm_param->tx_type;
2248 const TX_SIZE tx_size = inv_txfm_param->tx_size;
2249 const int eob = inv_txfm_param->eob;
2250 const int lossless = inv_txfm_param->lossless;
2251
2252 switch (tx_size) {
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -07002253#if CONFIG_TX64X64
2254 case TX_64X64:
2255 av1_inv_txfm_add_64x64(input, dest, stride, eob, tx_type);
2256 break;
2257#endif // CONFIG_TX64X64
Yaowu Xuc27fc142016-08-22 16:08:15 -07002258 case TX_32X32:
Yaowu Xuf883b422016-08-30 14:01:10 -07002259 av1_inv_txfm_add_32x32(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002260 break;
2261 case TX_16X16:
Yaowu Xuf883b422016-08-30 14:01:10 -07002262 av1_inv_txfm_add_16x16(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002263 break;
Yaowu Xuf883b422016-08-30 14:01:10 -07002264 case TX_8X8: av1_inv_txfm_add_8x8(input, dest, stride, eob, tx_type); break;
Yaowu Xuf883b422016-08-30 14:01:10 -07002265 case TX_4X8: av1_inv_txfm_add_4x8(input, dest, stride, eob, tx_type); break;
2266 case TX_8X4: av1_inv_txfm_add_8x4(input, dest, stride, eob, tx_type); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002267 case TX_8X16:
Yaowu Xuf883b422016-08-30 14:01:10 -07002268 av1_inv_txfm_add_8x16(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002269 break;
2270 case TX_16X8:
Yaowu Xuf883b422016-08-30 14:01:10 -07002271 av1_inv_txfm_add_16x8(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002272 break;
2273 case TX_16X32:
Yaowu Xuf883b422016-08-30 14:01:10 -07002274 av1_inv_txfm_add_16x32(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002275 break;
2276 case TX_32X16:
Yaowu Xuf883b422016-08-30 14:01:10 -07002277 av1_inv_txfm_add_32x16(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002278 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002279 case TX_4X4:
Yaowu Xuf883b422016-08-30 14:01:10 -07002280 // this is like av1_short_idct4x4 but has a special case around eob<=1
Yaowu Xuc27fc142016-08-22 16:08:15 -07002281 // which is significant (not just an optimization) for the lossless
2282 // case.
Yaowu Xuf883b422016-08-30 14:01:10 -07002283 av1_inv_txfm_add_4x4(input, dest, stride, eob, tx_type, lossless);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002284 break;
2285 default: assert(0 && "Invalid transform size"); break;
2286 }
2287}
2288
#if CONFIG_AOM_HIGHBITDEPTH
// High-bitdepth counterpart of inv_txfm_add(): applies the inverse transform
// selected by inv_txfm_param and adds the residual into the destination
// buffer. Dispatch is purely on transform size; the transform type, bit
// depth, and end-of-block position are forwarded to the per-size kernels.
void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
                         INV_TXFM_PARAM *inv_txfm_param) {
  const TX_TYPE type = inv_txfm_param->tx_type;
  const TX_SIZE size = inv_txfm_param->tx_size;
  const int end_of_block = inv_txfm_param->eob;
  const int bit_depth = inv_txfm_param->bd;
  const int is_lossless = inv_txfm_param->lossless;

  switch (size) {
#if CONFIG_TX64X64
    case TX_64X64:
      av1_highbd_inv_txfm_add_64x64(input, dest, stride, end_of_block,
                                    bit_depth, type);
      break;
#endif  // CONFIG_TX64X64
    case TX_32X32:
      av1_highbd_inv_txfm_add_32x32(input, dest, stride, end_of_block,
                                    bit_depth, type);
      break;
    case TX_16X16:
      av1_highbd_inv_txfm_add_16x16(input, dest, stride, end_of_block,
                                    bit_depth, type);
      break;
    case TX_8X8:
      av1_highbd_inv_txfm_add_8x8(input, dest, stride, end_of_block,
                                  bit_depth, type);
      break;
    case TX_4X8:
      av1_highbd_inv_txfm_add_4x8(input, dest, stride, end_of_block,
                                  bit_depth, type);
      break;
    case TX_8X4:
      av1_highbd_inv_txfm_add_8x4(input, dest, stride, end_of_block,
                                  bit_depth, type);
      break;
    case TX_8X16:
      av1_highbd_inv_txfm_add_8x16(input, dest, stride, end_of_block,
                                   bit_depth, type);
      break;
    case TX_16X8:
      av1_highbd_inv_txfm_add_16x8(input, dest, stride, end_of_block,
                                   bit_depth, type);
      break;
    case TX_16X32:
      av1_highbd_inv_txfm_add_16x32(input, dest, stride, end_of_block,
                                    bit_depth, type);
      break;
    case TX_32X16:
      av1_highbd_inv_txfm_add_32x16(input, dest, stride, end_of_block,
                                    bit_depth, type);
      break;
    case TX_4X4:
      // Mirrors av1_short_idct4x4 but carries a special case around eob<=1.
      // That special case is significant (not merely an optimization) for
      // the lossless path, so the lossless flag is forwarded here only.
      av1_highbd_inv_txfm_add_4x4(input, dest, stride, end_of_block,
                                  bit_depth, type, is_lossless);
      break;
    default: assert(0 && "Invalid transform size"); break;
  }
}
#endif  // CONFIG_AOM_HIGHBITDEPTH