blob: 56af73f4d845fcff51e25b086f7ef476d8da5c88 [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xuc27fc142016-08-22 16:08:15 -070010 */
11
12#include <math.h>
13
Yaowu Xuf883b422016-08-30 14:01:10 -070014#include "./av1_rtcd.h"
15#include "./aom_dsp_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070016#include "av1/common/blockd.h"
17#include "av1/common/enums.h"
18#include "av1/common/idct.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070019#include "av1/common/av1_inv_txfm2d_cfg.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070020#include "aom_dsp/inv_txfm.h"
21#include "aom_ports/mem.h"
22
23int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
24 const TX_SIZE tx_size) {
25 (void)tx_type;
Yaowu Xuf883b422016-08-30 14:01:10 -070026#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -070027 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
28 return txsize_sqr_up_map[tx_size] == TX_32X32;
29 }
30#else
31 (void)xd;
32#endif
33 return txsize_sqr_up_map[tx_size] == TX_32X32;
34}
35
36#if CONFIG_EXT_TX
37static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
38 int i;
39 for (i = 0; i < 4; ++i)
40 output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
41}
42
43static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
44 int i;
45 for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
46}
47
48static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
49 int i;
50 for (i = 0; i < 16; ++i)
51 output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
52}
53
54static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
55 int i;
56 for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
57}
58
59// For use in lieu of DST
60static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
61 int i;
62 tran_low_t inputhalf[16];
63 for (i = 0; i < 16; ++i) {
64 output[i] = input[16 + i] * 4;
65 }
66 // Multiply input by sqrt(2)
67 for (i = 0; i < 16; ++i) {
68 inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
69 }
70 idct16_c(inputhalf, output + 16);
71 // Note overall scaling factor is 4 times orthogonal
72}
73
Yaowu Xuf883b422016-08-30 14:01:10 -070074#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -070075static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
76 int bd) {
77 int i;
78 for (i = 0; i < 4; ++i)
79 output[i] =
80 HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
81}
82
83static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
84 int bd) {
85 int i;
86 (void)bd;
87 for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
88}
89
90static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
91 int bd) {
92 int i;
93 for (i = 0; i < 16; ++i)
94 output[i] =
95 HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * 2 * Sqrt2), bd);
96}
97
98static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
99 int bd) {
100 int i;
101 (void)bd;
102 for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
103}
104
105static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
106 int bd) {
107 int i;
108 tran_low_t inputhalf[16];
109 for (i = 0; i < 16; ++i) {
110 output[i] = input[16 + i] * 4;
111 }
112 // Multiply input by sqrt(2)
113 for (i = 0; i < 16; ++i) {
114 inputhalf[i] =
115 HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
116 }
Yaowu Xuf883b422016-08-30 14:01:10 -0700117 aom_highbd_idct16_c(inputhalf, output + 16, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700118 // Note overall scaling factor is 4 times orthogonal
119}
Yaowu Xuf883b422016-08-30 14:01:10 -0700120#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700121
122// Inverse identity transform and add.
123static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
124 int bs, int tx_type) {
125 int r, c;
126 const int shift = bs < 32 ? 3 : 2;
127 if (tx_type == IDTX) {
128 for (r = 0; r < bs; ++r) {
129 for (c = 0; c < bs; ++c)
130 dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
131 dest += stride;
132 input += bs;
133 }
134 }
135}
136
// Flip a block upside-down in place for subsequent traversal: point dest at
// its last row and negate the stride so row index r walks bottom-to-top.
#define FLIPUD_PTR(dest, stride, size) \
  do {                                 \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);              \
  } while (0)
142
143static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
144 int *sstride, int tx_type, int sizey,
145 int sizex) {
146 // Note that the transpose of src will be added to dst. In order to LR
147 // flip the addends (in dst coordinates), we UD flip the src. To UD flip
148 // the addends, we UD flip the dst.
149 switch (tx_type) {
150 case DCT_DCT:
151 case ADST_DCT:
152 case DCT_ADST:
153 case ADST_ADST:
154 case IDTX:
155 case V_DCT:
156 case H_DCT:
157 case V_ADST:
158 case H_ADST: break;
159 case FLIPADST_DCT:
160 case FLIPADST_ADST:
161 case V_FLIPADST:
162 // flip UD
163 FLIPUD_PTR(*dst, *dstride, sizey);
164 break;
165 case DCT_FLIPADST:
166 case ADST_FLIPADST:
167 case H_FLIPADST:
168 // flip LR
169 FLIPUD_PTR(*src, *sstride, sizex);
170 break;
171 case FLIPADST_FLIPADST:
172 // flip UD
173 FLIPUD_PTR(*dst, *dstride, sizey);
174 // flip LR
175 FLIPUD_PTR(*src, *sstride, sizex);
176 break;
177 default: assert(0); break;
178 }
179}
180
Yaowu Xuf883b422016-08-30 14:01:10 -0700181#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700182void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
183 tran_low_t step[4];
184 tran_high_t temp1, temp2;
185 (void)bd;
186 // stage 1
187 temp1 = (input[3] + input[1]) * cospi_16_64;
188 temp2 = (input[3] - input[1]) * cospi_16_64;
189 step[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
190 step[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
191 temp1 = input[2] * cospi_24_64 - input[0] * cospi_8_64;
192 temp2 = input[2] * cospi_8_64 + input[0] * cospi_24_64;
193 step[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
194 step[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
195
196 // stage 2
197 output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd);
198 output[1] = HIGHBD_WRAPLOW(-step[1] - step[2], bd);
199 output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd);
200 output[3] = HIGHBD_WRAPLOW(step[3] - step[0], bd);
201}
202
// 8-point inverse DST (high bit-depth): an idct8 butterfly network fed
// with reverse-ordered inputs and emitting sign-alternated outputs.
void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
  tran_low_t step1[8], step2[8];
  tran_high_t temp1, temp2;
  (void)bd;  // bd is only consumed by HIGHBD_WRAPLOW in some configurations
  // stage 1: input permutation plus first rotation pairs
  step1[0] = input[7];
  step1[2] = input[3];
  step1[1] = input[5];
  step1[3] = input[1];
  temp1 = input[6] * cospi_28_64 - input[0] * cospi_4_64;
  temp2 = input[6] * cospi_4_64 + input[0] * cospi_28_64;
  step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  temp1 = input[2] * cospi_12_64 - input[4] * cospi_20_64;
  temp2 = input[2] * cospi_20_64 + input[4] * cospi_12_64;
  step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);

  // stage 2: even-half rotations and odd-half butterflies
  temp1 = (step1[0] + step1[2]) * cospi_16_64;
  temp2 = (step1[0] - step1[2]) * cospi_16_64;
  step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
  temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
  step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
  step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
  step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
  step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);

  // stage 3: even-half butterflies and odd-half rotation
  step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
  step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
  step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
  step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
  step1[4] = step2[4];
  temp1 = (step2[6] - step2[5]) * cospi_16_64;
  temp2 = (step2[5] + step2[6]) * cospi_16_64;
  step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  step1[7] = step2[7];

  // stage 4: final recombination with alternating output signs
  output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
  output[1] = HIGHBD_WRAPLOW(-step1[1] - step1[6], bd);
  output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
  output[3] = HIGHBD_WRAPLOW(-step1[3] - step1[4], bd);
  output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
  output[5] = HIGHBD_WRAPLOW(-step1[2] + step1[5], bd);
  output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
  output[7] = HIGHBD_WRAPLOW(-step1[0] + step1[7], bd);
}
257
// 16-point inverse DST (high bit-depth): the standard seven-stage idct16
// butterfly network, fed with a permuted input ordering and producing
// sign-alternated outputs.
void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
  tran_low_t step1[16], step2[16];
  tran_high_t temp1, temp2;
  (void)bd;  // bd is only consumed by HIGHBD_WRAPLOW in some configurations

  // stage 1: input permutation (even half reversed, odd half interleaved)
  step1[0] = input[15];
  step1[1] = input[7];
  step1[2] = input[11];
  step1[3] = input[3];
  step1[4] = input[13];
  step1[5] = input[5];
  step1[6] = input[9];
  step1[7] = input[1];
  step1[8] = input[14];
  step1[9] = input[6];
  step1[10] = input[10];
  step1[11] = input[2];
  step1[12] = input[12];
  step1[13] = input[4];
  step1[14] = input[8];
  step1[15] = input[0];

  // stage 2: rotations on the odd half; even half passes through
  step2[0] = step1[0];
  step2[1] = step1[1];
  step2[2] = step1[2];
  step2[3] = step1[3];
  step2[4] = step1[4];
  step2[5] = step1[5];
  step2[6] = step1[6];
  step2[7] = step1[7];

  temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
  temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
  step2[8] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[15] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);

  temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
  temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
  step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);

  temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
  temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
  step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);

  temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
  temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
  step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);

  // stage 3: rotations on indices 4..7; butterflies on 8..15
  step1[0] = step2[0];
  step1[1] = step2[1];
  step1[2] = step2[2];
  step1[3] = step2[3];

  temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
  temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
  step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
  temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
  step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);

  step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd);
  step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd);
  step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd);
  step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd);
  step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd);
  step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd);
  step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd);
  step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd);

  // stage 4: rotations on 0..3 and 9/14, 10/13; butterflies on 4..7
  temp1 = (step1[0] + step1[1]) * cospi_16_64;
  temp2 = (step1[0] - step1[1]) * cospi_16_64;
  step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
  temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
  step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
  step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
  step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
  step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);

  step2[8] = step1[8];
  step2[15] = step1[15];
  temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
  temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
  step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
  temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
  step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  step2[11] = step1[11];
  step2[12] = step1[12];

  // stage 5: butterflies on both halves plus one cospi_16 rotation
  step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
  step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
  step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
  step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
  step1[4] = step2[4];
  temp1 = (step2[6] - step2[5]) * cospi_16_64;
  temp2 = (step2[5] + step2[6]) * cospi_16_64;
  step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  step1[7] = step2[7];

  step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd);
  step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd);
  step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd);
  step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd);
  step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd);
  step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd);
  step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd);
  step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd);

  // stage 6: even-half butterflies and 10/13, 11/12 rotations
  step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
  step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
  step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
  step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
  step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
  step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
  step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
  step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
  step2[8] = step1[8];
  step2[9] = step1[9];
  temp1 = (-step1[10] + step1[13]) * cospi_16_64;
  temp2 = (step1[10] + step1[13]) * cospi_16_64;
  step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  temp1 = (-step1[11] + step1[12]) * cospi_16_64;
  temp2 = (step1[11] + step1[12]) * cospi_16_64;
  step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
  step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
  step2[14] = step1[14];
  step2[15] = step1[15];

  // stage 7: final recombination with alternating output signs
  output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd);
  output[1] = HIGHBD_WRAPLOW(-step2[1] - step2[14], bd);
  output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd);
  output[3] = HIGHBD_WRAPLOW(-step2[3] - step2[12], bd);
  output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd);
  output[5] = HIGHBD_WRAPLOW(-step2[5] - step2[10], bd);
  output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd);
  output[7] = HIGHBD_WRAPLOW(-step2[7] - step2[8], bd);
  output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd);
  output[9] = HIGHBD_WRAPLOW(-step2[6] + step2[9], bd);
  output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd);
  output[11] = HIGHBD_WRAPLOW(-step2[4] + step2[11], bd);
  output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd);
  output[13] = HIGHBD_WRAPLOW(-step2[2] + step2[13], bd);
  output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd);
  output[15] = HIGHBD_WRAPLOW(-step2[0] + step2[15], bd);
}
424
425static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
426 int stride, int bs, int tx_type, int bd) {
427 int r, c;
428 const int shift = bs < 32 ? 3 : 2;
429 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
430
431 if (tx_type == IDTX) {
432 for (r = 0; r < bs; ++r) {
433 for (c = 0; c < bs; ++c)
434 dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
435 dest += stride;
436 input += bs;
437 }
438 }
439}
440
441static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
442 int *sstride, int tx_type, int sizey,
443 int sizex) {
444 // Note that the transpose of src will be added to dst. In order to LR
445 // flip the addends (in dst coordinates), we UD flip the src. To UD flip
446 // the addends, we UD flip the dst.
447 switch (tx_type) {
448 case DCT_DCT:
449 case ADST_DCT:
450 case DCT_ADST:
451 case ADST_ADST:
452 case IDTX:
453 case V_DCT:
454 case H_DCT:
455 case V_ADST:
456 case H_ADST: break;
457 case FLIPADST_DCT:
458 case FLIPADST_ADST:
459 case V_FLIPADST:
460 // flip UD
461 FLIPUD_PTR(*dst, *dstride, sizey);
462 break;
463 case DCT_FLIPADST:
464 case ADST_FLIPADST:
465 case H_FLIPADST:
466 // flip LR
467 FLIPUD_PTR(*src, *sstride, sizex);
468 break;
469 case FLIPADST_FLIPADST:
470 // flip UD
471 FLIPUD_PTR(*dst, *dstride, sizey);
472 // flip LR
473 FLIPUD_PTR(*src, *sstride, sizex);
474 break;
475 default: assert(0); break;
476 }
477}
Yaowu Xuf883b422016-08-30 14:01:10 -0700478#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700479#endif // CONFIG_EXT_TX
480
Yaowu Xuf883b422016-08-30 14:01:10 -0700481void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
482 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700483 static const transform_2d IHT_4[] = {
484 { idct4_c, idct4_c }, // DCT_DCT
485 { iadst4_c, idct4_c }, // ADST_DCT
486 { idct4_c, iadst4_c }, // DCT_ADST
487 { iadst4_c, iadst4_c }, // ADST_ADST
488#if CONFIG_EXT_TX
489 { iadst4_c, idct4_c }, // FLIPADST_DCT
490 { idct4_c, iadst4_c }, // DCT_FLIPADST
491 { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST
492 { iadst4_c, iadst4_c }, // ADST_FLIPADST
493 { iadst4_c, iadst4_c }, // FLIPADST_ADST
494 { iidtx4_c, iidtx4_c }, // IDTX
495 { idct4_c, iidtx4_c }, // V_DCT
496 { iidtx4_c, idct4_c }, // H_DCT
497 { iadst4_c, iidtx4_c }, // V_ADST
498 { iidtx4_c, iadst4_c }, // H_ADST
499 { iadst4_c, iidtx4_c }, // V_FLIPADST
500 { iidtx4_c, iadst4_c }, // H_FLIPADST
501#endif // CONFIG_EXT_TX
502 };
503
504 int i, j;
505 tran_low_t tmp;
506 tran_low_t out[4][4];
507 tran_low_t *outp = &out[0][0];
508 int outstride = 4;
509
510 // inverse transform row vectors
511 for (i = 0; i < 4; ++i) {
512 IHT_4[tx_type].rows(input, out[i]);
513 input += 4;
514 }
515
516 // transpose
517 for (i = 1; i < 4; i++) {
518 for (j = 0; j < i; j++) {
519 tmp = out[i][j];
520 out[i][j] = out[j][i];
521 out[j][i] = tmp;
522 }
523 }
524
525 // inverse transform column vectors
526 for (i = 0; i < 4; ++i) {
527 IHT_4[tx_type].cols(out[i], out[i]);
528 }
529
530#if CONFIG_EXT_TX
531 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
532#endif
533
534 // Sum with the destination
535 for (i = 0; i < 4; ++i) {
536 for (j = 0; j < 4; ++j) {
537 int d = i * stride + j;
538 int s = j * outstride + i;
539 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
540 }
541 }
542}
543
544#if CONFIG_EXT_TX
Yaowu Xuf883b422016-08-30 14:01:10 -0700545void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
546 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700547 static const transform_2d IHT_4x8[] = {
548 { idct8_c, idct4_c }, // DCT_DCT
549 { iadst8_c, idct4_c }, // ADST_DCT
550 { idct8_c, iadst4_c }, // DCT_ADST
551 { iadst8_c, iadst4_c }, // ADST_ADST
552 { iadst8_c, idct4_c }, // FLIPADST_DCT
553 { idct8_c, iadst4_c }, // DCT_FLIPADST
554 { iadst8_c, iadst4_c }, // FLIPADST_FLIPADST
555 { iadst8_c, iadst4_c }, // ADST_FLIPADST
556 { iadst8_c, iadst4_c }, // FLIPADST_ADST
557 { iidtx8_c, iidtx4_c }, // IDTX
558 { idct8_c, iidtx4_c }, // V_DCT
559 { iidtx8_c, idct4_c }, // H_DCT
560 { iadst8_c, iidtx4_c }, // V_ADST
561 { iidtx8_c, iadst4_c }, // H_ADST
562 { iadst8_c, iidtx4_c }, // V_FLIPADST
563 { iidtx8_c, iadst4_c }, // H_FLIPADST
564 };
565
566 const int n = 4;
567 const int n2 = 8;
568 int i, j;
569 tran_low_t out[4][8], outtmp[4];
570 tran_low_t *outp = &out[0][0];
571 int outstride = n2;
572
573 // inverse transform row vectors and transpose
574 for (i = 0; i < n2; ++i) {
575 IHT_4x8[tx_type].rows(input, outtmp);
576 for (j = 0; j < n; ++j)
577 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
578 input += n;
579 }
580
581 // inverse transform column vectors
582 for (i = 0; i < n; ++i) {
583 IHT_4x8[tx_type].cols(out[i], out[i]);
584 }
585
586 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
587
588 // Sum with the destination
589 for (i = 0; i < n2; ++i) {
590 for (j = 0; j < n; ++j) {
591 int d = i * stride + j;
592 int s = j * outstride + i;
593 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
594 }
595 }
596}
597
Yaowu Xuf883b422016-08-30 14:01:10 -0700598void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
599 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700600 static const transform_2d IHT_8x4[] = {
601 { idct4_c, idct8_c }, // DCT_DCT
602 { iadst4_c, idct8_c }, // ADST_DCT
603 { idct4_c, iadst8_c }, // DCT_ADST
604 { iadst4_c, iadst8_c }, // ADST_ADST
605 { iadst4_c, idct8_c }, // FLIPADST_DCT
606 { idct4_c, iadst8_c }, // DCT_FLIPADST
607 { iadst4_c, iadst8_c }, // FLIPADST_FLIPADST
608 { iadst4_c, iadst8_c }, // ADST_FLIPADST
609 { iadst4_c, iadst8_c }, // FLIPADST_ADST
610 { iidtx4_c, iidtx8_c }, // IDTX
611 { idct4_c, iidtx8_c }, // V_DCT
612 { iidtx4_c, idct8_c }, // H_DCT
613 { iadst4_c, iidtx8_c }, // V_ADST
614 { iidtx4_c, iadst8_c }, // H_ADST
615 { iadst4_c, iidtx8_c }, // V_FLIPADST
616 { iidtx4_c, iadst8_c }, // H_FLIPADST
617 };
618 const int n = 4;
619 const int n2 = 8;
620
621 int i, j;
622 tran_low_t out[8][4], outtmp[8];
623 tran_low_t *outp = &out[0][0];
624 int outstride = n;
625
626 // inverse transform row vectors and transpose
627 for (i = 0; i < n; ++i) {
628 IHT_8x4[tx_type].rows(input, outtmp);
629 for (j = 0; j < n2; ++j)
630 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
631 input += n2;
632 }
633
634 // inverse transform column vectors
635 for (i = 0; i < n2; ++i) {
636 IHT_8x4[tx_type].cols(out[i], out[i]);
637 }
638
639 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
640
641 // Sum with the destination
642 for (i = 0; i < n; ++i) {
643 for (j = 0; j < n2; ++j) {
644 int d = i * stride + j;
645 int s = j * outstride + i;
646 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
647 }
648 }
649}
650
Yaowu Xuf883b422016-08-30 14:01:10 -0700651void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
652 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700653 static const transform_2d IHT_8x16[] = {
654 { idct16_c, idct8_c }, // DCT_DCT
655 { iadst16_c, idct8_c }, // ADST_DCT
656 { idct16_c, iadst8_c }, // DCT_ADST
657 { iadst16_c, iadst8_c }, // ADST_ADST
658 { iadst16_c, idct8_c }, // FLIPADST_DCT
659 { idct16_c, iadst8_c }, // DCT_FLIPADST
660 { iadst16_c, iadst8_c }, // FLIPADST_FLIPADST
661 { iadst16_c, iadst8_c }, // ADST_FLIPADST
662 { iadst16_c, iadst8_c }, // FLIPADST_ADST
663 { iidtx16_c, iidtx8_c }, // IDTX
664 { idct16_c, iidtx8_c }, // V_DCT
665 { iidtx16_c, idct8_c }, // H_DCT
666 { iadst16_c, iidtx8_c }, // V_ADST
667 { iidtx16_c, iadst8_c }, // H_ADST
668 { iadst16_c, iidtx8_c }, // V_FLIPADST
669 { iidtx16_c, iadst8_c }, // H_FLIPADST
670 };
671
672 const int n = 8;
673 const int n2 = 16;
674 int i, j;
675 tran_low_t out[8][16], outtmp[8];
676 tran_low_t *outp = &out[0][0];
677 int outstride = n2;
678
679 // inverse transform row vectors and transpose
680 for (i = 0; i < n2; ++i) {
681 IHT_8x16[tx_type].rows(input, outtmp);
682 for (j = 0; j < n; ++j)
683 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
684 input += n;
685 }
686
687 // inverse transform column vectors
688 for (i = 0; i < n; ++i) {
689 IHT_8x16[tx_type].cols(out[i], out[i]);
690 }
691
692 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
693
694 // Sum with the destination
695 for (i = 0; i < n2; ++i) {
696 for (j = 0; j < n; ++j) {
697 int d = i * stride + j;
698 int s = j * outstride + i;
699 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
700 }
701 }
702}
703
Yaowu Xuf883b422016-08-30 14:01:10 -0700704void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
705 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700706 static const transform_2d IHT_16x8[] = {
707 { idct8_c, idct16_c }, // DCT_DCT
708 { iadst8_c, idct16_c }, // ADST_DCT
709 { idct8_c, iadst16_c }, // DCT_ADST
710 { iadst8_c, iadst16_c }, // ADST_ADST
711 { iadst8_c, idct16_c }, // FLIPADST_DCT
712 { idct8_c, iadst16_c }, // DCT_FLIPADST
713 { iadst8_c, iadst16_c }, // FLIPADST_FLIPADST
714 { iadst8_c, iadst16_c }, // ADST_FLIPADST
715 { iadst8_c, iadst16_c }, // FLIPADST_ADST
716 { iidtx8_c, iidtx16_c }, // IDTX
717 { idct8_c, iidtx16_c }, // V_DCT
718 { iidtx8_c, idct16_c }, // H_DCT
719 { iadst8_c, iidtx16_c }, // V_ADST
720 { iidtx8_c, iadst16_c }, // H_ADST
721 { iadst8_c, iidtx16_c }, // V_FLIPADST
722 { iidtx8_c, iadst16_c }, // H_FLIPADST
723 };
724 const int n = 8;
725 const int n2 = 16;
726
727 int i, j;
728 tran_low_t out[16][8], outtmp[16];
729 tran_low_t *outp = &out[0][0];
730 int outstride = n;
731
732 // inverse transform row vectors and transpose
733 for (i = 0; i < n; ++i) {
734 IHT_16x8[tx_type].rows(input, outtmp);
735 for (j = 0; j < n2; ++j)
736 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
737 input += n2;
738 }
739
740 // inverse transform column vectors
741 for (i = 0; i < n2; ++i) {
742 IHT_16x8[tx_type].cols(out[i], out[i]);
743 }
744
745 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
746
747 // Sum with the destination
748 for (i = 0; i < n; ++i) {
749 for (j = 0; j < n2; ++j) {
750 int d = i * stride + j;
751 int s = j * outstride + i;
752 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
753 }
754 }
755}
756
Yaowu Xuf883b422016-08-30 14:01:10 -0700757void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
758 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700759 static const transform_2d IHT_16x32[] = {
760 { idct32_c, idct16_c }, // DCT_DCT
761 { ihalfright32_c, idct16_c }, // ADST_DCT
762 { idct32_c, iadst16_c }, // DCT_ADST
763 { ihalfright32_c, iadst16_c }, // ADST_ADST
764 { ihalfright32_c, idct16_c }, // FLIPADST_DCT
765 { idct32_c, iadst16_c }, // DCT_FLIPADST
766 { ihalfright32_c, iadst16_c }, // FLIPADST_FLIPADST
767 { ihalfright32_c, iadst16_c }, // ADST_FLIPADST
768 { ihalfright32_c, iadst16_c }, // FLIPADST_ADST
769 { iidtx32_c, iidtx16_c }, // IDTX
770 { idct32_c, iidtx16_c }, // V_DCT
771 { iidtx32_c, idct16_c }, // H_DCT
772 { ihalfright32_c, iidtx16_c }, // V_ADST
773 { iidtx32_c, iadst16_c }, // H_ADST
774 { ihalfright32_c, iidtx16_c }, // V_FLIPADST
775 { iidtx32_c, iadst16_c }, // H_FLIPADST
776 };
777
778 const int n = 16;
779 const int n2 = 32;
780 int i, j;
781 tran_low_t out[16][32], outtmp[16];
782 tran_low_t *outp = &out[0][0];
783 int outstride = n2;
784
785 // inverse transform row vectors and transpose
786 for (i = 0; i < n2; ++i) {
787 IHT_16x32[tx_type].rows(input, outtmp);
788 for (j = 0; j < n; ++j)
789 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
790 input += n;
791 }
792
793 // inverse transform column vectors
794 for (i = 0; i < n; ++i) {
795 IHT_16x32[tx_type].cols(out[i], out[i]);
796 }
797
798 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
799
800 // Sum with the destination
801 for (i = 0; i < n2; ++i) {
802 for (j = 0; j < n; ++j) {
803 int d = i * stride + j;
804 int s = j * outstride + i;
805 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
806 }
807 }
808}
809
Yaowu Xuf883b422016-08-30 14:01:10 -0700810void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
811 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700812 static const transform_2d IHT_32x16[] = {
813 { idct16_c, idct32_c }, // DCT_DCT
814 { iadst16_c, idct32_c }, // ADST_DCT
815 { idct16_c, ihalfright32_c }, // DCT_ADST
816 { iadst16_c, ihalfright32_c }, // ADST_ADST
817 { iadst16_c, idct32_c }, // FLIPADST_DCT
818 { idct16_c, ihalfright32_c }, // DCT_FLIPADST
819 { iadst16_c, ihalfright32_c }, // FLIPADST_FLIPADST
820 { iadst16_c, ihalfright32_c }, // ADST_FLIPADST
821 { iadst16_c, ihalfright32_c }, // FLIPADST_ADST
822 { iidtx16_c, iidtx32_c }, // IDTX
823 { idct16_c, iidtx32_c }, // V_DCT
824 { iidtx16_c, idct32_c }, // H_DCT
825 { iadst16_c, iidtx32_c }, // V_ADST
826 { iidtx16_c, ihalfright32_c }, // H_ADST
827 { iadst16_c, iidtx32_c }, // V_FLIPADST
828 { iidtx16_c, ihalfright32_c }, // H_FLIPADST
829 };
830 const int n = 16;
831 const int n2 = 32;
832
833 int i, j;
834 tran_low_t out[32][16], outtmp[32];
835 tran_low_t *outp = &out[0][0];
836 int outstride = n;
837
838 // inverse transform row vectors and transpose
839 for (i = 0; i < n; ++i) {
840 IHT_32x16[tx_type].rows(input, outtmp);
841 for (j = 0; j < n2; ++j)
842 out[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
843 input += n2;
844 }
845
846 // inverse transform column vectors
847 for (i = 0; i < n2; ++i) {
848 IHT_32x16[tx_type].cols(out[i], out[i]);
849 }
850
851 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
852
853 // Sum with the destination
854 for (i = 0; i < n; ++i) {
855 for (j = 0; j < n2; ++j) {
856 int d = i * stride + j;
857 int s = j * outstride + i;
858 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
859 }
860 }
861}
862#endif // CONFIG_EXT_TX
863
Yaowu Xuf883b422016-08-30 14:01:10 -0700864void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
865 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700866 static const transform_2d IHT_8[] = {
867 { idct8_c, idct8_c }, // DCT_DCT
868 { iadst8_c, idct8_c }, // ADST_DCT
869 { idct8_c, iadst8_c }, // DCT_ADST
870 { iadst8_c, iadst8_c }, // ADST_ADST
871#if CONFIG_EXT_TX
872 { iadst8_c, idct8_c }, // FLIPADST_DCT
873 { idct8_c, iadst8_c }, // DCT_FLIPADST
874 { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST
875 { iadst8_c, iadst8_c }, // ADST_FLIPADST
876 { iadst8_c, iadst8_c }, // FLIPADST_ADST
877 { iidtx8_c, iidtx8_c }, // IDTX
878 { idct8_c, iidtx8_c }, // V_DCT
879 { iidtx8_c, idct8_c }, // H_DCT
880 { iadst8_c, iidtx8_c }, // V_ADST
881 { iidtx8_c, iadst8_c }, // H_ADST
882 { iadst8_c, iidtx8_c }, // V_FLIPADST
883 { iidtx8_c, iadst8_c }, // H_FLIPADST
884#endif // CONFIG_EXT_TX
885 };
886
887 int i, j;
888 tran_low_t tmp;
889 tran_low_t out[8][8];
890 tran_low_t *outp = &out[0][0];
891 int outstride = 8;
892
893 // inverse transform row vectors
894 for (i = 0; i < 8; ++i) {
895 IHT_8[tx_type].rows(input, out[i]);
896 input += 8;
897 }
898
899 // transpose
900 for (i = 1; i < 8; i++) {
901 for (j = 0; j < i; j++) {
902 tmp = out[i][j];
903 out[i][j] = out[j][i];
904 out[j][i] = tmp;
905 }
906 }
907
908 // inverse transform column vectors
909 for (i = 0; i < 8; ++i) {
910 IHT_8[tx_type].cols(out[i], out[i]);
911 }
912
913#if CONFIG_EXT_TX
914 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
915#endif
916
917 // Sum with the destination
918 for (i = 0; i < 8; ++i) {
919 for (j = 0; j < 8; ++j) {
920 int d = i * stride + j;
921 int s = j * outstride + i;
922 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
923 }
924 }
925}
926
// 2-D inverse 16x16 hybrid transform: inverse-transform the rows, transpose,
// inverse-transform the columns, then round (by 2^6) and accumulate into the
// 8-bit destination.
// FLIPADST variants reuse the plain iadst kernel; flipping is deferred to
// maybe_flip_strides().
void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  // { column kernel, row kernel } pair for each transform type.
  static const transform_2d IHT_16[] = {
    { idct16_c, idct16_c },    // DCT_DCT
    { iadst16_c, idct16_c },   // ADST_DCT
    { idct16_c, iadst16_c },   // DCT_ADST
    { iadst16_c, iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { iadst16_c, idct16_c },   // FLIPADST_DCT
    { idct16_c, iadst16_c },   // DCT_FLIPADST
    { iadst16_c, iadst16_c },  // FLIPADST_FLIPADST
    { iadst16_c, iadst16_c },  // ADST_FLIPADST
    { iadst16_c, iadst16_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx16_c },  // IDTX
    { idct16_c, iidtx16_c },   // V_DCT
    { iidtx16_c, idct16_c },   // H_DCT
    { iadst16_c, iidtx16_c },  // V_ADST
    { iidtx16_c, iadst16_c },  // H_ADST
    { iadst16_c, iidtx16_c },  // V_FLIPADST
    { iidtx16_c, iadst16_c },  // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp;
  tran_low_t out[16][16];  // Intermediate transform output.
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // inverse transform row vectors
  for (i = 0; i < 16; ++i) {
    IHT_16[tx_type].rows(input, out[i]);
    input += 16;
  }

  // transpose (in place, swapping across the main diagonal)
  for (i = 1; i < 16; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 16; ++i) {
    IHT_16[tx_type].cols(out[i], out[i]);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, redirect dest/output strides so the add below
  // applies the flip implicitly.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
#endif

  // Sum with the destination (outp is read in column-major order).
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
989
990#if CONFIG_EXT_TX
Yaowu Xuf883b422016-08-30 14:01:10 -0700991void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
992 int tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700993 static const transform_2d IHT_32[] = {
994 { idct32_c, idct32_c }, // DCT_DCT
995 { ihalfright32_c, idct32_c }, // ADST_DCT
996 { idct32_c, ihalfright32_c }, // DCT_ADST
997 { ihalfright32_c, ihalfright32_c }, // ADST_ADST
998 { ihalfright32_c, idct32_c }, // FLIPADST_DCT
999 { idct32_c, ihalfright32_c }, // DCT_FLIPADST
1000 { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST
1001 { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST
1002 { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST
1003 { iidtx32_c, iidtx32_c }, // IDTX
1004 { idct32_c, iidtx32_c }, // V_DCT
1005 { iidtx32_c, idct32_c }, // H_DCT
1006 { ihalfright32_c, iidtx16_c }, // V_ADST
1007 { iidtx16_c, ihalfright32_c }, // H_ADST
1008 { ihalfright32_c, iidtx16_c }, // V_FLIPADST
1009 { iidtx16_c, ihalfright32_c }, // H_FLIPADST
1010 };
1011
1012 int i, j;
1013 tran_low_t tmp;
1014 tran_low_t out[32][32];
1015 tran_low_t *outp = &out[0][0];
1016 int outstride = 32;
1017
1018 // inverse transform row vectors
1019 for (i = 0; i < 32; ++i) {
1020 IHT_32[tx_type].rows(input, out[i]);
1021 input += 32;
1022 }
1023
1024 // transpose
1025 for (i = 1; i < 32; i++) {
1026 for (j = 0; j < i; j++) {
1027 tmp = out[i][j];
1028 out[i][j] = out[j][i];
1029 out[j][i] = tmp;
1030 }
1031 }
1032
1033 // inverse transform column vectors
1034 for (i = 0; i < 32; ++i) {
1035 IHT_32[tx_type].cols(out[i], out[i]);
1036 }
1037
1038 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
1039
1040 // Sum with the destination
1041 for (i = 0; i < 32; ++i) {
1042 for (j = 0; j < 32; ++j) {
1043 int d = i * stride + j;
1044 int s = j * outstride + i;
1045 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
1046 }
1047 }
1048}
1049#endif // CONFIG_EXT_TX
1050
1051// idct
Yaowu Xuf883b422016-08-30 14:01:10 -07001052void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
1053 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001054 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07001055 aom_idct4x4_16_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001056 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001057 aom_idct4x4_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001058}
1059
Yaowu Xuf883b422016-08-30 14:01:10 -07001060void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
1061 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001062 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07001063 aom_iwht4x4_16_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001064 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001065 aom_iwht4x4_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001066}
1067
Yaowu Xuf883b422016-08-30 14:01:10 -07001068void av1_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
1069 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001070 // If dc is 1, then input[0] is the reconstructed value, do not need
1071 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
1072
1073 // The calculation can be simplified if there are not many non-zero dct
1074 // coefficients. Use eobs to decide what to do.
Yaowu Xuf883b422016-08-30 14:01:10 -07001075 // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
Yaowu Xuc27fc142016-08-22 16:08:15 -07001076 // Combine that with code here.
1077 if (eob == 1)
1078 // DC only DCT coefficient
Yaowu Xuf883b422016-08-30 14:01:10 -07001079 aom_idct8x8_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001080 else if (eob <= 12)
Yaowu Xuf883b422016-08-30 14:01:10 -07001081 aom_idct8x8_12_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001082 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001083 aom_idct8x8_64_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001084}
1085
Yaowu Xuf883b422016-08-30 14:01:10 -07001086void av1_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
1087 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001088 /* The calculation can be simplified if there are not many non-zero dct
1089 * coefficients. Use eobs to separate different cases. */
1090 if (eob == 1) /* DC only DCT coefficient. */
Yaowu Xuf883b422016-08-30 14:01:10 -07001091 aom_idct16x16_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001092 else if (eob <= 10)
Yaowu Xuf883b422016-08-30 14:01:10 -07001093 aom_idct16x16_10_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001094 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001095 aom_idct16x16_256_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001096}
1097
Yaowu Xuf883b422016-08-30 14:01:10 -07001098void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
1099 int eob) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001100 if (eob == 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07001101 aom_idct32x32_1_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001102 else if (eob <= 34)
1103 // non-zero coeff only in upper-left 8x8
Yaowu Xuf883b422016-08-30 14:01:10 -07001104 aom_idct32x32_34_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001105 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001106 aom_idct32x32_1024_add(input, dest, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001107}
1108
// Dispatch the 4x4 inverse transform by tx_type and add the residual into
// dest. In lossless mode only WHT/DCT_DCT is legal; otherwise DCT_DCT uses
// the eob-aware fast path, and the remaining types go through the hybrid
// transform (C-only kernels for the identity/1-D EXT_TX types).
void av1_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type, int lossless) {
  if (lossless) {
    // Lossless coding uses the Walsh-Hadamard transform exclusively.
    assert(tx_type == DCT_DCT);
    av1_iwht4x4_add(input, dest, stride, eob);
    return;
  }

  switch (tx_type) {
    case DCT_DCT: av1_idct4x4_add(input, dest, stride, eob); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST: av1_iht4x4_16_add(input, dest, stride, tx_type); break;
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht4x4_16_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 4, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
1142
1143#if CONFIG_EXT_TX
// 4x8 inverse transform + add. eob is unused: every tx_type goes through the
// single full 4x8 hybrid kernel (no reduced-coefficient fast path here).
void av1_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht4x8_32_add(input, dest, stride, tx_type);
}
1149
// 8x4 inverse transform + add. eob is unused: every tx_type goes through the
// single full 8x4 hybrid kernel (no reduced-coefficient fast path here).
void av1_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht8x4_32_add(input, dest, stride, tx_type);
}
1155
// 8x16 inverse transform + add. eob is unused: every tx_type goes through
// the single full 8x16 hybrid kernel.
void av1_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest, int stride,
                           int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht8x16_128_add(input, dest, stride, tx_type);
}
1161
// 16x8 inverse transform + add. eob is unused: every tx_type goes through
// the single full 16x8 hybrid kernel.
void av1_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest, int stride,
                           int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht16x8_128_add(input, dest, stride, tx_type);
}
1167
// 16x32 inverse transform + add. eob is unused: every tx_type goes through
// the single full 16x32 hybrid kernel.
void av1_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht16x32_512_add(input, dest, stride, tx_type);
}
1173
// 32x16 inverse transform + add. eob is unused: every tx_type goes through
// the single full 32x16 hybrid kernel.
void av1_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht32x16_512_add(input, dest, stride, tx_type);
}
1179#endif // CONFIG_EXT_TX
1180
// Dispatch the 8x8 inverse transform by tx_type and add the residual into
// dest. DCT_DCT uses the eob-aware fast path; other types go through the
// hybrid transform (C-only kernels for the identity/1-D EXT_TX types).
void av1_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
                          int eob, TX_TYPE tx_type) {
  switch (tx_type) {
    case DCT_DCT: av1_idct8x8_add(input, dest, stride, eob); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: av1_iht8x8_64_add(input, dest, stride, tx_type); break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST: av1_iht8x8_64_add(input, dest, stride, tx_type); break;
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht8x8_64_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 8, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
1208
// Dispatch the 16x16 inverse transform by tx_type and add the residual into
// dest. DCT_DCT uses the eob-aware fast path; other types go through the
// hybrid transform (C-only kernels for the identity/1-D EXT_TX types).
void av1_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, TX_TYPE tx_type) {
  switch (tx_type) {
    case DCT_DCT: av1_idct16x16_add(input, dest, stride, eob); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: av1_iht16x16_256_add(input, dest, stride, tx_type); break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_iht16x16_256_add(input, dest, stride, tx_type);
      break;
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht16x16_256_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 16, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
1238
// Dispatch the 32x32 inverse transform by tx_type and add the residual into
// dest. DCT_DCT uses the eob-aware fast path; all EXT_TX types go through
// the C hybrid-transform kernel.
void av1_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, TX_TYPE tx_type) {
  switch (tx_type) {
    case DCT_DCT: av1_idct32x32_add(input, dest, stride, eob); break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_iht32x32_1024_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 32, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
1265
Yaowu Xuf883b422016-08-30 14:01:10 -07001266#if CONFIG_AOM_HIGHBITDEPTH
// High-bit-depth 2-D inverse 4x4 hybrid transform: inverse-transform the
// rows, transpose, inverse-transform the columns, then round (by 2^4) and
// accumulate into the 16-bit destination, clipping to bit depth bd.
// FLIPADST entries reuse the plain iadst kernel; flipping is deferred to
// maybe_flip_strides16().
void av1_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // { column kernel, row kernel } pair for each transform type.
  static const highbd_transform_2d HIGH_IHT_4[] = {
    { aom_highbd_idct4_c, aom_highbd_idct4_c },    // DCT_DCT
    { aom_highbd_iadst4_c, aom_highbd_idct4_c },   // ADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst4_c },   // DCT_ADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_highbd_iadst4_c, aom_highbd_idct4_c },   // FLIPADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst4_c },   // DCT_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // ADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst4_c },  // FLIPADST_ADST
    { highbd_iidtx4_c, highbd_iidtx4_c },          // IDTX
    { aom_highbd_idct4_c, highbd_iidtx4_c },       // V_DCT
    { highbd_iidtx4_c, aom_highbd_idct4_c },       // H_DCT
    { aom_highbd_iadst4_c, highbd_iidtx4_c },      // V_ADST
    { highbd_iidtx4_c, aom_highbd_iadst4_c },      // H_ADST
    { aom_highbd_iadst4_c, highbd_iidtx4_c },      // V_FLIPADST
    { highbd_iidtx4_c, aom_highbd_iadst4_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  // dest8 carries a packed uint16_t surface; unwrap it.
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[4][4];  // Intermediate transform output.
  tran_low_t *outp = &out[0][0];
  int outstride = 4;

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
    HIGH_IHT_4[tx_type].rows(input, out[i], bd);
    input += 4;
  }

  // transpose (in place, swapping across the main diagonal)
  for (i = 1; i < 4; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
    HIGH_IHT_4[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, redirect dest/output strides so the add below
  // applies the flip implicitly.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
#endif

  // Sum with the destination (outp is read in column-major order).
  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4), bd);
    }
  }
}
1332
1333#if CONFIG_EXT_TX
// High-bit-depth inverse 4x8 (4 wide, 8 tall) hybrid transform. Row
// transforms are 4-point, column transforms 8-point; a sqrt(2) rescale for
// the rectangular size is folded into the row pass, which also transposes
// into the column layout. Result is rounded by 2^5 and added into the
// 16-bit destination, clipped to bit depth bd.
void av1_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // { column kernel (8-pt), row kernel (4-pt) } pair per transform type.
  static const highbd_transform_2d HIGH_IHT_4x8[] = {
    { aom_highbd_idct8_c, aom_highbd_idct4_c },    // DCT_DCT
    { aom_highbd_iadst8_c, aom_highbd_idct4_c },   // ADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst4_c },   // DCT_ADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // ADST_ADST
    { aom_highbd_iadst8_c, aom_highbd_idct4_c },   // FLIPADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst4_c },   // DCT_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // ADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst4_c },  // FLIPADST_ADST
    { highbd_iidtx8_c, highbd_iidtx4_c },          // IDTX
    { aom_highbd_idct8_c, highbd_iidtx4_c },       // V_DCT
    { highbd_iidtx8_c, aom_highbd_idct4_c },       // H_DCT
    { aom_highbd_iadst8_c, highbd_iidtx4_c },      // V_ADST
    { highbd_iidtx8_c, aom_highbd_iadst4_c },      // H_ADST
    { aom_highbd_iadst8_c, highbd_iidtx4_c },      // V_FLIPADST
    { highbd_iidtx8_c, aom_highbd_iadst4_c },      // H_FLIPADST
  };
  const int n = 4;   // Block width (row transform length).
  const int n2 = 8;  // Block height (column transform length).

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors, and transpose (with sqrt(2) rescale)
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_4x8[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n; ++j) {
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    }
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    HIGH_IHT_4x8[tx_type].cols(out[i], out[i], bd);
  }

  // For FLIPADST variants, redirect dest/output strides so the add below
  // applies the flip implicitly.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination (outp is read in column-major order).
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    }
  }
}
1391
// High-bit-depth inverse 8x4 (8 wide, 4 tall) hybrid transform. Row
// transforms are 8-point, column transforms 4-point; a sqrt(2) rescale for
// the rectangular size is folded into the row pass, which also transposes
// into the column layout. Result is rounded by 2^5 and added into the
// 16-bit destination, clipped to bit depth bd.
void av1_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // { column kernel (4-pt), row kernel (8-pt) } pair per transform type.
  static const highbd_transform_2d HIGH_IHT_8x4[] = {
    { aom_highbd_idct4_c, aom_highbd_idct8_c },    // DCT_DCT
    { aom_highbd_iadst4_c, aom_highbd_idct8_c },   // ADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst8_c },   // DCT_ADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // ADST_ADST
    { aom_highbd_iadst4_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    { aom_highbd_idct4_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    { aom_highbd_iadst4_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    { highbd_iidtx4_c, highbd_iidtx8_c },          // IDTX
    { aom_highbd_idct4_c, highbd_iidtx8_c },       // V_DCT
    { highbd_iidtx4_c, aom_highbd_idct8_c },       // H_DCT
    { aom_highbd_iadst4_c, highbd_iidtx8_c },      // V_ADST
    { highbd_iidtx4_c, aom_highbd_iadst8_c },      // H_ADST
    { aom_highbd_iadst4_c, highbd_iidtx8_c },      // V_FLIPADST
    { highbd_iidtx4_c, aom_highbd_iadst8_c },      // H_FLIPADST
  };
  const int n = 4;   // Block height (column transform length).
  const int n2 = 8;  // Block width (row transform length).

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors, and transpose (with sqrt(2) rescale)
  for (i = 0; i < n; ++i) {
    HIGH_IHT_8x4[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n2; ++j) {
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    }
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_8x4[tx_type].cols(out[i], out[i], bd);
  }

  // For FLIPADST variants, redirect dest/output strides so the add below
  // applies the flip implicitly.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination (outp is read in column-major order).
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    }
  }
}
1449
// High-bit-depth inverse 8x16 (8 wide, 16 tall) hybrid transform. Row
// transforms are 8-point, column transforms 16-point; the row pass folds in
// the sqrt(2) rectangular rescale and transposes into the column layout.
// Result is rounded by 2^6 and added into the 16-bit destination, clipped
// to bit depth bd.
void av1_highbd_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int tx_type, int bd) {
  // { column kernel (16-pt), row kernel (8-pt) } pair per transform type.
  static const highbd_transform_2d HIGH_IHT_8x16[] = {
    { aom_highbd_idct16_c, aom_highbd_idct8_c },    // DCT_DCT
    { aom_highbd_iadst16_c, aom_highbd_idct8_c },   // ADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst8_c },   // DCT_ADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // ADST_ADST
    { aom_highbd_iadst16_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    { highbd_iidtx16_c, highbd_iidtx8_c },          // IDTX
    { aom_highbd_idct16_c, highbd_iidtx8_c },       // V_DCT
    { highbd_iidtx16_c, aom_highbd_idct8_c },       // H_DCT
    { aom_highbd_iadst16_c, highbd_iidtx8_c },      // V_ADST
    { highbd_iidtx16_c, aom_highbd_iadst8_c },      // H_ADST
    { aom_highbd_iadst16_c, highbd_iidtx8_c },      // V_FLIPADST
    { highbd_iidtx16_c, aom_highbd_iadst8_c },      // H_FLIPADST
  };
  const int n = 8;    // Block width (row transform length).
  const int n2 = 16;  // Block height (column transform length).

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors, and transpose (with sqrt(2) rescale)
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_8x16[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    HIGH_IHT_8x16[tx_type].cols(out[i], out[i], bd);
  }

  // For FLIPADST variants, redirect dest/output strides so the add below
  // applies the flip implicitly.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination (outp is read in column-major order).
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1506
// High-bit-depth inverse 16x8 (16 wide, 8 tall) hybrid transform. Row
// transforms are 16-point, column transforms 8-point; the row pass folds in
// the sqrt(2) rectangular rescale and transposes into the column layout.
// Result is rounded by 2^6 and added into the 16-bit destination, clipped
// to bit depth bd.
void av1_highbd_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int tx_type, int bd) {
  // { column kernel (8-pt), row kernel (16-pt) } pair per transform type.
  static const highbd_transform_2d HIGH_IHT_16x8[] = {
    { aom_highbd_idct8_c, aom_highbd_idct16_c },    // DCT_DCT
    { aom_highbd_iadst8_c, aom_highbd_idct16_c },   // ADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst16_c },   // DCT_ADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // ADST_ADST
    { aom_highbd_iadst8_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst16_c },   // DCT_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    { highbd_iidtx8_c, highbd_iidtx16_c },          // IDTX
    { aom_highbd_idct8_c, highbd_iidtx16_c },       // V_DCT
    { highbd_iidtx8_c, aom_highbd_idct16_c },       // H_DCT
    { aom_highbd_iadst8_c, highbd_iidtx16_c },      // V_ADST
    { highbd_iidtx8_c, aom_highbd_iadst16_c },      // H_ADST
    { aom_highbd_iadst8_c, highbd_iidtx16_c },      // V_FLIPADST
    { highbd_iidtx8_c, aom_highbd_iadst16_c },      // H_FLIPADST
  };
  const int n = 8;    // Block height (column transform length).
  const int n2 = 16;  // Block width (row transform length).

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[16][8], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors, and transpose (with sqrt(2) rescale)
  for (i = 0; i < n; ++i) {
    HIGH_IHT_16x8[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n2; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_16x8[tx_type].cols(out[i], out[i], bd);
  }

  // For FLIPADST variants, redirect dest/output strides so the add below
  // applies the flip implicitly.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination (outp is read in column-major order).
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1563
// High-bit-depth inverse 16x32 (16 wide, 32 tall) hybrid transform. Row
// transforms are 16-point, column transforms 32-point (ihalfright32 serves
// as the 32-point ADST); the row pass folds in the sqrt(2) rectangular
// rescale and transposes into the column layout. Result is rounded by 2^6
// and added into the 16-bit destination, clipped to bit depth bd.
void av1_highbd_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest8,
                                   int stride, int tx_type, int bd) {
  // { column kernel (32-pt), row kernel (16-pt) } pair per transform type.
  static const highbd_transform_2d HIGH_IHT_16x32[] = {
    { aom_highbd_idct32_c, aom_highbd_idct16_c },    // DCT_DCT
    { highbd_ihalfright32_c, aom_highbd_idct16_c },  // ADST_DCT
    { aom_highbd_idct32_c, aom_highbd_iadst16_c },   // DCT_ADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // ADST_ADST
    { highbd_ihalfright32_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    { aom_highbd_idct32_c, aom_highbd_iadst16_c },    // DCT_FLIPADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    { highbd_ihalfright32_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    { highbd_iidtx32_c, highbd_iidtx16_c },           // IDTX
    { aom_highbd_idct32_c, highbd_iidtx16_c },        // V_DCT
    { highbd_iidtx32_c, aom_highbd_idct16_c },        // H_DCT
    { highbd_ihalfright32_c, highbd_iidtx16_c },      // V_ADST
    { highbd_iidtx32_c, aom_highbd_iadst16_c },       // H_ADST
    { highbd_ihalfright32_c, highbd_iidtx16_c },      // V_FLIPADST
    { highbd_iidtx32_c, aom_highbd_iadst16_c },       // H_FLIPADST
  };
  const int n = 16;   // Block width (row transform length).
  const int n2 = 32;  // Block height (column transform length).

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[16][32], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors, and transpose (with sqrt(2) rescale)
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_16x32[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    HIGH_IHT_16x32[tx_type].cols(out[i], out[i], bd);
  }

  // For FLIPADST variants, redirect dest/output strides so the add below
  // applies the flip implicitly.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);

  // Sum with the destination (outp is read in column-major order).
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1620
// High-bit-depth inverse 32x16 (32 wide, 16 tall) hybrid transform. Row
// transforms are 32-point (ihalfright32 serves as the 32-point ADST),
// column transforms 16-point; the row pass folds in the sqrt(2) rectangular
// rescale and transposes into the column layout. Result is rounded by 2^6
// and added into the 16-bit destination, clipped to bit depth bd.
void av1_highbd_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest8,
                                   int stride, int tx_type, int bd) {
  // { column kernel (16-pt), row kernel (32-pt) } pair per transform type.
  static const highbd_transform_2d HIGH_IHT_32x16[] = {
    { aom_highbd_idct16_c, aom_highbd_idct32_c },     // DCT_DCT
    { aom_highbd_iadst16_c, aom_highbd_idct32_c },    // ADST_DCT
    { aom_highbd_idct16_c, highbd_ihalfright32_c },   // DCT_ADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // ADST_ADST
    { aom_highbd_iadst16_c, aom_highbd_idct32_c },    // FLIPADST_DCT
    { aom_highbd_idct16_c, highbd_ihalfright32_c },   // DCT_FLIPADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // ADST_FLIPADST
    { aom_highbd_iadst16_c, highbd_ihalfright32_c },  // FLIPADST_ADST
    { highbd_iidtx16_c, highbd_iidtx32_c },           // IDTX
    { aom_highbd_idct16_c, highbd_iidtx32_c },        // V_DCT
    { highbd_iidtx16_c, aom_highbd_idct32_c },        // H_DCT
    { aom_highbd_iadst16_c, highbd_iidtx32_c },       // V_ADST
    { highbd_iidtx16_c, highbd_ihalfright32_c },      // H_ADST
    { aom_highbd_iadst16_c, highbd_iidtx32_c },       // V_FLIPADST
    { highbd_iidtx16_c, highbd_ihalfright32_c },      // H_FLIPADST
  };
  const int n = 16;   // Block height (column transform length).
  const int n2 = 32;  // Block width (row transform length).

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t out[32][16], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors, and transpose (with sqrt(2) rescale)
  for (i = 0; i < n; ++i) {
    HIGH_IHT_32x16[tx_type].rows(input, outtmp, bd);
    for (j = 0; j < n2; ++j)
      out[j][i] =
          HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    HIGH_IHT_32x16[tx_type].cols(out[i], out[i], bd);
  }

  // For FLIPADST variants, redirect dest/output strides so the add below
  // applies the flip implicitly.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);

  // Sum with the destination (outp is read in column-major order).
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1677#endif // CONFIG_EXT_TX
1678
// 2-D high-bitdepth inverse hybrid transform for an 8x8 block, C reference
// version.  Inverse-transforms the 8 rows, transposes, applies the column
// transform in place, then adds the rounded result (>> 5 for 8x8) to the
// 16-bit destination, clipping to 'bd' bits.
void av1_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
                                int stride, int tx_type, int bd) {
  // Indexed by tx_type; each entry supplies the 1-D column and row
  // transforms (both length 8 here).
  static const highbd_transform_2d HIGH_IHT_8[] = {
    { aom_highbd_idct8_c, aom_highbd_idct8_c },    // DCT_DCT
    { aom_highbd_iadst8_c, aom_highbd_idct8_c },   // ADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst8_c },   // DCT_ADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    // FLIPADST shares the ADST kernels; the flipping is applied later via
    // maybe_flip_strides16().
    { aom_highbd_iadst8_c, aom_highbd_idct8_c },   // FLIPADST_DCT
    { aom_highbd_idct8_c, aom_highbd_iadst8_c },   // DCT_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // ADST_FLIPADST
    { aom_highbd_iadst8_c, aom_highbd_iadst8_c },  // FLIPADST_ADST
    { highbd_iidtx8_c, highbd_iidtx8_c },          // IDTX
    { aom_highbd_idct8_c, highbd_iidtx8_c },       // V_DCT
    { highbd_iidtx8_c, aom_highbd_idct8_c },       // H_DCT
    { aom_highbd_iadst8_c, highbd_iidtx8_c },      // V_ADST
    { highbd_iidtx8_c, aom_highbd_iadst8_c },      // H_ADST
    { aom_highbd_iadst8_c, highbd_iidtx8_c },      // V_FLIPADST
    { highbd_iidtx8_c, aom_highbd_iadst8_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[8][8];
  tran_low_t *outp = &out[0][0];
  int outstride = 8;

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
    HIGH_IHT_8[tx_type].rows(input, out[i], bd);
    input += 8;
  }

  // transpose (in-place swap of the lower and upper triangles)
  for (i = 1; i < 8; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 8; ++i) {
    HIGH_IHT_8[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // Adjusts traversal direction/offsets for the FLIPADST-type transforms.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
#endif

  // Sum with the destination
  for (i = 0; i < 8; ++i) {
    for (j = 0; j < 8; ++j) {
      int d = i * stride + j;      // destination index (row-major)
      int s = j * outstride + i;   // transposed source index
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
    }
  }
}
1744
// 2-D high-bitdepth inverse hybrid transform for a 16x16 block, C reference
// version.  Inverse-transforms the 16 rows, transposes, applies the column
// transform in place, then adds the rounded result (>> 6) to the 16-bit
// destination, clipping to 'bd' bits.
void av1_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
                                   int stride, int tx_type, int bd) {
  // Indexed by tx_type; each entry supplies the 1-D column and row
  // transforms (both length 16 here).
  static const highbd_transform_2d HIGH_IHT_16[] = {
    { aom_highbd_idct16_c, aom_highbd_idct16_c },    // DCT_DCT
    { aom_highbd_iadst16_c, aom_highbd_idct16_c },   // ADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst16_c },   // DCT_ADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    // FLIPADST shares the ADST kernels; the flipping is applied later via
    // maybe_flip_strides16().
    { aom_highbd_iadst16_c, aom_highbd_idct16_c },   // FLIPADST_DCT
    { aom_highbd_idct16_c, aom_highbd_iadst16_c },   // DCT_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // FLIPADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // ADST_FLIPADST
    { aom_highbd_iadst16_c, aom_highbd_iadst16_c },  // FLIPADST_ADST
    { highbd_iidtx16_c, highbd_iidtx16_c },          // IDTX
    { aom_highbd_idct16_c, highbd_iidtx16_c },       // V_DCT
    { highbd_iidtx16_c, aom_highbd_idct16_c },       // H_DCT
    { aom_highbd_iadst16_c, highbd_iidtx16_c },      // V_ADST
    { highbd_iidtx16_c, aom_highbd_iadst16_c },      // H_ADST
    { aom_highbd_iadst16_c, highbd_iidtx16_c },      // V_FLIPADST
    { highbd_iidtx16_c, aom_highbd_iadst16_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[16][16];
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // inverse transform row vectors
  for (i = 0; i < 16; ++i) {
    HIGH_IHT_16[tx_type].rows(input, out[i], bd);
    input += 16;
  }

  // transpose (in-place swap of the lower and upper triangles)
  for (i = 1; i < 16; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 16; ++i) {
    HIGH_IHT_16[tx_type].cols(out[i], out[i], bd);
  }

#if CONFIG_EXT_TX
  // Adjusts traversal direction/offsets for the FLIPADST-type transforms.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
#endif

  // Sum with the destination
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
      int d = i * stride + j;      // destination index (row-major)
      int s = j * outstride + i;   // transposed source index
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1810
1811#if CONFIG_EXT_TX
// 2-D high-bitdepth inverse hybrid transform for a 32x32 block, C reference
// version (EXT_TX only).  Inverse-transforms the 32 rows, transposes,
// applies the column transform in place, then adds the rounded result
// (>> 6) to the 16-bit destination, clipping to 'bd' bits.
void av1_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
                                    int stride, int tx_type, int bd) {
  // Indexed by tx_type; each entry supplies the 1-D column and row
  // transforms (both length 32).  ADST at length 32 is realized by
  // highbd_ihalfright32_c.
  static const highbd_transform_2d HIGH_IHT_32[] = {
    { aom_highbd_idct32_c, aom_highbd_idct32_c },      // DCT_DCT
    { highbd_ihalfright32_c, aom_highbd_idct32_c },    // ADST_DCT
    { aom_highbd_idct32_c, highbd_ihalfright32_c },    // DCT_ADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // ADST_ADST
    { highbd_ihalfright32_c, aom_highbd_idct32_c },    // FLIPADST_DCT
    { aom_highbd_idct32_c, highbd_ihalfright32_c },    // DCT_FLIPADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // FLIPADST_FLIPADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // ADST_FLIPADST
    { highbd_ihalfright32_c, highbd_ihalfright32_c },  // FLIPADST_ADST
    { highbd_iidtx32_c, highbd_iidtx32_c },            // IDTX
    { aom_highbd_idct32_c, highbd_iidtx32_c },         // V_DCT
    { highbd_iidtx32_c, aom_highbd_idct32_c },         // H_DCT
    { highbd_ihalfright32_c, highbd_iidtx32_c },       // V_ADST
    { highbd_iidtx32_c, highbd_ihalfright32_c },       // H_ADST
    { highbd_ihalfright32_c, highbd_iidtx32_c },       // V_FLIPADST
    { highbd_iidtx32_c, highbd_ihalfright32_c },       // H_FLIPADST
  };

  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  int i, j;
  tran_low_t tmp;
  tran_low_t out[32][32];
  tran_low_t *outp = &out[0][0];
  int outstride = 32;

  // inverse transform row vectors
  for (i = 0; i < 32; ++i) {
    HIGH_IHT_32[tx_type].rows(input, out[i], bd);
    input += 32;
  }

  // transpose (in-place swap of the lower and upper triangles)
  for (i = 1; i < 32; i++) {
    for (j = 0; j < i; j++) {
      tmp = out[i][j];
      out[i][j] = out[j][i];
      out[j][i] = tmp;
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 32; ++i) {
    HIGH_IHT_32[tx_type].cols(out[i], out[i], bd);
  }

  // Adjusts traversal direction/offsets for the FLIPADST-type transforms.
  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32, 32);

  // Sum with the destination
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) {
      int d = i * stride + j;      // destination index (row-major)
      int s = j * outstride + i;   // transposed source index
      dest[d] =
          highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
    }
  }
}
1873#endif // CONFIG_EXT_TX
1874
1875// idct
Yaowu Xuf883b422016-08-30 14:01:10 -07001876void av1_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
1877 int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001878 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07001879 aom_highbd_idct4x4_16_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001880 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001881 aom_highbd_idct4x4_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001882}
1883
Yaowu Xuf883b422016-08-30 14:01:10 -07001884void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
1885 int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001886 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07001887 aom_highbd_iwht4x4_16_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001888 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001889 aom_highbd_iwht4x4_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001890}
1891
Yaowu Xuf883b422016-08-30 14:01:10 -07001892void av1_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
1893 int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001894 // If dc is 1, then input[0] is the reconstructed value, do not need
1895 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
1896
1897 // The calculation can be simplified if there are not many non-zero dct
1898 // coefficients. Use eobs to decide what to do.
Yaowu Xuf883b422016-08-30 14:01:10 -07001899 // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
Yaowu Xuc27fc142016-08-22 16:08:15 -07001900 // Combine that with code here.
1901 // DC only DCT coefficient
1902 if (eob == 1) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001903 aom_highbd_idct8x8_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001904 } else if (eob <= 10) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001905 aom_highbd_idct8x8_10_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001906 } else {
Yaowu Xuf883b422016-08-30 14:01:10 -07001907 aom_highbd_idct8x8_64_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001908 }
1909}
1910
Yaowu Xuf883b422016-08-30 14:01:10 -07001911void av1_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
1912 int stride, int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001913 // The calculation can be simplified if there are not many non-zero dct
1914 // coefficients. Use eobs to separate different cases.
1915 // DC only DCT coefficient.
1916 if (eob == 1) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001917 aom_highbd_idct16x16_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001918 } else if (eob <= 10) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001919 aom_highbd_idct16x16_10_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001920 } else {
Yaowu Xuf883b422016-08-30 14:01:10 -07001921 aom_highbd_idct16x16_256_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001922 }
1923}
1924
Yaowu Xuf883b422016-08-30 14:01:10 -07001925void av1_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
1926 int stride, int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001927 // Non-zero coeff only in upper-left 8x8
1928 if (eob == 1) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001929 aom_highbd_idct32x32_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001930 } else if (eob <= 34) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001931 aom_highbd_idct32x32_34_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001932 } else {
Yaowu Xuf883b422016-08-30 14:01:10 -07001933 aom_highbd_idct32x32_1024_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001934 }
1935}
1936
// 4x4 high-bitdepth inverse transform + reconstruction, dispatched on
// tx_type.  In lossless mode the Walsh-Hadamard path is used instead
// (only DCT_DCT is legal there).
void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
                                 int stride, int eob, int bd, TX_TYPE tx_type,
                                 int lossless) {
  if (lossless) {
    assert(tx_type == DCT_DCT);
    av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
    return;
  }

  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
      av1_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
      break;
    case IDTX:
      // Identity transform: scale-and-add only.
      highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
1979
1980#if CONFIG_EXT_TX
// 4x8 high-bitdepth inverse transform + reconstruction.  eob is unused:
// the C hybrid transform always processes all 32 coefficients.
void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
                                 int stride, int eob, int bd, TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht4x8_32_add_c(input, dest, stride, tx_type, bd);
}
1986
// 8x4 high-bitdepth inverse transform + reconstruction.  eob is unused:
// the C hybrid transform always processes all 32 coefficients.
void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
                                 int stride, int eob, int bd, TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht8x4_32_add_c(input, dest, stride, tx_type, bd);
}
1992
// 8x16 high-bitdepth inverse transform + reconstruction.  eob is unused:
// the C hybrid transform always processes all 128 coefficients.
void av1_highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
                                  int stride, int eob, int bd,
                                  TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht8x16_128_add_c(input, dest, stride, tx_type, bd);
}
1999
// 16x8 high-bitdepth inverse transform + reconstruction.  eob is unused:
// the C hybrid transform always processes all 128 coefficients.
void av1_highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
                                  int stride, int eob, int bd,
                                  TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht16x8_128_add_c(input, dest, stride, tx_type, bd);
}
2006
// 16x32 high-bitdepth inverse transform + reconstruction.  eob is unused:
// the C hybrid transform always processes all 512 coefficients.
void av1_highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd,
                                   TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht16x32_512_add_c(input, dest, stride, tx_type, bd);
}
2013
// 32x16 high-bitdepth inverse transform + reconstruction.  eob is unused:
// the C hybrid transform always processes all 512 coefficients.
void av1_highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd,
                                   TX_TYPE tx_type) {
  (void)eob;
  av1_highbd_iht32x16_512_add_c(input, dest, stride, tx_type, bd);
}
2020#endif // CONFIG_EXT_TX
2021
// 8x8 high-bitdepth inverse transform + reconstruction, dispatched on
// tx_type.  eob is unused here; the full transform is always applied.
void av1_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
                                 int stride, int eob, int bd, TX_TYPE tx_type) {
  (void)eob;
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
      av1_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
      break;
    case IDTX:
      // Identity transform: scale-and-add only.
      highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
2058
// 16x16 high-bitdepth inverse transform + reconstruction, dispatched on
// tx_type.  eob is unused here; the full transform is always applied.
void av1_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd,
                                   TX_TYPE tx_type) {
  (void)eob;
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
      av1_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
                               tx_type, bd);
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
                               tx_type, bd);
      break;
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
      break;
    case IDTX:
      // Identity transform: scale-and-add only.
      highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
2096
// 32x32 high-bitdepth inverse transform + reconstruction, dispatched on
// tx_type.  Without EXT_TX only DCT_DCT is supported at this size.
// eob is unused here; the full transform is always applied.
void av1_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd,
                                   TX_TYPE tx_type) {
  (void)eob;
  switch (tx_type) {
    case DCT_DCT:
      av1_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
                               DCT_DCT, bd);
      break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // All non-identity extended types go through the C hybrid transform.
      av1_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
      break;
    case IDTX:
      // Identity transform: scale-and-add only.
      highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
Yaowu Xuf883b422016-08-30 14:01:10 -07002130#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07002131
2132void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
2133 INV_TXFM_PARAM *inv_txfm_param) {
2134 const TX_TYPE tx_type = inv_txfm_param->tx_type;
2135 const TX_SIZE tx_size = inv_txfm_param->tx_size;
2136 const int eob = inv_txfm_param->eob;
2137 const int lossless = inv_txfm_param->lossless;
2138
2139 switch (tx_size) {
2140 case TX_32X32:
Yaowu Xuf883b422016-08-30 14:01:10 -07002141 av1_inv_txfm_add_32x32(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002142 break;
2143 case TX_16X16:
Yaowu Xuf883b422016-08-30 14:01:10 -07002144 av1_inv_txfm_add_16x16(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002145 break;
Yaowu Xuf883b422016-08-30 14:01:10 -07002146 case TX_8X8: av1_inv_txfm_add_8x8(input, dest, stride, eob, tx_type); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002147#if CONFIG_EXT_TX
Yaowu Xuf883b422016-08-30 14:01:10 -07002148 case TX_4X8: av1_inv_txfm_add_4x8(input, dest, stride, eob, tx_type); break;
2149 case TX_8X4: av1_inv_txfm_add_8x4(input, dest, stride, eob, tx_type); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002150 case TX_8X16:
Yaowu Xuf883b422016-08-30 14:01:10 -07002151 av1_inv_txfm_add_8x16(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002152 break;
2153 case TX_16X8:
Yaowu Xuf883b422016-08-30 14:01:10 -07002154 av1_inv_txfm_add_16x8(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002155 break;
2156 case TX_16X32:
Yaowu Xuf883b422016-08-30 14:01:10 -07002157 av1_inv_txfm_add_16x32(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002158 break;
2159 case TX_32X16:
Yaowu Xuf883b422016-08-30 14:01:10 -07002160 av1_inv_txfm_add_32x16(input, dest, stride, eob, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002161 break;
2162#endif // CONFIG_EXT_TX
2163 case TX_4X4:
Yaowu Xuf883b422016-08-30 14:01:10 -07002164 // this is like av1_short_idct4x4 but has a special case around eob<=1
Yaowu Xuc27fc142016-08-22 16:08:15 -07002165 // which is significant (not just an optimization) for the lossless
2166 // case.
Yaowu Xuf883b422016-08-30 14:01:10 -07002167 av1_inv_txfm_add_4x4(input, dest, stride, eob, tx_type, lossless);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002168 break;
2169 default: assert(0 && "Invalid transform size"); break;
2170 }
2171}
2172
Yaowu Xuf883b422016-08-30 14:01:10 -07002173#if CONFIG_AOM_HIGHBITDEPTH
// Top-level high-bitdepth inverse transform + reconstruction dispatcher.
// Unpacks INV_TXFM_PARAM (including bit depth) and routes to the
// size-specific high-bitdepth handler.
void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
                         INV_TXFM_PARAM *inv_txfm_param) {
  const TX_TYPE tx_type = inv_txfm_param->tx_type;
  const TX_SIZE tx_size = inv_txfm_param->tx_size;
  const int eob = inv_txfm_param->eob;
  const int bd = inv_txfm_param->bd;
  const int lossless = inv_txfm_param->lossless;

  switch (tx_size) {
    case TX_32X32:
      av1_highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_16X16:
      av1_highbd_inv_txfm_add_16x16(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_8X8:
      av1_highbd_inv_txfm_add_8x8(input, dest, stride, eob, bd, tx_type);
      break;
#if CONFIG_EXT_TX
    // Rectangular transform sizes are only available with EXT_TX.
    case TX_4X8:
      av1_highbd_inv_txfm_add_4x8(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_8X4:
      av1_highbd_inv_txfm_add_8x4(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_8X16:
      av1_highbd_inv_txfm_add_8x16(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_16X8:
      av1_highbd_inv_txfm_add_16x8(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_16X32:
      av1_highbd_inv_txfm_add_16x32(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_32X16:
      av1_highbd_inv_txfm_add_32x16(input, dest, stride, eob, bd, tx_type);
      break;
#endif  // CONFIG_EXT_TX
    case TX_4X4:
      // this is like av1_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
      av1_highbd_inv_txfm_add_4x4(input, dest, stride, eob, bd, tx_type,
                                  lossless);
      break;
    default: assert(0 && "Invalid transform size"); break;
  }
}
Yaowu Xuf883b422016-08-30 14:01:10 -07002222#endif // CONFIG_AOM_HIGHBITDEPTH