/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#include "aom_ports/mem.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"

int av1_get_tx_scale(const TX_SIZE tx_size) {
  if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
#if CONFIG_TX64X64
  else if (txsize_sqr_up_map[tx_size] == TX_64X64)
    return 2;
#endif  // CONFIG_TX64X64
  else
    return 0;
}

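// Usage sketch (informal; the call sites live outside this file): the
// returned scale is the number of extra bits a caller folds into its
// coefficient scaling so the wider 32x32/64x64 transforms stay in 16-bit
// range, e.g. a hypothetical dequantizer might compute
//   dqcoeff = (qcoeff * dequant) >> av1_get_tx_scale(tx_size);
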
// NOTE: The implementations of all inverse transforms need to be aware of
// the fact that input and output could be the same buffer.
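// For example, a call like iidtx16_c(buf, buf) must behave correctly; it
// does because each output[i] there depends only on input[i], so an
// in-place update never clobbers an element that is still needed.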
35
Yaowu Xuc27fc142016-08-22 16:08:15 -070036#if CONFIG_EXT_TX
37static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
38 int i;
39 for (i = 0; i < 4; ++i)
40 output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
41}
42
43static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
44 int i;
45 for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
46}
47
48static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
49 int i;
50 for (i = 0; i < 16; ++i)
51 output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
52}
53
54static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
55 int i;
56 for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
57}
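
// Scaling sketch (informal): each iidtxN above is the N-point identity
// scaled to match the gain convention of the other 1-D transforms.
// Assuming Sqrt2 is round(2^14 * sqrt(2)) = 23170 and
// dct_const_round_shift() is a rounded right shift by DCT_CONST_BITS (14),
// iidtx4 with input[i] = 100 gives
//   (100 * 23170 + (1 << 13)) >> 14 = 141 ~= 100 * sqrt(2),
// so the per-dimension gains are sqrt(2), 2, 2*sqrt(2) and 4 for
// N = 4, 8, 16 and 32 respectively.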

#if CONFIG_TX64X64
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 64; ++i)
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
}
#endif  // CONFIG_TX64X64
#endif  // CONFIG_EXT_TX

// For use in lieu of ADST
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2)
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  aom_idct16_c(inputhalf, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}
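
// Layout sketch (informal): outputs 0..15 take the second half of the
// input with a plain gain of 4, while the first half is pre-scaled by
// sqrt(2) and run through aom_idct16_c into outputs 16..31; the pre-scale
// balances the two halves so the whole 32-point transform carries the
// uniform "4 times orthogonal" gain noted above.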

#if CONFIG_TX64X64
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t in[64], out[64];
  int i;
  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
  av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}

// For use in lieu of ADST
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[32];
  // Multiply input by sqrt(2)
  for (i = 0; i < 32; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  for (i = 0; i < 32; ++i) {
    output[i] = (tran_low_t)dct_const_round_shift(input[32 + i] * 4 * Sqrt2);
  }
  aom_idct32_c(inputhalf, output + 32);
  // Note overall scaling factor is 4 * sqrt(2) times orthogonal
}
#endif  // CONFIG_TX64X64

// Inverse identity transform and add.
#if CONFIG_EXT_TX
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int bs, int tx_type) {
  int r, c;
  const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
  if (tx_type == IDTX) {
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
        dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
      dest += stride;
      input += bs;
    }
  }
}
#endif  // CONFIG_EXT_TX

#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)
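
// Example: with dest pointing at row 0 of an 8-row block and stride 32,
// FLIPUD_PTR(dest, stride, 8) leaves dest at row 7 (dest + 7 * 32) and
// stride at -32, so dest[r * stride + c] now addresses rows 7, 6, ..., 0
// and the block is written upside down.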

#if CONFIG_EXT_TX
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, int tx_type, int sizey,
                               int sizex) {
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST:
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    case FLIPADST_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    default: assert(0); break;
  }
}
#endif  // CONFIG_EXT_TX

#if CONFIG_HIGHBITDEPTH
#if CONFIG_EXT_TX && CONFIG_TX64X64
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int bs, int tx_type, int bd) {
  int r, c;
  const int shift = bs < 32 ? 3 : 2;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  if (tx_type == IDTX) {
    for (r = 0; r < bs; ++r) {
      for (c = 0; c < bs; ++c)
        dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
      dest += stride;
      input += bs;
    }
  }
}
#endif  // CONFIG_EXT_TX && CONFIG_TX64X64
#endif  // CONFIG_HIGHBITDEPTH

void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_4[] = {
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp[4][4];
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
    IHT_4[tx_type].rows(input, out[i]);
    input += 4;
  }

  // transpose
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
    IHT_4[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
#endif

  // Sum with the destination
  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
    }
  }
}
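
// Rounding sketch for the final stage of the iht*_add functions:
// ROUND_POWER_OF_TWO(v, b) is (v + (1 << (b - 1))) >> b, so in the 4x4
// case above a column output of 250 adds (250 + 8) >> 4 = 16 to the
// predictor before clipping; the shift (4 here, 5 or 6 for the larger
// blocks) undoes the accumulated 2-D transform gain.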

void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_4x8[] = {
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n2 = 8;
  int i, j;
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_4x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_4x8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
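
// Note on the Sqrt2 rescale above (informal): the 2:1 rectangular
// transforms pair two 1-D transforms whose sizes differ by a factor of
// two, which leaves the 2-D gain a factor of sqrt(2) away from a power of
// two, so each row result is rescaled via
// dct_const_round_shift(outtmp[j] * Sqrt2). The 4:1 shapes (4x16, 8x32,
// and their transposes below) copy the row results unscaled.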

void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_8x4[] = {
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
#endif
  };
  const int n = 4;
  const int n2 = 8;

  int i, j;
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_8x4[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_8x4[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                          int tx_type) {
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;
  const int n4 = 16;
  int i, j;
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
    IHT_4x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) IHT_4x16[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                          int tx_type) {
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
  const int n = 4;
  const int n4 = 16;

  int i, j;
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
    input += n4;
  }

  // inverse transform column vectors
  for (i = 0; i < n4; ++i) IHT_16x4[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  static const transform_2d IHT_8x16[] = {
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n2 = 16;
  int i, j;
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_8x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  static const transform_2d IHT_16x8[] = {
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };
  const int n = 8;
  const int n2 = 16;

  int i, j;
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_16x8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
#endif
  };

  const int n = 8;
  const int n4 = 32;
  int i, j;
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
    IHT_8x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) IHT_8x32[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
  const int n = 8;
  const int n4 = 32;

  int i, j;
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
    input += n4;
  }

  // inverse transform column vectors
  for (i = 0; i < n4; ++i) IHT_32x8[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  static const transform_2d IHT_16x32[] = {
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
#endif
  };

  const int n = 16;
  const int n2 = 32;
  int i, j;
  tran_low_t out[16][32], tmp[16][32], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_16x32[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  static const transform_2d IHT_32x16[] = {
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
  const int n = 16;
  const int n2 = 32;

  int i, j;
  tran_low_t out[32][16], tmp[32][16], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
    IHT_32x16[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  static const transform_2d IHT_8[] = {
    { aom_idct8_c, aom_idct8_c },    // DCT_DCT = 0
    { aom_iadst8_c, aom_idct8_c },   // ADST_DCT = 1
    { aom_idct8_c, aom_iadst8_c },   // DCT_ADST = 2
    { aom_iadst8_c, aom_iadst8_c },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx8_c },          // IDTX
    { aom_idct8_c, iidtx8_c },       // V_DCT
    { iidtx8_c, aom_idct8_c },       // H_DCT
    { aom_iadst8_c, iidtx8_c },      // V_ADST
    { iidtx8_c, aom_iadst8_c },      // H_ADST
    { aom_iadst8_c, iidtx8_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst8_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp[8][8];
  tran_low_t out[8][8];
  tran_low_t *outp = &out[0][0];
  int outstride = 8;

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
    IHT_8[tx_type].rows(input, out[i]);
    input += 8;
  }

  // transpose
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 8; ++i) {
    IHT_8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
#endif

  // Sum with the destination
  for (i = 0; i < 8; ++i) {
    for (j = 0; j < 8; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}

void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  static const transform_2d IHT_16[] = {
    { aom_idct16_c, aom_idct16_c },    // DCT_DCT = 0
    { aom_iadst16_c, aom_idct16_c },   // ADST_DCT = 1
    { aom_idct16_c, aom_iadst16_c },   // DCT_ADST = 2
    { aom_iadst16_c, aom_iadst16_c },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx16_c },          // IDTX
    { aom_idct16_c, iidtx16_c },       // V_DCT
    { iidtx16_c, aom_idct16_c },       // H_DCT
    { aom_iadst16_c, iidtx16_c },      // V_ADST
    { iidtx16_c, aom_iadst16_c },      // H_ADST
    { aom_iadst16_c, iidtx16_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst16_c },      // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp[16][16];
  tran_low_t out[16][16];
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // inverse transform row vectors
  for (i = 0; i < 16; ++i) {
    IHT_16[tx_type].rows(input, out[i]);
    input += 16;
  }

  // transpose
  for (i = 0; i < 16; i++) {
    for (j = 0; j < 16; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 16; ++i) {
    IHT_16[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
#endif

  // Sum with the destination
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}

#if CONFIG_EXT_TX
void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             int tx_type) {
  static const transform_2d IHT_32[] = {
    { aom_idct32_c, aom_idct32_c },      // DCT_DCT
    { ihalfright32_c, aom_idct32_c },    // ADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_ADST
    { ihalfright32_c, ihalfright32_c },  // ADST_ADST
    { ihalfright32_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // ADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx32_c },            // IDTX
    { aom_idct32_c, iidtx32_c },         // V_DCT
    { iidtx32_c, aom_idct32_c },         // H_DCT
    { ihalfright32_c, iidtx32_c },       // V_ADST
    { iidtx32_c, ihalfright32_c },       // H_ADST
    { ihalfright32_c, iidtx32_c },       // V_FLIPADST
    { iidtx32_c, ihalfright32_c },       // H_FLIPADST
  };

  int i, j;
  tran_low_t tmp[32][32];
  tran_low_t out[32][32];
  tran_low_t *outp = &out[0][0];
  int outstride = 32;

  // inverse transform row vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].rows(input, out[i]);
    input += 32;
  }

  // transpose
  for (i = 0; i < 32; i++) {
    for (j = 0; j < 32; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 32; ++i) {
    IHT_32[tx_type].cols(tmp[i], out[i]);
  }

  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);

  // Sum with the destination
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
#endif  // CONFIG_EXT_TX

#if CONFIG_TX64X64
void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             int tx_type) {
  static const transform_2d IHT_64[] = {
    { idct64_col_c, idct64_row_c },      // DCT_DCT
    { ihalfright64_c, idct64_row_c },    // ADST_DCT
    { idct64_col_c, ihalfright64_c },    // DCT_ADST
    { ihalfright64_c, ihalfright64_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright64_c, idct64_row_c },    // FLIPADST_DCT
    { idct64_col_c, ihalfright64_c },    // DCT_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // FLIPADST_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // ADST_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // FLIPADST_ADST
    { iidtx64_c, iidtx64_c },            // IDTX
    { idct64_col_c, iidtx64_c },         // V_DCT
    { iidtx64_c, idct64_row_c },         // H_DCT
    { ihalfright64_c, iidtx64_c },       // V_ADST
    { iidtx64_c, ihalfright64_c },       // H_ADST
    { ihalfright64_c, iidtx64_c },       // V_FLIPADST
    { iidtx64_c, ihalfright64_c },       // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };

  int i, j;
  tran_low_t tmp[64][64];
  tran_low_t out[64][64];
  tran_low_t *outp = &out[0][0];
  int outstride = 64;

  // inverse transform row vectors
  for (i = 0; i < 64; ++i) {
    IHT_64[tx_type].rows(input, out[i]);
    for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
    input += 64;
  }
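
  // The extra 1-bit rounding in the row pass above is presumably there to
  // keep the wider 64-point intermediate values within tran_low_t range
  // before the column pass; correspondingly, the final per-pixel shift at
  // the bottom of this function is 5 rather than the 6 used for 32x32.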

  // transpose
  for (i = 0; i < 64; i++) {
    for (j = 0; j < 64; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 64; ++i) {
    IHT_64[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
#endif  // CONFIG_EXT_TX

  // Sum with the destination
  for (i = 0; i < 64; ++i) {
    for (j = 0; j < 64; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
#endif  // CONFIG_TX64X64

// idct
void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob > 1)
    aom_idct4x4_16_add(input, dest, stride);
  else
    aom_idct4x4_1_add(input, dest, stride);
}

void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob > 1)
    aom_iwht4x4_16_add(input, dest, stride);
  else
    aom_iwht4x4_1_add(input, dest, stride);
}

static void idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                        const INV_TXFM_PARAM *param) {
1082// If dc is 1, then input[0] is the reconstructed value, do not need
1083// dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.

// The calculation can be simplified if there are not many non-zero dct
// coefficients. Use eobs to decide what to do.
// TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
// Combine that with code here.
#if CONFIG_ADAPT_SCAN
  const int16_t half = param->eob_threshold[0];
#else
  const int16_t half = 12;
#endif

  const int eob = param->eob;
  if (eob == 1)
    // DC only DCT coefficient
    aom_idct8x8_1_add(input, dest, stride);
  else if (eob <= half)
    aom_idct8x8_12_add(input, dest, stride);
  else
    aom_idct8x8_64_add(input, dest, stride);
}

static void idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const INV_TXFM_PARAM *param) {
// The calculation can be simplified if there are not many non-zero dct
// coefficients. Use eobs to separate different cases.
#if CONFIG_ADAPT_SCAN
  const int16_t half = param->eob_threshold[0];
  const int16_t quarter = param->eob_threshold[1];
#else
  const int16_t half = 38;
  const int16_t quarter = 10;
#endif

  const int eob = param->eob;
  if (eob == 1) /* DC only DCT coefficient. */
    aom_idct16x16_1_add(input, dest, stride);
  else if (eob <= quarter)
    aom_idct16x16_10_add(input, dest, stride);
  else if (eob <= half)
    aom_idct16x16_38_add(input, dest, stride);
  else
    aom_idct16x16_256_add(input, dest, stride);
}

static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const INV_TXFM_PARAM *param) {
#if CONFIG_ADAPT_SCAN
  const int16_t half = param->eob_threshold[0];
  const int16_t quarter = param->eob_threshold[1];
#else
  const int16_t half = 135;
  const int16_t quarter = 34;
#endif

  const int eob = param->eob;
  if (eob == 1)
    aom_idct32x32_1_add(input, dest, stride);
  else if (eob <= quarter)
    // non-zero coeff only in upper-left 8x8
    aom_idct32x32_34_add(input, dest, stride);
  else if (eob <= half)
    // non-zero coeff only in upper-left 16x16
    aom_idct32x32_135_add(input, dest, stride);
  else
    aom_idct32x32_1024_add(input, dest, stride);
}
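
// Threshold sketch (informal): with the default scan order, an eob of at
// most 34 means every nonzero coefficient lies in the upper-left 8x8 and
// an eob of at most 135 means the upper-left 16x16, so the reduced
// aom_idct32x32_34/_135 kernels can skip the all-zero rows and columns;
// CONFIG_ADAPT_SCAN swaps the fixed cut-offs for per-scan thresholds.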

#if CONFIG_TX64X64
static void idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const INV_TXFM_PARAM *param) {
  (void)param;
  av1_iht64x64_4096_add(input, dest, stride, DCT_DCT);
}
#endif  // CONFIG_TX64X64

#if CONFIG_CHROMA_2X2
static void inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest, int stride,
                             int eob, TX_TYPE tx_type, int lossless) {
  tran_high_t a1 = input[0] >> UNIT_QUANT_SHIFT;
  tran_high_t b1 = input[1] >> UNIT_QUANT_SHIFT;
  tran_high_t c1 = input[2] >> UNIT_QUANT_SHIFT;
  tran_high_t d1 = input[3] >> UNIT_QUANT_SHIFT;

  tran_high_t a2 = a1 + c1;
  tran_high_t b2 = b1 + d1;
  tran_high_t c2 = a1 - c1;
  tran_high_t d2 = b1 - d1;

  (void)tx_type;
  (void)lossless;
  (void)eob;

  a1 = (a2 + b2) >> 2;
  b1 = (a2 - b2) >> 2;
  c1 = (c2 + d2) >> 2;
  d1 = (c2 - d2) >> 2;
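  // Worked example (informal): the two stages form a scaled 2x2
  // Walsh-Hadamard inverse. For input {4, 0, 0, 0} << UNIT_QUANT_SHIFT,
  // the first stage gives a2..d2 = {4, 0, 4, 0} and the stage above gives
  // a1..d1 = {1, 1, 1, 1}, so each of the four pixels below gains +1.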

  dest[0] = clip_pixel_add(dest[0], WRAPLOW(a1));
  dest[1] = clip_pixel_add(dest[1], WRAPLOW(b1));
  dest[stride] = clip_pixel_add(dest[stride], WRAPLOW(c1));
  dest[stride + 1] = clip_pixel_add(dest[stride + 1], WRAPLOW(d1));
}
#endif

static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
                             int eob, TX_TYPE tx_type, int lossless) {
  if (lossless) {
    assert(tx_type == DCT_DCT);
    av1_iwht4x4_add(input, dest, stride, eob);
    return;
  }

  switch (tx_type) {
    case DCT_DCT: av1_idct4x4_add(input, dest, stride, eob); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_LGT
      // LGT only exists in the C version
      av1_iht4x4_16_add_c(input, dest, stride, tx_type);
      break;
#else
      av1_iht4x4_16_add(input, dest, stride, tx_type);
      break;
#endif
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
#if CONFIG_LGT
      av1_iht4x4_16_add_c(input, dest, stride, tx_type);
      break;
#else
      av1_iht4x4_16_add(input, dest, stride, tx_type);
      break;
#endif
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht4x4_16_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 4, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}

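// Note for the dispatch helpers above and below: when CONFIG_LGT is
// enabled, the plain C iht kernels are called directly because the
// line-graph transforms exist only in the C implementation; otherwise the
// rtcd-dispatched (and possibly SIMD-accelerated) versions are used.
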
Yi Luo2ab63cb2017-05-11 16:44:22 -07001237static void inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
1238 int eob, TX_TYPE tx_type) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001239 (void)eob;
Lester Luad8290b2017-06-12 18:26:18 -07001240#if CONFIG_LGT
1241 av1_iht4x8_32_add_c(input, dest, stride, tx_type);
1242#else
Yaowu Xuf883b422016-08-30 14:01:10 -07001243 av1_iht4x8_32_add(input, dest, stride, tx_type);
Lester Luad8290b2017-06-12 18:26:18 -07001244#endif
Yaowu Xuf883b422016-08-30 14:01:10 -07001245}
1246
Yi Luo2ab63cb2017-05-11 16:44:22 -07001247static void inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
1248 int eob, TX_TYPE tx_type) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001249 (void)eob;
Lester Luad8290b2017-06-12 18:26:18 -07001250#if CONFIG_LGT
1251 av1_iht8x4_32_add_c(input, dest, stride, tx_type);
1252#else
Yaowu Xuf883b422016-08-30 14:01:10 -07001253 av1_iht8x4_32_add(input, dest, stride, tx_type);
Lester Luad8290b2017-06-12 18:26:18 -07001254#endif
Yaowu Xuf883b422016-08-30 14:01:10 -07001255}
1256
hui sua5315712017-03-20 11:37:15 -07001257// These will be used by the masked-tx experiment in the future.
Yue Chen56e226e2017-05-02 16:21:40 -07001258#if CONFIG_RECT_TX && CONFIG_EXT_TX && CONFIG_RECT_TX_EXT
hui sua5315712017-03-20 11:37:15 -07001259static void inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest,
1260 int stride, int eob, TX_TYPE tx_type) {
Debargha Mukherjee751de382016-12-13 02:54:22 -08001261 (void)eob;
Lester Luad8290b2017-06-12 18:26:18 -07001262#if CONFIG_LGT
1263 av1_iht4x16_64_add_c(input, dest, stride, tx_type);
1264#else
Debargha Mukherjee751de382016-12-13 02:54:22 -08001265 av1_iht4x16_64_add(input, dest, stride, tx_type);
Lester Luad8290b2017-06-12 18:26:18 -07001266#endif
Debargha Mukherjee751de382016-12-13 02:54:22 -08001267}
1268
hui sua5315712017-03-20 11:37:15 -07001269static void inv_txfm_add_16x4(const tran_low_t *input, uint8_t *dest,
1270 int stride, int eob, TX_TYPE tx_type) {
Debargha Mukherjee751de382016-12-13 02:54:22 -08001271 (void)eob;
Lester Luad8290b2017-06-12 18:26:18 -07001272#if CONFIG_LGT
1273 av1_iht16x4_64_add_c(input, dest, stride, tx_type);
1274#else
Debargha Mukherjee751de382016-12-13 02:54:22 -08001275 av1_iht16x4_64_add(input, dest, stride, tx_type);
Lester Luad8290b2017-06-12 18:26:18 -07001276#endif
Debargha Mukherjee751de382016-12-13 02:54:22 -08001277}
1278
hui sua5315712017-03-20 11:37:15 -07001279static void inv_txfm_add_8x32(const tran_low_t *input, uint8_t *dest,
1280 int stride, int eob, TX_TYPE tx_type) {
Debargha Mukherjee751de382016-12-13 02:54:22 -08001281 (void)eob;
Lester Luad8290b2017-06-12 18:26:18 -07001282#if CONFIG_LGT
1283 av1_iht8x32_256_add_c(input, dest, stride, tx_type);
1284#else
Debargha Mukherjee751de382016-12-13 02:54:22 -08001285 av1_iht8x32_256_add(input, dest, stride, tx_type);
Lester Luad8290b2017-06-12 18:26:18 -07001286#endif
Debargha Mukherjee751de382016-12-13 02:54:22 -08001287}
1288
hui sua5315712017-03-20 11:37:15 -07001289static void inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest,
1290 int stride, int eob, TX_TYPE tx_type) {
Debargha Mukherjee751de382016-12-13 02:54:22 -08001291 (void)eob;
Lester Luad8290b2017-06-12 18:26:18 -07001292#if CONFIG_LGT
1293 av1_iht32x8_256_add_c(input, dest, stride, tx_type);
1294#else
Debargha Mukherjee751de382016-12-13 02:54:22 -08001295 av1_iht32x8_256_add(input, dest, stride, tx_type);
Lester Luad8290b2017-06-12 18:26:18 -07001296#endif
Debargha Mukherjee751de382016-12-13 02:54:22 -08001297}
Yue Chen56e226e2017-05-02 16:21:40 -07001298#endif
Debargha Mukherjee751de382016-12-13 02:54:22 -08001299
static void inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
#if CONFIG_LGT
  av1_iht8x16_128_add_c(input, dest, stride, tx_type);
#else
  av1_iht8x16_128_add(input, dest, stride, tx_type);
#endif
}

static void inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
#if CONFIG_LGT
  av1_iht16x8_128_add_c(input, dest, stride, tx_type);
#else
  av1_iht16x8_128_add(input, dest, stride, tx_type);
#endif
}

static void inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
                               int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht16x32_512_add(input, dest, stride, tx_type);
}

static void inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
                               int stride, int eob, TX_TYPE tx_type) {
  (void)eob;
  av1_iht32x16_512_add(input, dest, stride, tx_type);
}

static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
                             const INV_TXFM_PARAM *param) {
  const TX_TYPE tx_type = param->tx_type;
  switch (tx_type) {
    case DCT_DCT: idct8x8_add(input, dest, stride, param); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_LGT
      av1_iht8x8_64_add_c(input, dest, stride, tx_type);
      break;
#else
      av1_iht8x8_64_add(input, dest, stride, tx_type);
      break;
#endif
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
#if CONFIG_LGT
      av1_iht8x8_64_add_c(input, dest, stride, tx_type);
      break;
#else
      av1_iht8x8_64_add(input, dest, stride, tx_type);
      break;
#endif
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use the C version since these transform types only exist in C code.
      av1_iht8x8_64_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 8, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}

static void inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
                               int stride, const INV_TXFM_PARAM *param) {
  const TX_TYPE tx_type = param->tx_type;
  switch (tx_type) {
    case DCT_DCT: idct16x16_add(input, dest, stride, param); break;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST: av1_iht16x16_256_add(input, dest, stride, tx_type); break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST: av1_iht16x16_256_add(input, dest, stride, tx_type); break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 16, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}

static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
                               int stride, const INV_TXFM_PARAM *param) {
  const TX_TYPE tx_type = param->tx_type;
  switch (tx_type) {
    case DCT_DCT: idct32x32_add(input, dest, stride, param); break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_iht32x32_1024_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 32, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}

#if CONFIG_TX64X64
static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
                               int stride, const INV_TXFM_PARAM *param) {
  const TX_TYPE tx_type = param->tx_type;
  switch (tx_type) {
    case DCT_DCT: idct64x64_add(input, dest, stride, param); break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_iht64x64_4096_add_c(input, dest, stride, tx_type);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 64, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
#endif  // CONFIG_TX64X64

#if CONFIG_HIGHBITDEPTH
// idct
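// When eob <= 1 only the DC coefficient remains, so the *_1_add variants
// apply just that single term. For the lossless WHT path below, taking the
// correct branch matters for exact reconstruction, not just for speed.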
void av1_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, int bd) {
  if (eob > 1)
    aom_highbd_idct4x4_16_add(input, dest, stride, bd);
  else
    aom_highbd_idct4x4_1_add(input, dest, stride, bd);
}

void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, int bd) {
  if (eob > 1)
    aom_highbd_iwht4x4_16_add(input, dest, stride, bd);
  else
    aom_highbd_iwht4x4_1_add(input, dest, stride, bd);
}

#if CONFIG_CHROMA_2X2
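// The 2x2 inverse below is a Walsh-Hadamard butterfly: one pass down the
// columns (a1 +/- c1, b1 +/- d1) followed by one across the rows
// (a2 +/- b2, c2 +/- d2), with the final >> 2 removing the 4x gain of the
// two stages. For example, a lone DC value of 4 (after UNIT_QUANT_SHIFT)
// adds 1 to each of the four pixels.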
static void highbd_inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest,
                                    int stride, int eob, int bd,
                                    TX_TYPE tx_type, int lossless) {
  tran_high_t a1 = input[0] >> UNIT_QUANT_SHIFT;
  tran_high_t b1 = input[1] >> UNIT_QUANT_SHIFT;
  tran_high_t c1 = input[2] >> UNIT_QUANT_SHIFT;
  tran_high_t d1 = input[3] >> UNIT_QUANT_SHIFT;

  tran_high_t a2 = a1 + c1;
  tran_high_t b2 = b1 + d1;
  tran_high_t c2 = a1 - c1;
  tran_high_t d2 = b1 - d1;

  uint16_t *dst = CONVERT_TO_SHORTPTR(dest);

  (void)tx_type;
  (void)lossless;
  (void)eob;

  a1 = (a2 + b2) >> 2;
  b1 = (a2 - b2) >> 2;
  c1 = (c2 + d2) >> 2;
  d1 = (c2 - d2) >> 2;

  dst[0] = highbd_clip_pixel_add(dst[0], a1, bd);
  dst[1] = highbd_clip_pixel_add(dst[1], b1, bd);
  dst[stride] = highbd_clip_pixel_add(dst[stride], c1, bd);
  dst[stride + 1] = highbd_clip_pixel_add(dst[stride + 1], d1, bd);
}
#endif  // CONFIG_CHROMA_2X2

void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
                                 int stride, int eob, int bd, TX_TYPE tx_type,
                                 int lossless) {
  if (lossless) {
    assert(tx_type == DCT_DCT);
    av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
    return;
  }
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
      av1_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
    // Use the C version for anything including identity for now.
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
    case IDTX:
      av1_inv_txfm2d_add_4x4_c(input, CONVERT_TO_SHORTPTR(dest), stride,
                               tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}

void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
                                 int stride, int eob, int bd, TX_TYPE tx_type) {
  (void)eob;
  av1_inv_txfm2d_add_4x8_c(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                           bd);
}

void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
                                 int stride, int eob, int bd, TX_TYPE tx_type) {
  (void)eob;
  av1_inv_txfm2d_add_8x4_c(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                           bd);
}

static void highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
                                     int stride, int eob, int bd,
                                     TX_TYPE tx_type) {
  (void)eob;
  av1_inv_txfm2d_add_8x16_c(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                            bd);
}

static void highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
                                     int stride, int eob, int bd,
                                     TX_TYPE tx_type) {
  (void)eob;
  av1_inv_txfm2d_add_16x8_c(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                            bd);
}

static void highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
                                      int stride, int eob, int bd,
                                      TX_TYPE tx_type) {
  (void)eob;
  av1_inv_txfm2d_add_16x32_c(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
}

static void highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
                                      int stride, int eob, int bd,
                                      TX_TYPE tx_type) {
  (void)eob;
  av1_inv_txfm2d_add_32x16_c(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
}

static void highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
                                    int stride, int eob, int bd,
                                    TX_TYPE tx_type) {
  (void)eob;
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
      av1_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
    // Use the C version for anything including identity for now.
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
    case IDTX:
      av1_inv_txfm2d_add_8x8_c(input, CONVERT_TO_SHORTPTR(dest), stride,
                               tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0);
  }
}

static void highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
                                      int stride, int eob, int bd,
                                      TX_TYPE tx_type) {
  (void)eob;
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
      av1_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
                               tx_type, bd);
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
                               tx_type, bd);
      break;
    // Use the C version for anything including identity for now.
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
    case IDTX:
      av1_inv_txfm2d_add_16x16_c(input, CONVERT_TO_SHORTPTR(dest), stride,
                                 tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0);
  }
}

static void highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
                                      int stride, int eob, int bd,
                                      TX_TYPE tx_type) {
  (void)eob;
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
      av1_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
                               tx_type, bd);
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
                               tx_type, bd);
      break;
    // Use the C version for anything including identity for now.
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
    case IDTX:
      av1_inv_txfm2d_add_32x32_c(input, CONVERT_TO_SHORTPTR(dest), stride,
                                 tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0);
  }
}

#if CONFIG_TX64X64
static void highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
                                      int stride, int eob, int bd,
                                      TX_TYPE tx_type) {
  (void)eob;
  switch (tx_type) {
    case DCT_DCT:
      av1_inv_txfm2d_add_64x64(input, CONVERT_TO_SHORTPTR(dest), stride,
                               DCT_DCT, bd);
      break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // TODO(sarahparker)
      // I've deleted the 64x64 implementations that existed in lieu of adst,
      // flipadst and identity for simplicity, but will bring them back in a
      // later change. This shouldn't impact performance, since DCT_DCT is
      // the only extended type currently allowed for 64x64, as dictated by
      // get_ext_tx_set_type in blockd.h.
      av1_inv_txfm2d_add_64x64_c(input, CONVERT_TO_SHORTPTR(dest), stride,
                                 DCT_DCT, bd);
      break;
    case IDTX:
      highbd_inv_idtx_add_c(input, dest, stride, 64, tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
#endif  // CONFIG_TX64X64
#endif  // CONFIG_HIGHBITDEPTH

void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
                      INV_TXFM_PARAM *param) {
  const TX_TYPE tx_type = param->tx_type;
  const TX_SIZE tx_size = param->tx_size;
  const int eob = param->eob;
  const int lossless = param->lossless;

  switch (tx_size) {
#if CONFIG_TX64X64
    case TX_64X64: inv_txfm_add_64x64(input, dest, stride, param); break;
#endif  // CONFIG_TX64X64
    case TX_32X32: inv_txfm_add_32x32(input, dest, stride, param); break;
    case TX_16X16: inv_txfm_add_16x16(input, dest, stride, param); break;
    case TX_8X8: inv_txfm_add_8x8(input, dest, stride, param); break;
    case TX_4X8: inv_txfm_add_4x8(input, dest, stride, eob, tx_type); break;
    case TX_8X4: inv_txfm_add_8x4(input, dest, stride, eob, tx_type); break;
    case TX_8X16: inv_txfm_add_8x16(input, dest, stride, eob, tx_type); break;
    case TX_16X8: inv_txfm_add_16x8(input, dest, stride, eob, tx_type); break;
    case TX_16X32: inv_txfm_add_16x32(input, dest, stride, eob, tx_type); break;
    case TX_32X16: inv_txfm_add_32x16(input, dest, stride, eob, tx_type); break;
    case TX_4X4:
      // This is like av1_short_idct4x4, but has a special case around
      // eob <= 1, which is significant (not just an optimization) for the
      // lossless case.
      inv_txfm_add_4x4(input, dest, stride, eob, tx_type, lossless);
      break;
#if CONFIG_CHROMA_2X2
    case TX_2X2:
      inv_txfm_add_2x2(input, dest, stride, eob, tx_type, lossless);
      break;
#endif  // CONFIG_CHROMA_2X2
#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
    case TX_32X8: inv_txfm_add_32x8(input, dest, stride, eob, tx_type); break;
    case TX_8X32: inv_txfm_add_8x32(input, dest, stride, eob, tx_type); break;
    case TX_16X4: inv_txfm_add_16x4(input, dest, stride, eob, tx_type); break;
    case TX_4X16: inv_txfm_add_4x16(input, dest, stride, eob, tx_type); break;
#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
    default: assert(0 && "Invalid transform size"); break;
  }
}

static void init_inv_txfm_param(const MACROBLOCKD *xd, TX_SIZE tx_size,
                                TX_TYPE tx_type, int eob, INV_TXFM_PARAM *inv) {
  inv->tx_type = tx_type;
  inv->tx_size = tx_size;
  inv->eob = eob;
  inv->lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
#if CONFIG_HIGHBITDEPTH
  inv->bd = xd->bd;
#endif  // CONFIG_HIGHBITDEPTH
#if CONFIG_ADAPT_SCAN
  inv->eob_threshold =
      (const int16_t *)&xd->eob_threshold_md[tx_size][tx_type][0];
#endif  // CONFIG_ADAPT_SCAN
}

void av1_inverse_transform_block(const MACROBLOCKD *xd,
                                 const tran_low_t *dqcoeff, TX_TYPE tx_type,
                                 TX_SIZE tx_size, uint8_t *dst, int stride,
                                 int eob) {
  if (!eob) return;
#if CONFIG_PVQ
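  // For PVQ the inverse transform output is already the full reconstruction,
  // so clear the destination block first; the subsequent "add" then stores
  // the result directly instead of accumulating onto a predictor.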
  const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
  const int txb_width = block_size_wide[tx_bsize];
  const int txb_height = block_size_high[tx_bsize];
  int r, c;
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (r = 0; r < txb_height; r++)
      for (c = 0; c < txb_width; c++)
        CONVERT_TO_SHORTPTR(dst)[r * stride + c] = 0;
  } else {
#endif  // CONFIG_HIGHBITDEPTH
    for (r = 0; r < txb_height; r++)
      for (c = 0; c < txb_width; c++) dst[r * stride + c] = 0;
#if CONFIG_HIGHBITDEPTH
  }
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_PVQ
  INV_TXFM_PARAM inv_txfm_param;
  init_inv_txfm_param(xd, tx_size, tx_type, eob, &inv_txfm_param);

#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
  } else {
#endif  // CONFIG_HIGHBITDEPTH
    av1_inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
#if CONFIG_HIGHBITDEPTH
  }
#endif  // CONFIG_HIGHBITDEPTH
}

void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
                                        int blk_row, int blk_col, int eob) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const TX_SIZE tx_size = get_tx_size(plane, xd);
  const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const int dst_stride = pd->dst.stride;
  uint8_t *dst =
      &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
  av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst, dst_stride,
                              eob);
}

#if CONFIG_HIGHBITDEPTH
void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
                             INV_TXFM_PARAM *inv_txfm_param) {
  const TX_TYPE tx_type = inv_txfm_param->tx_type;
  const TX_SIZE tx_size = inv_txfm_param->tx_size;
  const int eob = inv_txfm_param->eob;
  const int bd = inv_txfm_param->bd;
  const int lossless = inv_txfm_param->lossless;

  switch (tx_size) {
#if CONFIG_TX64X64
    case TX_64X64:
      highbd_inv_txfm_add_64x64(input, dest, stride, eob, bd, tx_type);
      break;
#endif  // CONFIG_TX64X64
    case TX_32X32:
      highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_16X16:
      highbd_inv_txfm_add_16x16(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_8X8:
      highbd_inv_txfm_add_8x8(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_4X8:
      av1_highbd_inv_txfm_add_4x8(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_8X4:
      av1_highbd_inv_txfm_add_8x4(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_8X16:
      highbd_inv_txfm_add_8x16(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_16X8:
      highbd_inv_txfm_add_16x8(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_16X32:
      highbd_inv_txfm_add_16x32(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_32X16:
      highbd_inv_txfm_add_32x16(input, dest, stride, eob, bd, tx_type);
      break;
    case TX_4X4:
      // This is like av1_short_idct4x4, but has a special case around
      // eob <= 1, which is significant (not just an optimization) for the
      // lossless case.
      av1_highbd_inv_txfm_add_4x4(input, dest, stride, eob, bd, tx_type,
                                  lossless);
      break;
#if CONFIG_CHROMA_2X2
    case TX_2X2:
      highbd_inv_txfm_add_2x2(input, dest, stride, eob, bd, tx_type, lossless);
      break;
#endif  // CONFIG_CHROMA_2X2
    default: assert(0 && "Invalid transform size"); break;
  }
}
#endif  // CONFIG_HIGHBITDEPTH

#if CONFIG_DPCM_INTRA
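// For DPCM intra, each row/column of the residual is coded with a 1D
// transform. The 4- and 16-point kernels below fold in an extra sqrt(2)
// factor (with a matching rounding shift) so that the effective scaling
// stays consistent with the 8- and 32-point kernels.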
void av1_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
                               TX_TYPE_1D tx_type, uint8_t *dest) {
  assert(tx_type < TX_TYPES_1D);
  static const transform_1d IHT[] = { aom_idct4_c, aom_iadst4_c, aom_iadst4_c,
                                      iidtx4_c };
  const transform_1d inv_tx = IHT[tx_type];
  tran_low_t out[4];
  inv_tx(input, out);
  for (int i = 0; i < 4; ++i) {
    out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
    dest[i * stride] =
        clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
  }
}

void av1_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
                               TX_TYPE_1D tx_type, uint8_t *dest) {
  assert(tx_type < TX_TYPES_1D);
  static const transform_1d IHT[] = { aom_idct8_c, aom_iadst8_c, aom_iadst8_c,
                                      iidtx8_c };
  const transform_1d inv_tx = IHT[tx_type];
  tran_low_t out[8];
  inv_tx(input, out);
  for (int i = 0; i < 8; ++i) {
    dest[i * stride] =
        clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
  }
}

void av1_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
                                TX_TYPE_1D tx_type, uint8_t *dest) {
  assert(tx_type < TX_TYPES_1D);
  static const transform_1d IHT[] = { aom_idct16_c, aom_iadst16_c,
                                      aom_iadst16_c, iidtx16_c };
  const transform_1d inv_tx = IHT[tx_type];
  tran_low_t out[16];
  inv_tx(input, out);
  for (int i = 0; i < 16; ++i) {
    out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
    dest[i * stride] =
        clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 5));
  }
}

void av1_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
                                TX_TYPE_1D tx_type, uint8_t *dest) {
  assert(tx_type < TX_TYPES_1D);
  static const transform_1d IHT[] = { aom_idct32_c, ihalfright32_c,
                                      ihalfright32_c, iidtx32_c };
  const transform_1d inv_tx = IHT[tx_type];
  tran_low_t out[32];
  inv_tx(input, out);
  for (int i = 0; i < 32; ++i) {
    dest[i * stride] =
        clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
  }
}

dpcm_inv_txfm_add_func av1_get_dpcm_inv_txfm_add_func(int tx_length) {
  switch (tx_length) {
    case 4: return av1_dpcm_inv_txfm_add_4_c;
    case 8: return av1_dpcm_inv_txfm_add_8_c;
    case 16: return av1_dpcm_inv_txfm_add_16_c;
    case 32:
      return av1_dpcm_inv_txfm_add_32_c;
    // TODO(huisu): add support for TX_64X64.
    default: assert(0); return NULL;
  }
}
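
// A minimal usage sketch (illustrative only; `dqcoeff`, `dst` and
// `dst_stride`, and the 4-wide vertical pass, are assumptions, not part of
// this file). The caller looks the kernel up once per block, then applies
// it to one run of coefficients at a time:
//
//   dpcm_inv_txfm_add_func inv_add = av1_get_dpcm_inv_txfm_add_func(4);
//   for (int col = 0; col < 4; ++col) {
//     // Reconstruct one 4-sample column: inverse-transform its
//     // coefficients and add the result down dest[i * stride].
//     inv_add(dqcoeff + 4 * col, dst_stride, DCT_1D, dst + col);
//   }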

#if CONFIG_HIGHBITDEPTH
// TODO(sarahparker) I am adding a quick workaround for these functions
// to remove the old hbd transforms. This will be cleaned up in a followup.
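// `dir` selects the 1D configuration: 0 uses the horizontal (row) config,
// 1 the vertical (column) config, from the per-type tables below.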
void av1_hbd_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
                                   TX_TYPE_1D tx_type, int bd, uint16_t *dest,
                                   int dir) {
  assert(tx_type < TX_TYPES_1D);
  static const TxfmFunc IHT[] = { av1_idct4_new, av1_iadst4_new,
                                  av1_iadst4_new, av1_iidentity4_c };
  // In order { horizontal, vertical }
  static const TXFM_1D_CFG *inv_txfm_cfg_ls[TX_TYPES_1D][2] = {
    { &inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_col_cfg_dct_4 },
    { &inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_4 },
    { &inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_4 },
    { &inv_txfm_1d_cfg_identity_4, &inv_txfm_1d_cfg_identity_4 }
  };

  const TXFM_1D_CFG *inv_txfm_cfg = inv_txfm_cfg_ls[tx_type][dir];
  const TxfmFunc inv_tx = IHT[tx_type];

  tran_low_t out[4];
  inv_tx(input, out, inv_txfm_cfg->cos_bit, inv_txfm_cfg->stage_range);
  for (int i = 0; i < 4; ++i) {
    out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
    dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
                                             ROUND_POWER_OF_TWO(out[i], 4), bd);
  }
}

void av1_hbd_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
                                   TX_TYPE_1D tx_type, int bd, uint16_t *dest,
                                   int dir) {
  assert(tx_type < TX_TYPES_1D);
  // Use the 8-point 1D kernels to match the 8-point configs and output size.
  static const TxfmFunc IHT[] = { av1_idct8_new, av1_iadst8_new,
                                  av1_iadst8_new, av1_iidentity8_c };
  // In order { horizontal, vertical }
  static const TXFM_1D_CFG *inv_txfm_cfg_ls[TX_TYPES_1D][2] = {
    { &inv_txfm_1d_row_cfg_dct_8, &inv_txfm_1d_col_cfg_dct_8 },
    { &inv_txfm_1d_row_cfg_adst_8, &inv_txfm_1d_col_cfg_adst_8 },
    { &inv_txfm_1d_row_cfg_adst_8, &inv_txfm_1d_col_cfg_adst_8 },
    { &inv_txfm_1d_cfg_identity_8, &inv_txfm_1d_cfg_identity_8 }
  };

  const TXFM_1D_CFG *inv_txfm_cfg = inv_txfm_cfg_ls[tx_type][dir];
  const TxfmFunc inv_tx = IHT[tx_type];

  tran_low_t out[8];
  inv_tx(input, out, inv_txfm_cfg->cos_bit, inv_txfm_cfg->stage_range);
  for (int i = 0; i < 8; ++i) {
    dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
                                             ROUND_POWER_OF_TWO(out[i], 4), bd);
  }
}

void av1_hbd_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
                                    TX_TYPE_1D tx_type, int bd, uint16_t *dest,
                                    int dir) {
  assert(tx_type < TX_TYPES_1D);
  // Use the 16-point 1D kernels to match the 16-point configs and output size.
  static const TxfmFunc IHT[] = { av1_idct16_new, av1_iadst16_new,
                                  av1_iadst16_new, av1_iidentity16_c };
  // In order { horizontal, vertical }
  static const TXFM_1D_CFG *inv_txfm_cfg_ls[TX_TYPES_1D][2] = {
    { &inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_16 },
    { &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_16 },
    { &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_16 },
    { &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_16 }
  };

  const TXFM_1D_CFG *inv_txfm_cfg = inv_txfm_cfg_ls[tx_type][dir];
  const TxfmFunc inv_tx = IHT[tx_type];

  tran_low_t out[16];
  inv_tx(input, out, inv_txfm_cfg->cos_bit, inv_txfm_cfg->stage_range);
  for (int i = 0; i < 16; ++i) {
    out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
    dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
                                             ROUND_POWER_OF_TWO(out[i], 5), bd);
  }
}

void av1_hbd_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
                                    TX_TYPE_1D tx_type, int bd, uint16_t *dest,
                                    int dir) {
  assert(tx_type < TX_TYPES_1D);
  // Use the 32-point 1D kernels to match the 32-point configs and output size.
  static const TxfmFunc IHT[] = { av1_idct32_new, av1_iadst32_new,
                                  av1_iadst32_new, av1_iidentity32_c };
  // In order { horizontal, vertical }
  static const TXFM_1D_CFG *inv_txfm_cfg_ls[TX_TYPES_1D][2] = {
    { &inv_txfm_1d_row_cfg_dct_32, &inv_txfm_1d_col_cfg_dct_32 },
    { &inv_txfm_1d_row_cfg_adst_32, &inv_txfm_1d_col_cfg_adst_32 },
    { &inv_txfm_1d_row_cfg_adst_32, &inv_txfm_1d_col_cfg_adst_32 },
    { &inv_txfm_1d_cfg_identity_32, &inv_txfm_1d_cfg_identity_32 }
  };

  const TXFM_1D_CFG *inv_txfm_cfg = inv_txfm_cfg_ls[tx_type][dir];
  const TxfmFunc inv_tx = IHT[tx_type];

  tran_low_t out[32];
  inv_tx(input, out, inv_txfm_cfg->cos_bit, inv_txfm_cfg->stage_range);
  for (int i = 0; i < 32; ++i) {
    dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
                                             ROUND_POWER_OF_TWO(out[i], 4), bd);
  }
}

hbd_dpcm_inv_txfm_add_func av1_get_hbd_dpcm_inv_txfm_add_func(int tx_length) {
  switch (tx_length) {
    case 4: return av1_hbd_dpcm_inv_txfm_add_4_c;
    case 8: return av1_hbd_dpcm_inv_txfm_add_8_c;
    case 16: return av1_hbd_dpcm_inv_txfm_add_16_c;
    case 32:
      return av1_hbd_dpcm_inv_txfm_add_32_c;
    // TODO(huisu): add support for TX_64X64.
    default: assert(0); return NULL;
  }
}
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_DPCM_INTRA