blob: 358f558b9e9f6d5ec3117a118ca98b8741981cf4 [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xuc27fc142016-08-22 16:08:15 -070010 */
11
12#include <math.h>
13
Yaowu Xuf883b422016-08-30 14:01:10 -070014#include "./aom_dsp_rtcd.h"
Geza Lorea1ddae52016-09-02 09:51:34 +010015#include "./av1_rtcd.h"
16#include "aom_dsp/inv_txfm.h"
17#include "aom_ports/mem.h"
Sarah Parkereec47e62017-05-15 20:49:22 -070018#include "av1/common/av1_inv_txfm1d_cfg.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070019#include "av1/common/blockd.h"
20#include "av1/common/enums.h"
21#include "av1/common/idct.h"
Nathan E. Eggee554f362017-10-04 14:44:38 -040022#if CONFIG_DAALA_TX4 || CONFIG_DAALA_TX8 || CONFIG_DAALA_TX16 || \
23 CONFIG_DAALA_TX32 || CONFIG_DAALA_TX64
Nathan E. Egge5e6bda82017-09-16 10:13:51 -040024#include "av1/common/daala_tx.h"
25#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -070026
Jingning Hanff705452017-04-27 11:32:15 -070027int av1_get_tx_scale(const TX_SIZE tx_size) {
Yue Chenaa0d90f2017-08-24 17:56:24 -070028 const int pels = tx_size_2d[tx_size];
29 return (pels > 256) + (pels > 1024) + (pels > 4096);
Yaowu Xuc27fc142016-08-22 16:08:15 -070030}
31
// NOTE: The implementations of all inverses need to be aware of the fact
// that input and output could be the same buffer.
34
Yaowu Xuc27fc142016-08-22 16:08:15 -070035#if CONFIG_EXT_TX
36static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
37 int i;
Monty Montgomery02078a32017-07-11 21:22:29 -040038 for (i = 0; i < 4; ++i) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070039 output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
Monty Montgomery02078a32017-07-11 21:22:29 -040040 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070041}
42
43static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
44 int i;
Monty Montgomerycf18fe42017-07-11 21:33:25 -040045 for (i = 0; i < 8; ++i) {
Monty Montgomerycf18fe42017-07-11 21:33:25 -040046 output[i] = input[i] * 2;
Monty Montgomerycf18fe42017-07-11 21:33:25 -040047 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070048}
49
50static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
51 int i;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -040052 for (i = 0; i < 16; ++i) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070053 output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
Monty Montgomerycb9c1c52017-07-17 18:15:30 -040054 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070055}
56
57static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
58 int i;
Monty Montgomery2cb52ba2017-07-17 18:27:27 -040059 for (i = 0; i < 32; ++i) {
Monty Montgomery2cb52ba2017-07-17 18:27:27 -040060 output[i] = input[i] * 4;
Monty Montgomery2cb52ba2017-07-17 18:27:27 -040061 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070062}
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -070063
#if CONFIG_TX64X64 && !CONFIG_DAALA_TX64
// 64-point inverse identity transform: scales each coefficient by 4*sqrt(2).
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  for (i = 0; i < 64; ++i) {
    output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
  }
}
#endif  // CONFIG_TX64X64 && !CONFIG_DAALA_TX64
Jingning Hanec419e02016-11-01 18:19:30 -070072#endif // CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -070073
// For use in lieu of ADST: 32-point "half-right" transform. The second half
// of the input is passed through (scaled) and the first half goes through a
// 16-point IDCT.
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[16];
  // Multiply input by sqrt(2).
  // NOTE: input and output may be the same buffer, so input[0..15] is
  // buffered into inputhalf before any element of output[] is written.
  for (i = 0; i < 16; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  // Identity on the top half (scaled by 4). This must read input[16..31]
  // before aom_idct16_c below overwrites output[16..31].
  for (i = 0; i < 16; ++i) {
    output[i] = input[16 + i] * 4;
  }
  aom_idct16_c(inputhalf, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}
88
Nathan E. Eggee554f362017-10-04 14:44:38 -040089#if CONFIG_TX64X64 && !CONFIG_DAALA_TX64
Debargha Mukherjee67d13472016-11-01 14:37:39 -070090static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
91 int32_t in[64], out[64];
92 int i;
93 for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
Sarah Parkereec47e62017-05-15 20:49:22 -070094 av1_idct64_new(in, out, inv_cos_bit_col_dct_64, inv_stage_range_col_dct_64);
Debargha Mukherjee67d13472016-11-01 14:37:39 -070095 for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
96}
97
98static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
99 int32_t in[64], out[64];
100 int i;
101 for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
Sarah Parkereec47e62017-05-15 20:49:22 -0700102 av1_idct64_new(in, out, inv_cos_bit_row_dct_64, inv_stage_range_row_dct_64);
Debargha Mukherjee67d13472016-11-01 14:37:39 -0700103 for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
104}
105
// For use in lieu of ADST: 64-point "half-right" transform. The second half
// of the input is passed through (scaled) and the first half goes through a
// 32-point IDCT.
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  int i;
  tran_low_t inputhalf[32];
  // Multiply input by sqrt(2).
  // NOTE: input and output may be the same buffer, so input[0..31] is
  // buffered into inputhalf before any element of output[] is written.
  for (i = 0; i < 32; ++i) {
    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
  }
  // Identity on the top half (scaled by 4*sqrt(2)). Must read input[32..63]
  // before aom_idct32_c below overwrites output[32..63].
  for (i = 0; i < 32; ++i) {
    output[i] = (tran_low_t)dct_const_round_shift(input[32 + i] * 4 * Sqrt2);
  }
  aom_idct32_c(inputhalf, output + 32);
  // Note overall scaling factor is 4 * sqrt(2) times orthogonal
}
#endif  // CONFIG_TX64X64 && !CONFIG_DAALA_TX64
121
Yaowu Xuc27fc142016-08-22 16:08:15 -0700122// Inverse identity transform and add.
Jingning Hanec419e02016-11-01 18:19:30 -0700123#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700124static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Urvang Joshi2283d372017-10-02 17:16:45 -0700125 int bsx, int bsy, TX_TYPE tx_type) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700126 int r, c;
Debargha Mukherjee2b435012017-09-28 08:30:35 -0700127 const int pels = bsx * bsy;
128 const int shift = 3 - ((pels > 256) + (pels > 1024));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700129 if (tx_type == IDTX) {
Debargha Mukherjee2b435012017-09-28 08:30:35 -0700130 for (r = 0; r < bsy; ++r) {
131 for (c = 0; c < bsx; ++c)
Yaowu Xuc27fc142016-08-22 16:08:15 -0700132 dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
133 dest += stride;
Debargha Mukherjee2b435012017-09-28 08:30:35 -0700134 input += bsx;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700135 }
136 }
137}
Jingning Hanec419e02016-11-01 18:19:30 -0700138#endif // CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700139
// Flip a buffer pointer upside-down: repoint it at the start of the last of
// `size` rows and negate the stride so successive rows walk backwards
// through memory.
#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)
145
Jingning Hanec419e02016-11-01 18:19:30 -0700146#if CONFIG_EXT_TX
// Adjust the dst/src pointers and strides so the caller's transpose-and-add
// loop realizes any LR/UD flip required by tx_type, without copying data.
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
                               int *sstride, TX_TYPE tx_type, int sizey,
                               int sizex) {
  // Note that the transpose of src will be added to dst. In order to LR
  // flip the addends (in dst coordinates), we UD flip the src. To UD flip
  // the addends, we UD flip the dst.
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST: break;  // no flip needed
    case FLIPADST_DCT:
    case FLIPADST_ADST:
    case V_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      break;
    case DCT_FLIPADST:
    case ADST_FLIPADST:
    case H_FLIPADST:
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    case FLIPADST_FLIPADST:
      // flip UD
      FLIPUD_PTR(*dst, *dstride, sizey);
      // flip LR
      FLIPUD_PTR(*src, *sstride, sizex);
      break;
    default: assert(0); break;
  }
}
Jingning Hanec419e02016-11-01 18:19:30 -0700184#endif // CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -0700185
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200186#if CONFIG_HIGHBITDEPTH
Sarah Parker31c66502017-05-19 16:51:07 -0700187#if CONFIG_EXT_TX && CONFIG_TX64X64
Yaowu Xuc27fc142016-08-22 16:08:15 -0700188static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
Urvang Joshi2283d372017-10-02 17:16:45 -0700189 int stride, int bsx, int bsy, TX_TYPE tx_type,
Debargha Mukherjee2b435012017-09-28 08:30:35 -0700190 int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700191 int r, c;
Debargha Mukherjee2b435012017-09-28 08:30:35 -0700192 const int pels = bsx * bsy;
193 const int shift = 3 - ((pels > 256) + (pels > 1024));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700194 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
195
196 if (tx_type == IDTX) {
Debargha Mukherjee2b435012017-09-28 08:30:35 -0700197 for (r = 0; r < bsy; ++r) {
198 for (c = 0; c < bsx; ++c)
Yaowu Xuc27fc142016-08-22 16:08:15 -0700199 dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
200 dest += stride;
Debargha Mukherjee2b435012017-09-28 08:30:35 -0700201 input += bsx;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700202 }
203 }
204}
Sarah Parker31c66502017-05-19 16:51:07 -0700205#endif // CONFIG_EXT_TX && CONFIG_TX64X64
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200206#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700207
Lester Lu432012f2017-08-17 14:39:29 -0700208#if CONFIG_LGT || CONFIG_LGT_FROM_PRED
// 4-point inverse line-graph transform (LGT): output = M^T * input, where
// lgtmtx points at a row-major 4x4 basis matrix M (must be non-NULL).
void ilgt4(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  if (!lgtmtx) assert(0);
#if CONFIG_LGT_FROM_PRED
  // For DCT/ADST, use butterfly implementations
  if (lgtmtx[0] == DCT4) {
    aom_idct4_c(input, output);
    return;
  } else if (lgtmtx[0] == ADST4) {
    aom_iadst4_c(input, output);
    return;
  }
#endif  // CONFIG_LGT_FROM_PRED

  // evaluate s[j] = sum of lgtmtx[i * 4 + j] * input[i] over i = 0, ..., 3
  tran_high_t s[4] = { 0 };
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 4; ++j) s[j] += lgtmtx[i * 4 + j] * input[i];

  for (int i = 0; i < 4; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i]));
}
230
// 8-point inverse line-graph transform (LGT): output = M^T * input, where
// lgtmtx points at a row-major 8x8 basis matrix M (must be non-NULL).
void ilgt8(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  if (!lgtmtx) assert(0);
#if CONFIG_LGT_FROM_PRED
  // For DCT/ADST, use butterfly implementations
  if (lgtmtx[0] == DCT8) {
    aom_idct8_c(input, output);
    return;
  } else if (lgtmtx[0] == ADST8) {
    aom_iadst8_c(input, output);
    return;
  }
#endif  // CONFIG_LGT_FROM_PRED

  // evaluate s[j] = sum of lgtmtx[i * 8 + j] * input[i] over i = 0, ..., 7
  tran_high_t s[8] = { 0 };
  for (int i = 0; i < 8; ++i)
    for (int j = 0; j < 8; ++j) s[j] += lgtmtx[i * 8 + j] * input[i];

  for (int i = 0; i < 8; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i]));
}
Lester Lu432012f2017-08-17 14:39:29 -0700252#endif // CONFIG_LGT || CONFIG_LGT_FROM_PRED
Lester Lu708c1ec2017-06-14 14:54:49 -0700253
Lester Lu432012f2017-08-17 14:39:29 -0700254#if CONFIG_LGT
Lester Lu918fe692017-08-17 14:39:29 -0700255// get_lgt4 and get_lgt8 return 1 and pick a lgt matrix if LGT is chosen to
256// apply. Otherwise they return 0
257int get_lgt4(const TxfmParam *txfm_param, int is_col,
258 const tran_high_t **lgtmtx) {
259 if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D ||
260 vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
261 lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
262 return 1;
263 } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D ||
264 htx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
265 lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
Lester Lu708c1ec2017-06-14 14:54:49 -0700266 return 1;
267 }
Lester Lu918fe692017-08-17 14:39:29 -0700268 lgtmtx[0] = NULL;
Lester Lu708c1ec2017-06-14 14:54:49 -0700269 return 0;
270}
271
Lester Lu918fe692017-08-17 14:39:29 -0700272int get_lgt8(const TxfmParam *txfm_param, int is_col,
273 const tran_high_t **lgtmtx) {
274 if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D ||
275 vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
276 lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
277 return 1;
278 } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D ||
279 htx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
280 lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
Lester Lu708c1ec2017-06-14 14:54:49 -0700281 return 1;
282 }
Lester Lu918fe692017-08-17 14:39:29 -0700283 lgtmtx[0] = NULL;
Lester Lu708c1ec2017-06-14 14:54:49 -0700284 return 0;
285}
286#endif // CONFIG_LGT
287
Lester Lu432012f2017-08-17 14:39:29 -0700288#if CONFIG_LGT_FROM_PRED
289void ilgt16up(const tran_low_t *input, tran_low_t *output,
290 const tran_high_t *lgtmtx) {
291 if (lgtmtx[0] == DCT16) {
292 aom_idct16_c(input, output);
293 return;
294 } else if (lgtmtx[0] == ADST16) {
295 aom_iadst16_c(input, output);
296 return;
297 } else if (lgtmtx[0] == DCT32) {
298 aom_idct32_c(input, output);
299 return;
300 } else if (lgtmtx[0] == ADST32) {
301 ihalfright32_c(input, output);
302 return;
303 } else {
304 assert(0);
305 }
306}
307
308void get_discontinuity_1d(uint8_t *arr, int n, int *idx_max_diff) {
309 *idx_max_diff = -1;
310
311 int temp = 0, max_diff = 0, min_diff = INT_MAX;
312 for (int i = 1; i < n; ++i) {
313 temp = abs(arr[i] - arr[i - 1]);
314 if (temp > max_diff) {
315 max_diff = temp;
316 *idx_max_diff = i;
317 }
318 if (temp < min_diff) min_diff = temp;
319 }
320}
321
322void get_discontinuity_2d(uint8_t *dst, int stride, int n, int is_col,
323 int *idx_max_diff, int ntx) {
324 *idx_max_diff = -1;
325
326 int diff = 0, temp = 0, max_diff = 0, min_diff = INT_MAX;
327 for (int i = 1; i < n; ++i) {
328 temp = 0;
329 for (int j = 0; j < ntx; ++j) {
330 if (is_col) // vertical diff
331 diff = dst[i * stride + j] - dst[(i - 1) * stride + j];
332 else // horizontal diff
333 diff = dst[j * stride + i] - dst[j * stride + i - 1];
334 temp += diff * diff;
335 }
336 // temp/w is the i-th avg square diff
337 if (temp > max_diff) {
338 max_diff = temp;
339 *idx_max_diff = i;
340 }
341 if (temp < min_diff) min_diff = temp;
342 }
343}
344
// Maps an intra/inter prediction mode to the strength of the self-loop to
// use in the LGT line graph for the row (!is_col) or column (is_col)
// transform.
int idx_selfloop_wrt_mode(PREDICTION_MODE mode, int is_col) {
  // 0: no self-loop
  // 1: small self-loop
  // 2: medium self-loop
  // 3: large self-loop
  switch (mode) {
    case DC_PRED:
    case SMOOTH_PRED:
      // prediction is good for both directions: large SLs for row and col
      return 3;
    case PAETH_PRED: return 0;
#if CONFIG_SMOOTH_HV
    case SMOOTH_H_PRED:
#endif
    case H_PRED:
      // prediction is good for H direction: large SL for row only
      return is_col ? 0 : 3;
#if CONFIG_SMOOTH_HV
    case SMOOTH_V_PRED:
#endif
    case V_PRED:
      // prediction is good for V direction: large SL for col only
      return is_col ? 3 : 0;
#if LGT_SL_INTRA
    // directional mode: choose SL based on the direction
    case D45_PRED: return is_col ? 2 : 0;
    case D63_PRED: return is_col ? 3 : 0;
    case D117_PRED: return is_col ? 3 : 1;
    case D135_PRED: return 2;
    case D153_PRED: return is_col ? 1 : 3;
    case D207_PRED: return is_col ? 0 : 3;
#else
    case D45_PRED:
    case D63_PRED:
    case D117_PRED: return is_col ? 3 : 0;
    case D135_PRED:
    case D153_PRED:
    case D207_PRED: return is_col ? 0 : 3;
#endif
    // inter: no SL
    default: return 0;
  }
}
388
// Selects a 4-point LGT basis matrix from the prediction mode and the
// reconstructed boundary pixels around dst, storing it in lgtmtx[0].
// The self-loop strength comes from the mode; a "weak edge" (break point)
// position is found from the discontinuity in the relevant boundary.
void get_lgt4_from_pred(const TxfmParam *txfm_param, int is_col,
                        const tran_high_t **lgtmtx, int ntx) {
  PREDICTION_MODE mode = txfm_param->mode;
  int stride = txfm_param->stride;
  uint8_t *dst = txfm_param->dst;
  int bp = -1;  // break-point index; -1 means none found
  uint8_t arr[4];

  // Each lgt4mtx_arr[k][i] corresponds to a line graph with a self-loop on
  // the first node, and possibly a weak edge within the line graph. i is
  // the index of the weak edge (between the i-th and (i+1)-th pixels, i=0
  // means no weak edge). k corresponds to the first self-loop's weight
  const tran_high_t *lgt4mtx_arr[4][4] = {
    { &lgt4_000[0][0], &lgt4_000w1[0][0], &lgt4_000w2[0][0],
      &lgt4_000w3[0][0] },
    { &lgt4_060[0][0], &lgt4_060_000w1[0][0], &lgt4_060_000w2[0][0],
      &lgt4_060_000w3[0][0] },
    { &lgt4_100[0][0], &lgt4_100_000w1[0][0], &lgt4_100_000w2[0][0],
      &lgt4_100_000w3[0][0] },
    { &lgt4_150[0][0], &lgt4_150_000w1[0][0], &lgt4_150_000w2[0][0],
      &lgt4_150_000w3[0][0] },
  };

  // initialize to DCT or some LGTs, and then change later if necessary
  int idx_sl = idx_selfloop_wrt_mode(mode, is_col);
  lgtmtx[0] = lgt4mtx_arr[idx_sl][0];

  // find the break point and replace the line graph by the one with a
  // break point
  if (mode == DC_PRED || mode == SMOOTH_PRED) {
    // Do not use break point, since 1) is_left_available and is_top_available
    // in DC_PRED are not known by txfm_param for now, so accessing
    // both boundaries anyway may cause a mismatch 2) DC prediction
    // typically yields very smooth residues so having the break point
    // does not usually improve the RD result.
    return;
  } else if (mode == PAETH_PRED) {
    // PAETH_PRED: use both 1D top boundary and 1D left boundary
    if (is_col)
      for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride];
    else
      for (int i = 0; i < 4; ++i) arr[i] = dst[i];
    get_discontinuity_1d(&arr[0], 4, &bp);
  } else if (mode == V_PRED) {
    // V_PRED: use 1D top boundary only
    if (is_col) return;
    for (int i = 0; i < 4; ++i) arr[i] = dst[i];
    get_discontinuity_1d(&arr[0], 4, &bp);
  } else if (mode == H_PRED) {
    // H_PRED: use 1D left boundary only
    if (!is_col) return;
    for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride];
    get_discontinuity_1d(&arr[0], 4, &bp);
#if CONFIG_SMOOTH_HV
  } else if (mode == SMOOTH_V_PRED) {
    if (is_col) return;
    // row above the block
    for (int i = 0; i < 4; ++i) arr[i] = dst[-stride + i];
    get_discontinuity_1d(&arr[0], 4, &bp);
  } else if (mode == SMOOTH_H_PRED) {
    if (!is_col) return;
    // column left of the block
    for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride - 1];
    get_discontinuity_1d(&arr[0], 4, &bp);
#endif
  } else if (mode == D45_PRED || mode == D63_PRED || mode == D117_PRED) {
    // directional modes closer to vertical (maybe include D135 later)
    if (!is_col) get_discontinuity_2d(dst, stride, 4, 0, &bp, ntx);
  } else if (mode == D135_PRED || mode == D153_PRED || mode == D207_PRED) {
    // directional modes closer to horizontal
    if (is_col) get_discontinuity_2d(dst, stride, 4, 1, &bp, ntx);
  } else if (mode > PAETH_PRED) {
    // inter
    get_discontinuity_2d(dst, stride, 4, is_col, &bp, ntx);
  }

#if LGT_SL_INTRA
  if (bp != -1) lgtmtx[0] = lgt4mtx_arr[idx_sl][bp];
#else
  if (bp != -1) lgtmtx[0] = lgt4mtx_arr[0][bp];
#endif
}
469
// 8-point counterpart of get_lgt4_from_pred: selects an 8-point LGT basis
// from the prediction mode and boundary discontinuity, storing it in
// lgtmtx[0]. See get_lgt4_from_pred for the selection rationale.
void get_lgt8_from_pred(const TxfmParam *txfm_param, int is_col,
                        const tran_high_t **lgtmtx, int ntx) {
  PREDICTION_MODE mode = txfm_param->mode;
  int stride = txfm_param->stride;
  uint8_t *dst = txfm_param->dst;
  int bp = -1;  // break-point index; -1 means none found
  uint8_t arr[8];

  // lgt8mtx_arr[k][i]: k = self-loop weight class, i = weak-edge position
  // (i = 0 means no weak edge).
  const tran_high_t *lgt8mtx_arr[4][8] = {
    { &lgt8_000[0][0], &lgt8_000w1[0][0], &lgt8_000w2[0][0], &lgt8_000w3[0][0],
      &lgt8_000w4[0][0], &lgt8_000w5[0][0], &lgt8_000w6[0][0],
      &lgt8_000w7[0][0] },
    { &lgt8_060[0][0], &lgt8_060_000w1[0][0], &lgt8_060_000w2[0][0],
      &lgt8_060_000w3[0][0], &lgt8_060_000w4[0][0], &lgt8_060_000w5[0][0],
      &lgt8_060_000w6[0][0], &lgt8_060_000w7[0][0] },
    { &lgt8_100[0][0], &lgt8_100_000w1[0][0], &lgt8_100_000w2[0][0],
      &lgt8_100_000w3[0][0], &lgt8_100_000w4[0][0], &lgt8_100_000w5[0][0],
      &lgt8_100_000w6[0][0], &lgt8_100_000w7[0][0] },
    { &lgt8_150[0][0], &lgt8_150_000w1[0][0], &lgt8_150_000w2[0][0],
      &lgt8_150_000w3[0][0], &lgt8_150_000w4[0][0], &lgt8_150_000w5[0][0],
      &lgt8_150_000w6[0][0], &lgt8_150_000w7[0][0] },
  };

  // default: no weak edge, self-loop chosen by mode
  int idx_sl = idx_selfloop_wrt_mode(mode, is_col);
  lgtmtx[0] = lgt8mtx_arr[idx_sl][0];

  if (mode == DC_PRED || mode == SMOOTH_PRED) {
    return;
  } else if (mode == PAETH_PRED) {
    if (is_col)
      for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride];
    else
      for (int i = 0; i < 8; ++i) arr[i] = dst[i];
    get_discontinuity_1d(&arr[0], 8, &bp);
  } else if (mode == V_PRED) {
    if (is_col) return;
    for (int i = 0; i < 8; ++i) arr[i] = dst[i];
    get_discontinuity_1d(&arr[0], 8, &bp);
  } else if (mode == H_PRED) {
    if (!is_col) return;
    for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride];
    get_discontinuity_1d(&arr[0], 8, &bp);
#if CONFIG_SMOOTH_HV
  } else if (mode == SMOOTH_V_PRED) {
    if (is_col) return;
    for (int i = 0; i < 8; ++i) arr[i] = dst[-stride + i];
    get_discontinuity_1d(&arr[0], 8, &bp);
  } else if (mode == SMOOTH_H_PRED) {
    if (!is_col) return;
    for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride - 1];
    get_discontinuity_1d(&arr[0], 8, &bp);
#endif
  } else if (mode == D45_PRED || mode == D63_PRED || mode == D117_PRED) {
    if (!is_col) get_discontinuity_2d(dst, stride, 8, 0, &bp, ntx);
  } else if (mode == D135_PRED || mode == D153_PRED || mode == D207_PRED) {
    if (is_col) get_discontinuity_2d(dst, stride, 8, 1, &bp, ntx);
  } else if (mode > PAETH_PRED) {
    // inter
    get_discontinuity_2d(dst, stride, 8, is_col, &bp, ntx);
  }

#if LGT_SL_INTRA
  if (bp != -1) lgtmtx[0] = lgt8mtx_arr[idx_sl][bp];
#else
  if (bp != -1) lgtmtx[0] = lgt8mtx_arr[0][bp];
#endif
}
536
// Since LGTs with length >8 are not implemented now, the following function
// will just call DCT or ADST (via the lgt16/lgt32 basis tags), picked from
// the prediction mode: modes predicting from a boundary use ADST along the
// direction that touches that boundary, DCT otherwise.
void get_lgt16up_from_pred(const TxfmParam *txfm_param, int is_col,
                           const tran_high_t **lgtmtx, int ntx) {
  int tx_length = is_col ? tx_size_high[txfm_param->tx_size]
                         : tx_size_wide[txfm_param->tx_size];
  assert(tx_length == 16 || tx_length == 32);
  PREDICTION_MODE mode = txfm_param->mode;

  (void)ntx;
  const tran_high_t *dctmtx =
      tx_length == 16 ? &lgt16_000[0][0] : &lgt32_000[0][0];
  const tran_high_t *adstmtx =
      tx_length == 16 ? &lgt16_200[0][0] : &lgt32_200[0][0];

  switch (mode) {
    case DC_PRED:
    case PAETH_PRED:
    case SMOOTH_PRED:
      // prediction from both top and left -> ADST
      lgtmtx[0] = adstmtx;
      break;
    case V_PRED:
    case D45_PRED:
    case D63_PRED:
    case D117_PRED:
#if CONFIG_SMOOTH_HV
    case SMOOTH_V_PRED:
#endif
      // prediction from the top more than from the left -> ADST
      lgtmtx[0] = is_col ? adstmtx : dctmtx;
      break;
    case H_PRED:
    case D135_PRED:
    case D153_PRED:
    case D207_PRED:
#if CONFIG_SMOOTH_HV
    case SMOOTH_H_PRED:
#endif
      // prediction from the left more than from the top -> DCT
      lgtmtx[0] = is_col ? dctmtx : adstmtx;
      break;
    default: lgtmtx[0] = dctmtx; break;
  }
}
582
// Signature of a 1-D inverse LGT kernel: transforms `input` into `output`
// using the row-major basis matrix `lgtmtx`.
typedef void (*IlgtFunc)(const tran_low_t *input, tran_low_t *output,
                         const tran_high_t *lgtmtx);

// 1-D inverse LGT kernels indexed by log2(length) - 2 (lengths 4/8/16/32;
// lengths 16 and 32 share the fallback dispatcher).
static IlgtFunc ilgt_func[4] = { ilgt4, ilgt8, ilgt16up, ilgt16up };

// Signature of a basis-selection helper that picks an LGT matrix from the
// prediction information in txfm_param.
typedef void (*GetLgtFunc)(const TxfmParam *txfm_param, int is_col,
                           const tran_high_t **lgtmtx, int ntx);

// Basis selectors indexed by log2(length) - 2, parallel to ilgt_func.
static GetLgtFunc get_lgt_func[4] = { get_lgt4_from_pred, get_lgt8_from_pred,
                                      get_lgt16up_from_pred,
                                      get_lgt16up_from_pred };
594
// this inline function corresponds to the up scaling before the transpose
// operation in the av1_iht* functions: rectangular sizes with a 1:2 aspect
// ratio get an extra sqrt(2) factor; other supported sizes pass through.
static INLINE tran_low_t inv_upscale_wrt_txsize(const tran_high_t val,
                                                const TX_SIZE tx_size) {
  switch (tx_size) {
    case TX_4X4:
    case TX_8X8:
    case TX_4X16:
    case TX_16X4:
    case TX_8X32:
    case TX_32X8: return (tran_low_t)val;
    case TX_4X8:
    case TX_8X4:
    case TX_8X16:
    case TX_16X8: return (tran_low_t)dct_const_round_shift(val * Sqrt2);
    default: assert(0); break;
  }
  return 0;
}
614
// This inline function corresponds to the bit shift before summing with the
// destination in the av1_iht* functions: larger transforms get a larger
// final rounding shift (4, 5 or 6 bits).
static INLINE tran_low_t inv_downscale_wrt_txsize(const tran_low_t val,
                                                  const TX_SIZE tx_size) {
  switch (tx_size) {
    case TX_4X4: return ROUND_POWER_OF_TWO(val, 4);
    case TX_4X8:
    case TX_8X4:
    case TX_8X8:
    case TX_4X16:
    case TX_16X4: return ROUND_POWER_OF_TWO(val, 5);
    case TX_8X16:
    case TX_16X8:
    case TX_8X32:
    case TX_32X8: return ROUND_POWER_OF_TWO(val, 6);
    default: assert(0); break;
  }
  return 0;
}
634
// 2-D inverse LGT (basis selected from prediction), followed by downscale
// and accumulation into dest. Handles all LGT-eligible rectangular sizes
// (at least one dimension <= 8, largest 1-D length 32).
void ilgt2d_from_pred_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const TxfmParam *txfm_param) {
  const TX_SIZE tx_size = txfm_param->tx_size;
  const int w = tx_size_wide[tx_size];
  const int h = tx_size_high[tx_size];
  const int wlog2 = tx_size_wide_log2[tx_size];
  const int hlog2 = tx_size_high_log2[tx_size];
  assert(w <= 8 || h <= 8);

  int i, j;
  // largest 1D size allowed for LGT: 32
  // largest 2D size allowed for LGT: 8x32=256
  tran_low_t tmp[256], out[256], temp1d[32];
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  get_lgt_func[hlog2 - 2](txfm_param, 1, lgtmtx_col, w);
  get_lgt_func[wlog2 - 2](txfm_param, 0, lgtmtx_row, h);

// for inverse transform, to be consistent with av1_iht functions, we always
// apply row transforms first and column transforms second, but both
// row-first and column-first versions are implemented here for future
// tests (use different lgtmtx_col[i], and choose row or column tx first
// depending on transforms).
// NOTE(review): the enabled (#if 1) path below actually applies the column
// transforms before the row transforms, which appears to contradict the
// comment above — confirm intended ordering against av1_iht*.
#if 1
  // inverse column transforms
  for (i = 0; i < w; ++i) {
    // transpose
    for (j = 0; j < h; ++j) tmp[i * h + j] = input[j * w + i];
    ilgt_func[hlog2 - 2](&tmp[i * h], temp1d, lgtmtx_col[0]);
    // upscale, and store in place
    for (j = 0; j < h; ++j)
      tmp[i * h + j] = inv_upscale_wrt_txsize(temp1d[j], tx_size);
  }
  // inverse row transforms
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) temp1d[j] = tmp[j * h + i];
    ilgt_func[wlog2 - 2](temp1d, &out[i * w], lgtmtx_row[0]);
  }
  // downscale + sum with the destination
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      int d = i * stride + j;
      int s = i * w + j;
      dest[d] =
          clip_pixel_add(dest[d], inv_downscale_wrt_txsize(out[s], tx_size));
    }
  }
#else
  // inverse row transforms
  for (i = 0; i < h; ++i) {
    ilgt_func[wlog2 - 2](input, temp1d, lgtmtx_row[0]);
    // upscale and transpose (tmp[j*h+i] <--> tmp[j][i])
    for (j = 0; j < w; ++j)
      tmp[j * h + i] = inv_upscale_wrt_txsize(temp1d[j], tx_size);
    input += w;
  }
  // inverse column transforms
  for (i = 0; i < w; ++i)
    ilgt_func[hlog2 - 2](&tmp[i * h], &out[i * h], lgtmtx_col[0]);
  // here, out[] is the transpose of 2D block of transform coefficients

  // downscale + transform + sum with dest
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      int d = i * stride + j;
      int s = j * h + i;
      dest[d] =
          clip_pixel_add(dest[d], inv_downscale_wrt_txsize(out[s], tx_size));
    }
  }
#endif
}
707#endif // CONFIG_LGT_FROM_PRED
708
Yaowu Xuf883b422016-08-30 14:01:10 -0700709void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Lester Lu27319b62017-07-10 16:57:15 -0700710 const TxfmParam *txfm_param) {
Urvang Joshi2283d372017-10-02 17:16:45 -0700711 const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker53f93db2017-07-11 17:20:04 -0700712#if CONFIG_MRC_TX
713 assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
714#endif // CONFIG_MRC_TX
Nathan E. Eggee554f362017-10-04 14:44:38 -0400715#if !CONFIG_DAALA_TX4
Monty Montgomery554d2c32017-07-11 21:01:07 -0400716 if (tx_type == DCT_DCT) {
717 aom_idct4x4_16_add(input, dest, stride);
718 return;
719 }
Monty Montgomery02078a32017-07-11 21:22:29 -0400720#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700721 static const transform_2d IHT_4[] = {
Nathan E. Eggee554f362017-10-04 14:44:38 -0400722#if CONFIG_DAALA_TX4
Nathan E. Egge1aefb5e2017-09-16 11:28:41 -0400723 { daala_idct4, daala_idct4 }, // DCT_DCT = 0
724 { daala_idst4, daala_idct4 }, // ADST_DCT = 1
725 { daala_idct4, daala_idst4 }, // DCT_ADST = 2
726 { daala_idst4, daala_idst4 }, // ADST_ADST = 3
Nathan E. Egge5e6bda82017-09-16 10:13:51 -0400727#if CONFIG_EXT_TX
Nathan E. Egge1aefb5e2017-09-16 11:28:41 -0400728 { daala_idst4, daala_idct4 }, // FLIPADST_DCT
729 { daala_idct4, daala_idst4 }, // DCT_FLIPADST
730 { daala_idst4, daala_idst4 }, // FLIPADST_FLIPADST
731 { daala_idst4, daala_idst4 }, // ADST_FLIPADST
732 { daala_idst4, daala_idst4 }, // FLIPADST_ADST
Nathan E. Egge31f24ee2017-09-18 11:25:26 -0400733 { daala_idtx4, daala_idtx4 }, // IDTX
734 { daala_idct4, daala_idtx4 }, // V_DCT
735 { daala_idtx4, daala_idct4 }, // H_DCT
736 { daala_idst4, daala_idtx4 }, // V_ADST
737 { daala_idtx4, daala_idst4 }, // H_ADST
738 { daala_idst4, daala_idtx4 }, // V_FLIPADST
739 { daala_idtx4, daala_idst4 }, // H_FLIPADST
Nathan E. Egge5e6bda82017-09-16 10:13:51 -0400740#endif
741#else
Luca Barbatof0f98572016-09-03 12:14:15 +0200742 { aom_idct4_c, aom_idct4_c }, // DCT_DCT = 0
743 { aom_iadst4_c, aom_idct4_c }, // ADST_DCT = 1
744 { aom_idct4_c, aom_iadst4_c }, // DCT_ADST = 2
745 { aom_iadst4_c, aom_iadst4_c }, // ADST_ADST = 3
Yaowu Xuc27fc142016-08-22 16:08:15 -0700746#if CONFIG_EXT_TX
Luca Barbatof0f98572016-09-03 12:14:15 +0200747 { aom_iadst4_c, aom_idct4_c }, // FLIPADST_DCT
748 { aom_idct4_c, aom_iadst4_c }, // DCT_FLIPADST
749 { aom_iadst4_c, aom_iadst4_c }, // FLIPADST_FLIPADST
750 { aom_iadst4_c, aom_iadst4_c }, // ADST_FLIPADST
751 { aom_iadst4_c, aom_iadst4_c }, // FLIPADST_ADST
752 { iidtx4_c, iidtx4_c }, // IDTX
753 { aom_idct4_c, iidtx4_c }, // V_DCT
754 { iidtx4_c, aom_idct4_c }, // H_DCT
755 { aom_iadst4_c, iidtx4_c }, // V_ADST
756 { iidtx4_c, aom_iadst4_c }, // H_ADST
757 { aom_iadst4_c, iidtx4_c }, // V_FLIPADST
758 { iidtx4_c, aom_iadst4_c }, // H_FLIPADST
Lester Lud8b1ddc2017-07-06 16:13:29 -0700759#endif
Nathan E. Egge5e6bda82017-09-16 10:13:51 -0400760#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700761 };
762
763 int i, j;
Jonathan Matthews362d0c72017-05-09 14:53:11 +0100764 tran_low_t tmp[4][4];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700765 tran_low_t out[4][4];
766 tran_low_t *outp = &out[0][0];
767 int outstride = 4;
768
Monty Montgomerycb55dad2017-07-11 16:59:52 -0400769#if CONFIG_DCT_ONLY
770 assert(tx_type == DCT_DCT);
771#endif
772
Lester Lu708c1ec2017-06-14 14:54:49 -0700773#if CONFIG_LGT
Lester Lu918fe692017-08-17 14:39:29 -0700774 const tran_high_t *lgtmtx_col[1];
775 const tran_high_t *lgtmtx_row[1];
776 int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
777 int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
Lester Lu708c1ec2017-06-14 14:54:49 -0700778#endif
779
Yaowu Xuc27fc142016-08-22 16:08:15 -0700780 // inverse transform row vectors
781 for (i = 0; i < 4; ++i) {
Nathan E. Eggee554f362017-10-04 14:44:38 -0400782#if CONFIG_DAALA_TX4
Monty Montgomery02078a32017-07-11 21:22:29 -0400783 tran_low_t temp_in[4];
Sebastien Alaiwan77323262017-08-21 11:34:56 +0200784 for (j = 0; j < 4; j++) temp_in[j] = input[j] * 2;
Monty Montgomery02078a32017-07-11 21:22:29 -0400785 IHT_4[tx_type].rows(temp_in, out[i]);
786#else
Lester Lu708c1ec2017-06-14 14:54:49 -0700787#if CONFIG_LGT
788 if (use_lgt_row)
Lester Lu918fe692017-08-17 14:39:29 -0700789 ilgt4(input, out[i], lgtmtx_row[0]);
Lester Lu708c1ec2017-06-14 14:54:49 -0700790 else
791#endif
792 IHT_4[tx_type].rows(input, out[i]);
Monty Montgomery02078a32017-07-11 21:22:29 -0400793#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700794 input += 4;
795 }
796
797 // transpose
Jonathan Matthews362d0c72017-05-09 14:53:11 +0100798 for (i = 0; i < 4; i++) {
799 for (j = 0; j < 4; j++) {
800 tmp[j][i] = out[i][j];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700801 }
802 }
803
804 // inverse transform column vectors
805 for (i = 0; i < 4; ++i) {
Lester Lu708c1ec2017-06-14 14:54:49 -0700806#if CONFIG_LGT
807 if (use_lgt_col)
Lester Lu918fe692017-08-17 14:39:29 -0700808 ilgt4(tmp[i], out[i], lgtmtx_col[0]);
Lester Lu708c1ec2017-06-14 14:54:49 -0700809 else
810#endif
811 IHT_4[tx_type].cols(tmp[i], out[i]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700812 }
813
814#if CONFIG_EXT_TX
815 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
816#endif
817
818 // Sum with the destination
819 for (i = 0; i < 4; ++i) {
820 for (j = 0; j < 4; ++j) {
821 int d = i * stride + j;
822 int s = j * outstride + i;
Nathan E. Eggee554f362017-10-04 14:44:38 -0400823#if CONFIG_DAALA_TX4
Yaowu Xuc27fc142016-08-22 16:08:15 -0700824 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
Monty Montgomery02078a32017-07-11 21:22:29 -0400825#else
826 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
827#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -0700828 }
829 }
830}
831
// Inverse 4x8 (width 4, height 8) hybrid transform.
// Reconstructs the spatial-domain residual from the 32 coefficients in
// |input| and adds it, clamped to the pixel range, into |dest| (row stride
// |stride|). The pair of 1-D kernels is selected by txfm_param->tx_type.
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  // MRC_DCT is only valid for 32x32 blocks.
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // Per-tx-type 1-D kernel pairs. First member is the column (vertical,
  // 8-point) transform, second the row (horizontal, 4-point) transform,
  // as shown by how .rows/.cols are applied below.
  static const transform_2d IHT_4x8[] = {
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
    { daala_idct8, daala_idct4 },  // DCT_DCT = 0
    { daala_idst8, daala_idct4 },  // ADST_DCT = 1
    { daala_idct8, daala_idst4 },  // DCT_ADST = 2
    { daala_idst8, daala_idst4 },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { daala_idst8, daala_idct4 },  // FLIPADST_DCT
    { daala_idct8, daala_idst4 },  // DCT_FLIPADST
    { daala_idst8, daala_idst4 },  // FLIPADST_FLIPADST
    { daala_idst8, daala_idst4 },  // ADST_FLIPADST
    { daala_idst8, daala_idst4 },  // FLIPADST_ADST
    { daala_idtx8, daala_idtx4 },  // IDTX
    { daala_idct8, daala_idtx4 },  // V_DCT
    { daala_idtx8, daala_idct4 },  // H_DCT
    { daala_idst8, daala_idtx4 },  // V_ADST
    { daala_idtx8, daala_idst4 },  // H_ADST
    { daala_idst8, daala_idtx4 },  // V_FLIPADST
    { daala_idtx8, daala_idst4 },  // H_FLIPADST
#endif
#else
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
#endif
#endif
  };

  const int n = 4;   // block width == row transform length
  const int n2 = 8;  // block height == column transform length
  int i, j;
  // tmp/out hold the block transposed ([column][row]); outtmp buffers one
  // inverse-transformed row before the transposed store.
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

#if CONFIG_LGT
  // Probe for per-edge LGT matrices (column = 8-point, row = 4-point).
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
  int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif

  // Multi-way scaling matrix (bits):
  // LGT/AV1 row,col input+0, rowTX+.5, mid+.5, colTX+1, out-5 == -3
  // LGT row, Daala col input+0, rowTX+.5, mid+.5, colTX+0, out-4 == -3
  // Daala row, LGT col input+1, rowTX+0, mid+0, colTX+1, out-5 == -3
  // Daala row,col input+1, rowTX+0, mid+0, colTX+0, out-4 == -3

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
    if (use_lgt_row) {
      // Scaling cases 1 and 2 above
      // No input scaling
      // Row transform (LGT; scales up .5 bits)
      ilgt4(input, outtmp, lgtmtx_row[0]);
      // Transpose and mid scaling up by .5 bit
      for (j = 0; j < n; ++j)
        tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    } else {
#endif
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
      // Daala row transform; Scaling cases 3 and 4 above
      tran_low_t temp_in[4];
      // Input scaling up by 1 bit
      for (j = 0; j < n; j++) temp_in[j] = input[j] * 2;
      // Row transform; Daala does not scale
      IHT_4x8[tx_type].rows(temp_in, outtmp);
      // Transpose; no mid scaling
      for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
#else
      // AV1 row transform; Scaling case 1 only
      // Row transform (AV1 scales up .5 bits)
      IHT_4x8[tx_type].rows(input, outtmp);
      // Transpose and mid scaling up by .5 bit
      for (j = 0; j < n; ++j)
        tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
#if CONFIG_LGT
    }
#endif
    input += n;
  }

  // inverse transform column vectors
  // AV1/LGT column TX scales up by 1 bit, Daala does not scale
  for (i = 0; i < n; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[0]);
    else
#endif
      IHT_4x8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // NOTE(review): maybe_flip_strides appears to redirect outp/dest so the
  // FLIPADST variants are written mirrored — helper defined elsewhere.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination: round down by the per-case output shift,
  // clamp to pixel range, and accumulate. Note |s| reads the transposed
  // layout back in raster order.
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
#if CONFIG_LGT
      if (use_lgt_col)
        // Output Scaling cases 1, 3
        dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
      else
#endif
        // Output scaling cases 2, 4
        dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
      // Output scaling case 1 only
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#endif
    }
  }
}
975
// Inverse 8x4 (width 8, height 4) hybrid transform.
// Reconstructs the residual from the 32 coefficients in |input| and adds it,
// clamped to pixel range, into |dest| (row stride |stride|). Mirror image of
// av1_iht4x8_32_add_c with the row/column kernel lengths swapped.
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  // MRC_DCT is only valid for 32x32 blocks.
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // Per-tx-type 1-D kernel pairs: first member is the column (4-point)
  // transform, second the row (8-point) transform.
  static const transform_2d IHT_8x4[] = {
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
    { daala_idct4, daala_idct8 },  // DCT_DCT = 0
    { daala_idst4, daala_idct8 },  // ADST_DCT = 1
    { daala_idct4, daala_idst8 },  // DCT_ADST = 2
    { daala_idst4, daala_idst8 },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { daala_idst4, daala_idct8 },  // FLIPADST_DCT
    { daala_idct4, daala_idst8 },  // DCT_FLIPADST
    { daala_idst4, daala_idst8 },  // FLIPADST_FLIPADST
    { daala_idst4, daala_idst8 },  // ADST_FLIPADST
    { daala_idst4, daala_idst8 },  // FLIPADST_ADST
    { daala_idtx4, daala_idtx8 },  // IDTX
    { daala_idct4, daala_idtx8 },  // V_DCT
    { daala_idtx4, daala_idct8 },  // H_DCT
    { daala_idst4, daala_idtx8 },  // V_ADST
    { daala_idtx4, daala_idst8 },  // H_ADST
    { daala_idst4, daala_idtx8 },  // V_FLIPADST
    { daala_idtx4, daala_idst8 },  // H_FLIPADST
#endif
#else
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
#endif
#endif
  };

  const int n = 4;   // block height == column transform length
  const int n2 = 8;  // block width == row transform length

  int i, j;
  // tmp/out hold the block transposed ([column][row]); outtmp buffers one
  // inverse-transformed row before the transposed store.
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

#if CONFIG_LGT
  // Probe for per-edge LGT matrices (column = 4-point, row = 8-point).
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
  int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif

  // Multi-way scaling matrix (bits):
  // LGT/AV1 row,col input+0, rowTX+1, mid+.5, colTX+.5, out-5 == -3
  // LGT row, Daala col input+0, rowTX+1, mid+.5, colTX+.5, out-4 == -3
  // Daala row, LGT col input+1, rowTX+0, mid+0, colTX+1, out-5 == -3
  // Daala row,col input+1, rowTX+0, mid+0, colTX+0, out-4 == -3

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
#if CONFIG_LGT
    if (use_lgt_row) {
      // Scaling cases 1 and 2 above
      // No input scaling
      // Row transform (LGT; scales up 1 bit)
      ilgt8(input, outtmp, lgtmtx_row[0]);
      // Transpose and mid scaling up by .5 bit
      for (j = 0; j < n2; ++j)
        tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    } else {
#endif
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
      // Daala row transform; Scaling cases 3 and 4 above
      tran_low_t temp_in[8];
      // Input scaling up by 1 bit
      for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2;
      // Row transform; Daala does not scale
      IHT_8x4[tx_type].rows(temp_in, outtmp);
      // Transpose; no mid scaling
      for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j];
#else
      // AV1 row transform; Scaling case 1 only
      // Row transform (AV1 scales up 1 bit)
      IHT_8x4[tx_type].rows(input, outtmp);
      // Transpose and mid scaling up by .5 bit
      for (j = 0; j < n2; ++j)
        tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
#if CONFIG_LGT
    }
#endif
    input += n2;
  }

  // inverse transform column vectors
  // AV1 and LGT scale up by .5 bits; Daala does not scale
  for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[0]);
    else
#endif
      IHT_8x4[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // NOTE(review): maybe_flip_strides appears to redirect outp/dest so the
  // FLIPADST variants are written mirrored — helper defined elsewhere.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination: round by the per-case output shift, clamp,
  // and accumulate; |s| reads the transposed layout back in raster order.
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
#if CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8
#if CONFIG_LGT
      if (use_lgt_col)
        // Output scaling cases 1, 3
        dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
      else
#endif
        // Output scaling cases 2, 4
        dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
      // Output scaling case 1
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#endif
    }
  }
}
1120
Debargha Mukherjee751de382016-12-13 02:54:22 -08001121void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Lester Lu27319b62017-07-10 16:57:15 -07001122 const TxfmParam *txfm_param) {
Urvang Joshi2283d372017-10-02 17:16:45 -07001123 const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker53f93db2017-07-11 17:20:04 -07001124#if CONFIG_MRC_TX
1125 assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
1126#endif // CONFIG_MRC_TX
Monty Montgomerycb55dad2017-07-11 16:59:52 -04001127#if CONFIG_DCT_ONLY
1128 assert(tx_type == DCT_DCT);
1129#endif
Debargha Mukherjee751de382016-12-13 02:54:22 -08001130 static const transform_2d IHT_4x16[] = {
1131 { aom_idct16_c, aom_idct4_c }, // DCT_DCT
1132 { aom_iadst16_c, aom_idct4_c }, // ADST_DCT
1133 { aom_idct16_c, aom_iadst4_c }, // DCT_ADST
1134 { aom_iadst16_c, aom_iadst4_c }, // ADST_ADST
1135#if CONFIG_EXT_TX
1136 { aom_iadst16_c, aom_idct4_c }, // FLIPADST_DCT
1137 { aom_idct16_c, aom_iadst4_c }, // DCT_FLIPADST
1138 { aom_iadst16_c, aom_iadst4_c }, // FLIPADST_FLIPADST
1139 { aom_iadst16_c, aom_iadst4_c }, // ADST_FLIPADST
1140 { aom_iadst16_c, aom_iadst4_c }, // FLIPADST_ADST
1141 { iidtx16_c, iidtx4_c }, // IDTX
1142 { aom_idct16_c, iidtx4_c }, // V_DCT
1143 { iidtx16_c, aom_idct4_c }, // H_DCT
1144 { aom_iadst16_c, iidtx4_c }, // V_ADST
1145 { iidtx16_c, aom_iadst4_c }, // H_ADST
1146 { aom_iadst16_c, iidtx4_c }, // V_FLIPADST
1147 { iidtx16_c, aom_iadst4_c }, // H_FLIPADST
1148#endif
1149 };
1150
1151 const int n = 4;
1152 const int n4 = 16;
1153 int i, j;
Jonathan Matthews362d0c72017-05-09 14:53:11 +01001154 tran_low_t out[4][16], tmp[4][16], outtmp[4];
Debargha Mukherjee751de382016-12-13 02:54:22 -08001155 tran_low_t *outp = &out[0][0];
1156 int outstride = n4;
1157
Lester Lu708c1ec2017-06-14 14:54:49 -07001158#if CONFIG_LGT
Lester Lu918fe692017-08-17 14:39:29 -07001159 const tran_high_t *lgtmtx_row[1];
1160 int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
Lester Lu708c1ec2017-06-14 14:54:49 -07001161#endif
1162
Debargha Mukherjee751de382016-12-13 02:54:22 -08001163 // inverse transform row vectors and transpose
1164 for (i = 0; i < n4; ++i) {
Lester Lu708c1ec2017-06-14 14:54:49 -07001165#if CONFIG_LGT
1166 if (use_lgt_row)
Lester Lu918fe692017-08-17 14:39:29 -07001167 ilgt4(input, outtmp, lgtmtx_row[0]);
Lester Lu708c1ec2017-06-14 14:54:49 -07001168 else
1169#endif
1170 IHT_4x16[tx_type].rows(input, outtmp);
Jonathan Matthews362d0c72017-05-09 14:53:11 +01001171 for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
Debargha Mukherjee751de382016-12-13 02:54:22 -08001172 input += n;
1173 }
1174
1175 // inverse transform column vectors
Lester Lu708c1ec2017-06-14 14:54:49 -07001176 for (i = 0; i < n; ++i) {
1177 IHT_4x16[tx_type].cols(tmp[i], out[i]);
1178 }
Debargha Mukherjee751de382016-12-13 02:54:22 -08001179
1180#if CONFIG_EXT_TX
1181 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
1182#endif
1183
1184 // Sum with the destination
1185 for (i = 0; i < n4; ++i) {
1186 for (j = 0; j < n; ++j) {
1187 int d = i * stride + j;
1188 int s = j * outstride + i;
1189 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
1190 }
1191 }
1192}
1193
1194void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Lester Lu27319b62017-07-10 16:57:15 -07001195 const TxfmParam *txfm_param) {
Urvang Joshi2283d372017-10-02 17:16:45 -07001196 const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker53f93db2017-07-11 17:20:04 -07001197#if CONFIG_MRC_TX
1198 assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
1199#endif // CONFIG_MRC_TX
Monty Montgomerycb55dad2017-07-11 16:59:52 -04001200#if CONFIG_DCT_ONLY
1201 assert(tx_type == DCT_DCT);
1202#endif
Debargha Mukherjee751de382016-12-13 02:54:22 -08001203 static const transform_2d IHT_16x4[] = {
1204 { aom_idct4_c, aom_idct16_c }, // DCT_DCT
1205 { aom_iadst4_c, aom_idct16_c }, // ADST_DCT
1206 { aom_idct4_c, aom_iadst16_c }, // DCT_ADST
1207 { aom_iadst4_c, aom_iadst16_c }, // ADST_ADST
1208#if CONFIG_EXT_TX
1209 { aom_iadst4_c, aom_idct16_c }, // FLIPADST_DCT
1210 { aom_idct4_c, aom_iadst16_c }, // DCT_FLIPADST
1211 { aom_iadst4_c, aom_iadst16_c }, // FLIPADST_FLIPADST
1212 { aom_iadst4_c, aom_iadst16_c }, // ADST_FLIPADST
1213 { aom_iadst4_c, aom_iadst16_c }, // FLIPADST_ADST
1214 { iidtx4_c, iidtx16_c }, // IDTX
1215 { aom_idct4_c, iidtx16_c }, // V_DCT
1216 { iidtx4_c, aom_idct16_c }, // H_DCT
1217 { aom_iadst4_c, iidtx16_c }, // V_ADST
1218 { iidtx4_c, aom_iadst16_c }, // H_ADST
1219 { aom_iadst4_c, iidtx16_c }, // V_FLIPADST
1220 { iidtx4_c, aom_iadst16_c }, // H_FLIPADST
1221#endif
1222 };
Lester Lud8b1ddc2017-07-06 16:13:29 -07001223
Debargha Mukherjee751de382016-12-13 02:54:22 -08001224 const int n = 4;
1225 const int n4 = 16;
1226
1227 int i, j;
Jonathan Matthews362d0c72017-05-09 14:53:11 +01001228 tran_low_t out[16][4], tmp[16][4], outtmp[16];
Debargha Mukherjee751de382016-12-13 02:54:22 -08001229 tran_low_t *outp = &out[0][0];
1230 int outstride = n;
1231
Lester Lu708c1ec2017-06-14 14:54:49 -07001232#if CONFIG_LGT
Lester Lu918fe692017-08-17 14:39:29 -07001233 const tran_high_t *lgtmtx_col[1];
1234 int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
Lester Lu708c1ec2017-06-14 14:54:49 -07001235#endif
1236
Debargha Mukherjee751de382016-12-13 02:54:22 -08001237 // inverse transform row vectors and transpose
1238 for (i = 0; i < n; ++i) {
1239 IHT_16x4[tx_type].rows(input, outtmp);
Jonathan Matthews362d0c72017-05-09 14:53:11 +01001240 for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
Debargha Mukherjee751de382016-12-13 02:54:22 -08001241 input += n4;
1242 }
1243
1244 // inverse transform column vectors
Lester Lu708c1ec2017-06-14 14:54:49 -07001245 for (i = 0; i < n4; ++i) {
1246#if CONFIG_LGT
1247 if (use_lgt_col)
Lester Lu918fe692017-08-17 14:39:29 -07001248 ilgt4(tmp[i], out[i], lgtmtx_col[0]);
Lester Lu708c1ec2017-06-14 14:54:49 -07001249 else
1250#endif
1251 IHT_16x4[tx_type].cols(tmp[i], out[i]);
1252 }
Debargha Mukherjee751de382016-12-13 02:54:22 -08001253
1254#if CONFIG_EXT_TX
1255 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
1256#endif
1257
1258 // Sum with the destination
1259 for (i = 0; i < n; ++i) {
1260 for (j = 0; j < n4; ++j) {
1261 int d = i * stride + j;
1262 int s = j * outstride + i;
1263 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
1264 }
1265 }
1266}
1267
// Inverse 8x16 (width 8, height 16) hybrid transform.
// Reconstructs the residual from the 128 coefficients in |input| and adds
// it, clamped to pixel range, into |dest| (row stride |stride|). The 1-D
// kernel pair is selected by txfm_param->tx_type.
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  // MRC_DCT is only valid for 32x32 blocks.
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // Per-tx-type 1-D kernel pairs: first member is the column (16-point)
  // transform, second the row (8-point) transform.
  static const transform_2d IHT_8x16[] = {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
    { daala_idct16, daala_idct8 },  // DCT_DCT = 0
    { daala_idst16, daala_idct8 },  // ADST_DCT = 1
    { daala_idct16, daala_idst8 },  // DCT_ADST = 2
    { daala_idst16, daala_idst8 },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { daala_idst16, daala_idct8 },  // FLIPADST_DCT
    { daala_idct16, daala_idst8 },  // DCT_FLIPADST
    { daala_idst16, daala_idst8 },  // FLIPADST_FLIPADST
    { daala_idst16, daala_idst8 },  // ADST_FLIPADST
    { daala_idst16, daala_idst8 },  // FLIPADST_ADST
    { daala_idtx16, daala_idtx8 },  // IDTX
    { daala_idct16, daala_idtx8 },  // V_DCT
    { daala_idtx16, daala_idct8 },  // H_DCT
    { daala_idst16, daala_idtx8 },  // V_ADST
    { daala_idtx16, daala_idst8 },  // H_ADST
    { daala_idst16, daala_idtx8 },  // V_FLIPADST
    { daala_idtx16, daala_idst8 },  // H_FLIPADST
#endif
#else
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
#endif
#endif
  };

  const int n = 8;    // block width == row transform length
  const int n2 = 16;  // block height == column transform length
  int i, j;
  // tmp/out hold the block transposed ([column][row]); outtmp buffers one
  // inverse-transformed row before the transposed store.
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

#if CONFIG_LGT
  // Row LGT only: there is no 16-point LGT for the columns.
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif

  // Multi-way scaling matrix (bits):
  // LGT/AV1 row, AV1 col input+0, rowTX+1, mid+.5, colTX+1.5, out-6 == -3
  // LGT row, Daala col input+0, rowTX+1, mid+0, colTX+0, out-4 == -3
  // Daala row, LGT col N/A (no 16-point LGT)
  // Daala row,col input+1, rowTX+0, mid+0, colTX+0, out-4 == -3

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
    if (use_lgt_row) {
      // Scaling cases 1 and 2 above
      // No input scaling
      // Row transform (LGT; scales up 1 bit)
      ilgt8(input, outtmp, lgtmtx_row[0]);
      // Transpose and mid scaling
      for (j = 0; j < n; ++j) {
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
        // Mid scaling case 2
        tmp[j][i] = outtmp[j];
#else
        // Mid scaling case 1
        tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
      }
    } else {
#endif
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
      tran_low_t temp_in[8];
      // Input scaling case 4
      for (j = 0; j < n; j++) temp_in[j] = input[j] * 2;
      // Row transform (Daala does not scale)
      IHT_8x16[tx_type].rows(temp_in, outtmp);
      // Transpose (no mid scaling)
      for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
#else
      // Case 1; no input scaling
      // Row transform (AV1 scales up 1 bit)
      IHT_8x16[tx_type].rows(input, outtmp);
      // Transpose and mid scaling up .5 bits
      for (j = 0; j < n; ++j)
        tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
#if CONFIG_LGT
    }
#endif
    input += n;
  }

  // inverse transform column vectors
  // AV1 column TX scales up by 1.5 bit, Daala does not scale
  for (i = 0; i < n; ++i) {
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // NOTE(review): maybe_flip_strides appears to redirect outp/dest so the
  // FLIPADST variants are written mirrored — helper defined elsewhere.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination: round by the per-case output shift, clamp,
  // and accumulate; |s| reads the transposed layout back in raster order.
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
      // Output scaling cases 2 and 4
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
      // Output scaling case 1
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
    }
  }
}
1404
Yaowu Xuf883b422016-08-30 14:01:10 -07001405void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Lester Lu27319b62017-07-10 16:57:15 -07001406 const TxfmParam *txfm_param) {
Urvang Joshi2283d372017-10-02 17:16:45 -07001407 const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker53f93db2017-07-11 17:20:04 -07001408#if CONFIG_MRC_TX
1409 assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
1410#endif // CONFIG_MRC_TX
Monty Montgomerycb55dad2017-07-11 16:59:52 -04001411#if CONFIG_DCT_ONLY
1412 assert(tx_type == DCT_DCT);
1413#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07001414 static const transform_2d IHT_16x8[] = {
Monty Montgomery7eb44542017-10-19 20:47:51 -04001415#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
1416 { daala_idct8, daala_idct16 }, // DCT_DCT = 0
1417 { daala_idst8, daala_idct16 }, // ADST_DCT = 1
1418 { daala_idct8, daala_idst16 }, // DCT_ADST = 2
1419 { daala_idst8, daala_idst16 }, // ADST_ADST = 3
1420#if CONFIG_EXT_TX
1421 { daala_idst8, daala_idct16 }, // FLIPADST_DCT
1422 { daala_idct8, daala_idst16 }, // DCT_FLIPADST
1423 { daala_idst8, daala_idst16 }, // FLIPADST_FLIPADST
1424 { daala_idst8, daala_idst16 }, // ADST_FLIPADST
1425 { daala_idst8, daala_idst16 }, // FLIPADST_ADST
1426 { daala_idtx8, daala_idtx16 }, // IDTX
1427 { daala_idct8, daala_idtx16 }, // V_DCT
1428 { daala_idtx8, daala_idct16 }, // H_DCT
1429 { daala_idst8, daala_idtx16 }, // V_ADST
1430 { daala_idtx8, daala_idst16 }, // H_ADST
1431 { daala_idst8, daala_idtx16 }, // V_FLIPADST
1432 { daala_idtx8, daala_idst16 }, // H_FLIPADST
1433#endif
1434#else
Luca Barbatof0f98572016-09-03 12:14:15 +02001435 { aom_idct8_c, aom_idct16_c }, // DCT_DCT
1436 { aom_iadst8_c, aom_idct16_c }, // ADST_DCT
1437 { aom_idct8_c, aom_iadst16_c }, // DCT_ADST
1438 { aom_iadst8_c, aom_iadst16_c }, // ADST_ADST
Jingning Hanec419e02016-11-01 18:19:30 -07001439#if CONFIG_EXT_TX
Luca Barbatof0f98572016-09-03 12:14:15 +02001440 { aom_iadst8_c, aom_idct16_c }, // FLIPADST_DCT
1441 { aom_idct8_c, aom_iadst16_c }, // DCT_FLIPADST
1442 { aom_iadst8_c, aom_iadst16_c }, // FLIPADST_FLIPADST
1443 { aom_iadst8_c, aom_iadst16_c }, // ADST_FLIPADST
1444 { aom_iadst8_c, aom_iadst16_c }, // FLIPADST_ADST
1445 { iidtx8_c, iidtx16_c }, // IDTX
1446 { aom_idct8_c, iidtx16_c }, // V_DCT
1447 { iidtx8_c, aom_idct16_c }, // H_DCT
1448 { aom_iadst8_c, iidtx16_c }, // V_ADST
1449 { iidtx8_c, aom_iadst16_c }, // H_ADST
1450 { aom_iadst8_c, iidtx16_c }, // V_FLIPADST
1451 { iidtx8_c, aom_iadst16_c }, // H_FLIPADST
Jingning Hanec419e02016-11-01 18:19:30 -07001452#endif
Monty Montgomery7eb44542017-10-19 20:47:51 -04001453#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07001454 };
Lester Lud8b1ddc2017-07-06 16:13:29 -07001455
Yaowu Xuc27fc142016-08-22 16:08:15 -07001456 const int n = 8;
1457 const int n2 = 16;
1458
1459 int i, j;
Jonathan Matthews362d0c72017-05-09 14:53:11 +01001460 tran_low_t out[16][8], tmp[16][8], outtmp[16];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001461 tran_low_t *outp = &out[0][0];
1462 int outstride = n;
1463
Lester Lu708c1ec2017-06-14 14:54:49 -07001464#if CONFIG_LGT
Lester Lu918fe692017-08-17 14:39:29 -07001465 const tran_high_t *lgtmtx_col[1];
1466 int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
Lester Lu708c1ec2017-06-14 14:54:49 -07001467#endif
1468
Monty Montgomery7eb44542017-10-19 20:47:51 -04001469 // Multi-way scaling matrix (bits):
1470 // AV1 row, LGT/AV1 col input+0, rowTX+1.5, mid+.5, colTX+1, out-6 == -3
1471 // LGT row, Daala col N/A (no 16-point LGT)
1472 // Daala row, LGT col input+1, rowTX+0, mid+1, colTX+1, out-6 == -3
1473 // Daala row, col input+1, rowTX+0, mid+0, colTX+0, out-4 == -3
1474
Yaowu Xuc27fc142016-08-22 16:08:15 -07001475 // inverse transform row vectors and transpose
1476 for (i = 0; i < n; ++i) {
Monty Montgomery7eb44542017-10-19 20:47:51 -04001477#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
1478 tran_low_t temp_in[16];
1479 // Input scaling cases 3 and 4
1480 for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2;
1481 // Daala row TX, no scaling
1482 IHT_16x8[tx_type].rows(temp_in, outtmp);
1483// Transpose and mid scaling
1484#if CONFIG_LGT
1485 if (use_lgt_col)
1486 // Case 3
1487 for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j] * 2;
1488 else
1489#endif
1490 // Case 4
1491 for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j];
1492#else
1493 // Case 1
1494 // No input scaling
1495 // Row transform, AV1 scales up by 1.5 bits
Yaowu Xuc27fc142016-08-22 16:08:15 -07001496 IHT_16x8[tx_type].rows(input, outtmp);
Monty Montgomery7eb44542017-10-19 20:47:51 -04001497 // Transpose and mid scaling up .5 bits
Yaowu Xuc27fc142016-08-22 16:08:15 -07001498 for (j = 0; j < n2; ++j)
Jonathan Matthews362d0c72017-05-09 14:53:11 +01001499 tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
Monty Montgomery7eb44542017-10-19 20:47:51 -04001500#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07001501 input += n2;
1502 }
1503
1504 // inverse transform column vectors
  // AV1/LGT scales up by 1 bit, Daala does not scale
Yaowu Xuc27fc142016-08-22 16:08:15 -07001506 for (i = 0; i < n2; ++i) {
Lester Lu708c1ec2017-06-14 14:54:49 -07001507#if CONFIG_LGT
1508 if (use_lgt_col)
Lester Lu918fe692017-08-17 14:39:29 -07001509 ilgt8(tmp[i], out[i], lgtmtx_col[0]);
Lester Lu708c1ec2017-06-14 14:54:49 -07001510 else
1511#endif
1512 IHT_16x8[tx_type].cols(tmp[i], out[i]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001513 }
1514
Jingning Hanec419e02016-11-01 18:19:30 -07001515#if CONFIG_EXT_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -07001516 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
Jingning Hanec419e02016-11-01 18:19:30 -07001517#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07001518
1519 // Sum with the destination
1520 for (i = 0; i < n; ++i) {
1521 for (j = 0; j < n2; ++j) {
1522 int d = i * stride + j;
1523 int s = j * outstride + i;
Monty Montgomery7eb44542017-10-19 20:47:51 -04001524// Output scaling
1525#if CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16
1526#if CONFIG_LGT
1527 if (use_lgt_col)
1528 // case 3
1529 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
1530 else
1531#endif
1532 // case 4
1533 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
1534#else
1535 // case 1
Yaowu Xuc27fc142016-08-22 16:08:15 -07001536 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
Monty Montgomery7eb44542017-10-19 20:47:51 -04001537#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07001538 }
1539 }
1540}
1541
Debargha Mukherjee751de382016-12-13 02:54:22 -08001542void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Lester Lu27319b62017-07-10 16:57:15 -07001543 const TxfmParam *txfm_param) {
Urvang Joshi2283d372017-10-02 17:16:45 -07001544 const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker53f93db2017-07-11 17:20:04 -07001545#if CONFIG_MRC_TX
1546 assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
1547#endif // CONFIG_MRC_TX
Monty Montgomerycb55dad2017-07-11 16:59:52 -04001548#if CONFIG_DCT_ONLY
1549 assert(tx_type == DCT_DCT);
1550#endif
Debargha Mukherjee751de382016-12-13 02:54:22 -08001551 static const transform_2d IHT_8x32[] = {
1552 { aom_idct32_c, aom_idct8_c }, // DCT_DCT
1553 { ihalfright32_c, aom_idct8_c }, // ADST_DCT
1554 { aom_idct32_c, aom_iadst8_c }, // DCT_ADST
1555 { ihalfright32_c, aom_iadst8_c }, // ADST_ADST
1556#if CONFIG_EXT_TX
1557 { ihalfright32_c, aom_idct8_c }, // FLIPADST_DCT
1558 { aom_idct32_c, aom_iadst8_c }, // DCT_FLIPADST
1559 { ihalfright32_c, aom_iadst8_c }, // FLIPADST_FLIPADST
1560 { ihalfright32_c, aom_iadst8_c }, // ADST_FLIPADST
1561 { ihalfright32_c, aom_iadst8_c }, // FLIPADST_ADST
1562 { iidtx32_c, iidtx8_c }, // IDTX
1563 { aom_idct32_c, iidtx8_c }, // V_DCT
1564 { iidtx32_c, aom_idct8_c }, // H_DCT
1565 { ihalfright32_c, iidtx8_c }, // V_ADST
1566 { iidtx32_c, aom_iadst8_c }, // H_ADST
1567 { ihalfright32_c, iidtx8_c }, // V_FLIPADST
1568 { iidtx32_c, aom_iadst8_c }, // H_FLIPADST
1569#endif
1570 };
1571
1572 const int n = 8;
1573 const int n4 = 32;
1574 int i, j;
Jonathan Matthews362d0c72017-05-09 14:53:11 +01001575 tran_low_t out[8][32], tmp[8][32], outtmp[8];
Debargha Mukherjee751de382016-12-13 02:54:22 -08001576 tran_low_t *outp = &out[0][0];
1577 int outstride = n4;
1578
Lester Lu708c1ec2017-06-14 14:54:49 -07001579#if CONFIG_LGT
Lester Lu918fe692017-08-17 14:39:29 -07001580 const tran_high_t *lgtmtx_row[1];
1581 int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
Lester Lu708c1ec2017-06-14 14:54:49 -07001582#endif
1583
Debargha Mukherjee751de382016-12-13 02:54:22 -08001584 // inverse transform row vectors and transpose
1585 for (i = 0; i < n4; ++i) {
Lester Lu708c1ec2017-06-14 14:54:49 -07001586#if CONFIG_LGT
1587 if (use_lgt_row)
Lester Lu918fe692017-08-17 14:39:29 -07001588 ilgt8(input, outtmp, lgtmtx_row[0]);
Lester Lu708c1ec2017-06-14 14:54:49 -07001589 else
1590#endif
1591 IHT_8x32[tx_type].rows(input, outtmp);
Jonathan Matthews362d0c72017-05-09 14:53:11 +01001592 for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
Debargha Mukherjee751de382016-12-13 02:54:22 -08001593 input += n;
1594 }
1595
1596 // inverse transform column vectors
Lester Lu708c1ec2017-06-14 14:54:49 -07001597 for (i = 0; i < n; ++i) {
1598 IHT_8x32[tx_type].cols(tmp[i], out[i]);
1599 }
Debargha Mukherjee751de382016-12-13 02:54:22 -08001600
1601#if CONFIG_EXT_TX
1602 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
1603#endif
1604
1605 // Sum with the destination
1606 for (i = 0; i < n4; ++i) {
1607 for (j = 0; j < n; ++j) {
1608 int d = i * stride + j;
1609 int s = j * outstride + i;
1610 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
1611 }
1612 }
1613}
1614
1615void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
Lester Lu27319b62017-07-10 16:57:15 -07001616 const TxfmParam *txfm_param) {
Urvang Joshi2283d372017-10-02 17:16:45 -07001617 const TX_TYPE tx_type = txfm_param->tx_type;
Sarah Parker53f93db2017-07-11 17:20:04 -07001618#if CONFIG_MRC_TX
1619 assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
1620#endif // CONFIG_MRC_TX
Monty Montgomerycb55dad2017-07-11 16:59:52 -04001621#if CONFIG_DCT_ONLY
1622 assert(tx_type == DCT_DCT);
1623#endif
Debargha Mukherjee751de382016-12-13 02:54:22 -08001624 static const transform_2d IHT_32x8[] = {
1625 { aom_idct8_c, aom_idct32_c }, // DCT_DCT
1626 { aom_iadst8_c, aom_idct32_c }, // ADST_DCT
1627 { aom_idct8_c, ihalfright32_c }, // DCT_ADST
1628 { aom_iadst8_c, ihalfright32_c }, // ADST_ADST
1629#if CONFIG_EXT_TX
1630 { aom_iadst8_c, aom_idct32_c }, // FLIPADST_DCT
1631 { aom_idct8_c, ihalfright32_c }, // DCT_FLIPADST
1632 { aom_iadst8_c, ihalfright32_c }, // FLIPADST_FLIPADST
1633 { aom_iadst8_c, ihalfright32_c }, // ADST_FLIPADST
1634 { aom_iadst8_c, ihalfright32_c }, // FLIPADST_ADST
1635 { iidtx8_c, iidtx32_c }, // IDTX
1636 { aom_idct8_c, iidtx32_c }, // V_DCT
1637 { iidtx8_c, aom_idct32_c }, // H_DCT
1638 { aom_iadst8_c, iidtx32_c }, // V_ADST
1639 { iidtx8_c, ihalfright32_c }, // H_ADST
1640 { aom_iadst8_c, iidtx32_c }, // V_FLIPADST
1641 { iidtx8_c, ihalfright32_c }, // H_FLIPADST
1642#endif
1643 };
Lester Lud8b1ddc2017-07-06 16:13:29 -07001644
Debargha Mukherjee751de382016-12-13 02:54:22 -08001645 const int n = 8;
1646 const int n4 = 32;
1647
1648 int i, j;
Jonathan Matthews362d0c72017-05-09 14:53:11 +01001649 tran_low_t out[32][8], tmp[32][8], outtmp[32];
Debargha Mukherjee751de382016-12-13 02:54:22 -08001650 tran_low_t *outp = &out[0][0];
1651 int outstride = n;
1652
Lester Lu708c1ec2017-06-14 14:54:49 -07001653#if CONFIG_LGT
Lester Lu918fe692017-08-17 14:39:29 -07001654 const tran_high_t *lgtmtx_col[1];
1655 int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
Lester Lu708c1ec2017-06-14 14:54:49 -07001656#endif
1657
Debargha Mukherjee751de382016-12-13 02:54:22 -08001658 // inverse transform row vectors and transpose
1659 for (i = 0; i < n; ++i) {
1660 IHT_32x8[tx_type].rows(input, outtmp);
Jonathan Matthews362d0c72017-05-09 14:53:11 +01001661 for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
Debargha Mukherjee751de382016-12-13 02:54:22 -08001662 input += n4;
1663 }
1664
1665 // inverse transform column vectors
Lester Lu708c1ec2017-06-14 14:54:49 -07001666 for (i = 0; i < n4; ++i) {
1667#if CONFIG_LGT
1668 if (use_lgt_col)
Lester Lu918fe692017-08-17 14:39:29 -07001669 ilgt8(tmp[i], out[i], lgtmtx_col[0]);
Lester Lu708c1ec2017-06-14 14:54:49 -07001670 else
1671#endif
1672 IHT_32x8[tx_type].cols(tmp[i], out[i]);
1673 }
Debargha Mukherjee751de382016-12-13 02:54:22 -08001674
1675#if CONFIG_EXT_TX
1676 maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
1677#endif
1678
1679 // Sum with the destination
1680 for (i = 0; i < n; ++i) {
1681 for (j = 0; j < n4; ++j) {
1682 int d = i * stride + j;
1683 int s = j * outstride + i;
1684 dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
1685 }
1686 }
1687}
1688
// Inverse 16x32 (16 wide, 32 tall) hybrid transform, accumulated into `dest`.
// Row pass (16-point) with transpose, column pass (32-point), optional flip
// for FLIPADST variants, then round/clip/add into the destination pixels.
// The Daala and AV1 paths use different fixed-point scaling (see below).
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // Per-tx-type 1-D kernel pairs: .cols is the 32-point column transform,
  // .rows is the 16-point row transform.
  static const transform_2d IHT_16x32[] = {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
    { daala_idct32, daala_idct16 },  // DCT_DCT = 0
    { daala_idst32, daala_idct16 },  // ADST_DCT = 1
    { daala_idct32, daala_idst16 },  // DCT_ADST = 2
    { daala_idst32, daala_idst16 },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { daala_idst32, daala_idct16 },  // FLIPADST_DCT
    { daala_idct32, daala_idst16 },  // DCT_FLIPADST
    { daala_idst32, daala_idst16 },  // FLIPADST_FLIPADST
    { daala_idst32, daala_idst16 },  // ADST_FLIPADST
    { daala_idst32, daala_idst16 },  // FLIPADST_ADST
    { daala_idtx32, daala_idtx16 },  // IDTX
    { daala_idct32, daala_idtx16 },  // V_DCT
    { daala_idtx32, daala_idct16 },  // H_DCT
    { daala_idst32, daala_idtx16 },  // V_ADST
    { daala_idtx32, daala_idst16 },  // H_ADST
    { daala_idst32, daala_idtx16 },  // V_FLIPADST
    { daala_idtx32, daala_idst16 },  // H_FLIPADST
#endif
#else
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
#endif
#endif
  };

  const int n = 16;   // row length
  const int n2 = 32;  // column length
  int i, j;
  tran_low_t out[16][32], tmp[16][32], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose (tmp[j] holds column j)
  for (i = 0; i < n2; ++i) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
    // Daala path: scale input up 1 bit before the row transform and the
    // transposed intermediate up 2 bits; compensated by the smaller final
    // shift (5 instead of 6) below.
    tran_low_t temp_in[16];
    for (j = 0; j < n; j++) temp_in[j] = input[j] * 2;
    IHT_16x32[tx_type].rows(temp_in, outtmp);
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j] * 4;
#else
    // AV1 path: rectangular blocks need a sqrt(2) rescale between passes.
    IHT_16x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
    input += n;
  }

  // inverse transform column vectors (32-point)
  for (i = 0; i < n; ++i) IHT_16x32[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  // FLIPADST variants read the result mirrored; adjust pointers/strides.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination: drop fractional bits, clip to pixel range.
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
    }
  }
}
1782
// Inverse 32x16 (32 wide, 16 tall) hybrid transform, accumulated into `dest`.
// Row pass (32-point) with transpose, column pass (16-point), optional flip
// for FLIPADST variants, then round/clip/add into the destination pixels.
// Mirror image of av1_iht16x32_512_add_c, with the same dual scaling scheme.
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // Per-tx-type 1-D kernel pairs: .cols is the 16-point column transform,
  // .rows is the 32-point row transform.
  static const transform_2d IHT_32x16[] = {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
    { daala_idct16, daala_idct32 },  // DCT_DCT = 0
    { daala_idst16, daala_idct32 },  // ADST_DCT = 1
    { daala_idct16, daala_idst32 },  // DCT_ADST = 2
    { daala_idst16, daala_idst32 },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { daala_idst16, daala_idct32 },  // FLIPADST_DCT
    { daala_idct16, daala_idst32 },  // DCT_FLIPADST
    { daala_idst16, daala_idst32 },  // FLIPADST_FLIPADST
    { daala_idst16, daala_idst32 },  // ADST_FLIPADST
    { daala_idst16, daala_idst32 },  // FLIPADST_ADST
    { daala_idtx16, daala_idtx32 },  // IDTX
    { daala_idct16, daala_idtx32 },  // V_DCT
    { daala_idtx16, daala_idct32 },  // H_DCT
    { daala_idst16, daala_idtx32 },  // V_ADST
    { daala_idtx16, daala_idst32 },  // H_ADST
    { daala_idst16, daala_idtx32 },  // V_FLIPADST
    { daala_idtx16, daala_idst32 },  // H_FLIPADST
#endif
#else
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST
#endif
#endif
  };
  const int n = 16;   // column length
  const int n2 = 32;  // row length

  int i, j;
  tran_low_t out[32][16], tmp[32][16], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose (tmp[j] holds column j)
  for (i = 0; i < n; ++i) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
    // Daala path: scale input up 1 bit before the row transform and the
    // transposed intermediate up 2 bits; compensated by the smaller final
    // shift (5 instead of 6) below.
    tran_low_t temp_in[32];
    for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2;
    IHT_32x16[tx_type].rows(temp_in, outtmp);
    for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j] * 4;
#else
    // AV1 path: rectangular blocks need a sqrt(2) rescale between passes.
    IHT_32x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
#endif
    input += n2;
  }

  // inverse transform column vectors (16-point)
  for (i = 0; i < n2; ++i) IHT_32x16[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  // FLIPADST variants read the result mirrored; adjust pointers/strides.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination: drop fractional bits, clip to pixel range.
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
    }
  }
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07001876
// Inverse 8x8 hybrid transform, accumulated into `dest`.
// Row pass, explicit transpose, column pass, optional flip for FLIPADST
// variants, then round/clip/add into the destination pixels. Three kernel
// families may be selected at build/run time: Daala (CONFIG_DAALA_TX8),
// LGT (CONFIG_LGT, per-block decision via get_lgt8), or the AV1 defaults.
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // Per-tx-type 1-D kernel pairs: { column transform, row transform }.
  static const transform_2d IHT_8[] = {
#if CONFIG_DAALA_TX8
    { daala_idct8, daala_idct8 },  // DCT_DCT = 0
    { daala_idst8, daala_idct8 },  // ADST_DCT = 1
    { daala_idct8, daala_idst8 },  // DCT_ADST = 2
    { daala_idst8, daala_idst8 },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { daala_idst8, daala_idct8 },  // FLIPADST_DCT
    { daala_idct8, daala_idst8 },  // DCT_FLIPADST
    { daala_idst8, daala_idst8 },  // FLIPADST_FLIPADST
    { daala_idst8, daala_idst8 },  // ADST_FLIPADST
    { daala_idst8, daala_idst8 },  // FLIPADST_ADST
    { daala_idtx8, daala_idtx8 },  // IDTX
    { daala_idct8, daala_idtx8 },  // V_DCT
    { daala_idtx8, daala_idct8 },  // H_DCT
    { daala_idst8, daala_idtx8 },  // V_ADST
    { daala_idtx8, daala_idst8 },  // H_ADST
    { daala_idst8, daala_idtx8 },  // V_FLIPADST
    { daala_idtx8, daala_idst8 },  // H_FLIPADST
#endif
#else
    { aom_idct8_c, aom_idct8_c },    // DCT_DCT = 0
    { aom_iadst8_c, aom_idct8_c },   // ADST_DCT = 1
    { aom_idct8_c, aom_iadst8_c },   // DCT_ADST = 2
    { aom_iadst8_c, aom_iadst8_c },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx8_c },          // IDTX
    { aom_idct8_c, iidtx8_c },       // V_DCT
    { iidtx8_c, aom_idct8_c },       // H_DCT
    { aom_iadst8_c, iidtx8_c },      // V_ADST
    { iidtx8_c, aom_iadst8_c },      // H_ADST
    { aom_iadst8_c, iidtx8_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst8_c },      // H_FLIPADST
#endif
#endif
  };

  int i, j;
  tran_low_t tmp[8][8];
  tran_low_t out[8][8];
  tran_low_t *outp = &out[0][0];
  int outstride = 8;

#if CONFIG_LGT
  // Fetch optional 8-point LGT matrices for rows/columns independently.
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
  int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
#if CONFIG_DAALA_TX8
    // Daala path: scale input up 1 bit; compensated by the smaller final
    // shift (4 instead of 5) below. Note the Daala path takes precedence
    // over LGT for the row transform.
    tran_low_t temp_in[8];
    for (j = 0; j < 8; j++) temp_in[j] = input[j] * 2;
    IHT_8[tx_type].rows(temp_in, out[i]);
#else
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, out[i], lgtmtx_row[0]);
    else
#endif
      IHT_8[tx_type].rows(input, out[i]);
#endif
    input += 8;
  }

  // transpose
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 8; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[0]);
    else
#endif
      IHT_8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // FLIPADST variants read the result mirrored; adjust pointers/strides.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
#endif

  // Sum with the destination: drop fractional bits, clip to pixel range.
  for (i = 0; i < 8; ++i) {
    for (j = 0; j < 8; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
#if CONFIG_DAALA_TX8
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#endif
    }
  }
}
1992
// Inverse 16x16 hybrid transform, accumulated into `dest`.
// Row pass, explicit transpose, column pass, optional flip for FLIPADST
// variants, then round/clip/add into the destination pixels. Uses the Daala
// kernels and scaling when CONFIG_DAALA_TX16 is set, AV1 kernels otherwise.
void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // Per-tx-type 1-D kernel pairs: { column transform, row transform }.
  static const transform_2d IHT_16[] = {
#if CONFIG_DAALA_TX16
    { daala_idct16, daala_idct16 },  // DCT_DCT = 0
    { daala_idst16, daala_idct16 },  // ADST_DCT = 1
    { daala_idct16, daala_idst16 },  // DCT_ADST = 2
    { daala_idst16, daala_idst16 },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { daala_idst16, daala_idct16 },  // FLIPADST_DCT
    { daala_idct16, daala_idst16 },  // DCT_FLIPADST
    { daala_idst16, daala_idst16 },  // FLIPADST_FLIPADST
    { daala_idst16, daala_idst16 },  // ADST_FLIPADST
    { daala_idst16, daala_idst16 },  // FLIPADST_ADST
    { daala_idtx16, daala_idtx16 },  // IDTX
    { daala_idct16, daala_idtx16 },  // V_DCT
    { daala_idtx16, daala_idct16 },  // H_DCT
    { daala_idst16, daala_idtx16 },  // V_ADST
    { daala_idtx16, daala_idst16 },  // H_ADST
    { daala_idst16, daala_idtx16 },  // V_FLIPADST
    { daala_idtx16, daala_idst16 },  // H_FLIPADST
#endif
#else
    { aom_idct16_c, aom_idct16_c },    // DCT_DCT = 0
    { aom_iadst16_c, aom_idct16_c },   // ADST_DCT = 1
    { aom_idct16_c, aom_iadst16_c },   // DCT_ADST = 2
    { aom_iadst16_c, aom_iadst16_c },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx16_c },          // IDTX
    { aom_idct16_c, iidtx16_c },       // V_DCT
    { iidtx16_c, aom_idct16_c },       // H_DCT
    { aom_iadst16_c, iidtx16_c },      // V_ADST
    { iidtx16_c, aom_iadst16_c },      // H_ADST
    { aom_iadst16_c, iidtx16_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst16_c },      // H_FLIPADST
#endif
#endif
  };

  int i, j;
  tran_low_t tmp[16][16];
  tran_low_t out[16][16];
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // inverse transform row vectors
  for (i = 0; i < 16; ++i) {
#if CONFIG_DAALA_TX16
    // Daala path: scale input up 1 bit; compensated by the smaller final
    // shift (4 instead of 6) below.
    tran_low_t temp_in[16];
    for (j = 0; j < 16; j++) temp_in[j] = input[j] * 2;
    IHT_16[tx_type].rows(temp_in, out[i]);
#else
    IHT_16[tx_type].rows(input, out[i]);
#endif
    input += 16;
  }

  // transpose
  for (i = 0; i < 16; i++) {
    for (j = 0; j < 16; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 16; ++i) IHT_16[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  // FLIPADST variants read the result mirrored; adjust pointers/strides.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
#endif

  // Sum with the destination: drop fractional bits, clip to pixel range.
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
#if CONFIG_DAALA_TX16
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
    }
  }
}
2089
Nathan E. Eggee554f362017-10-04 14:44:38 -04002090#if CONFIG_EXT_TX || CONFIG_DAALA_TX32
// 2-D 32x32 inverse hybrid transform: inverse-transform the rows, transpose,
// inverse-transform the columns, then round-shift and add the residual into
// dest. Input and output may alias (see file-top note), so results are staged
// in local buffers.
void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // 1-D column/row transform pairs, indexed by tx_type ({ cols, rows }).
  static const transform_2d IHT_32[] = {
#if CONFIG_DAALA_TX32
    { daala_idct32, daala_idct32 },  // DCT_DCT
#if CONFIG_EXT_TX
    { daala_idst32, daala_idct32 },  // ADST_DCT
    { daala_idct32, daala_idst32 },  // DCT_ADST
    { daala_idst32, daala_idst32 },  // ADST_ADST
    { daala_idst32, daala_idct32 },  // FLIPADST_DCT
    { daala_idct32, daala_idst32 },  // DCT_FLIPADST
    { daala_idst32, daala_idst32 },  // FLIPADST_FLIPADST
    { daala_idst32, daala_idst32 },  // ADST_FLIPADST
    { daala_idst32, daala_idst32 },  // FLIPADST_ADST
    { daala_idtx32, daala_idtx32 },  // IDTX
    { daala_idct32, daala_idtx32 },  // V_DCT
    { daala_idtx32, daala_idct32 },  // H_DCT
    { daala_idst32, daala_idtx32 },  // V_ADST
    { daala_idtx32, daala_idst32 },  // H_ADST
    { daala_idst32, daala_idtx32 },  // V_FLIPADST
    { daala_idtx32, daala_idst32 },  // H_FLIPADST
#endif
#else
    { aom_idct32_c, aom_idct32_c },  // DCT_DCT
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct32_c },    // ADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_ADST
    { ihalfright32_c, ihalfright32_c },  // ADST_ADST
    { ihalfright32_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // ADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx32_c },            // IDTX
    { aom_idct32_c, iidtx32_c },         // V_DCT
    { iidtx32_c, aom_idct32_c },         // H_DCT
    { ihalfright32_c, iidtx32_c },       // V_ADST
    { iidtx32_c, ihalfright32_c },       // H_ADST
    { ihalfright32_c, iidtx32_c },       // V_FLIPADST
    { iidtx32_c, ihalfright32_c },       // H_FLIPADST
#endif
#endif
  };

  int i, j;
  tran_low_t tmp[32][32];
  tran_low_t out[32][32];
  tran_low_t *outp = &out[0][0];
  int outstride = 32;

  // inverse transform row vectors
  for (i = 0; i < 32; ++i) {
#if CONFIG_DAALA_TX32
    // The Daala transforms expect input scaled up by 2 relative to the
    // default path.
    tran_low_t temp_in[32];
    for (j = 0; j < 32; j++) temp_in[j] = input[j] * 2;
    IHT_32[tx_type].rows(temp_in, out[i]);
#else
    IHT_32[tx_type].rows(input, out[i]);
#endif
    input += 32;
  }

  // transpose
  for (i = 0; i < 32; i++) {
    for (j = 0; j < 32; j++) {
#if CONFIG_DAALA_TX32
      // Extra x4 scaling of the intermediate for the Daala path; compensated
      // by the smaller final round shift (5 instead of 6) below.
      tmp[j][i] = out[i][j] * 4;
#else
      tmp[j][i] = out[i][j];
#endif
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 32; ++i) IHT_32[tx_type].cols(tmp[i], out[i]);

  // Flipped ADST variants are handled by walking dest/out backwards.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);

  // Sum with the destination
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
#if CONFIG_DAALA_TX32
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
    }
  }
}
Nathan E. Eggee554f362017-10-04 14:44:38 -04002186#endif // CONFIG_EXT_TX || CONFIG_DAALA_TX32
Debargha Mukherjee67d13472016-11-01 14:37:39 -07002187
2188#if CONFIG_TX64X64
// 2-D 64x64 inverse hybrid transform: rows, transpose, columns, then add
// into dest. The Daala and default paths use different fixed-point scaling,
// hence the differing pre-scales and final round shifts below.
void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // 1-D column/row transform pairs, indexed by tx_type ({ cols, rows }).
  static const transform_2d IHT_64[] = {
#if CONFIG_DAALA_TX64
    { daala_idct64, daala_idct64 },  // DCT_DCT
    { daala_idst64, daala_idct64 },  // ADST_DCT
    { daala_idct64, daala_idst64 },  // DCT_ADST
    { daala_idst64, daala_idst64 },  // ADST_ADST
#if CONFIG_EXT_TX
    { daala_idst64, daala_idct64 },  // FLIPADST_DCT
    { daala_idct64, daala_idst64 },  // DCT_FLIPADST
    { daala_idst64, daala_idst64 },  // FLIPADST_FLIPADST
    { daala_idst64, daala_idst64 },  // ADST_FLIPADST
    { daala_idst64, daala_idst64 },  // FLIPADST_ADST
    { daala_idtx64, daala_idtx64 },  // IDTX
    { daala_idct64, daala_idtx64 },  // V_DCT
    { daala_idtx64, daala_idct64 },  // H_DCT
    { daala_idst64, daala_idtx64 },  // V_ADST
    { daala_idtx64, daala_idst64 },  // H_ADST
    { daala_idst64, daala_idtx64 },  // V_FLIPADST
    { daala_idtx64, daala_idst64 },  // H_FLIPADST
#endif
#else
    { idct64_col_c, idct64_row_c },      // DCT_DCT
    { ihalfright64_c, idct64_row_c },    // ADST_DCT
    { idct64_col_c, ihalfright64_c },    // DCT_ADST
    { ihalfright64_c, ihalfright64_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright64_c, idct64_row_c },    // FLIPADST_DCT
    { idct64_col_c, ihalfright64_c },    // DCT_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // FLIPADST_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // ADST_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // FLIPADST_ADST
    { iidtx64_c, iidtx64_c },            // IDTX
    { idct64_col_c, iidtx64_c },         // V_DCT
    { iidtx64_c, idct64_row_c },         // H_DCT
    { ihalfright64_c, iidtx64_c },       // V_ADST
    { iidtx64_c, ihalfright64_c },       // H_ADST
    { ihalfright64_c, iidtx64_c },       // V_FLIPADST
    { iidtx64_c, ihalfright64_c },       // H_FLIPADST
#endif
#endif
  };

  int i, j;
  tran_low_t tmp[64][64];
  tran_low_t out[64][64];
  tran_low_t *outp = &out[0][0];
  int outstride = 64;

  // inverse transform row vectors
  for (i = 0; i < 64; ++i) {
#if CONFIG_DAALA_TX64
    // Daala path: pre-scale input by 2; no intermediate rescale.
    tran_low_t temp_in[64];
    for (j = 0; j < 64; j++) temp_in[j] = input[j] * 2;
    IHT_64[tx_type].rows(temp_in, out[i]);
// Do not rescale intermediate for Daala
#else
    // Default path: halve the row output to keep the intermediate in range.
    IHT_64[tx_type].rows(input, out[i]);
    for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
#endif
    input += 64;
  }

  // transpose
  for (i = 0; i < 64; i++) {
    for (j = 0; j < 64; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 64; ++i) IHT_64[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  // Flipped ADST variants are handled by walking dest/out backwards.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
#endif  // CONFIG_EXT_TX

  // Sum with the destination
  for (i = 0; i < 64; ++i) {
    for (j = 0; j < 64; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
#if CONFIG_DAALA_TX64
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 2));
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#endif
    }
  }
}
Debargha Mukherjee2b435012017-09-28 08:30:35 -07002287
// 2-D 64x32 (wide) inverse hybrid transform. Rows are length-64, columns
// length-32; the rectangular shape requires an extra 1/sqrt(2) scaling of
// the row output to keep the overall transform unitary.
void av1_iht64x32_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // 1-D column/row transform pairs, indexed by tx_type ({ cols, rows }).
  static const transform_2d IHT_64x32[] = {
    { aom_idct32_c, idct64_row_c },      // DCT_DCT
    { ihalfright32_c, idct64_row_c },    // ADST_DCT
    { aom_idct32_c, ihalfright64_c },    // DCT_ADST
    { ihalfright32_c, ihalfright64_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, idct64_row_c },    // FLIPADST_DCT
    { aom_idct32_c, ihalfright64_c },    // DCT_FLIPADST
    { ihalfright32_c, ihalfright64_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, ihalfright64_c },  // ADST_FLIPADST
    { ihalfright32_c, ihalfright64_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx64_c },            // IDTX
    { aom_idct32_c, iidtx64_c },         // V_DCT
    { iidtx32_c, idct64_row_c },         // H_DCT
    { ihalfright32_c, iidtx64_c },       // V_ADST
    { iidtx32_c, ihalfright64_c },       // H_ADST
    { ihalfright32_c, iidtx64_c },       // V_FLIPADST
    { iidtx32_c, ihalfright64_c },       // H_FLIPADST
#endif
  };
  const int n = 32;   // block height / column length
  const int n2 = 64;  // block width / row length

  int i, j;
  tran_low_t out[64][32], tmp[64][32], outtmp[64];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_64x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n2; ++j)
      // Rectangular-transform normalization: scale by 1/sqrt(2).
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) IHT_64x32[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
2349
// 2-D 32x64 (tall) inverse hybrid transform. Rows are length-32, columns
// length-64; mirrors av1_iht64x32_2048_add_c with the dimensions swapped.
void av1_iht32x64_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // 1-D column/row transform pairs, indexed by tx_type ({ cols, rows }).
  static const transform_2d IHT_32x64[] = {
    { idct64_col_c, aom_idct32_c },      // DCT_DCT
    { ihalfright64_c, aom_idct32_c },    // ADST_DCT
    { idct64_col_c, ihalfright32_c },    // DCT_ADST
    { ihalfright64_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright64_c, aom_idct32_c },    // FLIPADST_DCT
    { idct64_col_c, ihalfright32_c },    // DCT_FLIPADST
    { ihalfright64_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { ihalfright64_c, ihalfright32_c },  // ADST_FLIPADST
    { ihalfright64_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx64_c, iidtx32_c },            // IDTX
    { idct64_col_c, iidtx32_c },         // V_DCT
    { iidtx64_c, aom_idct32_c },         // H_DCT
    { ihalfright64_c, iidtx32_c },       // V_ADST
    { iidtx64_c, ihalfright32_c },       // H_ADST
    { ihalfright64_c, iidtx32_c },       // V_FLIPADST
    { iidtx64_c, ihalfright32_c },       // H_FLIPADST
#endif
  };

  const int n = 32;   // block width / row length
  const int n2 = 64;  // block height / column length
  int i, j;
  tran_low_t out[32][64], tmp[32][64], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_32x64[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j)
      // Rectangular-transform normalization: scale by 1/sqrt(2).
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) IHT_32x64[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
2411
Debargha Mukherjee67d13472016-11-01 14:37:39 -07002412#endif // CONFIG_TX64X64
Yaowu Xuc27fc142016-08-22 16:08:15 -07002413
2414// idct
// 4x4 inverse DCT + add. Uses the DC-only fast path when only the first
// coefficient is non-zero (eob <= 1).
void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     const TxfmParam *txfm_param) {
  const int eob = txfm_param->eob;
  if (eob > 1)
    av1_iht4x4_16_add(input, dest, stride, txfm_param);
  else
    aom_idct4x4_1_add(input, dest, stride);
}
2423
// 4x4 inverse Walsh-Hadamard transform + add (lossless mode). Uses the
// single-coefficient fast path when eob <= 1.
void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     const TxfmParam *txfm_param) {
  const int eob = txfm_param->eob;
  if (eob > 1)
    aom_iwht4x4_16_add(input, dest, stride);
  else
    aom_iwht4x4_1_add(input, dest, stride);
}
2432
Nathan E. Eggee554f362017-10-04 14:44:38 -04002433#if !CONFIG_DAALA_TX8
// 8x8 inverse DCT + add, choosing a reduced-coefficient fast path from eob.
static void idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                        const TxfmParam *txfm_param) {
// If dc is 1, then input[0] is the reconstructed value, do not need
// dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.

// The calculation can be simplified if there are not many non-zero dct
// coefficients. Use eobs to decide what to do.
// TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
// Combine that with code here.
#if CONFIG_ADAPT_SCAN
  // Adaptive scan supplies a per-context eob threshold.
  const int16_t half = txfm_param->eob_threshold[0];
#else
  const int16_t half = 12;
#endif

  const int eob = txfm_param->eob;
  if (eob == 1)
    // DC only DCT coefficient
    aom_idct8x8_1_add(input, dest, stride);
  else if (eob <= half)
    aom_idct8x8_12_add(input, dest, stride);
  else
    aom_idct8x8_64_add(input, dest, stride);
}
Monty Montgomerycf18fe42017-07-11 21:33:25 -04002458#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07002459
Nathan E. Eggee554f362017-10-04 14:44:38 -04002460#if !CONFIG_DAALA_TX16
// 16x16 inverse DCT + add, choosing among DC-only / 10-coeff / 38-coeff /
// full paths based on eob.
static void idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const TxfmParam *txfm_param) {
// The calculation can be simplified if there are not many non-zero dct
// coefficients. Use eobs to separate different cases.
#if CONFIG_ADAPT_SCAN
  // Adaptive scan supplies per-context eob thresholds.
  const int16_t half = txfm_param->eob_threshold[0];
  const int16_t quarter = txfm_param->eob_threshold[1];
#else
  const int16_t half = 38;
  const int16_t quarter = 10;
#endif

  const int eob = txfm_param->eob;
  if (eob == 1) /* DC only DCT coefficient. */
    aom_idct16x16_1_add(input, dest, stride);
  else if (eob <= quarter)
    aom_idct16x16_10_add(input, dest, stride);
  else if (eob <= half)
    aom_idct16x16_38_add(input, dest, stride);
  else
    aom_idct16x16_256_add(input, dest, stride);
}
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04002483#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07002484
Sarah Parker5b8e6d22017-07-24 15:30:53 -07002485#if CONFIG_MRC_TX
// 32x32 inverse MRC (masked residual coding) transform + add. The mask is
// either read from txfm_param (when the bitstream signals it for this
// prediction mode) or re-derived from the predictor pixels; the eob then
// selects a reduced-coefficient fast path as in idct32x32_add.
static void imrc32x32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            const TxfmParam *txfm_param) {
#if CONFIG_ADAPT_SCAN
  const int16_t half = txfm_param->eob_threshold[0];
  const int16_t quarter = txfm_param->eob_threshold[1];
#else
  const int16_t half = 135;
  const int16_t quarter = 34;
#endif

  const int eob = txfm_param->eob;
  int n_masked_vals = 0;
  uint8_t *mask;
  uint8_t mask_tmp[32 * 32];
  if (eob == 1) {
    // DC-only: the mask is not needed.
    aom_idct32x32_1_add_c(input, dest, stride);
  } else {
    if ((txfm_param->is_inter && SIGNAL_MRC_MASK_INTER) ||
        (!txfm_param->is_inter && SIGNAL_MRC_MASK_INTRA)) {
      // Mask was signaled in the bitstream and decoded into txfm_param.
      mask = txfm_param->mask;
    } else {
      // Derive the mask from the predictor; must match the encoder side.
      n_masked_vals =
          get_mrc_pred_mask(txfm_param->dst, txfm_param->stride, mask_tmp, 32,
                            32, 32, txfm_param->is_inter);
      if (!is_valid_mrc_mask(n_masked_vals, 32, 32))
        assert(0 && "Invalid MRC mask");
      mask = mask_tmp;
    }
    if (eob <= quarter)
      // non-zero coeff only in upper-left 8x8
      aom_imrc32x32_34_add_c(input, dest, stride, mask);
    else if (eob <= half)
      // non-zero coeff only in upper-left 16x16
      aom_imrc32x32_135_add_c(input, dest, stride, mask);
    else
      aom_imrc32x32_1024_add_c(input, dest, stride, mask);
  }
}
2524#endif // CONFIG_MRC_TX
2525
Nathan E. Eggee554f362017-10-04 14:44:38 -04002526#if !CONFIG_DAALA_TX32
// 32x32 inverse DCT + add, choosing among DC-only / 34-coeff / 135-coeff /
// full paths based on eob.
static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const TxfmParam *txfm_param) {
#if CONFIG_ADAPT_SCAN
  // Adaptive scan supplies per-context eob thresholds.
  const int16_t half = txfm_param->eob_threshold[0];
  const int16_t quarter = txfm_param->eob_threshold[1];
#else
  const int16_t half = 135;
  const int16_t quarter = 34;
#endif

  const int eob = txfm_param->eob;
  if (eob == 1)
    aom_idct32x32_1_add(input, dest, stride);
  else if (eob <= quarter)
    // non-zero coeff only in upper-left 8x8
    aom_idct32x32_34_add(input, dest, stride);
  else if (eob <= half)
    // non-zero coeff only in upper-left 16x16
    aom_idct32x32_135_add(input, dest, stride);
  else
    aom_idct32x32_1024_add(input, dest, stride);
}
Monty Montgomery2cb52ba2017-07-17 18:27:27 -04002549#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07002550
Nathan E. Eggee554f362017-10-04 14:44:38 -04002551#if CONFIG_TX64X64 && !CONFIG_DAALA_TX64
hui sua5315712017-03-20 11:37:15 -07002552static void idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
Lester Lu27319b62017-07-10 16:57:15 -07002553 const TxfmParam *txfm_param) {
2554 (void)txfm_param;
Urvang Joshi9136ab72017-07-28 14:15:49 -07002555 av1_iht64x64_4096_add(input, dest, stride, txfm_param);
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -07002556}
Nathan E. Eggee554f362017-10-04 14:44:38 -04002557#endif // CONFIG_TX64X64 && !CONFIG_DAALA_TX64
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -07002558
// 4x4 inverse transform dispatch. Lossless blocks always use the WHT; the
// LGT/Daala configs and the DST-based tx types only have C implementations,
// so those paths call the _c function directly instead of the RTCD entry.
static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
  if (txfm_param->lossless) {
    assert(tx_type == DCT_DCT);
    av1_iwht4x4_add(input, dest, stride, txfm_param);
    return;
  }

  switch (tx_type) {
#if !CONFIG_DAALA_TX4
    case DCT_DCT: av1_idct4x4_add(input, dest, stride, txfm_param); break;
#else
    case DCT_DCT:  // fall through to the Daala path below
#endif
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_LGT || CONFIG_DAALA_TX4
      // LGT only exists in C version
      av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
      break;
#else
      av1_iht4x4_16_add(input, dest, stride, txfm_param);
      break;
#endif
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
#if CONFIG_LGT || CONFIG_DAALA_TX4
      av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
      break;
#else
      av1_iht4x4_16_add(input, dest, stride, txfm_param);
      break;
#endif
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 4, 4, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
2612
// 4x8 inverse transform dispatch; LGT/Daala builds call the C version
// directly (no RTCD-optimized implementation for those paths).
static void inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
#if CONFIG_LGT || (CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8)
  av1_iht4x8_32_add_c(input, dest, stride, txfm_param);
#else
  av1_iht4x8_32_add(input, dest, stride, txfm_param);
#endif
}
2621
// 8x4 inverse transform dispatch; LGT/Daala builds call the C version
// directly (no RTCD-optimized implementation for those paths).
static void inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
#if CONFIG_LGT || (CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8)
  av1_iht8x4_32_add_c(input, dest, stride, txfm_param);
#else
  av1_iht8x4_32_add(input, dest, stride, txfm_param);
#endif
}
2630
hui sua5315712017-03-20 11:37:15 -07002631// These will be used by the masked-tx experiment in the future.
Yue Chend6bdd462017-07-19 16:05:43 -07002632#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
// 4x16 inverse transform dispatch; LGT builds call the C version directly.
static void inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht4x16_64_add_c(input, dest, stride, txfm_param);
#else
  av1_iht4x16_64_add(input, dest, stride, txfm_param);
#endif
}
2641
// 16x4 inverse transform dispatch; LGT builds call the C version directly.
static void inv_txfm_add_16x4(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht16x4_64_add_c(input, dest, stride, txfm_param);
#else
  av1_iht16x4_64_add(input, dest, stride, txfm_param);
#endif
}
2650
// 8x32 inverse transform dispatch; LGT builds call the C version directly.
static void inv_txfm_add_8x32(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht8x32_256_add_c(input, dest, stride, txfm_param);
#else
  av1_iht8x32_256_add(input, dest, stride, txfm_param);
#endif
}
2659
// 32x8 inverse transform dispatch; LGT builds call the C version directly.
static void inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht32x8_256_add_c(input, dest, stride, txfm_param);
#else
  av1_iht32x8_256_add(input, dest, stride, txfm_param);
#endif
}
Yue Chend6bdd462017-07-19 16:05:43 -07002668#endif
Debargha Mukherjee751de382016-12-13 02:54:22 -08002669
// 8x16 inverse transform dispatch; LGT/Daala builds call the C version
// directly (no RTCD-optimized implementation for those paths).
static void inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT || (CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16)
  av1_iht8x16_128_add_c(input, dest, stride, txfm_param);
#else
  av1_iht8x16_128_add(input, dest, stride, txfm_param);
#endif
}
2678
// 16x8 inverse transform dispatch; LGT/Daala builds call the C version
// directly (no RTCD-optimized implementation for those paths).
static void inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT || (CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16)
  av1_iht16x8_128_add_c(input, dest, stride, txfm_param);
#else
  av1_iht16x8_128_add(input, dest, stride, txfm_param);
#endif
}
2687
// 16x32 inverse transform dispatch; Daala builds call the C version
// directly (no RTCD-optimized implementation for that path).
static void inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
  av1_iht16x32_512_add_c(input, dest, stride, txfm_param);
#else
  av1_iht16x32_512_add(input, dest, stride, txfm_param);
#endif
}
2696
// 32x16 inverse transform dispatch; Daala builds call the C version
// directly (no RTCD-optimized implementation for that path).
static void inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
  av1_iht32x16_512_add_c(input, dest, stride, txfm_param);
#else
  av1_iht32x16_512_add(input, dest, stride, txfm_param);
#endif
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07002705
Debargha Mukherjee2b435012017-09-28 08:30:35 -07002706#if CONFIG_TX64X64
// 32x64 inverse transform dispatch (thin wrapper over the IHT entry point).
static void inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  av1_iht32x64_2048_add(input, dest, stride, txfm_param);
}
2711
// 64x32 inverse transform dispatch (thin wrapper over the IHT entry point).
static void inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  av1_iht64x32_2048_add(input, dest, stride, txfm_param);
}
2716#endif // CONFIG_TX64X64
2717
// Inverse transform + reconstruction-add for an 8x8 block (low bit depth).
// Dispatches on tx_type; several case labels intentionally fall through to
// share a call depending on the compile-time configuration.
static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
  switch (tx_type) {
#if !CONFIG_DAALA_TX8
    case DCT_DCT: idct8x8_add(input, dest, stride, txfm_param); break;
#else
    // With Daala 8-point transforms, DCT_DCT falls through to the shared
    // C hybrid-transform call below.
    case DCT_DCT:
#endif
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_LGT || CONFIG_DAALA_TX8
      // LGT/Daala variants only exist in C code.
      av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
      break;
#else
      av1_iht8x8_64_add(input, dest, stride, txfm_param);
      break;
#endif
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
#if CONFIG_LGT || CONFIG_DAALA_TX8
      av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
      break;
#else
      av1_iht8x8_64_add(input, dest, stride, txfm_param);
      break;
#endif
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 8, 8, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
2764
hui sua5315712017-03-20 11:37:15 -07002765static void inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
Lester Lu27319b62017-07-10 16:57:15 -07002766 int stride, const TxfmParam *txfm_param) {
2767 const TX_TYPE tx_type = txfm_param->tx_type;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002768 switch (tx_type) {
Nathan E. Eggee554f362017-10-04 14:44:38 -04002769#if !CONFIG_DAALA_TX16
Lester Lu27319b62017-07-10 16:57:15 -07002770 case DCT_DCT: idct16x16_add(input, dest, stride, txfm_param); break;
Monty Montgomerycb9c1c52017-07-17 18:15:30 -04002771#else
2772 case DCT_DCT:
2773#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07002774 case ADST_DCT:
2775 case DCT_ADST:
Lester Lu27319b62017-07-10 16:57:15 -07002776 case ADST_ADST:
Nathan E. Eggee554f362017-10-04 14:44:38 -04002777#if CONFIG_DAALA_TX16
Nathan E. Egge34e12012017-09-13 09:02:32 -04002778 av1_iht16x16_256_add_c(input, dest, stride, txfm_param);
2779#else
Lester Lu27319b62017-07-10 16:57:15 -07002780 av1_iht16x16_256_add(input, dest, stride, txfm_param);
Nathan E. Eggee554f362017-10-04 14:44:38 -04002781#endif // CONFIG_DAALA_TX16
Lester Lu27319b62017-07-10 16:57:15 -07002782 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002783#if CONFIG_EXT_TX
2784 case FLIPADST_DCT:
2785 case DCT_FLIPADST:
2786 case FLIPADST_FLIPADST:
2787 case ADST_FLIPADST:
2788 case FLIPADST_ADST:
Yaowu Xuc27fc142016-08-22 16:08:15 -07002789 case V_DCT:
2790 case H_DCT:
2791 case V_ADST:
2792 case H_ADST:
2793 case V_FLIPADST:
Lester Lu27319b62017-07-10 16:57:15 -07002794 case H_FLIPADST:
Nathan E. Eggee554f362017-10-04 14:44:38 -04002795#if CONFIG_DAALA_TX16
Nathan E. Egge34e12012017-09-13 09:02:32 -04002796 av1_iht16x16_256_add_c(input, dest, stride, txfm_param);
2797#else
Lester Lu27319b62017-07-10 16:57:15 -07002798 av1_iht16x16_256_add(input, dest, stride, txfm_param);
Nathan E. Eggee554f362017-10-04 14:44:38 -04002799#endif // CONFIG_DAALA_TX16
Lester Lu27319b62017-07-10 16:57:15 -07002800 break;
Debargha Mukherjee2b435012017-09-28 08:30:35 -07002801 case IDTX: inv_idtx_add_c(input, dest, stride, 16, 16, tx_type); break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002802#endif // CONFIG_EXT_TX
Sarah Parker53f93db2017-07-11 17:20:04 -07002803#if CONFIG_MRC_TX
2804 case MRC_DCT: assert(0 && "Invalid tx type for tx size");
2805#endif // CONFIG_MRC_TX
Yaowu Xuc27fc142016-08-22 16:08:15 -07002806 default: assert(0); break;
2807 }
2808}
2809
// Inverse transform + reconstruction-add for a 32x32 block (low bit depth).
static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
  switch (tx_type) {
#if !CONFIG_DAALA_TX32
    case DCT_DCT: idct32x32_add(input, dest, stride, txfm_param); break;
#else
    case DCT_DCT:
      // Daala 32-point transform only exists in C code.
      av1_iht32x32_1024_add_c(input, dest, stride, txfm_param);
      break;
#endif
#if CONFIG_EXT_TX
    // All extended transform types share the C hybrid-transform call.
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_iht32x32_1024_add_c(input, dest, stride, txfm_param);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 32, 32, tx_type); break;
#endif  // CONFIG_EXT_TX
#if CONFIG_MRC_TX
    // MRC is only supported at 32x32 in this low-bit-depth path.
    case MRC_DCT: imrc32x32_add_c(input, dest, stride, txfm_param); break;
#endif  // CONFIG_MRC_TX
    default: assert(0); break;
  }
}
2846
#if CONFIG_TX64X64
// Inverse transform + reconstruction-add for a 64x64 block (low bit depth).
// Only DCT_DCT is expected here (asserted below); the extended-transform
// labels are retained so all enum values are covered.
static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
  assert(tx_type == DCT_DCT);
  switch (tx_type) {
#if !CONFIG_DAALA_TX64
    case DCT_DCT: idct64x64_add(input, dest, stride, txfm_param); break;
#else
    // With Daala 64-point transforms, DCT_DCT falls through to the shared C
    // call below. NOTE(review): if CONFIG_EXT_TX were disabled in this
    // configuration, DCT_DCT would reach default — confirm EXT_TX is implied.
    case DCT_DCT:
#endif
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_iht64x64_4096_add_c(input, dest, stride, txfm_param);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 64, 64, tx_type); break;
#endif  // CONFIG_EXT_TX
#if CONFIG_MRC_TX
    // MRC_DCT is not valid at this transform size. The break was previously
    // missing, causing an implicit fallthrough into default (harmless in
    // NDEBUG builds, but unintended); terminate the case explicitly.
    case MRC_DCT: assert(0 && "Invalid tx type for tx size"); break;
#endif  // CONFIG_MRC_TX
    default: assert(0); break;
  }
}
#endif  // CONFIG_TX64X64
2884
Yaowu Xuf883b422016-08-30 14:01:10 -07002885void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
2886 int eob, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002887 if (eob > 1)
Yaowu Xuf883b422016-08-30 14:01:10 -07002888 aom_highbd_iwht4x4_16_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002889 else
Yaowu Xuf883b422016-08-30 14:01:10 -07002890 aom_highbd_iwht4x4_1_add(input, dest, stride, bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002891}
2892
// Reinterpret a tran_low_t coefficient buffer as int32_t for the 2D inverse
// transform API. Valid only when tran_low_t is exactly 32 bits (asserted).
static const int32_t *cast_to_int32(const tran_low_t *input) {
  assert(sizeof(int32_t) == sizeof(tran_low_t));
  return (const int32_t *)input;
}
2897
// High-bit-depth inverse transform + reconstruction-add for a 4x4 block.
// In lossless segments the Walsh-Hadamard path is used instead of the DCT.
void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
                                 int stride, const TxfmParam *txfm_param) {
  int eob = txfm_param->eob;
  int bd = txfm_param->bd;
  int lossless = txfm_param->lossless;
  const int32_t *src = cast_to_int32(input);
  const TX_TYPE tx_type = txfm_param->tx_type;
  if (lossless) {
    // Lossless coding only uses DCT_DCT signaling; the actual transform is
    // the invertible WHT.
    assert(tx_type == DCT_DCT);
    av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
    return;
  }
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
      av1_inv_txfm2d_add_4x4(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_inv_txfm2d_add_4x4(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
    // use the c version for anything including identity for now
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
    case IDTX:
      av1_inv_txfm2d_add_4x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                               bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
2942
Yaowu Xuf883b422016-08-30 14:01:10 -07002943void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
Lester Lu27319b62017-07-10 16:57:15 -07002944 int stride, const TxfmParam *txfm_param) {
Sebastien Alaiwan62c92e62017-10-04 15:09:19 +02002945 const int32_t *src = cast_to_int32(input);
Lester Lud8b1ddc2017-07-06 16:13:29 -07002946 av1_inv_txfm2d_add_4x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
Lester Lu27319b62017-07-10 16:57:15 -07002947 txfm_param->tx_type, txfm_param->bd);
Yaowu Xuf883b422016-08-30 14:01:10 -07002948}
2949
2950void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
Lester Lu27319b62017-07-10 16:57:15 -07002951 int stride, const TxfmParam *txfm_param) {
Sebastien Alaiwan62c92e62017-10-04 15:09:19 +02002952 const int32_t *src = cast_to_int32(input);
Lester Lud8b1ddc2017-07-06 16:13:29 -07002953 av1_inv_txfm2d_add_8x4_c(src, CONVERT_TO_SHORTPTR(dest), stride,
Lester Lu27319b62017-07-10 16:57:15 -07002954 txfm_param->tx_type, txfm_param->bd);
Debargha Mukherjee751de382016-12-13 02:54:22 -08002955}
2956
hui sua5315712017-03-20 11:37:15 -07002957static void highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
Lester Lu27319b62017-07-10 16:57:15 -07002958 int stride, const TxfmParam *txfm_param) {
Sebastien Alaiwan62c92e62017-10-04 15:09:19 +02002959 const int32_t *src = cast_to_int32(input);
Lester Lud8b1ddc2017-07-06 16:13:29 -07002960 av1_inv_txfm2d_add_8x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
Lester Lu27319b62017-07-10 16:57:15 -07002961 txfm_param->tx_type, txfm_param->bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002962}
2963
hui sua5315712017-03-20 11:37:15 -07002964static void highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
Lester Lu27319b62017-07-10 16:57:15 -07002965 int stride, const TxfmParam *txfm_param) {
Sebastien Alaiwan62c92e62017-10-04 15:09:19 +02002966 const int32_t *src = cast_to_int32(input);
Lester Lud8b1ddc2017-07-06 16:13:29 -07002967 av1_inv_txfm2d_add_16x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
Lester Lu27319b62017-07-10 16:57:15 -07002968 txfm_param->tx_type, txfm_param->bd);
Debargha Mukherjee751de382016-12-13 02:54:22 -08002969}
2970
hui sua5315712017-03-20 11:37:15 -07002971static void highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
Lester Lu27319b62017-07-10 16:57:15 -07002972 int stride, const TxfmParam *txfm_param) {
Sebastien Alaiwan62c92e62017-10-04 15:09:19 +02002973 const int32_t *src = cast_to_int32(input);
Lester Lud8b1ddc2017-07-06 16:13:29 -07002974 av1_inv_txfm2d_add_16x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
Lester Lu27319b62017-07-10 16:57:15 -07002975 txfm_param->tx_type, txfm_param->bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002976}
2977
hui sua5315712017-03-20 11:37:15 -07002978static void highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
Lester Lu27319b62017-07-10 16:57:15 -07002979 int stride, const TxfmParam *txfm_param) {
Sebastien Alaiwan62c92e62017-10-04 15:09:19 +02002980 const int32_t *src = cast_to_int32(input);
Lester Lud8b1ddc2017-07-06 16:13:29 -07002981 av1_inv_txfm2d_add_32x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
Lester Lu27319b62017-07-10 16:57:15 -07002982 txfm_param->tx_type, txfm_param->bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002983}
Yaowu Xuc27fc142016-08-22 16:08:15 -07002984
#if CONFIG_TX64X64
// High-bit-depth inverse transform + reconstruction-add for a 32x64 block.
static void highbd_inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  const int32_t *coeffs = cast_to_int32(input);
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int bd = txfm_param->bd;
  av1_inv_txfm2d_add_32x64_c(coeffs, CONVERT_TO_SHORTPTR(dest), stride,
                             tx_type, bd);
}

// High-bit-depth inverse transform + reconstruction-add for a 64x32 block.
static void highbd_inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  const int32_t *coeffs = cast_to_int32(input);
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int bd = txfm_param->bd;
  av1_inv_txfm2d_add_64x32_c(coeffs, CONVERT_TO_SHORTPTR(dest), stride,
                             tx_type, bd);
}
#endif  // CONFIG_TX64X64
3000
// High-bit-depth inverse transform + reconstruction-add for an 8x8 block.
static void highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
                                    int stride, const TxfmParam *txfm_param) {
  int bd = txfm_param->bd;
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int32_t *src = cast_to_int32(input);
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
      av1_inv_txfm2d_add_8x8(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_inv_txfm2d_add_8x8(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
    // use the c version for anything including identity for now
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
    case IDTX:
      av1_inv_txfm2d_add_8x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                               bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0);
  }
}
3038
// High-bit-depth inverse transform + reconstruction-add for a 16x16 block.
static void highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  int bd = txfm_param->bd;
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int32_t *src = cast_to_int32(input);
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
      av1_inv_txfm2d_add_16x16(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                               bd);
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_inv_txfm2d_add_16x16(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                               bd);
      break;
    // use the c version for anything including identity for now
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
    case IDTX:
      av1_inv_txfm2d_add_16x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
                                 tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0);
  }
}
3076
// High-bit-depth inverse transform + reconstruction-add for a 32x32 block.
static void highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  int bd = txfm_param->bd;
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int32_t *src = cast_to_int32(input);
  switch (tx_type) {
    case DCT_DCT:
      av1_inv_txfm2d_add_32x32(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                               bd);
      break;

    // The optimised version only supports DCT_DCT, so force use of
    // the C version for all other transform types.
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
#endif  // CONFIG_EXT_TX
      av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
                                 tx_type, bd);
      break;

    default: assert(0);
  }
}
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -07003114
#if CONFIG_TX64X64
// High-bit-depth inverse transform + reconstruction-add for a 64x64 block.
// Note: all non-identity extended types are deliberately mapped to DCT_DCT
// (see the in-code TODO below).
static void highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  int bd = txfm_param->bd;
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int32_t *src = cast_to_int32(input);
  switch (tx_type) {
    case DCT_DCT:
      av1_inv_txfm2d_add_64x64(src, CONVERT_TO_SHORTPTR(dest), stride, DCT_DCT,
                               bd);
      break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // TODO(sarahparker)
      // I've deleted the 64x64 implementations that existed in lieu
      // of adst, flipadst and identity for simplicity but will bring back
      // in a later change. This shouldn't impact performance since
      // DCT_DCT is the only extended type currently allowed for 64x64,
      // as dictated by get_ext_tx_set_type in blockd.h.
      av1_inv_txfm2d_add_64x64_c(src, CONVERT_TO_SHORTPTR(dest), stride,
                                 DCT_DCT, bd);
      break;
    case IDTX:
      highbd_inv_idtx_add_c(input, dest, stride, 64, 64, tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
#endif  // CONFIG_TX64X64
Yaowu Xuc27fc142016-08-22 16:08:15 -07003158
// Low-bit-depth entry point: dispatch the inverse transform + add on
// transform size. With LGT-from-prediction enabled and selected, the LGT
// path handles the block directly.
void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
                      TxfmParam *txfm_param) {
  const TX_SIZE tx_size = txfm_param->tx_size;
#if CONFIG_LGT_FROM_PRED
  if (txfm_param->use_lgt) {
    assert(is_lgt_allowed(txfm_param->mode, tx_size));
    ilgt2d_from_pred_add(input, dest, stride, txfm_param);
    return;
  }
#endif  // CONFIG_LGT_FROM_PRED
  switch (tx_size) {
#if CONFIG_TX64X64
    case TX_64X64: inv_txfm_add_64x64(input, dest, stride, txfm_param); break;
#endif  // CONFIG_TX64X64
    case TX_32X32: inv_txfm_add_32x32(input, dest, stride, txfm_param); break;
    case TX_16X16: inv_txfm_add_16x16(input, dest, stride, txfm_param); break;
    case TX_8X8: inv_txfm_add_8x8(input, dest, stride, txfm_param); break;
    case TX_4X8: inv_txfm_add_4x8(input, dest, stride, txfm_param); break;
    case TX_8X4: inv_txfm_add_8x4(input, dest, stride, txfm_param); break;
    case TX_8X16: inv_txfm_add_8x16(input, dest, stride, txfm_param); break;
    case TX_16X8: inv_txfm_add_16x8(input, dest, stride, txfm_param); break;
    case TX_16X32: inv_txfm_add_16x32(input, dest, stride, txfm_param); break;
    case TX_32X16: inv_txfm_add_32x16(input, dest, stride, txfm_param); break;
#if CONFIG_TX64X64
    case TX_64X32: inv_txfm_add_64x32(input, dest, stride, txfm_param); break;
    case TX_32X64: inv_txfm_add_32x64(input, dest, stride, txfm_param); break;
#endif  // CONFIG_TX64X64
    case TX_4X4:
      // this is like av1_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
      inv_txfm_add_4x4(input, dest, stride, txfm_param);
      break;
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
    case TX_32X8: inv_txfm_add_32x8(input, dest, stride, txfm_param); break;
    case TX_8X32: inv_txfm_add_8x32(input, dest, stride, txfm_param); break;
    case TX_16X4: inv_txfm_add_16x4(input, dest, stride, txfm_param); break;
    case TX_4X16: inv_txfm_add_4x16(input, dest, stride, txfm_param); break;
#endif
    default: assert(0 && "Invalid transform size"); break;
  }
}
3201
// Populate a TxfmParam from the decoder state for the given transform
// size/type and end-of-block position.
static void init_txfm_param(const MACROBLOCKD *xd, TX_SIZE tx_size,
                            TX_TYPE tx_type, int eob, TxfmParam *txfm_param) {
  txfm_param->tx_type = tx_type;
  txfm_param->tx_size = tx_size;
  txfm_param->eob = eob;
  // Lossless is a per-segment property of the current mode info block.
  txfm_param->lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
  txfm_param->bd = xd->bd;
#if CONFIG_LGT
  txfm_param->is_inter = is_inter_block(&xd->mi[0]->mbmi);
#endif
#if CONFIG_LGT_FROM_PRED
  txfm_param->use_lgt = xd->mi[0]->mbmi.use_lgt;
#endif
#if CONFIG_ADAPT_SCAN
  txfm_param->eob_threshold =
      (const int16_t *)&xd->eob_threshold_md[tx_size][tx_type][0];
#endif
}
3220
#if !CONFIG_TXMG
// Function-pointer table indexed by bit-depth path: [0] = low bit depth,
// [1] = high bit depth (matches get_bitdepth_data_path_index()).
typedef void (*InvTxfmFunc)(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
                            TxfmParam *txfm_param);

static InvTxfmFunc inv_txfm_func[2] = { av1_inv_txfm_add,
                                        av1_highbd_inv_txfm_add };
#endif
Yi Luo51281092017-06-26 16:36:15 -07003228
// Run the inverse transform for one block and add the residual into dst.
// Early-outs when eob == 0 (no non-zero coefficients). Under CONFIG_TXMG the
// low-bit-depth path is routed through the high-bit-depth implementation via
// a temporary 16-bit buffer.
void av1_inverse_transform_block(const MACROBLOCKD *xd,
                                 const tran_low_t *dqcoeff,
#if CONFIG_LGT_FROM_PRED
                                 PREDICTION_MODE mode,
#endif
#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
                                 uint8_t *mrc_mask,
#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
                                 TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst,
                                 int stride, int eob) {
  if (!eob) return;
#if CONFIG_PVQ
  // PVQ reconstructs the block from scratch: clear the destination first.
  const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
  const int txb_width = block_size_wide[tx_bsize];
  const int txb_height = block_size_high[tx_bsize];
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (int r = 0; r < txb_height; r++)
      for (int c = 0; c < txb_width; c++)
        CONVERT_TO_SHORTPTR(dst)[r * stride + c] = 0;
  } else {
    for (int r = 0; r < txb_height; r++)
      for (int c = 0; c < txb_width; c++) dst[r * stride + c] = 0;
  }
#endif  // CONFIG_PVQ
  TxfmParam txfm_param;
  init_txfm_param(xd, tx_size, tx_type, eob, &txfm_param);
#if CONFIG_LGT || CONFIG_MRC_TX
  txfm_param.is_inter = is_inter_block(&xd->mi[0]->mbmi);
#endif  // CONFIG_LGT || CONFIG_MRC_TX
#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
  txfm_param.mask = mrc_mask;
#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
#if CONFIG_LGT_FROM_PRED || CONFIG_MRC_TX
  txfm_param.dst = dst;
  txfm_param.stride = stride;
#if CONFIG_LGT_FROM_PRED
  txfm_param.mode = mode;
#endif  // CONFIG_LGT_FROM_PRED
#endif  // CONFIG_LGT_FROM_PRED || CONFIG_MRC_TX

  const int is_hbd = get_bitdepth_data_path_index(xd);
#if CONFIG_TXMG
  if (is_hbd) {
    av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
  } else {
    // Low bit depth: copy pixels into a 16-bit scratch buffer, run the
    // high-bit-depth transform, then narrow the result back to 8 bits.
    DECLARE_ALIGNED(16, uint16_t, tmp[MAX_TX_SQUARE]);
    int tmp_stride = MAX_TX_SIZE;
    int w = tx_size_wide[tx_size];
    int h = tx_size_high[tx_size];
    for (int r = 0; r < h; ++r) {
      for (int c = 0; c < w; ++c) {
        tmp[r * tmp_stride + c] = dst[r * stride + c];
      }
    }

    av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
                            &txfm_param);

    for (int r = 0; r < h; ++r) {
      for (int c = 0; c < w; ++c) {
        dst[r * stride + c] = (uint8_t)tmp[r * tmp_stride + c];
      }
    }
  }
#else   // CONFIG_TXMG
  inv_txfm_func[is_hbd](dqcoeff, dst, stride, &txfm_param);
#endif  // CONFIG_TXMG
}
3297
// Convenience wrapper: derive the coefficient pointer, transform size/type
// and destination for (plane, block, blk_row, blk_col) from the decoder
// state, then call av1_inverse_transform_block().
void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
                                        int blk_row, int blk_col, int eob) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
  uint8_t *mrc_mask = BLOCK_OFFSET(xd->mrc_mask, block);
#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
  const TX_TYPE tx_type =
      av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
  const int dst_stride = pd->dst.stride;
  // Locate the block's top-left pixel within the plane's destination buffer.
  uint8_t *dst =
      &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
  av1_inverse_transform_block(xd, dqcoeff,
#if CONFIG_LGT_FROM_PRED
                              xd->mi[0]->mbmi.mode,
#endif  // CONFIG_LGT_FROM_PRED
#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
                              mrc_mask,
#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
                              tx_type, tx_size, dst, dst_stride, eob);
}
3321
hui subb9c73b2017-03-17 15:51:02 -07003322void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
Lester Lu27319b62017-07-10 16:57:15 -07003323 TxfmParam *txfm_param) {
3324 const TX_SIZE tx_size = txfm_param->tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003325 switch (tx_size) {
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -07003326#if CONFIG_TX64X64
Lester Lu27319b62017-07-10 16:57:15 -07003327 case TX_64X64:
3328 highbd_inv_txfm_add_64x64(input, dest, stride, txfm_param);
3329 break;
Debargha Mukherjee6a47cff2016-11-02 14:57:42 -07003330#endif // CONFIG_TX64X64
Lester Lu27319b62017-07-10 16:57:15 -07003331 case TX_32X32:
3332 highbd_inv_txfm_add_32x32(input, dest, stride, txfm_param);
3333 break;
3334 case TX_16X16:
3335 highbd_inv_txfm_add_16x16(input, dest, stride, txfm_param);
3336 break;
3337 case TX_8X8:
3338 highbd_inv_txfm_add_8x8(input, dest, stride, txfm_param);
3339 break;
3340 case TX_4X8:
3341 av1_highbd_inv_txfm_add_4x8(input, dest, stride, txfm_param);
3342 break;
3343 case TX_8X4:
3344 av1_highbd_inv_txfm_add_8x4(input, dest, stride, txfm_param);
3345 break;
3346 case TX_8X16:
3347 highbd_inv_txfm_add_8x16(input, dest, stride, txfm_param);
3348 break;
3349 case TX_16X8:
3350 highbd_inv_txfm_add_16x8(input, dest, stride, txfm_param);
3351 break;
3352 case TX_16X32:
3353 highbd_inv_txfm_add_16x32(input, dest, stride, txfm_param);
3354 break;
3355 case TX_32X16:
3356 highbd_inv_txfm_add_32x16(input, dest, stride, txfm_param);
3357 break;
Debargha Mukherjee2b435012017-09-28 08:30:35 -07003358#if CONFIG_TX64X64
3359 case TX_64X32:
3360 highbd_inv_txfm_add_64x32(input, dest, stride, txfm_param);
3361 break;
3362 case TX_32X64:
3363 highbd_inv_txfm_add_32x64(input, dest, stride, txfm_param);
3364 break;
3365#endif // CONFIG_TX64X64
Yaowu Xuc27fc142016-08-22 16:08:15 -07003366 case TX_4X4:
Yaowu Xuf883b422016-08-30 14:01:10 -07003367 // this is like av1_short_idct4x4 but has a special case around eob<=1
Yaowu Xuc27fc142016-08-22 16:08:15 -07003368 // which is significant (not just an optimization) for the lossless
3369 // case.
Lester Lu27319b62017-07-10 16:57:15 -07003370 av1_highbd_inv_txfm_add_4x4(input, dest, stride, txfm_param);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003371 break;
3372 default: assert(0 && "Invalid transform size"); break;
3373 }
3374}