/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "av1/common/enums.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/av1_inv_txfm1d.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"

void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
                                 int stride, int bd) {
  /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
     0.5 shifts per pixel. */
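  /* Worked example, assuming UNIT_QUANT_SHIFT == 2: a lone DC coefficient of
     16 is first scaled to 16 >> 2 == 4. The row pass below maps row 0 to
     [2, 2, 2, 2], and the column pass then maps each column [2, 0, 0, 0] to
     [1, 1, 1, 1], so every one of the 16 output pixels is incremented by 1. */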
  int i;
  tran_low_t output[16];
  tran_low_t a1, b1, c1, d1, e1;
  const tran_low_t *ip = input;
  tran_low_t *op = output;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  for (i = 0; i < 4; i++) {
    a1 = ip[0] >> UNIT_QUANT_SHIFT;
    c1 = ip[1] >> UNIT_QUANT_SHIFT;
    d1 = ip[2] >> UNIT_QUANT_SHIFT;
    b1 = ip[3] >> UNIT_QUANT_SHIFT;
    a1 += c1;
    d1 -= b1;
    e1 = (a1 - d1) >> 1;
    b1 = e1 - b1;
    c1 = e1 - c1;
    a1 -= b1;
    d1 += c1;

    op[0] = a1;
    op[1] = b1;
    op[2] = c1;
    op[3] = d1;
    ip += 4;
    op += 4;
  }

  ip = output;
  for (i = 0; i < 4; i++) {
    a1 = ip[4 * 0];
    c1 = ip[4 * 1];
    d1 = ip[4 * 2];
    b1 = ip[4 * 3];
    a1 += c1;
    d1 -= b1;
    e1 = (a1 - d1) >> 1;
    b1 = e1 - b1;
    c1 = e1 - c1;
    a1 -= b1;
    d1 += c1;

    range_check_value(a1, bd + 1);
    range_check_value(b1, bd + 1);
    range_check_value(c1, bd + 1);
    range_check_value(d1, bd + 1);

    dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd);
    dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd);
    dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd);
    dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd);

    ip++;
    dest++;
  }
}

void av1_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
                                int dest_stride, int bd) {
  int i;
  tran_low_t a1, e1;
  tran_low_t tmp[4];
  const tran_low_t *ip = in;
  tran_low_t *op = tmp;
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);

  a1 = ip[0] >> UNIT_QUANT_SHIFT;
  e1 = a1 >> 1;
  a1 -= e1;
  op[0] = a1;
  op[1] = op[2] = op[3] = e1;

  ip = tmp;
  for (i = 0; i < 4; i++) {
    e1 = ip[0] >> 1;
    a1 = ip[0] - e1;
    dest[dest_stride * 0] =
        highbd_clip_pixel_add(dest[dest_stride * 0], a1, bd);
    dest[dest_stride * 1] =
        highbd_clip_pixel_add(dest[dest_stride * 1], e1, bd);
    dest[dest_stride * 2] =
        highbd_clip_pixel_add(dest[dest_stride * 2], e1, bd);
    dest[dest_stride * 3] =
        highbd_clip_pixel_add(dest[dest_stride * 3], e1, bd);
    ip++;
    dest++;
  }
}

static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
  switch (txfm_type) {
    case TXFM_TYPE_DCT4: return av1_idct4_new;
    case TXFM_TYPE_DCT8: return av1_idct8_new;
    case TXFM_TYPE_DCT16: return av1_idct16_new;
    case TXFM_TYPE_DCT32: return av1_idct32_new;
    case TXFM_TYPE_DCT64: return av1_idct64_new;
    case TXFM_TYPE_ADST4: return av1_iadst4_new;
    case TXFM_TYPE_ADST8: return av1_iadst8_new;
    case TXFM_TYPE_ADST16: return av1_iadst16_new;
    case TXFM_TYPE_IDENTITY4: return av1_iidentity4_c;
    case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c;
    case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c;
    case TXFM_TYPE_IDENTITY32: return av1_iidentity32_c;
    default: assert(0); return NULL;
  }
}

static const int8_t inv_shift_4x4[2] = { 0, -4 };
static const int8_t inv_shift_8x8[2] = { -1, -4 };
static const int8_t inv_shift_16x16[2] = { -2, -4 };
static const int8_t inv_shift_32x32[2] = { -2, -4 };
static const int8_t inv_shift_64x64[2] = { -2, -4 };
static const int8_t inv_shift_4x8[2] = { 0, -4 };
static const int8_t inv_shift_8x4[2] = { 0, -4 };
static const int8_t inv_shift_8x16[2] = { -1, -4 };
static const int8_t inv_shift_16x8[2] = { -1, -4 };
static const int8_t inv_shift_16x32[2] = { -1, -4 };
static const int8_t inv_shift_32x16[2] = { -1, -4 };
static const int8_t inv_shift_32x64[2] = { -1, -4 };
static const int8_t inv_shift_64x32[2] = { -1, -4 };
static const int8_t inv_shift_4x16[2] = { -1, -4 };
static const int8_t inv_shift_16x4[2] = { -1, -4 };
static const int8_t inv_shift_8x32[2] = { -2, -4 };
static const int8_t inv_shift_32x8[2] = { -2, -4 };
static const int8_t inv_shift_16x64[2] = { -2, -4 };
static const int8_t inv_shift_64x16[2] = { -2, -4 };

const int8_t *inv_txfm_shift_ls[TX_SIZES_ALL] = {
  inv_shift_4x4,   inv_shift_8x8,   inv_shift_16x16, inv_shift_32x32,
  inv_shift_64x64, inv_shift_4x8,   inv_shift_8x4,   inv_shift_8x16,
  inv_shift_16x8,  inv_shift_16x32, inv_shift_32x16, inv_shift_32x64,
  inv_shift_64x32, inv_shift_4x16,  inv_shift_16x4,  inv_shift_8x32,
  inv_shift_32x8,  inv_shift_16x64, inv_shift_64x16,
};
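
/* Reading aid, not new behavior: each pair is { row shift, column shift },
   both applied as right shifts by inv_txfm2d_add_c(). For TX_8X8, { -1, -4 }
   means every row output is shifted down by 1 and every column output by 4,
   undoing the scaling that the forward transform path applies at this size. */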

/* clang-format off */
const int8_t inv_cos_bit_col[MAX_TXWH_IDX]      // txw_idx
                            [MAX_TXWH_IDX] = {  // txh_idx
  { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT,           0,           0 },
  { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT,           0 },
  { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
  {           0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
  {           0,           0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }
};

const int8_t inv_cos_bit_row[MAX_TXWH_IDX]      // txw_idx
                            [MAX_TXWH_IDX] = {  // txh_idx
  { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT,           0,           0 },
  { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT,           0 },
  { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
  {           0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
  {           0,           0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }
};
/* clang-format on */
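
/* Note (a reading aid): every populated entry uses the same INV_COS_BIT
   precision; the zero entries mark width/height index pairs (e.g. 4x32 or
   64x8) whose aspect ratio exceeds 4:1 and which therefore do not exist as
   AV1 transform sizes. */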

const int8_t iadst4_range[7] = { 0, 1, 0, 0, 0, 0, 0 };

void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
                          TXFM_2D_FLIP_CFG *cfg) {
  assert(cfg != NULL);
  cfg->tx_size = tx_size;
  set_flip_cfg(tx_type, cfg);
  av1_zero(cfg->stage_range_col);
  av1_zero(cfg->stage_range_row);
  const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type];
  const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type];
  cfg->shift = inv_txfm_shift_ls[tx_size];
  const int txw_idx = get_txw_idx(tx_size);
  const int txh_idx = get_txh_idx(tx_size);
  cfg->cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
  cfg->cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
  cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col];
  if (cfg->txfm_type_col == TXFM_TYPE_ADST4) {
    memcpy(cfg->stage_range_col, iadst4_range, sizeof(iadst4_range));
  }
  cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row];
  if (cfg->txfm_type_row == TXFM_TYPE_ADST4) {
    memcpy(cfg->stage_range_row, iadst4_range, sizeof(iadst4_range));
  }
  cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col];
  cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row];
}

void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
                             const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size,
                             int bd) {
  const int fwd_shift = inv_start_range[tx_size];
  const int8_t *shift = cfg->shift;
  int8_t opt_range_row, opt_range_col;
  if (bd == 8) {
    opt_range_row = 16;
    opt_range_col = 16;
  } else if (bd == 10) {
    opt_range_row = 18;
    opt_range_col = 16;
  } else {
    assert(bd == 12);
    opt_range_row = 20;
    opt_range_col = 18;
  }
  // The i < MAX_TXFM_STAGE_NUM bound silences an array-bounds warning.
  for (int i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i) {
    int real_range_row = cfg->stage_range_row[i] + fwd_shift + bd + 1;
    (void)real_range_row;
    if (cfg->txfm_type_row == TXFM_TYPE_ADST4 && i == 1) {
      // The adst4 may use 1 extra bit on top of opt_range_row at stage 1,
      // so opt_range_row >= real_range_row will not hold.
      stage_range_row[i] = opt_range_row;
    } else {
      assert(opt_range_row >= real_range_row);
      stage_range_row[i] = opt_range_row;
    }
  }
  // The i < MAX_TXFM_STAGE_NUM bound silences an array-bounds warning.
  for (int i = 0; i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i) {
    int real_range_col =
        cfg->stage_range_col[i] + fwd_shift + shift[0] + bd + 1;
    (void)real_range_col;
    if (cfg->txfm_type_col == TXFM_TYPE_ADST4 && i == 1) {
      // The adst4 may use 1 extra bit on top of opt_range_col at stage 1,
      // so opt_range_col >= real_range_col will not hold.
      stage_range_col[i] = opt_range_col;
    } else {
      assert(opt_range_col >= real_range_col);
      stage_range_col[i] = opt_range_col;
    }
  }
}

static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
                                    int stride, TXFM_2D_FLIP_CFG *cfg,
                                    int32_t *txfm_buf, TX_SIZE tx_size,
                                    int bd) {
  // Note when assigning txfm_size_col, we use the txfm_size from the
  // row configuration and vice versa. This is intentionally done to
  // accurately perform rectangular transforms. When the transform is
  // rectangular, the number of columns will be the same as the
  // txfm_size stored in the row cfg struct. It will make no difference
  // for square transforms.
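  // For example, for TX_8X16 (8 wide, 16 high), txfm_size_col is
  // tx_size_wide[TX_8X16] == 8 (the length of each row transform) and
  // txfm_size_row is tx_size_high[TX_8X16] == 16 (the length of each
  // column transform).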
  const int txfm_size_col = tx_size_wide[cfg->tx_size];
  const int txfm_size_row = tx_size_high[cfg->tx_size];
  // Take the shift from the larger dimension in the rectangular case.
  const int8_t *shift = cfg->shift;
  const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
  int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
  int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
  assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
  assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
  av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, tx_size, bd);

  const int8_t cos_bit_col = cfg->cos_bit_col;
  const int8_t cos_bit_row = cfg->cos_bit_row;
  const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->txfm_type_col);
  const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->txfm_type_row);

  // txfm_buf's length is txfm_size_row * txfm_size_col +
  // 2 * AOMMAX(txfm_size_row, txfm_size_col). It is used for intermediate
  // data buffering.
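  // For example, for TX_16X8 that is 16 * 8 == 128 entries of transform data
  // plus 2 * AOMMAX(16, 8) == 32 entries for temp_in/temp_out, matching the
  // txfm_buf[16 * 8 + 16 + 16] declared in av1_inv_txfm2d_add_16x8_c() below.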
  const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
  int32_t *temp_in = txfm_buf;
  int32_t *temp_out = temp_in + buf_offset;
  int32_t *buf = temp_out + buf_offset;
  int32_t *buf_ptr = buf;
  int c, r;

  // Rows
  for (r = 0; r < txfm_size_row; ++r) {
    if (abs(rect_type) == 1) {
      // Rectangular transforms whose dimensions differ by a factor of 2 are
      // rescaled by sqrt(1/2) == NewInvSqrt2 / (1 << NewSqrt2Bits).
      for (c = 0; c < txfm_size_col; ++c) {
        temp_in[c] = round_shift((int64_t)input[c] * NewInvSqrt2, NewSqrt2Bits);
      }
    } else {
      for (c = 0; c < txfm_size_col; ++c) {
        temp_in[c] = input[c];
      }
    }
    clamp_buf(temp_in, txfm_size_col, bd + 8);
    txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row);
    av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
    input += txfm_size_col;
    buf_ptr += txfm_size_col;
  }

  // Columns
  for (c = 0; c < txfm_size_col; ++c) {
    if (cfg->lr_flip == 0) {
      for (r = 0; r < txfm_size_row; ++r)
        temp_in[r] = buf[r * txfm_size_col + c];
    } else {
      // flip left right
      for (r = 0; r < txfm_size_row; ++r)
        temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
    }
    clamp_buf(temp_in, txfm_size_row, AOMMAX(bd + 6, 16));
    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
    av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
    if (cfg->ud_flip == 0) {
      for (r = 0; r < txfm_size_row; ++r) {
        output[r * stride + c] =
            highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
      }
    } else {
      // flip upside down
      for (r = 0; r < txfm_size_row; ++r) {
        output[r * stride + c] = highbd_clip_pixel_add(
            output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
      }
    }
  }
}

static INLINE void inv_txfm2d_add_facade(const int32_t *input,
                                         uint16_t *output, int stride,
                                         int32_t *txfm_buf, TX_TYPE tx_type,
                                         TX_SIZE tx_size, int bd) {
  TXFM_2D_FLIP_CFG cfg;
  av1_get_inv_txfm_cfg(tx_type, tx_size, &cfg);
  // Forward shift sum uses larger square size, to be consistent with what
  // av1_gen_inv_stage_range() does for inverse shifts.
  inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, tx_size, bd);
}

void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 8 + 8 + 8]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X8, bd);
}

void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X4, bd);
}

void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X16, bd);
}

void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 8 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X8, bd);
}

void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X32, bd);
}

void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[32 * 16 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X16, bd);
}

void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 4 + 4 + 4]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X4, bd);
}

void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 8 + 8 + 8]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X8, bd);
}

void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X16, bd);
}

void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X32, bd);
}

void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // TODO(urvang): Can the same array be reused, instead of using a new array?
  // Remap 32x32 input into a modified 64x64 by:
  // - Copying over these values in top-left 32x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[64 * 64];
  for (int row = 0; row < 32; ++row) {
    memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
    memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
  }
  memset(mod_input + 32 * 64, 0, 32 * 64 * sizeof(*mod_input));
  DECLARE_ALIGNED(32, int, txfm_buf[64 * 64 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X64,
                        bd);
}

void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 32x32 input into a modified 64x32 by:
  // - Copying over these values in top-left 32x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[64 * 32];
  for (int row = 0; row < 32; ++row) {
    memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
    memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
  }
  DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X32,
                        bd);
}

void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 32x32 input into a modified 32x64 input by:
  // - Copying over these values in top-left 32x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[32 * 64];
  memcpy(mod_input, input, 32 * 32 * sizeof(*mod_input));
  memset(mod_input + 32 * 32, 0, 32 * 32 * sizeof(*mod_input));
  DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_32X64,
                        bd);
}

void av1_inv_txfm2d_add_16x64_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 16x32 input into a modified 16x64 input by:
  // - Copying over these values in top-left 16x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[16 * 64];
  memcpy(mod_input, input, 16 * 32 * sizeof(*mod_input));
  memset(mod_input + 16 * 32, 0, 16 * 32 * sizeof(*mod_input));
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_16X64,
                        bd);
}

void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 32x16 input into a modified 64x16 by:
  // - Copying over these values in top-left 32x16 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[64 * 16];
  for (int row = 0; row < 16; ++row) {
    memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
    memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
  }
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X16,
                        bd);
}

void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X16, bd);
}

void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X4, bd);
}

void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X32, bd);
}

void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X8, bd);
}