Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1 | /* |
Yaowu Xu | bde4ac8 | 2016-11-28 15:26:06 -0800 | [diff] [blame] | 2 | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 3 | * |
Yaowu Xu | bde4ac8 | 2016-11-28 15:26:06 -0800 | [diff] [blame] | 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 10 | */ |
| 11 | |
Urvang Joshi | 1ac47a7 | 2017-12-07 12:12:50 -0800 | [diff] [blame] | 12 | #include "./aom_dsp_rtcd.h" |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 13 | #include "./av1_rtcd.h" |
Sarah Parker | 31c6650 | 2017-05-19 16:51:07 -0700 | [diff] [blame] | 14 | #include "aom_dsp/inv_txfm.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 15 | #include "av1/common/enums.h" |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 16 | #include "av1/common/av1_txfm.h" |
| 17 | #include "av1/common/av1_inv_txfm1d.h" |
Sarah Parker | eec47e6 | 2017-05-15 20:49:22 -0700 | [diff] [blame] | 18 | #include "av1/common/av1_inv_txfm1d_cfg.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 19 | |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 20 | #define NO_INV_TRANSPOSE 1 |
| 21 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 22 | static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) { |
| 23 | switch (txfm_type) { |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 24 | case TXFM_TYPE_DCT4: return av1_idct4_new; |
| 25 | case TXFM_TYPE_DCT8: return av1_idct8_new; |
| 26 | case TXFM_TYPE_DCT16: return av1_idct16_new; |
| 27 | case TXFM_TYPE_DCT32: return av1_idct32_new; |
Urvang Joshi | 900643b | 2017-08-08 13:09:51 -0700 | [diff] [blame] | 28 | case TXFM_TYPE_DCT64: return av1_idct64_new; |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 29 | case TXFM_TYPE_ADST4: return av1_iadst4_new; |
| 30 | case TXFM_TYPE_ADST8: return av1_iadst8_new; |
| 31 | case TXFM_TYPE_ADST16: return av1_iadst16_new; |
| 32 | case TXFM_TYPE_ADST32: return av1_iadst32_new; |
Sarah Parker | 3eed417 | 2017-05-15 20:49:22 -0700 | [diff] [blame] | 33 | case TXFM_TYPE_IDENTITY4: return av1_iidentity4_c; |
| 34 | case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c; |
| 35 | case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c; |
| 36 | case TXFM_TYPE_IDENTITY32: return av1_iidentity32_c; |
Debargha Mukherjee | 570423c | 2017-10-01 00:35:20 -0700 | [diff] [blame] | 37 | case TXFM_TYPE_IDENTITY64: return av1_iidentity64_c; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 38 | default: assert(0); return NULL; |
| 39 | } |
| 40 | } |
| 41 | |
// Per-block-size inverse-transform shift pairs. shift[0] is applied after
// the row pass and shift[1] after the column pass (both via
// av1_round_shift_array with the sign negated — see inv_txfm2d_add_c).
static const int8_t inv_shift_4x4[2] = { 0, -4 };
static const int8_t inv_shift_8x8[2] = { -1, -4 };
static const int8_t inv_shift_16x16[2] = { -2, -4 };
static const int8_t inv_shift_32x32[2] = { -2, -4 };
static const int8_t inv_shift_64x64[2] = { -2, -4 };
static const int8_t inv_shift_4x8[2] = { 0, -4 };
static const int8_t inv_shift_8x4[2] = { 0, -4 };
static const int8_t inv_shift_8x16[2] = { -1, -4 };
static const int8_t inv_shift_16x8[2] = { -1, -4 };
static const int8_t inv_shift_16x32[2] = { -1, -4 };
static const int8_t inv_shift_32x16[2] = { -1, -4 };
static const int8_t inv_shift_32x64[2] = { -1, -4 };
static const int8_t inv_shift_64x32[2] = { -1, -4 };
static const int8_t inv_shift_4x16[2] = { -1, -4 };
static const int8_t inv_shift_16x4[2] = { -1, -4 };
static const int8_t inv_shift_8x32[2] = { -2, -4 };
static const int8_t inv_shift_32x8[2] = { -2, -4 };
static const int8_t inv_shift_16x64[2] = { -2, -4 };
static const int8_t inv_shift_64x16[2] = { -2, -4 };
Angie Chiang | 4a75b5a | 2018-01-10 17:19:06 -0800 | [diff] [blame] | 61 | |
// Shift-pair lookup indexed by TX_SIZE. NOTE: the entry order must match
// the TX_SIZES_ALL enumeration exactly; do not reorder.
const int8_t *inv_txfm_shift_ls[TX_SIZES_ALL] = {
  inv_shift_4x4, inv_shift_8x8, inv_shift_16x16, inv_shift_32x32,
  inv_shift_64x64, inv_shift_4x8, inv_shift_8x4, inv_shift_8x16,
  inv_shift_16x8, inv_shift_16x32, inv_shift_32x16, inv_shift_32x64,
  inv_shift_64x32, inv_shift_4x16, inv_shift_16x4, inv_shift_8x32,
  inv_shift_32x8, inv_shift_16x64, inv_shift_64x16,
};
| 69 | |
// Cosine bit precision for the column/row 1D passes, indexed by
// [txw_idx][txh_idx]. Zero entries presumably mark width/height index
// combinations that never occur for a valid TX_SIZE — TODO confirm.
/* clang-format off */
const int8_t inv_cos_bit_col[MAX_TXWH_IDX]          // txw_idx
                            [MAX_TXWH_IDX] = {      // txh_idx
    { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT,           0,           0 },
    { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT,           0 },
    { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
    {           0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
    {           0,           0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }
  };

const int8_t inv_cos_bit_row[MAX_TXWH_IDX]          // txw_idx
                            [MAX_TXWH_IDX] = {      // txh_idx
    { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT,           0,           0 },
    { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT,           0 },
    { INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
    {           0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT },
    {           0,           0, INV_COS_BIT, INV_COS_BIT, INV_COS_BIT }
  };
/* clang-format on */
Angie Chiang | d4327bc | 2018-01-22 20:54:04 -0800 | [diff] [blame] | 89 | |
Angie Chiang | 5d7c1fc | 2018-01-30 15:54:44 -0800 | [diff] [blame] | 90 | const int8_t iadst4_range[7] = { 0, 1, 0, 0, 0, 0, 0 }; |
| 91 | |
Urvang Joshi | c502216 | 2017-11-21 15:57:42 -0800 | [diff] [blame] | 92 | void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size, |
| 93 | TXFM_2D_FLIP_CFG *cfg) { |
| 94 | assert(cfg != NULL); |
Angie Chiang | 0c7b8d8 | 2018-01-23 19:20:44 -0800 | [diff] [blame] | 95 | cfg->tx_size = tx_size; |
Urvang Joshi | c502216 | 2017-11-21 15:57:42 -0800 | [diff] [blame] | 96 | set_flip_cfg(tx_type, cfg); |
Angie Chiang | 0c7b8d8 | 2018-01-23 19:20:44 -0800 | [diff] [blame] | 97 | av1_zero(cfg->stage_range_col); |
| 98 | av1_zero(cfg->stage_range_row); |
| 99 | set_flip_cfg(tx_type, cfg); |
| 100 | const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type]; |
| 101 | const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type]; |
Angie Chiang | 4a75b5a | 2018-01-10 17:19:06 -0800 | [diff] [blame] | 102 | cfg->shift = inv_txfm_shift_ls[tx_size]; |
Angie Chiang | 29d2f21 | 2018-01-24 19:42:57 -0800 | [diff] [blame] | 103 | const int txw_idx = get_txw_idx(tx_size); |
| 104 | const int txh_idx = get_txh_idx(tx_size); |
Angie Chiang | d4327bc | 2018-01-22 20:54:04 -0800 | [diff] [blame] | 105 | cfg->cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx]; |
| 106 | cfg->cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx]; |
Angie Chiang | 0c7b8d8 | 2018-01-23 19:20:44 -0800 | [diff] [blame] | 107 | cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col]; |
Angie Chiang | 5d7c1fc | 2018-01-30 15:54:44 -0800 | [diff] [blame] | 108 | if (cfg->txfm_type_col == TXFM_TYPE_ADST4) { |
| 109 | memcpy(cfg->stage_range_col, iadst4_range, sizeof(iadst4_range)); |
| 110 | } |
Angie Chiang | 0c7b8d8 | 2018-01-23 19:20:44 -0800 | [diff] [blame] | 111 | cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row]; |
Angie Chiang | 5d7c1fc | 2018-01-30 15:54:44 -0800 | [diff] [blame] | 112 | if (cfg->txfm_type_row == TXFM_TYPE_ADST4) { |
| 113 | memcpy(cfg->stage_range_row, iadst4_range, sizeof(iadst4_range)); |
| 114 | } |
Angie Chiang | 0c7b8d8 | 2018-01-23 19:20:44 -0800 | [diff] [blame] | 115 | cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col]; |
| 116 | cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row]; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 117 | } |
| 118 | |
Angie Chiang | ce3ad28 | 2017-08-08 09:51:54 -0700 | [diff] [blame] | 119 | void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row, |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 120 | const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size, |
Angie Chiang | ce3ad28 | 2017-08-08 09:51:54 -0700 | [diff] [blame] | 121 | int bd) { |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 122 | const int fwd_shift = inv_start_range[tx_size]; |
Angie Chiang | 4a75b5a | 2018-01-10 17:19:06 -0800 | [diff] [blame] | 123 | const int8_t *shift = cfg->shift; |
Angie Chiang | ede0792 | 2018-02-28 18:10:56 -0800 | [diff] [blame^] | 124 | int8_t opt_range_row, opt_range_col; |
| 125 | if (bd == 8) { |
| 126 | opt_range_row = 16; |
| 127 | opt_range_col = 16; |
| 128 | } else if (bd == 10) { |
| 129 | opt_range_row = 18; |
| 130 | opt_range_col = 16; |
| 131 | } else { |
| 132 | assert(bd == 12); |
| 133 | opt_range_row = 20; |
| 134 | opt_range_col = 18; |
| 135 | } |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 136 | // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning |
Angie Chiang | 0c7b8d8 | 2018-01-23 19:20:44 -0800 | [diff] [blame] | 137 | for (int i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i) { |
Angie Chiang | ede0792 | 2018-02-28 18:10:56 -0800 | [diff] [blame^] | 138 | int real_range_row = cfg->stage_range_row[i] + fwd_shift + bd + 1; |
| 139 | (void)real_range_row; |
| 140 | // assert(opt_range_row >= real_range_row); |
| 141 | stage_range_row[i] = opt_range_row; |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 142 | } |
| 143 | // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning |
Angie Chiang | 0c7b8d8 | 2018-01-23 19:20:44 -0800 | [diff] [blame] | 144 | for (int i = 0; i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i) { |
Angie Chiang | ede0792 | 2018-02-28 18:10:56 -0800 | [diff] [blame^] | 145 | int real_range_col = |
Angie Chiang | 0c7b8d8 | 2018-01-23 19:20:44 -0800 | [diff] [blame] | 146 | cfg->stage_range_col[i] + fwd_shift + shift[0] + bd + 1; |
Angie Chiang | ede0792 | 2018-02-28 18:10:56 -0800 | [diff] [blame^] | 147 | (void)real_range_col; |
| 148 | // assert(opt_range_col >= real_range_col); |
| 149 | stage_range_col[i] = opt_range_col; |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 150 | } |
| 151 | } |
| 152 | |
// Shared 2D inverse-transform core. Runs the 1D row transforms, round-shifts
// and clamps the intermediate result, then runs the 1D column transforms and
// adds the clipped residual into the caller's pixel buffer `output`
// (in-place add, honoring the lr/ud flip flags from cfg).
static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
                                    int stride, TXFM_2D_FLIP_CFG *cfg,
                                    int32_t *txfm_buf, TX_SIZE tx_size,
                                    int bd) {
  // Note when assigning txfm_size_col, we use the txfm_size from the
  // row configuration and vice versa. This is intentionally done to
  // accurately perform rectangular transforms. When the transform is
  // rectangular, the number of columns will be the same as the
  // txfm_size stored in the row cfg struct. It will make no difference
  // for square transforms.
  const int txfm_size_col = tx_size_wide[cfg->tx_size];
  const int txfm_size_row = tx_size_high[cfg->tx_size];
  // Take the shift from the larger dimension in the rectangular case.
  const int8_t *shift = cfg->shift;
  const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
  int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
  int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
  assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
  assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
  av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, tx_size, bd);

  const int8_t cos_bit_col = cfg->cos_bit_col;
  const int8_t cos_bit_row = cfg->cos_bit_row;
  const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->txfm_type_col);
  const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->txfm_type_row);

  // txfm_buf's length is txfm_size_row * txfm_size_col + 2 *
  // AOMMAX(txfm_size_row, txfm_size_col)
  // it is used for intermediate data buffering
  // Layout: [temp_in | temp_out | buf], where temp_in/temp_out are
  // one-line scratch vectors and buf holds the full row-pass result.
  const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
  int32_t *temp_in = txfm_buf;
  int32_t *temp_out = temp_in + buf_offset;
  int32_t *buf = temp_out + buf_offset;
  int32_t *buf_ptr = buf;
  int c, r;

  // Rows
  for (r = 0; r < txfm_size_row; ++r) {
    if (abs(rect_type) == 1) {
      // 2:1 / 1:2 rectangles: pre-scale inputs by NewInvSqrt2
      // (presumably ~1/sqrt(2) in fixed point) before the row transform.
      for (c = 0; c < txfm_size_col; ++c) {
        temp_in[c] = round_shift(input[c] * NewInvSqrt2, NewSqrt2Bits);
      }
      txfm_func_row(temp_in, buf_ptr, cos_bit_row, stage_range_row);
    } else {
      txfm_func_row(input, buf_ptr, cos_bit_row, stage_range_row);
    }
    av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
    clamp_buf(buf_ptr, txfm_size_col, AOMMAX(bd + 6, 16));
    input += txfm_size_col;
    buf_ptr += txfm_size_col;
  }

  // Columns
  for (c = 0; c < txfm_size_col; ++c) {
    if (cfg->lr_flip == 0) {
      for (r = 0; r < txfm_size_row; ++r)
        temp_in[r] = buf[r * txfm_size_col + c];
    } else {
      // flip left right
      for (r = 0; r < txfm_size_row; ++r)
        temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
    }
    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
    av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
    clamp_buf(temp_out, txfm_size_row, bd + 1);
    if (cfg->ud_flip == 0) {
      for (r = 0; r < txfm_size_row; ++r) {
        output[r * stride + c] =
            highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
      }
    } else {
      // flip upside down
      for (r = 0; r < txfm_size_row; ++r) {
        output[r * stride + c] = highbd_clip_pixel_add(
            output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
      }
    }
  }
}
| 232 | |
// Convenience front end: build the flip/shift configuration for
// (tx_type, tx_size) and invoke the shared inverse-transform-and-add core.
// txfm_buf must be sized per the layout documented in inv_txfm2d_add_c.
static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output,
                                         int stride, int32_t *txfm_buf,
                                         TX_TYPE tx_type, TX_SIZE tx_size,
                                         int bd) {
  TXFM_2D_FLIP_CFG cfg;
  av1_get_inv_txfm_cfg(tx_type, tx_size, &cfg);
  // Forward shift sum uses larger square size, to be consistent with what
  // av1_gen_inv_stage_range() does for inverse shifts.
  inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, tx_size, bd);
}
| 243 | |
// 4x8 inverse transform + add. Scratch holds 4*8 coefficients plus two
// 8-element work vectors (see inv_txfm2d_add_c's buffer layout).
void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 8 + 8 + 8]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X8, bd);
}
| 249 | |
// 8x4 inverse transform + add. With NO_INV_TRANSPOSE (the current default)
// the transform is done directly; the #else path instead transposes to the
// rotated 4x8 size, transforms, and transposes back (compiled out).
void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]);
#if NO_INV_TRANSPOSE
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X4, bd);
#else
  int32_t rinput[8 * 4];
  uint16_t routput[8 * 4];
  TX_SIZE tx_size = TX_8X4;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int32(rinput, rw, input, w, w, h);
  transpose_uint16(routput, rw, output, stride, w, h);
  inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
  transpose_uint16(output, stride, routput, rw, rw, rh);
#endif  // NO_INV_TRANSPOSE
}
| 271 | |
// 8x16 inverse transform + add (direct; no transpose needed).
void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X16, bd);
}
| 277 | |
// 16x8 inverse transform + add. Direct under NO_INV_TRANSPOSE; the #else
// path (compiled out) rotates to 8x16 via transposes.
void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
                               int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 8 + 16 + 16]);
#if NO_INV_TRANSPOSE
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X8, bd);
#else
  int32_t rinput[16 * 8];
  uint16_t routput[16 * 8];
  TX_SIZE tx_size = TX_16X8;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int32(rinput, rw, input, w, w, h);
  transpose_uint16(routput, rw, output, stride, w, h);
  inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
  transpose_uint16(output, stride, routput, rw, rw, rh);
#endif  // NO_INV_TRANSPOSE
}
| 299 | |
// 16x32 inverse transform + add (direct; no transpose needed).
void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X32, bd);
}
| 305 | |
// 32x16 inverse transform + add. Direct under NO_INV_TRANSPOSE; the #else
// path (compiled out) rotates to 16x32 via transposes.
void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[32 * 16 + 32 + 32]);
#if NO_INV_TRANSPOSE
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X16, bd);
#else
  int32_t rinput[32 * 16];
  uint16_t routput[32 * 16];
  TX_SIZE tx_size = TX_32X16;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int32(rinput, rw, input, w, w, h);
  transpose_uint16(routput, rw, output, stride, w, h);
  inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
  transpose_uint16(output, stride, routput, rw, rw, rh);
#endif  // NO_INV_TRANSPOSE
}
| 327 | |
// 4x4 inverse transform + add.
void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 4 + 4 + 4]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X4, bd);
}
| 333 | |
// 8x8 inverse transform + add.
void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
                              int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 8 + 8 + 8]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X8, bd);
}
| 339 | |
// 16x16 inverse transform + add.
void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[16 * 16 + 16 + 16]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X16, bd);
}
| 345 | |
// 32x32 inverse transform + add.
void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X32, bd);
}
| 351 | |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 352 | void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output, |
Urvang Joshi | 2283d37 | 2017-10-02 17:16:45 -0700 | [diff] [blame] | 353 | int stride, TX_TYPE tx_type, int bd) { |
Urvang Joshi | 8089315 | 2017-10-27 11:51:14 -0700 | [diff] [blame] | 354 | // TODO(urvang): Can the same array be reused, instead of using a new array? |
| 355 | // Remap 32x32 input into a modified 64x64 by: |
| 356 | // - Copying over these values in top-left 32x32 locations. |
| 357 | // - Setting the rest of the locations to 0. |
| 358 | int32_t mod_input[64 * 64]; |
| 359 | for (int row = 0; row < 32; ++row) { |
| 360 | memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input)); |
| 361 | memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input)); |
| 362 | } |
| 363 | memset(mod_input + 32 * 64, 0, 32 * 64 * sizeof(*mod_input)); |
Urvang Joshi | 1ac47a7 | 2017-12-07 12:12:50 -0800 | [diff] [blame] | 364 | DECLARE_ALIGNED(32, int, txfm_buf[64 * 64 + 64 + 64]); |
Urvang Joshi | 8089315 | 2017-10-27 11:51:14 -0700 | [diff] [blame] | 365 | inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X64, |
| 366 | bd); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 367 | } |
Debargha Mukherjee | 2b43501 | 2017-09-28 08:30:35 -0700 | [diff] [blame] | 368 | |
// 64x32 inverse transform + add. Only a 32x32 block of coefficients is
// coded, so the input is zero-extended to 64x32 first. Direct under
// NO_INV_TRANSPOSE; the #else path (compiled out) rotates to 32x64.
void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 32x32 input into a modified 64x32 by:
  // - Copying over these values in top-left 32x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[64 * 32];
  for (int row = 0; row < 32; ++row) {
    memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
    memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
  }
  DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
#if NO_INV_TRANSPOSE
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X32,
                        bd);
#else
  int32_t rinput[64 * 32];
  uint16_t routput[64 * 32];
  TX_SIZE tx_size = TX_64X32;
  TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  int w = tx_size_wide[tx_size];
  int h = tx_size_high[tx_size];
  int rw = h;
  int rh = w;
  transpose_int32(rinput, rw, mod_input, w, w, h);
  transpose_uint16(routput, rw, output, stride, w, h);
  inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
  transpose_uint16(output, stride, routput, rw, rw, rh);
#endif  // NO_INV_TRANSPOSE
}
| 399 | |
// 32x64 inverse transform + add. Coefficients beyond the top 32x32 block
// are not coded, so the bottom half is zero-filled before transforming.
void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
                                int stride, TX_TYPE tx_type, int bd) {
  // Remap 32x32 input into a modified 32x64 input by:
  // - Copying over these values in top-left 32x32 locations.
  // - Setting the rest of the locations to 0.
  int32_t mod_input[32 * 64];
  memcpy(mod_input, input, 32 * 32 * sizeof(*mod_input));
  memset(mod_input + 32 * 32, 0, 32 * 32 * sizeof(*mod_input));
  DECLARE_ALIGNED(32, int, txfm_buf[64 * 32 + 64 + 64]);
  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_32X64,
                        bd);
}
Debargha Mukherjee | 0254fee | 2017-12-02 09:08:52 -0800 | [diff] [blame] | 412 | |
| 413 | void av1_inv_txfm2d_add_16x64_c(const int32_t *input, uint16_t *output, |
| 414 | int stride, TX_TYPE tx_type, int bd) { |
Urvang Joshi | 030cea9 | 2017-12-05 15:27:09 -0800 | [diff] [blame] | 415 | // Remap 16x32 input into a modified 16x64 input by: |
| 416 | // - Copying over these values in top-left 16x32 locations. |
| 417 | // - Setting the rest of the locations to 0. |
| 418 | int32_t mod_input[16 * 64]; |
| 419 | memcpy(mod_input, input, 16 * 32 * sizeof(*mod_input)); |
| 420 | memset(mod_input + 16 * 32, 0, 16 * 32 * sizeof(*mod_input)); |
Urvang Joshi | 1ac47a7 | 2017-12-07 12:12:50 -0800 | [diff] [blame] | 421 | DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]); |
Urvang Joshi | 030cea9 | 2017-12-05 15:27:09 -0800 | [diff] [blame] | 422 | inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_16X64, |
| 423 | bd); |
Debargha Mukherjee | 0254fee | 2017-12-02 09:08:52 -0800 | [diff] [blame] | 424 | } |
| 425 | |
| 426 | void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output, |
| 427 | int stride, TX_TYPE tx_type, int bd) { |
Urvang Joshi | 030cea9 | 2017-12-05 15:27:09 -0800 | [diff] [blame] | 428 | // Remap 32x16 input into a modified 64x16 by: |
| 429 | // - Copying over these values in top-left 32x16 locations. |
| 430 | // - Setting the rest of the locations to 0. |
| 431 | int32_t mod_input[64 * 16]; |
| 432 | for (int row = 0; row < 16; ++row) { |
| 433 | memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input)); |
| 434 | memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input)); |
| 435 | } |
Urvang Joshi | 1ac47a7 | 2017-12-07 12:12:50 -0800 | [diff] [blame] | 436 | DECLARE_ALIGNED(32, int, txfm_buf[16 * 64 + 64 + 64]); |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 437 | #if NO_INV_TRANSPOSE |
| 438 | inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X16, |
| 439 | bd); |
| 440 | #else |
Debargha Mukherjee | 0254fee | 2017-12-02 09:08:52 -0800 | [diff] [blame] | 441 | int32_t rinput[16 * 64]; |
| 442 | uint16_t routput[16 * 64]; |
| 443 | TX_SIZE tx_size = TX_64X16; |
| 444 | TX_SIZE rtx_size = av1_rotate_tx_size(tx_size); |
| 445 | TX_TYPE rtx_type = av1_rotate_tx_type(tx_type); |
| 446 | int w = tx_size_wide[tx_size]; |
| 447 | int h = tx_size_high[tx_size]; |
| 448 | int rw = h; |
| 449 | int rh = w; |
Urvang Joshi | 030cea9 | 2017-12-05 15:27:09 -0800 | [diff] [blame] | 450 | transpose_int32(rinput, rw, mod_input, w, w, h); |
Debargha Mukherjee | 0254fee | 2017-12-02 09:08:52 -0800 | [diff] [blame] | 451 | transpose_uint16(routput, rw, output, stride, w, h); |
| 452 | inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd); |
| 453 | transpose_uint16(output, stride, routput, rw, rw, rh); |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 454 | #endif // NO_INV_TRANSPOSE |
Debargha Mukherjee | 0254fee | 2017-12-02 09:08:52 -0800 | [diff] [blame] | 455 | } |
Debargha Mukherjee | 845057f | 2017-11-13 07:03:36 -0800 | [diff] [blame] | 456 | |
Debargha Mukherjee | 845057f | 2017-11-13 07:03:36 -0800 | [diff] [blame] | 457 | void av1_inv_txfm2d_add_4x16_c(const int32_t *input, uint16_t *output, |
| 458 | int stride, TX_TYPE tx_type, int bd) { |
Urvang Joshi | 1ac47a7 | 2017-12-07 12:12:50 -0800 | [diff] [blame] | 459 | DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]); |
Debargha Mukherjee | 845057f | 2017-11-13 07:03:36 -0800 | [diff] [blame] | 460 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X16, bd); |
| 461 | } |
| 462 | |
| 463 | void av1_inv_txfm2d_add_16x4_c(const int32_t *input, uint16_t *output, |
| 464 | int stride, TX_TYPE tx_type, int bd) { |
Urvang Joshi | 1ac47a7 | 2017-12-07 12:12:50 -0800 | [diff] [blame] | 465 | DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]); |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 466 | #if NO_INV_TRANSPOSE |
| 467 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X4, bd); |
| 468 | #else |
Debargha Mukherjee | 845057f | 2017-11-13 07:03:36 -0800 | [diff] [blame] | 469 | int32_t rinput[4 * 16]; |
| 470 | uint16_t routput[4 * 16]; |
| 471 | TX_SIZE tx_size = TX_16X4; |
| 472 | TX_SIZE rtx_size = av1_rotate_tx_size(tx_size); |
| 473 | TX_TYPE rtx_type = av1_rotate_tx_type(tx_type); |
| 474 | int w = tx_size_wide[tx_size]; |
| 475 | int h = tx_size_high[tx_size]; |
| 476 | int rw = h; |
| 477 | int rh = w; |
| 478 | transpose_int32(rinput, rw, input, w, w, h); |
| 479 | transpose_uint16(routput, rw, output, stride, w, h); |
| 480 | inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd); |
| 481 | transpose_uint16(output, stride, routput, rw, rw, rh); |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 482 | #endif // NO_INV_TRANSPOSE |
Debargha Mukherjee | 845057f | 2017-11-13 07:03:36 -0800 | [diff] [blame] | 483 | } |
| 484 | |
| 485 | void av1_inv_txfm2d_add_8x32_c(const int32_t *input, uint16_t *output, |
| 486 | int stride, TX_TYPE tx_type, int bd) { |
Urvang Joshi | 1ac47a7 | 2017-12-07 12:12:50 -0800 | [diff] [blame] | 487 | DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]); |
Debargha Mukherjee | 845057f | 2017-11-13 07:03:36 -0800 | [diff] [blame] | 488 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X32, bd); |
| 489 | } |
| 490 | |
| 491 | void av1_inv_txfm2d_add_32x8_c(const int32_t *input, uint16_t *output, |
| 492 | int stride, TX_TYPE tx_type, int bd) { |
Urvang Joshi | 1ac47a7 | 2017-12-07 12:12:50 -0800 | [diff] [blame] | 493 | DECLARE_ALIGNED(32, int, txfm_buf[8 * 32 + 32 + 32]); |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 494 | #if NO_INV_TRANSPOSE |
| 495 | inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X8, bd); |
| 496 | #else |
Debargha Mukherjee | 845057f | 2017-11-13 07:03:36 -0800 | [diff] [blame] | 497 | int32_t rinput[8 * 32]; |
| 498 | uint16_t routput[8 * 32]; |
| 499 | TX_SIZE tx_size = TX_32X8; |
| 500 | TX_SIZE rtx_size = av1_rotate_tx_size(tx_size); |
| 501 | TX_TYPE rtx_type = av1_rotate_tx_type(tx_type); |
| 502 | int w = tx_size_wide[tx_size]; |
| 503 | int h = tx_size_high[tx_size]; |
| 504 | int rw = h; |
| 505 | int rh = w; |
| 506 | transpose_int32(rinput, rw, input, w, w, h); |
| 507 | transpose_uint16(routput, rw, output, stride, w, h); |
| 508 | inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd); |
| 509 | transpose_uint16(output, stride, routput, rw, rw, rh); |
Debargha Mukherjee | 1158bff | 2018-01-01 18:23:59 -0800 | [diff] [blame] | 510 | #endif // NO_INV_TRANSPOSE |
Debargha Mukherjee | 845057f | 2017-11-13 07:03:36 -0800 | [diff] [blame] | 511 | } |