/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./av1_rtcd.h"
#include "av1/common/enums.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/av1_inv_txfm1d.h"
#include "av1/common/av1_inv_txfm2d_cfg.h"

// Maps a 1D inverse transform type to the function that implements it. All
// of the 1D transforms below share the TxfmFunc signature.
static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
  switch (txfm_type) {
    case TXFM_TYPE_DCT4: return av1_idct4_new;
    case TXFM_TYPE_DCT8: return av1_idct8_new;
    case TXFM_TYPE_DCT16: return av1_idct16_new;
    case TXFM_TYPE_DCT32: return av1_idct32_new;
    case TXFM_TYPE_ADST4: return av1_iadst4_new;
    case TXFM_TYPE_ADST8: return av1_iadst8_new;
    case TXFM_TYPE_ADST16: return av1_iadst16_new;
    case TXFM_TYPE_ADST32: return av1_iadst32_new;
    default: assert(0); return NULL;
  }
}
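/* Illustrative sketch (not part of the library): because every 1D transform
 * above shares the TxfmFunc signature, a caller can hold one function pointer
 * and invoke it uniformly. The buffer names here are hypothetical; the
 * argument order matches the calls in inv_txfm2d_add_c() below:
 *
 *   TxfmFunc idct4 = inv_txfm_type_to_func(TXFM_TYPE_DCT4);
 *   idct4(in4, out4, cos_bit_col, stage_range_col);
 */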

#if CONFIG_EXT_TX
// Per-(tx_type, tx_size) 2D inverse transform configurations. Note that the
// FLIPADST rows reuse the plain ADST configurations: the flipping is not
// part of the transform kernel and is applied separately via set_flip_cfg().
static const TXFM_2D_CFG *inv_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = {
  { &inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8,
    &inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 },
  { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
    &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
  { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
    &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
  { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
    &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
  { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
    &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
  { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
    &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
  { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
    &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
  { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
    &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
  { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
    &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
};
#else
static const TXFM_2D_CFG *inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
  { &inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8,
    &inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 },
  { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
    &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
  { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
    &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
  { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
    &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
};
#endif  // CONFIG_EXT_TX
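/* Indexing sketch (illustrative): under CONFIG_EXT_TX, FLIPADST_DCT resolves
 * to the same kernel configuration as ADST_DCT; only the flip flags set by
 * set_flip_cfg() distinguish the two:
 *
 *   const TXFM_2D_CFG *c = inv_txfm_cfg_ls[FLIPADST_DCT][TX_8X8];
 *   // c == &inv_txfm_2d_cfg_adst_dct_8
 */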

TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(int tx_type, int tx_size) {
  TXFM_2D_FLIP_CFG cfg;
  set_flip_cfg(tx_type, &cfg);
  cfg.cfg = inv_txfm_cfg_ls[tx_type][tx_size];
  return cfg;
}
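/* Usage sketch (illustrative): fetch the combined flip + kernel configuration
 * for a 16x16 ADST_DCT inverse transform.
 *
 *   TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(ADST_DCT, TX_16X16);
 *   // cfg.cfg == inv_txfm_cfg_ls[ADST_DCT][TX_16X16]; the flip flags come
 *   // from set_flip_cfg()
 */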

// 64x64 has no entry in inv_txfm_cfg_ls; only DCT_DCT is supported at this
// size.
TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
  TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL };
  switch (tx_type) {
    case DCT_DCT:
      cfg.cfg = &inv_txfm_2d_cfg_dct_dct_64;
      set_flip_cfg(tx_type, &cfg);
      break;
    default: assert(0);
  }
  return cfg;
}

static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
                                    int stride, TXFM_2D_FLIP_CFG *cfg,
                                    int32_t *txfm_buf) {
  const int txfm_size = cfg->cfg->txfm_size;
  const int8_t *shift = cfg->cfg->shift;
  const int8_t *stage_range_col = cfg->cfg->stage_range_col;
  const int8_t *stage_range_row = cfg->cfg->stage_range_row;
  const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;
  const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;
  const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->cfg->txfm_type_col);
  const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->cfg->txfm_type_row);

  // txfm_buf is used for intermediate data buffering. Its length is
  // txfm_size * txfm_size + 2 * txfm_size: one txfm_size-length column
  // scratch buffer each for input and output, plus a full
  // txfm_size * txfm_size buffer for the row-transform results.
  int32_t *temp_in = txfm_buf;
  int32_t *temp_out = temp_in + txfm_size;
  int32_t *buf = temp_out + txfm_size;
  int32_t *buf_ptr = buf;
  int c, r;

  // Rows: apply the 1D row transform to each row of the input block, then
  // round-shift by shift[0].
  for (r = 0; r < txfm_size; ++r) {
    txfm_func_row(input, buf_ptr, cos_bit_row, stage_range_row);
    round_shift_array(buf_ptr, txfm_size, -shift[0]);
    input += txfm_size;
    buf_ptr += txfm_size;
  }

  // Columns: apply the 1D column transform, round-shift by shift[1], and add
  // the residual into the prediction in output, honoring the flip flags.
  for (c = 0; c < txfm_size; ++c) {
    if (cfg->lr_flip == 0) {
      for (r = 0; r < txfm_size; ++r) temp_in[r] = buf[r * txfm_size + c];
    } else {
      // flip left/right: read the columns in reverse order
      for (r = 0; r < txfm_size; ++r)
        temp_in[r] = buf[r * txfm_size + (txfm_size - c - 1)];
    }
    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
    round_shift_array(temp_out, txfm_size, -shift[1]);
    if (cfg->ud_flip == 0) {
      for (r = 0; r < txfm_size; ++r) output[r * stride + c] += temp_out[r];
    } else {
      // flip upside down: write the column output in reverse order
      for (r = 0; r < txfm_size; ++r)
        output[r * stride + c] += temp_out[txfm_size - r - 1];
    }
  }
}
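/* Worked example of the txfm_buf sizing (illustrative): for an 8x8 transform,
 * txfm_size = 8, so the scratch buffer needs
 *
 *   8 * 8 + 2 * 8 = 80 int32_t entries,
 *
 * which is exactly the int32_t txfm_buf[8 * 8 + 8 + 8] declared in
 * av1_inv_txfm2d_add_8x8_c() below.
 */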

void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
                              int stride, int tx_type, int bd) {
  int32_t txfm_buf[4 * 4 + 4 + 4];
  // output contains the prediction signal, which is always non-negative and
  // smaller than (1 << bd) - 1. Since bd < 15, there is enough headroom to
  // treat the uint16_t* output buffer as an int16_t*.
  TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(tx_type, TX_4X4);
  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
  clamp_block((int16_t *)output, 4, stride, 0, (1 << bd) - 1);
}
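/* Usage sketch (illustrative; dequant_coeffs, dst, and dst_stride are
 * hypothetical caller-side names): add the inverse-transformed 4x4 residual
 * into a 10-bit prediction block.
 *
 *   av1_inv_txfm2d_add_4x4_c(dequant_coeffs, dst, dst_stride, DCT_DCT, 10);
 */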

void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
                              int stride, int tx_type, int bd) {
  int32_t txfm_buf[8 * 8 + 8 + 8];
  // output contains the prediction signal, which is always non-negative and
  // smaller than (1 << bd) - 1. Since bd < 15, there is enough headroom to
  // treat the uint16_t* output buffer as an int16_t*.
  TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(tx_type, TX_8X8);
  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
  clamp_block((int16_t *)output, 8, stride, 0, (1 << bd) - 1);
}

void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
                                int stride, int tx_type, int bd) {
  int32_t txfm_buf[16 * 16 + 16 + 16];
  // output contains the prediction signal, which is always non-negative and
  // smaller than (1 << bd) - 1. Since bd < 15, there is enough headroom to
  // treat the uint16_t* output buffer as an int16_t*.
  TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(tx_type, TX_16X16);
  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
  clamp_block((int16_t *)output, 16, stride, 0, (1 << bd) - 1);
}

void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
                                int stride, int tx_type, int bd) {
  int32_t txfm_buf[32 * 32 + 32 + 32];
  // output contains the prediction signal, which is always non-negative and
  // smaller than (1 << bd) - 1. Since bd < 15, there is enough headroom to
  // treat the uint16_t* output buffer as an int16_t*.
  TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(tx_type, TX_32X32);
  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
  clamp_block((int16_t *)output, 32, stride, 0, (1 << bd) - 1);
}

void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
                                int stride, int tx_type, int bd) {
  int32_t txfm_buf[64 * 64 + 64 + 64];
  // output contains the prediction signal, which is always non-negative and
  // smaller than (1 << bd) - 1. Since bd < 15, there is enough headroom to
  // treat the uint16_t* output buffer as an int16_t*.
  TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_64x64_cfg(tx_type);
  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
  clamp_block((int16_t *)output, 64, stride, 0, (1 << bd) - 1);
}
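
/* Dispatch sketch (illustrative, not part of the library): a caller that
 * selects the matching wrapper by transform size. Everything here other than
 * the av1_inv_txfm2d_add_* functions and the TX_* enum values is
 * hypothetical.
 *
 *   static void inv_txfm2d_add(const int32_t *input, uint16_t *output,
 *                              int stride, int tx_size, int tx_type, int bd) {
 *     switch (tx_size) {
 *       case TX_4X4:
 *         av1_inv_txfm2d_add_4x4_c(input, output, stride, tx_type, bd);
 *         break;
 *       case TX_8X8:
 *         av1_inv_txfm2d_add_8x8_c(input, output, stride, tx_type, bd);
 *         break;
 *       case TX_16X16:
 *         av1_inv_txfm2d_add_16x16_c(input, output, stride, tx_type, bd);
 *         break;
 *       case TX_32X32:
 *         av1_inv_txfm2d_add_32x32_c(input, output, stride, tx_type, bd);
 *         break;
 *       default: assert(0);
 *     }
 *   }
 */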