blob: 853e5ec80b94903def5793fc91947d8d58f29dd4 [file] [log] [blame]
/*
 * Copyright (c) 2021, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear License
 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
 * License was not distributed with this source code in the LICENSE file, you
 * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the
 * Alliance for Open Media Patent License 1.0 was not distributed with this
 * source code in the PATENTS file, you can obtain it at
 * aomedia.org/license/patent-license/.
 */
12
Urvang Joshifdb60962016-10-14 15:30:27 -070013#include <assert.h>
Urvang Joshi698720b2018-05-09 15:04:31 -040014#include "aom_dsp/txfm_common.h"
Tom Finegan44702c82018-05-22 13:00:39 -070015#include "config/aom_dsp_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070016
Jerome Jiangd4e351d2020-01-30 15:13:11 -080017void aom_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
18 // The 2D transform is done with two passes which are actually pretty
19 // similar. In the first one, we transform the columns and transpose
20 // the results. In the second one, we transform the rows. To achieve that,
21 // as the first pass results are transposed, we transpose the columns (that
22 // is the transposed rows) and transpose the results (so that it goes back
23 // in normal/row positions).
24 // We need an intermediate buffer between passes.
25 tran_low_t intermediate[4 * 4];
26 const tran_low_t *in_low = NULL;
27 tran_low_t *out = intermediate;
28 // Do the two transform/transpose passes
29 for (int pass = 0; pass < 2; ++pass) {
30 tran_high_t in_high[4]; // canbe16
31 tran_high_t step[4]; // canbe16
32 tran_high_t temp1, temp2; // needs32
33 for (int i = 0; i < 4; ++i) {
34 // Load inputs.
35 if (pass == 0) {
36 in_high[0] = input[0 * stride] * 16;
37 in_high[1] = input[1 * stride] * 16;
38 in_high[2] = input[2 * stride] * 16;
39 in_high[3] = input[3 * stride] * 16;
40 if (i == 0 && in_high[0]) {
41 ++in_high[0];
42 }
43 } else {
44 assert(in_low != NULL);
45 in_high[0] = in_low[0 * 4];
46 in_high[1] = in_low[1 * 4];
47 in_high[2] = in_low[2 * 4];
48 in_high[3] = in_low[3 * 4];
49 ++in_low;
50 }
51 // Transform.
52 step[0] = in_high[0] + in_high[3];
53 step[1] = in_high[1] + in_high[2];
54 step[2] = in_high[1] - in_high[2];
55 step[3] = in_high[0] - in_high[3];
56 temp1 = (step[0] + step[1]) * cospi_16_64;
57 temp2 = (step[0] - step[1]) * cospi_16_64;
58 out[0] = (tran_low_t)fdct_round_shift(temp1);
59 out[2] = (tran_low_t)fdct_round_shift(temp2);
60 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
61 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
62 out[1] = (tran_low_t)fdct_round_shift(temp1);
63 out[3] = (tran_low_t)fdct_round_shift(temp2);
64 // Do next column (which is a transposed row in second/horizontal pass)
65 ++input;
66 out += 4;
67 }
68 // Setup in/out for next pass.
69 in_low = intermediate;
70 out = output;
71 }
72
73 for (int i = 0; i < 4; ++i) {
74 for (int j = 0; j < 4; ++j)
75 output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
76 }
77}
78
79void aom_fdct4x4_lp_c(const int16_t *input, int16_t *output, int stride) {
80 // The 2D transform is done with two passes which are actually pretty
81 // similar. In the first one, we transform the columns and transpose
82 // the results. In the second one, we transform the rows. To achieve that,
83 // as the first pass results are transposed, we transpose the columns (that
84 // is the transposed rows) and transpose the results (so that it goes back
85 // in normal/row positions).
86 // We need an intermediate buffer between passes.
87 int16_t intermediate[4 * 4];
88 const int16_t *in_low = NULL;
89 int16_t *out = intermediate;
90 // Do the two transform/transpose passes
91 for (int pass = 0; pass < 2; ++pass) {
92 int32_t in_high[4]; // canbe16
93 int32_t step[4]; // canbe16
94 int32_t temp1, temp2; // needs32
95 for (int i = 0; i < 4; ++i) {
96 // Load inputs.
97 if (pass == 0) {
98 in_high[0] = input[0 * stride] * 16;
99 in_high[1] = input[1 * stride] * 16;
100 in_high[2] = input[2 * stride] * 16;
101 in_high[3] = input[3 * stride] * 16;
102 if (i == 0 && in_high[0]) {
103 ++in_high[0];
104 }
105 } else {
106 assert(in_low != NULL);
107 in_high[0] = in_low[0 * 4];
108 in_high[1] = in_low[1 * 4];
109 in_high[2] = in_low[2 * 4];
110 in_high[3] = in_low[3 * 4];
111 ++in_low;
112 }
113 // Transform.
114 step[0] = in_high[0] + in_high[3];
115 step[1] = in_high[1] + in_high[2];
116 step[2] = in_high[1] - in_high[2];
117 step[3] = in_high[0] - in_high[3];
Jerome Jiang89b884c2020-02-04 11:18:31 -0800118 temp1 = (step[0] + step[1]) * (int32_t)cospi_16_64;
119 temp2 = (step[0] - step[1]) * (int32_t)cospi_16_64;
Jerome Jiangd4e351d2020-01-30 15:13:11 -0800120 out[0] = (int16_t)fdct_round_shift(temp1);
121 out[2] = (int16_t)fdct_round_shift(temp2);
Jerome Jiang89b884c2020-02-04 11:18:31 -0800122 temp1 = step[2] * (int32_t)cospi_24_64 + step[3] * (int32_t)cospi_8_64;
123 temp2 = -step[2] * (int32_t)cospi_8_64 + step[3] * (int32_t)cospi_24_64;
Jerome Jiangd4e351d2020-01-30 15:13:11 -0800124 out[1] = (int16_t)fdct_round_shift(temp1);
125 out[3] = (int16_t)fdct_round_shift(temp2);
126 // Do next column (which is a transposed row in second/horizontal pass)
127 ++input;
128 out += 4;
129 }
130 // Setup in/out for next pass.
131 in_low = intermediate;
132 out = output;
133 }
134
135 for (int i = 0; i < 4; ++i) {
136 for (int j = 0; j < 4; ++j)
137 output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
138 }
139}
140
James Almer857e93f2022-05-25 16:44:43 +0000141void aom_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
142 int stride) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700143 int i, j;
144 tran_low_t intermediate[64];
145 int pass;
146 tran_low_t *output = intermediate;
147 const tran_low_t *in = NULL;
148
149 // Transform columns
150 for (pass = 0; pass < 2; ++pass) {
151 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
152 tran_high_t t0, t1, t2, t3; // needs32
153 tran_high_t x0, x1, x2, x3; // canbe16
154
Yaowu Xuc27fc142016-08-22 16:08:15 -0700155 for (i = 0; i < 8; i++) {
156 // stage 1
157 if (pass == 0) {
158 s0 = (input[0 * stride] + input[7 * stride]) * 4;
159 s1 = (input[1 * stride] + input[6 * stride]) * 4;
160 s2 = (input[2 * stride] + input[5 * stride]) * 4;
161 s3 = (input[3 * stride] + input[4 * stride]) * 4;
162 s4 = (input[3 * stride] - input[4 * stride]) * 4;
163 s5 = (input[2 * stride] - input[5 * stride]) * 4;
164 s6 = (input[1 * stride] - input[6 * stride]) * 4;
165 s7 = (input[0 * stride] - input[7 * stride]) * 4;
166 ++input;
167 } else {
168 s0 = in[0 * 8] + in[7 * 8];
169 s1 = in[1 * 8] + in[6 * 8];
170 s2 = in[2 * 8] + in[5 * 8];
171 s3 = in[3 * 8] + in[4 * 8];
172 s4 = in[3 * 8] - in[4 * 8];
173 s5 = in[2 * 8] - in[5 * 8];
174 s6 = in[1 * 8] - in[6 * 8];
175 s7 = in[0 * 8] - in[7 * 8];
176 ++in;
177 }
178
179 // fdct4(step, step);
180 x0 = s0 + s3;
181 x1 = s1 + s2;
182 x2 = s1 - s2;
183 x3 = s0 - s3;
184 t0 = (x0 + x1) * cospi_16_64;
185 t1 = (x0 - x1) * cospi_16_64;
186 t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
187 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
188 output[0] = (tran_low_t)fdct_round_shift(t0);
189 output[2] = (tran_low_t)fdct_round_shift(t2);
190 output[4] = (tran_low_t)fdct_round_shift(t1);
191 output[6] = (tran_low_t)fdct_round_shift(t3);
192
193 // Stage 2
194 t0 = (s6 - s5) * cospi_16_64;
195 t1 = (s6 + s5) * cospi_16_64;
196 t2 = fdct_round_shift(t0);
197 t3 = fdct_round_shift(t1);
198
199 // Stage 3
200 x0 = s4 + t2;
201 x1 = s4 - t2;
202 x2 = s7 - t3;
203 x3 = s7 + t3;
204
205 // Stage 4
206 t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
207 t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
208 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
209 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
210 output[1] = (tran_low_t)fdct_round_shift(t0);
211 output[3] = (tran_low_t)fdct_round_shift(t2);
212 output[5] = (tran_low_t)fdct_round_shift(t1);
213 output[7] = (tran_low_t)fdct_round_shift(t3);
214 output += 8;
215 }
216 in = intermediate;
217 output = final_output;
218 }
219
220 // Rows
221 for (i = 0; i < 8; ++i) {
222 for (j = 0; j < 8; ++j) final_output[j + i * 8] /= 2;
223 }
224}