blob: ca2f83841b4b75f9e8ace3ef747b624d860e895e [file] [log] [blame]
/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "vp9/common/vp9_convolve.h"

#include <assert.h>
#include <stddef.h>
#include <string.h>

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
John Koleszar5ca6a362013-01-25 09:47:09 -080019
/* Fixed-point scale of the convolution sums in this file: each sum is rounded
 * by adding VP9_FILTER_WEIGHT / 2 and then shifted right by VP9_FILTER_SHIFT.
 * The two values must stay in sync: VP9_FILTER_WEIGHT == 1 << VP9_FILTER_SHIFT.
 */
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
John Koleszar5ca6a362013-01-25 09:47:09 -080022
/* Assume a bank of 16 filters to choose from. There are three possible
 * implementations for filter wrapping behavior, since we want to be able to
 * pick which filter to start with. We could either:
 *
 * 1) make filter_ a pointer to the base of the filter array, and then add an
 *    additional offset parameter, to choose the starting filter.
 * 2) use a pointer to 2 periods worth of filters, so that even if the original
 *    phase offset is at 15/16, we'll have valid data to read. The filter
 *    tables become [32][8], and the second half is duplicated.
 * 3) fix the alignment of the filter tables, so that we know the 0/16 is
 *    always 256 byte aligned.
 *
 * Implementations 2 and 3 are likely preferable, as they avoid an extra 2
 * parameters, and switching between them is trivial, with the
 * ALIGN_FILTERS_256 macro, below.
 */
/* When non-zero, the convolve helpers recover the base of the filter bank by
 * clearing the low 8 bits of the filter pointer they are given. When zero,
 * the pointer they are given is itself used as the base. */
#define ALIGN_FILTERS_256 1
40
Ronald S. Bultjedecead72013-07-10 11:17:19 -070041static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
42 uint8_t *dst, ptrdiff_t dst_stride,
John Koleszar5ca6a362013-01-25 09:47:09 -080043 const int16_t *filter_x0, int x_step_q4,
44 const int16_t *filter_y, int y_step_q4,
45 int w, int h, int taps) {
46 int x, y, k, sum;
47 const int16_t *filter_x_base = filter_x0;
48
49#if ALIGN_FILTERS_256
50 filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
51#endif
52
53 /* Adjust base pointer address for this source line */
54 src -= taps / 2 - 1;
55
56 for (y = 0; y < h; ++y) {
57 /* Pointer to filter to use */
58 const int16_t *filter_x = filter_x0;
59
60 /* Initial phase offset */
John Koleszar6fd7dd12013-02-20 15:59:20 -080061 int x0_q4 = (filter_x - filter_x_base) / taps;
62 int x_q4 = x0_q4;
John Koleszar5ca6a362013-01-25 09:47:09 -080063
64 for (x = 0; x < w; ++x) {
65 /* Per-pixel src offset */
John Koleszar6fd7dd12013-02-20 15:59:20 -080066 int src_x = (x_q4 - x0_q4) >> 4;
John Koleszar5ca6a362013-01-25 09:47:09 -080067
68 for (sum = 0, k = 0; k < taps; ++k) {
69 sum += src[src_x + k] * filter_x[k];
70 }
71 sum += (VP9_FILTER_WEIGHT >> 1);
72 dst[x] = clip_pixel(sum >> VP9_FILTER_SHIFT);
73
74 /* Adjust source and filter to use for the next pixel */
75 x_q4 += x_step_q4;
76 filter_x = filter_x_base + (x_q4 & 0xf) * taps;
77 }
78 src += src_stride;
79 dst += dst_stride;
80 }
81}
82
Ronald S. Bultjedecead72013-07-10 11:17:19 -070083static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
84 uint8_t *dst, ptrdiff_t dst_stride,
John Koleszar5ca6a362013-01-25 09:47:09 -080085 const int16_t *filter_x0, int x_step_q4,
86 const int16_t *filter_y, int y_step_q4,
87 int w, int h, int taps) {
88 int x, y, k, sum;
89 const int16_t *filter_x_base = filter_x0;
90
91#if ALIGN_FILTERS_256
92 filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
93#endif
94
95 /* Adjust base pointer address for this source line */
96 src -= taps / 2 - 1;
97
98 for (y = 0; y < h; ++y) {
99 /* Pointer to filter to use */
100 const int16_t *filter_x = filter_x0;
101
102 /* Initial phase offset */
John Koleszar6fd7dd12013-02-20 15:59:20 -0800103 int x0_q4 = (filter_x - filter_x_base) / taps;
104 int x_q4 = x0_q4;
John Koleszar5ca6a362013-01-25 09:47:09 -0800105
106 for (x = 0; x < w; ++x) {
107 /* Per-pixel src offset */
John Koleszar6fd7dd12013-02-20 15:59:20 -0800108 int src_x = (x_q4 - x0_q4) >> 4;
John Koleszar5ca6a362013-01-25 09:47:09 -0800109
110 for (sum = 0, k = 0; k < taps; ++k) {
111 sum += src[src_x + k] * filter_x[k];
112 }
113 sum += (VP9_FILTER_WEIGHT >> 1);
114 dst[x] = (dst[x] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
115
116 /* Adjust source and filter to use for the next pixel */
117 x_q4 += x_step_q4;
118 filter_x = filter_x_base + (x_q4 & 0xf) * taps;
119 }
120 src += src_stride;
121 dst += dst_stride;
122 }
123}
124
Ronald S. Bultjedecead72013-07-10 11:17:19 -0700125static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
126 uint8_t *dst, ptrdiff_t dst_stride,
John Koleszar5ca6a362013-01-25 09:47:09 -0800127 const int16_t *filter_x, int x_step_q4,
128 const int16_t *filter_y0, int y_step_q4,
129 int w, int h, int taps) {
130 int x, y, k, sum;
131
132 const int16_t *filter_y_base = filter_y0;
133
134#if ALIGN_FILTERS_256
135 filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
136#endif
137
138 /* Adjust base pointer address for this source column */
139 src -= src_stride * (taps / 2 - 1);
140 for (x = 0; x < w; ++x) {
141 /* Pointer to filter to use */
142 const int16_t *filter_y = filter_y0;
143
144 /* Initial phase offset */
John Koleszar6fd7dd12013-02-20 15:59:20 -0800145 int y0_q4 = (filter_y - filter_y_base) / taps;
146 int y_q4 = y0_q4;
John Koleszar5ca6a362013-01-25 09:47:09 -0800147
148 for (y = 0; y < h; ++y) {
149 /* Per-pixel src offset */
John Koleszar6fd7dd12013-02-20 15:59:20 -0800150 int src_y = (y_q4 - y0_q4) >> 4;
John Koleszar5ca6a362013-01-25 09:47:09 -0800151
152 for (sum = 0, k = 0; k < taps; ++k) {
153 sum += src[(src_y + k) * src_stride] * filter_y[k];
154 }
155 sum += (VP9_FILTER_WEIGHT >> 1);
156 dst[y * dst_stride] = clip_pixel(sum >> VP9_FILTER_SHIFT);
157
158 /* Adjust source and filter to use for the next pixel */
159 y_q4 += y_step_q4;
160 filter_y = filter_y_base + (y_q4 & 0xf) * taps;
161 }
162 ++src;
163 ++dst;
164 }
165}
166
Ronald S. Bultjedecead72013-07-10 11:17:19 -0700167static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
168 uint8_t *dst, ptrdiff_t dst_stride,
John Koleszar5ca6a362013-01-25 09:47:09 -0800169 const int16_t *filter_x, int x_step_q4,
170 const int16_t *filter_y0, int y_step_q4,
171 int w, int h, int taps) {
172 int x, y, k, sum;
173
174 const int16_t *filter_y_base = filter_y0;
175
176#if ALIGN_FILTERS_256
177 filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
178#endif
179
180 /* Adjust base pointer address for this source column */
181 src -= src_stride * (taps / 2 - 1);
182 for (x = 0; x < w; ++x) {
183 /* Pointer to filter to use */
184 const int16_t *filter_y = filter_y0;
185
186 /* Initial phase offset */
John Koleszar6fd7dd12013-02-20 15:59:20 -0800187 int y0_q4 = (filter_y - filter_y_base) / taps;
188 int y_q4 = y0_q4;
John Koleszar5ca6a362013-01-25 09:47:09 -0800189
190 for (y = 0; y < h; ++y) {
191 /* Per-pixel src offset */
John Koleszar6fd7dd12013-02-20 15:59:20 -0800192 int src_y = (y_q4 - y0_q4) >> 4;
John Koleszar5ca6a362013-01-25 09:47:09 -0800193
194 for (sum = 0, k = 0; k < taps; ++k) {
195 sum += src[(src_y + k) * src_stride] * filter_y[k];
196 }
197 sum += (VP9_FILTER_WEIGHT >> 1);
198 dst[y * dst_stride] =
199 (dst[y * dst_stride] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;
200
201 /* Adjust source and filter to use for the next pixel */
202 y_q4 += y_step_q4;
203 filter_y = filter_y_base + (y_q4 & 0xf) * taps;
204 }
205 ++src;
206 ++dst;
207 }
208}
209
/* Separable 2-D convolution: a horizontal pass into a stack buffer followed
 * by a vertical pass from that buffer into dst.
 *
 * The horizontal pass starts taps / 2 - 1 rows above src and produces
 * intermediate_height rows, so that the vertical pass has the extra rows it
 * reads above and below the block.
 *
 *   src, src_stride      source pixels and row pitch.
 *   dst, dst_stride      destination pixels and row pitch.
 *   filter_x, x_step_q4  initial horizontal kernel and step (1/16 pixel).
 *   filter_y, y_step_q4  initial vertical kernel and step (1/16 pixel).
 *   w, h                 output size; each must be <= 64.
 *   taps                 coefficients per kernel; must be <= 8.
 */
static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
                       uint8_t *dst, ptrdiff_t dst_stride,
                       const int16_t *filter_x, int x_step_q4,
                       const int16_t *filter_y, int y_step_q4,
                       int w, int h, int taps) {
  /* Fixed size intermediate buffer places limits on parameters.
   * Maximum intermediate_height is 135, for y_step_q4 == 32,
   * h == 64, taps == 8.
   */
  uint8_t temp[64 * 135];
  int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;

  assert(w <= 64);
  assert(h <= 64);
  assert(taps <= 8);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  /* Never produce fewer intermediate rows than output rows. */
  if (intermediate_height < h) {
    intermediate_height = h;
  }

  /* Guard the fixed-size buffer directly, not only via the input limits. */
  assert(intermediate_height <= 135);

  /* Horizontal pass: temp uses a fixed row pitch of 64. */
  convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
                   temp, 64,
                   filter_x, x_step_q4, filter_y, y_step_q4,
                   w, intermediate_height, taps);

  /* Vertical pass: skip the leading rows so that the vertical filter's own
   * upward adjustment lands on the first row of temp. */
  convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,
                  filter_x, x_step_q4, filter_y, y_step_q4,
                  w, h, taps);
}
239
/* 8-tap horizontal convolution: forwards all arguments to convolve_horiz_c
 * with taps fixed at 8. */
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
  convolve_horiz_c(src, src_stride, dst, dst_stride,
                   filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}
248
/* 8-tap horizontal convolution averaged into dst: forwards all arguments to
 * convolve_avg_horiz_c with taps fixed at 8. */
void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
  convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
                       filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}
257
/* 8-tap vertical convolution: forwards all arguments to convolve_vert_c with
 * taps fixed at 8. */
void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
  convolve_vert_c(src, src_stride, dst, dst_stride,
                  filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}
266
/* 8-tap vertical convolution averaged into dst: forwards all arguments to
 * convolve_avg_vert_c with taps fixed at 8. */
void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  convolve_avg_vert_c(src, src_stride, dst, dst_stride,
                      filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}
275
/* 8-tap separable 2-D convolution: forwards all arguments to convolve_c with
 * taps fixed at 8. convolve_c asserts w <= 64, h <= 64 and both steps <= 32. */
void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  convolve_c(src, src_stride, dst, dst_stride,
             filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}
284
Ronald S. Bultjedecead72013-07-10 11:17:19 -0700285void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
286 uint8_t *dst, ptrdiff_t dst_stride,
John Koleszar5ca6a362013-01-25 09:47:09 -0800287 const int16_t *filter_x, int x_step_q4,
288 const int16_t *filter_y, int y_step_q4,
289 int w, int h) {
Christian Duvivier094e2572013-02-11 15:34:08 -0800290 /* Fixed size intermediate buffer places limits on parameters. */
John Koleszara9ebbcc2013-04-18 13:05:38 -0700291 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
292 assert(w <= 64);
293 assert(h <= 64);
Christian Duvivier094e2572013-02-11 15:34:08 -0800294
Dmitry Kovalev9a31d052013-08-12 14:28:00 -0700295 vp9_convolve8(src, src_stride, temp, 64,
296 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
297 vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
John Koleszar5ca6a362013-01-25 09:47:09 -0800298}
John Koleszar7a07eea2013-01-28 16:59:03 -0800299
/* Copies a w x h block of pixels from src to dst, one row at a time.
 *
 *   src, src_stride  source pixels and row pitch.
 *   dst, dst_stride  destination pixels and row pitch. Each row is copied
 *                    with memcpy, so a source row and its destination row
 *                    must not overlap.
 *   filter_x, filter_x_stride, filter_y, filter_y_stride
 *                    unused here.
 *   w, h             block width (bytes per row) and height (rows). Nothing
 *                    is copied when h <= 0.
 */
void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int filter_x_stride,
                         const int16_t *filter_y, int filter_y_stride,
                         int w, int h) {
  int r;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (r = 0; r < h; ++r) {
    memcpy(dst, src, (size_t)w);
    src += src_stride;
    dst += dst_stride;
  }
}
313
/* Replaces every pixel of a w x h block in dst with
 * ROUND_POWER_OF_TWO(dst + src, 1), i.e. combines dst with src in place.
 *
 *   src, src_stride  source pixels and row pitch.
 *   dst, dst_stride  destination pixels (read and written) and row pitch.
 *   filter_x, filter_x_stride, filter_y, filter_y_stride
 *                    unused here.
 *   w, h             block width and height in pixels.
 */
void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int filter_x_stride,
                        const int16_t *filter_y, int filter_y_stride,
                        int w, int h) {
  int x, y;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x) {
      dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
    }

    src += src_stride;
    dst += dst_stride;
  }
}