/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <string.h>

#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
#include "aom/aom_integer.h"
#include "aom_dsp/aom_convolve.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/aom_filter.h"
#include "aom_ports/mem.h"

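// Sub-pixel positions throughout this file are expressed in q4 (1/16-pel)
// fixed point: the upper bits of an *_q4 value (value >> SUBPEL_BITS) select
// the integer source sample and the low bits (value & SUBPEL_MASK) select one
// of the 16 interpolation kernel phases. Each helper below backs src up by
// SUBPEL_TAPS / 2 - 1 samples (or rows) so the SUBPEL_TAPS-tap kernel is
// centered on the output position, accumulates the weighted taps, and rounds
// the FILTER_BITS fixed-point sum back to pixel range.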
static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const InterpKernel *x_filters, int x0_q4,
                           int x_step_q4, int w, int h) {
  int x, y;
  src -= SUBPEL_TAPS / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

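// Identical to convolve_horiz() except that the filtered value is averaged
// with the pixel already in dst: dst[x] = (dst[x] + filtered + 1) >> 1.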
static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const InterpKernel *x_filters, int x0_q4,
                               int x_step_q4, int w, int h) {
  int x, y;
  src -= SUBPEL_TAPS / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
      dst[x] = ROUND_POWER_OF_TWO(
          dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

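// Vertical counterpart of convolve_horiz(): the block is processed one column
// at a time, y_q4 walks down the column in 1/16-pel steps, and the kernel taps
// are gathered k * src_stride apart instead of from adjacent samples.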
static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const InterpKernel *y_filters, int y0_q4,
                          int y_step_q4, int w, int h) {
  int x, y;
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

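// Vertical filter that, like convolve_avg_horiz(), rounds the result into the
// average of the filtered value and the existing contents of dst.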
static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const InterpKernel *y_filters, int y0_q4,
                              int y_step_q4, int w, int h) {
  int x, y;
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] = ROUND_POWER_OF_TWO(
          dst[y * dst_stride] +
              clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
          1);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const InterpKernel *const x_filters,
                     int x0_q4, int x_step_q4,
                     const InterpKernel *const y_filters, int y0_q4,
                     int y_step_q4, int w, int h) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
  int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);

  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp,
                 MAX_SB_SIZE, x_filters, x0_q4, x_step_q4, w,
                 intermediate_height);
  convolve_vert(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE, dst,
                dst_stride, y_filters, y0_q4, y_step_q4, w, h);
}

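// The filter_x/filter_y pointers handed to the aom_convolve8_* entry points
// address a single kernel inside a table of 16 phase-shifted kernels
// (16 kernels * SUBPEL_TAPS taps * sizeof(int16_t) = 256 bytes). Because the
// table is 256-byte aligned, masking off the low 8 address bits recovers the
// table base, and the pointer difference from that base gives the initial
// sub-pixel phase in q4 units.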
static const InterpKernel *get_filter_base(const int16_t *filter) {
  // NOTE: This assumes that the filter table is 256-byte aligned.
  // TODO(agrange) Modify to make independent of table alignment.
  return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
}

static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
  return (int)((const InterpKernel *)(intptr_t)f - base);
}

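// Public C reference implementations registered with the RTCD dispatcher.
// Each one splits the incoming kernel pointer into (table base, initial q4
// phase) and forwards to the matching static helper above. A step of 16
// (x_step_q4 / y_step_q4 == 16) advances exactly one source pixel per output
// pixel, i.e. no scaling; larger steps downscale.
//
// Illustrative call (hypothetical kernel table name), interpolating a 32x32
// block at a half-pel offset in both directions with no scaling:
//
//   aom_convolve8_c(src, src_stride, dst, dst_stride,
//                   kernel_table[8], 16,  // filter_x: phase 8/16, unit step
//                   kernel_table[8], 16,  // filter_y: phase 8/16, unit step
//                   32, 32);
//
// where kernel_table is a 256-byte-aligned InterpKernel[16] array and src has
// enough valid border for the SUBPEL_TAPS-tap kernel on every side.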
void aom_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4, int w,
                           int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  (void)filter_y;
  (void)y_step_q4;

  convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
                 w, h);
}

void aom_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4, int w,
                               int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  (void)filter_y;
  (void)y_step_q4;

  convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
                     x_step_q4, w, h);
}

void aom_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4, int w,
                          int h) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  (void)filter_x;
  (void)x_step_q4;

  convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
                w, h);
}

void aom_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4, int w,
                              int h) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  (void)filter_x;
  (void)x_step_q4;

  convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
                    y_step_q4, w, h);
}

void aom_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const int16_t *filter_x,
                     int x_step_q4, const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
           filters_y, y0_q4, y_step_q4, w, h);
}

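// 2D filter-and-average: run the full separable filter into a stack temp
// block, then blend temp into dst with aom_convolve_avg_c() so the rounding
// average is applied to the final filtered values only.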
void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int x_step_q4, const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  /* Fixed size intermediate buffer places limits on parameters. */
  DECLARE_ALIGNED(16, uint8_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);
  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);

  aom_convolve8_c(src, src_stride, temp, MAX_SB_SIZE, filter_x, x_step_q4,
                  filter_y, y_step_q4, w, h);
  aom_convolve_avg_c(temp, MAX_SB_SIZE, dst, dst_stride, NULL, 0, NULL, 0, w,
                     h);
}

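// The copy and average variants below perform no filtering; the filter
// arguments exist only to satisfy the common convolve prototype and are
// ignored.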
void aom_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int filter_x_stride, const int16_t *filter_y,
                         int filter_y_stride, int w, int h) {
  int r;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (r = h; r > 0; --r) {
    memcpy(dst, src, w);
    src += src_stride;
    dst += dst_stride;
  }
}

void aom_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int filter_x_stride, const int16_t *filter_y,
                        int filter_y_stride, int w, int h) {
  int x, y;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

    src += src_stride;
    dst += dst_stride;
  }
}

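// The aom_scaled_* entry points currently just forward to the corresponding
// aom_convolve8_* C functions; the distinct names presumably exist so that
// scaled and unscaled prediction can be dispatched (and optimized) separately.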
void aom_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int x_step_q4, const int16_t *filter_y, int y_step_q4,
                        int w, int h) {
  aom_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                        filter_y, y_step_q4, w, h);
}

void aom_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                       ptrdiff_t dst_stride, const int16_t *filter_x,
                       int x_step_q4, const int16_t *filter_y, int y_step_q4,
                       int w, int h) {
  aom_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                       filter_y, y_step_q4, w, h);
}

void aom_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const int16_t *filter_x,
                     int x_step_q4, const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  aom_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                  filter_y, y_step_q4, w, h);
}

void aom_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4, int w,
                            int h) {
  aom_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
                            x_step_q4, filter_y, y_step_q4, w, h);
}

void aom_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4, int w,
                           int h) {
  aom_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
                           x_step_q4, filter_y, y_step_q4, w, h);
}

void aom_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int x_step_q4, const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  aom_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                      filter_y, y_step_q4, w, h);
}

#if CONFIG_AOM_HIGHBITDEPTH
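// High bit-depth versions of the functions above. Buffers are passed through
// the usual uint8_t prototypes but actually hold uint16_t samples;
// CONVERT_TO_SHORTPTR() recovers the real pointers, and results are clipped
// to the bd-bit range with clip_pixel_highbd().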
static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
                                  uint8_t *dst8, ptrdiff_t dst_stride,
                                  const InterpKernel *x_filters, int x0_q4,
                                  int x_step_q4, int w, int h, int bd) {
  int x, y;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= SUBPEL_TAPS / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
      dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
                                      uint8_t *dst8, ptrdiff_t dst_stride,
                                      const InterpKernel *x_filters, int x0_q4,
                                      int x_step_q4, int w, int h, int bd) {
  int x, y;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= SUBPEL_TAPS / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
      dst[x] = ROUND_POWER_OF_TWO(
          dst[x] + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
          1);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
                                 uint8_t *dst8, ptrdiff_t dst_stride,
                                 const InterpKernel *y_filters, int y0_q4,
                                 int y_step_q4, int w, int h, int bd) {
  int x, y;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] =
          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
                                     uint8_t *dst8, ptrdiff_t dst_stride,
                                     const InterpKernel *y_filters, int y0_q4,
                                     int y_step_q4, int w, int h, int bd) {
  int x, y;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] = ROUND_POWER_OF_TWO(
          dst[y * dst_stride] +
              clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
          1);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const InterpKernel *const x_filters, int x0_q4,
                            int x_step_q4, const InterpKernel *const y_filters,
                            int y0_q4, int y_step_q4, int w, int h, int bd) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
  int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
                        CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, x_filters, x0_q4,
                        x_step_q4, w, intermediate_height, bd);
  highbd_convolve_vert(
      CONVERT_TO_BYTEPTR(temp) + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
      MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h, bd);
}

void aom_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4, int w,
                                  int h, int bd) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);
  (void)filter_y;
  (void)y_step_q4;

  highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
                        x_step_q4, w, h, bd);
}

void aom_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      const int16_t *filter_x, int x_step_q4,
                                      const int16_t *filter_y, int y_step_q4,
                                      int w, int h, int bd) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);
  (void)filter_y;
  (void)y_step_q4;

  highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
                            x_step_q4, w, h, bd);
}

void aom_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int x_step_q4,
                                 const int16_t *filter_y, int y_step_q4, int w,
                                 int h, int bd) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);
  (void)filter_x;
  (void)x_step_q4;

  highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
                       y_step_q4, w, h, bd);
}

void aom_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                     uint8_t *dst, ptrdiff_t dst_stride,
                                     const int16_t *filter_x, int x_step_q4,
                                     const int16_t *filter_y, int y_step_q4,
                                     int w, int h, int bd) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);
  (void)filter_x;
  (void)x_step_q4;

  highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
                           y_step_q4, w, h, bd);
}

void aom_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4, int w,
                            int h, int bd) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  highbd_convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
                  filters_y, y0_q4, y_step_q4, w, h, bd);
}

void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y, int y_step_q4, int w,
                                int h, int bd) {
  // Fixed size intermediate buffer places limits on parameters.
  DECLARE_ALIGNED(16, uint16_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);
  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);

  aom_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE,
                         filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
  aom_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, dst,
                            dst_stride, NULL, 0, NULL, 0, w, h, bd);
}

void aom_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
                                uint8_t *dst8, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int filter_x_stride,
                                const int16_t *filter_y, int filter_y_stride,
                                int w, int h, int bd) {
  int r;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  (void)filter_x;
  (void)filter_y;
  (void)filter_x_stride;
  (void)filter_y_stride;
  (void)bd;

  for (r = h; r > 0; --r) {
    memcpy(dst, src, w * sizeof(uint16_t));
    src += src_stride;
    dst += dst_stride;
  }
}

void aom_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
                               uint8_t *dst8, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int filter_x_stride,
                               const int16_t *filter_y, int filter_y_stride,
                               int w, int h, int bd) {
  int x, y;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  (void)filter_x;
  (void)filter_y;
  (void)filter_x_stride;
  (void)filter_y_stride;
  (void)bd;

  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x) {
      dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
    }
    src += src_stride;
    dst += dst_stride;
  }
}
#endif