blob: 8293af15484632f675d4dd9728436a3026379a65 [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 *
Yaowu Xuc27fc142016-08-22 16:08:15 -070011 */
12
13#include <math.h>
14
Yaowu Xuf883b422016-08-30 14:01:10 -070015#include "./aom_config.h"
16#include "./aom_dsp_rtcd.h"
David Barker9666e752016-12-08 11:25:47 +000017#include "./aom_scale_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070018#include "av1/common/onyxc_int.h"
19#include "av1/common/restoration.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070020#include "aom_dsp/aom_dsp_common.h"
21#include "aom_mem/aom_mem.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070022#include "aom_ports/mem.h"
23
Debargha Mukherjee8f209a82016-10-12 10:47:01 -070024const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -080025#if USE_HIGHPASS_IN_SGRPROJ
26 // corner, edge, r2, eps2
27 { -1, 2, 1, 1 }, { -1, 2, 1, 2 }, { -1, 2, 1, 3 }, { -1, 2, 1, 4 },
28 { -1, 2, 1, 5 }, { -2, 3, 1, 2 }, { -2, 3, 1, 3 }, { -2, 3, 1, 4 },
29 { -2, 3, 1, 5 }, { -2, 3, 1, 6 }, { -3, 4, 1, 3 }, { -3, 4, 1, 4 },
30 { -3, 4, 1, 5 }, { -3, 4, 1, 6 }, { -3, 4, 1, 7 }, { -3, 4, 1, 8 }
31#else
Debargha Mukherjee4bfd72e2017-03-08 22:20:31 -080032 // r1, eps1, r2, eps2
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -080033 { 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 },
34 { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 },
35 { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 3, 30, 1, 10 },
36 { 3, 50, 1, 12 }, { 3, 50, 2, 25 }, { 3, 60, 2, 35 }, { 3, 70, 2, 45 },
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -080037#endif
Debargha Mukherjee8f209a82016-10-12 10:47:01 -070038};
39
Yaowu Xuc27fc142016-08-22 16:08:15 -070040typedef void (*restore_func_type)(uint8_t *data8, int width, int height,
41 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +000042 uint8_t *dst8, int dst_stride);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +020043#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -070044typedef void (*restore_func_highbd_type)(uint8_t *data8, int width, int height,
45 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +000046 int bit_depth, uint8_t *dst8,
47 int dst_stride);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +020048#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -070049
David Barkerbefcc422017-01-31 09:42:10 +000050int av1_alloc_restoration_struct(AV1_COMMON *cm, RestorationInfo *rst_info,
51 int width, int height) {
Debargha Mukherjee1008c1e2017-03-06 19:18:43 -080052 const int ntiles = av1_get_rest_ntiles(
53 width, height, rst_info->restoration_tilesize, NULL, NULL, NULL, NULL);
Alex Converse7f094f12017-02-23 17:29:40 -080054 aom_free(rst_info->restoration_type);
Alex Converse232e3842017-02-24 12:24:36 -080055 CHECK_MEM_ERROR(cm, rst_info->restoration_type,
Alex Converse7f094f12017-02-23 17:29:40 -080056 (RestorationType *)aom_malloc(
Alex Converse232e3842017-02-24 12:24:36 -080057 sizeof(*rst_info->restoration_type) * ntiles));
David Barkerbefcc422017-01-31 09:42:10 +000058 aom_free(rst_info->wiener_info);
59 CHECK_MEM_ERROR(
60 cm, rst_info->wiener_info,
61 (WienerInfo *)aom_memalign(16, sizeof(*rst_info->wiener_info) * ntiles));
David Barker1e8e6b92017-01-13 13:45:51 +000062 memset(rst_info->wiener_info, 0, sizeof(*rst_info->wiener_info) * ntiles);
Alex Converse7f094f12017-02-23 17:29:40 -080063 aom_free(rst_info->sgrproj_info);
David Barkerbefcc422017-01-31 09:42:10 +000064 CHECK_MEM_ERROR(
65 cm, rst_info->sgrproj_info,
Alex Converse7f094f12017-02-23 17:29:40 -080066 (SgrprojInfo *)aom_malloc(sizeof(*rst_info->sgrproj_info) * ntiles));
Debargha Mukherjee874d36d2016-12-14 16:53:17 -080067 return ntiles;
68}
69
70void av1_free_restoration_struct(RestorationInfo *rst_info) {
71 aom_free(rst_info->restoration_type);
72 rst_info->restoration_type = NULL;
73 aom_free(rst_info->wiener_info);
74 rst_info->wiener_info = NULL;
75 aom_free(rst_info->sgrproj_info);
76 rst_info->sgrproj_info = NULL;
Debargha Mukherjee874d36d2016-12-14 16:53:17 -080077}
78
// Largest box radius handled by the lookup tables (only 1, 2, 3 allowed).
#define MAX_RADIUS 3
// Largest value of eps covered by sgrproj_mtable.
#define MAX_EPS 80
// Pixel count of the largest window: (2 * MAX_RADIUS + 1) squared.
#define MAX_NELEM ((2 * MAX_RADIUS + 1) * (2 * MAX_RADIUS + 1))
// Fixed-point precision (in bits) of the entries of sgrproj_mtable.
#define SGRPROJ_MTABLE_BITS 20
// Fixed-point precision (in bits) of the reciprocal table one_by_x.
#define SGRPROJ_RECIP_BITS 12

// Indexed as [eps - 1][n - 1].
// TODO(debargha): This table can be substantially reduced since only a few
// values are actually used.
int sgrproj_mtable[MAX_EPS][MAX_NELEM];
Debargha Mukherjee4be12622017-02-15 21:38:02 -080088
89static void GenSgrprojVtable() {
90 int e, n;
91 for (e = 1; e <= MAX_EPS; ++e)
92 for (n = 1; n <= MAX_NELEM; ++n) {
93 const int n2e = n * n * e;
94 sgrproj_mtable[e - 1][n - 1] =
95 (((1 << SGRPROJ_MTABLE_BITS) + n2e / 2) / n2e);
96 }
97}
Debargha Mukherjee4be12622017-02-15 21:38:02 -080098
// One-time precalculation for loop restoration: populates the self-guided
// filter lookup table via GenSgrprojVtable().
void av1_loop_restoration_precal() {
  GenSgrprojVtable();
}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700100
Debargha Mukherjeed7489142017-01-05 13:58:16 -0800101static void loop_restoration_init(RestorationInternal *rst, int kf) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700102 rst->keyframe = kf;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700103}
104
David Barker33f3bfd2017-01-06 15:34:50 +0000105void extend_frame(uint8_t *data, int width, int height, int stride) {
David Barker025b2542016-12-08 11:50:42 +0000106 uint8_t *data_p;
107 int i;
108 for (i = 0; i < height; ++i) {
109 data_p = data + i * stride;
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800110 memset(data_p - WIENER_HALFWIN, data_p[0], WIENER_HALFWIN);
111 memset(data_p + width, data_p[width - 1], WIENER_HALFWIN);
David Barker025b2542016-12-08 11:50:42 +0000112 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800113 data_p = data - WIENER_HALFWIN;
114 for (i = -WIENER_HALFWIN; i < 0; ++i) {
115 memcpy(data_p + i * stride, data_p, width + 2 * WIENER_HALFWIN);
David Barker025b2542016-12-08 11:50:42 +0000116 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800117 for (i = height; i < height + WIENER_HALFWIN; ++i) {
David Barker025b2542016-12-08 11:50:42 +0000118 memcpy(data_p + i * stride, data_p + (height - 1) * stride,
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800119 width + 2 * WIENER_HALFWIN);
David Barker025b2542016-12-08 11:50:42 +0000120 }
121}
122
David Barker9666e752016-12-08 11:25:47 +0000123static void loop_copy_tile(uint8_t *data, int tile_idx, int subtile_idx,
124 int subtile_bits, int width, int height, int stride,
125 RestorationInternal *rst, uint8_t *dst,
126 int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -0800127 const int tile_width = rst->tile_width;
128 const int tile_height = rst->tile_height;
David Barker9666e752016-12-08 11:25:47 +0000129 int i;
130 int h_start, h_end, v_start, v_end;
131 av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, rst->nhtiles,
132 rst->nvtiles, tile_width, tile_height, width, height,
133 0, 0, &h_start, &h_end, &v_start, &v_end);
134 for (i = v_start; i < v_end; ++i)
135 memcpy(dst + i * dst_stride + h_start, data + i * stride + h_start,
136 h_end - h_start);
137}
138
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700139static void loop_wiener_filter_tile(uint8_t *data, int tile_idx, int width,
140 int height, int stride,
David Barker025b2542016-12-08 11:50:42 +0000141 RestorationInternal *rst, uint8_t *dst,
David Barker9666e752016-12-08 11:25:47 +0000142 int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -0800143 const int tile_width = rst->tile_width;
144 const int tile_height = rst->tile_height;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700145 int i, j;
146 int h_start, h_end, v_start, v_end;
Debargha Mukherjee994ccd72017-01-06 11:18:23 -0800147 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
David Barker9666e752016-12-08 11:25:47 +0000148 loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
149 dst_stride);
150 return;
151 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700152 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
David Barker025b2542016-12-08 11:50:42 +0000153 tile_width, tile_height, width, height, 0, 0,
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700154 &h_start, &h_end, &v_start, &v_end);
David Barker025b2542016-12-08 11:50:42 +0000155 // Convolve the whole tile (done in blocks here to match the requirements
156 // of the vectorized convolve functions, but the result is equivalent)
157 for (i = v_start; i < v_end; i += MAX_SB_SIZE)
158 for (j = h_start; j < h_end; j += MAX_SB_SIZE) {
159 int w = AOMMIN(MAX_SB_SIZE, (h_end - j + 15) & ~15);
160 int h = AOMMIN(MAX_SB_SIZE, (v_end - i + 15) & ~15);
161 const uint8_t *data_p = data + i * stride + j;
162 uint8_t *dst_p = dst + i * dst_stride + j;
Debargha Mukherjee28d15c72017-05-12 10:44:03 -0700163#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
164 aom_convolve8_add_src_hip(data_p, stride, dst_p, dst_stride,
165 rst->rsi->wiener_info[tile_idx].hfilter, 16,
166 rst->rsi->wiener_info[tile_idx].vfilter, 16, w,
167 h);
168#else
David Barker1e8e6b92017-01-13 13:45:51 +0000169 aom_convolve8_add_src(data_p, stride, dst_p, dst_stride,
170 rst->rsi->wiener_info[tile_idx].hfilter, 16,
171 rst->rsi->wiener_info[tile_idx].vfilter, 16, w, h);
Debargha Mukherjee28d15c72017-05-12 10:44:03 -0700172#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700173 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700174}
175
Yaowu Xuc27fc142016-08-22 16:08:15 -0700176static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
David Barker025b2542016-12-08 11:50:42 +0000177 RestorationInternal *rst, uint8_t *dst,
178 int dst_stride) {
179 int tile_idx;
180 extend_frame(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700181 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
David Barker025b2542016-12-08 11:50:42 +0000182 loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst,
183 dst_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700184 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700185}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700186
David Barker6928a5d2017-01-05 11:29:22 +0000187/* Calculate windowed sums (if sqr=0) or sums of squares (if sqr=1)
188 over the input. The window is of size (2r + 1)x(2r + 1), and we
Debargha Mukherjee8a709192017-01-10 11:29:31 -0800189 specialize to r = 1, 2, 3. A default function is used for r > 3.
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700190
David Barker6928a5d2017-01-05 11:29:22 +0000191 Each loop follows the same format: We keep a window's worth of input
192 in individual variables and select data out of that as appropriate.
193*/
// Box sum with radius 1: for every pixel of the width x height region, writes
// to dst the sum (sqr == 0) or sum of squares (sqr != 0) of src over the 3x3
// window centred on that pixel, clipped to the region.
// The code reads three rows and three columns unconditionally, so it needs
// width >= 3 and height >= 3.
static void boxsum1(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int win[3];  // sliding window of (possibly squared) samples
  int row, col, k;

  // Pass 1: vertical 3-tap sums, column by column, from src into dst.
  for (col = 0; col < width; ++col) {
    for (k = 0; k < 3; ++k) {
      const int v = src[k * src_stride + col];
      win[k] = sqr ? v * v : v;
    }

    // The top row only has itself and the row below.
    dst[col] = win[0] + win[1];
    for (row = 1; row < height - 2; ++row) {
      int incoming;
      // Here win[] holds rows row - 1, row, row + 1 of this column.
      dst[row * dst_stride + col] = win[0] + win[1] + win[2];
      incoming = src[(row + 2) * src_stride + col];
      win[0] = win[1];
      win[1] = win[2];
      win[2] = sqr ? incoming * incoming : incoming;
    }
    // Last full window, then the bottom row (itself and the row above).
    dst[row * dst_stride + col] = win[0] + win[1] + win[2];
    dst[(row + 1) * dst_stride + col] = win[1] + win[2];
  }

  // Pass 2: horizontal 3-tap sums of the vertical sums, in place in dst.
  for (row = 0; row < height; ++row) {
    int32_t *const line = dst + row * dst_stride;
    win[0] = line[0];
    win[1] = line[1];
    win[2] = line[2];

    line[0] = win[0] + win[1];
    for (col = 1; col < width - 2; ++col) {
      // Here win[] holds the vertical sums at columns col - 1, col, col + 1.
      line[col] = win[0] + win[1] + win[2];
      win[0] = win[1];
      win[1] = win[2];
      win[2] = line[col + 2];
    }
    line[col] = win[0] + win[1] + win[2];
    line[col + 1] = win[1] + win[2];
  }
}
258
// Box sum with radius 2: for every pixel of the width x height region, writes
// to dst the sum (sqr == 0) or sum of squares (sqr != 0) of src over the 5x5
// window centred on that pixel, clipped to the region.
// The code reads five rows and five columns unconditionally, so it needs
// width >= 5 and height >= 5.
static void boxsum2(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int win[5];  // sliding window of (possibly squared) samples
  int row, col, k;

  // Pass 1: vertical 5-tap sums, column by column, from src into dst.
  for (col = 0; col < width; ++col) {
    for (k = 0; k < 5; ++k) {
      const int v = src[k * src_stride + col];
      win[k] = sqr ? v * v : v;
    }

    // The first two rows see a window truncated at the top edge.
    dst[col] = win[0] + win[1] + win[2];
    dst[dst_stride + col] = win[0] + win[1] + win[2] + win[3];
    for (row = 2; row < height - 3; ++row) {
      int incoming;
      // Here win[] holds rows row - 2 .. row + 2 of this column.
      dst[row * dst_stride + col] = win[0] + win[1] + win[2] + win[3] + win[4];
      incoming = src[(row + 3) * src_stride + col];
      for (k = 0; k < 4; ++k) win[k] = win[k + 1];
      win[4] = sqr ? incoming * incoming : incoming;
    }
    // Last full window, then the two rows truncated at the bottom edge.
    dst[row * dst_stride + col] = win[0] + win[1] + win[2] + win[3] + win[4];
    dst[(row + 1) * dst_stride + col] = win[1] + win[2] + win[3] + win[4];
    dst[(row + 2) * dst_stride + col] = win[2] + win[3] + win[4];
  }

  // Pass 2: horizontal 5-tap sums of the vertical sums, in place in dst.
  for (row = 0; row < height; ++row) {
    int32_t *const line = dst + row * dst_stride;
    for (k = 0; k < 5; ++k) win[k] = line[k];

    line[0] = win[0] + win[1] + win[2];
    line[1] = win[0] + win[1] + win[2] + win[3];
    for (col = 2; col < width - 3; ++col) {
      // Here win[] holds the vertical sums at columns col - 2 .. col + 2.
      line[col] = win[0] + win[1] + win[2] + win[3] + win[4];
      for (k = 0; k < 4; ++k) win[k] = win[k + 1];
      win[4] = line[col + 3];
    }
    line[col] = win[0] + win[1] + win[2] + win[3] + win[4];
    line[col + 1] = win[1] + win[2] + win[3] + win[4];
    line[col + 2] = win[2] + win[3] + win[4];
  }
}
345
// Box sum with radius 3: for every pixel of the width x height region, writes
// to dst the sum (sqr == 0) or sum of squares (sqr != 0) of src over the 7x7
// window centred on that pixel, clipped to the region.
// The code reads seven rows and seven columns unconditionally, so it needs
// width >= 7 and height >= 7.
static void boxsum3(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int win[7];  // sliding window of (possibly squared) samples
  int row, col, k;

  // Pass 1: vertical 7-tap sums, column by column, from src into dst.
  for (col = 0; col < width; ++col) {
    for (k = 0; k < 7; ++k) {
      const int v = src[k * src_stride + col];
      win[k] = sqr ? v * v : v;
    }

    // The first three rows see a window truncated at the top edge.
    dst[col] = win[0] + win[1] + win[2] + win[3];
    dst[dst_stride + col] = win[0] + win[1] + win[2] + win[3] + win[4];
    dst[2 * dst_stride + col] =
        win[0] + win[1] + win[2] + win[3] + win[4] + win[5];
    for (row = 3; row < height - 4; ++row) {
      int incoming;
      // Here win[] holds rows row - 3 .. row + 3 of this column.
      dst[row * dst_stride + col] =
          win[0] + win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
      incoming = src[(row + 4) * src_stride + col];
      for (k = 0; k < 6; ++k) win[k] = win[k + 1];
      win[6] = sqr ? incoming * incoming : incoming;
    }
    // Last full window, then the three rows truncated at the bottom edge.
    dst[row * dst_stride + col] =
        win[0] + win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
    dst[(row + 1) * dst_stride + col] =
        win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
    dst[(row + 2) * dst_stride + col] =
        win[2] + win[3] + win[4] + win[5] + win[6];
    dst[(row + 3) * dst_stride + col] = win[3] + win[4] + win[5] + win[6];
  }

  // Pass 2: horizontal 7-tap sums of the vertical sums, in place in dst.
  for (row = 0; row < height; ++row) {
    int32_t *const line = dst + row * dst_stride;
    for (k = 0; k < 7; ++k) win[k] = line[k];

    line[0] = win[0] + win[1] + win[2] + win[3];
    line[1] = win[0] + win[1] + win[2] + win[3] + win[4];
    line[2] = win[0] + win[1] + win[2] + win[3] + win[4] + win[5];
    for (col = 3; col < width - 4; ++col) {
      // Here win[] holds the vertical sums at columns col - 3 .. col + 3.
      line[col] = win[0] + win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
      for (k = 0; k < 6; ++k) win[k] = win[k + 1];
      win[6] = line[col + 4];
    }
    line[col] = win[0] + win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
    line[col + 1] = win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
    line[col + 2] = win[2] + win[3] + win[4] + win[5] + win[6];
    line[col + 3] = win[3] + win[4] + win[5] + win[6];
  }
}
438
// Generic version for any r. To be removed after experiments are done.
//
// For every pixel of the width x height region, writes to dst the sum
// (sqr == 0) or sum of squares (sqr != 0) of src over the
// (2r + 1) x (2r + 1) window centred on that pixel, clipped to the region.
// Works from prefix sums held in a temporary width x height buffer: first
// down each column of src, then along each row of the intermediate result.
//
// The indexing below requires width >= 2 * r + 1 and height >= 2 * r + 1.
// NOTE: the aom_malloc() result is used without a NULL check.
static void boxsumr(int32_t *src, int width, int height, int src_stride, int r,
                    int sqr, int32_t *dst, int dst_stride) {
  int32_t *tmp = aom_malloc(width * height * sizeof(*tmp));
  int tmp_stride = width;
  int i, j;

  // Vertical prefix sums of src (or of src squared) into tmp.
  if (sqr) {
    for (j = 0; j < width; ++j) tmp[j] = src[j] * src[j];
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] +
            src[i * src_stride + j] * src[i * src_stride + j];
  } else {
    memcpy(tmp, src, sizeof(*tmp) * width);
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] + src[i * src_stride + j];
  }

  // Vertical window sums into dst, as differences of prefix sums.
  // Top rows: the window starts at row 0, so the prefix sum is the answer.
  for (i = 0; i <= r; ++i)
    memcpy(&dst[i * dst_stride], &tmp[(i + r) * tmp_stride],
           sizeof(*tmp) * width);
  // Middle rows: full-height window.
  for (i = r + 1; i < height - r; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] =
          tmp[(i + r) * tmp_stride + j] - tmp[(i - r - 1) * tmp_stride + j];
  // Bottom rows: the window ends at the last row.
  for (i = height - r; i < height; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] = tmp[(height - 1) * tmp_stride + j] -
                                tmp[(i - r - 1) * tmp_stride + j];

  // Horizontal prefix sums of the vertical sums (now in dst) into tmp.
  // dst must be indexed with dst_stride here; the previous code used
  // src_stride, which read the wrong elements whenever the strides differed.
  for (i = 0; i < height; ++i) tmp[i * tmp_stride] = dst[i * dst_stride];
  for (i = 0; i < height; ++i)
    for (j = 1; j < width; ++j)
      tmp[i * tmp_stride + j] =
          tmp[i * tmp_stride + j - 1] + dst[i * dst_stride + j];

  // Horizontal window sums back into dst: left, middle and right columns.
  for (j = 0; j <= r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] = tmp[i * tmp_stride + j + r];
  for (j = r + 1; j < width - r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + j + r] - tmp[i * tmp_stride + j - r - 1];
  for (j = width - r; j < width; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + width - 1] - tmp[i * tmp_stride + j - r - 1];
  aom_free(tmp);
}
490
// Dispatches to the box-sum implementation for radius r: the specialised
// versions for r = 1, 2, 3, and the generic boxsumr() for any other value.
static void boxsum(int32_t *src, int width, int height, int src_stride, int r,
                   int sqr, int32_t *dst, int dst_stride) {
  switch (r) {
    case 1:
      boxsum1(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    case 2:
      boxsum2(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    case 3:
      boxsum3(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    default:
      boxsumr(src, width, height, src_stride, r, sqr, dst, dst_stride);
      break;
  }
}
502
// Fills num (width x height, rows num_stride apart) with the pixel count of
// the (2r + 1) x (2r + 1) window centred on each position once the window is
// clipped to the region. The four regions are written in a fixed order:
// corners, left/right edges, top/bottom edges, interior.
static void boxnum(int width, int height, int r, int8_t *num, int num_stride) {
  const int full = 2 * r + 1;  // taps of an unclipped window along one axis
  const int last_row = height - 1;
  const int last_col = width - 1;
  int y, x;

  // Corners: the window is clipped on both axes. Compute the top-left value
  // and mirror it into the other three corners.
  for (y = 0; y <= r; ++y) {
    int8_t *const top = num + y * num_stride;
    int8_t *const bottom = num + (last_row - y) * num_stride;
    for (x = 0; x <= r; ++x) {
      top[x] = (r + 1 + y) * (r + 1 + x);
      top[last_col - x] = top[x];
      bottom[x] = top[x];
      bottom[last_col - x] = top[x];
    }
  }

  // Left and right edges: clipped horizontally only.
  for (x = 0; x <= r; ++x) {
    const int count = full * (r + 1 + x);
    for (y = r + 1; y < height - r; ++y) {
      int8_t *const line = num + y * num_stride;
      line[x] = count;
      line[last_col - x] = count;
    }
  }

  // Top and bottom edges: clipped vertically only.
  for (y = 0; y <= r; ++y) {
    const int count = full * (r + 1 + y);
    int8_t *const top = num + y * num_stride;
    int8_t *const bottom = num + (last_row - y) * num_stride;
    for (x = r + 1; x < width - r; ++x) {
      top[x] = count;
      bottom[x] = count;
    }
  }

  // Interior: the full window fits.
  for (y = r + 1; y < height - r; ++y) {
    int8_t *const line = num + y * num_stride;
    for (x = r + 1; x < width - r; ++x) {
      line[x] = full * full;
    }
  }
}
534
535void decode_xq(int *xqd, int *xq) {
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800536 xq[0] = xqd[0];
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700537 xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
538}
539
// Lookup table for x / (x + 1) in 8-bit fixed point: entries 0..254 hold
// round(256 * x / (x + 1)), and the final entry (x = 255) is 256.
// Laid out 16 entries per row, so row k starts at x = 16 * k.
const int32_t x_by_xplus1[256] = {
  0,   128, 171, 192, 205, 213, 219, 224, 228, 230, 233, 235, 236, 238, 239, 240,
  241, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 247, 247, 248, 248,
  248, 248, 249, 249, 249, 249, 249, 250, 250, 250, 250, 250, 250, 250, 251, 251,
  251, 251, 251, 251, 251, 251, 251, 251, 252, 252, 252, 252, 252, 252, 252, 252,
  252, 252, 252, 252, 252, 252, 252, 252, 252, 253, 253, 253, 253, 253, 253, 253,
  253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253,
  253, 253, 253, 253, 253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 256,
};
560
David Barkerce110cc2017-02-22 10:38:59 +0000561const int32_t one_by_x[MAX_NELEM] = {
David Barker9198d132017-02-17 14:27:05 +0000562 4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315,
563 293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164, 158,
564 152, 146, 141, 137, 132, 128, 124, 120, 117, 114, 111, 108, 105,
565 102, 100, 98, 95, 93, 91, 89, 87, 85, 84
Debargha Mukherjee4be12622017-02-15 21:38:02 -0800566};
Debargha Mukherjee4be12622017-02-15 21:38:02 -0800567
David Barker506eb722017-03-08 13:35:49 +0000568static void av1_selfguided_restoration_internal(int32_t *dgd, int width,
569 int height, int stride,
570 int bit_depth, int r, int eps,
571 int32_t *tmpbuf) {
David Barker3a0df182016-12-21 10:44:52 +0000572 int32_t *A = tmpbuf;
David Barkercff43bb2017-03-08 13:15:17 +0000573 int32_t *B = A + SGRPROJ_OUTBUF_SIZE;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700574 int8_t num[RESTORATION_TILEPELS_MAX];
575 int i, j;
David Barkerce110cc2017-02-22 10:38:59 +0000576 // Adjusting the stride of A and B here appears to avoid bad cache effects,
577 // leading to a significant speed improvement.
578 // We also align the stride to a multiple of 16 bytes, for consistency
579 // with the SIMD version of this function.
580 int buf_stride = ((width + 3) & ~3) + 16;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700581
David Barker6928a5d2017-01-05 11:29:22 +0000582 // Don't filter tiles with dimensions < 5 on any axis
583 if ((width < 5) || (height < 5)) return;
584
David Barkerce110cc2017-02-22 10:38:59 +0000585 boxsum(dgd, width, height, stride, r, 0, B, buf_stride);
586 boxsum(dgd, width, height, stride, r, 1, A, buf_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700587 boxnum(width, height, r, num, width);
Debargha Mukherjee8a709192017-01-10 11:29:31 -0800588 assert(r <= 3);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700589 for (i = 0; i < height; ++i) {
590 for (j = 0; j < width; ++j) {
David Barkerce110cc2017-02-22 10:38:59 +0000591 const int k = i * buf_stride + j;
592 const int n = num[i * width + j];
Debargha Mukherjee4bfd72e2017-03-08 22:20:31 -0800593
David Barker9198d132017-02-17 14:27:05 +0000594 // a < 2^16 * n < 2^22 regardless of bit depth
595 uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8));
596 // b < 2^8 * n < 2^14 regardless of bit depth
597 uint32_t b = ROUND_POWER_OF_TWO(B[k], bit_depth - 8);
598
599 // Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28,
600 // and p itself satisfies p < 2^14 * n^2 < 2^26.
601 // Note: Sometimes, in high bit depth, we can end up with a*n < b*b.
602 // This is an artefact of rounding, and can only happen if all pixels
603 // are (almost) identical, so in this case we saturate to p=0.
604 uint32_t p = (a * n < b * b) ? 0 : a * n - b * b;
605 uint32_t s = sgrproj_mtable[eps - 1][n - 1];
606
607 // p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32
608 // as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12
609 // (this holds even after accounting for the rounding in s)
610 const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS);
611
612 A[k] = x_by_xplus1[AOMMIN(z, 255)]; // < 2^8
613
614 // SGRPROJ_SGR - A[k] < 2^8, B[k] < 2^(bit_depth) * n,
615 // one_by_x[n - 1] = round(2^12 / n)
616 // => the product here is < 2^(20 + bit_depth) <= 2^32,
617 // and B[k] is set to a value < 2^(8 + bit depth)
618 B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) *
619 (uint32_t)B[k] *
620 (uint32_t)one_by_x[n - 1],
621 SGRPROJ_RECIP_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700622 }
623 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700624 i = 0;
625 j = 0;
626 {
David Barkerce110cc2017-02-22 10:38:59 +0000627 const int k = i * buf_stride + j;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700628 const int l = i * stride + j;
629 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800630 const int32_t a =
David Barkerce110cc2017-02-22 10:38:59 +0000631 3 * A[k] + 2 * A[k + 1] + 2 * A[k + buf_stride] + A[k + buf_stride + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800632 const int32_t b =
David Barkerce110cc2017-02-22 10:38:59 +0000633 3 * B[k] + 2 * B[k + 1] + 2 * B[k + buf_stride] + B[k + buf_stride + 1];
David Barker7dcd7f52017-03-01 12:53:00 +0000634 const int32_t v = a * dgd[l] + b;
635 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700636 }
637 i = 0;
638 j = width - 1;
639 {
David Barkerce110cc2017-02-22 10:38:59 +0000640 const int k = i * buf_stride + j;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700641 const int l = i * stride + j;
642 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800643 const int32_t a =
David Barkerce110cc2017-02-22 10:38:59 +0000644 3 * A[k] + 2 * A[k - 1] + 2 * A[k + buf_stride] + A[k + buf_stride - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800645 const int32_t b =
David Barkerce110cc2017-02-22 10:38:59 +0000646 3 * B[k] + 2 * B[k - 1] + 2 * B[k + buf_stride] + B[k + buf_stride - 1];
David Barker7dcd7f52017-03-01 12:53:00 +0000647 const int32_t v = a * dgd[l] + b;
648 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700649 }
650 i = height - 1;
651 j = 0;
652 {
David Barkerce110cc2017-02-22 10:38:59 +0000653 const int k = i * buf_stride + j;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700654 const int l = i * stride + j;
655 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800656 const int32_t a =
David Barkerce110cc2017-02-22 10:38:59 +0000657 3 * A[k] + 2 * A[k + 1] + 2 * A[k - buf_stride] + A[k - buf_stride + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800658 const int32_t b =
David Barkerce110cc2017-02-22 10:38:59 +0000659 3 * B[k] + 2 * B[k + 1] + 2 * B[k - buf_stride] + B[k - buf_stride + 1];
David Barker7dcd7f52017-03-01 12:53:00 +0000660 const int32_t v = a * dgd[l] + b;
661 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700662 }
663 i = height - 1;
664 j = width - 1;
665 {
David Barkerce110cc2017-02-22 10:38:59 +0000666 const int k = i * buf_stride + j;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700667 const int l = i * stride + j;
668 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800669 const int32_t a =
David Barkerce110cc2017-02-22 10:38:59 +0000670 3 * A[k] + 2 * A[k - 1] + 2 * A[k - buf_stride] + A[k - buf_stride - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800671 const int32_t b =
David Barkerce110cc2017-02-22 10:38:59 +0000672 3 * B[k] + 2 * B[k - 1] + 2 * B[k - buf_stride] + B[k - buf_stride - 1];
David Barker7dcd7f52017-03-01 12:53:00 +0000673 const int32_t v = a * dgd[l] + b;
674 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700675 }
676 i = 0;
677 for (j = 1; j < width - 1; ++j) {
David Barkerce110cc2017-02-22 10:38:59 +0000678 const int k = i * buf_stride + j;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700679 const int l = i * stride + j;
680 const int nb = 3;
David Barkerce110cc2017-02-22 10:38:59 +0000681 const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + buf_stride] +
682 A[k + buf_stride - 1] + A[k + buf_stride + 1];
683 const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + buf_stride] +
684 B[k + buf_stride - 1] + B[k + buf_stride + 1];
David Barker7dcd7f52017-03-01 12:53:00 +0000685 const int32_t v = a * dgd[l] + b;
686 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700687 }
688 i = height - 1;
689 for (j = 1; j < width - 1; ++j) {
David Barkerce110cc2017-02-22 10:38:59 +0000690 const int k = i * buf_stride + j;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700691 const int l = i * stride + j;
692 const int nb = 3;
David Barkerce110cc2017-02-22 10:38:59 +0000693 const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - buf_stride] +
694 A[k - buf_stride - 1] + A[k - buf_stride + 1];
695 const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - buf_stride] +
696 B[k - buf_stride - 1] + B[k - buf_stride + 1];
David Barker7dcd7f52017-03-01 12:53:00 +0000697 const int32_t v = a * dgd[l] + b;
698 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700699 }
700 j = 0;
701 for (i = 1; i < height - 1; ++i) {
David Barkerce110cc2017-02-22 10:38:59 +0000702 const int k = i * buf_stride + j;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700703 const int l = i * stride + j;
704 const int nb = 3;
David Barkerce110cc2017-02-22 10:38:59 +0000705 const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
706 A[k + 1] + A[k - buf_stride + 1] + A[k + buf_stride + 1];
707 const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
708 B[k + 1] + B[k - buf_stride + 1] + B[k + buf_stride + 1];
David Barker7dcd7f52017-03-01 12:53:00 +0000709 const int32_t v = a * dgd[l] + b;
710 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700711 }
712 j = width - 1;
713 for (i = 1; i < height - 1; ++i) {
David Barkerce110cc2017-02-22 10:38:59 +0000714 const int k = i * buf_stride + j;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700715 const int l = i * stride + j;
716 const int nb = 3;
David Barkerce110cc2017-02-22 10:38:59 +0000717 const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
718 A[k - 1] + A[k - buf_stride - 1] + A[k + buf_stride - 1];
719 const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
720 B[k - 1] + B[k - buf_stride - 1] + B[k + buf_stride - 1];
David Barker7dcd7f52017-03-01 12:53:00 +0000721 const int32_t v = a * dgd[l] + b;
722 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700723 }
724 for (i = 1; i < height - 1; ++i) {
725 for (j = 1; j < width - 1; ++j) {
David Barkerce110cc2017-02-22 10:38:59 +0000726 const int k = i * buf_stride + j;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700727 const int l = i * stride + j;
728 const int nb = 5;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800729 const int32_t a =
David Barkerce110cc2017-02-22 10:38:59 +0000730 (A[k] + A[k - 1] + A[k + 1] + A[k - buf_stride] + A[k + buf_stride]) *
731 4 +
732 (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
733 A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700734 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800735 const int32_t b =
David Barkerce110cc2017-02-22 10:38:59 +0000736 (B[k] + B[k - 1] + B[k + 1] + B[k - buf_stride] + B[k + buf_stride]) *
737 4 +
738 (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
739 B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700740 3;
David Barker7dcd7f52017-03-01 12:53:00 +0000741 const int32_t v = a * dgd[l] + b;
742 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700743 }
744 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700745}
746
// 8-bit entry point for the self-guided filter. Widens the width x height
// region of dgd into the int32_t buffer dst, then passes that buffer to
// av1_selfguided_restoration_internal with a bit depth of 8.
void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height,
                                  int stride, int32_t *dst, int dst_stride,
                                  int r, int eps, int32_t *tmpbuf) {
  int row, col;
  for (row = 0; row < height; ++row) {
    const uint8_t *const src_row = dgd + row * stride;
    int32_t *const dst_row = dst + row * dst_stride;
    for (col = 0; col < width; ++col) dst_row[col] = src_row[col];
  }
  av1_selfguided_restoration_internal(dst, width, height, dst_stride, 8, r, eps,
                                      tmpbuf);
}
759
Urvang Joshi0c459412017-04-21 18:10:09 +0000760void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride,
761 int32_t *dst, int dst_stride, int corner, int edge) {
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800762 int i, j;
David Barkereed824e2017-03-10 11:35:22 +0000763 const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800764
765 i = 0;
766 j = 0;
767 {
768 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000769 const int l = i * dst_stride + j;
770 dst[l] =
771 center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) +
772 corner * (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800773 }
774 i = 0;
775 j = width - 1;
776 {
777 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000778 const int l = i * dst_stride + j;
779 dst[l] =
780 center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) +
781 corner * (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800782 }
783 i = height - 1;
784 j = 0;
785 {
786 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000787 const int l = i * dst_stride + j;
788 dst[l] =
789 center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) +
790 corner * (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800791 }
792 i = height - 1;
793 j = width - 1;
794 {
795 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000796 const int l = i * dst_stride + j;
797 dst[l] =
798 center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) +
799 corner * (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800800 }
801 i = 0;
802 for (j = 1; j < width - 1; ++j) {
803 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000804 const int l = i * dst_stride + j;
805 dst[l] = center * dgd[k] +
806 edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) +
807 corner * (dgd[k + stride - 1] + dgd[k + stride + 1] + dgd[k - 1] +
808 dgd[k + 1]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800809 }
810 i = height - 1;
811 for (j = 1; j < width - 1; ++j) {
812 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000813 const int l = i * dst_stride + j;
814 dst[l] = center * dgd[k] +
815 edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) +
816 corner * (dgd[k - stride - 1] + dgd[k - stride + 1] + dgd[k - 1] +
817 dgd[k + 1]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800818 }
819 j = 0;
820 for (i = 1; i < height - 1; ++i) {
821 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000822 const int l = i * dst_stride + j;
823 dst[l] = center * dgd[k] +
824 edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) +
825 corner * (dgd[k + stride + 1] + dgd[k - stride + 1] +
826 dgd[k - stride] + dgd[k + stride]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800827 }
828 j = width - 1;
829 for (i = 1; i < height - 1; ++i) {
830 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000831 const int l = i * dst_stride + j;
832 dst[l] = center * dgd[k] +
833 edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) +
834 corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
835 dgd[k - stride] + dgd[k + stride]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800836 }
837 for (i = 1; i < height - 1; ++i) {
838 for (j = 1; j < width - 1; ++j) {
839 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000840 const int l = i * dst_stride + j;
841 dst[l] =
842 center * dgd[k] +
843 edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) +
844 corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
845 dgd[k - stride + 1] + dgd[k + stride + 1]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800846 }
847 }
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800848}
849
David Barkerce110cc2017-02-22 10:38:59 +0000850void apply_selfguided_restoration_c(uint8_t *dat, int width, int height,
David Barker4d2af5d2017-03-09 11:46:50 +0000851 int stride, int eps, int *xqd, uint8_t *dst,
852 int dst_stride, int32_t *tmpbuf) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700853 int xq[2];
David Barker3a0df182016-12-21 10:44:52 +0000854 int32_t *flt1 = tmpbuf;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800855 int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
David Barker3a0df182016-12-21 10:44:52 +0000856 int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700857 int i, j;
David Barker3a0df182016-12-21 10:44:52 +0000858 assert(width * height <= RESTORATION_TILEPELS_MAX);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800859#if USE_HIGHPASS_IN_SGRPROJ
860 av1_highpass_filter_c(dat, width, height, stride, flt1, width,
David Barkereed824e2017-03-10 11:35:22 +0000861 sgr_params[eps].corner, sgr_params[eps].edge);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800862#else
David Barker506eb722017-03-08 13:35:49 +0000863 av1_selfguided_restoration_c(dat, width, height, stride, flt1, width,
David Barker4d2af5d2017-03-09 11:46:50 +0000864 sgr_params[eps].r1, sgr_params[eps].e1, tmpbuf2);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800865#endif // USE_HIGHPASS_IN_SGRPROJ
David Barker506eb722017-03-08 13:35:49 +0000866 av1_selfguided_restoration_c(dat, width, height, stride, flt2, width,
David Barker4d2af5d2017-03-09 11:46:50 +0000867 sgr_params[eps].r2, sgr_params[eps].e2, tmpbuf2);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700868 decode_xq(xqd, xq);
869 for (i = 0; i < height; ++i) {
870 for (j = 0; j < width; ++j) {
871 const int k = i * width + j;
872 const int l = i * stride + j;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800873 const int m = i * dst_stride + j;
874 const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
875 const int32_t f1 = (int32_t)flt1[k] - u;
876 const int32_t f2 = (int32_t)flt2[k] - u;
David Barkerce110cc2017-02-22 10:38:59 +0000877 const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700878 const int16_t w =
879 (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800880 dst[m] = clip_pixel(w);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700881 }
882 }
883}
884
885static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width,
886 int height, int stride,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800887 RestorationInternal *rst, uint8_t *dst,
888 int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -0800889 const int tile_width = rst->tile_width;
890 const int tile_height = rst->tile_height;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700891 int h_start, h_end, v_start, v_end;
David Barker9666e752016-12-08 11:25:47 +0000892 uint8_t *data_p, *dst_p;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700893
Debargha Mukherjee994ccd72017-01-06 11:18:23 -0800894 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
David Barker9666e752016-12-08 11:25:47 +0000895 loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
896 dst_stride);
897 return;
898 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700899 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
900 tile_width, tile_height, width, height, 0, 0,
901 &h_start, &h_end, &v_start, &v_end);
902 data_p = data + h_start + v_start * stride;
David Barker9666e752016-12-08 11:25:47 +0000903 dst_p = dst + h_start + v_start * dst_stride;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800904 apply_selfguided_restoration(data_p, h_end - h_start, v_end - v_start, stride,
David Barker4d2af5d2017-03-09 11:46:50 +0000905 rst->rsi->sgrproj_info[tile_idx].ep,
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800906 rst->rsi->sgrproj_info[tile_idx].xqd, dst_p,
David Barker3a0df182016-12-21 10:44:52 +0000907 dst_stride, rst->tmpbuf);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700908}
909
910static void loop_sgrproj_filter(uint8_t *data, int width, int height,
911 int stride, RestorationInternal *rst,
David Barker025b2542016-12-08 11:50:42 +0000912 uint8_t *dst, int dst_stride) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700913 int tile_idx;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700914 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800915 loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst, dst,
916 dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700917 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700918}
919
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700920static void loop_switchable_filter(uint8_t *data, int width, int height,
921 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +0000922 uint8_t *dst, int dst_stride) {
David Barker025b2542016-12-08 11:50:42 +0000923 int tile_idx;
David Barker025b2542016-12-08 11:50:42 +0000924 extend_frame(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700925 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
David Barker9666e752016-12-08 11:25:47 +0000926 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
927 loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
928 dst_stride);
929 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
David Barker025b2542016-12-08 11:50:42 +0000930 loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst,
931 dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700932 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) {
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800933 loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst, dst,
934 dst_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700935 }
936 }
937}
938
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200939#if CONFIG_HIGHBITDEPTH
David Barker33f3bfd2017-01-06 15:34:50 +0000940void extend_frame_highbd(uint16_t *data, int width, int height, int stride) {
David Barker025b2542016-12-08 11:50:42 +0000941 uint16_t *data_p;
942 int i, j;
943 for (i = 0; i < height; ++i) {
944 data_p = data + i * stride;
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800945 for (j = -WIENER_HALFWIN; j < 0; ++j) data_p[j] = data_p[0];
946 for (j = width; j < width + WIENER_HALFWIN; ++j)
David Barker025b2542016-12-08 11:50:42 +0000947 data_p[j] = data_p[width - 1];
948 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800949 data_p = data - WIENER_HALFWIN;
950 for (i = -WIENER_HALFWIN; i < 0; ++i) {
David Barker025b2542016-12-08 11:50:42 +0000951 memcpy(data_p + i * stride, data_p,
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800952 (width + 2 * WIENER_HALFWIN) * sizeof(uint16_t));
David Barker025b2542016-12-08 11:50:42 +0000953 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800954 for (i = height; i < height + WIENER_HALFWIN; ++i) {
David Barker025b2542016-12-08 11:50:42 +0000955 memcpy(data_p + i * stride, data_p + (height - 1) * stride,
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800956 (width + 2 * WIENER_HALFWIN) * sizeof(uint16_t));
David Barker025b2542016-12-08 11:50:42 +0000957 }
958}
959
David Barker9666e752016-12-08 11:25:47 +0000960static void loop_copy_tile_highbd(uint16_t *data, int tile_idx, int subtile_idx,
961 int subtile_bits, int width, int height,
962 int stride, RestorationInternal *rst,
963 uint16_t *dst, int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -0800964 const int tile_width = rst->tile_width;
965 const int tile_height = rst->tile_height;
David Barker9666e752016-12-08 11:25:47 +0000966 int i;
967 int h_start, h_end, v_start, v_end;
968 av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, rst->nhtiles,
969 rst->nvtiles, tile_width, tile_height, width, height,
970 0, 0, &h_start, &h_end, &v_start, &v_end);
971 for (i = v_start; i < v_end; ++i)
972 memcpy(dst + i * dst_stride + h_start, data + i * stride + h_start,
973 (h_end - h_start) * sizeof(*dst));
974}
975
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700976static void loop_wiener_filter_tile_highbd(uint16_t *data, int tile_idx,
977 int width, int height, int stride,
978 RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +0000979 int bit_depth, uint16_t *dst,
980 int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -0800981 const int tile_width = rst->tile_width;
982 const int tile_height = rst->tile_height;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700983 int h_start, h_end, v_start, v_end;
984 int i, j;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700985
Debargha Mukherjee994ccd72017-01-06 11:18:23 -0800986 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
David Barker9666e752016-12-08 11:25:47 +0000987 loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst,
988 dst_stride);
989 return;
990 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700991 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
David Barker025b2542016-12-08 11:50:42 +0000992 tile_width, tile_height, width, height, 0, 0,
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700993 &h_start, &h_end, &v_start, &v_end);
David Barker025b2542016-12-08 11:50:42 +0000994 // Convolve the whole tile (done in blocks here to match the requirements
995 // of the vectorized convolve functions, but the result is equivalent)
996 for (i = v_start; i < v_end; i += MAX_SB_SIZE)
997 for (j = h_start; j < h_end; j += MAX_SB_SIZE) {
998 int w = AOMMIN(MAX_SB_SIZE, (h_end - j + 15) & ~15);
999 int h = AOMMIN(MAX_SB_SIZE, (v_end - i + 15) & ~15);
1000 const uint16_t *data_p = data + i * stride + j;
1001 uint16_t *dst_p = dst + i * dst_stride + j;
Debargha Mukherjee28d15c72017-05-12 10:44:03 -07001002#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
1003 aom_highbd_convolve8_add_src_hip(
1004 CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p),
1005 dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16,
1006 rst->rsi->wiener_info[tile_idx].vfilter, 16, w, h, bit_depth);
1007#else
David Barker1e8e6b92017-01-13 13:45:51 +00001008 aom_highbd_convolve8_add_src(
1009 CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p),
1010 dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16,
1011 rst->rsi->wiener_info[tile_idx].vfilter, 16, w, h, bit_depth);
Debargha Mukherjee28d15c72017-05-12 10:44:03 -07001012#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001013 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001014}
1015
Yaowu Xuc27fc142016-08-22 16:08:15 -07001016static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
1017 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001018 int bit_depth, uint8_t *dst8,
1019 int dst_stride) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001020 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
David Barker9666e752016-12-08 11:25:47 +00001021 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001022 int tile_idx;
David Barker025b2542016-12-08 11:50:42 +00001023 extend_frame_highbd(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001024 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001025 loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
David Barker025b2542016-12-08 11:50:42 +00001026 bit_depth, dst, dst_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001027 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001028}
Yaowu Xuc27fc142016-08-22 16:08:15 -07001029
// High-bitdepth entry point for the self-guided filter. Widens the
// width x height region of dgd into the int32_t buffer dst, then passes that
// buffer to av1_selfguided_restoration_internal together with bit_depth.
void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height,
                                         int stride, int32_t *dst,
                                         int dst_stride, int bit_depth, int r,
                                         int eps, int32_t *tmpbuf) {
  int row, col;
  for (row = 0; row < height; ++row) {
    const uint16_t *const src_row = dgd + row * stride;
    int32_t *const dst_row = dst + row * dst_stride;
    for (col = 0; col < width; ++col) dst_row[col] = src_row[col];
  }
  av1_selfguided_restoration_internal(dst, width, height, dst_stride, bit_depth,
                                      r, eps, tmpbuf);
}
1043
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001044void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height,
1045 int stride, int32_t *dst, int dst_stride,
David Barkereed824e2017-03-10 11:35:22 +00001046 int corner, int edge) {
Urvang Joshi0c459412017-04-21 18:10:09 +00001047 int i, j;
1048 const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge);
1049
1050 i = 0;
1051 j = 0;
1052 {
1053 const int k = i * stride + j;
1054 const int l = i * dst_stride + j;
1055 dst[l] =
1056 center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) +
1057 corner * (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]);
1058 }
1059 i = 0;
1060 j = width - 1;
1061 {
1062 const int k = i * stride + j;
1063 const int l = i * dst_stride + j;
1064 dst[l] =
1065 center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) +
1066 corner * (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]);
1067 }
1068 i = height - 1;
1069 j = 0;
1070 {
1071 const int k = i * stride + j;
1072 const int l = i * dst_stride + j;
1073 dst[l] =
1074 center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) +
1075 corner * (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]);
1076 }
1077 i = height - 1;
1078 j = width - 1;
1079 {
1080 const int k = i * stride + j;
1081 const int l = i * dst_stride + j;
1082 dst[l] =
1083 center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) +
1084 corner * (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]);
1085 }
1086 i = 0;
1087 for (j = 1; j < width - 1; ++j) {
1088 const int k = i * stride + j;
1089 const int l = i * dst_stride + j;
1090 dst[l] = center * dgd[k] +
1091 edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) +
1092 corner * (dgd[k + stride - 1] + dgd[k + stride + 1] + dgd[k - 1] +
1093 dgd[k + 1]);
1094 }
1095 i = height - 1;
1096 for (j = 1; j < width - 1; ++j) {
1097 const int k = i * stride + j;
1098 const int l = i * dst_stride + j;
1099 dst[l] = center * dgd[k] +
1100 edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) +
1101 corner * (dgd[k - stride - 1] + dgd[k - stride + 1] + dgd[k - 1] +
1102 dgd[k + 1]);
1103 }
1104 j = 0;
1105 for (i = 1; i < height - 1; ++i) {
1106 const int k = i * stride + j;
1107 const int l = i * dst_stride + j;
1108 dst[l] = center * dgd[k] +
1109 edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) +
1110 corner * (dgd[k + stride + 1] + dgd[k - stride + 1] +
1111 dgd[k - stride] + dgd[k + stride]);
1112 }
1113 j = width - 1;
1114 for (i = 1; i < height - 1; ++i) {
1115 const int k = i * stride + j;
1116 const int l = i * dst_stride + j;
1117 dst[l] = center * dgd[k] +
1118 edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) +
1119 corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
1120 dgd[k - stride] + dgd[k + stride]);
1121 }
1122 for (i = 1; i < height - 1; ++i) {
1123 for (j = 1; j < width - 1; ++j) {
1124 const int k = i * stride + j;
1125 const int l = i * dst_stride + j;
1126 dst[l] =
1127 center * dgd[k] +
1128 edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) +
1129 corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
1130 dgd[k - stride + 1] + dgd[k + stride + 1]);
1131 }
1132 }
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001133}
1134
David Barker506eb722017-03-08 13:35:49 +00001135void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height,
1136 int stride, int bit_depth, int eps,
1137 int *xqd, uint16_t *dst,
1138 int dst_stride, int32_t *tmpbuf) {
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001139 int xq[2];
David Barker3a0df182016-12-21 10:44:52 +00001140 int32_t *flt1 = tmpbuf;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001141 int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
David Barker3a0df182016-12-21 10:44:52 +00001142 int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001143 int i, j;
David Barker0b04e9b2017-01-18 15:29:20 +00001144 assert(width * height <= RESTORATION_TILEPELS_MAX);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001145#if USE_HIGHPASS_IN_SGRPROJ
1146 av1_highpass_filter_highbd_c(dat, width, height, stride, flt1, width,
David Barkereed824e2017-03-10 11:35:22 +00001147 sgr_params[eps].corner, sgr_params[eps].edge);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001148#else
David Barker506eb722017-03-08 13:35:49 +00001149 av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt1, width,
1150 bit_depth, sgr_params[eps].r1,
1151 sgr_params[eps].e1, tmpbuf2);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001152#endif // USE_HIGHPASS_IN_SGRPROJ
David Barker506eb722017-03-08 13:35:49 +00001153 av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt2, width,
1154 bit_depth, sgr_params[eps].r2,
1155 sgr_params[eps].e2, tmpbuf2);
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001156 decode_xq(xqd, xq);
1157 for (i = 0; i < height; ++i) {
1158 for (j = 0; j < width; ++j) {
1159 const int k = i * width + j;
1160 const int l = i * stride + j;
1161 const int m = i * dst_stride + j;
1162 const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
1163 const int32_t f1 = (int32_t)flt1[k] - u;
1164 const int32_t f2 = (int32_t)flt2[k] - u;
David Barkerce110cc2017-02-22 10:38:59 +00001165 const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001166 const int16_t w =
1167 (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
1168 dst[m] = (uint16_t)clip_pixel_highbd(w, bit_depth);
1169 }
1170 }
1171}
1172
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001173static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
1174 int width, int height, int stride,
1175 RestorationInternal *rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001176 int bit_depth, uint16_t *dst,
1177 int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001178 const int tile_width = rst->tile_width;
1179 const int tile_height = rst->tile_height;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001180 int h_start, h_end, v_start, v_end;
David Barker9666e752016-12-08 11:25:47 +00001181 uint16_t *data_p, *dst_p;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001182
Debargha Mukherjee994ccd72017-01-06 11:18:23 -08001183 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
David Barker9666e752016-12-08 11:25:47 +00001184 loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst,
1185 dst_stride);
1186 return;
1187 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001188 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
1189 tile_width, tile_height, width, height, 0, 0,
1190 &h_start, &h_end, &v_start, &v_end);
1191 data_p = data + h_start + v_start * stride;
David Barker9666e752016-12-08 11:25:47 +00001192 dst_p = dst + h_start + v_start * dst_stride;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001193 apply_selfguided_restoration_highbd(
1194 data_p, h_end - h_start, v_end - v_start, stride, bit_depth,
1195 rst->rsi->sgrproj_info[tile_idx].ep, rst->rsi->sgrproj_info[tile_idx].xqd,
David Barker3a0df182016-12-21 10:44:52 +00001196 dst_p, dst_stride, rst->tmpbuf);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001197}
1198
1199static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height,
1200 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001201 int bit_depth, uint8_t *dst8,
1202 int dst_stride) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001203 int tile_idx;
1204 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
David Barker9666e752016-12-08 11:25:47 +00001205 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001206 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
1207 loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001208 bit_depth, dst, dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001209 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001210}
1211
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001212static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
1213 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001214 int bit_depth, uint8_t *dst8,
1215 int dst_stride) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001216 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
David Barker9666e752016-12-08 11:25:47 +00001217 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001218 int tile_idx;
David Barker025b2542016-12-08 11:50:42 +00001219 extend_frame_highbd(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001220 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
David Barker9666e752016-12-08 11:25:47 +00001221 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
1222 loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst,
1223 dst, dst_stride);
1224 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001225 loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
David Barker025b2542016-12-08 11:50:42 +00001226 bit_depth, dst, dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001227 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) {
1228 loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001229 rst, bit_depth, dst, dst_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001230 }
1231 }
1232}
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001233#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001234
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001235static void loop_restoration_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
1236 int start_mi_row, int end_mi_row,
1237 int components_pattern, RestorationInfo *rsi,
1238 YV12_BUFFER_CONFIG *dst) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001239 const int ywidth = frame->y_crop_width;
Debargha Mukherjee2dd982e2017-06-05 13:55:12 -07001240 const int yheight = frame->y_crop_height;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001241 const int uvwidth = frame->uv_crop_width;
Debargha Mukherjee2dd982e2017-06-05 13:55:12 -07001242 const int uvheight = frame->uv_crop_height;
1243 const int ystride = frame->y_stride;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001244 const int uvstride = frame->uv_stride;
1245 const int ystart = start_mi_row << MI_SIZE_LOG2;
1246 const int uvstart = ystart >> cm->subsampling_y;
1247 int yend = end_mi_row << MI_SIZE_LOG2;
1248 int uvend = yend >> cm->subsampling_y;
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001249 restore_func_type restore_funcs[RESTORE_TYPES] = {
Debargha Mukherjee4bfd72e2017-03-08 22:20:31 -08001250 NULL, loop_wiener_filter, loop_sgrproj_filter, loop_switchable_filter
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001251 };
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001252#if CONFIG_HIGHBITDEPTH
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001253 restore_func_highbd_type restore_funcs_highbd[RESTORE_TYPES] = {
Debargha Mukherjee4bfd72e2017-03-08 22:20:31 -08001254 NULL, loop_wiener_filter_highbd, loop_sgrproj_filter_highbd,
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001255 loop_switchable_filter_highbd
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001256 };
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001257#endif // CONFIG_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001258 restore_func_type restore_func;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001259#if CONFIG_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001260 restore_func_highbd_type restore_func_highbd;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001261#endif // CONFIG_HIGHBITDEPTH
David Barker9666e752016-12-08 11:25:47 +00001262 YV12_BUFFER_CONFIG dst_;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001263
Fergus Simpson9cd57cf2017-06-12 17:02:03 -07001264 yend = AOMMIN(yend, yheight);
1265 uvend = AOMMIN(uvend, uvheight);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001266
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001267 if (components_pattern == (1 << AOM_PLANE_Y)) {
1268 // Only y
1269 if (rsi[0].frame_restoration_type == RESTORE_NONE) {
1270 if (dst) aom_yv12_copy_y(frame, dst);
1271 return;
David Barker9666e752016-12-08 11:25:47 +00001272 }
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001273 } else if (components_pattern == (1 << AOM_PLANE_U)) {
1274 // Only U
1275 if (rsi[1].frame_restoration_type == RESTORE_NONE) {
1276 if (dst) aom_yv12_copy_u(frame, dst);
1277 return;
1278 }
1279 } else if (components_pattern == (1 << AOM_PLANE_V)) {
1280 // Only V
1281 if (rsi[2].frame_restoration_type == RESTORE_NONE) {
1282 if (dst) aom_yv12_copy_v(frame, dst);
1283 return;
1284 }
1285 } else if (components_pattern ==
1286 ((1 << AOM_PLANE_Y) | (1 << AOM_PLANE_U) | (1 << AOM_PLANE_V))) {
1287 // All components
1288 if (rsi[0].frame_restoration_type == RESTORE_NONE &&
1289 rsi[1].frame_restoration_type == RESTORE_NONE &&
1290 rsi[2].frame_restoration_type == RESTORE_NONE) {
1291 if (dst) aom_yv12_copy_frame(frame, dst);
1292 return;
1293 }
David Barker9666e752016-12-08 11:25:47 +00001294 }
1295
David Barker9666e752016-12-08 11:25:47 +00001296 if (!dst) {
1297 dst = &dst_;
1298 memset(dst, 0, sizeof(YV12_BUFFER_CONFIG));
1299 if (aom_realloc_frame_buffer(
Debargha Mukherjee2dd982e2017-06-05 13:55:12 -07001300 dst, ywidth, yheight, cm->subsampling_x, cm->subsampling_y,
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001301#if CONFIG_HIGHBITDEPTH
David Barker9666e752016-12-08 11:25:47 +00001302 cm->use_highbitdepth,
1303#endif
1304 AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL) < 0)
1305 aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
1306 "Failed to allocate restoration dst buffer");
1307 }
Debargha Mukherjee818e42a2016-12-12 11:52:56 -08001308
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001309 if ((components_pattern >> AOM_PLANE_Y) & 1) {
1310 if (rsi[0].frame_restoration_type != RESTORE_NONE) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001311 cm->rst_internal.ntiles = av1_get_rest_ntiles(
Debargha Mukherjee2dd982e2017-06-05 13:55:12 -07001312 ywidth, yheight, cm->rst_info[AOM_PLANE_Y].restoration_tilesize,
Debargha Mukherjee1008c1e2017-03-06 19:18:43 -08001313 &cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
1314 &cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001315 cm->rst_internal.rsi = &rsi[0];
1316 restore_func =
1317 restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001318#if CONFIG_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001319 restore_func_highbd =
1320 restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type];
1321 if (cm->use_highbitdepth)
1322 restore_func_highbd(
1323 frame->y_buffer + ystart * ystride, ywidth, yend - ystart, ystride,
1324 &cm->rst_internal, cm->bit_depth,
1325 dst->y_buffer + ystart * dst->y_stride, dst->y_stride);
1326 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001327#endif // CONFIG_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001328 restore_func(frame->y_buffer + ystart * ystride, ywidth, yend - ystart,
1329 ystride, &cm->rst_internal,
1330 dst->y_buffer + ystart * dst->y_stride, dst->y_stride);
1331 } else {
1332 aom_yv12_copy_y(frame, dst);
1333 }
1334 }
1335
1336 if ((components_pattern >> AOM_PLANE_U) & 1) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001337 if (rsi[AOM_PLANE_U].frame_restoration_type != RESTORE_NONE) {
1338 cm->rst_internal.ntiles = av1_get_rest_ntiles(
Debargha Mukherjee2dd982e2017-06-05 13:55:12 -07001339 uvwidth, uvheight, cm->rst_info[AOM_PLANE_U].restoration_tilesize,
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001340 &cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
1341 &cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
1342 cm->rst_internal.rsi = &rsi[AOM_PLANE_U];
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001343 restore_func =
1344 restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001345#if CONFIG_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001346 restore_func_highbd =
1347 restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type];
1348 if (cm->use_highbitdepth)
1349 restore_func_highbd(
1350 frame->u_buffer + uvstart * uvstride, uvwidth, uvend - uvstart,
1351 uvstride, &cm->rst_internal, cm->bit_depth,
1352 dst->u_buffer + uvstart * dst->uv_stride, dst->uv_stride);
1353 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001354#endif // CONFIG_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001355 restore_func(frame->u_buffer + uvstart * uvstride, uvwidth,
1356 uvend - uvstart, uvstride, &cm->rst_internal,
1357 dst->u_buffer + uvstart * dst->uv_stride, dst->uv_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001358 } else {
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001359 aom_yv12_copy_u(frame, dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001360 }
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001361 }
1362
1363 if ((components_pattern >> AOM_PLANE_V) & 1) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001364 if (rsi[AOM_PLANE_V].frame_restoration_type != RESTORE_NONE) {
1365 cm->rst_internal.ntiles = av1_get_rest_ntiles(
Debargha Mukherjee2dd982e2017-06-05 13:55:12 -07001366 uvwidth, uvheight, cm->rst_info[AOM_PLANE_V].restoration_tilesize,
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001367 &cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
1368 &cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
1369 cm->rst_internal.rsi = &rsi[AOM_PLANE_V];
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001370 restore_func =
1371 restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001372#if CONFIG_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001373 restore_func_highbd =
1374 restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type];
1375 if (cm->use_highbitdepth)
1376 restore_func_highbd(
1377 frame->v_buffer + uvstart * uvstride, uvwidth, uvend - uvstart,
1378 uvstride, &cm->rst_internal, cm->bit_depth,
1379 dst->v_buffer + uvstart * dst->uv_stride, dst->uv_stride);
1380 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001381#endif // CONFIG_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001382 restore_func(frame->v_buffer + uvstart * uvstride, uvwidth,
1383 uvend - uvstart, uvstride, &cm->rst_internal,
1384 dst->v_buffer + uvstart * dst->uv_stride, dst->uv_stride);
1385 } else {
1386 aom_yv12_copy_v(frame, dst);
1387 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001388 }
David Barker9666e752016-12-08 11:25:47 +00001389
David Barker9666e752016-12-08 11:25:47 +00001390 if (dst == &dst_) {
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001391 if ((components_pattern >> AOM_PLANE_Y) & 1) aom_yv12_copy_y(dst, frame);
1392 if ((components_pattern >> AOM_PLANE_U) & 1) aom_yv12_copy_u(dst, frame);
1393 if ((components_pattern >> AOM_PLANE_V) & 1) aom_yv12_copy_v(dst, frame);
David Barker9666e752016-12-08 11:25:47 +00001394 aom_free_frame_buffer(dst);
1395 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001396}
1397
Yaowu Xuf883b422016-08-30 14:01:10 -07001398void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001399 RestorationInfo *rsi, int components_pattern,
David Barker9666e752016-12-08 11:25:47 +00001400 int partial_frame, YV12_BUFFER_CONFIG *dst) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001401 int start_mi_row, end_mi_row, mi_rows_to_filter;
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001402 start_mi_row = 0;
Fergus Simpson9cd57cf2017-06-12 17:02:03 -07001403#if CONFIG_FRAME_SUPERRES
1404 mi_rows_to_filter =
1405 ALIGN_POWER_OF_TWO(cm->superres_upscaled_height, 3) >> MI_SIZE_LOG2;
1406#else
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001407 mi_rows_to_filter = cm->mi_rows;
Fergus Simpson9cd57cf2017-06-12 17:02:03 -07001408#endif // CONFIG_FRAME_SUPERRES
1409 if (partial_frame && mi_rows_to_filter > 8) {
1410 start_mi_row = mi_rows_to_filter >> 1;
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001411 start_mi_row &= 0xfffffff8;
Fergus Simpson9cd57cf2017-06-12 17:02:03 -07001412 mi_rows_to_filter = AOMMAX(mi_rows_to_filter / 8, 8);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001413 }
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001414 end_mi_row = start_mi_row + mi_rows_to_filter;
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001415 loop_restoration_init(&cm->rst_internal, cm->frame_type == KEY_FRAME);
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001416 loop_restoration_rows(frame, cm, start_mi_row, end_mi_row, components_pattern,
1417 rsi, dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001418}