blob: adf0c2cc5618ec9e98329d3f1097556a56af7108 [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 *
 */
12
13#include <math.h>
14
Yaowu Xuf883b422016-08-30 14:01:10 -070015#include "./aom_config.h"
16#include "./aom_dsp_rtcd.h"
David Barker9666e752016-12-08 11:25:47 +000017#include "./aom_scale_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070018#include "av1/common/onyxc_int.h"
19#include "av1/common/restoration.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070020#include "aom_dsp/aom_dsp_common.h"
21#include "aom_mem/aom_mem.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070022#include "aom_ports/mem.h"
23
#if USE_DOMAINTXFMRF
// Lookup table written by GenDomainTxfmRFVtable(): one 256-entry row for each
// (iteration, parameter index) pair.
static int domaintxfmrf_vtable[DOMAINTXFMRF_ITERS][DOMAINTXFMRF_PARAMS][256];

// Parameter values indexed by parameter number. GenDomainTxfmRFVtable()
// divides each entry by DOMAINTXFMRF_SIGMA_SCALE to obtain sigma_r.
static const int domaintxfmrf_params[DOMAINTXFMRF_PARAMS] = {
  32,  40,  48,  56,  64,  68,  72,  76,
  80,  82,  84,  86,  88,  90,  92,  94,
  96,  97,  98,  99,  100, 101, 102, 103,
  104, 105, 106, 107, 108, 109, 110, 111,
  112, 113, 114, 115, 116, 117, 118, 119,
  120, 121, 122, 123, 124, 125, 126, 127,
  128, 130, 132, 134, 136, 138, 140, 142,
  146, 150, 154, 158, 162, 166, 170, 174
};
#endif  // USE_DOMAINTXFMRF
Debargha Mukherjee3981be92016-11-21 09:35:44 -080035
Debargha Mukherjee8f209a82016-10-12 10:47:01 -070036const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -080037// r1, eps1, r2, eps2
38#if SGRPROJ_PARAMS_BITS == 3
Debargha Mukherjee8a709192017-01-10 11:29:31 -080039 { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 }, { 2, 55, 1, 14 },
40 { 2, 65, 1, 15 }, { 3, 50, 2, 25 }, { 3, 60, 2, 35 }, { 3, 70, 2, 45 },
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -080041#elif SGRPROJ_PARAMS_BITS == 4
42 { 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 },
43 { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 },
44 { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 3, 30, 1, 10 },
45 { 3, 50, 1, 12 }, { 3, 50, 2, 25 }, { 3, 60, 2, 35 }, { 3, 70, 2, 45 },
46#endif // SGRPROJ_PARAMS_BITS == 3
Debargha Mukherjee8f209a82016-10-12 10:47:01 -070047};
48
Yaowu Xuc27fc142016-08-22 16:08:15 -070049typedef void (*restore_func_type)(uint8_t *data8, int width, int height,
50 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +000051 uint8_t *dst8, int dst_stride);
Yaowu Xuf883b422016-08-30 14:01:10 -070052#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -070053typedef void (*restore_func_highbd_type)(uint8_t *data8, int width, int height,
54 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +000055 int bit_depth, uint8_t *dst8,
56 int dst_stride);
Yaowu Xuf883b422016-08-30 14:01:10 -070057#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -070058
David Barkerbefcc422017-01-31 09:42:10 +000059int av1_alloc_restoration_struct(AV1_COMMON *cm, RestorationInfo *rst_info,
60 int width, int height) {
Debargha Mukherjee874d36d2016-12-14 16:53:17 -080061 const int ntiles = av1_get_rest_ntiles(width, height, NULL, NULL, NULL, NULL);
62 rst_info->restoration_type = (RestorationType *)aom_realloc(
63 rst_info->restoration_type, sizeof(*rst_info->restoration_type) * ntiles);
David Barkerbefcc422017-01-31 09:42:10 +000064 aom_free(rst_info->wiener_info);
65 CHECK_MEM_ERROR(
66 cm, rst_info->wiener_info,
67 (WienerInfo *)aom_memalign(16, sizeof(*rst_info->wiener_info) * ntiles));
David Barker1e8e6b92017-01-13 13:45:51 +000068 memset(rst_info->wiener_info, 0, sizeof(*rst_info->wiener_info) * ntiles);
David Barkerbefcc422017-01-31 09:42:10 +000069 CHECK_MEM_ERROR(
70 cm, rst_info->sgrproj_info,
71 (SgrprojInfo *)aom_realloc(rst_info->sgrproj_info,
72 sizeof(*rst_info->sgrproj_info) * ntiles));
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -080073#if USE_DOMAINTXFMRF
Debargha Mukherjee874d36d2016-12-14 16:53:17 -080074 rst_info->domaintxfmrf_info = (DomaintxfmrfInfo *)aom_realloc(
75 rst_info->domaintxfmrf_info,
76 sizeof(*rst_info->domaintxfmrf_info) * ntiles);
David Barkerbefcc422017-01-31 09:42:10 +000077 CHECK_MEM_ERROR(cm, rst_info->domaintxfmrf_info,
78 (DomaintxfmrfInfo *)aom_realloc(
79 rst_info->domaintxfmrf_info,
80 sizeof(*rst_info->domaintxfmrf_info) * ntiles));
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -080081#endif // USE_DOMAINTXFMRF
Debargha Mukherjee874d36d2016-12-14 16:53:17 -080082 return ntiles;
83}
84
85void av1_free_restoration_struct(RestorationInfo *rst_info) {
86 aom_free(rst_info->restoration_type);
87 rst_info->restoration_type = NULL;
88 aom_free(rst_info->wiener_info);
89 rst_info->wiener_info = NULL;
90 aom_free(rst_info->sgrproj_info);
91 rst_info->sgrproj_info = NULL;
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -080092#if USE_DOMAINTXFMRF
Debargha Mukherjee874d36d2016-12-14 16:53:17 -080093 aom_free(rst_info->domaintxfmrf_info);
94 rst_info->domaintxfmrf_info = NULL;
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -080095#endif // USE_DOMAINTXFMRF
Debargha Mukherjee874d36d2016-12-14 16:53:17 -080096}
97
#if USE_DOMAINTXFMRF
// Fills domaintxfmrf_vtable. For iteration `iter`, parameter `param` and table
// index `idx` the stored value is
//   RINT(DOMAINTXFMRF_VTABLE_PREC * A^(1 + idx * sigma_s / sigma_r))
// where sigma_s = sqrt(2), sigma_r comes from domaintxfmrf_params[param] and
// A = exp(-DOMAINTXFMRF_MULT / (sigma_s * 2^(DOMAINTXFMRF_ITERS - iter - 1))).
static void GenDomainTxfmRFVtable() {
  const double sigma_s = sqrt(2.0);
  int iter, param, idx;

  for (iter = 0; iter < DOMAINTXFMRF_ITERS; ++iter) {
    const int nm = (1 << (DOMAINTXFMRF_ITERS - iter - 1));
    const double A = exp(-DOMAINTXFMRF_MULT / (sigma_s * nm));

    for (param = 0; param < DOMAINTXFMRF_PARAMS; ++param) {
      int *const row = domaintxfmrf_vtable[iter][param];
      const double sigma_r =
          (double)domaintxfmrf_params[param] / DOMAINTXFMRF_SIGMA_SCALE;
      const double scale = sigma_s / sigma_r;

      for (idx = 0; idx < 256; ++idx) {
        row[idx] = RINT(DOMAINTXFMRF_VTABLE_PREC * pow(A, 1.0 + idx * scale));
      }
    }
  }
}
#endif  // USE_DOMAINTXFMRF
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800118
#define APPROXIMATE_SGR 1

#if APPROXIMATE_SGR
#define MAX_RADIUS 3  // Only 1, 2, 3 allowed
#define MAX_EPS 80    // Max value of eps
#define MAX_NELEM ((2 * MAX_RADIUS + 1) * (2 * MAX_RADIUS + 1))
#define SGRPROJ_MTABLE_BITS 20
#define SGRPROJ_RECIP_BITS 12

// TODO(debargha): This table can be substantially reduced since only a few
// values are actually used.
static int sgrproj_mtable[MAX_EPS][MAX_NELEM];

// Fills sgrproj_mtable so that, for eps in [1, MAX_EPS] and n in
// [1, MAX_NELEM],
//   sgrproj_mtable[eps - 1][n - 1] = round(2^SGRPROJ_MTABLE_BITS / (n^2 * eps))
// using integer arithmetic (add half the divisor, then divide).
static void GenSgrprojVtable() {
  int eps_idx, n_idx;

  for (eps_idx = 0; eps_idx < MAX_EPS; ++eps_idx) {
    int *const row = sgrproj_mtable[eps_idx];
    const int eps = eps_idx + 1;

    for (n_idx = 0; n_idx < MAX_NELEM; ++n_idx) {
      const int n = n_idx + 1;
      const int divisor = n * n * eps;
      row[n_idx] = (((1 << SGRPROJ_MTABLE_BITS) + divisor / 2) / divisor);
    }
  }
}
#endif  // APPROXIMATE_SGR
142
// Builds the lookup tables used by the loop-restoration filters: the
// self-guided table when APPROXIMATE_SGR is enabled and the domain-transform
// table when USE_DOMAINTXFMRF is enabled.
void av1_loop_restoration_precal() {
#if APPROXIMATE_SGR
  GenSgrprojVtable();
#endif  // APPROXIMATE_SGR

#if USE_DOMAINTXFMRF
  GenDomainTxfmRFVtable();
#endif  // USE_DOMAINTXFMRF
}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700151
Debargha Mukherjeed7489142017-01-05 13:58:16 -0800152static void loop_restoration_init(RestorationInternal *rst, int kf) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700153 rst->keyframe = kf;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700154}
155
David Barker33f3bfd2017-01-06 15:34:50 +0000156void extend_frame(uint8_t *data, int width, int height, int stride) {
David Barker025b2542016-12-08 11:50:42 +0000157 uint8_t *data_p;
158 int i;
159 for (i = 0; i < height; ++i) {
160 data_p = data + i * stride;
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800161 memset(data_p - WIENER_HALFWIN, data_p[0], WIENER_HALFWIN);
162 memset(data_p + width, data_p[width - 1], WIENER_HALFWIN);
David Barker025b2542016-12-08 11:50:42 +0000163 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800164 data_p = data - WIENER_HALFWIN;
165 for (i = -WIENER_HALFWIN; i < 0; ++i) {
166 memcpy(data_p + i * stride, data_p, width + 2 * WIENER_HALFWIN);
David Barker025b2542016-12-08 11:50:42 +0000167 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800168 for (i = height; i < height + WIENER_HALFWIN; ++i) {
David Barker025b2542016-12-08 11:50:42 +0000169 memcpy(data_p + i * stride, data_p + (height - 1) * stride,
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800170 width + 2 * WIENER_HALFWIN);
David Barker025b2542016-12-08 11:50:42 +0000171 }
172}
173
David Barker9666e752016-12-08 11:25:47 +0000174static void loop_copy_tile(uint8_t *data, int tile_idx, int subtile_idx,
175 int subtile_bits, int width, int height, int stride,
176 RestorationInternal *rst, uint8_t *dst,
177 int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -0800178 const int tile_width = rst->tile_width;
179 const int tile_height = rst->tile_height;
David Barker9666e752016-12-08 11:25:47 +0000180 int i;
181 int h_start, h_end, v_start, v_end;
182 av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, rst->nhtiles,
183 rst->nvtiles, tile_width, tile_height, width, height,
184 0, 0, &h_start, &h_end, &v_start, &v_end);
185 for (i = v_start; i < v_end; ++i)
186 memcpy(dst + i * dst_stride + h_start, data + i * stride + h_start,
187 h_end - h_start);
188}
189
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700190static void loop_wiener_filter_tile(uint8_t *data, int tile_idx, int width,
191 int height, int stride,
David Barker025b2542016-12-08 11:50:42 +0000192 RestorationInternal *rst, uint8_t *dst,
David Barker9666e752016-12-08 11:25:47 +0000193 int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -0800194 const int tile_width = rst->tile_width;
195 const int tile_height = rst->tile_height;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700196 int i, j;
197 int h_start, h_end, v_start, v_end;
Debargha Mukherjee994ccd72017-01-06 11:18:23 -0800198 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
David Barker9666e752016-12-08 11:25:47 +0000199 loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
200 dst_stride);
201 return;
202 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700203 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
David Barker025b2542016-12-08 11:50:42 +0000204 tile_width, tile_height, width, height, 0, 0,
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700205 &h_start, &h_end, &v_start, &v_end);
David Barker025b2542016-12-08 11:50:42 +0000206 // Convolve the whole tile (done in blocks here to match the requirements
207 // of the vectorized convolve functions, but the result is equivalent)
208 for (i = v_start; i < v_end; i += MAX_SB_SIZE)
209 for (j = h_start; j < h_end; j += MAX_SB_SIZE) {
210 int w = AOMMIN(MAX_SB_SIZE, (h_end - j + 15) & ~15);
211 int h = AOMMIN(MAX_SB_SIZE, (v_end - i + 15) & ~15);
212 const uint8_t *data_p = data + i * stride + j;
213 uint8_t *dst_p = dst + i * dst_stride + j;
David Barker1e8e6b92017-01-13 13:45:51 +0000214 aom_convolve8_add_src(data_p, stride, dst_p, dst_stride,
215 rst->rsi->wiener_info[tile_idx].hfilter, 16,
216 rst->rsi->wiener_info[tile_idx].vfilter, 16, w, h);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700217 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700218}
219
Yaowu Xuc27fc142016-08-22 16:08:15 -0700220static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
David Barker025b2542016-12-08 11:50:42 +0000221 RestorationInternal *rst, uint8_t *dst,
222 int dst_stride) {
223 int tile_idx;
224 extend_frame(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700225 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
David Barker025b2542016-12-08 11:50:42 +0000226 loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst,
227 dst_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700228 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700229}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700230
/* Calculate windowed sums (if sqr=0) or sums of squares (if sqr=1)
   over the input. The window is of size (2r + 1)x(2r + 1), and we
   specialize to r = 1, 2, 3. A default function is used for r > 3.

   Each loop follows the same format: We keep a window's worth of input
   in individual variables and select data out of that as appropriate.
*/
// Box sum for r = 1: every dst entry becomes the sum (sqr == 0) or sum of
// squares (sqr != 0) of the src samples inside the 3x3 window centred on it,
// with the window clipped at the borders of the width x height region.
// Both width and height must be at least 3.
static void boxsum1(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int row, col;
  int w0, w1, w2;  // sliding window, oldest sample first

  // Pass 1: vertical 3-tap sums, one column at a time, from src into dst.
  for (col = 0; col < width; ++col) {
    const int32_t *const in = src + col;
    int32_t *const out = dst + col;

    w0 = in[0];
    w1 = in[src_stride];
    w2 = in[2 * src_stride];
    if (sqr) {
      w0 *= w0;
      w1 *= w1;
      w2 *= w2;
    }

    out[0] = w0 + w1;
    for (row = 1; row < height - 2; ++row) {
      // Here w0, w1, w2 hold the (possibly squared) samples of rows
      // row - 1, row and row + 1.
      out[row * dst_stride] = w0 + w1 + w2;
      w0 = w1;
      w1 = w2;
      w2 = in[(row + 2) * src_stride];
      if (sqr) w2 *= w2;
    }
    out[row * dst_stride] = w0 + w1 + w2;
    out[(row + 1) * dst_stride] = w1 + w2;
  }

  // Pass 2: horizontal 3-tap sums over the column sums, in place in dst.
  for (row = 0; row < height; ++row) {
    int32_t *const line = dst + row * dst_stride;

    w0 = line[0];
    w1 = line[1];
    w2 = line[2];

    line[0] = w0 + w1;
    for (col = 1; col < width - 2; ++col) {
      // Here w0, w1, w2 hold the column sums at col - 1, col and col + 1.
      line[col] = w0 + w1 + w2;
      w0 = w1;
      w1 = w2;
      w2 = line[col + 2];
    }
    line[col] = w0 + w1 + w2;
    line[col + 1] = w1 + w2;
  }
}
302
// Box sum for r = 2: every dst entry becomes the sum (sqr == 0) or sum of
// squares (sqr != 0) of the src samples inside the 5x5 window centred on it,
// with the window clipped at the borders of the width x height region.
// Both width and height must be at least 5.
static void boxsum2(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int row, col;
  int w0, w1, w2, w3, w4;  // sliding window, oldest sample first

  // Pass 1: vertical 5-tap sums, one column at a time, from src into dst.
  for (col = 0; col < width; ++col) {
    const int32_t *const in = src + col;
    int32_t *const out = dst + col;

    w0 = in[0];
    w1 = in[src_stride];
    w2 = in[2 * src_stride];
    w3 = in[3 * src_stride];
    w4 = in[4 * src_stride];
    if (sqr) {
      w0 *= w0;
      w1 *= w1;
      w2 *= w2;
      w3 *= w3;
      w4 *= w4;
    }

    out[0] = w0 + w1 + w2;
    out[dst_stride] = w0 + w1 + w2 + w3;
    for (row = 2; row < height - 3; ++row) {
      // Here w0..w4 hold the (possibly squared) samples of rows
      // row - 2 .. row + 2.
      out[row * dst_stride] = w0 + w1 + w2 + w3 + w4;
      w0 = w1;
      w1 = w2;
      w2 = w3;
      w3 = w4;
      w4 = in[(row + 3) * src_stride];
      if (sqr) w4 *= w4;
    }
    out[row * dst_stride] = w0 + w1 + w2 + w3 + w4;
    out[(row + 1) * dst_stride] = w1 + w2 + w3 + w4;
    out[(row + 2) * dst_stride] = w2 + w3 + w4;
  }

  // Pass 2: horizontal 5-tap sums over the column sums, in place in dst.
  for (row = 0; row < height; ++row) {
    int32_t *const line = dst + row * dst_stride;

    w0 = line[0];
    w1 = line[1];
    w2 = line[2];
    w3 = line[3];
    w4 = line[4];

    line[0] = w0 + w1 + w2;
    line[1] = w0 + w1 + w2 + w3;
    for (col = 2; col < width - 3; ++col) {
      // Here w0..w4 hold the column sums at col - 2 .. col + 2.
      line[col] = w0 + w1 + w2 + w3 + w4;
      w0 = w1;
      w1 = w2;
      w2 = w3;
      w3 = w4;
      w4 = line[col + 3];
    }
    line[col] = w0 + w1 + w2 + w3 + w4;
    line[col + 1] = w1 + w2 + w3 + w4;
    line[col + 2] = w2 + w3 + w4;
  }
}
389
// Box sum for r = 3: every dst entry becomes the sum (sqr == 0) or sum of
// squares (sqr != 0) of the src samples inside the 7x7 window centred on it,
// with the window clipped at the borders of the width x height region.
// Both width and height must be at least 7.
static void boxsum3(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int row, col;
  int w0, w1, w2, w3, w4, w5, w6;  // sliding window, oldest sample first

  // Pass 1: vertical 7-tap sums, one column at a time, from src into dst.
  for (col = 0; col < width; ++col) {
    const int32_t *const in = src + col;
    int32_t *const out = dst + col;

    w0 = in[0];
    w1 = in[1 * src_stride];
    w2 = in[2 * src_stride];
    w3 = in[3 * src_stride];
    w4 = in[4 * src_stride];
    w5 = in[5 * src_stride];
    w6 = in[6 * src_stride];
    if (sqr) {
      w0 *= w0;
      w1 *= w1;
      w2 *= w2;
      w3 *= w3;
      w4 *= w4;
      w5 *= w5;
      w6 *= w6;
    }

    out[0] = w0 + w1 + w2 + w3;
    out[dst_stride] = w0 + w1 + w2 + w3 + w4;
    out[2 * dst_stride] = w0 + w1 + w2 + w3 + w4 + w5;
    for (row = 3; row < height - 4; ++row) {
      // Here w0..w6 hold the (possibly squared) samples of rows
      // row - 3 .. row + 3.
      out[row * dst_stride] = w0 + w1 + w2 + w3 + w4 + w5 + w6;
      w0 = w1;
      w1 = w2;
      w2 = w3;
      w3 = w4;
      w4 = w5;
      w5 = w6;
      w6 = in[(row + 4) * src_stride];
      if (sqr) w6 *= w6;
    }
    out[row * dst_stride] = w0 + w1 + w2 + w3 + w4 + w5 + w6;
    out[(row + 1) * dst_stride] = w1 + w2 + w3 + w4 + w5 + w6;
    out[(row + 2) * dst_stride] = w2 + w3 + w4 + w5 + w6;
    out[(row + 3) * dst_stride] = w3 + w4 + w5 + w6;
  }

  // Pass 2: horizontal 7-tap sums over the column sums, in place in dst.
  for (row = 0; row < height; ++row) {
    int32_t *const line = dst + row * dst_stride;

    w0 = line[0];
    w1 = line[1];
    w2 = line[2];
    w3 = line[3];
    w4 = line[4];
    w5 = line[5];
    w6 = line[6];

    line[0] = w0 + w1 + w2 + w3;
    line[1] = w0 + w1 + w2 + w3 + w4;
    line[2] = w0 + w1 + w2 + w3 + w4 + w5;
    for (col = 3; col < width - 4; ++col) {
      // Here w0..w6 hold the column sums at col - 3 .. col + 3.
      line[col] = w0 + w1 + w2 + w3 + w4 + w5 + w6;
      w0 = w1;
      w1 = w2;
      w2 = w3;
      w3 = w4;
      w4 = w5;
      w5 = w6;
      w6 = line[col + 4];
    }
    line[col] = w0 + w1 + w2 + w3 + w4 + w5 + w6;
    line[col + 1] = w1 + w2 + w3 + w4 + w5 + w6;
    line[col + 2] = w2 + w3 + w4 + w5 + w6;
    line[col + 3] = w3 + w4 + w5 + w6;
  }
}
482
// Generic version for any r. To be removed after experiments are done.
//
// Writes to dst, for every position of the width x height region, the sum
// (sqr == 0) or sum of squares (sqr != 0) of the src samples inside the
// (2r + 1) x (2r + 1) window centred there, clipped at the region borders.
// The sums are formed from running (prefix) sums held in a temporary buffer:
// first down each column, then along each row.
// The first r + 1 rows/columns read prefix entry (index + r), so both width
// and height must exceed 2 * r.
// NOTE(review): the result of aom_malloc() is used without a NULL check.
static void boxsumr(int32_t *src, int width, int height, int src_stride, int r,
                    int sqr, int32_t *dst, int dst_stride) {
  int32_t *tmp = aom_malloc(width * height * sizeof(*tmp));
  int tmp_stride = width;
  int i, j;

  // Vertical prefix sums of src (or of its squares) into tmp.
  if (sqr) {
    for (j = 0; j < width; ++j) tmp[j] = src[j] * src[j];
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] +
            src[i * src_stride + j] * src[i * src_stride + j];
  } else {
    memcpy(tmp, src, sizeof(*tmp) * width);
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] + src[i * src_stride + j];
  }

  // Vertical window sums into dst: top rows, interior rows, bottom rows.
  for (i = 0; i <= r; ++i)
    memcpy(&dst[i * dst_stride], &tmp[(i + r) * tmp_stride],
           sizeof(*tmp) * width);
  for (i = r + 1; i < height - r; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] =
          tmp[(i + r) * tmp_stride + j] - tmp[(i - r - 1) * tmp_stride + j];
  for (i = height - r; i < height; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] = tmp[(height - 1) * tmp_stride + j] -
                                tmp[(i - r - 1) * tmp_stride + j];

  // Horizontal prefix sums of the vertical sums (dst) into tmp.
  // dst must be addressed with dst_stride here; using src_stride read the
  // wrong rows whenever the two strides differ.
  for (i = 0; i < height; ++i) tmp[i * tmp_stride] = dst[i * dst_stride];
  for (i = 0; i < height; ++i)
    for (j = 1; j < width; ++j)
      tmp[i * tmp_stride + j] =
          tmp[i * tmp_stride + j - 1] + dst[i * dst_stride + j];

  // Horizontal window sums into dst: left, interior and right columns.
  for (j = 0; j <= r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] = tmp[i * tmp_stride + j + r];
  for (j = r + 1; j < width - r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + j + r] - tmp[i * tmp_stride + j - r - 1];
  for (j = width - r; j < width; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + width - 1] - tmp[i * tmp_stride + j - r - 1];
  aom_free(tmp);
}
534
// Dispatches to the box-sum routine specialised for radius r (1, 2 or 3);
// any other radius is handled by the generic boxsumr().
static void boxsum(int32_t *src, int width, int height, int src_stride, int r,
                   int sqr, int32_t *dst, int dst_stride) {
  switch (r) {
    case 1:
      boxsum1(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    case 2:
      boxsum2(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    case 3:
      boxsum3(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    default:
      boxsumr(src, width, height, src_stride, r, sqr, dst, dst_stride);
      break;
  }
}
546
// Fills num (row pitch num_stride) with the number of positions covered by a
// (2r + 1) x (2r + 1) window centred on each position of a width x height
// region once the window is clipped to that region.
static void boxnum(int width, int height, int r, int8_t *num, int num_stride) {
  const int full = 2 * r + 1;  // unclipped window extent along one axis
  int i, j;

  // Corners: the window is clipped both vertically and horizontally. The
  // value for the top-left corner is mirrored into the other three.
  for (i = 0; i <= r; ++i) {
    int8_t *const top = num + i * num_stride;
    int8_t *const bottom = num + (height - 1 - i) * num_stride;
    for (j = 0; j <= r; ++j) {
      const int8_t count = (int8_t)((r + 1 + i) * (r + 1 + j));
      top[j] = count;
      top[width - 1 - j] = count;
      bottom[j] = count;
      bottom[width - 1 - j] = count;
    }
  }

  // Left and right bands: clipped horizontally only.
  for (j = 0; j <= r; ++j) {
    const int count = full * (r + 1 + j);
    for (i = r + 1; i < height - r; ++i) {
      int8_t *const line = num + i * num_stride;
      line[j] = count;
      line[width - 1 - j] = count;
    }
  }

  // Top and bottom bands: clipped vertically only.
  for (i = 0; i <= r; ++i) {
    const int count = full * (r + 1 + i);
    int8_t *const top = num + i * num_stride;
    int8_t *const bottom = num + (height - 1 - i) * num_stride;
    for (j = r + 1; j < width - r; ++j) {
      top[j] = count;
      bottom[j] = count;
    }
  }

  // Interior: the full window fits.
  for (i = r + 1; i < height - r; ++i) {
    int8_t *const line = num + i * num_stride;
    for (j = r + 1; j < width - r; ++j) {
      line[j] = full * full;
    }
  }
}
578
579void decode_xq(int *xqd, int *xq) {
580 xq[0] = -xqd[0];
581 xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
582}
583
Debargha Mukherjee4be12622017-02-15 21:38:02 -0800584#if APPROXIMATE_SGR
// x_by_xplus1[x] = round(256 * x / (x + 1)) for x in [0, 254]; the final
// entry (x = 255) is pinned to 256.
static const uint16_t x_by_xplus1[256] = {
  0,   128, 171, 192, 205, 213, 219, 224, 228, 230, 233, 235, 236, 238, 239, 240,
  241, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 247, 247, 248, 248,
  248, 248, 249, 249, 249, 249, 249, 250, 250, 250, 250, 250, 250, 250, 251, 251,
  251, 251, 251, 251, 251, 251, 251, 251, 252, 252, 252, 252, 252, 252, 252, 252,
  252, 252, 252, 252, 252, 252, 252, 252, 252, 253, 253, 253, 253, 253, 253, 253,
  253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253,
  253, 253, 253, 253, 253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 256,
};
605
606static const uint16_t one_by_x[MAX_NELEM] = {
David Barker9198d132017-02-17 14:27:05 +0000607 4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315,
608 293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164, 158,
609 152, 146, 141, 137, 132, 128, 124, 120, 117, 114, 111, 108, 105,
610 102, 100, 98, 95, 93, 91, 89, 87, 85, 84
Debargha Mukherjee4be12622017-02-15 21:38:02 -0800611};
612#endif // APPROXIMATE_SGR
613
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800614void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride,
David Barker3a0df182016-12-21 10:44:52 +0000615 int bit_depth, int r, int eps,
616 int32_t *tmpbuf) {
617 int32_t *A = tmpbuf;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800618 int32_t *B = A + RESTORATION_TILEPELS_MAX;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700619 int8_t num[RESTORATION_TILEPELS_MAX];
620 int i, j;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700621
David Barker6928a5d2017-01-05 11:29:22 +0000622 // Don't filter tiles with dimensions < 5 on any axis
623 if ((width < 5) || (height < 5)) return;
624
625 boxsum(dgd, width, height, stride, r, 0, B, width);
626 boxsum(dgd, width, height, stride, r, 1, A, width);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700627 boxnum(width, height, r, num, width);
Debargha Mukherjee8a709192017-01-10 11:29:31 -0800628 assert(r <= 3);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700629 for (i = 0; i < height; ++i) {
630 for (j = 0; j < width; ++j) {
631 const int k = i * width + j;
632 const int n = num[k];
Debargha Mukherjee4be12622017-02-15 21:38:02 -0800633#if APPROXIMATE_SGR
David Barker9198d132017-02-17 14:27:05 +0000634 // a < 2^16 * n < 2^22 regardless of bit depth
635 uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8));
636 // b < 2^8 * n < 2^14 regardless of bit depth
637 uint32_t b = ROUND_POWER_OF_TWO(B[k], bit_depth - 8);
638
639 // Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28,
640 // and p itself satisfies p < 2^14 * n^2 < 2^26.
641 // Note: Sometimes, in high bit depth, we can end up with a*n < b*b.
642 // This is an artefact of rounding, and can only happen if all pixels
643 // are (almost) identical, so in this case we saturate to p=0.
644 uint32_t p = (a * n < b * b) ? 0 : a * n - b * b;
645 uint32_t s = sgrproj_mtable[eps - 1][n - 1];
646
647 // p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32
648 // as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12
649 // (this holds even after accounting for the rounding in s)
650 const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS);
651
652 A[k] = x_by_xplus1[AOMMIN(z, 255)]; // < 2^8
653
654 // SGRPROJ_SGR - A[k] < 2^8, B[k] < 2^(bit_depth) * n,
655 // one_by_x[n - 1] = round(2^12 / n)
656 // => the product here is < 2^(20 + bit_depth) <= 2^32,
657 // and B[k] is set to a value < 2^(8 + bit depth)
658 B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) *
659 (uint32_t)B[k] *
660 (uint32_t)one_by_x[n - 1],
661 SGRPROJ_RECIP_BITS);
Debargha Mukherjee4be12622017-02-15 21:38:02 -0800662#else
David Barker9198d132017-02-17 14:27:05 +0000663 const uint32_t p = (uint32_t)((uint64_t)A[k] * n - (uint64_t)B[k] * B[k]);
David Barker6928a5d2017-01-05 11:29:22 +0000664 const uint32_t q = (uint32_t)(p + n * n * eps);
665 assert((uint64_t)A[k] * n - (uint64_t)B[k] * B[k] < (25 * 25U << 22));
666 A[k] = (int32_t)(((uint64_t)p << SGRPROJ_SGR_BITS) + (q >> 1)) / q;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700667 B[k] = ((SGRPROJ_SGR - A[k]) * B[k] + (n >> 1)) / n;
Debargha Mukherjee4be12622017-02-15 21:38:02 -0800668#endif // APPROXIMATE_SGR
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700669 }
670 }
671#if APPROXIMATE_SGR
672 i = 0;
673 j = 0;
674 {
675 const int k = i * width + j;
676 const int l = i * stride + j;
677 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800678 const int32_t a =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700679 3 * A[k] + 2 * A[k + 1] + 2 * A[k + width] + A[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800680 const int32_t b =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700681 3 * B[k] + 2 * B[k + 1] + 2 * B[k + width] + B[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800682 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700683 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
684 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
685 }
686 i = 0;
687 j = width - 1;
688 {
689 const int k = i * width + j;
690 const int l = i * stride + j;
691 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800692 const int32_t a =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700693 3 * A[k] + 2 * A[k - 1] + 2 * A[k + width] + A[k + width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800694 const int32_t b =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700695 3 * B[k] + 2 * B[k - 1] + 2 * B[k + width] + B[k + width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800696 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700697 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
698 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
699 }
700 i = height - 1;
701 j = 0;
702 {
703 const int k = i * width + j;
704 const int l = i * stride + j;
705 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800706 const int32_t a =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700707 3 * A[k] + 2 * A[k + 1] + 2 * A[k - width] + A[k - width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800708 const int32_t b =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700709 3 * B[k] + 2 * B[k + 1] + 2 * B[k - width] + B[k - width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800710 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700711 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
712 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
713 }
714 i = height - 1;
715 j = width - 1;
716 {
717 const int k = i * width + j;
718 const int l = i * stride + j;
719 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800720 const int32_t a =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700721 3 * A[k] + 2 * A[k - 1] + 2 * A[k - width] + A[k - width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800722 const int32_t b =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700723 3 * B[k] + 2 * B[k - 1] + 2 * B[k - width] + B[k - width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800724 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700725 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
726 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
727 }
728 i = 0;
729 for (j = 1; j < width - 1; ++j) {
730 const int k = i * width + j;
731 const int l = i * stride + j;
732 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800733 const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + width] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700734 A[k + width - 1] + A[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800735 const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + width] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700736 B[k + width - 1] + B[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800737 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700738 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
739 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
740 }
741 i = height - 1;
742 for (j = 1; j < width - 1; ++j) {
743 const int k = i * width + j;
744 const int l = i * stride + j;
745 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800746 const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - width] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700747 A[k - width - 1] + A[k - width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800748 const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - width] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700749 B[k - width - 1] + B[k - width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800750 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700751 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
752 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
753 }
754 j = 0;
755 for (i = 1; i < height - 1; ++i) {
756 const int k = i * width + j;
757 const int l = i * stride + j;
758 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800759 const int32_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k + 1] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700760 A[k - width + 1] + A[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800761 const int32_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k + 1] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700762 B[k - width + 1] + B[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800763 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700764 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
765 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
766 }
767 j = width - 1;
768 for (i = 1; i < height - 1; ++i) {
769 const int k = i * width + j;
770 const int l = i * stride + j;
771 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800772 const int32_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k - 1] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700773 A[k - width - 1] + A[k + width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800774 const int32_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k - 1] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700775 B[k - width - 1] + B[k + width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800776 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700777 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
778 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
779 }
780 for (i = 1; i < height - 1; ++i) {
781 for (j = 1; j < width - 1; ++j) {
782 const int k = i * width + j;
783 const int l = i * stride + j;
784 const int nb = 5;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800785 const int32_t a =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700786 (A[k] + A[k - 1] + A[k + 1] + A[k - width] + A[k + width]) * 4 +
787 (A[k - 1 - width] + A[k - 1 + width] + A[k + 1 - width] +
788 A[k + 1 + width]) *
789 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800790 const int32_t b =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700791 (B[k] + B[k - 1] + B[k + 1] + B[k - width] + B[k + width]) * 4 +
792 (B[k - 1 - width] + B[k - 1 + width] + B[k + 1 - width] +
793 B[k + 1 + width]) *
794 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800795 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700796 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
797 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
798 }
799 }
800#else
801 if (r > 1) boxnum(width, height, r = 1, num, width);
David Barker6928a5d2017-01-05 11:29:22 +0000802 boxsum(A, width, height, width, r, 0, A, width);
803 boxsum(B, width, height, width, r, 0, B, width);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700804 for (i = 0; i < height; ++i) {
805 for (j = 0; j < width; ++j) {
806 const int k = i * width + j;
807 const int l = i * stride + j;
808 const int n = num[k];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800809 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700810 (((A[k] * dgd[l] + B[k]) << SGRPROJ_RST_BITS) + (n >> 1)) / n;
811 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
812 }
813 }
814#endif // APPROXIMATE_SGR
815}
816
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800817static void apply_selfguided_restoration(uint8_t *dat, int width, int height,
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700818 int stride, int bit_depth, int eps,
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800819 int *xqd, uint8_t *dst, int dst_stride,
David Barker3a0df182016-12-21 10:44:52 +0000820 int32_t *tmpbuf) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700821 int xq[2];
David Barker3a0df182016-12-21 10:44:52 +0000822 int32_t *flt1 = tmpbuf;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800823 int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
David Barker3a0df182016-12-21 10:44:52 +0000824 int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700825 int i, j;
David Barker3a0df182016-12-21 10:44:52 +0000826 assert(width * height <= RESTORATION_TILEPELS_MAX);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700827 for (i = 0; i < height; ++i) {
828 for (j = 0; j < width; ++j) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700829 flt1[i * width + j] = dat[i * stride + j];
830 flt2[i * width + j] = dat[i * stride + j];
831 }
832 }
833 av1_selfguided_restoration(flt1, width, height, width, bit_depth,
834 sgr_params[eps].r1, sgr_params[eps].e1, tmpbuf2);
835 av1_selfguided_restoration(flt2, width, height, width, bit_depth,
836 sgr_params[eps].r2, sgr_params[eps].e2, tmpbuf2);
837 decode_xq(xqd, xq);
838 for (i = 0; i < height; ++i) {
839 for (j = 0; j < width; ++j) {
840 const int k = i * width + j;
841 const int l = i * stride + j;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800842 const int m = i * dst_stride + j;
843 const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
844 const int32_t f1 = (int32_t)flt1[k] - u;
845 const int32_t f2 = (int32_t)flt2[k] - u;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700846 const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
847 const int16_t w =
848 (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800849 dst[m] = clip_pixel(w);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700850 }
851 }
852}
853
854static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width,
855 int height, int stride,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800856 RestorationInternal *rst, uint8_t *dst,
857 int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -0800858 const int tile_width = rst->tile_width;
859 const int tile_height = rst->tile_height;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700860 int h_start, h_end, v_start, v_end;
David Barker9666e752016-12-08 11:25:47 +0000861 uint8_t *data_p, *dst_p;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700862
Debargha Mukherjee994ccd72017-01-06 11:18:23 -0800863 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
David Barker9666e752016-12-08 11:25:47 +0000864 loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
865 dst_stride);
866 return;
867 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700868 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
869 tile_width, tile_height, width, height, 0, 0,
870 &h_start, &h_end, &v_start, &v_end);
871 data_p = data + h_start + v_start * stride;
David Barker9666e752016-12-08 11:25:47 +0000872 dst_p = dst + h_start + v_start * dst_stride;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800873 apply_selfguided_restoration(data_p, h_end - h_start, v_end - v_start, stride,
874 8, rst->rsi->sgrproj_info[tile_idx].ep,
875 rst->rsi->sgrproj_info[tile_idx].xqd, dst_p,
David Barker3a0df182016-12-21 10:44:52 +0000876 dst_stride, rst->tmpbuf);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700877}
878
879static void loop_sgrproj_filter(uint8_t *data, int width, int height,
880 int stride, RestorationInternal *rst,
David Barker025b2542016-12-08 11:50:42 +0000881 uint8_t *dst, int dst_stride) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700882 int tile_idx;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700883 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800884 loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst, dst,
885 dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700886 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700887}
888
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -0800889#if USE_DOMAINTXFMRF
David Barker6928a5d2017-01-05 11:29:22 +0000890static void apply_domaintxfmrf(int iter, int param, uint8_t *diff_right,
891 uint8_t *diff_down, int width, int height,
892 int32_t *dat, int dat_stride) {
893 int i, j, acc;
894 // Do first row separately, to initialize the top to bottom filter
895 i = 0;
896 {
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800897 // left to right
David Barker6928a5d2017-01-05 11:29:22 +0000898 acc = dat[i * dat_stride] * DOMAINTXFMRF_VTABLE_PREC;
899 dat[i * dat_stride] = acc;
900 for (j = 1; j < width; ++j) {
901 const int in = dat[i * dat_stride + j];
902 const int diff =
903 diff_right[i * width + j - 1]; // Left absolute difference
904 const int v = domaintxfmrf_vtable[iter][param][diff];
905 acc = in * (DOMAINTXFMRF_VTABLE_PREC - v) +
906 ROUND_POWER_OF_TWO(v * acc, DOMAINTXFMRF_VTABLE_PRECBITS);
907 dat[i * dat_stride + j] = acc;
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800908 }
909 // right to left
David Barker6928a5d2017-01-05 11:29:22 +0000910 for (j = width - 2; j >= 0; --j) {
911 const int in = dat[i * dat_stride + j];
912 const int diff = diff_right[i * width + j]; // Right absolute difference
913 const int v = domaintxfmrf_vtable[iter][param][diff];
914 acc = ROUND_POWER_OF_TWO(in * (DOMAINTXFMRF_VTABLE_PREC - v) + acc * v,
915 DOMAINTXFMRF_VTABLE_PRECBITS);
916 dat[i * dat_stride + j] = acc;
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800917 }
918 }
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800919
David Barker6928a5d2017-01-05 11:29:22 +0000920 for (i = 1; i < height; ++i) {
921 // left to right
922 acc = dat[i * dat_stride] * DOMAINTXFMRF_VTABLE_PREC;
923 dat[i * dat_stride] = acc;
924 for (j = 1; j < width; ++j) {
925 const int in = dat[i * dat_stride + j];
926 const int diff =
927 diff_right[i * width + j - 1]; // Left absolute difference
928 const int v = domaintxfmrf_vtable[iter][param][diff];
929 acc = in * (DOMAINTXFMRF_VTABLE_PREC - v) +
930 ROUND_POWER_OF_TWO(v * acc, DOMAINTXFMRF_VTABLE_PRECBITS);
931 dat[i * dat_stride + j] = acc;
932 }
933 // right to left
934 for (j = width - 2; j >= 0; --j) {
935 const int in = dat[i * dat_stride + j];
936 const int diff = diff_right[i * width + j]; // Right absolute difference
937 const int v = domaintxfmrf_vtable[iter][param][diff];
938 acc = ROUND_POWER_OF_TWO(in * (DOMAINTXFMRF_VTABLE_PREC - v) + acc * v,
939 DOMAINTXFMRF_VTABLE_PRECBITS);
940 dat[i * dat_stride + j] = acc;
941 }
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800942 // top to bottom
David Barker6928a5d2017-01-05 11:29:22 +0000943 for (j = 0; j < width; ++j) {
944 const int in = dat[i * dat_stride + j];
945 const int in_above = dat[(i - 1) * dat_stride + j];
946 const int diff =
947 diff_down[(i - 1) * width + j]; // Upward absolute difference
948 const int v = domaintxfmrf_vtable[iter][param][diff];
949 acc =
950 ROUND_POWER_OF_TWO(in * (DOMAINTXFMRF_VTABLE_PREC - v) + in_above * v,
951 DOMAINTXFMRF_VTABLE_PRECBITS);
952 dat[i * dat_stride + j] = acc;
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800953 }
954 }
David Barker6928a5d2017-01-05 11:29:22 +0000955 for (j = 0; j < width; ++j) {
956 // bottom to top + output rounding
957 acc = dat[(height - 1) * dat_stride + j];
958 dat[(height - 1) * dat_stride + j] =
959 ROUND_POWER_OF_TWO(acc, DOMAINTXFMRF_VTABLE_PRECBITS);
960 for (i = height - 2; i >= 0; --i) {
961 const int in = dat[i * dat_stride + j];
962 const int diff =
963 diff_down[i * width + j]; // Downward absolute difference
964 const int v = domaintxfmrf_vtable[iter][param][diff];
965 acc = ROUND_POWER_OF_TWO(in * (DOMAINTXFMRF_VTABLE_PREC - v) + acc * v,
966 DOMAINTXFMRF_VTABLE_PRECBITS);
967 dat[i * dat_stride + j] =
968 ROUND_POWER_OF_TWO(acc, DOMAINTXFMRF_VTABLE_PRECBITS);
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800969 }
970 }
971}
972
973void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
David Barker9666e752016-12-08 11:25:47 +0000974 int stride, int param, uint8_t *dst,
Yaowu Xubf1d62d2016-12-14 19:20:46 -0800975 int dst_stride, int32_t *tmpbuf) {
976 int32_t *dat = tmpbuf;
David Barker6928a5d2017-01-05 11:29:22 +0000977 uint8_t *diff_right = (uint8_t *)(tmpbuf + RESTORATION_TILEPELS_MAX);
978 uint8_t *diff_down = diff_right + RESTORATION_TILEPELS_MAX;
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800979 int i, j, t;
David Barker6928a5d2017-01-05 11:29:22 +0000980
981 for (i = 0; i < height; ++i) {
982 int cur_px = dgd[i * stride];
983 for (j = 0; j < width - 1; ++j) {
984 const int next_px = dgd[i * stride + j + 1];
985 diff_right[i * width + j] = abs(cur_px - next_px);
986 cur_px = next_px;
987 }
988 }
989 for (j = 0; j < width; ++j) {
990 int cur_px = dgd[j];
991 for (i = 0; i < height - 1; ++i) {
992 const int next_px = dgd[(i + 1) * stride + j];
993 diff_down[i * width + j] = abs(cur_px - next_px);
994 cur_px = next_px;
995 }
996 }
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800997 for (i = 0; i < height; ++i) {
998 for (j = 0; j < width; ++j) {
999 dat[i * width + j] = dgd[i * stride + j];
1000 }
1001 }
David Barker6928a5d2017-01-05 11:29:22 +00001002
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001003 for (t = 0; t < DOMAINTXFMRF_ITERS; ++t) {
David Barker6928a5d2017-01-05 11:29:22 +00001004 apply_domaintxfmrf(t, param, diff_right, diff_down, width, height, dat,
1005 width);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001006 }
1007 for (i = 0; i < height; ++i) {
1008 for (j = 0; j < width; ++j) {
David Barker9666e752016-12-08 11:25:47 +00001009 dst[i * dst_stride + j] = clip_pixel(dat[i * width + j]);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001010 }
1011 }
1012}
1013
1014static void loop_domaintxfmrf_filter_tile(uint8_t *data, int tile_idx,
1015 int width, int height, int stride,
1016 RestorationInternal *rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001017 uint8_t *dst, int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001018 const int tile_width = rst->tile_width;
1019 const int tile_height = rst->tile_height;
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001020 int h_start, h_end, v_start, v_end;
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001021 int32_t *tmpbuf = (int32_t *)rst->tmpbuf;
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001022
Debargha Mukherjee994ccd72017-01-06 11:18:23 -08001023 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
David Barker9666e752016-12-08 11:25:47 +00001024 loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
1025 dst_stride);
1026 return;
1027 }
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001028 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
1029 tile_width, tile_height, width, height, 0, 0,
1030 &h_start, &h_end, &v_start, &v_end);
David Barker9666e752016-12-08 11:25:47 +00001031 av1_domaintxfmrf_restoration(
1032 data + h_start + v_start * stride, h_end - h_start, v_end - v_start,
1033 stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r,
Yaowu Xubf1d62d2016-12-14 19:20:46 -08001034 dst + h_start + v_start * dst_stride, dst_stride, tmpbuf);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001035}
1036
1037static void loop_domaintxfmrf_filter(uint8_t *data, int width, int height,
1038 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001039 uint8_t *dst, int dst_stride) {
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001040 int tile_idx;
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001041 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
1042 loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001043 dst, dst_stride);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001044 }
1045}
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001046#endif // USE_DOMAINTXFMRF
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001047
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001048static void loop_switchable_filter(uint8_t *data, int width, int height,
1049 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001050 uint8_t *dst, int dst_stride) {
David Barker025b2542016-12-08 11:50:42 +00001051 int tile_idx;
David Barker025b2542016-12-08 11:50:42 +00001052 extend_frame(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001053 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
David Barker9666e752016-12-08 11:25:47 +00001054 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
1055 loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
1056 dst_stride);
1057 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
David Barker025b2542016-12-08 11:50:42 +00001058 loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst,
1059 dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001060 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) {
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001061 loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst, dst,
1062 dst_stride);
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001063#if USE_DOMAINTXFMRF
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001064 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) {
1065 loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001066 dst, dst_stride);
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001067#endif // USE_DOMAINTXFMRF
Yaowu Xuc27fc142016-08-22 16:08:15 -07001068 }
1069 }
1070}
1071
Yaowu Xuf883b422016-08-30 14:01:10 -07001072#if CONFIG_AOM_HIGHBITDEPTH
David Barker33f3bfd2017-01-06 15:34:50 +00001073void extend_frame_highbd(uint16_t *data, int width, int height, int stride) {
David Barker025b2542016-12-08 11:50:42 +00001074 uint16_t *data_p;
1075 int i, j;
1076 for (i = 0; i < height; ++i) {
1077 data_p = data + i * stride;
Debargha Mukherjee999d2f62016-12-15 13:23:21 -08001078 for (j = -WIENER_HALFWIN; j < 0; ++j) data_p[j] = data_p[0];
1079 for (j = width; j < width + WIENER_HALFWIN; ++j)
David Barker025b2542016-12-08 11:50:42 +00001080 data_p[j] = data_p[width - 1];
1081 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -08001082 data_p = data - WIENER_HALFWIN;
1083 for (i = -WIENER_HALFWIN; i < 0; ++i) {
David Barker025b2542016-12-08 11:50:42 +00001084 memcpy(data_p + i * stride, data_p,
Debargha Mukherjee999d2f62016-12-15 13:23:21 -08001085 (width + 2 * WIENER_HALFWIN) * sizeof(uint16_t));
David Barker025b2542016-12-08 11:50:42 +00001086 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -08001087 for (i = height; i < height + WIENER_HALFWIN; ++i) {
David Barker025b2542016-12-08 11:50:42 +00001088 memcpy(data_p + i * stride, data_p + (height - 1) * stride,
Debargha Mukherjee999d2f62016-12-15 13:23:21 -08001089 (width + 2 * WIENER_HALFWIN) * sizeof(uint16_t));
David Barker025b2542016-12-08 11:50:42 +00001090 }
1091}
1092
David Barker9666e752016-12-08 11:25:47 +00001093static void loop_copy_tile_highbd(uint16_t *data, int tile_idx, int subtile_idx,
1094 int subtile_bits, int width, int height,
1095 int stride, RestorationInternal *rst,
1096 uint16_t *dst, int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001097 const int tile_width = rst->tile_width;
1098 const int tile_height = rst->tile_height;
David Barker9666e752016-12-08 11:25:47 +00001099 int i;
1100 int h_start, h_end, v_start, v_end;
1101 av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, rst->nhtiles,
1102 rst->nvtiles, tile_width, tile_height, width, height,
1103 0, 0, &h_start, &h_end, &v_start, &v_end);
1104 for (i = v_start; i < v_end; ++i)
1105 memcpy(dst + i * dst_stride + h_start, data + i * stride + h_start,
1106 (h_end - h_start) * sizeof(*dst));
1107}
1108
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001109static void loop_wiener_filter_tile_highbd(uint16_t *data, int tile_idx,
1110 int width, int height, int stride,
1111 RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001112 int bit_depth, uint16_t *dst,
1113 int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001114 const int tile_width = rst->tile_width;
1115 const int tile_height = rst->tile_height;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001116 int h_start, h_end, v_start, v_end;
1117 int i, j;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001118
Debargha Mukherjee994ccd72017-01-06 11:18:23 -08001119 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
David Barker9666e752016-12-08 11:25:47 +00001120 loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst,
1121 dst_stride);
1122 return;
1123 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001124 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
David Barker025b2542016-12-08 11:50:42 +00001125 tile_width, tile_height, width, height, 0, 0,
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001126 &h_start, &h_end, &v_start, &v_end);
David Barker025b2542016-12-08 11:50:42 +00001127 // Convolve the whole tile (done in blocks here to match the requirements
1128 // of the vectorized convolve functions, but the result is equivalent)
1129 for (i = v_start; i < v_end; i += MAX_SB_SIZE)
1130 for (j = h_start; j < h_end; j += MAX_SB_SIZE) {
1131 int w = AOMMIN(MAX_SB_SIZE, (h_end - j + 15) & ~15);
1132 int h = AOMMIN(MAX_SB_SIZE, (v_end - i + 15) & ~15);
1133 const uint16_t *data_p = data + i * stride + j;
1134 uint16_t *dst_p = dst + i * dst_stride + j;
David Barker1e8e6b92017-01-13 13:45:51 +00001135 aom_highbd_convolve8_add_src(
1136 CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p),
1137 dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16,
1138 rst->rsi->wiener_info[tile_idx].vfilter, 16, w, h, bit_depth);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001139 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001140}
1141
Yaowu Xuc27fc142016-08-22 16:08:15 -07001142static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
1143 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001144 int bit_depth, uint8_t *dst8,
1145 int dst_stride) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001146 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
David Barker9666e752016-12-08 11:25:47 +00001147 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001148 int tile_idx;
David Barker025b2542016-12-08 11:50:42 +00001149 extend_frame_highbd(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001150 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001151 loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
David Barker025b2542016-12-08 11:50:42 +00001152 bit_depth, dst, dst_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001153 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001154}
Yaowu Xuc27fc142016-08-22 16:08:15 -07001155
David Barker3a0df182016-12-21 10:44:52 +00001156static void apply_selfguided_restoration_highbd(
1157 uint16_t *dat, int width, int height, int stride, int bit_depth, int eps,
1158 int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf) {
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001159 int xq[2];
David Barker3a0df182016-12-21 10:44:52 +00001160 int32_t *flt1 = tmpbuf;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001161 int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
David Barker3a0df182016-12-21 10:44:52 +00001162 int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001163 int i, j;
David Barker0b04e9b2017-01-18 15:29:20 +00001164 assert(width * height <= RESTORATION_TILEPELS_MAX);
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001165 for (i = 0; i < height; ++i) {
1166 for (j = 0; j < width; ++j) {
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001167 flt1[i * width + j] = dat[i * stride + j];
1168 flt2[i * width + j] = dat[i * stride + j];
1169 }
1170 }
1171 av1_selfguided_restoration(flt1, width, height, width, bit_depth,
1172 sgr_params[eps].r1, sgr_params[eps].e1, tmpbuf2);
1173 av1_selfguided_restoration(flt2, width, height, width, bit_depth,
1174 sgr_params[eps].r2, sgr_params[eps].e2, tmpbuf2);
1175 decode_xq(xqd, xq);
1176 for (i = 0; i < height; ++i) {
1177 for (j = 0; j < width; ++j) {
1178 const int k = i * width + j;
1179 const int l = i * stride + j;
1180 const int m = i * dst_stride + j;
1181 const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
1182 const int32_t f1 = (int32_t)flt1[k] - u;
1183 const int32_t f2 = (int32_t)flt2[k] - u;
1184 const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
1185 const int16_t w =
1186 (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
1187 dst[m] = (uint16_t)clip_pixel_highbd(w, bit_depth);
1188 }
1189 }
1190}
1191
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001192static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
1193 int width, int height, int stride,
1194 RestorationInternal *rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001195 int bit_depth, uint16_t *dst,
1196 int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001197 const int tile_width = rst->tile_width;
1198 const int tile_height = rst->tile_height;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001199 int h_start, h_end, v_start, v_end;
David Barker9666e752016-12-08 11:25:47 +00001200 uint16_t *data_p, *dst_p;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001201
Debargha Mukherjee994ccd72017-01-06 11:18:23 -08001202 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
David Barker9666e752016-12-08 11:25:47 +00001203 loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst,
1204 dst_stride);
1205 return;
1206 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001207 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
1208 tile_width, tile_height, width, height, 0, 0,
1209 &h_start, &h_end, &v_start, &v_end);
1210 data_p = data + h_start + v_start * stride;
David Barker9666e752016-12-08 11:25:47 +00001211 dst_p = dst + h_start + v_start * dst_stride;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001212 apply_selfguided_restoration_highbd(
1213 data_p, h_end - h_start, v_end - v_start, stride, bit_depth,
1214 rst->rsi->sgrproj_info[tile_idx].ep, rst->rsi->sgrproj_info[tile_idx].xqd,
David Barker3a0df182016-12-21 10:44:52 +00001215 dst_p, dst_stride, rst->tmpbuf);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001216}
1217
1218static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height,
1219 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001220 int bit_depth, uint8_t *dst8,
1221 int dst_stride) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001222 int tile_idx;
1223 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
David Barker9666e752016-12-08 11:25:47 +00001224 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001225 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
1226 loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001227 bit_depth, dst, dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001228 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001229}
1230
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001231#if USE_DOMAINTXFMRF
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001232void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height,
David Barker9666e752016-12-08 11:25:47 +00001233 int stride, int param, int bit_depth,
Yaowu Xubf1d62d2016-12-14 19:20:46 -08001234 uint16_t *dst, int dst_stride,
1235 int32_t *tmpbuf) {
1236 int32_t *dat = tmpbuf;
David Barker0b04e9b2017-01-18 15:29:20 +00001237 uint8_t *diff_right = (uint8_t *)(tmpbuf + RESTORATION_TILEPELS_MAX);
1238 uint8_t *diff_down = diff_right + RESTORATION_TILEPELS_MAX;
1239 const int shift = (bit_depth - 8);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001240 int i, j, t;
David Barker0b04e9b2017-01-18 15:29:20 +00001241
1242 for (i = 0; i < height; ++i) {
1243 int cur_px = dgd[i * stride] >> shift;
1244 for (j = 0; j < width - 1; ++j) {
1245 const int next_px = dgd[i * stride + j + 1] >> shift;
1246 diff_right[i * width + j] = abs(cur_px - next_px);
1247 cur_px = next_px;
1248 }
1249 }
1250 for (j = 0; j < width; ++j) {
1251 int cur_px = dgd[j] >> shift;
1252 for (i = 0; i < height - 1; ++i) {
1253 const int next_px = dgd[(i + 1) * stride + j] >> shift;
1254 diff_down[i * width + j] = abs(cur_px - next_px);
1255 cur_px = next_px;
1256 }
1257 }
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001258 for (i = 0; i < height; ++i) {
1259 for (j = 0; j < width; ++j) {
1260 dat[i * width + j] = dgd[i * stride + j];
1261 }
1262 }
1263 for (t = 0; t < DOMAINTXFMRF_ITERS; ++t) {
David Barker0b04e9b2017-01-18 15:29:20 +00001264 apply_domaintxfmrf(t, param, diff_right, diff_down, width, height, dat,
1265 width);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001266 }
1267 for (i = 0; i < height; ++i) {
1268 for (j = 0; j < width; ++j) {
David Barker9666e752016-12-08 11:25:47 +00001269 dst[i * dst_stride + j] =
1270 clip_pixel_highbd(dat[i * width + j], bit_depth);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001271 }
1272 }
1273}
1274
David Barker9666e752016-12-08 11:25:47 +00001275static void loop_domaintxfmrf_filter_tile_highbd(
1276 uint16_t *data, int tile_idx, int width, int height, int stride,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001277 RestorationInternal *rst, int bit_depth, uint16_t *dst, int dst_stride) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001278 const int tile_width = rst->tile_width;
1279 const int tile_height = rst->tile_height;
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001280 int h_start, h_end, v_start, v_end;
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001281 int32_t *tmpbuf = (int32_t *)rst->tmpbuf;
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001282
Debargha Mukherjee994ccd72017-01-06 11:18:23 -08001283 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
David Barker9666e752016-12-08 11:25:47 +00001284 loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst,
1285 dst_stride);
1286 return;
1287 }
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001288 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
1289 tile_width, tile_height, width, height, 0, 0,
1290 &h_start, &h_end, &v_start, &v_end);
1291 av1_domaintxfmrf_restoration_highbd(
1292 data + h_start + v_start * stride, h_end - h_start, v_end - v_start,
David Barker9666e752016-12-08 11:25:47 +00001293 stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r, bit_depth,
Yaowu Xubf1d62d2016-12-14 19:20:46 -08001294 dst + h_start + v_start * dst_stride, dst_stride, tmpbuf);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001295}
1296
1297static void loop_domaintxfmrf_filter_highbd(uint8_t *data8, int width,
1298 int height, int stride,
1299 RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001300 int bit_depth, uint8_t *dst8,
1301 int dst_stride) {
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001302 int tile_idx;
1303 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
David Barker9666e752016-12-08 11:25:47 +00001304 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001305 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
1306 loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height, stride,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001307 rst, bit_depth, dst, dst_stride);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001308 }
1309}
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001310#endif // USE_DOMAINTXFMRF
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001311
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001312static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
1313 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001314 int bit_depth, uint8_t *dst8,
1315 int dst_stride) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001316 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
David Barker9666e752016-12-08 11:25:47 +00001317 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001318 int tile_idx;
David Barker025b2542016-12-08 11:50:42 +00001319 extend_frame_highbd(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001320 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
David Barker9666e752016-12-08 11:25:47 +00001321 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
1322 loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst,
1323 dst, dst_stride);
1324 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001325 loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
David Barker025b2542016-12-08 11:50:42 +00001326 bit_depth, dst, dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001327 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) {
1328 loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001329 rst, bit_depth, dst, dst_stride);
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001330#if USE_DOMAINTXFMRF
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001331 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) {
1332 loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height,
David Barker9666e752016-12-08 11:25:47 +00001333 stride, rst, bit_depth, dst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001334 dst_stride);
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001335#endif // USE_DOMAINTXFMRF
Yaowu Xuc27fc142016-08-22 16:08:15 -07001336 }
1337 }
1338}
Yaowu Xuf883b422016-08-30 14:01:10 -07001339#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001340
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001341static void loop_restoration_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
1342 int start_mi_row, int end_mi_row,
1343 int components_pattern, RestorationInfo *rsi,
1344 YV12_BUFFER_CONFIG *dst) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001345 const int ywidth = frame->y_crop_width;
1346 const int ystride = frame->y_stride;
1347 const int uvwidth = frame->uv_crop_width;
1348 const int uvstride = frame->uv_stride;
1349 const int ystart = start_mi_row << MI_SIZE_LOG2;
1350 const int uvstart = ystart >> cm->subsampling_y;
1351 int yend = end_mi_row << MI_SIZE_LOG2;
1352 int uvend = yend >> cm->subsampling_y;
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001353 restore_func_type restore_funcs[RESTORE_TYPES] = {
1354 NULL,
1355 loop_wiener_filter,
1356 loop_sgrproj_filter,
1357#if USE_DOMAINTXFMRF
1358 loop_domaintxfmrf_filter,
1359#endif // USE_DOMAINTXFMRF
1360 loop_switchable_filter
1361 };
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001362#if CONFIG_AOM_HIGHBITDEPTH
1363 restore_func_highbd_type restore_funcs_highbd[RESTORE_TYPES] = {
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -08001364 NULL,
1365 loop_wiener_filter_highbd,
1366 loop_sgrproj_filter_highbd,
1367#if USE_DOMAINTXFMRF
1368 loop_domaintxfmrf_filter_highbd,
1369#endif // USE_DOMAINTXFMRF
1370 loop_switchable_filter_highbd
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001371 };
1372#endif // CONFIG_AOM_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001373 restore_func_type restore_func;
Yaowu Xuf883b422016-08-30 14:01:10 -07001374#if CONFIG_AOM_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001375 restore_func_highbd_type restore_func_highbd;
Yaowu Xuf883b422016-08-30 14:01:10 -07001376#endif // CONFIG_AOM_HIGHBITDEPTH
David Barker9666e752016-12-08 11:25:47 +00001377 YV12_BUFFER_CONFIG dst_;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001378
Yaowu Xuf883b422016-08-30 14:01:10 -07001379 yend = AOMMIN(yend, cm->height);
1380 uvend = AOMMIN(uvend, cm->subsampling_y ? (cm->height + 1) >> 1 : cm->height);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001381
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001382 if (components_pattern == (1 << AOM_PLANE_Y)) {
1383 // Only y
1384 if (rsi[0].frame_restoration_type == RESTORE_NONE) {
1385 if (dst) aom_yv12_copy_y(frame, dst);
1386 return;
David Barker9666e752016-12-08 11:25:47 +00001387 }
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001388 } else if (components_pattern == (1 << AOM_PLANE_U)) {
1389 // Only U
1390 if (rsi[1].frame_restoration_type == RESTORE_NONE) {
1391 if (dst) aom_yv12_copy_u(frame, dst);
1392 return;
1393 }
1394 } else if (components_pattern == (1 << AOM_PLANE_V)) {
1395 // Only V
1396 if (rsi[2].frame_restoration_type == RESTORE_NONE) {
1397 if (dst) aom_yv12_copy_v(frame, dst);
1398 return;
1399 }
1400 } else if (components_pattern ==
1401 ((1 << AOM_PLANE_Y) | (1 << AOM_PLANE_U) | (1 << AOM_PLANE_V))) {
1402 // All components
1403 if (rsi[0].frame_restoration_type == RESTORE_NONE &&
1404 rsi[1].frame_restoration_type == RESTORE_NONE &&
1405 rsi[2].frame_restoration_type == RESTORE_NONE) {
1406 if (dst) aom_yv12_copy_frame(frame, dst);
1407 return;
1408 }
David Barker9666e752016-12-08 11:25:47 +00001409 }
1410
David Barker9666e752016-12-08 11:25:47 +00001411 if (!dst) {
1412 dst = &dst_;
1413 memset(dst, 0, sizeof(YV12_BUFFER_CONFIG));
1414 if (aom_realloc_frame_buffer(
1415 dst, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y,
1416#if CONFIG_AOM_HIGHBITDEPTH
1417 cm->use_highbitdepth,
1418#endif
1419 AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL) < 0)
1420 aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
1421 "Failed to allocate restoration dst buffer");
1422 }
Debargha Mukherjee818e42a2016-12-12 11:52:56 -08001423
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001424 if ((components_pattern >> AOM_PLANE_Y) & 1) {
1425 if (rsi[0].frame_restoration_type != RESTORE_NONE) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001426 cm->rst_internal.ntiles = av1_get_rest_ntiles(
1427 cm->width, cm->height, &cm->rst_internal.tile_width,
1428 &cm->rst_internal.tile_height, &cm->rst_internal.nhtiles,
1429 &cm->rst_internal.nvtiles);
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001430 cm->rst_internal.rsi = &rsi[0];
1431 restore_func =
1432 restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
Yaowu Xuf883b422016-08-30 14:01:10 -07001433#if CONFIG_AOM_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001434 restore_func_highbd =
1435 restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type];
1436 if (cm->use_highbitdepth)
1437 restore_func_highbd(
1438 frame->y_buffer + ystart * ystride, ywidth, yend - ystart, ystride,
1439 &cm->rst_internal, cm->bit_depth,
1440 dst->y_buffer + ystart * dst->y_stride, dst->y_stride);
1441 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001442#endif // CONFIG_AOM_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001443 restore_func(frame->y_buffer + ystart * ystride, ywidth, yend - ystart,
1444 ystride, &cm->rst_internal,
1445 dst->y_buffer + ystart * dst->y_stride, dst->y_stride);
1446 } else {
1447 aom_yv12_copy_y(frame, dst);
1448 }
1449 }
1450
1451 if ((components_pattern >> AOM_PLANE_U) & 1) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001452 if (rsi[AOM_PLANE_U].frame_restoration_type != RESTORE_NONE) {
1453 cm->rst_internal.ntiles = av1_get_rest_ntiles(
Debargha Mukherjee1a0ae842017-01-26 16:45:22 -08001454 ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x),
1455 ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y),
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001456 &cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
1457 &cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
1458 cm->rst_internal.rsi = &rsi[AOM_PLANE_U];
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001459 restore_func =
1460 restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
Yaowu Xuf883b422016-08-30 14:01:10 -07001461#if CONFIG_AOM_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001462 restore_func_highbd =
1463 restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type];
1464 if (cm->use_highbitdepth)
1465 restore_func_highbd(
1466 frame->u_buffer + uvstart * uvstride, uvwidth, uvend - uvstart,
1467 uvstride, &cm->rst_internal, cm->bit_depth,
1468 dst->u_buffer + uvstart * dst->uv_stride, dst->uv_stride);
1469 else
1470#endif // CONFIG_AOM_HIGHBITDEPTH
1471 restore_func(frame->u_buffer + uvstart * uvstride, uvwidth,
1472 uvend - uvstart, uvstride, &cm->rst_internal,
1473 dst->u_buffer + uvstart * dst->uv_stride, dst->uv_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001474 } else {
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001475 aom_yv12_copy_u(frame, dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001476 }
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001477 }
1478
1479 if ((components_pattern >> AOM_PLANE_V) & 1) {
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001480 if (rsi[AOM_PLANE_V].frame_restoration_type != RESTORE_NONE) {
1481 cm->rst_internal.ntiles = av1_get_rest_ntiles(
Debargha Mukherjee1a0ae842017-01-26 16:45:22 -08001482 ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x),
1483 ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y),
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001484 &cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
1485 &cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
1486 cm->rst_internal.rsi = &rsi[AOM_PLANE_V];
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001487 restore_func =
1488 restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
1489#if CONFIG_AOM_HIGHBITDEPTH
1490 restore_func_highbd =
1491 restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type];
1492 if (cm->use_highbitdepth)
1493 restore_func_highbd(
1494 frame->v_buffer + uvstart * uvstride, uvwidth, uvend - uvstart,
1495 uvstride, &cm->rst_internal, cm->bit_depth,
1496 dst->v_buffer + uvstart * dst->uv_stride, dst->uv_stride);
1497 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001498#endif // CONFIG_AOM_HIGHBITDEPTH
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001499 restore_func(frame->v_buffer + uvstart * uvstride, uvwidth,
1500 uvend - uvstart, uvstride, &cm->rst_internal,
1501 dst->v_buffer + uvstart * dst->uv_stride, dst->uv_stride);
1502 } else {
1503 aom_yv12_copy_v(frame, dst);
1504 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001505 }
David Barker9666e752016-12-08 11:25:47 +00001506
David Barker9666e752016-12-08 11:25:47 +00001507 if (dst == &dst_) {
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001508 if ((components_pattern >> AOM_PLANE_Y) & 1) aom_yv12_copy_y(dst, frame);
1509 if ((components_pattern >> AOM_PLANE_U) & 1) aom_yv12_copy_u(dst, frame);
1510 if ((components_pattern >> AOM_PLANE_V) & 1) aom_yv12_copy_v(dst, frame);
David Barker9666e752016-12-08 11:25:47 +00001511 aom_free_frame_buffer(dst);
1512 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001513}
1514
Yaowu Xuf883b422016-08-30 14:01:10 -07001515void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001516 RestorationInfo *rsi, int components_pattern,
David Barker9666e752016-12-08 11:25:47 +00001517 int partial_frame, YV12_BUFFER_CONFIG *dst) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001518 int start_mi_row, end_mi_row, mi_rows_to_filter;
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001519 start_mi_row = 0;
1520 mi_rows_to_filter = cm->mi_rows;
1521 if (partial_frame && cm->mi_rows > 8) {
1522 start_mi_row = cm->mi_rows >> 1;
1523 start_mi_row &= 0xfffffff8;
1524 mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001525 }
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001526 end_mi_row = start_mi_row + mi_rows_to_filter;
Debargha Mukherjeed7489142017-01-05 13:58:16 -08001527 loop_restoration_init(&cm->rst_internal, cm->frame_type == KEY_FRAME);
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001528 loop_restoration_rows(frame, cm, start_mi_row, end_mi_row, components_pattern,
1529 rsi, dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001530}