blob: 59ffab974ed046d7cb4e9c54d94bc17cb19b18a1 [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 *
Yaowu Xuc27fc142016-08-22 16:08:15 -070011 */
12
13#include <math.h>
14
Yaowu Xuf883b422016-08-30 14:01:10 -070015#include "./aom_config.h"
16#include "./aom_dsp_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070017#include "av1/common/onyxc_int.h"
18#include "av1/common/restoration.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070019#include "aom_dsp/aom_dsp_common.h"
20#include "aom_mem/aom_mem.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070021#include "aom_ports/mem.h"
22
// Bilateral sigma parameters are stored as integers in units of
// 1 / BILATERAL_PARAM_PRECISION.
#define BILATERAL_PARAM_PRECISION 16
// Largest absolute pixel difference covered by a range-weight table.
#define BILATERAL_AMP_RANGE 256
// Entries in a range-weight table: one per difference in
// [-BILATERAL_AMP_RANGE, +BILATERAL_AMP_RANGE].
#define BILATERAL_AMP_RANGE_SYM (BILATERAL_AMP_RANGE + BILATERAL_AMP_RANGE + 1)
26
clang-format67948d32016-09-07 22:40:40 -070027static uint8_t bilateral_filter_coeffs_r_kf[BILATERAL_LEVELS_KF]
28 [BILATERAL_AMP_RANGE_SYM];
29static uint8_t bilateral_filter_coeffs_r[BILATERAL_LEVELS]
30 [BILATERAL_AMP_RANGE_SYM];
Yaowu Xuc27fc142016-08-22 16:08:15 -070031static uint8_t bilateral_filter_coeffs_s_kf[BILATERAL_LEVELS_KF]
32 [RESTORATION_WIN][RESTORATION_WIN];
33static uint8_t bilateral_filter_coeffs_s[BILATERAL_LEVELS][RESTORATION_WIN]
34 [RESTORATION_WIN];
35
// Gaussian widths for one bilateral filter level.  Each field is stored in
// units of 1 / BILATERAL_PARAM_PRECISION; av1_loop_restoration_precal() uses
// each one as the sigma of exp(-d^2 / (2 * sigma^2)).
typedef struct bilateral_params {
  // Spatial sigma along x.
  int sigma_x;
  // Spatial sigma along y.
  int sigma_y;
  // Range (pixel difference) sigma.
  int sigma_r;
} BilateralParamsType;
41
42static BilateralParamsType bilateral_level_to_params_arr[BILATERAL_LEVELS] = {
Debargha Mukherjee5d89a632016-09-17 13:16:58 -070043 // Values are rounded to 1/16 th precision
Yaowu Xuc27fc142016-08-22 16:08:15 -070044 { 8, 9, 30 }, { 9, 8, 30 }, { 9, 11, 32 }, { 11, 9, 32 },
Debargha Mukherjee035c5f32016-09-03 02:06:13 -070045 { 14, 14, 36 }, { 18, 18, 36 }, { 24, 24, 40 }, { 32, 32, 40 },
Yaowu Xuc27fc142016-08-22 16:08:15 -070046};
47
48static BilateralParamsType
49 bilateral_level_to_params_arr_kf[BILATERAL_LEVELS_KF] = {
Debargha Mukherjee5d89a632016-09-17 13:16:58 -070050 // Values are rounded to 1/16 th precision
Yaowu Xuc27fc142016-08-22 16:08:15 -070051 { 8, 8, 30 }, { 9, 9, 32 }, { 10, 10, 32 }, { 12, 12, 32 },
52 { 14, 14, 32 }, { 18, 18, 36 }, { 24, 24, 40 }, { 30, 30, 44 },
53 { 36, 36, 48 }, { 42, 42, 48 }, { 48, 48, 48 }, { 48, 48, 56 },
54 { 56, 56, 48 }, { 56, 56, 56 }, { 56, 56, 64 }, { 64, 64, 48 },
55 };
56
Debargha Mukherjee8f209a82016-10-12 10:47:01 -070057const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
58 // r1, eps1, r2, eps2
59 { 2, 27, 1, 11 }, { 2, 31, 1, 12 }, { 2, 37, 1, 12 }, { 2, 44, 1, 12 },
60 { 2, 49, 1, 13 }, { 2, 54, 1, 14 }, { 2, 60, 1, 15 }, { 2, 68, 1, 15 },
61};
62
Yaowu Xuc27fc142016-08-22 16:08:15 -070063typedef void (*restore_func_type)(uint8_t *data8, int width, int height,
64 int stride, RestorationInternal *rst,
65 uint8_t *tmpdata8, int tmpstride);
Yaowu Xuf883b422016-08-30 14:01:10 -070066#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -070067typedef void (*restore_func_highbd_type)(uint8_t *data8, int width, int height,
68 int stride, RestorationInternal *rst,
69 uint8_t *tmpdata8, int tmpstride,
70 int bit_depth);
Yaowu Xuf883b422016-08-30 14:01:10 -070071#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -070072
Yaowu Xuf883b422016-08-30 14:01:10 -070073static INLINE BilateralParamsType av1_bilateral_level_to_params(int index,
74 int kf) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070075 return kf ? bilateral_level_to_params_arr_kf[index]
76 : bilateral_level_to_params_arr[index];
77}
78
Yaowu Xuf883b422016-08-30 14:01:10 -070079void av1_loop_restoration_precal() {
Yaowu Xuc27fc142016-08-22 16:08:15 -070080 int i;
81 for (i = 0; i < BILATERAL_LEVELS_KF; i++) {
Yaowu Xuf883b422016-08-30 14:01:10 -070082 const BilateralParamsType param = av1_bilateral_level_to_params(i, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -070083 const int sigma_x = param.sigma_x;
84 const int sigma_y = param.sigma_y;
85 const int sigma_r = param.sigma_r;
86 const double sigma_r_d = (double)sigma_r / BILATERAL_PARAM_PRECISION;
87 const double sigma_x_d = (double)sigma_x / BILATERAL_PARAM_PRECISION;
88 const double sigma_y_d = (double)sigma_y / BILATERAL_PARAM_PRECISION;
89
90 uint8_t *fr = bilateral_filter_coeffs_r_kf[i] + BILATERAL_AMP_RANGE;
91 int j, x, y;
92 for (j = 0; j <= BILATERAL_AMP_RANGE; j++) {
93 fr[j] = (uint8_t)(0.5 +
94 RESTORATION_FILT_STEP *
95 exp(-(j * j) / (2 * sigma_r_d * sigma_r_d)));
96 fr[-j] = fr[j];
97 }
98 for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; y++) {
99 for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; x++) {
clang-format67948d32016-09-07 22:40:40 -0700100 bilateral_filter_coeffs_s_kf[i][y + RESTORATION_HALFWIN]
101 [x + RESTORATION_HALFWIN] = (uint8_t)(
102 0.5 +
103 RESTORATION_FILT_STEP *
104 exp(-(x * x) / (2 * sigma_x_d *
105 sigma_x_d) -
106 (y * y) / (2 * sigma_y_d *
107 sigma_y_d)));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700108 }
109 }
110 }
111 for (i = 0; i < BILATERAL_LEVELS; i++) {
Yaowu Xuf883b422016-08-30 14:01:10 -0700112 const BilateralParamsType param = av1_bilateral_level_to_params(i, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700113 const int sigma_x = param.sigma_x;
114 const int sigma_y = param.sigma_y;
115 const int sigma_r = param.sigma_r;
116 const double sigma_r_d = (double)sigma_r / BILATERAL_PARAM_PRECISION;
117 const double sigma_x_d = (double)sigma_x / BILATERAL_PARAM_PRECISION;
118 const double sigma_y_d = (double)sigma_y / BILATERAL_PARAM_PRECISION;
119
120 uint8_t *fr = bilateral_filter_coeffs_r[i] + BILATERAL_AMP_RANGE;
121 int j, x, y;
122 for (j = 0; j <= BILATERAL_AMP_RANGE; j++) {
123 fr[j] = (uint8_t)(0.5 +
124 RESTORATION_FILT_STEP *
125 exp(-(j * j) / (2 * sigma_r_d * sigma_r_d)));
126 fr[-j] = fr[j];
127 }
128 for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; y++) {
129 for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; x++) {
clang-format67948d32016-09-07 22:40:40 -0700130 bilateral_filter_coeffs_s[i][y + RESTORATION_HALFWIN]
131 [x + RESTORATION_HALFWIN] = (uint8_t)(
132 0.5 +
133 RESTORATION_FILT_STEP *
134 exp(-(x * x) /
135 (2 * sigma_x_d * sigma_x_d) -
136 (y * y) /
137 (2 * sigma_y_d * sigma_y_d)));
Yaowu Xuc27fc142016-08-22 16:08:15 -0700138 }
139 }
140 }
141}
142
Yaowu Xuf883b422016-08-30 14:01:10 -0700143int av1_bilateral_level_bits(const AV1_COMMON *const cm) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700144 return cm->frame_type == KEY_FRAME ? BILATERAL_LEVEL_BITS_KF
145 : BILATERAL_LEVEL_BITS;
146}
147
Yaowu Xuf883b422016-08-30 14:01:10 -0700148void av1_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
149 int kf, int width, int height) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700150 int i, tile_idx;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700151 rst->rsi = rsi;
152 rst->keyframe = kf;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700153 rst->subsampling_x = 0;
154 rst->subsampling_y = 0;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700155 rst->ntiles =
clang-formatbda8d612016-09-19 15:55:46 -0700156 av1_get_rest_ntiles(width, height, &rst->tile_width, &rst->tile_height,
157 &rst->nhtiles, &rst->nvtiles);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700158 if (rsi->frame_restoration_type == RESTORE_WIENER) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700159 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700160 rsi->wiener_info[tile_idx].vfilter[RESTORATION_HALFWIN] =
161 rsi->wiener_info[tile_idx].hfilter[RESTORATION_HALFWIN] =
162 RESTORATION_FILT_STEP;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700163 for (i = 0; i < RESTORATION_HALFWIN; ++i) {
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700164 rsi->wiener_info[tile_idx].vfilter[RESTORATION_WIN - 1 - i] =
165 rsi->wiener_info[tile_idx].vfilter[i];
166 rsi->wiener_info[tile_idx].hfilter[RESTORATION_WIN - 1 - i] =
167 rsi->wiener_info[tile_idx].hfilter[i];
168 rsi->wiener_info[tile_idx].vfilter[RESTORATION_HALFWIN] -=
169 2 * rsi->wiener_info[tile_idx].vfilter[i];
170 rsi->wiener_info[tile_idx].hfilter[RESTORATION_HALFWIN] -=
171 2 * rsi->wiener_info[tile_idx].hfilter[i];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700172 }
173 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700174 } else if (rsi->frame_restoration_type == RESTORE_SWITCHABLE) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700175 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700176 if (rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700177 rsi->wiener_info[tile_idx].vfilter[RESTORATION_HALFWIN] =
178 rsi->wiener_info[tile_idx].hfilter[RESTORATION_HALFWIN] =
179 RESTORATION_FILT_STEP;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700180 for (i = 0; i < RESTORATION_HALFWIN; ++i) {
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700181 rsi->wiener_info[tile_idx].vfilter[RESTORATION_WIN - 1 - i] =
182 rsi->wiener_info[tile_idx].vfilter[i];
183 rsi->wiener_info[tile_idx].hfilter[RESTORATION_WIN - 1 - i] =
184 rsi->wiener_info[tile_idx].hfilter[i];
185 rsi->wiener_info[tile_idx].vfilter[RESTORATION_HALFWIN] -=
186 2 * rsi->wiener_info[tile_idx].vfilter[i];
187 rsi->wiener_info[tile_idx].hfilter[RESTORATION_HALFWIN] -=
188 2 * rsi->wiener_info[tile_idx].hfilter[i];
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700189 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700190 }
191 }
192 }
193}
194
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700195static void loop_bilateral_filter_tile(uint8_t *data, int tile_idx, int width,
196 int height, int stride,
197 RestorationInternal *rst,
198 uint8_t *tmpdata, int tmpstride) {
199 int i, j, subtile_idx;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700200 int h_start, h_end, v_start, v_end;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700201 const int tile_width = rst->tile_width >> rst->subsampling_x;
202 const int tile_height = rst->tile_height >> rst->subsampling_y;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700203
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700204 for (subtile_idx = 0; subtile_idx < BILATERAL_SUBTILES; ++subtile_idx) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700205 uint8_t *data_p, *tmpdata_p;
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700206 const int level = rst->rsi->bilateral_info[tile_idx].level[subtile_idx];
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700207 uint8_t(*wx_lut)[RESTORATION_WIN];
208 uint8_t *wr_lut_;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700209
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700210 if (level < 0) continue;
211 wr_lut_ = (rst->keyframe ? bilateral_filter_coeffs_r_kf[level]
212 : bilateral_filter_coeffs_r[level]) +
213 BILATERAL_AMP_RANGE;
214 wx_lut = rst->keyframe ? bilateral_filter_coeffs_s_kf[level]
215 : bilateral_filter_coeffs_s[level];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700216
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700217 av1_get_rest_tile_limits(tile_idx, subtile_idx, BILATERAL_SUBTILE_BITS,
218 rst->nhtiles, rst->nvtiles, tile_width,
219 tile_height, width, height, 1, 1, &h_start, &h_end,
220 &v_start, &v_end);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700221
222 data_p = data + h_start + v_start * stride;
223 tmpdata_p = tmpdata + h_start + v_start * tmpstride;
224
225 for (i = 0; i < (v_end - v_start); ++i) {
226 for (j = 0; j < (h_end - h_start); ++j) {
Debargha Mukherjee1a16a982016-10-04 11:50:00 -0700227 int x, y, wt;
228 int64_t flsum = 0, wtsum = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700229 uint8_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride;
230 for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
231 for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700232 wt = (int)wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] *
Yaowu Xuc27fc142016-08-22 16:08:15 -0700233 (int)wr_lut_[data_p2[x] - data_p[j]];
Debargha Mukherjee1a16a982016-10-04 11:50:00 -0700234 wtsum += (int64_t)wt;
235 flsum += (int64_t)wt * data_p2[x];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700236 }
237 data_p2 += stride;
238 }
239 if (wtsum > 0)
240 tmpdata_p[j] = clip_pixel((int)((flsum + wtsum / 2) / wtsum));
241 else
242 tmpdata_p[j] = data_p[j];
243 }
244 tmpdata_p += tmpstride;
245 data_p += stride;
246 }
247 for (i = v_start; i < v_end; ++i) {
248 memcpy(data + i * stride + h_start, tmpdata + i * tmpstride + h_start,
249 (h_end - h_start) * sizeof(*data));
250 }
251 }
252}
253
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700254static void loop_bilateral_filter(uint8_t *data, int width, int height,
255 int stride, RestorationInternal *rst,
256 uint8_t *tmpdata, int tmpstride) {
257 int tile_idx;
258 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
259 loop_bilateral_filter_tile(data, tile_idx, width, height, stride, rst,
260 tmpdata, tmpstride);
261 }
262}
263
Yaowu Xuc27fc142016-08-22 16:08:15 -0700264uint8_t hor_sym_filter(uint8_t *d, int *hfilter) {
265 int32_t s =
266 (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * hfilter[RESTORATION_HALFWIN];
267 int i;
268 for (i = 1; i <= RESTORATION_HALFWIN; ++i)
269 s += (d[i] + d[-i]) * hfilter[RESTORATION_HALFWIN + i];
270 return clip_pixel(s >> RESTORATION_FILT_BITS);
271}
272
273uint8_t ver_sym_filter(uint8_t *d, int stride, int *vfilter) {
274 int32_t s =
275 (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * vfilter[RESTORATION_HALFWIN];
276 int i;
277 for (i = 1; i <= RESTORATION_HALFWIN; ++i)
278 s += (d[i * stride] + d[-i * stride]) * vfilter[RESTORATION_HALFWIN + i];
279 return clip_pixel(s >> RESTORATION_FILT_BITS);
280}
281
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700282static void loop_wiener_filter_tile(uint8_t *data, int tile_idx, int width,
283 int height, int stride,
284 RestorationInternal *rst, uint8_t *tmpdata,
285 int tmpstride) {
286 const int tile_width = rst->tile_width >> rst->subsampling_x;
287 const int tile_height = rst->tile_height >> rst->subsampling_y;
288 int i, j;
289 int h_start, h_end, v_start, v_end;
290 uint8_t *data_p, *tmpdata_p;
291
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700292 if (rst->rsi->wiener_info[tile_idx].level == 0) return;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700293 // Filter row-wise
294 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
295 tile_width, tile_height, width, height, 1, 0,
296 &h_start, &h_end, &v_start, &v_end);
297 data_p = data + h_start + v_start * stride;
298 tmpdata_p = tmpdata + h_start + v_start * tmpstride;
299 for (i = 0; i < (v_end - v_start); ++i) {
300 for (j = 0; j < (h_end - h_start); ++j) {
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700301 *tmpdata_p++ =
302 hor_sym_filter(data_p++, rst->rsi->wiener_info[tile_idx].hfilter);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700303 }
304 data_p += stride - (h_end - h_start);
305 tmpdata_p += tmpstride - (h_end - h_start);
306 }
307 // Filter col-wise
308 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
309 tile_width, tile_height, width, height, 0, 1,
310 &h_start, &h_end, &v_start, &v_end);
311 data_p = data + h_start + v_start * stride;
312 tmpdata_p = tmpdata + h_start + v_start * tmpstride;
313 for (i = 0; i < (v_end - v_start); ++i) {
314 for (j = 0; j < (h_end - h_start); ++j) {
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700315 *data_p++ = ver_sym_filter(tmpdata_p++, tmpstride,
316 rst->rsi->wiener_info[tile_idx].vfilter);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700317 }
318 data_p += stride - (h_end - h_start);
319 tmpdata_p += tmpstride - (h_end - h_start);
320 }
321}
322
Yaowu Xuc27fc142016-08-22 16:08:15 -0700323static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
324 RestorationInternal *rst, uint8_t *tmpdata,
325 int tmpstride) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700326 int tile_idx;
327 int i;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700328 uint8_t *data_p, *tmpdata_p;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700329 // Initialize tmp buffer
330 data_p = data;
331 tmpdata_p = tmpdata;
332 for (i = 0; i < height; ++i) {
333 memcpy(tmpdata_p, data_p, sizeof(*data_p) * width);
334 data_p += stride;
335 tmpdata_p += tmpstride;
336 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700337 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700338 loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, tmpdata,
339 tmpstride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700340 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700341}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700342
// Computes, for every position of a width x height image, the sum of the
// samples (or of their squares when `sqr` is non-zero) inside the
// (2r+1) x (2r+1) window centred there, with the window clipped to the image.
//   src, src_stride - input image and its row pitch.
//   dst, dst_stride - output sums and their row pitch.
//   tmp, tmp_stride - scratch of at least `height` rows of `width` entries.
// The sum is separable: a vertical pass (column prefix sums differenced
// 2r+1 rows apart) writes dst, then a horizontal pass does the same along
// rows.  src is read only during the first prefix-sum step, before dst is
// written, so src and dst may be the same buffer.
// Requires width >= 2 * r + 1 and height >= 2 * r + 1; smaller sizes would
// read prefix sums that were never written.
static void boxsum(int64_t *src, int width, int height, int src_stride, int r,
                   int sqr, int64_t *dst, int dst_stride, int64_t *tmp,
                   int tmp_stride) {
  int i, j;

  // Vertical prefix sums of src (or src^2) into tmp.
  if (sqr) {
    for (j = 0; j < width; ++j) tmp[j] = src[j] * src[j];
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] +
            src[i * src_stride + j] * src[i * src_stride + j];
  } else {
    memcpy(tmp, src, sizeof(*tmp) * width);
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] + src[i * src_stride + j];
  }
  // Vertical window sums: the top r+1 rows have nothing to subtract, the
  // middle rows use a full window, the bottom r rows are clipped at the last
  // row.
  for (i = 0; i <= r; ++i)
    memcpy(&dst[i * dst_stride], &tmp[(i + r) * tmp_stride],
           sizeof(*tmp) * width);
  for (i = r + 1; i < height - r; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] =
          tmp[(i + r) * tmp_stride + j] - tmp[(i - r - 1) * tmp_stride + j];
  for (i = height - r; i < height; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] = tmp[(height - 1) * tmp_stride + j] -
                                tmp[(i - r - 1) * tmp_stride + j];

  // Horizontal prefix sums of the vertical sums.  dst must be indexed with
  // its own pitch here; using src_stride reads the wrong rows (or past the
  // buffer) whenever the two pitches differ.
  for (i = 0; i < height; ++i) tmp[i * tmp_stride] = dst[i * dst_stride];
  for (i = 0; i < height; ++i)
    for (j = 1; j < width; ++j)
      tmp[i * tmp_stride + j] =
          tmp[i * tmp_stride + j - 1] + dst[i * dst_stride + j];

  // Horizontal window sums, with the same three-zone structure as above.
  for (j = 0; j <= r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] = tmp[i * tmp_stride + j + r];
  for (j = r + 1; j < width - r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + j + r] - tmp[i * tmp_stride + j - r - 1];
  for (j = width - r; j < width; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + width - 1] - tmp[i * tmp_stride + j - r - 1];
}
392
// Writes, for every position of a width x height grid, the number of grid
// cells covered by the (2r+1) x (2r+1) window centred there once the window
// is clipped to the grid.  `num` has a row pitch of `num_stride`.  The grid
// is filled in four groups: corner regions, left/right bands, top/bottom
// bands and the interior.
static void boxnum(int width, int height, int r, int8_t *num, int num_stride) {
  const int full = 2 * r + 1;  // Extent of an unclipped window along one axis.
  int i, j;

  // Corner regions: the window is clipped along both axes.
  for (i = 0; i <= r; ++i) {
    int8_t *const top = num + i * num_stride;
    int8_t *const bot = num + (height - 1 - i) * num_stride;
    for (j = 0; j <= r; ++j) {
      top[j] = (r + 1 + i) * (r + 1 + j);
      top[width - 1 - j] = top[j];
      bot[j] = top[j];
      bot[width - 1 - j] = top[j];
    }
  }
  // Left and right bands: clipped horizontally only.
  for (j = 0; j <= r; ++j) {
    const int val = full * (r + 1 + j);
    for (i = r + 1; i < height - r; ++i) {
      int8_t *const line = num + i * num_stride;
      line[j] = val;
      line[width - 1 - j] = val;
    }
  }
  // Top and bottom bands: clipped vertically only.
  for (i = 0; i <= r; ++i) {
    const int val = full * (r + 1 + i);
    for (j = r + 1; j < width - r; ++j) {
      num[i * num_stride + j] = val;
      num[(height - 1 - i) * num_stride + j] = val;
    }
  }
  // Interior: full window.
  for (i = r + 1; i < height - r; ++i) {
    for (j = r + 1; j < width - r; ++j) {
      num[i * num_stride + j] = full * full;
    }
  }
}
424
425void decode_xq(int *xqd, int *xq) {
426 xq[0] = -xqd[0];
427 xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
428}
429
430#define APPROXIMATE_SGR 1
431void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
432 int bit_depth, int r, int eps, void *tmpbuf) {
433 int64_t *A = (int64_t *)tmpbuf;
434 int64_t *B = A + RESTORATION_TILEPELS_MAX;
435 int64_t *T = B + RESTORATION_TILEPELS_MAX;
436 int8_t num[RESTORATION_TILEPELS_MAX];
437 int i, j;
438 eps <<= 2 * (bit_depth - 8);
439
440 boxsum(dgd, width, height, stride, r, 0, B, width, T, width);
441 boxsum(dgd, width, height, stride, r, 1, A, width, T, width);
442 boxnum(width, height, r, num, width);
443 for (i = 0; i < height; ++i) {
444 for (j = 0; j < width; ++j) {
445 const int k = i * width + j;
446 const int n = num[k];
447 int64_t den;
448 A[k] = A[k] * n - B[k] * B[k];
449 den = A[k] + n * n * eps;
450 A[k] = ((A[k] << SGRPROJ_SGR_BITS) + (den >> 1)) / den;
451 B[k] = ((SGRPROJ_SGR - A[k]) * B[k] + (n >> 1)) / n;
452 }
453 }
454#if APPROXIMATE_SGR
455 i = 0;
456 j = 0;
457 {
458 const int k = i * width + j;
459 const int l = i * stride + j;
460 const int nb = 3;
461 const int64_t a =
462 3 * A[k] + 2 * A[k + 1] + 2 * A[k + width] + A[k + width + 1];
463 const int64_t b =
464 3 * B[k] + 2 * B[k + 1] + 2 * B[k + width] + B[k + width + 1];
465 const int64_t v =
466 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
467 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
468 }
469 i = 0;
470 j = width - 1;
471 {
472 const int k = i * width + j;
473 const int l = i * stride + j;
474 const int nb = 3;
475 const int64_t a =
476 3 * A[k] + 2 * A[k - 1] + 2 * A[k + width] + A[k + width - 1];
477 const int64_t b =
478 3 * B[k] + 2 * B[k - 1] + 2 * B[k + width] + B[k + width - 1];
479 const int64_t v =
480 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
481 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
482 }
483 i = height - 1;
484 j = 0;
485 {
486 const int k = i * width + j;
487 const int l = i * stride + j;
488 const int nb = 3;
489 const int64_t a =
490 3 * A[k] + 2 * A[k + 1] + 2 * A[k - width] + A[k - width + 1];
491 const int64_t b =
492 3 * B[k] + 2 * B[k + 1] + 2 * B[k - width] + B[k - width + 1];
493 const int64_t v =
494 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
495 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
496 }
497 i = height - 1;
498 j = width - 1;
499 {
500 const int k = i * width + j;
501 const int l = i * stride + j;
502 const int nb = 3;
503 const int64_t a =
504 3 * A[k] + 2 * A[k - 1] + 2 * A[k - width] + A[k - width - 1];
505 const int64_t b =
506 3 * B[k] + 2 * B[k - 1] + 2 * B[k - width] + B[k - width - 1];
507 const int64_t v =
508 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
509 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
510 }
511 i = 0;
512 for (j = 1; j < width - 1; ++j) {
513 const int k = i * width + j;
514 const int l = i * stride + j;
515 const int nb = 3;
516 const int64_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + width] +
517 A[k + width - 1] + A[k + width + 1];
518 const int64_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + width] +
519 B[k + width - 1] + B[k + width + 1];
520 const int64_t v =
521 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
522 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
523 }
524 i = height - 1;
525 for (j = 1; j < width - 1; ++j) {
526 const int k = i * width + j;
527 const int l = i * stride + j;
528 const int nb = 3;
529 const int64_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - width] +
530 A[k - width - 1] + A[k - width + 1];
531 const int64_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - width] +
532 B[k - width - 1] + B[k - width + 1];
533 const int64_t v =
534 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
535 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
536 }
537 j = 0;
538 for (i = 1; i < height - 1; ++i) {
539 const int k = i * width + j;
540 const int l = i * stride + j;
541 const int nb = 3;
542 const int64_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k + 1] +
543 A[k - width + 1] + A[k + width + 1];
544 const int64_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k + 1] +
545 B[k - width + 1] + B[k + width + 1];
546 const int64_t v =
547 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
548 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
549 }
550 j = width - 1;
551 for (i = 1; i < height - 1; ++i) {
552 const int k = i * width + j;
553 const int l = i * stride + j;
554 const int nb = 3;
555 const int64_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k - 1] +
556 A[k - width - 1] + A[k + width - 1];
557 const int64_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k - 1] +
558 B[k - width - 1] + B[k + width - 1];
559 const int64_t v =
560 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
561 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
562 }
563 for (i = 1; i < height - 1; ++i) {
564 for (j = 1; j < width - 1; ++j) {
565 const int k = i * width + j;
566 const int l = i * stride + j;
567 const int nb = 5;
568 const int64_t a =
569 (A[k] + A[k - 1] + A[k + 1] + A[k - width] + A[k + width]) * 4 +
570 (A[k - 1 - width] + A[k - 1 + width] + A[k + 1 - width] +
571 A[k + 1 + width]) *
572 3;
573 const int64_t b =
574 (B[k] + B[k - 1] + B[k + 1] + B[k - width] + B[k + width]) * 4 +
575 (B[k - 1 - width] + B[k - 1 + width] + B[k + 1 - width] +
576 B[k + 1 + width]) *
577 3;
578 const int64_t v =
579 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
580 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
581 }
582 }
583#else
584 if (r > 1) boxnum(width, height, r = 1, num, width);
585 boxsum(A, width, height, width, r, 0, A, width, T, width);
586 boxsum(B, width, height, width, r, 0, B, width, T, width);
587 for (i = 0; i < height; ++i) {
588 for (j = 0; j < width; ++j) {
589 const int k = i * width + j;
590 const int l = i * stride + j;
591 const int n = num[k];
592 const int64_t v =
593 (((A[k] * dgd[l] + B[k]) << SGRPROJ_RST_BITS) + (n >> 1)) / n;
594 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
595 }
596 }
597#endif // APPROXIMATE_SGR
598}
599
600static void apply_selfguided_restoration(int64_t *dat, int width, int height,
601 int stride, int bit_depth, int eps,
602 int *xqd, void *tmpbuf) {
603 int xq[2];
604 int64_t *flt1 = (int64_t *)tmpbuf;
605 int64_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
606 uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX);
607 int i, j;
608 for (i = 0; i < height; ++i) {
609 for (j = 0; j < width; ++j) {
610 assert(i * width + j < RESTORATION_TILEPELS_MAX);
611 flt1[i * width + j] = dat[i * stride + j];
612 flt2[i * width + j] = dat[i * stride + j];
613 }
614 }
615 av1_selfguided_restoration(flt1, width, height, width, bit_depth,
616 sgr_params[eps].r1, sgr_params[eps].e1, tmpbuf2);
617 av1_selfguided_restoration(flt2, width, height, width, bit_depth,
618 sgr_params[eps].r2, sgr_params[eps].e2, tmpbuf2);
619 decode_xq(xqd, xq);
620 for (i = 0; i < height; ++i) {
621 for (j = 0; j < width; ++j) {
622 const int k = i * width + j;
623 const int l = i * stride + j;
624 const int64_t u = ((int64_t)dat[l] << SGRPROJ_RST_BITS);
625 const int64_t f1 = (int64_t)flt1[k] - u;
626 const int64_t f2 = (int64_t)flt2[k] - u;
627 const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
628 const int16_t w =
629 (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
630 dat[l] = w;
631 }
632 }
633}
634
635static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width,
636 int height, int stride,
637 RestorationInternal *rst, void *tmpbuf) {
638 const int tile_width = rst->tile_width >> rst->subsampling_x;
639 const int tile_height = rst->tile_height >> rst->subsampling_y;
640 int i, j;
641 int h_start, h_end, v_start, v_end;
642 uint8_t *data_p;
643 int64_t dat[RESTORATION_TILEPELS_MAX];
644
645 if (rst->rsi->sgrproj_info[tile_idx].level == 0) return;
646 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
647 tile_width, tile_height, width, height, 0, 0,
648 &h_start, &h_end, &v_start, &v_end);
649 data_p = data + h_start + v_start * stride;
650 for (i = 0; i < (v_end - v_start); ++i) {
651 for (j = 0; j < (h_end - h_start); ++j) {
652 dat[i * (h_end - h_start) + j] = data_p[i * stride + j];
653 }
654 }
655 apply_selfguided_restoration(dat, h_end - h_start, v_end - v_start,
656 h_end - h_start, 8,
657 rst->rsi->sgrproj_info[tile_idx].ep,
658 rst->rsi->sgrproj_info[tile_idx].xqd, tmpbuf);
659 for (i = 0; i < (v_end - v_start); ++i) {
660 for (j = 0; j < (h_end - h_start); ++j) {
661 data_p[i * stride + j] = clip_pixel(dat[i * (h_end - h_start) + j]);
662 }
663 }
664}
665
666static void loop_sgrproj_filter(uint8_t *data, int width, int height,
667 int stride, RestorationInternal *rst,
668 uint8_t *tmpdata, int tmpstride) {
669 int tile_idx;
670 uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
671 (void)tmpdata;
672 (void)tmpstride;
673 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
674 loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst,
675 tmpbuf);
676 }
677 aom_free(tmpbuf);
678}
679
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700680static void loop_switchable_filter(uint8_t *data, int width, int height,
681 int stride, RestorationInternal *rst,
682 uint8_t *tmpdata, int tmpstride) {
683 int i, tile_idx;
684 uint8_t *data_p, *tmpdata_p;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700685 uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700686
687 // Initialize tmp buffer
688 data_p = data;
689 tmpdata_p = tmpdata;
690 for (i = 0; i < height; ++i) {
691 memcpy(tmpdata_p, data_p, sizeof(*data_p) * width);
692 data_p += stride;
693 tmpdata_p += tmpstride;
694 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700695 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700696 if (rst->rsi->restoration_type[tile_idx] == RESTORE_BILATERAL) {
697 loop_bilateral_filter_tile(data, tile_idx, width, height, stride, rst,
698 tmpdata, tmpstride);
699 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
700 loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst,
701 tmpdata, tmpstride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700702 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) {
703 loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst,
704 tmpbuf);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700705 }
706 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700707 aom_free(tmpbuf);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700708}
709
Yaowu Xuf883b422016-08-30 14:01:10 -0700710#if CONFIG_AOM_HIGHBITDEPTH
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700711static void loop_bilateral_filter_tile_highbd(uint16_t *data, int tile_idx,
712 int width, int height, int stride,
713 RestorationInternal *rst,
714 uint16_t *tmpdata, int tmpstride,
715 int bit_depth) {
716 const int tile_width = rst->tile_width >> rst->subsampling_x;
717 const int tile_height = rst->tile_height >> rst->subsampling_y;
718 int i, j, subtile_idx;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700719 int h_start, h_end, v_start, v_end;
Debargha Mukherjeec57924c2016-11-03 15:14:27 -0700720 const int shift = bit_depth - 8;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700721
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700722 for (subtile_idx = 0; subtile_idx < BILATERAL_SUBTILES; ++subtile_idx) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700723 uint16_t *data_p, *tmpdata_p;
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700724 const int level = rst->rsi->bilateral_info[tile_idx].level[subtile_idx];
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700725 uint8_t(*wx_lut)[RESTORATION_WIN];
726 uint8_t *wr_lut_;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700727
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700728 if (level < 0) continue;
729 wr_lut_ = (rst->keyframe ? bilateral_filter_coeffs_r_kf[level]
730 : bilateral_filter_coeffs_r[level]) +
731 BILATERAL_AMP_RANGE;
732 wx_lut = rst->keyframe ? bilateral_filter_coeffs_s_kf[level]
733 : bilateral_filter_coeffs_s[level];
734 av1_get_rest_tile_limits(tile_idx, subtile_idx, BILATERAL_SUBTILE_BITS,
735 rst->nhtiles, rst->nvtiles, tile_width,
736 tile_height, width, height, 1, 1, &h_start, &h_end,
737 &v_start, &v_end);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700738
739 data_p = data + h_start + v_start * stride;
740 tmpdata_p = tmpdata + h_start + v_start * tmpstride;
741
742 for (i = 0; i < (v_end - v_start); ++i) {
743 for (j = 0; j < (h_end - h_start); ++j) {
Debargha Mukherjee1a16a982016-10-04 11:50:00 -0700744 int x, y, wt;
745 int64_t flsum = 0, wtsum = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700746 uint16_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride;
747 for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
748 for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700749 wt = (int)wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] *
Debargha Mukherjeec57924c2016-11-03 15:14:27 -0700750 (int)wr_lut_[(data_p2[x] >> shift) - (data_p[j] >> shift)];
Debargha Mukherjee1a16a982016-10-04 11:50:00 -0700751 wtsum += (int64_t)wt;
752 flsum += (int64_t)wt * data_p2[x];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700753 }
754 data_p2 += stride;
755 }
756 if (wtsum > 0)
757 tmpdata_p[j] =
758 clip_pixel_highbd((int)((flsum + wtsum / 2) / wtsum), bit_depth);
759 else
760 tmpdata_p[j] = data_p[j];
761 }
762 tmpdata_p += tmpstride;
763 data_p += stride;
764 }
765 for (i = v_start; i < v_end; ++i) {
766 memcpy(data + i * stride + h_start, tmpdata + i * tmpstride + h_start,
767 (h_end - h_start) * sizeof(*data));
768 }
769 }
770}
771
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700772static void loop_bilateral_filter_highbd(uint8_t *data8, int width, int height,
773 int stride, RestorationInternal *rst,
774 uint8_t *tmpdata8, int tmpstride,
775 int bit_depth) {
776 int tile_idx;
777 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
778 uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
779
780 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
781 loop_bilateral_filter_tile_highbd(data, tile_idx, width, height, stride,
782 rst, tmpdata, tmpstride, bit_depth);
783 }
784}
785
Yaowu Xuc27fc142016-08-22 16:08:15 -0700786uint16_t hor_sym_filter_highbd(uint16_t *d, int *hfilter, int bd) {
787 int32_t s =
788 (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * hfilter[RESTORATION_HALFWIN];
789 int i;
790 for (i = 1; i <= RESTORATION_HALFWIN; ++i)
791 s += (d[i] + d[-i]) * hfilter[RESTORATION_HALFWIN + i];
792 return clip_pixel_highbd(s >> RESTORATION_FILT_BITS, bd);
793}
794
795uint16_t ver_sym_filter_highbd(uint16_t *d, int stride, int *vfilter, int bd) {
796 int32_t s =
797 (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * vfilter[RESTORATION_HALFWIN];
798 int i;
799 for (i = 1; i <= RESTORATION_HALFWIN; ++i)
800 s += (d[i * stride] + d[-i * stride]) * vfilter[RESTORATION_HALFWIN + i];
801 return clip_pixel_highbd(s >> RESTORATION_FILT_BITS, bd);
802}
803
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700804static void loop_wiener_filter_tile_highbd(uint16_t *data, int tile_idx,
805 int width, int height, int stride,
806 RestorationInternal *rst,
807 uint16_t *tmpdata, int tmpstride,
808 int bit_depth) {
809 const int tile_width = rst->tile_width >> rst->subsampling_x;
810 const int tile_height = rst->tile_height >> rst->subsampling_y;
811 int h_start, h_end, v_start, v_end;
812 int i, j;
813 uint16_t *data_p, *tmpdata_p;
814
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700815 if (rst->rsi->wiener_info[tile_idx].level == 0) return;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700816 // Filter row-wise
817 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
818 tile_width, tile_height, width, height, 1, 0,
819 &h_start, &h_end, &v_start, &v_end);
820 data_p = data + h_start + v_start * stride;
821 tmpdata_p = tmpdata + h_start + v_start * tmpstride;
822 for (i = 0; i < (v_end - v_start); ++i) {
823 for (j = 0; j < (h_end - h_start); ++j) {
824 *tmpdata_p++ = hor_sym_filter_highbd(
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700825 data_p++, rst->rsi->wiener_info[tile_idx].hfilter, bit_depth);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700826 }
827 data_p += stride - (h_end - h_start);
828 tmpdata_p += tmpstride - (h_end - h_start);
829 }
830 // Filter col-wise
831 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
832 tile_width, tile_height, width, height, 0, 1,
833 &h_start, &h_end, &v_start, &v_end);
834 data_p = data + h_start + v_start * stride;
835 tmpdata_p = tmpdata + h_start + v_start * tmpstride;
836 for (i = 0; i < (v_end - v_start); ++i) {
837 for (j = 0; j < (h_end - h_start); ++j) {
838 *data_p++ = ver_sym_filter_highbd(tmpdata_p++, tmpstride,
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700839 rst->rsi->wiener_info[tile_idx].vfilter,
840 bit_depth);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700841 }
842 data_p += stride - (h_end - h_start);
843 tmpdata_p += tmpstride - (h_end - h_start);
844 }
845}
846
Yaowu Xuc27fc142016-08-22 16:08:15 -0700847static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
848 int stride, RestorationInternal *rst,
849 uint8_t *tmpdata8, int tmpstride,
850 int bit_depth) {
851 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
852 uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700853 int tile_idx;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700854 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700855 loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
856 tmpdata, tmpstride, bit_depth);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700857 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700858}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700859
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700860static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
861 int width, int height, int stride,
862 RestorationInternal *rst,
863 int bit_depth, void *tmpbuf) {
864 const int tile_width = rst->tile_width >> rst->subsampling_x;
865 const int tile_height = rst->tile_height >> rst->subsampling_y;
866 int i, j;
867 int h_start, h_end, v_start, v_end;
868 uint16_t *data_p;
869 int64_t dat[RESTORATION_TILEPELS_MAX];
870
871 if (rst->rsi->sgrproj_info[tile_idx].level == 0) return;
872 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
873 tile_width, tile_height, width, height, 0, 0,
874 &h_start, &h_end, &v_start, &v_end);
875 data_p = data + h_start + v_start * stride;
876 for (i = 0; i < (v_end - v_start); ++i) {
877 for (j = 0; j < (h_end - h_start); ++j) {
878 dat[i * (h_end - h_start) + j] = data_p[i * stride + j];
879 }
880 }
881 apply_selfguided_restoration(dat, h_end - h_start, v_end - v_start,
882 h_end - h_start, bit_depth,
883 rst->rsi->sgrproj_info[tile_idx].ep,
884 rst->rsi->sgrproj_info[tile_idx].xqd, tmpbuf);
885 for (i = 0; i < (v_end - v_start); ++i) {
886 for (j = 0; j < (h_end - h_start); ++j) {
887 data_p[i * stride + j] =
888 clip_pixel_highbd(dat[i * (h_end - h_start) + j], bit_depth);
889 }
890 }
891}
892
893static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height,
894 int stride, RestorationInternal *rst,
895 uint8_t *tmpdata8, int tmpstride,
896 int bit_depth) {
897 int tile_idx;
898 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
899 uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
900 (void)tmpdata8;
901 (void)tmpstride;
902 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
903 loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
904 bit_depth, tmpbuf);
905 }
906 aom_free(tmpbuf);
907}
908
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700909static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
910 int stride, RestorationInternal *rst,
911 uint8_t *tmpdata8, int tmpstride,
912 int bit_depth) {
913 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
914 uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700915 uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700916 int i, tile_idx;
917 uint16_t *data_p, *tmpdata_p;
918
919 // Initialize tmp buffer
920 data_p = data;
921 tmpdata_p = tmpdata;
922 for (i = 0; i < height; ++i) {
923 memcpy(tmpdata_p, data_p, sizeof(*data_p) * width);
924 data_p += stride;
925 tmpdata_p += tmpstride;
926 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700927 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700928 if (rst->rsi->restoration_type[tile_idx] == RESTORE_BILATERAL) {
929 loop_bilateral_filter_tile_highbd(data, tile_idx, width, height, stride,
930 rst, tmpdata, tmpstride, bit_depth);
931 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
932 loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
933 tmpdata, tmpstride, bit_depth);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700934 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) {
935 loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride,
936 rst, bit_depth, tmpbuf);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700937 }
938 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700939 aom_free(tmpbuf);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700940}
Yaowu Xuf883b422016-08-30 14:01:10 -0700941#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700942
Yaowu Xuf883b422016-08-30 14:01:10 -0700943void av1_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
944 int start_mi_row, int end_mi_row, int y_only) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700945 const int ywidth = frame->y_crop_width;
946 const int ystride = frame->y_stride;
947 const int uvwidth = frame->uv_crop_width;
948 const int uvstride = frame->uv_stride;
949 const int ystart = start_mi_row << MI_SIZE_LOG2;
950 const int uvstart = ystart >> cm->subsampling_y;
951 int yend = end_mi_row << MI_SIZE_LOG2;
952 int uvend = yend >> cm->subsampling_y;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700953 restore_func_type restore_funcs[RESTORE_TYPES] = { NULL, loop_sgrproj_filter,
954 loop_bilateral_filter,
955 loop_wiener_filter,
956 loop_switchable_filter };
957#if CONFIG_AOM_HIGHBITDEPTH
958 restore_func_highbd_type restore_funcs_highbd[RESTORE_TYPES] = {
959 NULL, loop_sgrproj_filter_highbd, loop_bilateral_filter_highbd,
960 loop_wiener_filter_highbd, loop_switchable_filter_highbd
961 };
962#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700963 restore_func_type restore_func =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700964 restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
Yaowu Xuf883b422016-08-30 14:01:10 -0700965#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700966 restore_func_highbd_type restore_func_highbd =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700967 restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type];
Yaowu Xuf883b422016-08-30 14:01:10 -0700968#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700969 YV12_BUFFER_CONFIG tmp_buf;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700970
971 if (cm->rst_internal.rsi->frame_restoration_type == RESTORE_NONE) return;
972
Yaowu Xuc27fc142016-08-22 16:08:15 -0700973 memset(&tmp_buf, 0, sizeof(YV12_BUFFER_CONFIG));
974
Yaowu Xuf883b422016-08-30 14:01:10 -0700975 yend = AOMMIN(yend, cm->height);
976 uvend = AOMMIN(uvend, cm->subsampling_y ? (cm->height + 1) >> 1 : cm->height);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700977
Yaowu Xuf883b422016-08-30 14:01:10 -0700978 if (aom_realloc_frame_buffer(
Yaowu Xuc27fc142016-08-22 16:08:15 -0700979 &tmp_buf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y,
Yaowu Xuf883b422016-08-30 14:01:10 -0700980#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700981 cm->use_highbitdepth,
982#endif
Yaowu Xu671f2bd2016-09-30 15:07:57 -0700983 AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL) < 0)
Yaowu Xuf883b422016-08-30 14:01:10 -0700984 aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700985 "Failed to allocate tmp restoration buffer");
986
Yaowu Xuf883b422016-08-30 14:01:10 -0700987#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700988 if (cm->use_highbitdepth)
989 restore_func_highbd(frame->y_buffer + ystart * ystride, ywidth,
990 yend - ystart, ystride, &cm->rst_internal,
991 tmp_buf.y_buffer + ystart * tmp_buf.y_stride,
992 tmp_buf.y_stride, cm->bit_depth);
993 else
Yaowu Xuf883b422016-08-30 14:01:10 -0700994#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700995 restore_func(frame->y_buffer + ystart * ystride, ywidth, yend - ystart,
996 ystride, &cm->rst_internal,
997 tmp_buf.y_buffer + ystart * tmp_buf.y_stride,
998 tmp_buf.y_stride);
999 if (!y_only) {
1000 cm->rst_internal.subsampling_x = cm->subsampling_x;
1001 cm->rst_internal.subsampling_y = cm->subsampling_y;
Yaowu Xuf883b422016-08-30 14:01:10 -07001002#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001003 if (cm->use_highbitdepth) {
1004 restore_func_highbd(frame->u_buffer + uvstart * uvstride, uvwidth,
1005 uvend - uvstart, uvstride, &cm->rst_internal,
1006 tmp_buf.u_buffer + uvstart * tmp_buf.uv_stride,
1007 tmp_buf.uv_stride, cm->bit_depth);
1008 restore_func_highbd(frame->v_buffer + uvstart * uvstride, uvwidth,
1009 uvend - uvstart, uvstride, &cm->rst_internal,
1010 tmp_buf.v_buffer + uvstart * tmp_buf.uv_stride,
1011 tmp_buf.uv_stride, cm->bit_depth);
1012 } else {
Yaowu Xuf883b422016-08-30 14:01:10 -07001013#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001014 restore_func(frame->u_buffer + uvstart * uvstride, uvwidth,
1015 uvend - uvstart, uvstride, &cm->rst_internal,
1016 tmp_buf.u_buffer + uvstart * tmp_buf.uv_stride,
1017 tmp_buf.uv_stride);
1018 restore_func(frame->v_buffer + uvstart * uvstride, uvwidth,
1019 uvend - uvstart, uvstride, &cm->rst_internal,
1020 tmp_buf.v_buffer + uvstart * tmp_buf.uv_stride,
1021 tmp_buf.uv_stride);
Yaowu Xuf883b422016-08-30 14:01:10 -07001022#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001023 }
Yaowu Xuf883b422016-08-30 14:01:10 -07001024#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001025 }
Yaowu Xuf883b422016-08-30 14:01:10 -07001026 aom_free_frame_buffer(&tmp_buf);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001027}
1028
Yaowu Xuf883b422016-08-30 14:01:10 -07001029void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
1030 RestorationInfo *rsi, int y_only,
1031 int partial_frame) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001032 int start_mi_row, end_mi_row, mi_rows_to_filter;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001033 if (rsi->frame_restoration_type != RESTORE_NONE) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001034 start_mi_row = 0;
1035 mi_rows_to_filter = cm->mi_rows;
1036 if (partial_frame && cm->mi_rows > 8) {
1037 start_mi_row = cm->mi_rows >> 1;
1038 start_mi_row &= 0xfffffff8;
Yaowu Xuf883b422016-08-30 14:01:10 -07001039 mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001040 }
1041 end_mi_row = start_mi_row + mi_rows_to_filter;
Yaowu Xuf883b422016-08-30 14:01:10 -07001042 av1_loop_restoration_init(&cm->rst_internal, rsi,
1043 cm->frame_type == KEY_FRAME, cm->width,
1044 cm->height);
1045 av1_loop_restoration_rows(frame, cm, start_mi_row, end_mi_row, y_only);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001046 }
1047}