blob: 6573380dbb58c6d8e399d20ef578575a02d9edb2 [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 *
 */
12
13#include <math.h>
14
Yaowu Xuf883b422016-08-30 14:01:10 -070015#include "./aom_config.h"
16#include "./aom_dsp_rtcd.h"
David Barker9666e752016-12-08 11:25:47 +000017#include "./aom_scale_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070018#include "av1/common/onyxc_int.h"
19#include "av1/common/restoration.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070020#include "aom_dsp/aom_dsp_common.h"
21#include "aom_mem/aom_mem.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070022#include "aom_ports/mem.h"
23
Debargha Mukherjee3981be92016-11-21 09:35:44 -080024static int domaintxfmrf_vtable[DOMAINTXFMRF_ITERS][DOMAINTXFMRF_PARAMS][256];
25
Debargha Mukherjee818e42a2016-12-12 11:52:56 -080026// Whether to filter only y or not
27static const int override_y_only[RESTORE_TYPES] = { 1, 1, 1, 1, 1 };
28
Debargha Mukherjee3981be92016-11-21 09:35:44 -080029static const int domaintxfmrf_params[DOMAINTXFMRF_PARAMS] = {
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -080030 32, 40, 48, 56, 64, 68, 72, 76, 80, 82, 84, 86, 88,
Debargha Mukherjee3981be92016-11-21 09:35:44 -080031 90, 92, 94, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
32 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118,
33 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 130, 132, 134,
34 136, 138, 140, 142, 146, 150, 154, 158, 162, 166, 170, 174
35};
36
Debargha Mukherjee8f209a82016-10-12 10:47:01 -070037const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
38 // r1, eps1, r2, eps2
39 { 2, 27, 1, 11 }, { 2, 31, 1, 12 }, { 2, 37, 1, 12 }, { 2, 44, 1, 12 },
40 { 2, 49, 1, 13 }, { 2, 54, 1, 14 }, { 2, 60, 1, 15 }, { 2, 68, 1, 15 },
41};
42
Yaowu Xuc27fc142016-08-22 16:08:15 -070043typedef void (*restore_func_type)(uint8_t *data8, int width, int height,
44 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +000045 uint8_t *dst8, int dst_stride);
Yaowu Xuf883b422016-08-30 14:01:10 -070046#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -070047typedef void (*restore_func_highbd_type)(uint8_t *data8, int width, int height,
48 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +000049 int bit_depth, uint8_t *dst8,
50 int dst_stride);
Yaowu Xuf883b422016-08-30 14:01:10 -070051#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -070052
Debargha Mukherjee874d36d2016-12-14 16:53:17 -080053int av1_alloc_restoration_struct(RestorationInfo *rst_info, int width,
54 int height) {
55 const int ntiles = av1_get_rest_ntiles(width, height, NULL, NULL, NULL, NULL);
56 rst_info->restoration_type = (RestorationType *)aom_realloc(
57 rst_info->restoration_type, sizeof(*rst_info->restoration_type) * ntiles);
58 rst_info->wiener_info = (WienerInfo *)aom_realloc(
59 rst_info->wiener_info, sizeof(*rst_info->wiener_info) * ntiles);
60 assert(rst_info->wiener_info != NULL);
61 rst_info->sgrproj_info = (SgrprojInfo *)aom_realloc(
62 rst_info->sgrproj_info, sizeof(*rst_info->sgrproj_info) * ntiles);
63 assert(rst_info->sgrproj_info != NULL);
64 rst_info->domaintxfmrf_info = (DomaintxfmrfInfo *)aom_realloc(
65 rst_info->domaintxfmrf_info,
66 sizeof(*rst_info->domaintxfmrf_info) * ntiles);
67 assert(rst_info->domaintxfmrf_info != NULL);
68 return ntiles;
69}
70
71void av1_free_restoration_struct(RestorationInfo *rst_info) {
72 aom_free(rst_info->restoration_type);
73 rst_info->restoration_type = NULL;
74 aom_free(rst_info->wiener_info);
75 rst_info->wiener_info = NULL;
76 aom_free(rst_info->sgrproj_info);
77 rst_info->sgrproj_info = NULL;
78 aom_free(rst_info->domaintxfmrf_info);
79 rst_info->domaintxfmrf_info = NULL;
80}
81
Debargha Mukherjee3981be92016-11-21 09:35:44 -080082static void GenDomainTxfmRFVtable() {
83 int i, j;
84 const double sigma_s = sqrt(2.0);
85 for (i = 0; i < DOMAINTXFMRF_ITERS; ++i) {
86 const int nm = (1 << (DOMAINTXFMRF_ITERS - i - 1));
87 const double A = exp(-DOMAINTXFMRF_MULT / (sigma_s * nm));
88 for (j = 0; j < DOMAINTXFMRF_PARAMS; ++j) {
89 const double sigma_r =
90 (double)domaintxfmrf_params[j] / DOMAINTXFMRF_SIGMA_SCALE;
91 const double scale = sigma_s / sigma_r;
92 int k;
93 for (k = 0; k < 256; ++k) {
94 domaintxfmrf_vtable[i][j][k] =
95 RINT(DOMAINTXFMRF_VTABLE_PREC * pow(A, 1.0 + k * scale));
96 }
97 }
98 }
99}
100
// One-time precalculation for loop restoration: builds the domain-transform
// filter weight table.
void av1_loop_restoration_precal() {
  GenDomainTxfmRFVtable();
}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700102
Yaowu Xuf883b422016-08-30 14:01:10 -0700103void av1_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
104 int kf, int width, int height) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700105 int i, tile_idx;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700106 rst->rsi = rsi;
107 rst->keyframe = kf;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700108 rst->subsampling_x = 0;
109 rst->subsampling_y = 0;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700110 rst->ntiles =
clang-formatbda8d612016-09-19 15:55:46 -0700111 av1_get_rest_ntiles(width, height, &rst->tile_width, &rst->tile_height,
112 &rst->nhtiles, &rst->nvtiles);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700113 if (rsi->frame_restoration_type == RESTORE_WIENER) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700114 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800115 if (rsi->wiener_info[tile_idx].level) {
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800116 rsi->wiener_info[tile_idx].vfilter[WIENER_HALFWIN] =
117 rsi->wiener_info[tile_idx].hfilter[WIENER_HALFWIN] =
118 WIENER_FILT_STEP;
119 for (i = 0; i < WIENER_HALFWIN; ++i) {
120 rsi->wiener_info[tile_idx].vfilter[WIENER_WIN - 1 - i] =
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800121 rsi->wiener_info[tile_idx].vfilter[i];
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800122 rsi->wiener_info[tile_idx].hfilter[WIENER_WIN - 1 - i] =
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800123 rsi->wiener_info[tile_idx].hfilter[i];
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800124 rsi->wiener_info[tile_idx].vfilter[WIENER_HALFWIN] -=
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800125 2 * rsi->wiener_info[tile_idx].vfilter[i];
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800126 rsi->wiener_info[tile_idx].hfilter[WIENER_HALFWIN] -=
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800127 2 * rsi->wiener_info[tile_idx].hfilter[i];
128 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700129 }
130 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700131 } else if (rsi->frame_restoration_type == RESTORE_SWITCHABLE) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700132 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700133 if (rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800134 rsi->wiener_info[tile_idx].vfilter[WIENER_HALFWIN] =
135 rsi->wiener_info[tile_idx].hfilter[WIENER_HALFWIN] =
136 WIENER_FILT_STEP;
137 for (i = 0; i < WIENER_HALFWIN; ++i) {
138 rsi->wiener_info[tile_idx].vfilter[WIENER_WIN - 1 - i] =
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700139 rsi->wiener_info[tile_idx].vfilter[i];
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800140 rsi->wiener_info[tile_idx].hfilter[WIENER_WIN - 1 - i] =
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700141 rsi->wiener_info[tile_idx].hfilter[i];
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800142 rsi->wiener_info[tile_idx].vfilter[WIENER_HALFWIN] -=
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700143 2 * rsi->wiener_info[tile_idx].vfilter[i];
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800144 rsi->wiener_info[tile_idx].hfilter[WIENER_HALFWIN] -=
Debargha Mukherjee5d89a632016-09-17 13:16:58 -0700145 2 * rsi->wiener_info[tile_idx].hfilter[i];
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700146 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700147 }
148 }
149 }
150}
151
David Barker025b2542016-12-08 11:50:42 +0000152static void extend_frame(uint8_t *data, int width, int height, int stride) {
153 uint8_t *data_p;
154 int i;
155 for (i = 0; i < height; ++i) {
156 data_p = data + i * stride;
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800157 memset(data_p - WIENER_HALFWIN, data_p[0], WIENER_HALFWIN);
158 memset(data_p + width, data_p[width - 1], WIENER_HALFWIN);
David Barker025b2542016-12-08 11:50:42 +0000159 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800160 data_p = data - WIENER_HALFWIN;
161 for (i = -WIENER_HALFWIN; i < 0; ++i) {
162 memcpy(data_p + i * stride, data_p, width + 2 * WIENER_HALFWIN);
David Barker025b2542016-12-08 11:50:42 +0000163 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800164 for (i = height; i < height + WIENER_HALFWIN; ++i) {
David Barker025b2542016-12-08 11:50:42 +0000165 memcpy(data_p + i * stride, data_p + (height - 1) * stride,
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800166 width + 2 * WIENER_HALFWIN);
David Barker025b2542016-12-08 11:50:42 +0000167 }
168}
169
David Barker9666e752016-12-08 11:25:47 +0000170static void loop_copy_tile(uint8_t *data, int tile_idx, int subtile_idx,
171 int subtile_bits, int width, int height, int stride,
172 RestorationInternal *rst, uint8_t *dst,
173 int dst_stride) {
174 const int tile_width = rst->tile_width >> rst->subsampling_x;
175 const int tile_height = rst->tile_height >> rst->subsampling_y;
176 int i;
177 int h_start, h_end, v_start, v_end;
178 av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, rst->nhtiles,
179 rst->nvtiles, tile_width, tile_height, width, height,
180 0, 0, &h_start, &h_end, &v_start, &v_end);
181 for (i = v_start; i < v_end; ++i)
182 memcpy(dst + i * dst_stride + h_start, data + i * stride + h_start,
183 h_end - h_start);
184}
185
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700186static void loop_wiener_filter_tile(uint8_t *data, int tile_idx, int width,
187 int height, int stride,
David Barker025b2542016-12-08 11:50:42 +0000188 RestorationInternal *rst, uint8_t *dst,
David Barker9666e752016-12-08 11:25:47 +0000189 int dst_stride) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700190 const int tile_width = rst->tile_width >> rst->subsampling_x;
191 const int tile_height = rst->tile_height >> rst->subsampling_y;
192 int i, j;
193 int h_start, h_end, v_start, v_end;
David Barker025b2542016-12-08 11:50:42 +0000194 DECLARE_ALIGNED(16, InterpKernel, hkernel);
195 DECLARE_ALIGNED(16, InterpKernel, vkernel);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700196
David Barker9666e752016-12-08 11:25:47 +0000197 if (rst->rsi->wiener_info[tile_idx].level == 0) {
198 loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
199 dst_stride);
200 return;
201 }
David Barker025b2542016-12-08 11:50:42 +0000202 // TODO(david.barker): Store hfilter/vfilter as an InterpKernel
203 // instead of the current format. Then this can be removed.
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800204 assert(WIENER_WIN == SUBPEL_TAPS - 1);
205 for (i = 0; i < WIENER_WIN; ++i) {
David Barker025b2542016-12-08 11:50:42 +0000206 hkernel[i] = rst->rsi->wiener_info[tile_idx].hfilter[i];
207 vkernel[i] = rst->rsi->wiener_info[tile_idx].vfilter[i];
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700208 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800209 hkernel[WIENER_WIN] = 0;
210 vkernel[WIENER_WIN] = 0;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700211 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
David Barker025b2542016-12-08 11:50:42 +0000212 tile_width, tile_height, width, height, 0, 0,
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700213 &h_start, &h_end, &v_start, &v_end);
David Barker025b2542016-12-08 11:50:42 +0000214 // Convolve the whole tile (done in blocks here to match the requirements
215 // of the vectorized convolve functions, but the result is equivalent)
216 for (i = v_start; i < v_end; i += MAX_SB_SIZE)
217 for (j = h_start; j < h_end; j += MAX_SB_SIZE) {
218 int w = AOMMIN(MAX_SB_SIZE, (h_end - j + 15) & ~15);
219 int h = AOMMIN(MAX_SB_SIZE, (v_end - i + 15) & ~15);
220 const uint8_t *data_p = data + i * stride + j;
221 uint8_t *dst_p = dst + i * dst_stride + j;
222 aom_convolve8(data_p, stride, dst_p, dst_stride, hkernel, 16, vkernel, 16,
223 w, h);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700224 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700225}
226
Yaowu Xuc27fc142016-08-22 16:08:15 -0700227static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
David Barker025b2542016-12-08 11:50:42 +0000228 RestorationInternal *rst, uint8_t *dst,
229 int dst_stride) {
230 int tile_idx;
231 extend_frame(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700232 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
David Barker025b2542016-12-08 11:50:42 +0000233 loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst,
234 dst_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700235 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700236}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700237
// Computes, for every pixel of a width x height plane, the sum of `src`
// (or of src^2 when `sqr` is non-zero) over the (2r+1) x (2r+1) window centred
// on that pixel, clipped to the plane. Results go to `dst`.
//
// The sum is separable: a vertical pass (via column prefix sums in `tmp`)
// writes intermediate column sums to `dst`, then a horizontal pass (via row
// prefix sums in `tmp`) turns them into full box sums.
//
// `src`, `dst` and `tmp` each have their own row stride. The code reads
// tmp rows/columns up to index 2*r, so it requires height > 2*r and
// width > 2*r. All of `src` is consumed before `dst` is first written.
//
// Fix: the horizontal prefix-sum pass now indexes `dst` with dst_stride
// (it previously used src_stride, which is wrong whenever the two differ).
static void boxsum(int32_t *src, int width, int height, int src_stride, int r,
                   int sqr, int32_t *dst, int dst_stride, int32_t *tmp,
                   int tmp_stride) {
  int i, j;

  // Vertical pass, step 1: column-wise running sums of src (or src^2).
  if (sqr) {
    for (j = 0; j < width; ++j) tmp[j] = src[j] * src[j];
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] +
            src[i * src_stride + j] * src[i * src_stride + j];
  } else {
    memcpy(tmp, src, sizeof(*tmp) * width);
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] + src[i * src_stride + j];
  }

  // Vertical pass, step 2: window sums from differences of running sums.
  // Top rows: the window is clipped at row 0, so the running sum suffices.
  for (i = 0; i <= r; ++i)
    memcpy(&dst[i * dst_stride], &tmp[(i + r) * tmp_stride],
           sizeof(*tmp) * width);
  // Middle rows: full window.
  for (i = r + 1; i < height - r; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] =
          tmp[(i + r) * tmp_stride + j] - tmp[(i - r - 1) * tmp_stride + j];
  // Bottom rows: the window is clipped at the last row.
  for (i = height - r; i < height; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] = tmp[(height - 1) * tmp_stride + j] -
                                tmp[(i - r - 1) * tmp_stride + j];

  // Horizontal pass, step 1: row-wise running sums of the column sums.
  for (i = 0; i < height; ++i) tmp[i * tmp_stride] = dst[i * dst_stride];
  for (i = 0; i < height; ++i)
    for (j = 1; j < width; ++j)
      tmp[i * tmp_stride + j] =
          tmp[i * tmp_stride + j - 1] + dst[i * dst_stride + j];

  // Horizontal pass, step 2: window sums, clipped at the left/right edges.
  for (j = 0; j <= r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] = tmp[i * tmp_stride + j + r];
  for (j = r + 1; j < width - r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + j + r] - tmp[i * tmp_stride + j - r - 1];
  for (j = width - r; j < width; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + width - 1] - tmp[i * tmp_stride + j - r - 1];
}
287
// Fills `num` with the number of in-bounds pixels inside the
// (2r+1) x (2r+1) window centred on each pixel of a width x height plane.
// The count is separable: (rows of the window inside the plane) times
// (columns of the window inside the plane).
static void boxnum(int width, int height, int r, int8_t *num, int num_stride) {
  int row, col;
  for (row = 0; row < height; ++row) {
    // Rows above (at most r), the row itself, rows below (at most r).
    const int rows_in_window = AOMMIN(row, r) + 1 + AOMMIN(height - 1 - row, r);
    int8_t *const num_row = num + row * num_stride;
    for (col = 0; col < width; ++col) {
      const int cols_in_window =
          AOMMIN(col, r) + 1 + AOMMIN(width - 1 - col, r);
      num_row[col] = rows_in_window * cols_in_window;
    }
  }
}
321
322void decode_xq(int *xqd, int *xq) {
323 xq[0] = -xqd[0];
324 xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
325}
326
327#define APPROXIMATE_SGR 1
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800328void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride,
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700329 int bit_depth, int r, int eps, void *tmpbuf) {
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800330 int32_t *A = (int32_t *)tmpbuf;
331 int32_t *B = A + RESTORATION_TILEPELS_MAX;
332 int32_t *T = B + RESTORATION_TILEPELS_MAX;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700333 int8_t num[RESTORATION_TILEPELS_MAX];
334 int i, j;
335 eps <<= 2 * (bit_depth - 8);
336
337 boxsum(dgd, width, height, stride, r, 0, B, width, T, width);
338 boxsum(dgd, width, height, stride, r, 1, A, width, T, width);
339 boxnum(width, height, r, num, width);
340 for (i = 0; i < height; ++i) {
341 for (j = 0; j < width; ++j) {
342 const int k = i * width + j;
343 const int n = num[k];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800344 const int64_t p = A[k] * n - B[k] * B[k];
345 const int64_t q = p + n * n * eps;
346 A[k] = (int32_t)((p << SGRPROJ_SGR_BITS) + (q >> 1)) / q;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700347 B[k] = ((SGRPROJ_SGR - A[k]) * B[k] + (n >> 1)) / n;
348 }
349 }
350#if APPROXIMATE_SGR
351 i = 0;
352 j = 0;
353 {
354 const int k = i * width + j;
355 const int l = i * stride + j;
356 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800357 const int32_t a =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700358 3 * A[k] + 2 * A[k + 1] + 2 * A[k + width] + A[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800359 const int32_t b =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700360 3 * B[k] + 2 * B[k + 1] + 2 * B[k + width] + B[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800361 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700362 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
363 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
364 }
365 i = 0;
366 j = width - 1;
367 {
368 const int k = i * width + j;
369 const int l = i * stride + j;
370 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800371 const int32_t a =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700372 3 * A[k] + 2 * A[k - 1] + 2 * A[k + width] + A[k + width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800373 const int32_t b =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700374 3 * B[k] + 2 * B[k - 1] + 2 * B[k + width] + B[k + width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800375 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700376 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
377 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
378 }
379 i = height - 1;
380 j = 0;
381 {
382 const int k = i * width + j;
383 const int l = i * stride + j;
384 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800385 const int32_t a =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700386 3 * A[k] + 2 * A[k + 1] + 2 * A[k - width] + A[k - width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800387 const int32_t b =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700388 3 * B[k] + 2 * B[k + 1] + 2 * B[k - width] + B[k - width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800389 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700390 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
391 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
392 }
393 i = height - 1;
394 j = width - 1;
395 {
396 const int k = i * width + j;
397 const int l = i * stride + j;
398 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800399 const int32_t a =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700400 3 * A[k] + 2 * A[k - 1] + 2 * A[k - width] + A[k - width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800401 const int32_t b =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700402 3 * B[k] + 2 * B[k - 1] + 2 * B[k - width] + B[k - width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800403 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700404 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
405 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
406 }
407 i = 0;
408 for (j = 1; j < width - 1; ++j) {
409 const int k = i * width + j;
410 const int l = i * stride + j;
411 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800412 const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + width] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700413 A[k + width - 1] + A[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800414 const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + width] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700415 B[k + width - 1] + B[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800416 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700417 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
418 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
419 }
420 i = height - 1;
421 for (j = 1; j < width - 1; ++j) {
422 const int k = i * width + j;
423 const int l = i * stride + j;
424 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800425 const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - width] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700426 A[k - width - 1] + A[k - width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800427 const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - width] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700428 B[k - width - 1] + B[k - width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800429 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700430 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
431 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
432 }
433 j = 0;
434 for (i = 1; i < height - 1; ++i) {
435 const int k = i * width + j;
436 const int l = i * stride + j;
437 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800438 const int32_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k + 1] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700439 A[k - width + 1] + A[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800440 const int32_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k + 1] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700441 B[k - width + 1] + B[k + width + 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800442 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700443 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
444 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
445 }
446 j = width - 1;
447 for (i = 1; i < height - 1; ++i) {
448 const int k = i * width + j;
449 const int l = i * stride + j;
450 const int nb = 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800451 const int32_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k - 1] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700452 A[k - width - 1] + A[k + width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800453 const int32_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k - 1] +
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700454 B[k - width - 1] + B[k + width - 1];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800455 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700456 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
457 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
458 }
459 for (i = 1; i < height - 1; ++i) {
460 for (j = 1; j < width - 1; ++j) {
461 const int k = i * width + j;
462 const int l = i * stride + j;
463 const int nb = 5;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800464 const int32_t a =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700465 (A[k] + A[k - 1] + A[k + 1] + A[k - width] + A[k + width]) * 4 +
466 (A[k - 1 - width] + A[k - 1 + width] + A[k + 1 - width] +
467 A[k + 1 + width]) *
468 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800469 const int32_t b =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700470 (B[k] + B[k - 1] + B[k + 1] + B[k - width] + B[k + width]) * 4 +
471 (B[k - 1 - width] + B[k - 1 + width] + B[k + 1 - width] +
472 B[k + 1 + width]) *
473 3;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800474 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700475 (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
476 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
477 }
478 }
479#else
480 if (r > 1) boxnum(width, height, r = 1, num, width);
481 boxsum(A, width, height, width, r, 0, A, width, T, width);
482 boxsum(B, width, height, width, r, 0, B, width, T, width);
483 for (i = 0; i < height; ++i) {
484 for (j = 0; j < width; ++j) {
485 const int k = i * width + j;
486 const int l = i * stride + j;
487 const int n = num[k];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800488 const int32_t v =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700489 (((A[k] * dgd[l] + B[k]) << SGRPROJ_RST_BITS) + (n >> 1)) / n;
490 dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
491 }
492 }
493#endif // APPROXIMATE_SGR
494}
495
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800496static void apply_selfguided_restoration(uint8_t *dat, int width, int height,
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700497 int stride, int bit_depth, int eps,
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800498 int *xqd, uint8_t *dst, int dst_stride,
499 void *tmpbuf) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700500 int xq[2];
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800501 int32_t *flt1 = (int32_t *)tmpbuf;
502 int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700503 uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX);
504 int i, j;
505 for (i = 0; i < height; ++i) {
506 for (j = 0; j < width; ++j) {
507 assert(i * width + j < RESTORATION_TILEPELS_MAX);
508 flt1[i * width + j] = dat[i * stride + j];
509 flt2[i * width + j] = dat[i * stride + j];
510 }
511 }
512 av1_selfguided_restoration(flt1, width, height, width, bit_depth,
513 sgr_params[eps].r1, sgr_params[eps].e1, tmpbuf2);
514 av1_selfguided_restoration(flt2, width, height, width, bit_depth,
515 sgr_params[eps].r2, sgr_params[eps].e2, tmpbuf2);
516 decode_xq(xqd, xq);
517 for (i = 0; i < height; ++i) {
518 for (j = 0; j < width; ++j) {
519 const int k = i * width + j;
520 const int l = i * stride + j;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800521 const int m = i * dst_stride + j;
522 const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
523 const int32_t f1 = (int32_t)flt1[k] - u;
524 const int32_t f2 = (int32_t)flt2[k] - u;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700525 const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
526 const int16_t w =
527 (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800528 dst[m] = clip_pixel(w);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700529 }
530 }
531}
532
533static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width,
534 int height, int stride,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800535 RestorationInternal *rst, uint8_t *dst,
536 int dst_stride) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700537 const int tile_width = rst->tile_width >> rst->subsampling_x;
538 const int tile_height = rst->tile_height >> rst->subsampling_y;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700539 int h_start, h_end, v_start, v_end;
David Barker9666e752016-12-08 11:25:47 +0000540 uint8_t *data_p, *dst_p;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800541 uint8_t *dat = (uint8_t *)rst->tmpbuf;
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800542 uint8_t *tmpbuf =
543 (uint8_t *)rst->tmpbuf + RESTORATION_TILEPELS_MAX * sizeof(*dat);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700544
David Barker9666e752016-12-08 11:25:47 +0000545 if (rst->rsi->sgrproj_info[tile_idx].level == 0) {
546 loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
547 dst_stride);
548 return;
549 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700550 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
551 tile_width, tile_height, width, height, 0, 0,
552 &h_start, &h_end, &v_start, &v_end);
553 data_p = data + h_start + v_start * stride;
David Barker9666e752016-12-08 11:25:47 +0000554 dst_p = dst + h_start + v_start * dst_stride;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800555 apply_selfguided_restoration(data_p, h_end - h_start, v_end - v_start, stride,
556 8, rst->rsi->sgrproj_info[tile_idx].ep,
557 rst->rsi->sgrproj_info[tile_idx].xqd, dst_p,
558 dst_stride, tmpbuf);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700559}
560
561static void loop_sgrproj_filter(uint8_t *data, int width, int height,
562 int stride, RestorationInternal *rst,
David Barker025b2542016-12-08 11:50:42 +0000563 uint8_t *dst, int dst_stride) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700564 int tile_idx;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700565 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800566 loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst, dst,
567 dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700568 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700569}
570
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800571static void apply_domaintxfmrf_hor(int iter, int param, uint8_t *img, int width,
572 int height, int img_stride, int32_t *dat,
573 int dat_stride) {
574 int i, j;
575 for (i = 0; i < height; ++i) {
576 uint8_t *ip = &img[i * img_stride];
577 int32_t *dp = &dat[i * dat_stride];
578 *dp *= DOMAINTXFMRF_VTABLE_PREC;
579 dp++;
580 ip++;
581 // left to right
582 for (j = 1; j < width; ++j, dp++, ip++) {
583 const int v = domaintxfmrf_vtable[iter][param][abs(ip[0] - ip[-1])];
584 dp[0] = dp[0] * (DOMAINTXFMRF_VTABLE_PREC - v) +
585 ((v * dp[-1] + DOMAINTXFMRF_VTABLE_PREC / 2) >>
586 DOMAINTXFMRF_VTABLE_PRECBITS);
587 }
588 // right to left
589 dp -= 2;
590 ip -= 2;
591 for (j = width - 2; j >= 0; --j, dp--, ip--) {
592 const int v = domaintxfmrf_vtable[iter][param][abs(ip[1] - ip[0])];
593 dp[0] = (dp[0] * (DOMAINTXFMRF_VTABLE_PREC - v) + v * dp[1] +
594 DOMAINTXFMRF_VTABLE_PREC / 2) >>
595 DOMAINTXFMRF_VTABLE_PRECBITS;
596 }
597 }
598}
599
600static void apply_domaintxfmrf_ver(int iter, int param, uint8_t *img, int width,
601 int height, int img_stride, int32_t *dat,
602 int dat_stride) {
603 int i, j;
604 for (j = 0; j < width; ++j) {
605 uint8_t *ip = &img[j];
606 int32_t *dp = &dat[j];
607 dp += dat_stride;
608 ip += img_stride;
609 // top to bottom
610 for (i = 1; i < height; ++i, dp += dat_stride, ip += img_stride) {
611 const int v =
612 domaintxfmrf_vtable[iter][param][abs(ip[0] - ip[-img_stride])];
613 dp[0] = (dp[0] * (DOMAINTXFMRF_VTABLE_PREC - v) +
614 (dp[-dat_stride] * v + DOMAINTXFMRF_VTABLE_PREC / 2)) >>
615 DOMAINTXFMRF_VTABLE_PRECBITS;
616 }
617 // bottom to top
618 dp -= 2 * dat_stride;
619 ip -= 2 * img_stride;
620 for (i = height - 2; i >= 0; --i, dp -= dat_stride, ip -= img_stride) {
621 const int v =
622 domaintxfmrf_vtable[iter][param][abs(ip[img_stride] - ip[0])];
623 dp[0] = (dp[0] * (DOMAINTXFMRF_VTABLE_PREC - v) + dp[dat_stride] * v +
624 DOMAINTXFMRF_VTABLE_PREC / 2) >>
625 DOMAINTXFMRF_VTABLE_PRECBITS;
626 }
627 }
628}
629
630static void apply_domaintxfmrf_reduce_prec(int32_t *dat, int width, int height,
631 int dat_stride) {
632 int i, j;
633 for (i = 0; i < height; ++i) {
634 for (j = 0; j < width; ++j) {
635 dat[i * dat_stride + j] = ROUND_POWER_OF_TWO_SIGNED(
636 dat[i * dat_stride + j], DOMAINTXFMRF_VTABLE_PRECBITS);
637 }
638 }
639}
640
641void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
David Barker9666e752016-12-08 11:25:47 +0000642 int stride, int param, uint8_t *dst,
Yaowu Xubf1d62d2016-12-14 19:20:46 -0800643 int dst_stride, int32_t *tmpbuf) {
644 int32_t *dat = tmpbuf;
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800645 int i, j, t;
646 for (i = 0; i < height; ++i) {
647 for (j = 0; j < width; ++j) {
648 dat[i * width + j] = dgd[i * stride + j];
649 }
650 }
651 for (t = 0; t < DOMAINTXFMRF_ITERS; ++t) {
652 apply_domaintxfmrf_hor(t, param, dgd, width, height, stride, dat, width);
653 apply_domaintxfmrf_ver(t, param, dgd, width, height, stride, dat, width);
654 apply_domaintxfmrf_reduce_prec(dat, width, height, width);
655 }
656 for (i = 0; i < height; ++i) {
657 for (j = 0; j < width; ++j) {
David Barker9666e752016-12-08 11:25:47 +0000658 dst[i * dst_stride + j] = clip_pixel(dat[i * width + j]);
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800659 }
660 }
661}
662
663static void loop_domaintxfmrf_filter_tile(uint8_t *data, int tile_idx,
664 int width, int height, int stride,
665 RestorationInternal *rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800666 uint8_t *dst, int dst_stride) {
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800667 const int tile_width = rst->tile_width >> rst->subsampling_x;
668 const int tile_height = rst->tile_height >> rst->subsampling_y;
669 int h_start, h_end, v_start, v_end;
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800670 int32_t *tmpbuf = (int32_t *)rst->tmpbuf;
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800671
David Barker9666e752016-12-08 11:25:47 +0000672 if (rst->rsi->domaintxfmrf_info[tile_idx].level == 0) {
673 loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
674 dst_stride);
675 return;
676 }
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800677 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
678 tile_width, tile_height, width, height, 0, 0,
679 &h_start, &h_end, &v_start, &v_end);
David Barker9666e752016-12-08 11:25:47 +0000680 av1_domaintxfmrf_restoration(
681 data + h_start + v_start * stride, h_end - h_start, v_end - v_start,
682 stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r,
Yaowu Xubf1d62d2016-12-14 19:20:46 -0800683 dst + h_start + v_start * dst_stride, dst_stride, tmpbuf);
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800684}
685
686static void loop_domaintxfmrf_filter(uint8_t *data, int width, int height,
687 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +0000688 uint8_t *dst, int dst_stride) {
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800689 int tile_idx;
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800690 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
691 loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800692 dst, dst_stride);
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800693 }
694}
695
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700696static void loop_switchable_filter(uint8_t *data, int width, int height,
697 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +0000698 uint8_t *dst, int dst_stride) {
David Barker025b2542016-12-08 11:50:42 +0000699 int tile_idx;
David Barker025b2542016-12-08 11:50:42 +0000700 extend_frame(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700701 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
David Barker9666e752016-12-08 11:25:47 +0000702 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
703 loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
704 dst_stride);
705 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
David Barker025b2542016-12-08 11:50:42 +0000706 loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst,
707 dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700708 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) {
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800709 loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst, dst,
710 dst_stride);
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800711 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) {
712 loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800713 dst, dst_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700714 }
715 }
716}
717
Yaowu Xuf883b422016-08-30 14:01:10 -0700718#if CONFIG_AOM_HIGHBITDEPTH
David Barker025b2542016-12-08 11:50:42 +0000719static void extend_frame_highbd(uint16_t *data, int width, int height,
720 int stride) {
721 uint16_t *data_p;
722 int i, j;
723 for (i = 0; i < height; ++i) {
724 data_p = data + i * stride;
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800725 for (j = -WIENER_HALFWIN; j < 0; ++j) data_p[j] = data_p[0];
726 for (j = width; j < width + WIENER_HALFWIN; ++j)
David Barker025b2542016-12-08 11:50:42 +0000727 data_p[j] = data_p[width - 1];
728 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800729 data_p = data - WIENER_HALFWIN;
730 for (i = -WIENER_HALFWIN; i < 0; ++i) {
David Barker025b2542016-12-08 11:50:42 +0000731 memcpy(data_p + i * stride, data_p,
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800732 (width + 2 * WIENER_HALFWIN) * sizeof(uint16_t));
David Barker025b2542016-12-08 11:50:42 +0000733 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800734 for (i = height; i < height + WIENER_HALFWIN; ++i) {
David Barker025b2542016-12-08 11:50:42 +0000735 memcpy(data_p + i * stride, data_p + (height - 1) * stride,
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800736 (width + 2 * WIENER_HALFWIN) * sizeof(uint16_t));
David Barker025b2542016-12-08 11:50:42 +0000737 }
738}
739
David Barker9666e752016-12-08 11:25:47 +0000740static void loop_copy_tile_highbd(uint16_t *data, int tile_idx, int subtile_idx,
741 int subtile_bits, int width, int height,
742 int stride, RestorationInternal *rst,
743 uint16_t *dst, int dst_stride) {
744 const int tile_width = rst->tile_width >> rst->subsampling_x;
745 const int tile_height = rst->tile_height >> rst->subsampling_y;
746 int i;
747 int h_start, h_end, v_start, v_end;
748 av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, rst->nhtiles,
749 rst->nvtiles, tile_width, tile_height, width, height,
750 0, 0, &h_start, &h_end, &v_start, &v_end);
751 for (i = v_start; i < v_end; ++i)
752 memcpy(dst + i * dst_stride + h_start, data + i * stride + h_start,
753 (h_end - h_start) * sizeof(*dst));
754}
755
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700756static void loop_wiener_filter_tile_highbd(uint16_t *data, int tile_idx,
757 int width, int height, int stride,
758 RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +0000759 int bit_depth, uint16_t *dst,
760 int dst_stride) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700761 const int tile_width = rst->tile_width >> rst->subsampling_x;
762 const int tile_height = rst->tile_height >> rst->subsampling_y;
763 int h_start, h_end, v_start, v_end;
764 int i, j;
David Barker025b2542016-12-08 11:50:42 +0000765 DECLARE_ALIGNED(16, InterpKernel, hkernel);
766 DECLARE_ALIGNED(16, InterpKernel, vkernel);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700767
David Barker9666e752016-12-08 11:25:47 +0000768 if (rst->rsi->wiener_info[tile_idx].level == 0) {
769 loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst,
770 dst_stride);
771 return;
772 }
David Barker025b2542016-12-08 11:50:42 +0000773 // TODO(david.barker): Store hfilter/vfilter as an InterpKernel
774 // instead of the current format. Then this can be removed.
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800775 assert(WIENER_WIN == SUBPEL_TAPS - 1);
776 for (i = 0; i < WIENER_WIN; ++i) {
David Barker025b2542016-12-08 11:50:42 +0000777 hkernel[i] = rst->rsi->wiener_info[tile_idx].hfilter[i];
778 vkernel[i] = rst->rsi->wiener_info[tile_idx].vfilter[i];
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700779 }
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800780 hkernel[WIENER_WIN] = 0;
781 vkernel[WIENER_WIN] = 0;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700782 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
David Barker025b2542016-12-08 11:50:42 +0000783 tile_width, tile_height, width, height, 0, 0,
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700784 &h_start, &h_end, &v_start, &v_end);
David Barker025b2542016-12-08 11:50:42 +0000785 // Convolve the whole tile (done in blocks here to match the requirements
786 // of the vectorized convolve functions, but the result is equivalent)
787 for (i = v_start; i < v_end; i += MAX_SB_SIZE)
788 for (j = h_start; j < h_end; j += MAX_SB_SIZE) {
789 int w = AOMMIN(MAX_SB_SIZE, (h_end - j + 15) & ~15);
790 int h = AOMMIN(MAX_SB_SIZE, (v_end - i + 15) & ~15);
791 const uint16_t *data_p = data + i * stride + j;
792 uint16_t *dst_p = dst + i * dst_stride + j;
793 aom_highbd_convolve8_c(CONVERT_TO_BYTEPTR(data_p), stride,
794 CONVERT_TO_BYTEPTR(dst_p), dst_stride, hkernel, 16,
795 vkernel, 16, w, h, bit_depth);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700796 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700797}
798
Yaowu Xuc27fc142016-08-22 16:08:15 -0700799static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
800 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +0000801 int bit_depth, uint8_t *dst8,
802 int dst_stride) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700803 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
David Barker9666e752016-12-08 11:25:47 +0000804 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800805 int tile_idx;
David Barker025b2542016-12-08 11:50:42 +0000806 extend_frame_highbd(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700807 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700808 loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
David Barker025b2542016-12-08 11:50:42 +0000809 bit_depth, dst, dst_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700810 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700811}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700812
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800813static void apply_selfguided_restoration_highbd(uint16_t *dat, int width,
814 int height, int stride,
815 int bit_depth, int eps,
816 int *xqd, uint16_t *dst,
817 int dst_stride, void *tmpbuf) {
818 int xq[2];
819 int32_t *flt1 = (int32_t *)tmpbuf;
820 int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
821 uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX);
822 int i, j;
823 for (i = 0; i < height; ++i) {
824 for (j = 0; j < width; ++j) {
825 assert(i * width + j < RESTORATION_TILEPELS_MAX);
826 flt1[i * width + j] = dat[i * stride + j];
827 flt2[i * width + j] = dat[i * stride + j];
828 }
829 }
830 av1_selfguided_restoration(flt1, width, height, width, bit_depth,
831 sgr_params[eps].r1, sgr_params[eps].e1, tmpbuf2);
832 av1_selfguided_restoration(flt2, width, height, width, bit_depth,
833 sgr_params[eps].r2, sgr_params[eps].e2, tmpbuf2);
834 decode_xq(xqd, xq);
835 for (i = 0; i < height; ++i) {
836 for (j = 0; j < width; ++j) {
837 const int k = i * width + j;
838 const int l = i * stride + j;
839 const int m = i * dst_stride + j;
840 const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
841 const int32_t f1 = (int32_t)flt1[k] - u;
842 const int32_t f2 = (int32_t)flt2[k] - u;
843 const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
844 const int16_t w =
845 (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
846 dst[m] = (uint16_t)clip_pixel_highbd(w, bit_depth);
847 }
848 }
849}
850
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700851static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
852 int width, int height, int stride,
853 RestorationInternal *rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800854 int bit_depth, uint16_t *dst,
855 int dst_stride) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700856 const int tile_width = rst->tile_width >> rst->subsampling_x;
857 const int tile_height = rst->tile_height >> rst->subsampling_y;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700858 int h_start, h_end, v_start, v_end;
David Barker9666e752016-12-08 11:25:47 +0000859 uint16_t *data_p, *dst_p;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800860 uint16_t *dat = (uint16_t *)rst->tmpbuf;
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800861 uint8_t *tmpbuf =
862 (uint8_t *)rst->tmpbuf + RESTORATION_TILEPELS_MAX * sizeof(*dat);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700863
David Barker9666e752016-12-08 11:25:47 +0000864 if (rst->rsi->sgrproj_info[tile_idx].level == 0) {
865 loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst,
866 dst_stride);
867 return;
868 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700869 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
870 tile_width, tile_height, width, height, 0, 0,
871 &h_start, &h_end, &v_start, &v_end);
872 data_p = data + h_start + v_start * stride;
David Barker9666e752016-12-08 11:25:47 +0000873 dst_p = dst + h_start + v_start * dst_stride;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -0800874 apply_selfguided_restoration_highbd(
875 data_p, h_end - h_start, v_end - v_start, stride, bit_depth,
876 rst->rsi->sgrproj_info[tile_idx].ep, rst->rsi->sgrproj_info[tile_idx].xqd,
877 dst_p, dst_stride, tmpbuf);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700878}
879
880static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height,
881 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +0000882 int bit_depth, uint8_t *dst8,
883 int dst_stride) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700884 int tile_idx;
885 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
David Barker9666e752016-12-08 11:25:47 +0000886 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700887 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
888 loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800889 bit_depth, dst, dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700890 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700891}
892
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800893static void apply_domaintxfmrf_hor_highbd(int iter, int param, uint16_t *img,
894 int width, int height, int img_stride,
895 int32_t *dat, int dat_stride,
896 int bd) {
897 const int shift = (bd - 8);
898 int i, j;
899 for (i = 0; i < height; ++i) {
900 uint16_t *ip = &img[i * img_stride];
901 int32_t *dp = &dat[i * dat_stride];
902 *dp *= DOMAINTXFMRF_VTABLE_PREC;
903 dp++;
904 ip++;
905 // left to right
906 for (j = 1; j < width; ++j, dp++, ip++) {
907 const int v =
908 domaintxfmrf_vtable[iter][param]
909 [abs((ip[0] >> shift) - (ip[-1] >> shift))];
910 dp[0] = dp[0] * (DOMAINTXFMRF_VTABLE_PREC - v) +
911 ((v * dp[-1] + DOMAINTXFMRF_VTABLE_PREC / 2) >>
912 DOMAINTXFMRF_VTABLE_PRECBITS);
913 }
914 // right to left
915 dp -= 2;
916 ip -= 2;
917 for (j = width - 2; j >= 0; --j, dp--, ip--) {
918 const int v =
919 domaintxfmrf_vtable[iter][param]
920 [abs((ip[1] >> shift) - (ip[0] >> shift))];
921 dp[0] = (dp[0] * (DOMAINTXFMRF_VTABLE_PREC - v) + v * dp[1] +
922 DOMAINTXFMRF_VTABLE_PREC / 2) >>
923 DOMAINTXFMRF_VTABLE_PRECBITS;
924 }
925 }
926}
927
928static void apply_domaintxfmrf_ver_highbd(int iter, int param, uint16_t *img,
929 int width, int height, int img_stride,
930 int32_t *dat, int dat_stride,
931 int bd) {
932 int i, j;
933 const int shift = (bd - 8);
934 for (j = 0; j < width; ++j) {
935 uint16_t *ip = &img[j];
936 int32_t *dp = &dat[j];
937 dp += dat_stride;
938 ip += img_stride;
939 // top to bottom
940 for (i = 1; i < height; ++i, dp += dat_stride, ip += img_stride) {
941 const int v = domaintxfmrf_vtable[iter][param][abs(
942 (ip[0] >> shift) - (ip[-img_stride] >> shift))];
943 dp[0] = (dp[0] * (DOMAINTXFMRF_VTABLE_PREC - v) +
944 (dp[-dat_stride] * v + DOMAINTXFMRF_VTABLE_PREC / 2)) >>
945 DOMAINTXFMRF_VTABLE_PRECBITS;
946 }
947 // bottom to top
948 dp -= 2 * dat_stride;
949 ip -= 2 * img_stride;
950 for (i = height - 2; i >= 0; --i, dp -= dat_stride, ip -= img_stride) {
951 const int v = domaintxfmrf_vtable[iter][param][abs(
952 (ip[img_stride] >> shift) - (ip[0] >> shift))];
953 dp[0] = (dp[0] * (DOMAINTXFMRF_VTABLE_PREC - v) + dp[dat_stride] * v +
954 DOMAINTXFMRF_VTABLE_PREC / 2) >>
955 DOMAINTXFMRF_VTABLE_PRECBITS;
956 }
957 }
958}
959
960void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height,
David Barker9666e752016-12-08 11:25:47 +0000961 int stride, int param, int bit_depth,
Yaowu Xubf1d62d2016-12-14 19:20:46 -0800962 uint16_t *dst, int dst_stride,
963 int32_t *tmpbuf) {
964 int32_t *dat = tmpbuf;
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800965 int i, j, t;
966 for (i = 0; i < height; ++i) {
967 for (j = 0; j < width; ++j) {
968 dat[i * width + j] = dgd[i * stride + j];
969 }
970 }
971 for (t = 0; t < DOMAINTXFMRF_ITERS; ++t) {
972 apply_domaintxfmrf_hor_highbd(t, param, dgd, width, height, stride, dat,
973 width, bit_depth);
974 apply_domaintxfmrf_ver_highbd(t, param, dgd, width, height, stride, dat,
975 width, bit_depth);
976 apply_domaintxfmrf_reduce_prec(dat, width, height, width);
977 }
978 for (i = 0; i < height; ++i) {
979 for (j = 0; j < width; ++j) {
David Barker9666e752016-12-08 11:25:47 +0000980 dst[i * dst_stride + j] =
981 clip_pixel_highbd(dat[i * width + j], bit_depth);
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800982 }
983 }
984}
985
David Barker9666e752016-12-08 11:25:47 +0000986static void loop_domaintxfmrf_filter_tile_highbd(
987 uint16_t *data, int tile_idx, int width, int height, int stride,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800988 RestorationInternal *rst, int bit_depth, uint16_t *dst, int dst_stride) {
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800989 const int tile_width = rst->tile_width >> rst->subsampling_x;
990 const int tile_height = rst->tile_height >> rst->subsampling_y;
991 int h_start, h_end, v_start, v_end;
Debargha Mukherjee874d36d2016-12-14 16:53:17 -0800992 int32_t *tmpbuf = (int32_t *)rst->tmpbuf;
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800993
David Barker9666e752016-12-08 11:25:47 +0000994 if (rst->rsi->domaintxfmrf_info[tile_idx].level == 0) {
995 loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst,
996 dst_stride);
997 return;
998 }
Debargha Mukherjee3981be92016-11-21 09:35:44 -0800999 av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
1000 tile_width, tile_height, width, height, 0, 0,
1001 &h_start, &h_end, &v_start, &v_end);
1002 av1_domaintxfmrf_restoration_highbd(
1003 data + h_start + v_start * stride, h_end - h_start, v_end - v_start,
David Barker9666e752016-12-08 11:25:47 +00001004 stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r, bit_depth,
Yaowu Xubf1d62d2016-12-14 19:20:46 -08001005 dst + h_start + v_start * dst_stride, dst_stride, tmpbuf);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001006}
1007
1008static void loop_domaintxfmrf_filter_highbd(uint8_t *data8, int width,
1009 int height, int stride,
1010 RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001011 int bit_depth, uint8_t *dst8,
1012 int dst_stride) {
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001013 int tile_idx;
1014 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
David Barker9666e752016-12-08 11:25:47 +00001015 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001016 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
1017 loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height, stride,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001018 rst, bit_depth, dst, dst_stride);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001019 }
1020}
1021
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001022static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
1023 int stride, RestorationInternal *rst,
David Barker9666e752016-12-08 11:25:47 +00001024 int bit_depth, uint8_t *dst8,
1025 int dst_stride) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001026 uint16_t *data = CONVERT_TO_SHORTPTR(data8);
David Barker9666e752016-12-08 11:25:47 +00001027 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001028 int tile_idx;
David Barker025b2542016-12-08 11:50:42 +00001029 extend_frame_highbd(data, width, height, stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001030 for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
David Barker9666e752016-12-08 11:25:47 +00001031 if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
1032 loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst,
1033 dst, dst_stride);
1034 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001035 loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
David Barker025b2542016-12-08 11:50:42 +00001036 bit_depth, dst, dst_stride);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001037 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) {
1038 loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001039 rst, bit_depth, dst, dst_stride);
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001040 } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) {
1041 loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height,
David Barker9666e752016-12-08 11:25:47 +00001042 stride, rst, bit_depth, dst,
Debargha Mukherjee874d36d2016-12-14 16:53:17 -08001043 dst_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001044 }
1045 }
1046}
Yaowu Xuf883b422016-08-30 14:01:10 -07001047#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001048
Yaowu Xuf883b422016-08-30 14:01:10 -07001049void av1_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
David Barker9666e752016-12-08 11:25:47 +00001050 int start_mi_row, int end_mi_row, int y_only,
1051 YV12_BUFFER_CONFIG *dst) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001052 const int ywidth = frame->y_crop_width;
1053 const int ystride = frame->y_stride;
1054 const int uvwidth = frame->uv_crop_width;
1055 const int uvstride = frame->uv_stride;
1056 const int ystart = start_mi_row << MI_SIZE_LOG2;
1057 const int uvstart = ystart >> cm->subsampling_y;
1058 int yend = end_mi_row << MI_SIZE_LOG2;
1059 int uvend = yend >> cm->subsampling_y;
Debargha Mukherjee0e67b252016-12-08 09:22:44 -08001060 restore_func_type restore_funcs[RESTORE_TYPES] = { NULL, loop_wiener_filter,
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001061 loop_sgrproj_filter,
Debargha Mukherjee3981be92016-11-21 09:35:44 -08001062 loop_domaintxfmrf_filter,
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001063 loop_switchable_filter };
1064#if CONFIG_AOM_HIGHBITDEPTH
1065 restore_func_highbd_type restore_funcs_highbd[RESTORE_TYPES] = {
Debargha Mukherjee0e67b252016-12-08 09:22:44 -08001066 NULL, loop_wiener_filter_highbd, loop_sgrproj_filter_highbd,
1067 loop_domaintxfmrf_filter_highbd, loop_switchable_filter_highbd
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001068 };
1069#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001070 restore_func_type restore_func =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001071 restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
Yaowu Xuf883b422016-08-30 14:01:10 -07001072#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001073 restore_func_highbd_type restore_func_highbd =
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001074 restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type];
Yaowu Xuf883b422016-08-30 14:01:10 -07001075#endif // CONFIG_AOM_HIGHBITDEPTH
David Barker9666e752016-12-08 11:25:47 +00001076 YV12_BUFFER_CONFIG dst_;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001077
Yaowu Xuf883b422016-08-30 14:01:10 -07001078 yend = AOMMIN(yend, cm->height);
1079 uvend = AOMMIN(uvend, cm->subsampling_y ? (cm->height + 1) >> 1 : cm->height);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001080
David Barker9666e752016-12-08 11:25:47 +00001081 if (cm->rst_internal.rsi->frame_restoration_type == RESTORE_NONE) {
1082 if (dst) {
1083 if (y_only)
1084 aom_yv12_copy_y(frame, dst);
1085 else
1086 aom_yv12_copy_frame(frame, dst);
1087 }
1088 return;
1089 }
1090
Debargha Mukherjee818e42a2016-12-12 11:52:56 -08001091 if (y_only == 0)
1092 y_only = override_y_only[cm->rst_internal.rsi->frame_restoration_type];
David Barker9666e752016-12-08 11:25:47 +00001093 if (!dst) {
1094 dst = &dst_;
1095 memset(dst, 0, sizeof(YV12_BUFFER_CONFIG));
1096 if (aom_realloc_frame_buffer(
1097 dst, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y,
1098#if CONFIG_AOM_HIGHBITDEPTH
1099 cm->use_highbitdepth,
1100#endif
1101 AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL) < 0)
1102 aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
1103 "Failed to allocate restoration dst buffer");
1104 }
Debargha Mukherjee818e42a2016-12-12 11:52:56 -08001105
Yaowu Xuf883b422016-08-30 14:01:10 -07001106#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001107 if (cm->use_highbitdepth)
1108 restore_func_highbd(frame->y_buffer + ystart * ystride, ywidth,
1109 yend - ystart, ystride, &cm->rst_internal,
David Barker025b2542016-12-08 11:50:42 +00001110 cm->bit_depth, dst->y_buffer + ystart * dst->y_stride,
1111 dst->y_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001112 else
Yaowu Xuf883b422016-08-30 14:01:10 -07001113#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001114 restore_func(frame->y_buffer + ystart * ystride, ywidth, yend - ystart,
1115 ystride, &cm->rst_internal,
David Barker9666e752016-12-08 11:25:47 +00001116 dst->y_buffer + ystart * dst->y_stride, dst->y_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001117 if (!y_only) {
1118 cm->rst_internal.subsampling_x = cm->subsampling_x;
1119 cm->rst_internal.subsampling_y = cm->subsampling_y;
Yaowu Xuf883b422016-08-30 14:01:10 -07001120#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001121 if (cm->use_highbitdepth) {
David Barker025b2542016-12-08 11:50:42 +00001122 restore_func_highbd(
1123 frame->u_buffer + uvstart * uvstride, uvwidth, uvend - uvstart,
1124 uvstride, &cm->rst_internal, cm->bit_depth,
1125 dst->u_buffer + uvstart * dst->uv_stride, dst->uv_stride);
1126 restore_func_highbd(
1127 frame->v_buffer + uvstart * uvstride, uvwidth, uvend - uvstart,
1128 uvstride, &cm->rst_internal, cm->bit_depth,
1129 dst->v_buffer + uvstart * dst->uv_stride, dst->uv_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001130 } else {
Yaowu Xuf883b422016-08-30 14:01:10 -07001131#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001132 restore_func(frame->u_buffer + uvstart * uvstride, uvwidth,
1133 uvend - uvstart, uvstride, &cm->rst_internal,
David Barker025b2542016-12-08 11:50:42 +00001134 dst->u_buffer + uvstart * dst->uv_stride, dst->uv_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001135 restore_func(frame->v_buffer + uvstart * uvstride, uvwidth,
1136 uvend - uvstart, uvstride, &cm->rst_internal,
David Barker025b2542016-12-08 11:50:42 +00001137 dst->v_buffer + uvstart * dst->uv_stride, dst->uv_stride);
Yaowu Xuf883b422016-08-30 14:01:10 -07001138#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001139 }
Yaowu Xuf883b422016-08-30 14:01:10 -07001140#endif // CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001141 }
David Barker9666e752016-12-08 11:25:47 +00001142
David Barker9666e752016-12-08 11:25:47 +00001143 if (dst == &dst_) {
1144 if (y_only)
1145 aom_yv12_copy_y(dst, frame);
1146 else
1147 aom_yv12_copy_frame(dst, frame);
1148 aom_free_frame_buffer(dst);
1149 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001150}
1151
Yaowu Xuf883b422016-08-30 14:01:10 -07001152void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
1153 RestorationInfo *rsi, int y_only,
David Barker9666e752016-12-08 11:25:47 +00001154 int partial_frame, YV12_BUFFER_CONFIG *dst) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001155 int start_mi_row, end_mi_row, mi_rows_to_filter;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001156 if (rsi->frame_restoration_type != RESTORE_NONE) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001157 start_mi_row = 0;
1158 mi_rows_to_filter = cm->mi_rows;
1159 if (partial_frame && cm->mi_rows > 8) {
1160 start_mi_row = cm->mi_rows >> 1;
1161 start_mi_row &= 0xfffffff8;
Yaowu Xuf883b422016-08-30 14:01:10 -07001162 mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001163 }
1164 end_mi_row = start_mi_row + mi_rows_to_filter;
Yaowu Xuf883b422016-08-30 14:01:10 -07001165 av1_loop_restoration_init(&cm->rst_internal, rsi,
1166 cm->frame_type == KEY_FRAME, cm->width,
1167 cm->height);
David Barker9666e752016-12-08 11:25:47 +00001168 av1_loop_restoration_rows(frame, cm, start_mi_row, end_mi_row, y_only, dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001169 }
1170}