blob: d8b2e1d306707727fe959d005f38257821bfda2b [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 *
 */
12
13#include <math.h>
14
Yaowu Xuf883b422016-08-30 14:01:10 -070015#include "./aom_config.h"
16#include "./aom_dsp_rtcd.h"
David Barker9666e752016-12-08 11:25:47 +000017#include "./aom_scale_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070018#include "av1/common/onyxc_int.h"
19#include "av1/common/restoration.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070020#include "aom_dsp/aom_dsp_common.h"
21#include "aom_mem/aom_mem.h"
Debargha Mukherjee76be32d2017-08-15 16:45:13 -070022
Yaowu Xuc27fc142016-08-22 16:08:15 -070023#include "aom_ports/mem.h"
24
Debargha Mukherjee8f209a82016-10-12 10:47:01 -070025const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -080026#if USE_HIGHPASS_IN_SGRPROJ
27 // corner, edge, r2, eps2
28 { -1, 2, 1, 1 }, { -1, 2, 1, 2 }, { -1, 2, 1, 3 }, { -1, 2, 1, 4 },
29 { -1, 2, 1, 5 }, { -2, 3, 1, 2 }, { -2, 3, 1, 3 }, { -2, 3, 1, 4 },
30 { -2, 3, 1, 5 }, { -2, 3, 1, 6 }, { -3, 4, 1, 3 }, { -3, 4, 1, 4 },
31 { -3, 4, 1, 5 }, { -3, 4, 1, 6 }, { -3, 4, 1, 7 }, { -3, 4, 1, 8 }
32#else
Debargha Mukherjee76be32d2017-08-15 16:45:13 -070033// r1, eps1, r2, eps2
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -070034#if MAX_RADIUS == 2
Debargha Mukherjee76be32d2017-08-15 16:45:13 -070035 { 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 },
36 { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 },
Debargha Mukherjeee5fabfb2017-10-10 09:10:24 -070037 { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 2, 30, 1, 6 },
Debargha Mukherjee76be32d2017-08-15 16:45:13 -070038 { 2, 50, 1, 12 }, { 2, 60, 1, 13 }, { 2, 70, 1, 14 }, { 2, 80, 1, 15 },
39#else
Debargha Mukherjeeb3c43bc2017-02-01 13:09:03 -080040 { 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 },
41 { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 },
42 { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 3, 30, 1, 10 },
43 { 3, 50, 1, 12 }, { 3, 50, 2, 25 }, { 3, 60, 2, 35 }, { 3, 70, 2, 45 },
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -070044#endif // MAX_RADIUS == 2
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -080045#endif
Debargha Mukherjee8f209a82016-10-12 10:47:01 -070046};
47
David Barkerbefcc422017-01-31 09:42:10 +000048int av1_alloc_restoration_struct(AV1_COMMON *cm, RestorationInfo *rst_info,
49 int width, int height) {
Debargha Mukherjee1008c1e2017-03-06 19:18:43 -080050 const int ntiles = av1_get_rest_ntiles(
Rupert Swarbrick64b8bbd2017-10-16 15:53:07 +010051 width, height, rst_info->restoration_tilesize, NULL, NULL);
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +010052 aom_free(rst_info->unit_info);
David Barkerbefcc422017-01-31 09:42:10 +000053 CHECK_MEM_ERROR(
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +010054 cm, rst_info->unit_info,
55 (RestorationUnitInfo *)aom_malloc(sizeof(*rst_info->unit_info) * ntiles));
Debargha Mukherjee874d36d2016-12-14 16:53:17 -080056 return ntiles;
57}
58
59void av1_free_restoration_struct(RestorationInfo *rst_info) {
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +010060 aom_free(rst_info->unit_info);
61 rst_info->unit_info = NULL;
Debargha Mukherjee874d36d2016-12-14 16:53:17 -080062}
63
Debargha Mukherjee4be12622017-02-15 21:38:02 -080064// TODO(debargha): This table can be substantially reduced since only a few
65// values are actually used.
David Barkerce110cc2017-02-22 10:38:59 +000066int sgrproj_mtable[MAX_EPS][MAX_NELEM];
Debargha Mukherjee4be12622017-02-15 21:38:02 -080067
68static void GenSgrprojVtable() {
69 int e, n;
70 for (e = 1; e <= MAX_EPS; ++e)
71 for (n = 1; n <= MAX_NELEM; ++n) {
72 const int n2e = n * n * e;
73 sgrproj_mtable[e - 1][n - 1] =
74 (((1 << SGRPROJ_MTABLE_BITS) + n2e / 2) / n2e);
75 }
76}
Debargha Mukherjee4be12622017-02-15 21:38:02 -080077
// One-time precomputation for loop restoration: builds sgrproj_mtable.
void av1_loop_restoration_precal(void) { GenSgrprojVtable(); }
Yaowu Xuc27fc142016-08-22 16:08:15 -070079
// Pads an 8-bit image in place by edge replication.
//   data        - pointer to the top-left pixel of the width x height image
//   stride      - distance in pixels between the starts of consecutive rows
//   border_horz - columns to fill to the left and to the right of each row
//   border_vert - rows to fill above and below the image
// The caller must own border_horz pixels on each side of every row and
// border_vert rows above and below the image.
static void extend_frame_lowbd(uint8_t *data, int width, int height, int stride,
                               int border_horz, int border_vert) {
  uint8_t *data_p;
  int i;
  // Left/right: replicate the first and last pixel of each row.
  for (i = 0; i < height; ++i) {
    data_p = data + i * stride;
    memset(data_p - border_horz, data_p[0], border_horz);
    memset(data_p + width, data_p[width - 1], border_horz);
  }
  // Top/bottom: copy the already-widened first and last rows outward, which
  // also fills the corners.
  data_p = data - border_horz;
  for (i = -border_vert; i < 0; ++i) {
    memcpy(data_p + i * stride, data_p, width + 2 * border_horz);
  }
  for (i = height; i < height + border_vert; ++i) {
    memcpy(data_p + i * stride, data_p + (height - 1) * stride,
           width + 2 * border_horz);
  }
}
98
Rupert Swarbrickd3d06152017-10-19 10:31:57 +010099#if CONFIG_HIGHBITDEPTH
// 16-bit counterpart of extend_frame_lowbd: pads the image in place by edge
// replication. width, stride and border_horz are in uint16_t pixels, not
// bytes. The caller must own the border area around the image.
static void extend_frame_highbd(uint16_t *data, int width, int height,
                                int stride, int border_horz, int border_vert) {
  uint16_t *data_p;
  int i, j;
  // Left/right: memset cannot write 16-bit values, so fill pixel by pixel.
  for (i = 0; i < height; ++i) {
    data_p = data + i * stride;
    for (j = -border_horz; j < 0; ++j) data_p[j] = data_p[0];
    for (j = width; j < width + border_horz; ++j) data_p[j] = data_p[width - 1];
  }
  // Top/bottom: copy the widened first and last rows outward (fills corners).
  data_p = data - border_horz;
  for (i = -border_vert; i < 0; ++i) {
    memcpy(data_p + i * stride, data_p,
           (width + 2 * border_horz) * sizeof(uint16_t));
  }
  for (i = height; i < height + border_vert; ++i) {
    memcpy(data_p + i * stride, data_p + (height - 1) * stride,
           (width + 2 * border_horz) * sizeof(uint16_t));
  }
}
119#endif
Ola Hugosson1e7f2d02017-09-22 21:36:26 +0200120
// Pads a frame in place by edge replication, dispatching on bit depth.
// When highbd is non-zero (only valid if CONFIG_HIGHBITDEPTH is enabled),
// data is converted with CONVERT_TO_SHORTPTR and handled as 16-bit pixels;
// otherwise it is handled as 8-bit pixels.
void extend_frame(uint8_t *data, int width, int height, int stride,
                  int border_horz, int border_vert, int highbd) {
#if CONFIG_HIGHBITDEPTH
  if (highbd) {
    extend_frame_highbd(CONVERT_TO_SHORTPTR(data), width, height, stride,
                        border_horz, border_vert);
    return;
  }
#else
  assert(highbd == 0);
  (void)highbd;
#endif
  extend_frame_lowbd(data, width, height, stride, border_horz, border_vert);
}
134
// Copies a width x height rectangle of 8-bit pixels from src to dst, row by
// row. The two buffers may have different strides; rows must not overlap.
static void copy_tile_lowbd(int width, int height, const uint8_t *src,
                            int src_stride, uint8_t *dst, int dst_stride) {
  for (int i = 0; i < height; ++i) {
    memcpy(dst + i * dst_stride, src + i * src_stride, width);
  }
}
140
141#if CONFIG_HIGHBITDEPTH
// Copies a width x height rectangle of 16-bit pixels from src to dst, row by
// row. width and both strides are in uint16_t pixels, not bytes.
static void copy_tile_highbd(int width, int height, const uint16_t *src,
                             int src_stride, uint16_t *dst, int dst_stride) {
  for (int i = 0; i < height; ++i) {
    memcpy(dst + i * dst_stride, src + i * src_stride, width * sizeof(*dst));
  }
}
147#endif
148
// Copies a width x height rectangle from src to dst, dispatching on bit depth.
// When highbd is non-zero (only valid if CONFIG_HIGHBITDEPTH is enabled),
// both pointers are converted with CONVERT_TO_SHORTPTR and copied as 16-bit
// pixels; otherwise they are copied as 8-bit pixels.
static void copy_tile(int width, int height, const uint8_t *src, int src_stride,
                      uint8_t *dst, int dst_stride, int highbd) {
#if CONFIG_HIGHBITDEPTH
  if (highbd) {
    copy_tile_highbd(width, height, CONVERT_TO_SHORTPTR(src), src_stride,
                     CONVERT_TO_SHORTPTR(dst), dst_stride);
    return;
  }
#else
  assert(highbd == 0);
  (void)highbd;
#endif
  copy_tile_lowbd(width, height, src, src_stride, dst, dst_stride);
}
162
163#if CONFIG_STRIPED_LOOP_RESTORATION
// Yields a uint8_t pointer suitable for memcpy: when hbd is set, d is passed
// through CONVERT_TO_SHORTPTR and cast back to uint8_t *; otherwise d is used
// as is.
#define REAL_PTR(hbd, d) ((hbd) ? (uint8_t *)CONVERT_TO_SHORTPTR(d) : (d))
165
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100166// With striped loop restoration, the filtering for each 64-pixel stripe gets
167// most of its input from the output of CDEF (stored in data8), but pixels just
168// above and below the stripe come straight from the deblocker. These have been
169// stored away in separate buffers.
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100170//
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100171// This function modifies data8 (which was the output from CDEF) by copying in
172// the boundary pixels. Before doing so, it saves the pixels that get
173// overwritten into a temporary buffer. They will be restored again by
174// restore_processing_stripe_boundary.
175//
176// limits gives the rectangular limits of the remaining stripes for the current
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100177// restoration unit. rsb is the stored stripe boundaries (the saved output from
178// the deblocker). stripe_height is the height of each stripe. ss_y is true if
179// we're on a chroma plane with vertical subsampling. use_highbd is true if the
180// data has 2 bytes per pixel. rlbs contain scratch buffers to hold the CDEF
181// data (written back to the frame by restore_processing_stripe_boundary)
182static int setup_processing_stripe_boundary(
183 const RestorationTileLimits *limits, const RestorationStripeBoundaries *rsb,
184 int stripe_height, int ss_y, int use_highbd, uint8_t *data8, int stride,
185 RestorationLineBuffers *rlbs) {
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100186 // Which stripe is this? limits->v_start is the top of the stripe in pixel
187 // units, but we add tile_offset to get the number of pixels from the top of
188 // the first stripe, which lies off the image.
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100189 const int tile_offset = RESTORATION_TILE_OFFSET >> ss_y;
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100190 const int stripe_index = (limits->v_start + tile_offset) / stripe_height;
Ola Hugosson1e7f2d02017-09-22 21:36:26 +0200191
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100192 // Horizontal offsets within the line buffers. The buffer logically starts at
193 // column -RESTORATION_EXTRA_HORZ. We'll start our copy from the column
194 // limits->h_start - RESTORATION_EXTRA_HORZ and copy up to the column
195 // limits->h_end + RESTORATION_EXTRA_HORZ.
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100196 const int buf_stride = rsb->stripe_boundary_stride;
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100197 const int buf_x0_off = limits->h_start;
198 const int line_width =
199 (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ;
200 const int line_size = line_width << use_highbd;
201 const int data_x0_off = limits->h_start - RESTORATION_EXTRA_HORZ;
Ola Hugosson1e7f2d02017-09-22 21:36:26 +0200202
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100203 assert(CONFIG_HIGHBITDEPTH || !use_highbd);
204
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100205 // Replace the pixels above the top of the stripe, unless this is the top of
206 // the image.
207 if (stripe_index > 0) {
208 const int above_buf_y = 2 * (stripe_index - 1);
209 uint8_t *data8_tl = data8 + (limits->v_start - 2) * stride + data_x0_off;
210
211 for (int i = 0; i < 2; ++i) {
212 const int buf_off = buf_x0_off + (above_buf_y + i) * buf_stride;
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100213 const uint8_t *src = rsb->stripe_boundary_above + (buf_off << use_highbd);
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100214 uint8_t *dst8 = data8_tl + i * stride;
215 // Save old pixels, then replace with data from boundary_above_buf
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100216 memcpy(rlbs->tmp_save_above[i], REAL_PTR(use_highbd, dst8), line_size);
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100217 memcpy(REAL_PTR(use_highbd, dst8), src, line_size);
Ola Hugosson1e7f2d02017-09-22 21:36:26 +0200218 }
219 }
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100220
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100221 // Replace the pixels below the bottom of the stripe if necessary. This might
222 // not be needed if the stripe is less than stripe_height high (which might
223 // happen on the bottom of a loop restoration unit), in which case
224 // rows_needed_below might be negative.
225 const int stripe_bottom = stripe_height * (1 + stripe_index) - tile_offset;
226 const int rows_needed_below = AOMMIN(limits->v_end + 2 - stripe_bottom, 2);
227
228 const int below_buf_y = 2 * stripe_index;
229 uint8_t *data8_bl = data8 + stripe_bottom * stride + data_x0_off;
230
231 for (int i = 0; i < rows_needed_below; ++i) {
232 const int buf_off = buf_x0_off + (below_buf_y + i) * buf_stride;
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100233 const uint8_t *src = rsb->stripe_boundary_below + (buf_off << use_highbd);
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100234 uint8_t *dst8 = data8_bl + i * stride;
235 // Save old pixels, then replace with data from boundary_below_buf
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100236 memcpy(rlbs->tmp_save_below[i], REAL_PTR(use_highbd, dst8), line_size);
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100237 memcpy(REAL_PTR(use_highbd, dst8), src, line_size);
238 }
239
240 // Finally, return the actual height of this stripe.
241 return AOMMIN(limits->v_end, stripe_bottom) - limits->v_start;
Ola Hugosson1e7f2d02017-09-22 21:36:26 +0200242}
243
244// This function restores the boundary lines modified by
245// setup_processing_stripe_boundary.
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100246static void restore_processing_stripe_boundary(
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100247 const RestorationTileLimits *limits, const RestorationLineBuffers *rlbs,
248 int stripe_height, int ss_y, int use_highbd, uint8_t *data8, int stride) {
249 const int tile_offset = RESTORATION_TILE_OFFSET >> ss_y;
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100250 const int stripe_index = (limits->v_start + tile_offset) / stripe_height;
Ola Hugosson1e7f2d02017-09-22 21:36:26 +0200251
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100252 const int line_width =
253 (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ;
254 const int line_size = line_width << use_highbd;
255 const int data_x0_off = limits->h_start - RESTORATION_EXTRA_HORZ;
Ola Hugosson1e7f2d02017-09-22 21:36:26 +0200256
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100257 assert(CONFIG_HIGHBITDEPTH || !use_highbd);
258
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100259 if (stripe_index > 0) {
260 uint8_t *data8_tl = data8 + (limits->v_start - 2) * stride + data_x0_off;
261 for (int i = 0; i < 2; ++i) {
262 uint8_t *dst8 = data8_tl + i * stride;
263 // Save old pixels, then replace with data from boundary_above_buf
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100264 memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_above[i], line_size);
Ola Hugosson1e7f2d02017-09-22 21:36:26 +0200265 }
266 }
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +0100267
268 const int stripe_bottom = stripe_height * (1 + stripe_index) - tile_offset;
269 const int rows_needed_below = AOMMIN(limits->v_end + 2 - stripe_bottom, 2);
270
271 uint8_t *data8_bl = data8 + stripe_bottom * stride + data_x0_off;
272
273 for (int i = 0; i < rows_needed_below; ++i) {
274 uint8_t *dst8 = data8_bl + i * stride;
275 // Save old pixels, then replace with data from boundary_below_buf
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100276 memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[i], line_size);
Ola Hugosson1e7f2d02017-09-22 21:36:26 +0200277 }
278}
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100279#undef REAL_PTR
Ola Hugosson1e7f2d02017-09-22 21:36:26 +0200280#endif
281
Debargha Mukherjeee168a782017-08-31 12:30:10 -0700282static void stepdown_wiener_kernel(const InterpKernel orig, InterpKernel vert,
283 int boundary_dist, int istop) {
Debargha Mukherjee22bbe4c2017-08-31 12:30:10 -0700284 memcpy(vert, orig, sizeof(InterpKernel));
Debargha Mukherjeee168a782017-08-31 12:30:10 -0700285 switch (boundary_dist) {
286 case 0:
287 vert[WIENER_HALFWIN] += vert[2] + vert[1] + vert[0];
288 vert[2] = vert[1] = vert[0] = 0;
289 break;
290 case 1:
291 vert[2] += vert[1] + vert[0];
292 vert[1] = vert[0] = 0;
293 break;
294 case 2:
295 vert[1] += vert[0];
296 vert[0] = 0;
297 break;
298 default: break;
299 }
300 if (!istop) {
301 int tmp;
302 tmp = vert[0];
303 vert[0] = vert[WIENER_WIN - 1];
304 vert[WIENER_WIN - 1] = tmp;
305 tmp = vert[1];
306 vert[1] = vert[WIENER_WIN - 2];
307 vert[WIENER_WIN - 2] = tmp;
308 tmp = vert[2];
309 vert[2] = vert[WIENER_WIN - 3];
310 vert[WIENER_WIN - 3] = tmp;
311 }
Debargha Mukherjee22bbe4c2017-08-31 12:30:10 -0700312}
313
// Pick the convolution routine used by wiener_filter_stripe: the "_hip"
// variant when USE_WIENER_HIGH_INTERMEDIATE_PRECISION is set, the plain one
// otherwise.
#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
#define wiener_convolve8_add_src aom_convolve8_add_src_hip
#else
#define wiener_convolve8_add_src aom_convolve8_add_src
#endif
Ola Hugosson1e7f2d02017-09-22 21:36:26 +0200319
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100320static void wiener_filter_stripe(const RestorationUnitInfo *rui,
321 int stripe_width, int stripe_height,
322 int procunit_width, const uint8_t *src,
323 int src_stride, uint8_t *dst, int dst_stride,
324 int32_t *tmpbuf, int bit_depth) {
325 (void)tmpbuf;
326 (void)bit_depth;
327 assert(bit_depth == 8);
328
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100329 const int mid_height =
330 stripe_height - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2;
331 assert(mid_height > 0);
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100332 for (int j = 0; j < stripe_width; j += procunit_width) {
333 int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100334 const uint8_t *src_p = src + j;
335 uint8_t *dst_p = dst + j;
336 for (int b = 0; b < WIENER_HALFWIN - WIENER_BORDER_VERT; ++b) {
337 InterpKernel vertical_top;
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100338 stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_top,
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100339 WIENER_BORDER_VERT + b, 1);
340 wiener_convolve8_add_src(src_p, src_stride, dst_p, dst_stride,
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100341 rui->wiener_info.hfilter, 16, vertical_top, 16,
342 w, 1);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100343 src_p += src_stride;
344 dst_p += dst_stride;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700345 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700346
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100347 wiener_convolve8_add_src(src_p, src_stride, dst_p, dst_stride,
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100348 rui->wiener_info.hfilter, 16,
349 rui->wiener_info.vfilter, 16, w, mid_height);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100350 src_p += src_stride * mid_height;
351 dst_p += dst_stride * mid_height;
352
353 for (int b = WIENER_HALFWIN - WIENER_BORDER_VERT - 1; b >= 0; --b) {
354 InterpKernel vertical_bot;
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100355 stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_bot,
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100356 WIENER_BORDER_VERT + b, 0);
357 wiener_convolve8_add_src(src_p, src_stride, dst_p, dst_stride,
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100358 rui->wiener_info.hfilter, 16, vertical_bot, 16,
359 w, 1);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100360 src_p += src_stride;
361 dst_p += dst_stride;
362 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700363 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -0700364}
Yaowu Xuc27fc142016-08-22 16:08:15 -0700365
/* Calculate windowed sums (if sqr=0) or sums of squares (if sqr=1)
   over the input. The window is of size (2r + 1)x(2r + 1), and we
   specialize to r = 1, 2, 3. A default function is used for r > 3.

   Each loop follows the same format: We keep a window's worth of input
   in individual variables and select data out of that as appropriate.
*/

// r = 1 (3x3 window). The window is clipped at the image edges, so border
// outputs sum fewer than 9 inputs. The vertical pass reads src and writes dst;
// the horizontal pass then runs in place on dst. Requires width >= 3 and
// height >= 3.
static void boxsum1(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int i, j, a, b, c;

  // Vertical sum over 3-pixel regions, from src into dst.
  if (!sqr) {
    for (j = 0; j < width; ++j) {
      a = src[j];
      b = src[src_stride + j];
      c = src[2 * src_stride + j];

      dst[j] = a + b;
      for (i = 1; i < height - 2; ++i) {
        // Loop invariant: At the start of each iteration,
        // a = src[(i - 1) * src_stride + j]
        // b = src[(i    ) * src_stride + j]
        // c = src[(i + 1) * src_stride + j]
        dst[i * dst_stride + j] = a + b + c;
        a = b;
        b = c;
        c = src[(i + 2) * src_stride + j];
      }
      dst[i * dst_stride + j] = a + b + c;
      dst[(i + 1) * dst_stride + j] = b + c;
    }
  } else {
    for (j = 0; j < width; ++j) {
      a = src[j] * src[j];
      b = src[src_stride + j] * src[src_stride + j];
      c = src[2 * src_stride + j] * src[2 * src_stride + j];

      dst[j] = a + b;
      for (i = 1; i < height - 2; ++i) {
        dst[i * dst_stride + j] = a + b + c;
        a = b;
        b = c;
        c = src[(i + 2) * src_stride + j] * src[(i + 2) * src_stride + j];
      }
      dst[i * dst_stride + j] = a + b + c;
      dst[(i + 1) * dst_stride + j] = b + c;
    }
  }

  // Horizontal sum over 3-pixel regions of dst
  for (i = 0; i < height; ++i) {
    a = dst[i * dst_stride];
    b = dst[i * dst_stride + 1];
    c = dst[i * dst_stride + 2];

    dst[i * dst_stride] = a + b;
    for (j = 1; j < width - 2; ++j) {
      // Loop invariant: At the start of each iteration, a, b, c hold the
      // vertical sums (the values dst had before this pass) at columns
      // (j - 1), (j) and (j + 1) of row i.
      dst[i * dst_stride + j] = a + b + c;
      a = b;
      b = c;
      c = dst[i * dst_stride + (j + 2)];
    }
    dst[i * dst_stride + j] = a + b + c;
    dst[i * dst_stride + (j + 1)] = b + c;
  }
}
437
// Windowed sums (sqr = 0) or sums of squares (sqr != 0) for r = 2 (5x5
// window). The window is clipped at the image edges. The vertical pass reads
// src and writes dst; the horizontal pass then runs in place on dst.
// Requires width >= 5 and height >= 5.
static void boxsum2(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int i, j, a, b, c, d, e;

  // Vertical sum over 5-pixel regions, from src into dst.
  if (!sqr) {
    for (j = 0; j < width; ++j) {
      a = src[j];
      b = src[src_stride + j];
      c = src[2 * src_stride + j];
      d = src[3 * src_stride + j];
      e = src[4 * src_stride + j];

      dst[j] = a + b + c;
      dst[dst_stride + j] = a + b + c + d;
      for (i = 2; i < height - 3; ++i) {
        // Loop invariant: At the start of each iteration,
        // a = src[(i - 2) * src_stride + j]
        // b = src[(i - 1) * src_stride + j]
        // c = src[(i    ) * src_stride + j]
        // d = src[(i + 1) * src_stride + j]
        // e = src[(i + 2) * src_stride + j]
        dst[i * dst_stride + j] = a + b + c + d + e;
        a = b;
        b = c;
        c = d;
        d = e;
        e = src[(i + 3) * src_stride + j];
      }
      dst[i * dst_stride + j] = a + b + c + d + e;
      dst[(i + 1) * dst_stride + j] = b + c + d + e;
      dst[(i + 2) * dst_stride + j] = c + d + e;
    }
  } else {
    for (j = 0; j < width; ++j) {
      a = src[j] * src[j];
      b = src[src_stride + j] * src[src_stride + j];
      c = src[2 * src_stride + j] * src[2 * src_stride + j];
      d = src[3 * src_stride + j] * src[3 * src_stride + j];
      e = src[4 * src_stride + j] * src[4 * src_stride + j];

      dst[j] = a + b + c;
      dst[dst_stride + j] = a + b + c + d;
      for (i = 2; i < height - 3; ++i) {
        dst[i * dst_stride + j] = a + b + c + d + e;
        a = b;
        b = c;
        c = d;
        d = e;
        e = src[(i + 3) * src_stride + j] * src[(i + 3) * src_stride + j];
      }
      dst[i * dst_stride + j] = a + b + c + d + e;
      dst[(i + 1) * dst_stride + j] = b + c + d + e;
      dst[(i + 2) * dst_stride + j] = c + d + e;
    }
  }

  // Horizontal sum over 5-pixel regions of dst
  for (i = 0; i < height; ++i) {
    a = dst[i * dst_stride];
    b = dst[i * dst_stride + 1];
    c = dst[i * dst_stride + 2];
    d = dst[i * dst_stride + 3];
    e = dst[i * dst_stride + 4];

    dst[i * dst_stride] = a + b + c;
    dst[i * dst_stride + 1] = a + b + c + d;
    for (j = 2; j < width - 3; ++j) {
      // Loop invariant: At the start of each iteration, a..e hold the
      // vertical sums (the values dst had before this pass) at columns
      // (j - 2) .. (j + 2) of row i.
      dst[i * dst_stride + j] = a + b + c + d + e;
      a = b;
      b = c;
      c = d;
      d = e;
      e = dst[i * dst_stride + (j + 3)];
    }
    dst[i * dst_stride + j] = a + b + c + d + e;
    dst[i * dst_stride + (j + 1)] = b + c + d + e;
    dst[i * dst_stride + (j + 2)] = c + d + e;
  }
}
524
// Windowed sums (sqr = 0) or sums of squares (sqr != 0) for r = 3 (7x7
// window). The window is clipped at the image edges. The vertical pass reads
// src and writes dst; the horizontal pass then runs in place on dst.
// Requires width >= 7 and height >= 7.
static void boxsum3(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int i, j, a, b, c, d, e, f, g;

  // Vertical sum over 7-pixel regions, from src into dst.
  if (!sqr) {
    for (j = 0; j < width; ++j) {
      a = src[j];
      b = src[1 * src_stride + j];
      c = src[2 * src_stride + j];
      d = src[3 * src_stride + j];
      e = src[4 * src_stride + j];
      f = src[5 * src_stride + j];
      g = src[6 * src_stride + j];

      dst[j] = a + b + c + d;
      dst[dst_stride + j] = a + b + c + d + e;
      dst[2 * dst_stride + j] = a + b + c + d + e + f;
      for (i = 3; i < height - 4; ++i) {
        // At the start of each iteration a..g hold rows (i - 3) .. (i + 3)
        // of column j.
        dst[i * dst_stride + j] = a + b + c + d + e + f + g;
        a = b;
        b = c;
        c = d;
        d = e;
        e = f;
        f = g;
        g = src[(i + 4) * src_stride + j];
      }
      dst[i * dst_stride + j] = a + b + c + d + e + f + g;
      dst[(i + 1) * dst_stride + j] = b + c + d + e + f + g;
      dst[(i + 2) * dst_stride + j] = c + d + e + f + g;
      dst[(i + 3) * dst_stride + j] = d + e + f + g;
    }
  } else {
    for (j = 0; j < width; ++j) {
      a = src[j] * src[j];
      b = src[1 * src_stride + j] * src[1 * src_stride + j];
      c = src[2 * src_stride + j] * src[2 * src_stride + j];
      d = src[3 * src_stride + j] * src[3 * src_stride + j];
      e = src[4 * src_stride + j] * src[4 * src_stride + j];
      f = src[5 * src_stride + j] * src[5 * src_stride + j];
      g = src[6 * src_stride + j] * src[6 * src_stride + j];

      dst[j] = a + b + c + d;
      dst[dst_stride + j] = a + b + c + d + e;
      dst[2 * dst_stride + j] = a + b + c + d + e + f;
      for (i = 3; i < height - 4; ++i) {
        dst[i * dst_stride + j] = a + b + c + d + e + f + g;
        a = b;
        b = c;
        c = d;
        d = e;
        e = f;
        f = g;
        g = src[(i + 4) * src_stride + j] * src[(i + 4) * src_stride + j];
      }
      dst[i * dst_stride + j] = a + b + c + d + e + f + g;
      dst[(i + 1) * dst_stride + j] = b + c + d + e + f + g;
      dst[(i + 2) * dst_stride + j] = c + d + e + f + g;
      dst[(i + 3) * dst_stride + j] = d + e + f + g;
    }
  }

  // Horizontal sum over 7-pixel regions of dst
  for (i = 0; i < height; ++i) {
    a = dst[i * dst_stride];
    b = dst[i * dst_stride + 1];
    c = dst[i * dst_stride + 2];
    d = dst[i * dst_stride + 3];
    e = dst[i * dst_stride + 4];
    f = dst[i * dst_stride + 5];
    g = dst[i * dst_stride + 6];

    dst[i * dst_stride] = a + b + c + d;
    dst[i * dst_stride + 1] = a + b + c + d + e;
    dst[i * dst_stride + 2] = a + b + c + d + e + f;
    for (j = 3; j < width - 4; ++j) {
      // At the start of each iteration a..g hold the vertical sums at
      // columns (j - 3) .. (j + 3) of row i.
      dst[i * dst_stride + j] = a + b + c + d + e + f + g;
      a = b;
      b = c;
      c = d;
      d = e;
      e = f;
      f = g;
      g = dst[i * dst_stride + (j + 4)];
    }
    dst[i * dst_stride + j] = a + b + c + d + e + f + g;
    dst[i * dst_stride + (j + 1)] = b + c + d + e + f + g;
    dst[i * dst_stride + (j + 2)] = c + d + e + f + g;
    dst[i * dst_stride + (j + 3)] = d + e + f + g;
  }
}
617
// Generic version for any r. To be removed after experiments are done.
//
// Computes clipped (2r + 1)x(2r + 1) windowed sums (sqr = 0) or sums of
// squares (sqr != 0) of src into dst using running prefix sums held in a
// temporary width x height buffer: first down each column, then along each
// row. Requires width > 2 * r and height > 2 * r.
// NOTE(review): the aom_malloc result is not checked before use.
static void boxsumr(int32_t *src, int width, int height, int src_stride, int r,
                    int sqr, int32_t *dst, int dst_stride) {
  // Compute the size in size_t so the product is not evaluated in int.
  int32_t *tmp = aom_malloc((size_t)width * height * sizeof(*tmp));
  int tmp_stride = width;
  int i, j;
  // Vertical pass: tmp[i][j] = sum of rows 0..i of column j of src (or of
  // its squares).
  if (sqr) {
    for (j = 0; j < width; ++j) tmp[j] = src[j] * src[j];
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] +
            src[i * src_stride + j] * src[i * src_stride + j];
  } else {
    memcpy(tmp, src, sizeof(*tmp) * width);
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] + src[i * src_stride + j];
  }
  // Window sums are differences of prefix sums, clipped at top and bottom.
  for (i = 0; i <= r; ++i)
    memcpy(&dst[i * dst_stride], &tmp[(i + r) * tmp_stride],
           sizeof(*tmp) * width);
  for (i = r + 1; i < height - r; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] =
          tmp[(i + r) * tmp_stride + j] - tmp[(i - r - 1) * tmp_stride + j];
  for (i = height - r; i < height; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] = tmp[(height - 1) * tmp_stride + j] -
                                tmp[(i - r - 1) * tmp_stride + j];

  // Horizontal pass: tmp[i][j] = sum of columns 0..j of row i of dst.
  // dst is indexed with dst_stride here; indexing it with src_stride reads
  // the wrong rows whenever the two strides differ.
  for (i = 0; i < height; ++i) tmp[i * tmp_stride] = dst[i * dst_stride];
  for (i = 0; i < height; ++i)
    for (j = 1; j < width; ++j)
      tmp[i * tmp_stride + j] =
          tmp[i * tmp_stride + j - 1] + dst[i * dst_stride + j];

  for (j = 0; j <= r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] = tmp[i * tmp_stride + j + r];
  for (j = r + 1; j < width - r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + j + r] - tmp[i * tmp_stride + j - r - 1];
  for (j = width - r; j < width; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + width - 1] - tmp[i * tmp_stride + j - r - 1];
  aom_free(tmp);
}
669
// Computes clipped (2r + 1)x(2r + 1) windowed sums (sqr = 0) or sums of
// squares (sqr != 0) of src into dst, using the specialized routine for
// r = 1, 2 or 3 and the generic boxsumr for any other radius.
static void boxsum(int32_t *src, int width, int height, int src_stride, int r,
                   int sqr, int32_t *dst, int dst_stride) {
  switch (r) {
    case 1:
      boxsum1(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    case 2:
      boxsum2(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    case 3:
      boxsum3(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    default:
      boxsumr(src, width, height, src_stride, r, sqr, dst, dst_stride);
      break;
  }
}
681
// Fills num with, for each position of a width x height grid, the number of
// grid cells covered by a (2r + 1)x(2r + 1) window centred there and clipped
// to the grid. Requires width > 2 * r and height > 2 * r. Values are stored
// as int8_t, so (2r + 1)^2 must fit in that type.
static void boxnum(int width, int height, int r, int8_t *num, int num_stride) {
  int i, j;
  // Four corner regions: clipped both vertically and horizontally. The
  // top-left value is mirrored into the other three corners.
  for (i = 0; i <= r; ++i) {
    for (j = 0; j <= r; ++j) {
      num[i * num_stride + j] = (r + 1 + i) * (r + 1 + j);
      num[i * num_stride + (width - 1 - j)] = num[i * num_stride + j];
      num[(height - 1 - i) * num_stride + j] = num[i * num_stride + j];
      num[(height - 1 - i) * num_stride + (width - 1 - j)] =
          num[i * num_stride + j];
    }
  }
  // Left and right edge strips: full height, clipped horizontally.
  for (j = 0; j <= r; ++j) {
    const int val = (2 * r + 1) * (r + 1 + j);
    for (i = r + 1; i < height - r; ++i) {
      num[i * num_stride + j] = val;
      num[i * num_stride + (width - 1 - j)] = val;
    }
  }
  // Top and bottom edge strips: full width, clipped vertically.
  for (i = 0; i <= r; ++i) {
    const int val = (2 * r + 1) * (r + 1 + i);
    for (j = r + 1; j < width - r; ++j) {
      num[i * num_stride + j] = val;
      num[(height - 1 - i) * num_stride + j] = val;
    }
  }
  // Interior: the whole window fits.
  for (i = r + 1; i < height - r; ++i) {
    for (j = r + 1; j < width - r; ++j) {
      num[i * num_stride + j] = (2 * r + 1) * (2 * r + 1);
    }
  }
}
713
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100714void decode_xq(const int *xqd, int *xq) {
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800715 xq[0] = xqd[0];
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700716 xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
717}
718
// Lookup table for x / (x + 1) in 8-bit fixed point: entry x holds
// round(256 * x / (x + 1)), except that the final entry (x = 255) is 256
// rather than 255. av1_selfguided_restoration_internal() indexes it with
// AOMMIN(z, 255), so every z >= 255 maps to 256.
const int32_t x_by_xplus1[256] = {
  0,   128, 171, 192, 205, 213, 219, 224, 228, 230, 233, 235, 236, 238, 239,
  240, 241, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 247, 247,
  248, 248, 248, 248, 249, 249, 249, 249, 249, 250, 250, 250, 250, 250, 250,
  250, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 252, 252, 252, 252,
  252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 253, 253,
  253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253,
  253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254, 254, 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  256,
};
739
// Reciprocal table in 12-bit fixed point: one_by_x[n - 1] = round(2^12 / n).
// The first 25 entries (n = 1..25) are always present; the remaining 24
// (n = 26..49) are compiled in only when MAX_RADIUS > 2.
// NOTE(review): MAX_NELEM is presumably (2 * MAX_RADIUS + 1)^2 so that the
// initialiser count matches the array size in both configurations -- confirm
// against the header that defines it.
const int32_t one_by_x[MAX_NELEM] = {
  4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315,
  293,  273,  256,  241,  228, 216, 205, 195, 186, 178, 171, 164,
#if MAX_RADIUS > 2
  158,  152,  146,  141,  137, 132, 128, 124, 120, 117, 114, 111, 108,
  105,  102,  100,  98,   95,  93,  91,  89,  87,  85,  84
#endif  // MAX_RADIUS > 2
};
Debargha Mukherjee4be12622017-02-15 21:38:02 -0800748
// Core of the self-guided filter, operating on 32-bit samples.
//
//   dgd, dgd_stride  input samples; dgd points at the top-left sample of the
//                    width x height block, and SGRPROJ_BORDER_VERT rows /
//                    SGRPROJ_BORDER_HORZ columns around it are also read
//   dst, dst_stride  output block (width x height)
//   bit_depth        sample bit depth; box sums are scaled down by
//                    (bit_depth - 8) bits (twice that for the sqr = 1 sums)
//   r                box radius, must be <= 3 (asserted)
//   eps              1-based row index into sgrproj_mtable
//
// Steps:
//   1. Box-sum the extended block into B (sqr = 0) and A (sqr = 1), and
//      compute the per-position sample counts into num.
//   2. For every position of the block, replace A[k] by a value taken from
//      x_by_xplus1 and B[k] by a value derived from the box sum, A[k] and
//      one_by_x (both updated in place).
//   3. For every position, combine A and B over the 3x3 neighbourhood with
//      integer weights that sum to 2^nb (nb = 3 on the block border, nb = 5 in
//      the interior) and write
//        dst = round((a * dgd + b) >> (SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS)).
//
// If width < 5 or height < 5 the function returns immediately and dst is left
// unmodified.
static void av1_selfguided_restoration_internal(int32_t *dgd, int width,
                                                int height, int dgd_stride,
                                                int32_t *dst, int dst_stride,
                                                int bit_depth, int r, int eps) {
  const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
  const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
  const int num_stride = width_ext;
  // Adjusting the stride of A and B here appears to avoid bad cache effects,
  // leading to a significant speed improvement.
  // We also align the stride to a multiple of 16 bytes, for consistency
  // with the SIMD version of this function.
  int buf_stride = ((width_ext + 3) & ~3) + 16;
  // Stack scratch buffers. NOTE(review): nothing here checks that
  // height_ext * buf_stride fits in RESTORATION_PROC_UNIT_PELS; that is
  // assumed to be guaranteed by the callers -- confirm.
  int32_t A_[RESTORATION_PROC_UNIT_PELS];
  int32_t B_[RESTORATION_PROC_UNIT_PELS];
  int32_t *A = A_;
  int32_t *B = B_;
  int8_t num_[RESTORATION_PROC_UNIT_PELS];
  // num addresses the count for block position (0, 0), skipping the border.
  int8_t *num = num_ + SGRPROJ_BORDER_VERT * num_stride + SGRPROJ_BORDER_HORZ;
  int i, j;

  // Don't filter tiles with dimensions < 5 on any axis
  // (dst is not written in that case).
  if ((width < 5) || (height < 5)) return;

  // Box sums over the block extended by the border on every side:
  // B receives the sqr = 0 result, A the sqr = 1 result.
  boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
         width_ext, height_ext, dgd_stride, r, 0, B, buf_stride);
  boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
         width_ext, height_ext, dgd_stride, r, 1, A, buf_stride);
  boxnum(width_ext, height_ext, r, num_, num_stride);
  assert(r <= 3);
  // From here on A and B address block position (0, 0), like dgd and num.
  A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
  B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int k = i * buf_stride + j;
      const int n = num[i * num_stride + j];

      // a < 2^16 * n < 2^22 regardless of bit depth
      uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8));
      // b < 2^8 * n < 2^14 regardless of bit depth
      uint32_t b = ROUND_POWER_OF_TWO(B[k], bit_depth - 8);

      // Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28,
      // and p itself satisfies p < 2^14 * n^2 < 2^26.
      // Note: Sometimes, in high bit depth, we can end up with a*n < b*b.
      // This is an artefact of rounding, and can only happen if all pixels
      // are (almost) identical, so in this case we saturate to p=0.
      uint32_t p = (a * n < b * b) ? 0 : a * n - b * b;
      uint32_t s = sgrproj_mtable[eps - 1][n - 1];

      // p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32
      // as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12
      // (this holds even after accounting for the rounding in s)
      const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS);

      A[k] = x_by_xplus1[AOMMIN(z, 255)];  // < 2^8

      // SGRPROJ_SGR - A[k] < 2^8, B[k] < 2^(bit_depth) * n,
      // one_by_x[n - 1] = round(2^12 / n)
      // => the product here is < 2^(20 + bit_depth) <= 2^32,
      // and B[k] is set to a value < 2^(8 + bit depth)
      B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) *
                                             (uint32_t)B[k] *
                                             (uint32_t)one_by_x[n - 1],
                                         SGRPROJ_RECIP_BITS);
    }
  }
  // Output stage. The block border is handled by eight special cases whose
  // weights sum to 8 (nb = 3) and use only in-block neighbours; the interior
  // uses weights summing to 32 (nb = 5).

  // Top-left corner.
  i = 0;
  j = 0;
  {
    const int k = i * buf_stride + j;
    const int l = i * dgd_stride + j;
    const int m = i * dst_stride + j;
    const int nb = 3;
    const int32_t a =
        3 * A[k] + 2 * A[k + 1] + 2 * A[k + buf_stride] + A[k + buf_stride + 1];
    const int32_t b =
        3 * B[k] + 2 * B[k + 1] + 2 * B[k + buf_stride] + B[k + buf_stride + 1];
    const int32_t v = a * dgd[l] + b;
    dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
  }
  // Top-right corner.
  i = 0;
  j = width - 1;
  {
    const int k = i * buf_stride + j;
    const int l = i * dgd_stride + j;
    const int m = i * dst_stride + j;
    const int nb = 3;
    const int32_t a =
        3 * A[k] + 2 * A[k - 1] + 2 * A[k + buf_stride] + A[k + buf_stride - 1];
    const int32_t b =
        3 * B[k] + 2 * B[k - 1] + 2 * B[k + buf_stride] + B[k + buf_stride - 1];
    const int32_t v = a * dgd[l] + b;
    dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
  }
  // Bottom-left corner.
  i = height - 1;
  j = 0;
  {
    const int k = i * buf_stride + j;
    const int l = i * dgd_stride + j;
    const int m = i * dst_stride + j;
    const int nb = 3;
    const int32_t a =
        3 * A[k] + 2 * A[k + 1] + 2 * A[k - buf_stride] + A[k - buf_stride + 1];
    const int32_t b =
        3 * B[k] + 2 * B[k + 1] + 2 * B[k - buf_stride] + B[k - buf_stride + 1];
    const int32_t v = a * dgd[l] + b;
    dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
  }
  // Bottom-right corner.
  i = height - 1;
  j = width - 1;
  {
    const int k = i * buf_stride + j;
    const int l = i * dgd_stride + j;
    const int m = i * dst_stride + j;
    const int nb = 3;
    const int32_t a =
        3 * A[k] + 2 * A[k - 1] + 2 * A[k - buf_stride] + A[k - buf_stride - 1];
    const int32_t b =
        3 * B[k] + 2 * B[k - 1] + 2 * B[k - buf_stride] + B[k - buf_stride - 1];
    const int32_t v = a * dgd[l] + b;
    dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
  }
  // Top row, excluding the corners.
  i = 0;
  for (j = 1; j < width - 1; ++j) {
    const int k = i * buf_stride + j;
    const int l = i * dgd_stride + j;
    const int m = i * dst_stride + j;
    const int nb = 3;
    const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + buf_stride] +
                      A[k + buf_stride - 1] + A[k + buf_stride + 1];
    const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + buf_stride] +
                      B[k + buf_stride - 1] + B[k + buf_stride + 1];
    const int32_t v = a * dgd[l] + b;
    dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
  }
  // Bottom row, excluding the corners.
  i = height - 1;
  for (j = 1; j < width - 1; ++j) {
    const int k = i * buf_stride + j;
    const int l = i * dgd_stride + j;
    const int m = i * dst_stride + j;
    const int nb = 3;
    const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - buf_stride] +
                      A[k - buf_stride - 1] + A[k - buf_stride + 1];
    const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - buf_stride] +
                      B[k - buf_stride - 1] + B[k - buf_stride + 1];
    const int32_t v = a * dgd[l] + b;
    dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
  }
  // Left column, excluding the corners.
  j = 0;
  for (i = 1; i < height - 1; ++i) {
    const int k = i * buf_stride + j;
    const int l = i * dgd_stride + j;
    const int m = i * dst_stride + j;
    const int nb = 3;
    const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
                      A[k + 1] + A[k - buf_stride + 1] + A[k + buf_stride + 1];
    const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
                      B[k + 1] + B[k - buf_stride + 1] + B[k + buf_stride + 1];
    const int32_t v = a * dgd[l] + b;
    dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
  }
  // Right column, excluding the corners.
  j = width - 1;
  for (i = 1; i < height - 1; ++i) {
    const int k = i * buf_stride + j;
    const int l = i * dgd_stride + j;
    const int m = i * dst_stride + j;
    const int nb = 3;
    const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
                      A[k - 1] + A[k - buf_stride - 1] + A[k + buf_stride - 1];
    const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
                      B[k - 1] + B[k - buf_stride - 1] + B[k + buf_stride - 1];
    const int32_t v = a * dgd[l] + b;
    dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
  }
  // Interior: weight 4 on the centre and its four axial neighbours, weight 3
  // on the four diagonal neighbours (total 32).
  for (i = 1; i < height - 1; ++i) {
    for (j = 1; j < width - 1; ++j) {
      const int k = i * buf_stride + j;
      const int l = i * dgd_stride + j;
      const int m = i * dst_stride + j;
      const int nb = 5;
      const int32_t a =
          (A[k] + A[k - 1] + A[k + 1] + A[k - buf_stride] + A[k + buf_stride]) *
              4 +
          (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
           A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
              3;
      const int32_t b =
          (B[k] + B[k - 1] + B[k + 1] + B[k - buf_stride] + B[k + buf_stride]) *
              4 +
          (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
           B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
              3;
      const int32_t v = a * dgd[l] + b;
      dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
    }
  }
}
946
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100947void av1_selfguided_restoration_c(const uint8_t *dgd, int width, int height,
David Barker506eb722017-03-08 13:35:49 +0000948 int stride, int32_t *dst, int dst_stride,
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -0700949 int r, int eps) {
950 int32_t dgd32_[RESTORATION_PROC_UNIT_PELS];
Debargha Mukherjeee168a782017-08-31 12:30:10 -0700951 const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ;
952 int32_t *dgd32 =
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -0700953 dgd32_ + dgd32_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ;
David Barker506eb722017-03-08 13:35:49 +0000954 int i, j;
Debargha Mukherjeee168a782017-08-31 12:30:10 -0700955 for (i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
956 for (j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
957 dgd32[i * dgd32_stride + j] = dgd[i * stride + j];
David Barker506eb722017-03-08 13:35:49 +0000958 }
959 }
Debargha Mukherjeee168a782017-08-31 12:30:10 -0700960 av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, dst,
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -0700961 dst_stride, 8, r, eps);
David Barker506eb722017-03-08 13:35:49 +0000962}
963
Rupert Swarbrickd3d06152017-10-19 10:31:57 +0100964void av1_highpass_filter_c(const uint8_t *dgd, int width, int height,
965 int stride, int32_t *dst, int dst_stride, int corner,
966 int edge) {
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800967 int i, j;
David Barkereed824e2017-03-10 11:35:22 +0000968 const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800969
970 i = 0;
971 j = 0;
972 {
973 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000974 const int l = i * dst_stride + j;
975 dst[l] =
976 center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) +
977 corner * (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800978 }
979 i = 0;
980 j = width - 1;
981 {
982 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000983 const int l = i * dst_stride + j;
984 dst[l] =
985 center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) +
986 corner * (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800987 }
988 i = height - 1;
989 j = 0;
990 {
991 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +0000992 const int l = i * dst_stride + j;
993 dst[l] =
994 center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) +
995 corner * (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800996 }
997 i = height - 1;
998 j = width - 1;
999 {
1000 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +00001001 const int l = i * dst_stride + j;
1002 dst[l] =
1003 center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) +
1004 corner * (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001005 }
1006 i = 0;
1007 for (j = 1; j < width - 1; ++j) {
1008 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +00001009 const int l = i * dst_stride + j;
1010 dst[l] = center * dgd[k] +
1011 edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) +
1012 corner * (dgd[k + stride - 1] + dgd[k + stride + 1] + dgd[k - 1] +
1013 dgd[k + 1]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001014 }
1015 i = height - 1;
1016 for (j = 1; j < width - 1; ++j) {
1017 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +00001018 const int l = i * dst_stride + j;
1019 dst[l] = center * dgd[k] +
1020 edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) +
1021 corner * (dgd[k - stride - 1] + dgd[k - stride + 1] + dgd[k - 1] +
1022 dgd[k + 1]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001023 }
1024 j = 0;
1025 for (i = 1; i < height - 1; ++i) {
1026 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +00001027 const int l = i * dst_stride + j;
1028 dst[l] = center * dgd[k] +
1029 edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) +
1030 corner * (dgd[k + stride + 1] + dgd[k - stride + 1] +
1031 dgd[k - stride] + dgd[k + stride]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001032 }
1033 j = width - 1;
1034 for (i = 1; i < height - 1; ++i) {
1035 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +00001036 const int l = i * dst_stride + j;
1037 dst[l] = center * dgd[k] +
1038 edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) +
1039 corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
1040 dgd[k - stride] + dgd[k + stride]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001041 }
1042 for (i = 1; i < height - 1; ++i) {
1043 for (j = 1; j < width - 1; ++j) {
1044 const int k = i * stride + j;
David Barkereed824e2017-03-10 11:35:22 +00001045 const int l = i * dst_stride + j;
1046 dst[l] =
1047 center * dgd[k] +
1048 edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) +
1049 corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
1050 dgd[k - stride + 1] + dgd[k + stride + 1]);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001051 }
1052 }
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001053}
1054
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001055void apply_selfguided_restoration_c(const uint8_t *dat, int width, int height,
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001056 int stride, int eps, const int *xqd,
1057 uint8_t *dst, int dst_stride,
1058 int32_t *tmpbuf) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001059 int xq[2];
David Barker3a0df182016-12-21 10:44:52 +00001060 int32_t *flt1 = tmpbuf;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001061 int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001062 int i, j;
David Barker3a0df182016-12-21 10:44:52 +00001063 assert(width * height <= RESTORATION_TILEPELS_MAX);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001064#if USE_HIGHPASS_IN_SGRPROJ
1065 av1_highpass_filter_c(dat, width, height, stride, flt1, width,
David Barkereed824e2017-03-10 11:35:22 +00001066 sgr_params[eps].corner, sgr_params[eps].edge);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001067#else
David Barker506eb722017-03-08 13:35:49 +00001068 av1_selfguided_restoration_c(dat, width, height, stride, flt1, width,
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -07001069 sgr_params[eps].r1, sgr_params[eps].e1);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001070#endif // USE_HIGHPASS_IN_SGRPROJ
David Barker506eb722017-03-08 13:35:49 +00001071 av1_selfguided_restoration_c(dat, width, height, stride, flt2, width,
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -07001072 sgr_params[eps].r2, sgr_params[eps].e2);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001073 decode_xq(xqd, xq);
1074 for (i = 0; i < height; ++i) {
1075 for (j = 0; j < width; ++j) {
1076 const int k = i * width + j;
1077 const int l = i * stride + j;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001078 const int m = i * dst_stride + j;
1079 const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
1080 const int32_t f1 = (int32_t)flt1[k] - u;
1081 const int32_t f2 = (int32_t)flt2[k] - u;
David Barkerce110cc2017-02-22 10:38:59 +00001082 const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001083 const int16_t w =
1084 (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001085 dst[m] = clip_pixel(w);
Debargha Mukherjee8f209a82016-10-12 10:47:01 -07001086 }
1087 }
1088}
1089
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001090static void sgrproj_filter_stripe(const RestorationUnitInfo *rui,
1091 int stripe_width, int stripe_height,
1092 int procunit_width, const uint8_t *src,
1093 int src_stride, uint8_t *dst, int dst_stride,
1094 int32_t *tmpbuf, int bit_depth) {
1095 (void)bit_depth;
1096 assert(bit_depth == 8);
1097
1098 for (int j = 0; j < stripe_width; j += procunit_width) {
1099 int w = AOMMIN(procunit_width, stripe_width - j);
1100 apply_selfguided_restoration(src + j, w, stripe_height, src_stride,
1101 rui->sgrproj_info.ep, rui->sgrproj_info.xqd,
1102 dst + j, dst_stride, tmpbuf);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001103 }
1104}
1105
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001106#if CONFIG_HIGHBITDEPTH
// Pick the high-bitdepth convolution used by wiener_filter_stripe_highbd():
// the "_hip" variant when USE_WIENER_HIGH_INTERMEDIATE_PRECISION is set, the
// plain variant otherwise.
// NOTE(review): "_hip" presumably stands for "high intermediate precision" --
// confirm against the aom_dsp declarations.
#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
#define wiener_highbd_convolve8_add_src aom_highbd_convolve8_add_src_hip
#else
#define wiener_highbd_convolve8_add_src aom_highbd_convolve8_add_src
#endif  // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001112
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001113static void wiener_filter_stripe_highbd(const RestorationUnitInfo *rui,
1114 int stripe_width, int stripe_height,
1115 int procunit_width, const uint8_t *src8,
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001116 int src_stride, uint8_t *dst8,
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001117 int dst_stride, int32_t *tmpbuf,
1118 int bit_depth) {
1119 (void)tmpbuf;
1120
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001121 const int mid_height =
1122 stripe_height - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2;
1123 assert(mid_height > 0);
Ola Hugosson1e7f2d02017-09-22 21:36:26 +02001124
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001125 for (int j = 0; j < stripe_width; j += procunit_width) {
1126 int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001127 const uint8_t *src8_p = src8 + j;
1128 uint8_t *dst8_p = dst8 + j;
1129
1130 for (int b = 0; b < WIENER_HALFWIN - WIENER_BORDER_VERT; ++b) {
1131 InterpKernel vertical_top;
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001132 stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_top,
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001133 WIENER_BORDER_VERT + b, 1);
1134 wiener_highbd_convolve8_add_src(src8_p, src_stride, dst8_p, dst_stride,
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001135 rui->wiener_info.hfilter, 16,
1136 vertical_top, 16, w, 1, bit_depth);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001137 src8_p += src_stride;
1138 dst8_p += dst_stride;
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001139 }
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001140 assert(stripe_height > (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001141 wiener_highbd_convolve8_add_src(
1142 src8_p, src_stride, dst8_p, dst_stride, rui->wiener_info.hfilter, 16,
1143 rui->wiener_info.vfilter, 16, w, mid_height, bit_depth);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001144 src8_p += src_stride * (mid_height);
1145 dst8_p += dst_stride * (mid_height);
1146 for (int b = WIENER_HALFWIN - WIENER_BORDER_VERT - 1; b >= 0; --b) {
1147 InterpKernel vertical_bot;
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001148 stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_bot,
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001149 WIENER_BORDER_VERT + b, 0);
1150 wiener_highbd_convolve8_add_src(src8_p, src_stride, dst8_p, dst_stride,
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001151 rui->wiener_info.hfilter, 16,
1152 vertical_bot, 16, w, 1, bit_depth);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001153 src8_p += src_stride;
1154 dst8_p += dst_stride;
1155 }
Ola Hugosson1e7f2d02017-09-22 21:36:26 +02001156 }
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001157}
1158
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001159void av1_selfguided_restoration_highbd_c(const uint16_t *dgd, int width,
1160 int height, int stride, int32_t *dst,
David Barker506eb722017-03-08 13:35:49 +00001161 int dst_stride, int bit_depth, int r,
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -07001162 int eps) {
1163 int32_t dgd32_[RESTORATION_PROC_UNIT_PELS];
Debargha Mukherjeee168a782017-08-31 12:30:10 -07001164 const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ;
1165 int32_t *dgd32 =
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -07001166 dgd32_ + dgd32_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ;
David Barker506eb722017-03-08 13:35:49 +00001167 int i, j;
Debargha Mukherjeee168a782017-08-31 12:30:10 -07001168 for (i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
1169 for (j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
1170 dgd32[i * dgd32_stride + j] = dgd[i * stride + j];
David Barker506eb722017-03-08 13:35:49 +00001171 }
1172 }
Debargha Mukherjeee168a782017-08-31 12:30:10 -07001173 av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, dst,
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -07001174 dst_stride, bit_depth, r, eps);
David Barker506eb722017-03-08 13:35:49 +00001175}
1176
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001177void av1_highpass_filter_highbd_c(const uint16_t *dgd, int width, int height,
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001178 int stride, int32_t *dst, int dst_stride,
David Barkereed824e2017-03-10 11:35:22 +00001179 int corner, int edge) {
Urvang Joshi0c459412017-04-21 18:10:09 +00001180 int i, j;
1181 const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge);
1182
1183 i = 0;
1184 j = 0;
1185 {
1186 const int k = i * stride + j;
1187 const int l = i * dst_stride + j;
1188 dst[l] =
1189 center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) +
1190 corner * (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]);
1191 }
1192 i = 0;
1193 j = width - 1;
1194 {
1195 const int k = i * stride + j;
1196 const int l = i * dst_stride + j;
1197 dst[l] =
1198 center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) +
1199 corner * (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]);
1200 }
1201 i = height - 1;
1202 j = 0;
1203 {
1204 const int k = i * stride + j;
1205 const int l = i * dst_stride + j;
1206 dst[l] =
1207 center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) +
1208 corner * (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]);
1209 }
1210 i = height - 1;
1211 j = width - 1;
1212 {
1213 const int k = i * stride + j;
1214 const int l = i * dst_stride + j;
1215 dst[l] =
1216 center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) +
1217 corner * (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]);
1218 }
1219 i = 0;
1220 for (j = 1; j < width - 1; ++j) {
1221 const int k = i * stride + j;
1222 const int l = i * dst_stride + j;
1223 dst[l] = center * dgd[k] +
1224 edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) +
1225 corner * (dgd[k + stride - 1] + dgd[k + stride + 1] + dgd[k - 1] +
1226 dgd[k + 1]);
1227 }
1228 i = height - 1;
1229 for (j = 1; j < width - 1; ++j) {
1230 const int k = i * stride + j;
1231 const int l = i * dst_stride + j;
1232 dst[l] = center * dgd[k] +
1233 edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) +
1234 corner * (dgd[k - stride - 1] + dgd[k - stride + 1] + dgd[k - 1] +
1235 dgd[k + 1]);
1236 }
1237 j = 0;
1238 for (i = 1; i < height - 1; ++i) {
1239 const int k = i * stride + j;
1240 const int l = i * dst_stride + j;
1241 dst[l] = center * dgd[k] +
1242 edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) +
1243 corner * (dgd[k + stride + 1] + dgd[k - stride + 1] +
1244 dgd[k - stride] + dgd[k + stride]);
1245 }
1246 j = width - 1;
1247 for (i = 1; i < height - 1; ++i) {
1248 const int k = i * stride + j;
1249 const int l = i * dst_stride + j;
1250 dst[l] = center * dgd[k] +
1251 edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) +
1252 corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
1253 dgd[k - stride] + dgd[k + stride]);
1254 }
1255 for (i = 1; i < height - 1; ++i) {
1256 for (j = 1; j < width - 1; ++j) {
1257 const int k = i * stride + j;
1258 const int l = i * dst_stride + j;
1259 dst[l] =
1260 center * dgd[k] +
1261 edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) +
1262 corner * (dgd[k + stride - 1] + dgd[k - stride - 1] +
1263 dgd[k - stride + 1] + dgd[k + stride + 1]);
1264 }
1265 }
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001266}
1267
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001268void apply_selfguided_restoration_highbd_c(const uint16_t *dat, int width,
1269 int height, int stride,
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001270 int bit_depth, int eps,
1271 const int *xqd, uint16_t *dst,
1272 int dst_stride, int32_t *tmpbuf) {
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001273 int xq[2];
David Barker3a0df182016-12-21 10:44:52 +00001274 int32_t *flt1 = tmpbuf;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001275 int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001276 int i, j;
David Barker0b04e9b2017-01-18 15:29:20 +00001277 assert(width * height <= RESTORATION_TILEPELS_MAX);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001278#if USE_HIGHPASS_IN_SGRPROJ
1279 av1_highpass_filter_highbd_c(dat, width, height, stride, flt1, width,
David Barkereed824e2017-03-10 11:35:22 +00001280 sgr_params[eps].corner, sgr_params[eps].edge);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001281#else
David Barker506eb722017-03-08 13:35:49 +00001282 av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt1, width,
1283 bit_depth, sgr_params[eps].r1,
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -07001284 sgr_params[eps].e1);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -08001285#endif // USE_HIGHPASS_IN_SGRPROJ
David Barker506eb722017-03-08 13:35:49 +00001286 av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt2, width,
1287 bit_depth, sgr_params[eps].r2,
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -07001288 sgr_params[eps].e2);
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001289 decode_xq(xqd, xq);
1290 for (i = 0; i < height; ++i) {
1291 for (j = 0; j < width; ++j) {
1292 const int k = i * width + j;
1293 const int l = i * stride + j;
1294 const int m = i * dst_stride + j;
1295 const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
1296 const int32_t f1 = (int32_t)flt1[k] - u;
1297 const int32_t f2 = (int32_t)flt2[k] - u;
David Barkerce110cc2017-02-22 10:38:59 +00001298 const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
Debargha Mukherjee519dbcf2016-12-16 03:13:02 -08001299 const int16_t w =
1300 (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
1301 dst[m] = (uint16_t)clip_pixel_highbd(w, bit_depth);
1302 }
1303 }
1304}
1305
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001306static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui,
1307 int stripe_width, int stripe_height,
1308 int procunit_width,
1309 const uint8_t *src8, int src_stride,
1310 uint8_t *dst8, int dst_stride,
1311 int32_t *tmpbuf, int bit_depth) {
1312 for (int j = 0; j < stripe_width; j += procunit_width) {
1313 int w = AOMMIN(procunit_width, stripe_width - j);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001314 const uint16_t *data_p = CONVERT_TO_SHORTPTR(src8) + j;
1315 uint16_t *dst_p = CONVERT_TO_SHORTPTR(dst8) + j;
1316 apply_selfguided_restoration_highbd(
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001317 data_p, w, stripe_height, src_stride, bit_depth, rui->sgrproj_info.ep,
1318 rui->sgrproj_info.xqd, dst_p, dst_stride, tmpbuf);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001319 }
1320}
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001321#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001322
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001323typedef void (*stripe_filter_fun)(const RestorationUnitInfo *rui,
1324 int stripe_width, int stripe_height,
1325 int procunit_width, const uint8_t *src,
1326 int src_stride, uint8_t *dst, int dst_stride,
1327 int32_t *tmpbuf, int bit_depth);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001328
1329#if CONFIG_HIGHBITDEPTH
1330#define NUM_STRIPE_FILTERS 4
1331#else
1332#define NUM_STRIPE_FILTERS 2
1333#endif
1334
1335static const stripe_filter_fun stripe_filters[NUM_STRIPE_FILTERS] = {
1336 wiener_filter_stripe, sgrproj_filter_stripe,
1337#if CONFIG_HIGHBITDEPTH
1338 wiener_filter_stripe_highbd, sgrproj_filter_stripe_highbd
1339#endif // CONFIG_HIGHBITDEPTH
1340};
1341
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001342void av1_loop_restoration_filter_unit(const RestorationTileLimits *limits,
1343 const RestorationUnitInfo *rui,
1344#if CONFIG_STRIPED_LOOP_RESTORATION
1345 const RestorationStripeBoundaries *rsb,
1346 RestorationLineBuffers *rlbs, int ss_y,
1347#endif
1348 int procunit_width, int procunit_height,
1349 int highbd, int bit_depth, uint8_t *data8,
1350 int stride, uint8_t *dst8, int dst_stride,
1351 int32_t *tmpbuf) {
1352 RestorationType unit_rtype = rui->restoration_type;
1353
1354 int unit_h = limits->v_end - limits->v_start;
1355 int unit_w = limits->h_end - limits->h_start;
1356 uint8_t *data8_tl = data8 + limits->v_start * stride + limits->h_start;
1357 uint8_t *dst8_tl = dst8 + limits->v_start * dst_stride + limits->h_start;
1358
1359 if (unit_rtype == RESTORE_NONE) {
1360 copy_tile(unit_w, unit_h, data8_tl, stride, dst8_tl, dst_stride, highbd);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001361 return;
1362 }
1363
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001364 const int filter_idx = 2 * highbd + (unit_rtype == RESTORE_SGRPROJ);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001365 assert(filter_idx < NUM_STRIPE_FILTERS);
1366 const stripe_filter_fun stripe_filter = stripe_filters[filter_idx];
1367
Rupert Swarbrick9af0cf32017-10-19 10:43:42 +01001368// Convolve the whole tile one stripe at a time
1369#if CONFIG_STRIPED_LOOP_RESTORATION
1370 RestorationTileLimits remaining_stripes = *limits;
1371#endif
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001372 int i = 0;
1373 while (i < unit_h) {
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001374#if CONFIG_STRIPED_LOOP_RESTORATION
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001375 remaining_stripes.v_start = limits->v_start + i;
1376 int h = setup_processing_stripe_boundary(&remaining_stripes, rsb,
1377 procunit_height, ss_y, highbd,
1378 data8, stride, rlbs);
1379 if (unit_rtype == RESTORE_WIENER) h = ALIGN_POWER_OF_TWO(h, 1);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001380#else
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001381 const int h = AOMMIN(procunit_height, (unit_h - i + 15) & ~15);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001382#endif
1383
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001384 stripe_filter(rui, unit_w, h, procunit_width, data8_tl + i * stride, stride,
1385 dst8_tl + i * dst_stride, dst_stride, tmpbuf, bit_depth);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001386
1387#if CONFIG_STRIPED_LOOP_RESTORATION
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001388 restore_processing_stripe_boundary(
1389 &remaining_stripes, rlbs, procunit_height, ss_y, highbd, data8, stride);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001390#endif
1391
1392 i += h;
1393 }
1394}
1395
1396struct restore_borders {
1397 int hborder, vborder;
1398};
1399
1400static const struct restore_borders restore_borders[RESTORE_TYPES] = {
1401 { 0, 0 },
1402 { WIENER_BORDER_HORZ, WIENER_BORDER_VERT },
1403 { SGRPROJ_BORDER_HORZ, SGRPROJ_BORDER_VERT },
1404 { RESTORATION_BORDER_HORZ, RESTORATION_BORDER_VERT }
1405};
1406
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001407void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame,
1408 AV1_COMMON *cm, RestorationInfo *rsi,
1409 int components_pattern,
1410 YV12_BUFFER_CONFIG *dst) {
David Barker9666e752016-12-08 11:25:47 +00001411 YV12_BUFFER_CONFIG dst_;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001412
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001413 typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src,
1414 YV12_BUFFER_CONFIG *dst);
1415 static const copy_fun copy_funs[3] = { aom_yv12_copy_y, aom_yv12_copy_u,
1416 aom_yv12_copy_v };
1417
1418 for (int plane = 0; plane < 3; ++plane) {
1419 if ((components_pattern == 1 << plane) &&
1420 (rsi[plane].frame_restoration_type == RESTORE_NONE)) {
1421 if (dst) copy_funs[plane](frame, dst);
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001422 return;
David Barker9666e752016-12-08 11:25:47 +00001423 }
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001424 }
1425 if (components_pattern ==
1426 ((1 << AOM_PLANE_Y) | (1 << AOM_PLANE_U) | (1 << AOM_PLANE_V))) {
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001427 // All components
1428 if (rsi[0].frame_restoration_type == RESTORE_NONE &&
1429 rsi[1].frame_restoration_type == RESTORE_NONE &&
1430 rsi[2].frame_restoration_type == RESTORE_NONE) {
1431 if (dst) aom_yv12_copy_frame(frame, dst);
1432 return;
1433 }
David Barker9666e752016-12-08 11:25:47 +00001434 }
1435
David Barker9666e752016-12-08 11:25:47 +00001436 if (!dst) {
1437 dst = &dst_;
1438 memset(dst, 0, sizeof(YV12_BUFFER_CONFIG));
Rupert Swarbrick146a0602017-10-17 16:52:20 +01001439 if (aom_realloc_frame_buffer(dst, frame->y_crop_width, frame->y_crop_height,
1440 cm->subsampling_x, cm->subsampling_y,
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001441#if CONFIG_HIGHBITDEPTH
Rupert Swarbrick146a0602017-10-17 16:52:20 +01001442 cm->use_highbitdepth,
David Barker9666e752016-12-08 11:25:47 +00001443#endif
Rupert Swarbrick146a0602017-10-17 16:52:20 +01001444 AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL,
1445 NULL, NULL) < 0)
David Barker9666e752016-12-08 11:25:47 +00001446 aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
1447 "Failed to allocate restoration dst buffer");
1448 }
Debargha Mukherjee818e42a2016-12-12 11:52:56 -08001449
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001450#if CONFIG_STRIPED_LOOP_RESTORATION
1451 RestorationLineBuffers rlbs;
1452#endif
1453#if CONFIG_HIGHBITDEPTH
1454 const int bit_depth = cm->bit_depth;
1455 const int highbd = cm->use_highbitdepth;
1456#else
1457 const int bit_depth = 8;
1458 const int highbd = 0;
1459#endif
1460
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001461 for (int plane = 0; plane < 3; ++plane) {
1462 if (!((components_pattern >> plane) & 1)) continue;
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001463 const RestorationInfo *prsi = &rsi[plane];
1464 RestorationType rtype = prsi->frame_restoration_type;
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001465 if (rtype == RESTORE_NONE) {
1466 copy_funs[plane](frame, dst);
1467 continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001468 }
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001469
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001470 const int is_uv = plane > 0;
1471 const int ss_y = is_uv && cm->subsampling_y;
1472
1473 const int plane_width = frame->crop_widths[is_uv];
1474 const int plane_height = frame->crop_heights[is_uv];
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001475
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001476 int nhtiles, nvtiles;
1477 const int ntiles =
1478 av1_get_rest_ntiles(plane_width, plane_height,
1479 prsi->restoration_tilesize, &nhtiles, &nvtiles);
Rupert Swarbrickd3d06152017-10-19 10:31:57 +01001480
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001481 const struct restore_borders *borders =
1482 &restore_borders[prsi->frame_restoration_type];
1483 extend_frame(frame->buffers[plane], plane_width, plane_height,
1484 frame->strides[is_uv], borders->hborder, borders->vborder,
1485 highbd);
1486
1487 for (int tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
1488 RestorationTileLimits limits = av1_get_rest_tile_limits(
1489 tile_idx, nhtiles, nvtiles, prsi->restoration_tilesize, plane_width,
1490 plane_height, ss_y);
1491
1492 av1_loop_restoration_filter_unit(
1493 &limits, &prsi->unit_info[tile_idx],
1494#if CONFIG_STRIPED_LOOP_RESTORATION
1495 &prsi->boundaries, &rlbs, ss_y,
1496#endif
1497 prsi->procunit_width, prsi->procunit_height, highbd, bit_depth,
1498 frame->buffers[plane], frame->strides[is_uv], dst->buffers[plane],
1499 dst->strides[is_uv], cm->rst_tmpbuf);
1500 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001501 }
David Barker9666e752016-12-08 11:25:47 +00001502
David Barker9666e752016-12-08 11:25:47 +00001503 if (dst == &dst_) {
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001504 for (int plane = 0; plane < 3; ++plane) {
1505 if ((components_pattern >> plane) & 1) {
1506 copy_funs[plane](dst, frame);
1507 }
1508 }
David Barker9666e752016-12-08 11:25:47 +00001509 aom_free_frame_buffer(dst);
1510 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001511}
1512
Rupert Swarbrick6c545212017-09-01 17:17:25 +01001513int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane,
1514 int mi_row, int mi_col, BLOCK_SIZE bsize,
1515 int *rcol0, int *rcol1, int *rrow0,
1516 int *rrow1, int *nhtiles) {
1517 assert(rcol0 && rcol1 && rrow0 && rrow1 && nhtiles);
1518
1519 if (bsize != cm->sb_size) return 0;
1520
1521#if CONFIG_FRAME_SUPERRES
1522 const int frame_w = cm->superres_upscaled_width;
1523 const int frame_h = cm->superres_upscaled_height;
Urvang Joshide71d142017-10-05 12:12:15 -07001524 const int mi_to_px = MI_SIZE * SCALE_NUMERATOR;
1525 const int denom = cm->superres_scale_denominator;
Rupert Swarbrick6c545212017-09-01 17:17:25 +01001526#else
1527 const int frame_w = cm->width;
1528 const int frame_h = cm->height;
1529 const int mi_to_px = MI_SIZE;
1530 const int denom = 1;
1531#endif // CONFIG_FRAME_SUPERRES
1532
Rupert Swarbrick7380b252017-09-05 10:18:21 +01001533 const int ss_x = plane > 0 && cm->subsampling_x != 0;
1534 const int ss_y = plane > 0 && cm->subsampling_y != 0;
1535
1536 const int ss_frame_w = (frame_w + ss_x) >> ss_x;
1537 const int ss_frame_h = (frame_h + ss_y) >> ss_y;
Rupert Swarbrick6c545212017-09-01 17:17:25 +01001538
Rupert Swarbrick64b8bbd2017-10-16 15:53:07 +01001539 const int rtile_size = cm->rst_info[plane].restoration_tilesize;
Rupert Swarbrick6c545212017-09-01 17:17:25 +01001540
Rupert Swarbrick64b8bbd2017-10-16 15:53:07 +01001541 int nvtiles;
1542 av1_get_rest_ntiles(ss_frame_w, ss_frame_h, rtile_size, nhtiles, &nvtiles);
1543
1544 const int rnd = rtile_size * denom - 1;
Rupert Swarbrick6c545212017-09-01 17:17:25 +01001545
1546 // rcol0/rrow0 should be the first column/row of rtiles that doesn't start
1547 // left/below of mi_col/mi_row. For this calculation, we need to round up the
1548 // division (if the sb starts at rtile column 10.1, the first matching rtile
1549 // has column index 11)
Rupert Swarbrick64b8bbd2017-10-16 15:53:07 +01001550 *rcol0 = (mi_col * mi_to_px + rnd) / (rtile_size * denom);
1551 *rrow0 = (mi_row * mi_to_px + rnd) / (rtile_size * denom);
Rupert Swarbrick6c545212017-09-01 17:17:25 +01001552
1553 // rcol1/rrow1 is the equivalent calculation, but for the superblock
1554 // below-right. There are some slightly strange boundary effects. First, we
1555 // need to clamp to nhtiles/nvtiles for the case where it appears there are,
1556 // say, 2.4 restoration tiles horizontally. There we need a maximum mi_row1
1557 // of 2 because tile 1 gets extended.
1558 //
1559 // Second, if mi_col1 >= cm->mi_cols then we must manually set *rcol1 to
1560 // nhtiles. This is needed whenever the frame's width rounded up to the next
1561 // toplevel superblock is smaller than nhtiles * rtile_w. The same logic is
1562 // needed for rows.
1563 const int mi_row1 = mi_row + mi_size_high[bsize];
1564 const int mi_col1 = mi_col + mi_size_wide[bsize];
1565
1566 if (mi_col1 >= cm->mi_cols)
1567 *rcol1 = *nhtiles;
1568 else
Rupert Swarbrick64b8bbd2017-10-16 15:53:07 +01001569 *rcol1 =
1570 AOMMIN(*nhtiles, (mi_col1 * mi_to_px + rnd) / (rtile_size * denom));
Rupert Swarbrick6c545212017-09-01 17:17:25 +01001571
1572 if (mi_row1 >= cm->mi_rows)
1573 *rrow1 = nvtiles;
1574 else
Rupert Swarbrick64b8bbd2017-10-16 15:53:07 +01001575 *rrow1 = AOMMIN(nvtiles, (mi_row1 * mi_to_px + rnd) / (rtile_size * denom));
Rupert Swarbrick6c545212017-09-01 17:17:25 +01001576
1577 return *rcol0 < *rcol1 && *rrow0 < *rrow1;
1578}
Ola Hugosson1e7f2d02017-09-22 21:36:26 +02001579
1580#if CONFIG_STRIPED_LOOP_RESTORATION
1581
// Replicates the first and last pixel of a row outwards.
//
//   buf               pointer to the first pixel of the row; treated as an
//                     array of uint16_t when use_highbitdepth is non-zero
//   width             number of pixels in the row
//   extend            number of pixels to write on each side
//   use_highbitdepth  non-zero for 16-bit pixels, zero for 8-bit pixels
//
// The caller must provide `extend` writable pixels before and after the row.
static void extend_line(uint8_t *buf, int width, int extend,
                        int use_highbitdepth) {
  if (!use_highbitdepth) {
    const uint8_t first = buf[0];
    for (int n = 1; n <= extend; ++n) buf[-n] = first;

    const uint8_t last = buf[width - 1];
    for (int n = 0; n < extend; ++n) buf[width + n] = last;
    return;
  }

  uint16_t *const row16 = (uint16_t *)buf;

  const uint16_t first = row16[0];
  for (int n = 1; n <= extend; ++n) row16[-n] = first;

  const uint16_t last = row16[width - 1];
  for (int n = 0; n < extend; ++n) row16[width + n] = last;
}
1600
1601// For each 64 pixel high stripe, save 4 scan lines to be used as boundary in
1602// the loop restoration process. The lines are saved in
1603// rst_internal.stripe_boundary_lines
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001604void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
Ola Hugosson1e7f2d02017-09-22 21:36:26 +02001605 AV1_COMMON *cm) {
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001606 for (int p = 0; p < MAX_MB_PLANE; ++p) {
1607 const int is_uv = p > 0;
1608 const uint8_t *src_buf = frame->buffers[p];
1609 const int src_width = frame->crop_widths[is_uv];
1610 const int src_height = frame->crop_heights[is_uv];
1611 const int src_stride = frame->strides[is_uv];
1612 const int stripe_height = 64 >> (is_uv && cm->subsampling_y);
1613 const int stripe_offset = (56 >> (is_uv && cm->subsampling_y)) - 2;
1614
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001615 RestorationStripeBoundaries *boundaries = &cm->rst_info[p].boundaries;
1616 uint8_t *boundary_above_buf = boundaries->stripe_boundary_above;
1617 uint8_t *boundary_below_buf = boundaries->stripe_boundary_below;
1618 const int boundary_stride = boundaries->stripe_boundary_stride;
Ola Hugosson1e7f2d02017-09-22 21:36:26 +02001619#if CONFIG_HIGHBITDEPTH
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001620 const int use_highbitdepth = cm->use_highbitdepth;
Ola Hugosson1e7f2d02017-09-22 21:36:26 +02001621 if (use_highbitdepth) {
1622 src_buf = (uint8_t *)CONVERT_TO_SHORTPTR(src_buf);
1623 }
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001624#else
1625 const int use_highbitdepth = 0;
Ola Hugosson1e7f2d02017-09-22 21:36:26 +02001626#endif
1627 src_buf += (stripe_offset * src_stride) << use_highbitdepth;
1628 boundary_above_buf += RESTORATION_EXTRA_HORZ << use_highbitdepth;
1629 boundary_below_buf += RESTORATION_EXTRA_HORZ << use_highbitdepth;
1630 // Loop over stripes
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001631 for (int stripe_y = stripe_offset; stripe_y < src_height;
Ola Hugosson1e7f2d02017-09-22 21:36:26 +02001632 stripe_y += stripe_height) {
1633 // Save 2 lines above the LR stripe (offset -9, -10)
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001634 for (int yy = 0; yy < 2; yy++) {
Ola Hugosson1e7f2d02017-09-22 21:36:26 +02001635 if (stripe_y + yy < src_height) {
1636 memcpy(boundary_above_buf, src_buf, src_width << use_highbitdepth);
1637 extend_line(boundary_above_buf, src_width, RESTORATION_EXTRA_HORZ,
1638 use_highbitdepth);
1639 src_buf += src_stride << use_highbitdepth;
1640 boundary_above_buf += boundary_stride << use_highbitdepth;
1641 }
1642 }
1643 // Save 2 lines below the LR stripe (offset 56,57)
Rupert Swarbrickf88bc042017-10-18 10:45:51 +01001644 for (int yy = 2; yy < 4; yy++) {
Ola Hugosson1e7f2d02017-09-22 21:36:26 +02001645 if (stripe_y + yy < src_height) {
1646 memcpy(boundary_below_buf, src_buf, src_width << use_highbitdepth);
1647 extend_line(boundary_below_buf, src_width, RESTORATION_EXTRA_HORZ,
1648 use_highbitdepth);
1649 src_buf += src_stride << use_highbitdepth;
1650 boundary_below_buf += boundary_stride << use_highbitdepth;
1651 }
1652 }
1653 // jump to next stripe
1654 src_buf += ((stripe_height - 4) * src_stride) << use_highbitdepth;
1655 }
1656 }
1657}
1658
1659#endif // CONFIG_STRIPED_LOOP_RESTORATION