blob: 26330d9e60055e700e3d04edecc8654b486ff058 [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xubde4ac82016-11-28 15:26:06 -08002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xubde4ac82016-11-28 15:26:06 -08004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xuc27fc142016-08-22 16:08:15 -070010 */
11
12#include <assert.h>
13#include <float.h>
14#include <limits.h>
15#include <math.h>
16
Yaowu Xuf883b422016-08-30 14:01:10 -070017#include "./aom_scale_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070018
Yaowu Xuf883b422016-08-30 14:01:10 -070019#include "aom_dsp/aom_dsp_common.h"
Debargha Mukherjeecfc12f32017-04-18 07:03:32 -070020#include "aom_dsp/binary_codes_writer.h"
21#include "aom_dsp/psnr.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070022#include "aom_mem/aom_mem.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070023#include "aom_ports/mem.h"
Jingning Han041c67b2017-04-14 21:39:26 -070024#include "aom_ports/system_state.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070025
26#include "av1/common/onyxc_int.h"
27#include "av1/common/quant_common.h"
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -080028#include "av1/common/restoration.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070029
Tom Finegan17ce8b12017-02-08 12:46:31 -080030#include "av1/encoder/av1_quantize.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070031#include "av1/encoder/encoder.h"
Debargha Mukherjee1330dfd2017-09-03 22:22:27 -070032#include "av1/encoder/mathutils.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070033#include "av1/encoder/picklpf.h"
34#include "av1/encoder/pickrst.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070035
// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
// When set to RESTORE_TYPES we allow switchable.
static const RestorationType force_restore_type = RESTORE_TYPES;

// Number of Wiener iterations
#define NUM_WIENER_ITERS 5

// Bits used to signal the frame-level restoration type, indexed by
// RestorationType.
const int frame_level_restore_bits[RESTORE_TYPES] = { 2, 2, 2, 2 };

// Whole-frame SSE extractor (currently unused helper typedef kept alongside
// the partial variant below).
typedef int64_t (*sse_extractor_type)(const YV12_BUFFER_CONFIG *a,
                                      const YV12_BUFFER_CONFIG *b);
// Extracts the sum of squared errors between two frames over a rectangular
// sub-region of one plane.
typedef int64_t (*sse_part_extractor_type)(const YV12_BUFFER_CONFIG *a,
                                           const YV12_BUFFER_CONFIG *b,
                                           int hstart, int width, int vstart,
                                           int height);

// Three extractors (Y, U, V); doubled when high bitdepth variants exist.
#define NUM_EXTRACTORS (3 * (1 + CONFIG_HIGHBITDEPTH))

// Table indexed by 3 * highbd + plane (see sse_restoration_tile).
static const sse_part_extractor_type sse_part_extractors[NUM_EXTRACTORS] = {
  aom_get_y_sse_part, aom_get_u_sse_part,
  aom_get_v_sse_part,
#if CONFIG_HIGHBITDEPTH
  aom_highbd_get_y_sse_part, aom_highbd_get_u_sse_part,
  aom_highbd_get_v_sse_part,
#endif  // CONFIG_HIGHBITDEPTH
};
62
Rupert Swarbrick2ec2a6f2017-10-20 09:52:13 +010063static int64_t sse_restoration_tile(const RestorationTileLimits *limits,
64 const YV12_BUFFER_CONFIG *src,
65 const YV12_BUFFER_CONFIG *dst, int plane,
66 int highbd) {
67 assert(CONFIG_HIGHBITDEPTH || !highbd);
68 return sse_part_extractors[3 * highbd + plane](
69 src, dst, limits->h_start, limits->h_end - limits->h_start,
70 limits->v_start, limits->v_end - limits->v_start);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -070071}
72
// Applies the restoration filter described by rui to one restoration unit of
// cm->frame_to_show, writing the filtered pixels into dst, and returns the
// resulting SSE against src over that unit.
static int64_t try_restoration_tile(const AV1_COMMON *cm,
                                    const YV12_BUFFER_CONFIG *src,
                                    const RestorationTileLimits *limits,
                                    const RestorationUnitInfo *rui,
                                    YV12_BUFFER_CONFIG *dst, int plane) {
  const RestorationInfo *rsi = &cm->rst_info[plane];
  const int is_uv = plane > 0;
#if CONFIG_STRIPED_LOOP_RESTORATION
  // Scratch line buffers for stripe-boundary handling; stack-allocated since
  // this is a trial filtering pass.
  RestorationLineBuffers rlbs;
  const int ss_y = is_uv && cm->subsampling_y;
#endif
#if CONFIG_HIGHBITDEPTH
  const int bit_depth = cm->bit_depth;
  const int highbd = cm->use_highbitdepth;
#else
  const int bit_depth = 8;
  const int highbd = 0;
#endif

  const YV12_BUFFER_CONFIG *fts = cm->frame_to_show;

  av1_loop_restoration_filter_unit(limits, rui,
#if CONFIG_STRIPED_LOOP_RESTORATION
                                   &rsi->boundaries, &rlbs, ss_y,
#endif
                                   rsi->procunit_width, rsi->procunit_height,
                                   highbd, bit_depth, fts->buffers[plane],
                                   fts->strides[is_uv], dst->buffers[plane],
                                   dst->strides[is_uv], cm->rst_tmpbuf);

  return sse_restoration_tile(limits, src, dst, plane, highbd);
}
105
Rupert Swarbrick09b5b162017-08-31 16:32:29 +0100106static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
107 int src_stride, const uint8_t *dat8,
Rupert Swarbrick32d150b2017-09-04 10:35:51 +0100108 int dat_stride, int use_highbitdepth,
David Barker3a0df182016-12-21 10:44:52 +0000109 int32_t *flt1, int flt1_stride,
110 int32_t *flt2, int flt2_stride, int *xqd) {
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700111 int i, j;
112 int64_t err = 0;
113 int xq[2];
114 decode_xq(xqd, xq);
Rupert Swarbrick32d150b2017-09-04 10:35:51 +0100115 if (!use_highbitdepth) {
David Barker3a0df182016-12-21 10:44:52 +0000116 const uint8_t *src = src8;
117 const uint8_t *dat = dat8;
118 for (i = 0; i < height; ++i) {
119 for (j = 0; j < width; ++j) {
120 const int32_t u =
121 (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
122 const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
123 const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
David Barkerce110cc2017-02-22 10:38:59 +0000124 const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
David Barker3a0df182016-12-21 10:44:52 +0000125 const int32_t e =
126 ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
127 src[i * src_stride + j];
128 err += e * e;
129 }
130 }
131 } else {
132 const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
133 const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
134 for (i = 0; i < height; ++i) {
135 for (j = 0; j < width; ++j) {
136 const int32_t u =
137 (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
138 const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
139 const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
David Barkerce110cc2017-02-22 10:38:59 +0000140 const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
David Barker3a0df182016-12-21 10:44:52 +0000141 const int32_t e =
142 ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
143 src[i * src_stride + j];
144 err += e * e;
145 }
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700146 }
147 }
148 return err;
149}
150
#define USE_SGRPROJ_REFINEMENT_SEARCH 1
// Refines the coded projection coefficients xqd by a per-coefficient step
// search: starting at start_step and halving each round, it tries moving each
// coefficient down then up, keeping any move that does not increase the SSE.
// Returns the SSE for the final xqd (xqd is updated in place).
static int64_t finer_search_pixel_proj_error(
    const uint8_t *src8, int width, int height, int src_stride,
    const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt1,
    int flt1_stride, int32_t *flt2, int flt2_stride, int start_step, int *xqd) {
  // Error at the starting point; refinement must never return worse.
  int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8,
                                     dat_stride, use_highbitdepth, flt1,
                                     flt1_stride, flt2, flt2_stride, xqd);
  (void)start_step;
#if USE_SGRPROJ_REFINEMENT_SEARCH
  int64_t err2;
  // Legal range for each of the two projection coefficients.
  int tap_min[] = { SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MIN1 };
  int tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 };
  for (int s = start_step; s >= 1; s >>= 1) {
    for (int p = 0; p < 2; ++p) {
      int skip = 0;
      // Try decreasing xqd[p]; the do/continue/break structure repeats the
      // move while it keeps improving (only at the largest step size).
      do {
        if (xqd[p] - s >= tap_min[p]) {
          xqd[p] -= s;
          err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
                                      dat_stride, use_highbitdepth, flt1,
                                      flt1_stride, flt2, flt2_stride, xqd);
          if (err2 > err) {
            xqd[p] += s;  // worse: undo the move
          } else {
            err = err2;
            skip = 1;  // downward direction accepted; skip the upward probe
            // At the highest step size continue moving in the same direction
            if (s == start_step) continue;
          }
        }
        break;
      } while (1);
      if (skip) break;
      // Try increasing xqd[p], mirroring the logic above.
      do {
        if (xqd[p] + s <= tap_max[p]) {
          xqd[p] += s;
          err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
                                      dat_stride, use_highbitdepth, flt1,
                                      flt1_stride, flt2, flt2_stride, xqd);
          if (err2 > err) {
            xqd[p] -= s;  // worse: undo the move
          } else {
            err = err2;
            // At the highest step size continue moving in the same direction
            if (s == start_step) continue;
          }
        }
        break;
      } while (1);
    }
  }
#endif  // USE_SGRPROJ_REFINEMENT_SEARCH
  return err;
}
206
// Solves the 2x2 least-squares system that picks the projection coefficients
// xq minimizing the squared error between the source and the span of the two
// filtered residuals (flt1 - u, flt2 - u). H accumulates the normal-equation
// matrix and C the right-hand side; on an ill-conditioned system xq stays at
// the default { 0, 0 }.
static void get_proj_subspace(const uint8_t *src8, int width, int height,
                              int src_stride, uint8_t *dat8, int dat_stride,
                              int use_highbitdepth, int32_t *flt1,
                              int flt1_stride, int32_t *flt2, int flt2_stride,
                              int *xq) {
  int i, j;
  double H[2][2] = { { 0, 0 }, { 0, 0 } };
  double C[2] = { 0, 0 };
  double Det;
  double x[2];
  const int size = width * height;

  // Reset x87/MMX state before the floating point accumulation below.
  aom_clear_system_state();

  // Default
  xq[0] = 0;
  xq[1] = 0;
  if (!use_highbitdepth) {
    const uint8_t *src = src8;
    const uint8_t *dat = dat8;
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        // u: degraded pixel at filter precision; s: source residual; f1/f2:
        // residuals of the two filtered versions relative to u.
        const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const double s =
            (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
        const double f1 = (double)flt1[i * flt1_stride + j] - u;
        const double f2 = (double)flt2[i * flt2_stride + j] - u;
        H[0][0] += f1 * f1;
        H[1][1] += f2 * f2;
        H[0][1] += f1 * f2;
        C[0] += f1 * s;
        C[1] += f2 * s;
      }
    }
  } else {
    // Identical accumulation for 16-bit pixel buffers.
    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
    for (i = 0; i < height; ++i) {
      for (j = 0; j < width; ++j) {
        const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
        const double s =
            (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
        const double f1 = (double)flt1[i * flt1_stride + j] - u;
        const double f2 = (double)flt2[i * flt2_stride + j] - u;
        H[0][0] += f1 * f1;
        H[1][1] += f2 * f2;
        H[0][1] += f1 * f2;
        C[0] += f1 * s;
        C[1] += f2 * s;
      }
    }
  }
  // Normalize and solve by Cramer's rule.
  H[0][0] /= size;
  H[0][1] /= size;
  H[1][1] /= size;
  H[1][0] = H[0][1];
  C[0] /= size;
  C[1] /= size;
  Det = (H[0][0] * H[1][1] - H[0][1] * H[1][0]);
  if (Det < 1e-8) return;  // ill-posed, return default values
  x[0] = (H[1][1] * C[0] - H[0][1] * C[1]) / Det;
  x[1] = (H[0][0] * C[1] - H[1][0] * C[0]) / Det;
  // Quantize the real-valued solution to SGRPROJ_PRJ_BITS fixed point.
  xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
  xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
}
272
273void encode_xq(int *xq, int *xqd) {
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800274 xqd[0] = xq[0];
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700275 xqd[0] = clamp(xqd[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
Debargha Mukherjeeb7bb0972017-03-09 06:47:43 -0800276 xqd[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1];
Debargha Mukherjee8f209a82016-10-12 10:47:01 -0700277 xqd[1] = clamp(xqd[1], SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1);
278}
279
// Exhaustively searches all SGRPROJ_PARAMS self-guided parameter sets for one
// restoration unit. For each set it builds the two filtered planes (per
// processing unit), solves for the best projection coefficients, refines
// them, and keeps the parameter set with the lowest SSE. Returns the winning
// SgrprojInfo. rstbuf provides scratch space for both filtered planes.
static SgrprojInfo search_selfguided_restoration(
    uint8_t *dat8, int width, int height, int dat_stride, const uint8_t *src8,
    int src_stride, int use_highbitdepth, int bit_depth, int pu_width,
    int pu_height, int32_t *rstbuf) {
  // Two filtered versions of the degraded data, carved out of rstbuf.
  int32_t *flt1 = rstbuf;
  int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
  int ep, bestep = 0;
  int64_t err, besterr = -1;  // -1 marks "no candidate evaluated yet"
  int exqd[2], bestxqd[2] = { 0, 0 };
  // Strides rounded up to a multiple of 8 plus 8 pels of padding.
  int flt1_stride = ((width + 7) & ~7) + 8;
  int flt2_stride = ((width + 7) & ~7) + 8;
  assert(pu_width == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
         pu_width == RESTORATION_PROC_UNIT_SIZE);
  assert(pu_height == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
         pu_height == RESTORATION_PROC_UNIT_SIZE);
#if !CONFIG_HIGHBITDEPTH
  (void)bit_depth;
#endif

  for (ep = 0; ep < SGRPROJ_PARAMS; ep++) {
    int exq[2];
#if CONFIG_HIGHBITDEPTH
    if (use_highbitdepth) {
      uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
      // Filter the unit one processing unit at a time (16-bit path).
      for (int i = 0; i < height; i += pu_height)
        for (int j = 0; j < width; j += pu_width) {
          const int w = AOMMIN(pu_width, width - j);
          const int h = AOMMIN(pu_height, height - i);
          uint16_t *dat_p = dat + i * dat_stride + j;
          int32_t *flt1_p = flt1 + i * flt1_stride + j;
          int32_t *flt2_p = flt2 + i * flt2_stride + j;
#if USE_HIGHPASS_IN_SGRPROJ
          av1_highpass_filter_highbd(dat_p, w, h, dat_stride, flt1_p,
                                     flt1_stride, sgr_params[ep].corner,
                                     sgr_params[ep].edge);
#else
          av1_selfguided_restoration_highbd(
              dat_p, w, h, dat_stride, flt1_p, flt1_stride, bit_depth,
              sgr_params[ep].r1, sgr_params[ep].e1);
#endif  // USE_HIGHPASS_IN_SGRPROJ
          av1_selfguided_restoration_highbd(
              dat_p, w, h, dat_stride, flt2_p, flt2_stride, bit_depth,
              sgr_params[ep].r2, sgr_params[ep].e2);
        }
    } else {
#endif
      // 8-bit path, mirroring the loop above.
      for (int i = 0; i < height; i += pu_height)
        for (int j = 0; j < width; j += pu_width) {
          const int w = AOMMIN(pu_width, width - j);
          const int h = AOMMIN(pu_height, height - i);
          uint8_t *dat_p = dat8 + i * dat_stride + j;
          int32_t *flt1_p = flt1 + i * flt1_stride + j;
          int32_t *flt2_p = flt2 + i * flt2_stride + j;
#if USE_HIGHPASS_IN_SGRPROJ
          av1_highpass_filter(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
                              sgr_params[ep].corner, sgr_params[ep].edge);
#else
          av1_selfguided_restoration(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
                                     sgr_params[ep].r1, sgr_params[ep].e1);
#endif  // USE_HIGHPASS_IN_SGRPROJ
          av1_selfguided_restoration(dat_p, w, h, dat_stride, flt2_p,
                                     flt2_stride, sgr_params[ep].r2,
                                     sgr_params[ep].e2);
        }
#if CONFIG_HIGHBITDEPTH
    }
#endif
    aom_clear_system_state();
    // Solve for the best real-valued projection, code it, then refine the
    // coded coefficients by local search (start_step = 2).
    get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
                      use_highbitdepth, flt1, flt1_stride, flt2, flt2_stride,
                      exq);
    aom_clear_system_state();
    encode_xq(exq, exqd);
    err = finer_search_pixel_proj_error(
        src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth,
        flt1, flt1_stride, flt2, flt2_stride, 2, exqd);
    if (besterr == -1 || err < besterr) {
      bestep = ep;
      besterr = err;
      bestxqd[0] = exqd[0];
      bestxqd[1] = exqd[1];
    }
  }

  SgrprojInfo ret;
  ret.ep = bestep;
  ret.xqd[0] = bestxqd[0];
  ret.xqd[1] = bestxqd[1];
  return ret;
}
370
Debargha Mukherjeecfc12f32017-04-18 07:03:32 -0700371static int count_sgrproj_bits(SgrprojInfo *sgrproj_info,
372 SgrprojInfo *ref_sgrproj_info) {
373 int bits = SGRPROJ_PARAMS_BITS;
374 bits += aom_count_primitive_refsubexpfin(
375 SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
376 ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
377 sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
378 bits += aom_count_primitive_refsubexpfin(
379 SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
380 ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
381 sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
382 return bits;
383}
384
// Per-restoration-unit search results, filled in by the search_* callbacks.
typedef struct {
  // The best coefficients for Wiener or Sgrproj restoration
  WienerInfo wiener;
  SgrprojInfo sgrproj;

  // The sum of squared errors for this rtype.
  int64_t sse[RESTORE_SWITCHABLE_TYPES];

  // The rtype to use for this unit given a frame rtype as
  // index. Indices: WIENER, SGRPROJ, SWITCHABLE.
  RestorationType best_rtype[RESTORE_TYPES - 1];
} RestUnitSearchInfo;
397
// Context threaded (as the priv pointer) through the per-restoration-unit
// search callbacks for one plane.
typedef struct {
  const YV12_BUFFER_CONFIG *src;  // uncompressed source frame
  const AV1_COMMON *cm;
  const MACROBLOCK *x;
  int plane;  // plane being searched (Y, U or V)
  int plane_width;
  int plane_height;
  RestUnitSearchInfo *rusi;  // per-unit results array, indexed by unit index
  YV12_BUFFER_CONFIG *dst_frame;  // scratch frame for trial filtering

  // Degraded (pre-restoration) plane data, taken from cm->frame_to_show.
  uint8_t *dgd_buffer;
  int dgd_stride;
  const uint8_t *src_buffer;
  int src_stride;

  // sse and bits are initialised by reset_rsc in search_rest_type
  int64_t sse;
  int64_t bits;

  // sgrproj and wiener are initialised by rsc_on_tile when starting the first
  // tile in the frame.
  SgrprojInfo sgrproj;
  WienerInfo wiener;
} RestSearchCtxt;
Rupert Swarbrick09b5b162017-08-31 16:32:29 +0100422
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +0100423static void rsc_on_tile(int tile_row, int tile_col, void *priv) {
424 (void)tile_row;
425 (void)tile_col;
426
427 RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +0100428 set_default_sgrproj(&rsc->sgrproj);
429 set_default_wiener(&rsc->wiener);
430}
431
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +0100432static void reset_rsc(RestSearchCtxt *rsc) {
433 rsc->sse = 0;
434 rsc->bits = 0;
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +0100435}
436
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +0100437static void init_rsc(const YV12_BUFFER_CONFIG *src, const AV1_COMMON *cm,
438 const MACROBLOCK *x, int plane, RestUnitSearchInfo *rusi,
439 YV12_BUFFER_CONFIG *dst_frame, RestSearchCtxt *rsc) {
440 rsc->src = src;
441 rsc->cm = cm;
442 rsc->x = x;
443 rsc->plane = plane;
444 rsc->rusi = rusi;
445 rsc->dst_frame = dst_frame;
446
447 const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show;
448 const int is_uv = plane != AOM_PLANE_Y;
449 rsc->plane_width = src->crop_widths[is_uv];
450 rsc->plane_height = src->crop_heights[is_uv];
451 rsc->src_buffer = src->buffers[plane];
452 rsc->src_stride = src->strides[is_uv];
453 rsc->dgd_buffer = dgd->buffers[plane];
454 rsc->dgd_stride = dgd->strides[is_uv];
455 assert(src->crop_widths[is_uv] == dgd->crop_widths[is_uv]);
456 assert(src->crop_heights[is_uv] == dgd->crop_heights[is_uv]);
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +0100457}
458
// Per-restoration-unit callback: finds the best self-guided parameters for
// this unit, measures the resulting SSE, and does an RD comparison against
// leaving the unit unrestored. Updates the unit's RestUnitSearchInfo and the
// running frame totals (and the sgrproj reference used for coefficient
// coding) in the context.
static void search_sgrproj(const RestorationTileLimits *limits,
                           int rest_unit_idx, void *priv) {
  RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
  RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];

  const MACROBLOCK *const x = rsc->x;
  const AV1_COMMON *const cm = rsc->cm;
  const RestorationInfo *rsi = &cm->rst_info[rsc->plane];

#if CONFIG_HIGHBITDEPTH
  const int highbd = cm->use_highbitdepth;
  const int bit_depth = cm->bit_depth;
#else
  const int highbd = 0;
  const int bit_depth = 8;
#endif  // CONFIG_HIGHBITDEPTH

  // Top-left corner of this unit within the degraded and source planes.
  uint8_t *dgd_start =
      rsc->dgd_buffer + limits->v_start * rsc->dgd_stride + limits->h_start;
  const uint8_t *src_start =
      rsc->src_buffer + limits->v_start * rsc->src_stride + limits->h_start;

  rusi->sgrproj = search_selfguided_restoration(
      dgd_start, limits->h_end - limits->h_start,
      limits->v_end - limits->v_start, rsc->dgd_stride, src_start,
      rsc->src_stride, highbd, bit_depth, rsi->procunit_width,
      rsi->procunit_height, cm->rst_tmpbuf);

  // Measure the actual SSE of applying the winning parameters.
  RestorationUnitInfo rui;
  rui.restoration_type = RESTORE_SGRPROJ;
  rui.sgrproj_info = rusi->sgrproj;

  rusi->sse[RESTORE_SGRPROJ] = try_restoration_tile(cm, rsc->src, limits, &rui,
                                                    rsc->dst_frame, rsc->plane);

  // Rate: the on/off flag, plus (if on) the coefficients coded against the
  // running reference rsc->sgrproj.
  const int64_t bits_none = x->sgrproj_restore_cost[0];
  const int64_t bits_sgr = x->sgrproj_restore_cost[1] +
                           (count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj)
                            << AV1_PROB_COST_SHIFT);

  double cost_none =
      RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
  double cost_sgr =
      RDCOST_DBL(x->rdmult, bits_sgr >> 4, rusi->sse[RESTORE_SGRPROJ]);

  RestorationType rtype =
      (cost_sgr < cost_none) ? RESTORE_SGRPROJ : RESTORE_NONE;
  rusi->best_rtype[RESTORE_SGRPROJ - 1] = rtype;

  rsc->sse += rusi->sse[rtype];
  rsc->bits += (cost_sgr < cost_none) ? bits_sgr : bits_none;
  // Only a unit that actually uses sgrproj updates the coding reference.
  if (cost_sgr < cost_none) rsc->sgrproj = rusi->sgrproj;
}
512
// Returns the mean pixel value of src over the rectangle
// [h_start, h_end) x [v_start, v_end).
static double find_average(const uint8_t *src, int h_start, int h_end,
                           int v_start, int v_end, int stride) {
  uint64_t total = 0;
  // Reset x87/MMX state before the floating point division below.
  aom_clear_system_state();
  for (int row = v_start; row < v_end; row++) {
    for (int col = h_start; col < h_end; col++) {
      total += src[row * stride + col];
    }
  }
  const int count = (v_end - v_start) * (h_end - h_start);
  return (double)total / count;
}
524
// Accumulates the statistics for Wiener filter estimation over one region:
// M gets the cross-correlation between the (mean-removed) degraded window and
// the source pixel, and H the autocorrelation matrix of the window, both of
// size wiener_win2 (= wiener_win * wiener_win).
static void compute_stats(int wiener_win, const uint8_t *dgd,
                          const uint8_t *src, int h_start, int h_end,
                          int v_start, int v_end, int dgd_stride,
                          int src_stride, double *M, double *H) {
  int i, j, k, l;
  double Y[WIENER_WIN2];  // flattened window; WIENER_WIN2 bounds wiener_win2
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin = (wiener_win >> 1);
  // Mean of the degraded region, removed from both signals below.
  const double avg =
      find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);

  memset(M, 0, sizeof(*M) * wiener_win2);
  memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
  for (i = v_start; i < v_end; i++) {
    for (j = h_start; j < h_end; j++) {
      const double X = (double)src[i * src_stride + j] - avg;
      // Gather the mean-removed wiener_win x wiener_win window around (i, j);
      // k walks columns and l walks rows of the window.
      int idx = 0;
      for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
        for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
          Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
          idx++;
        }
      }
      assert(idx == wiener_win2);
      for (k = 0; k < wiener_win2; ++k) {
        M[k] += Y[k] * X;
        H[k * wiener_win2 + k] += Y[k] * Y[k];
        for (l = k + 1; l < wiener_win2; ++l) {
          // H is a symmetric matrix, so we only need to fill out the upper
          // triangle here. We can copy it down to the lower triangle outside
          // the (i, j) loops.
          H[k * wiener_win2 + l] += Y[k] * Y[l];
        }
      }
    }
  }
  // Mirror the accumulated upper triangle into the lower triangle.
  for (k = 0; k < wiener_win2; ++k) {
    for (l = k + 1; l < wiener_win2; ++l) {
      H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
    }
  }
}
567
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200568#if CONFIG_HIGHBITDEPTH
// 16-bit counterpart of find_average: mean pixel value of src over
// [h_start, h_end) x [v_start, v_end).
static double find_average_highbd(const uint16_t *src, int h_start, int h_end,
                                  int v_start, int v_end, int stride) {
  uint64_t total = 0;
  // Reset x87/MMX state before the floating point division below.
  aom_clear_system_state();
  for (int row = v_start; row < v_end; row++) {
    for (int col = h_start; col < h_end; col++) {
      total += src[row * stride + col];
    }
  }
  const int count = (v_end - v_start) * (h_end - h_start);
  return (double)total / count;
}
580
// High-bitdepth counterpart of compute_stats: accumulates the Wiener filter
// cross-correlation vector M and autocorrelation matrix H from 16-bit pixel
// buffers (passed as CONVERT_TO_SHORTPTR-compatible uint8_t pointers).
static void compute_stats_highbd(int wiener_win, const uint8_t *dgd8,
                                 const uint8_t *src8, int h_start, int h_end,
                                 int v_start, int v_end, int dgd_stride,
                                 int src_stride, double *M, double *H) {
  int i, j, k, l;
  double Y[WIENER_WIN2];  // flattened window; WIENER_WIN2 bounds wiener_win2
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin = (wiener_win >> 1);
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
  // Mean of the degraded region, removed from both signals below.
  const double avg =
      find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride);

  memset(M, 0, sizeof(*M) * wiener_win2);
  memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
  for (i = v_start; i < v_end; i++) {
    for (j = h_start; j < h_end; j++) {
      const double X = (double)src[i * src_stride + j] - avg;
      // Gather the mean-removed wiener_win x wiener_win window around (i, j);
      // k walks columns and l walks rows of the window.
      int idx = 0;
      for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
        for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
          Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
          idx++;
        }
      }
      assert(idx == wiener_win2);
      for (k = 0; k < wiener_win2; ++k) {
        M[k] += Y[k] * X;
        H[k * wiener_win2 + k] += Y[k] * Y[k];
        for (l = k + 1; l < wiener_win2; ++l) {
          // H is a symmetric matrix, so we only need to fill out the upper
          // triangle here. We can copy it down to the lower triangle outside
          // the (i, j) loops.
          H[k * wiener_win2 + l] += Y[k] * Y[l];
        }
      }
    }
  }
  // Mirror the accumulated upper triangle into the lower triangle.
  for (k = 0; k < wiener_win2; ++k) {
    for (l = k + 1; l < wiener_win2; ++l) {
      H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
    }
  }
}
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200625#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700626
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700627static INLINE int wrap_index(int i, int wiener_win) {
628 const int wiener_halfwin1 = (wiener_win >> 1) + 1;
629 return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700630}
631
// Fix vector b, update vector a.
// Solves for the best symmetric, normalized filter a given a fixed filter b,
// by projecting the 2D statistics (Mc, Hc) onto b and solving the reduced
// linear system that results. Symmetry is imposed via wrap_index() (taps i
// and wiener_win - 1 - i share a coefficient) and the sum-to-one
// normalization is folded into the equations, so only wiener_halfwin1 - 1
// free taps are actually solved for.
static void update_a_sep_sym(int wiener_win, double **Mc, double **Hc,
                             double *a, double *b) {
  int i, j;
  double S[WIENER_WIN];
  double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
  memset(A, 0, sizeof(A));
  memset(B, 0, sizeof(B));
  // A = reduced cross-correlation vector: accumulate rows of M weighted by
  // b, folding symmetric tap positions together via wrap_index().
  for (i = 0; i < wiener_win; i++) {
    for (j = 0; j < wiener_win; ++j) {
      const int jj = wrap_index(j, wiener_win);
      A[jj] += Mc[i][j] * b[i];
    }
  }
  // B = reduced auto-correlation matrix: contract H with b on both sides,
  // again folding symmetric positions.
  for (i = 0; i < wiener_win; i++) {
    for (j = 0; j < wiener_win; j++) {
      int k, l;
      for (k = 0; k < wiener_win; ++k)
        for (l = 0; l < wiener_win; ++l) {
          const int kk = wrap_index(k, wiener_win);
          const int ll = wrap_index(l, wiener_win);
          B[ll * wiener_halfwin1 + kk] +=
              Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] * b[j];
        }
    }
  }
  // Normalization enforcement in the system of equations itself: substitute
  // centre tap = 1 - 2 * (sum of the other taps) so it does not need to be
  // solved for explicitly.
  for (i = 0; i < wiener_halfwin1 - 1; ++i)
    A[i] -=
        A[wiener_halfwin1 - 1] * 2 +
        B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
        2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
  for (i = 0; i < wiener_halfwin1 - 1; ++i)
    for (j = 0; j < wiener_halfwin1 - 1; ++j)
      B[i * wiener_halfwin1 + j] -=
          2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
               B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
               2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
                     (wiener_halfwin1 - 1)]);
  // Solve the reduced system; on success mirror the half filter back out to
  // the full symmetric filter and recover the centre tap from the
  // normalization. If linsolve() fails (singular system), a is left
  // unchanged.
  if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
    S[wiener_halfwin1 - 1] = 1.0;
    for (i = wiener_halfwin1; i < wiener_win; ++i) {
      S[i] = S[wiener_win - 1 - i];
      S[wiener_halfwin1 - 1] -= 2 * S[i];
    }
    memcpy(a, S, wiener_win * sizeof(*a));
  }
}
682
// Fix vector a, update vector b.
// Mirror image of update_a_sep_sym(): solves for the best symmetric,
// normalized filter b given a fixed filter a, by contracting the statistics
// (Mc, Hc) with a and solving the reduced system. Symmetric taps are folded
// together with wrap_index() and the sum-to-one normalization is built into
// the equations, leaving wiener_halfwin1 - 1 free unknowns.
static void update_b_sep_sym(int wiener_win, double **Mc, double **Hc,
                             double *a, double *b) {
  int i, j;
  double S[WIENER_WIN];
  double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
  const int wiener_win2 = wiener_win * wiener_win;
  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
  memset(A, 0, sizeof(A));
  memset(B, 0, sizeof(B));
  // A = reduced cross-correlation vector: columns of M weighted by a, with
  // symmetric tap positions folded together.
  for (i = 0; i < wiener_win; i++) {
    const int ii = wrap_index(i, wiener_win);
    for (j = 0; j < wiener_win; j++) A[ii] += Mc[i][j] * a[j];
  }

  // B = reduced auto-correlation matrix: contract H with a on both sides.
  for (i = 0; i < wiener_win; i++) {
    for (j = 0; j < wiener_win; j++) {
      const int ii = wrap_index(i, wiener_win);
      const int jj = wrap_index(j, wiener_win);
      int k, l;
      for (k = 0; k < wiener_win; ++k)
        for (l = 0; l < wiener_win; ++l)
          B[jj * wiener_halfwin1 + ii] +=
              Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] * a[l];
    }
  }
  // Normalization enforcement in the system of equations itself: substitute
  // centre tap = 1 - 2 * (sum of the other taps).
  for (i = 0; i < wiener_halfwin1 - 1; ++i)
    A[i] -=
        A[wiener_halfwin1 - 1] * 2 +
        B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
        2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
  for (i = 0; i < wiener_halfwin1 - 1; ++i)
    for (j = 0; j < wiener_halfwin1 - 1; ++j)
      B[i * wiener_halfwin1 + j] -=
          2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
               B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
               2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
                     (wiener_halfwin1 - 1)]);
  // Solve, mirror the half filter to full length, and recover the centre
  // tap; if linsolve() fails, b is left unchanged.
  if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
    S[wiener_halfwin1 - 1] = 1.0;
    for (i = wiener_halfwin1; i < wiener_win; ++i) {
      S[i] = S[wiener_win - 1 - i];
      S[wiener_halfwin1 - 1] -= 2 * S[i];
    }
    memcpy(b, S, wiener_win * sizeof(*b));
  }
}
731
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700732static int wiener_decompose_sep_sym(int wiener_win, double *M, double *H,
733 double *a, double *b) {
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700734 static const int init_filt[WIENER_WIN] = {
735 WIENER_FILT_TAP0_MIDV, WIENER_FILT_TAP1_MIDV, WIENER_FILT_TAP2_MIDV,
736 WIENER_FILT_TAP3_MIDV, WIENER_FILT_TAP2_MIDV, WIENER_FILT_TAP1_MIDV,
737 WIENER_FILT_TAP0_MIDV,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700738 };
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800739 double *Hc[WIENER_WIN2];
740 double *Mc[WIENER_WIN];
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700741 int i, j, iter;
742 const int plane_off = (WIENER_WIN - wiener_win) >> 1;
743 const int wiener_win2 = wiener_win * wiener_win;
744 for (i = 0; i < wiener_win; i++) {
745 a[i] = b[i] = (double)init_filt[i + plane_off] / WIENER_FILT_STEP;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700746 }
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700747 for (i = 0; i < wiener_win; i++) {
748 Mc[i] = M + i * wiener_win;
749 for (j = 0; j < wiener_win; j++) {
750 Hc[i * wiener_win + j] =
751 H + i * wiener_win * wiener_win2 + j * wiener_win;
752 }
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700753 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700754
755 iter = 1;
Debargha Mukherjee1b3dbf02017-03-13 14:47:21 -0700756 while (iter < NUM_WIENER_ITERS) {
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700757 update_a_sep_sym(wiener_win, Mc, Hc, a, b);
758 update_b_sep_sym(wiener_win, Mc, Hc, a, b);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700759 iter++;
760 }
761 return 1;
762}
763
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -0800764// Computes the function x'*H*x - x'*M for the learned 2D filter x, and compares
Yaowu Xuc27fc142016-08-22 16:08:15 -0700765// against identity filters; Final score is defined as the difference between
766// the function values
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700767static double compute_score(int wiener_win, double *M, double *H,
768 InterpKernel vfilt, InterpKernel hfilt) {
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800769 double ab[WIENER_WIN * WIENER_WIN];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700770 int i, k, l;
771 double P = 0, Q = 0;
772 double iP = 0, iQ = 0;
773 double Score, iScore;
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800774 double a[WIENER_WIN], b[WIENER_WIN];
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700775 const int plane_off = (WIENER_WIN - wiener_win) >> 1;
776 const int wiener_win2 = wiener_win * wiener_win;
Jingning Han041c67b2017-04-14 21:39:26 -0700777
778 aom_clear_system_state();
779
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800780 a[WIENER_HALFWIN] = b[WIENER_HALFWIN] = 1.0;
781 for (i = 0; i < WIENER_HALFWIN; ++i) {
782 a[i] = a[WIENER_WIN - i - 1] = (double)vfilt[i] / WIENER_FILT_STEP;
783 b[i] = b[WIENER_WIN - i - 1] = (double)hfilt[i] / WIENER_FILT_STEP;
784 a[WIENER_HALFWIN] -= 2 * a[i];
785 b[WIENER_HALFWIN] -= 2 * b[i];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700786 }
Debargha Mukherjeea1a1e362017-10-04 20:01:03 -0700787 memset(ab, 0, sizeof(ab));
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700788 for (k = 0; k < wiener_win; ++k) {
789 for (l = 0; l < wiener_win; ++l)
790 ab[k * wiener_win + l] = a[l + plane_off] * b[k + plane_off];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700791 }
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700792 for (k = 0; k < wiener_win2; ++k) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700793 P += ab[k] * M[k];
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700794 for (l = 0; l < wiener_win2; ++l)
795 Q += ab[k] * H[k * wiener_win2 + l] * ab[l];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700796 }
797 Score = Q - 2 * P;
798
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700799 iP = M[wiener_win2 >> 1];
800 iQ = H[(wiener_win2 >> 1) * wiener_win2 + (wiener_win2 >> 1)];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700801 iScore = iQ - 2 * iP;
802
803 return Score - iScore;
804}
805
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700806static void quantize_sym_filter(int wiener_win, double *f, InterpKernel fi) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700807 int i;
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700808 const int wiener_halfwin = (wiener_win >> 1);
809 for (i = 0; i < wiener_halfwin; ++i) {
Debargha Mukherjee999d2f62016-12-15 13:23:21 -0800810 fi[i] = RINT(f[i] * WIENER_FILT_STEP);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700811 }
812 // Specialize for 7-tap filter
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700813 if (wiener_win == WIENER_WIN) {
814 fi[0] = CLIP(fi[0], WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_MAXV);
815 fi[1] = CLIP(fi[1], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
816 fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
817 } else {
818 fi[2] = CLIP(fi[1], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
819 fi[1] = CLIP(fi[0], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
820 fi[0] = 0;
821 }
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -0800822 // Satisfy filter constraints
823 fi[WIENER_WIN - 1] = fi[0];
824 fi[WIENER_WIN - 2] = fi[1];
825 fi[WIENER_WIN - 3] = fi[2];
David Barker1e8e6b92017-01-13 13:45:51 +0000826 // The central element has an implicit +WIENER_FILT_STEP
827 fi[3] = -2 * (fi[0] + fi[1] + fi[2]);
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -0800828}
829
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700830static int count_wiener_bits(int wiener_win, WienerInfo *wiener_info,
Debargha Mukherjeecfc12f32017-04-18 07:03:32 -0700831 WienerInfo *ref_wiener_info) {
832 int bits = 0;
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700833 if (wiener_win == WIENER_WIN)
834 bits += aom_count_primitive_refsubexpfin(
835 WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
836 WIENER_FILT_TAP0_SUBEXP_K,
837 ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV,
838 wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV);
Debargha Mukherjeecfc12f32017-04-18 07:03:32 -0700839 bits += aom_count_primitive_refsubexpfin(
840 WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
841 WIENER_FILT_TAP1_SUBEXP_K,
842 ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV,
843 wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV);
844 bits += aom_count_primitive_refsubexpfin(
845 WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
846 WIENER_FILT_TAP2_SUBEXP_K,
847 ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV,
848 wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV);
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700849 if (wiener_win == WIENER_WIN)
850 bits += aom_count_primitive_refsubexpfin(
851 WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
852 WIENER_FILT_TAP0_SUBEXP_K,
853 ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV,
854 wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV);
Debargha Mukherjeecfc12f32017-04-18 07:03:32 -0700855 bits += aom_count_primitive_refsubexpfin(
856 WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
857 WIENER_FILT_TAP1_SUBEXP_K,
858 ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV,
859 wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV);
860 bits += aom_count_primitive_refsubexpfin(
861 WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
862 WIENER_FILT_TAP2_SUBEXP_K,
863 ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV,
864 wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV);
865 return bits;
866}
867
Debargha Mukherjeee39e2ee2017-05-11 03:38:03 -0700868#define USE_WIENER_REFINEMENT_SEARCH 1
Rupert Swarbrick2ec2a6f2017-10-20 09:52:13 +0100869static int64_t finer_tile_search_wiener(
870 const AV1_COMMON *cm, const YV12_BUFFER_CONFIG *src,
871 const RestorationTileLimits *limits, RestorationUnitInfo *rui,
872 int start_step, int plane, int wiener_win, YV12_BUFFER_CONFIG *dst_frame) {
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700873 const int plane_off = (WIENER_WIN - wiener_win) >> 1;
Rupert Swarbrick2ec2a6f2017-10-20 09:52:13 +0100874 int64_t err = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
Debargha Mukherjee749f5cd2017-05-31 11:26:51 -0700875 (void)start_step;
Debargha Mukherjeee39e2ee2017-05-11 03:38:03 -0700876#if USE_WIENER_REFINEMENT_SEARCH
877 int64_t err2;
878 int tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV,
879 WIENER_FILT_TAP2_MINV };
880 int tap_max[] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV,
881 WIENER_FILT_TAP2_MAXV };
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100882
Rupert Swarbrick2ec2a6f2017-10-20 09:52:13 +0100883 WienerInfo *plane_wiener = &rui->wiener_info;
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100884
Debargha Mukherjeee39e2ee2017-05-11 03:38:03 -0700885 // printf("err pre = %"PRId64"\n", err);
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700886 for (int s = start_step; s >= 1; s >>= 1) {
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700887 for (int p = plane_off; p < WIENER_HALFWIN; ++p) {
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700888 int skip = 0;
889 do {
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100890 if (plane_wiener->hfilter[p] - s >= tap_min[p]) {
891 plane_wiener->hfilter[p] -= s;
892 plane_wiener->hfilter[WIENER_WIN - p - 1] -= s;
893 plane_wiener->hfilter[WIENER_HALFWIN] += 2 * s;
Rupert Swarbrick2ec2a6f2017-10-20 09:52:13 +0100894 err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700895 if (err2 > err) {
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100896 plane_wiener->hfilter[p] += s;
897 plane_wiener->hfilter[WIENER_WIN - p - 1] += s;
898 plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * s;
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700899 } else {
900 err = err2;
901 skip = 1;
902 // At the highest step size continue moving in the same direction
903 if (s == start_step) continue;
904 }
905 }
906 break;
907 } while (1);
908 if (skip) break;
909 do {
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100910 if (plane_wiener->hfilter[p] + s <= tap_max[p]) {
911 plane_wiener->hfilter[p] += s;
912 plane_wiener->hfilter[WIENER_WIN - p - 1] += s;
913 plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * s;
Rupert Swarbrick2ec2a6f2017-10-20 09:52:13 +0100914 err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700915 if (err2 > err) {
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100916 plane_wiener->hfilter[p] -= s;
917 plane_wiener->hfilter[WIENER_WIN - p - 1] -= s;
918 plane_wiener->hfilter[WIENER_HALFWIN] += 2 * s;
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700919 } else {
920 err = err2;
921 // At the highest step size continue moving in the same direction
922 if (s == start_step) continue;
923 }
924 }
925 break;
926 } while (1);
Debargha Mukherjeee39e2ee2017-05-11 03:38:03 -0700927 }
Debargha Mukherjee1cb757c2017-08-21 02:46:31 -0700928 for (int p = plane_off; p < WIENER_HALFWIN; ++p) {
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700929 int skip = 0;
930 do {
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100931 if (plane_wiener->vfilter[p] - s >= tap_min[p]) {
932 plane_wiener->vfilter[p] -= s;
933 plane_wiener->vfilter[WIENER_WIN - p - 1] -= s;
934 plane_wiener->vfilter[WIENER_HALFWIN] += 2 * s;
Rupert Swarbrick2ec2a6f2017-10-20 09:52:13 +0100935 err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700936 if (err2 > err) {
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100937 plane_wiener->vfilter[p] += s;
938 plane_wiener->vfilter[WIENER_WIN - p - 1] += s;
939 plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * s;
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700940 } else {
941 err = err2;
942 skip = 1;
943 // At the highest step size continue moving in the same direction
944 if (s == start_step) continue;
945 }
946 }
947 break;
948 } while (1);
949 if (skip) break;
950 do {
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100951 if (plane_wiener->vfilter[p] + s <= tap_max[p]) {
952 plane_wiener->vfilter[p] += s;
953 plane_wiener->vfilter[WIENER_WIN - p - 1] += s;
954 plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * s;
Rupert Swarbrick2ec2a6f2017-10-20 09:52:13 +0100955 err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700956 if (err2 > err) {
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +0100957 plane_wiener->vfilter[p] -= s;
958 plane_wiener->vfilter[WIENER_WIN - p - 1] -= s;
959 plane_wiener->vfilter[WIENER_HALFWIN] += 2 * s;
Debargha Mukherjee0c18b2c2017-05-15 21:15:30 -0700960 } else {
961 err = err2;
962 // At the highest step size continue moving in the same direction
963 if (s == start_step) continue;
964 }
965 }
966 break;
967 } while (1);
Debargha Mukherjeee39e2ee2017-05-11 03:38:03 -0700968 }
969 }
970// printf("err post = %"PRId64"\n", err);
971#endif // USE_WIENER_REFINEMENT_SEARCH
972 return err;
973}
974
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +0100975static void search_wiener(const RestorationTileLimits *limits,
976 int rest_unit_idx, void *priv) {
977 RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
978 RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
979
Rupert Swarbrick09b5b162017-08-31 16:32:29 +0100980 const int wiener_win =
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +0100981 (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA;
Rupert Swarbrick09b5b162017-08-31 16:32:29 +0100982
983 double M[WIENER_WIN2];
984 double H[WIENER_WIN2 * WIENER_WIN2];
985 double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN];
986
Rupert Swarbrick2ec2a6f2017-10-20 09:52:13 +0100987#if CONFIG_HIGHBITDEPTH
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +0100988 const AV1_COMMON *const cm = rsc->cm;
Rupert Swarbrick09b5b162017-08-31 16:32:29 +0100989 if (cm->use_highbitdepth)
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +0100990 compute_stats_highbd(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
Rupert Swarbrick5d2e7292017-09-26 11:32:17 +0100991 limits->h_start, limits->h_end, limits->v_start,
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +0100992 limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H);
Rupert Swarbrick09b5b162017-08-31 16:32:29 +0100993 else
994#endif // CONFIG_HIGHBITDEPTH
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +0100995 compute_stats(wiener_win, rsc->dgd_buffer, rsc->src_buffer, limits->h_start,
996 limits->h_end, limits->v_start, limits->v_end,
997 rsc->dgd_stride, rsc->src_stride, M, H);
Rupert Swarbrick09b5b162017-08-31 16:32:29 +0100998
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +0100999 const MACROBLOCK *const x = rsc->x;
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001000 const int64_t bits_none = x->wiener_restore_cost[0];
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001001
1002 if (!wiener_decompose_sep_sym(wiener_win, M, H, vfilterd, hfilterd)) {
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001003 rsc->bits += bits_none;
1004 rsc->sse += rusi->sse[RESTORE_NONE];
1005 rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
1006 rusi->sse[RESTORE_WIENER] = INT64_MAX;
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001007 return;
1008 }
1009
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001010 RestorationUnitInfo rui;
1011 memset(&rui, 0, sizeof(rui));
1012 rui.restoration_type = RESTORE_WIENER;
1013 quantize_sym_filter(wiener_win, vfilterd, rui.wiener_info.vfilter);
1014 quantize_sym_filter(wiener_win, hfilterd, rui.wiener_info.hfilter);
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001015
1016 // Filter score computes the value of the function x'*A*x - x'*b for the
1017 // learned filter and compares it against identity filer. If there is no
1018 // reduction in the function, the filter is reverted back to identity
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001019 if (compute_score(wiener_win, M, H, rui.wiener_info.vfilter,
1020 rui.wiener_info.hfilter) > 0) {
1021 rsc->bits += bits_none;
1022 rsc->sse += rusi->sse[RESTORE_NONE];
1023 rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
1024 rusi->sse[RESTORE_WIENER] = INT64_MAX;
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001025 return;
1026 }
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001027
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001028 aom_clear_system_state();
1029
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001030 rusi->sse[RESTORE_WIENER] =
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001031 finer_tile_search_wiener(rsc->cm, rsc->src, limits, &rui, 4, rsc->plane,
1032 wiener_win, rsc->dst_frame);
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001033 rusi->wiener = rui.wiener_info;
1034
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001035 if (wiener_win != WIENER_WIN) {
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001036 assert(rui.wiener_info.vfilter[0] == 0 &&
1037 rui.wiener_info.vfilter[WIENER_WIN - 1] == 0);
1038 assert(rui.wiener_info.hfilter[0] == 0 &&
1039 rui.wiener_info.hfilter[WIENER_WIN - 1] == 0);
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001040 }
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001041
1042 const int64_t bits_wiener =
1043 x->wiener_restore_cost[1] +
1044 (count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener)
1045 << AV1_PROB_COST_SHIFT);
1046
1047 double cost_none =
1048 RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
1049 double cost_wiener =
1050 RDCOST_DBL(x->rdmult, bits_wiener >> 4, rusi->sse[RESTORE_WIENER]);
1051
1052 RestorationType rtype =
1053 (cost_wiener < cost_none) ? RESTORE_WIENER : RESTORE_NONE;
1054 rusi->best_rtype[RESTORE_WIENER - 1] = rtype;
1055
1056 rsc->sse += rusi->sse[rtype];
1057 rsc->bits += (cost_wiener < cost_none) ? bits_wiener : bits_none;
1058 if (cost_wiener < cost_none) rsc->wiener = rusi->wiener;
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001059}
1060
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001061static void search_norestore(const RestorationTileLimits *limits,
1062 int rest_unit_idx, void *priv) {
1063 RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
1064 RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
1065
Debargha Mukherjeee39e2ee2017-05-11 03:38:03 -07001066#if CONFIG_HIGHBITDEPTH
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001067 const int highbd = rsc->cm->use_highbitdepth;
Rupert Swarbrick2ec2a6f2017-10-20 09:52:13 +01001068#else
1069 const int highbd = 0;
1070#endif // CONFIG_HIGHBITDEPTH
1071
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001072 rusi->sse[RESTORE_NONE] = sse_restoration_tile(
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001073 limits, rsc->src, rsc->cm->frame_to_show, rsc->plane, highbd);
Rupert Swarbrick64b8bbd2017-10-16 15:53:07 +01001074
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001075 rsc->sse += rusi->sse[RESTORE_NONE];
Debargha Mukherjee5d89a632016-09-17 13:16:58 -07001076}
1077
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001078static void search_switchable(const RestorationTileLimits *limits,
1079 int rest_unit_idx, void *priv) {
Rupert Swarbrick5d2e7292017-09-26 11:32:17 +01001080 (void)limits;
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001081 RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
1082 RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001083
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001084 const MACROBLOCK *const x = rsc->x;
Rupert Swarbrickdd6f09a2017-10-19 16:10:23 +01001085
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001086 const int wiener_win =
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001087 (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA;
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001088
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001089 double best_cost = 0;
1090 int64_t best_bits = 0;
1091 RestorationType best_rtype = RESTORE_NONE;
1092
1093 for (RestorationType r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) {
1094 const int64_t sse = rusi->sse[r];
1095 int64_t coeff_pcost = 0;
1096 switch (r) {
1097 case RESTORE_NONE: coeff_pcost = 0; break;
1098 case RESTORE_WIENER:
1099 coeff_pcost =
1100 count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener);
1101 break;
1102 default:
1103 assert(r == RESTORE_SGRPROJ);
1104 coeff_pcost = count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj);
1105 break;
1106 }
1107 const int64_t coeff_bits = coeff_pcost << AV1_PROB_COST_SHIFT;
1108 const int64_t bits = x->switchable_restore_cost[r] + coeff_bits;
1109 double cost = RDCOST_DBL(x->rdmult, bits >> 4, sse);
1110 if (r == 0 || cost < best_cost) {
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001111 best_cost = cost;
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001112 best_bits = bits;
1113 best_rtype = r;
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001114 }
1115 }
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001116
1117 rusi->best_rtype[RESTORE_SWITCHABLE - 1] = best_rtype;
1118
1119 rsc->sse += rusi->sse[best_rtype];
1120 rsc->bits += best_bits;
1121 if (best_rtype == RESTORE_WIENER) rsc->wiener = rusi->wiener;
1122 if (best_rtype == RESTORE_SGRPROJ) rsc->sgrproj = rusi->sgrproj;
Rupert Swarbrick09b5b162017-08-31 16:32:29 +01001123}
1124
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001125static void copy_unit_info(RestorationType frame_rtype,
1126 const RestUnitSearchInfo *rusi,
1127 RestorationUnitInfo *rui) {
1128 assert(frame_rtype > 0);
1129 rui->restoration_type = rusi->best_rtype[frame_rtype - 1];
1130 if (rui->restoration_type == RESTORE_WIENER)
1131 rui->wiener_info = rusi->wiener;
1132 else
1133 rui->sgrproj_info = rusi->sgrproj;
1134}
Debargha Mukherjee5d89a632016-09-17 13:16:58 -07001135
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001136static double search_rest_type(RestSearchCtxt *rsc, RestorationType rtype) {
1137 static const rest_unit_visitor_t funs[RESTORE_TYPES] = {
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001138 search_norestore, search_wiener, search_sgrproj, search_switchable
1139 };
1140 static const int hborders[RESTORE_TYPES] = { 0, WIENER_HALFWIN,
1141 SGRPROJ_BORDER_HORZ, 0 };
1142 static const int vborders[RESTORE_TYPES] = { 0, WIENER_HALFWIN,
1143 SGRPROJ_BORDER_VERT, 0 };
Debargha Mukherjeecfc12f32017-04-18 07:03:32 -07001144
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001145 if (hborders[rtype] || vborders[rtype]) {
1146#if CONFIG_HIGHBITDEPTH
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001147 const int highbd = rsc->cm->use_highbitdepth;
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001148#else
1149 const int highbd = 0;
1150#endif
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001151 extend_frame(rsc->dgd_buffer, rsc->plane_width, rsc->plane_height,
1152 rsc->dgd_stride, hborders[rtype], vborders[rtype], highbd);
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001153 }
1154
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001155 reset_rsc(rsc);
1156 av1_foreach_rest_unit_in_frame(rsc->cm, rsc->plane, rsc_on_tile, funs[rtype],
1157 rsc);
1158 return RDCOST_DBL(rsc->x->rdmult, rsc->bits >> 4, rsc->sse);
Debargha Mukherjee5cd2ab92016-09-08 15:15:17 -07001159}
1160
Rupert Swarbrickbcb65fe2017-10-25 17:15:28 +01001161static int rest_tiles_in_plane(const AV1_COMMON *cm, int plane) {
1162 const RestorationInfo *rsi = &cm->rst_info[plane];
1163 return cm->tile_rows * cm->tile_cols * rsi->units_per_tile;
1164}
1165
Rupert Swarbrick146a0602017-10-17 16:52:20 +01001166void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
Yaowu Xuf883b422016-08-30 14:01:10 -07001167 AV1_COMMON *const cm = &cpi->common;
Debargha Mukherjee5d89a632016-09-17 13:16:58 -07001168
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001169 int ntiles[2];
1170 for (int is_uv = 0; is_uv < 2; ++is_uv)
Rupert Swarbrickbcb65fe2017-10-25 17:15:28 +01001171 ntiles[is_uv] = rest_tiles_in_plane(cm, is_uv);
Debargha Mukherjeed48f5732017-05-19 14:58:07 -07001172
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001173 assert(ntiles[1] <= ntiles[0]);
1174 RestUnitSearchInfo *rusi =
1175 (RestUnitSearchInfo *)aom_malloc(sizeof(*rusi) * ntiles[0]);
Debargha Mukherjeed48f5732017-05-19 14:58:07 -07001176
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001177 RestSearchCtxt rsc;
Debargha Mukherjeed48f5732017-05-19 14:58:07 -07001178 for (int plane = AOM_PLANE_Y; plane <= AOM_PLANE_V; ++plane) {
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001179 init_rsc(src, &cpi->common, &cpi->td.mb, plane, rusi, &cpi->trial_frame_rst,
1180 &rsc);
Debargha Mukherjeea43a2d92017-01-03 15:14:57 -08001181
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001182 const int plane_ntiles = ntiles[plane > 0];
1183 const RestorationType num_rtypes =
1184 (plane_ntiles > 1) ? RESTORE_TYPES : RESTORE_SWITCHABLE_TYPES;
1185
1186 double best_cost = 0;
1187 RestorationType best_rtype = RESTORE_NONE;
1188
1189 for (RestorationType r = 0; r < num_rtypes; ++r) {
1190 if ((force_restore_type != RESTORE_TYPES) && (r != RESTORE_NONE) &&
1191 (r != force_restore_type))
1192 continue;
1193
Rupert Swarbrick33ed9e62017-10-23 13:32:37 +01001194 double cost = search_rest_type(&rsc, r);
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001195
1196 if (r == 0 || cost < best_cost) {
1197 best_cost = cost;
1198 best_rtype = r;
1199 }
1200 }
1201
1202 cm->rst_info[plane].frame_restoration_type = best_rtype;
1203 if (force_restore_type != RESTORE_TYPES)
1204 assert(best_rtype == force_restore_type || best_rtype == RESTORE_NONE);
1205
1206 if (best_rtype != RESTORE_NONE) {
1207 for (int u = 0; u < plane_ntiles; ++u) {
1208 copy_unit_info(best_rtype, &rusi[u], &cm->rst_info[plane].unit_info[u]);
1209 }
1210 }
Debargha Mukherjee994ccd72017-01-06 11:18:23 -08001211 }
Rupert Swarbrick1a96c3f2017-10-24 11:55:00 +01001212
1213 aom_free(rusi);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001214}