/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>
#include <stdlib.h>

#ifdef HAVE_CONFIG_H
#include "./config.h"
#endif

#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "./cdef.h"

/* Generated from gen_filter_tables.c. */
const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
  { -1 * OD_FILT_BSTRIDE + 1, -2 * OD_FILT_BSTRIDE + 2,
    -3 * OD_FILT_BSTRIDE + 3 },
  { 0 * OD_FILT_BSTRIDE + 1, -1 * OD_FILT_BSTRIDE + 2,
    -1 * OD_FILT_BSTRIDE + 3 },
  { 0 * OD_FILT_BSTRIDE + 1, 0 * OD_FILT_BSTRIDE + 2, 0 * OD_FILT_BSTRIDE + 3 },
  { 0 * OD_FILT_BSTRIDE + 1, 1 * OD_FILT_BSTRIDE + 2, 1 * OD_FILT_BSTRIDE + 3 },
  { 1 * OD_FILT_BSTRIDE + 1, 2 * OD_FILT_BSTRIDE + 2, 3 * OD_FILT_BSTRIDE + 3 },
  { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 1, 3 * OD_FILT_BSTRIDE + 1 },
  { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 0, 3 * OD_FILT_BSTRIDE + 0 },
  { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE - 1, 3 * OD_FILT_BSTRIDE - 1 },
};
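/* Each row of OD_DIRECTION_OFFSETS_TABLE corresponds to one of the 8 search
   directions, and entry k holds the offset (within the OD_FILT_BSTRIDE-strided
   working buffer) of the (k + 1)-th pixel along that direction. The
   directional filters below read both this offset and its negation, i.e. the
   mirrored pixel on the other side of the center. */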

/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
   The search minimizes the weighted variance along all the lines in a
   particular direction, i.e. the squared error between the input and a
   "predicted" block where each pixel is replaced by the average along a line
   in a particular direction. Since each direction has the same sum(x^2) term,
   that term is never computed. See Section 2, step 2, of:
   http://jmvalin.ca/notes/intra_paint.pdf */
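/* For reference: partial[0][i + j] accumulates along the 45-degree up-right
   anti-diagonals (direction 0), partial[2][i] accumulates along rows
   (horizontal, direction 2) and partial[6][j] along columns (vertical,
   direction 6); the remaining directions use the half-pel slopes obtained
   with i / 2 and j / 2. */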
int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var,
                   int coeff_shift) {
  int i;
  int32_t cost[8] = { 0 };
  int partial[8][15] = { { 0 } };
  int32_t best_cost = 0;
  int best_dir = 0;
  /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
     The output is then 840 times larger, but that doesn't matter since we
     only use it to find the max. */
  static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
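  /* For example, a 2-pixel line gets weight 840 / 2 = 420 = div_table[2] and
     a full 8-pixel line gets 840 / 8 = 105 = div_table[8], so every squared
     partial sum ends up in the same 840x-scaled units regardless of line
     length. */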
  for (i = 0; i < 8; i++) {
    int j;
    for (j = 0; j < 8; j++) {
      int x;
      /* We subtract 128 here to reduce the maximum range of the squared
         partial sums. */
      x = (img[i * stride + j] >> coeff_shift) - 128;
      partial[0][i + j] += x;
      partial[1][i + j / 2] += x;
      partial[2][i] += x;
      partial[3][3 + i - j / 2] += x;
      partial[4][7 + i - j] += x;
      partial[5][3 - i / 2 + j] += x;
      partial[6][j] += x;
      partial[7][i / 2 + j] += x;
    }
  }
  for (i = 0; i < 8; i++) {
    cost[2] += partial[2][i] * partial[2][i];
    cost[6] += partial[6][i] * partial[6][i];
  }
  cost[2] *= div_table[8];
  cost[6] *= div_table[8];
  for (i = 0; i < 7; i++) {
    cost[0] += (partial[0][i] * partial[0][i] +
                partial[0][14 - i] * partial[0][14 - i]) *
               div_table[i + 1];
    cost[4] += (partial[4][i] * partial[4][i] +
                partial[4][14 - i] * partial[4][14 - i]) *
               div_table[i + 1];
  }
  cost[0] += partial[0][7] * partial[0][7] * div_table[8];
  cost[4] += partial[4][7] * partial[4][7] * div_table[8];
  for (i = 1; i < 8; i += 2) {
    int j;
    for (j = 0; j < 4 + 1; j++) {
      cost[i] += partial[i][3 + j] * partial[i][3 + j];
    }
    cost[i] *= div_table[8];
    for (j = 0; j < 4 - 1; j++) {
      cost[i] += (partial[i][j] * partial[i][j] +
                  partial[i][10 - j] * partial[i][10 - j]) *
                 div_table[2 * j + 2];
    }
  }
  for (i = 0; i < 8; i++) {
    if (cost[i] > best_cost) {
      best_cost = cost[i];
      best_dir = i;
    }
  }
  /* Difference between the optimal variance and the variance along the
     orthogonal direction. Again, the sum(x^2) terms cancel out. */
  *var = best_cost - cost[(best_dir + 4) & 7];
  /* We'd normally divide by 840, but dividing by 1024 is close enough
     for what we're going to do with this. */
  *var >>= 10;
  return best_dir;
}

/* Smooth in the direction detected. */
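/* For each pixel x, the 8x8 filter below forms a weighted sum of the
   differences (p - x) for the three pixels on either side along the chosen
   direction, using taps { 3, 2, 1 }; differences whose magnitude reaches the
   threshold are skipped. The applied correction is (sum + 8) >> 4, and the
   function returns the total absolute correction scaled the same way,
   (total_abs + 8) >> 4. */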
int od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
                                     const uint16_t *in, int threshold,
                                     int dir) {
  int i;
  int j;
  int k;
  static const int taps[3] = { 3, 2, 1 };
  int total_abs = 0;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
      int16_t sum;
      int16_t xx;
      int16_t yy;
      xx = in[i * OD_FILT_BSTRIDE + j];
      sum = 0;
      for (k = 0; k < 3; k++) {
        int16_t p0;
        int16_t p1;
        p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
             xx;
        p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
             xx;
        if (abs(p0) < threshold) sum += taps[k] * p0;
        if (abs(p1) < threshold) sum += taps[k] * p1;
      }
      sum = (sum + 8) >> 4;
      total_abs += abs(sum);
      yy = xx + sum;
      y[i * ystride + j] = yy;
    }
  }
  return (total_abs + 8) >> 4;
}

/* Smooth in the direction detected. */
int od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
                                     const uint16_t *in, int threshold,
                                     int dir) {
  int i;
  int j;
  int k;
  static const int taps[2] = { 4, 1 };
  int total_abs = 0;
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      int16_t sum;
      int16_t xx;
      int16_t yy;
      xx = in[i * OD_FILT_BSTRIDE + j];
      sum = 0;
      for (k = 0; k < 2; k++) {
        int16_t p0;
        int16_t p1;
        p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
             xx;
        p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
             xx;
        if (abs(p0) < threshold) sum += taps[k] * p0;
        if (abs(p1) < threshold) sum += taps[k] * p1;
      }
      sum = (sum + 8) >> 4;
      total_abs += abs(sum);
      yy = xx + sum;
      y[i * ystride + j] = yy;
    }
  }
  return (total_abs + 2) >> 2;
}

/* This table approximates x^0.16 with the index being log2(x). It is clamped
   to [.5, 3]. The table is computed as:
   round(256*min(3, max(.5, 1.08*(sqrt(2)*2.^([0:17]+8)/256/256).^.16))) */
static const int16_t OD_THRESH_TABLE_Q8[18] = {
  128, 134, 150, 168, 188, 210, 234, 262, 292,
  327, 365, 408, 455, 509, 569, 635, 710, 768,
};
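/* The entries are Q8 fixed point (units of 1/256): the first entry,
   128 = 0.5 * 256, and the last, 768 = 3 * 256, correspond to the lower and
   upper clamps in the formula above. */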

/* Compute deringing filter threshold for an 8x8 block based on the
   directional variance difference. A high variance difference means that we
   have a highly directional pattern (e.g. a high contrast edge), so we can
   apply more deringing. A low variance difference means that we either have
   a low contrast edge, or a non-directional texture, so we want to be
   careful not to blur. */
static INLINE int od_adjust_thresh(int threshold, int32_t var) {
  int v1;
  /* We use the variance of 8x8 blocks to adjust the threshold. */
  v1 = OD_MINI(32767, var >> 6);
  return (threshold * OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
}
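/* Rough worked example, assuming OD_ILOG(x) returns the number of bits needed
   to represent x (i.e. floor(log2(x)) + 1 for x > 0): var = 1 << 16 gives
   v1 = 1024, OD_ILOG(v1) = 11 and OD_THRESH_TABLE_Q8[11] = 408, so the
   threshold is scaled by roughly 408 / 256, about 1.6x. */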

static INLINE void copy_8x8_16bit_to_16bit(uint16_t *dst, int dstride,
                                           uint16_t *src, int sstride) {
  int i, j;
  for (i = 0; i < 8; i++)
    for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
}

static INLINE void copy_4x4_16bit_to_16bit(uint16_t *dst, int dstride,
                                           uint16_t *src, int sstride) {
  int i, j;
  for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
}

/* TODO: Optimize this function for SSE. */
void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
                                dering_list *dlist, int dering_count,
                                int bsize) {
  int bi, bx, by;
  if (bsize == 3) {
    for (bi = 0; bi < dering_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
                              &src[bi << 2 * bsize], 1 << bsize);
    }
  } else {
    for (bi = 0; bi < dering_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
                              &src[bi << 2 * bsize], 1 << bsize);
    }
  }
}

static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride,
                                          uint16_t *src, int sstride) {
  int i, j;
  for (i = 0; i < 8; i++)
    for (j = 0; j < 8; j++)
      dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
}

static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride,
                                          uint16_t *src, int sstride) {
  int i, j;
  for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++)
      dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
}

/* TODO: Optimize this function for SSE. */
static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, uint16_t *src,
                                      dering_list *dlist, int dering_count,
                                      int bsize) {
  int bi, bx, by;
  if (bsize == 3) {
    for (bi = 0; bi < dering_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
                             &src[bi << 2 * bsize], 1 << bsize);
    }
  } else {
    for (bi = 0; bi < dering_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;
      copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
                             &src[bi << 2 * bsize], 1 << bsize);
    }
  }
}
void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
               int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int *dirinit,
               int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
               dering_list *dlist, int dering_count, int threshold,
               int clpf_strength, int clpf_damping, int coeff_shift,
               int skip_dering, int hbd) {
  int bi;
  int bx;
  int by;
  int bsize;
  od_filter_dering_direction_func filter_dering_direction[OD_DERINGSIZES] = {
    od_filter_dering_direction_4x4, od_filter_dering_direction_8x8
  };
  bsize = OD_DERING_SIZE_LOG2 - xdec;
  if (!skip_dering) {
    if (pli == 0) {
      if (!dirinit || !*dirinit) {
        for (bi = 0; bi < dering_count; bi++) {
          by = dlist[bi].by;
          bx = dlist[bi].bx;
          dir[by][bx] =
              od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx],
                           OD_FILT_BSTRIDE, &var[by][bx], coeff_shift);
        }
        if (dirinit) *dirinit = 1;
      }
      for (bi = 0; bi < dering_count; bi++) {
        by = dlist[bi].by;
        bx = dlist[bi].bx;
        /* Deringing orthogonal to the direction uses a tighter threshold
           because we want to be conservative. We've presumably already
           achieved some deringing, so the amount of change is expected
           to be low. Also, since we might be filtering across an edge, we
           want to make sure not to blur it. That being said, we might want
           to be a little bit more aggressive on pure horizontal/vertical
           since the ringing there tends to be directional, so it doesn't
           get removed by the directional filtering. */
        (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
            &y[bi << 2 * bsize], 1 << bsize,
            &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
            od_adjust_thresh(threshold, var[by][bx]), dir[by][bx]);
      }
    } else {
      for (bi = 0; bi < dering_count; bi++) {
        by = dlist[bi].by;
        bx = dlist[bi].bx;
        (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
            &y[bi << 2 * bsize], 1 << bsize,
            &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], threshold,
            dir[by][bx]);
      }
    }
  }
  if (clpf_strength) {
    if (threshold && !skip_dering)
      copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
                                 bsize);
    for (bi = 0; bi < dering_count; bi++) {
      by = dlist[bi].by;
      bx = dlist[bi].bx;

      (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
                                                      : aom_clpf_hblock_hbd)(
          in, &y[((bi - by) << 2 * bsize) - (bx << bsize)], OD_FILT_BSTRIDE,
          1 << bsize, bx << bsize, by << bsize, 1 << bsize, 1 << bsize,
          clpf_strength << coeff_shift, clpf_damping + coeff_shift);
    }
  }
  if (dst) {
    if (hbd) {
      copy_dering_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist,
                                 dering_count, 3 - xdec);
    } else {
      copy_dering_16bit_to_8bit(dst, dstride, y, dlist, dering_count, bsize);
    }
  }
}