blob: af89b80dbac1c76a0d06ac0c6907c53aa0a5864d [file] [log] [blame]
Yaowu Xu253c0012016-08-15 10:27:19 -07001/*Daala video codec
2Copyright (c) 2014-2016 Daala project contributors. All rights reserved.
3
4Redistribution and use in source and binary forms, with or without
5modification, are permitted provided that the following conditions are met:
6
7- Redistributions of source code must retain the above copyright notice, this
8 list of conditions and the following disclaimer.
9
10- Redistributions in binary form must reproduce the above copyright notice,
11 this list of conditions and the following disclaimer in the documentation
12 and/or other materials provided with the distribution.
13
14THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
18FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
24
25#ifdef HAVE_CONFIG_H
26# include "config.h"
27#endif
28
29#include <stdlib.h>
30#include <math.h>
31#include "dering.h"
32
33const od_dering_opt_vtbl OD_DERING_VTBL_C = {
34 {od_filter_dering_direction_4x4_c, od_filter_dering_direction_8x8_c},
35 {od_filter_dering_orthogonal_4x4_c, od_filter_dering_orthogonal_8x8_c}
36};
37
38/* Generated from gen_filter_tables.c. */
39const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
40 {-1*OD_FILT_BSTRIDE + 1, -2*OD_FILT_BSTRIDE + 2, -3*OD_FILT_BSTRIDE + 3 },
41 { 0*OD_FILT_BSTRIDE + 1, -1*OD_FILT_BSTRIDE + 2, -1*OD_FILT_BSTRIDE + 3 },
42 { 0*OD_FILT_BSTRIDE + 1, 0*OD_FILT_BSTRIDE + 2, 0*OD_FILT_BSTRIDE + 3 },
43 { 0*OD_FILT_BSTRIDE + 1, 1*OD_FILT_BSTRIDE + 2, 1*OD_FILT_BSTRIDE + 3 },
44 { 1*OD_FILT_BSTRIDE + 1, 2*OD_FILT_BSTRIDE + 2, 3*OD_FILT_BSTRIDE + 3 },
45 { 1*OD_FILT_BSTRIDE + 0, 2*OD_FILT_BSTRIDE + 1, 3*OD_FILT_BSTRIDE + 1 },
46 { 1*OD_FILT_BSTRIDE + 0, 2*OD_FILT_BSTRIDE + 0, 3*OD_FILT_BSTRIDE + 0 },
47 { 1*OD_FILT_BSTRIDE + 0, 2*OD_FILT_BSTRIDE - 1, 3*OD_FILT_BSTRIDE - 1 },
48};
49
50const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS] = {
51 0, 0.5, 0.707, 1, 1.41, 2
52};
53
54/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
55 The search minimizes the weighted variance along all the lines in a
56 particular direction, i.e. the squared error between the input and a
57 "predicted" block where each pixel is replaced by the average along a line
58 in a particular direction. Since each direction have the same sum(x^2) term,
59 that term is never computed. See Section 2, step 2, of:
60 http://jmvalin.ca/notes/intra_paint.pdf */
61static int od_dir_find8(const od_dering_in *img, int stride, int32_t *var,
62 int coeff_shift) {
63 int i;
64 int32_t cost[8] = {0};
65 int partial[8][15] = {{0}};
66 int32_t best_cost = 0;
67 int best_dir = 0;
68 /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
69 The output is then 840 times larger, but we don't care for finding
70 the max. */
71 static const int div_table[] = {0, 840, 420, 280, 210, 168, 140, 120, 105};
72 for (i = 0; i < 8; i++) {
73 int j;
74 for (j = 0; j < 8; j++) {
75 int x;
76 /* We subtract 128 here to reduce the maximum range of the squared
77 partial sums. */
78 x = (img[i*stride + j] >> coeff_shift) - 128;
79 partial[0][i + j] += x;
80 partial[1][i + j/2] += x;
81 partial[2][i] += x;
82 partial[3][3 + i - j/2] += x;
83 partial[4][7 + i - j] += x;
84 partial[5][3 - i/2 + j] += x;
85 partial[6][j] += x;
86 partial[7][i/2 + j] += x;
87 }
88 }
89 for (i = 0; i < 8; i++) {
90 cost[2] += partial[2][i]*partial[2][i];
91 cost[6] += partial[6][i]*partial[6][i];
92 }
93 cost[2] *= div_table[8];
94 cost[6] *= div_table[8];
95 for (i = 0; i < 7; i++) {
96 cost[0] += (partial[0][i]*partial[0][i]
97 + partial[0][14 - i]*partial[0][14 - i])*div_table[i + 1];
98 cost[4] += (partial[4][i]*partial[4][i]
99 + partial[4][14 - i]*partial[4][14 - i])*div_table[i + 1];
100 }
101 cost[0] += partial[0][7]*partial[0][7]*div_table[8];
102 cost[4] += partial[4][7]*partial[4][7]*div_table[8];
103 for (i = 1; i < 8; i += 2) {
104 int j;
105 for (j = 0; j < 4 + 1; j++) {
106 cost[i] += partial[i][3 + j]*partial[i][3 + j];
107 }
108 cost[i] *= div_table[8];
109 for (j = 0; j < 4 - 1; j++) {
110 cost[i] += (partial[i][j]*partial[i][j]
111 + partial[i][10 - j]*partial[i][10 - j])*div_table[2*j + 2];
112 }
113 }
114 for (i = 0; i < 8; i++) {
115 if (cost[i] > best_cost) {
116 best_cost = cost[i];
117 best_dir = i;
118 }
119 }
120 /* Difference between the optimal variance and the variance along the
121 orthogonal direction. Again, the sum(x^2) terms cancel out. */
122 *var = best_cost - cost[(best_dir + 4) & 7];
123 /* We'd normally divide by 840, but dividing by 1024 is close enough
124 for what we're going to do with this. */
125 *var >>= 10;
126 return best_dir;
127}
128
129#define OD_DERING_VERY_LARGE (30000)
130#define OD_DERING_INBUF_SIZE ((OD_BSIZE_MAX + 2*OD_FILT_BORDER)*\
131 (OD_BSIZE_MAX + 2*OD_FILT_BORDER))
132
133/* Smooth in the direction detected. */
134void od_filter_dering_direction_c(int16_t *y, int ystride, const int16_t *in,
135 int ln, int threshold, int dir) {
136 int i;
137 int j;
138 int k;
139 static const int taps[3] = {3, 2, 2};
140 for (i = 0; i < 1 << ln; i++) {
141 for (j = 0; j < 1 << ln; j++) {
142 int16_t sum;
143 int16_t xx;
144 int16_t yy;
145 xx = in[i*OD_FILT_BSTRIDE + j];
146 sum= 0;
147 for (k = 0; k < 3; k++) {
148 int16_t p0;
149 int16_t p1;
150 p0 = in[i*OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]]
151 - xx;
152 p1 = in[i*OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]]
153 - xx;
154 if (abs(p0) < threshold) sum += taps[k]*p0;
155 if (abs(p1) < threshold) sum += taps[k]*p1;
156 }
157 yy = xx + ((sum + 8) >> 4);
158 y[i*ystride + j] = yy;
159 }
160 }
161}
162
163void od_filter_dering_direction_4x4_c(int16_t *y, int ystride,
164 const int16_t *in, int threshold, int dir) {
165 od_filter_dering_direction_c(y, ystride, in, 2, threshold, dir);
166}
167
168void od_filter_dering_direction_8x8_c(int16_t *y, int ystride,
169 const int16_t *in, int threshold, int dir) {
170 od_filter_dering_direction_c(y, ystride, in, 3, threshold, dir);
171}
172
173/* Smooth in the direction orthogonal to what was detected. */
174void od_filter_dering_orthogonal_c(int16_t *y, int ystride, const int16_t *in,
175 const od_dering_in *x, int xstride, int ln, int threshold, int dir) {
176 int i;
177 int j;
178 int offset;
179 if (dir > 0 && dir < 4) offset = OD_FILT_BSTRIDE;
180 else offset = 1;
181 for (i = 0; i < 1 << ln; i++) {
182 for (j = 0; j < 1 << ln; j++) {
183 int16_t athresh;
184 int16_t yy;
185 int16_t sum;
186 int16_t p;
187 /* Deringing orthogonal to the direction uses a tighter threshold
188 because we want to be conservative. We've presumably already
189 achieved some deringing, so the amount of change is expected
190 to be low. Also, since we might be filtering across an edge, we
191 want to make sure not to blur it. That being said, we might want
192 to be a little bit more aggressive on pure horizontal/vertical
193 since the ringing there tends to be directional, so it doesn't
194 get removed by the directional filtering. */
195 athresh = OD_MINI(threshold, threshold/3
196 + abs(in[i*OD_FILT_BSTRIDE + j] - x[i*xstride + j]));
197 yy = in[i*OD_FILT_BSTRIDE + j];
198 sum = 0;
199 p = in[i*OD_FILT_BSTRIDE + j + offset] - yy;
200 if (abs(p) < athresh) sum += p;
201 p = in[i*OD_FILT_BSTRIDE + j - offset] - yy;
202 if (abs(p) < athresh) sum += p;
203 p = in[i*OD_FILT_BSTRIDE + j + 2*offset] - yy;
204 if (abs(p) < athresh) sum += p;
205 p = in[i*OD_FILT_BSTRIDE + j - 2*offset] - yy;
206 if (abs(p) < athresh) sum += p;
207 y[i*ystride + j] = yy + ((3*sum + 8) >> 4);
208 }
209 }
210}
211
212void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
213 const int16_t *in, const od_dering_in *x, int xstride, int threshold,
214 int dir) {
215 od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 2, threshold, dir);
216}
217
218void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
219 const int16_t *in, const od_dering_in *x, int xstride, int threshold,
220 int dir) {
221 od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 3, threshold, dir);
222}
223
224/* This table approximates x^0.16 with the index being log2(x). It is clamped
225 to [-.5, 3]. The table is computed as:
226 round(256*min(3, max(.5, 1.08*(sqrt(2)*2.^([0:17]+8)/256/256).^.16))) */
227static const int16_t OD_THRESH_TABLE_Q8[18] = {
228 128, 134, 150, 168, 188, 210, 234, 262,
229 292, 327, 365, 408, 455, 509, 569, 635,
230 710, 768,
231};
232
233/* Compute deringing filter threshold for each 8x8 block based on the
234 directional variance difference. A high variance difference means that we
235 have a highly directional pattern (e.g. a high contrast edge), so we can
236 apply more deringing. A low variance means that we either have a low
237 contrast edge, or a non-directional texture, so we want to be careful not
238 to blur. */
239static void od_compute_thresh(int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
240 int threshold, int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
241 int nhb, int nvb) {
242 int bx;
243 int by;
244 for (by = 0; by < nvb; by++) {
245 for (bx = 0; bx < nhb; bx++) {
246 int v1;
247 /* We use the variance of 8x8 blocks to adjust the threshold. */
248 v1 = OD_MINI(32767, var[by][bx] >> 6);
249 thresh[by][bx] = (threshold*OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
250 }
251 }
252}
253
254void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
255 const od_dering_in *x, int xstride, int nhb, int nvb, int sbx, int sby,
256 int nhsb, int nvsb, int xdec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
257 int pli, unsigned char *bskip, int skip_stride, int threshold, int overlap,
258 int coeff_shift) {
259 int i;
260 int j;
261 int bx;
262 int by;
263 int16_t inbuf[OD_DERING_INBUF_SIZE];
264 int16_t *in;
265 int bsize;
266 int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
267 int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
268 bsize = 3 - xdec;
269 in = inbuf + OD_FILT_BORDER*OD_FILT_BSTRIDE + OD_FILT_BORDER;
270 /* We avoid filtering the pixels for which some of the pixels to average
271 are outside the frame. We could change the filter instead, but it would
272 add special cases for any future vectorization. */
273 for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
274 for (i = -OD_FILT_BORDER*(sby != 0); i < (nvb << bsize)
275 + OD_FILT_BORDER*(sby != nvsb - 1); i++) {
276 for (j = -OD_FILT_BORDER*(sbx != 0); j < (nhb << bsize)
277 + OD_FILT_BORDER*(sbx != nhsb - 1); j++) {
278 in[i*OD_FILT_BSTRIDE + j] = x[i*xstride + j];
279 }
280 }
281 if (pli == 0) {
282 for (by = 0; by < nvb; by++) {
283 for (bx = 0; bx < nhb; bx++) {
284 dir[by][bx] = od_dir_find8(&x[8*by*xstride + 8*bx], xstride,
285 &var[by][bx], coeff_shift);
286 }
287 }
288 od_compute_thresh(thresh, threshold, var, nhb, nvb);
289 }
290 else {
291 for (by = 0; by < nvb; by++) {
292 for (bx = 0; bx < nhb; bx++) {
293 thresh[by][bx] = threshold;
294 }
295 }
296 }
297 for (by = 0; by < nvb; by++) {
298 for (bx = 0; bx < nhb; bx++) {
299 int skip;
300# if defined(DAALA_ODINTRIN)
301 int xstart;
302 int ystart;
303 int xend;
304 int yend;
305 xstart = ystart = 0;
306 xend = yend = (2 >> xdec);
307 if (overlap) {
308 xstart -= (sbx != 0);
309 ystart -= (sby != 0);
310 xend += (sbx != nhsb - 1);
311 yend += (sby != nvsb - 1);
312 }
313 skip = 1;
314 /* We look at whether the current block and its 4x4 surrounding (due to
315 lapping) are skipped to avoid filtering the same content multiple
316 times. */
317 for (i = ystart; i < yend; i++) {
318 for (j = xstart; j < xend; j++) {
319 skip = skip && bskip[((by << 1 >> xdec) + i)*skip_stride
320 + (bx << 1 >> xdec) + j];
321 }
322 }
323#else
324 (void)overlap;
325 skip = bskip[by*skip_stride + bx];
326#endif
327 if (skip) thresh[by][bx] = 0;
328 }
329 }
330 for (by = 0; by < nvb; by++) {
331 for (bx = 0; bx < nhb; bx++) {
332 (vtbl->filter_dering_direction[bsize - OD_LOG_BSIZE0])(
333 &y[(by*ystride << bsize) + (bx << bsize)], ystride,
334 &in[(by*OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
335 thresh[by][bx], dir[by][bx]);
336 }
337 }
338 for (i = 0; i < nvb << bsize; i++) {
339 for (j = 0; j < nhb << bsize; j++) {
340 in[i*OD_FILT_BSTRIDE + j] = y[i*ystride + j];
341 }
342 }
343 for (by = 0; by < nvb; by++) {
344 for (bx = 0; bx < nhb; bx++) {
345 (vtbl->filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
346 &y[(by*ystride << bsize) + (bx << bsize)], ystride,
347 &in[(by*OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
348 &x[(by*xstride << bsize) + (bx << bsize)], xstride,
349 thresh[by][bx], dir[by][bx]);
350 }
351 }
352}