blob: f54f337ef546053e762867f50bbfaa22cac0f8b8 [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
Steinar Midtskogena9d41e82017-03-17 12:48:15 +010011
12#include <math.h>
13#include <stdlib.h>
14
Yaowu Xu253c0012016-08-15 10:27:19 -070015#ifdef HAVE_CONFIG_H
Steinar Midtskogena9d41e82017-03-17 12:48:15 +010016#include "./config.h"
Yaowu Xu253c0012016-08-15 10:27:19 -070017#endif
18
Steinar Midtskogena9d41e82017-03-17 12:48:15 +010019#include "./aom_dsp_rtcd.h"
Michael Bebenita7227b652016-10-06 14:27:34 -070020#include "./av1_rtcd.h"
Steinar Midtskogena9d41e82017-03-17 12:48:15 +010021#include "./cdef.h"
Yaowu Xu253c0012016-08-15 10:27:19 -070022
23/* Generated from gen_filter_tables.c. */
24const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
clang-format21a0c2c2016-08-18 15:10:22 -070025 { -1 * OD_FILT_BSTRIDE + 1, -2 * OD_FILT_BSTRIDE + 2,
26 -3 * OD_FILT_BSTRIDE + 3 },
27 { 0 * OD_FILT_BSTRIDE + 1, -1 * OD_FILT_BSTRIDE + 2,
28 -1 * OD_FILT_BSTRIDE + 3 },
29 { 0 * OD_FILT_BSTRIDE + 1, 0 * OD_FILT_BSTRIDE + 2, 0 * OD_FILT_BSTRIDE + 3 },
30 { 0 * OD_FILT_BSTRIDE + 1, 1 * OD_FILT_BSTRIDE + 2, 1 * OD_FILT_BSTRIDE + 3 },
31 { 1 * OD_FILT_BSTRIDE + 1, 2 * OD_FILT_BSTRIDE + 2, 3 * OD_FILT_BSTRIDE + 3 },
32 { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 1, 3 * OD_FILT_BSTRIDE + 1 },
33 { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 0, 3 * OD_FILT_BSTRIDE + 0 },
34 { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE - 1, 3 * OD_FILT_BSTRIDE - 1 },
Yaowu Xu253c0012016-08-15 10:27:19 -070035};
36
/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
   The search minimizes the weighted variance along all the lines in a
   particular direction, i.e. the squared error between the input and a
   "predicted" block where each pixel is replaced by the average along a line
   in a particular direction. Since each direction has the same sum(x^2) term,
   that term is never computed. See Section 2, step 2, of:
   http://jmvalin.ca/notes/intra_paint.pdf

   img         - top-left sample of the 8x8 block to analyse.
   stride      - distance, in samples, between consecutive rows of img.
   var         - output: cost of the best direction minus the cost of the
                 direction four steps away (the orthogonal one), scaled down
                 by 1024.
   coeff_shift - right shift applied to every sample before 128 is
                 subtracted from it.
   Returns the index (0..7) of the direction with the highest cost; ties and
   the all-zero case resolve to the lowest index. */
int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var,
                   int coeff_shift) {
  int i;
  int32_t cost[8] = { 0 };
  /* partial[d][k] is the sum of the samples lying on line k of direction d. */
  int partial[8][15] = { { 0 } };
  int32_t best_cost = 0;
  int best_dir = 0;
  /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
     The output is then 840 times larger, but we don't care for finding
     the max. */
  static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
  for (i = 0; i < 8; i++) {
    int j;
    for (j = 0; j < 8; j++) {
      int x;
      /* We subtract 128 here to reduce the maximum range of the squared
         partial sums. */
      x = (img[i * stride + j] >> coeff_shift) - 128;
      partial[0][i + j] += x;
      partial[1][i + j / 2] += x;
      partial[2][i] += x;
      partial[3][3 + i - j / 2] += x;
      partial[4][7 + i - j] += x;
      partial[5][3 - i / 2 + j] += x;
      partial[6][j] += x;
      partial[7][i / 2 + j] += x;
    }
  }
  /* Horizontal (2) and vertical (6): eight lines of eight samples each. */
  for (i = 0; i < 8; i++) {
    cost[2] += partial[2][i] * partial[2][i];
    cost[6] += partial[6][i] * partial[6][i];
  }
  cost[2] *= div_table[8];
  cost[6] *= div_table[8];
  /* The two 45-degree diagonals (0 and 4): line i and line 14 - i both hold
     i + 1 samples, and the centre line 7 holds eight. */
  for (i = 0; i < 7; i++) {
    cost[0] += (partial[0][i] * partial[0][i] +
                partial[0][14 - i] * partial[0][14 - i]) *
               div_table[i + 1];
    cost[4] += (partial[4][i] * partial[4][i] +
                partial[4][14 - i] * partial[4][14 - i]) *
               div_table[i + 1];
  }
  cost[0] += partial[0][7] * partial[0][7] * div_table[8];
  cost[4] += partial[4][7] * partial[4][7] * div_table[8];
  /* The odd directions: lines 3..7 hold eight samples each; lines j and
     10 - j (j = 0..2) hold 2 * j + 2 samples. */
  for (i = 1; i < 8; i += 2) {
    int j;
    for (j = 0; j < 4 + 1; j++) {
      cost[i] += partial[i][3 + j] * partial[i][3 + j];
    }
    cost[i] *= div_table[8];
    for (j = 0; j < 4 - 1; j++) {
      cost[i] += (partial[i][j] * partial[i][j] +
                  partial[i][10 - j] * partial[i][10 - j]) *
                 div_table[2 * j + 2];
    }
  }
  for (i = 0; i < 8; i++) {
    if (cost[i] > best_cost) {
      best_cost = cost[i];
      best_dir = i;
    }
  }
  /* Difference between the optimal variance and the variance along the
     orthogonal direction. Again, the sum(x^2) terms cancel out. */
  *var = best_cost - cost[(best_dir + 4) & 7];
  /* We'd normally divide by 840, but dividing by 1024 is close enough
     for what we're going to do with this. */
  *var >>= 10;
  return best_dir;
}
114
Yaowu Xu253c0012016-08-15 10:27:19 -0700115/* Smooth in the direction detected. */
Jean-Marc Valin1d4568c2017-04-03 17:03:00 -0400116void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
117 const uint16_t *in, int threshold,
Steinar Midtskogen8ff52fc2017-04-04 12:29:19 +0200118 int dir, int damping) {
Yaowu Xu253c0012016-08-15 10:27:19 -0700119 int i;
120 int j;
121 int k;
Yaowu Xu9c323bc2016-09-01 11:35:16 -0700122 static const int taps[3] = { 3, 2, 1 };
Jean-Marc Valinea64c342016-09-15 16:23:12 -0400123 for (i = 0; i < 8; i++) {
124 for (j = 0; j < 8; j++) {
Yaowu Xu253c0012016-08-15 10:27:19 -0700125 int16_t sum;
126 int16_t xx;
127 int16_t yy;
clang-format21a0c2c2016-08-18 15:10:22 -0700128 xx = in[i * OD_FILT_BSTRIDE + j];
129 sum = 0;
Yaowu Xu253c0012016-08-15 10:27:19 -0700130 for (k = 0; k < 3; k++) {
131 int16_t p0;
132 int16_t p1;
clang-format21a0c2c2016-08-18 15:10:22 -0700133 p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
134 xx;
135 p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
136 xx;
Steinar Midtskogen8ff52fc2017-04-04 12:29:19 +0200137 sum += taps[k] * constrain(p0, threshold, damping);
138 sum += taps[k] * constrain(p1, threshold, damping);
Yaowu Xu253c0012016-08-15 10:27:19 -0700139 }
Jean-Marc Valin4713d8d2016-09-16 11:06:50 -0400140 sum = (sum + 8) >> 4;
Jean-Marc Valin4713d8d2016-09-16 11:06:50 -0400141 yy = xx + sum;
clang-format21a0c2c2016-08-18 15:10:22 -0700142 y[i * ystride + j] = yy;
Yaowu Xu253c0012016-08-15 10:27:19 -0700143 }
144 }
145}
146
Jean-Marc Valinea64c342016-09-15 16:23:12 -0400147/* Smooth in the direction detected. */
Jean-Marc Valin1d4568c2017-04-03 17:03:00 -0400148void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
149 const uint16_t *in, int threshold,
Steinar Midtskogen8ff52fc2017-04-04 12:29:19 +0200150 int dir, int damping) {
Jean-Marc Valinea64c342016-09-15 16:23:12 -0400151 int i;
152 int j;
153 int k;
154 static const int taps[2] = { 4, 1 };
155 for (i = 0; i < 4; i++) {
156 for (j = 0; j < 4; j++) {
157 int16_t sum;
158 int16_t xx;
159 int16_t yy;
160 xx = in[i * OD_FILT_BSTRIDE + j];
161 sum = 0;
162 for (k = 0; k < 2; k++) {
163 int16_t p0;
164 int16_t p1;
165 p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
166 xx;
167 p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
168 xx;
Steinar Midtskogen8ff52fc2017-04-04 12:29:19 +0200169 sum += taps[k] * constrain(p0, threshold, damping);
170 sum += taps[k] * constrain(p1, threshold, damping);
Jean-Marc Valinea64c342016-09-15 16:23:12 -0400171 }
Jean-Marc Valin4713d8d2016-09-16 11:06:50 -0400172 sum = (sum + 8) >> 4;
Jean-Marc Valin4713d8d2016-09-16 11:06:50 -0400173 yy = xx + sum;
Jean-Marc Valinea64c342016-09-15 16:23:12 -0400174 y[i * ystride + j] = yy;
175 }
176 }
Yaowu Xu253c0012016-08-15 10:27:19 -0700177}
178
Jean-Marc Valina8ce2c92016-10-07 18:10:19 -0400179/* Compute deringing filter threshold for an 8x8 block based on the
Yaowu Xu253c0012016-08-15 10:27:19 -0700180 directional variance difference. A high variance difference means that we
181 have a highly directional pattern (e.g. a high contrast edge), so we can
182 apply more deringing. A low variance means that we either have a low
183 contrast edge, or a non-directional texture, so we want to be careful not
184 to blur. */
Jean-Marc Valina8ce2c92016-10-07 18:10:19 -0400185static INLINE int od_adjust_thresh(int threshold, int32_t var) {
Steinar Midtskogenfade4632017-04-14 20:29:05 +0200186 const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0;
Jean-Marc Valina8ce2c92016-10-07 18:10:19 -0400187 /* We use the variance of 8x8 blocks to adjust the threshold. */
Steinar Midtskogenfade4632017-04-14 20:29:05 +0200188 return var ? (threshold * (4 + i) + 8) >> 4 : 0;
Yaowu Xu253c0012016-08-15 10:27:19 -0700189}
190
/* Copy an 8x8 block of 16-bit samples.
   dst / dstride - destination block and its row stride in samples.
   src / sstride - source block and its row stride in samples. */
void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
                               int sstride) {
  int i, j;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
      dst[i * dstride + j] = src[i * sstride + j];
    }
  }
}
197
/* Copy a 4x4 block of 16-bit samples.
   dst / dstride - destination block and its row stride in samples.
   src / sstride - source block and its row stride in samples. */
void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
                               int sstride) {
  int i, j;
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      dst[i * dstride + j] = src[i * sstride + j];
    }
  }
}
204
Steinar Midtskogena9d41e82017-03-17 12:48:15 +0100205void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
Jean-Marc Valin39d92a02016-11-02 02:33:46 -0400206 dering_list *dlist, int dering_count,
Jingning Hanba502322017-04-03 10:56:24 -0700207 int bsize) {
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400208 int bi, bx, by;
Jingning Hanc86e7aa2017-03-28 14:27:03 -0700209
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200210 if (bsize == BLOCK_8X8) {
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400211 for (bi = 0; bi < dering_count; bi++) {
Jean-Marc Valine0465032016-10-18 15:56:37 -0400212 by = dlist[bi].by;
213 bx = dlist[bi].bx;
Jingning Hana4ecb1b2017-03-31 11:52:53 -0700214 copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200215 &src[bi << (3 + 3)], 8);
216 }
217 } else if (bsize == BLOCK_4X8) {
218 for (bi = 0; bi < dering_count; bi++) {
219 by = dlist[bi].by;
220 bx = dlist[bi].bx;
221 copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
222 &src[bi << (3 + 2)], 4);
223 copy_4x4_16bit_to_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
224 dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
225 }
226 } else if (bsize == BLOCK_8X4) {
227 for (bi = 0; bi < dering_count; bi++) {
228 by = dlist[bi].by;
229 bx = dlist[bi].bx;
230 copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
231 &src[bi << (2 + 3)], 8);
232 copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4],
233 dstride, &src[(bi << (2 + 3)) + 4], 8);
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400234 }
235 } else {
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200236 assert(bsize == BLOCK_4X4);
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400237 for (bi = 0; bi < dering_count; bi++) {
Jean-Marc Valine0465032016-10-18 15:56:37 -0400238 by = dlist[bi].by;
239 bx = dlist[bi].bx;
Jingning Hana4ecb1b2017-03-31 11:52:53 -0700240 copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200241 &src[bi << (2 + 2)], 4);
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400242 }
243 }
244}
245
/* Copy an 8x8 block of 16-bit samples into an 8-bit destination. Each sample
   is converted with a plain cast, so only its low 8 bits are kept.
   dst / dstride - destination block and its row stride in samples.
   src / sstride - source block and its row stride in samples. */
void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
                              int sstride) {
  int i, j;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
      dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
    }
  }
}
253
/* Copy a 4x4 block of 16-bit samples into an 8-bit destination. Each sample
   is converted with a plain cast, so only its low 8 bits are kept.
   dst / dstride - destination block and its row stride in samples.
   src / sstride - source block and its row stride in samples. */
void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
                              int sstride) {
  int i, j;
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
    }
  }
}
261
Michael Bebenita54170d92017-03-31 15:48:44 -0700262static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride,
263 const uint16_t *src, dering_list *dlist,
264 int dering_count, int bsize) {
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200265 int bi, bx, by;
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200266 if (bsize == BLOCK_8X8) {
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200267 for (bi = 0; bi < dering_count; bi++) {
268 by = dlist[bi].by;
269 bx = dlist[bi].bx;
270 copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200271 &src[bi << (3 + 3)], 8);
272 }
273 } else if (bsize == BLOCK_4X8) {
274 for (bi = 0; bi < dering_count; bi++) {
275 by = dlist[bi].by;
276 bx = dlist[bi].bx;
277 copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
278 &src[bi << (3 + 2)], 4);
279 copy_4x4_16bit_to_8bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
280 dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
281 }
282 } else if (bsize == BLOCK_8X4) {
283 for (bi = 0; bi < dering_count; bi++) {
284 by = dlist[bi].by;
285 bx = dlist[bi].bx;
286 copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
287 &src[bi << (2 + 3)], 8);
288 copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
289 &src[(bi << (2 + 3)) + 4], 8);
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200290 }
291 } else {
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200292 assert(bsize == BLOCK_4X4);
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200293 for (bi = 0; bi < dering_count; bi++) {
294 by = dlist[bi].by;
295 bx = dlist[bi].bx;
296 copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200297 &src[bi << (2 * 2)], 4);
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200298 }
299 }
300}
301
/* Extract the "filter skip" flag from a dering level. The flag is the low bit
   of level, except that level 1 is special-cased and yields 0.
   Returns 0 or 1 for non-negative levels. */
int get_filter_skip(int level) {
  return level == 1 ? 0 : (level & 1);
}
307
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200308void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200309 int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
310 int *dirinit, int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
311 int pli, dering_list *dlist, int dering_count, int level,
Steinar Midtskogen0c966a52017-04-18 14:38:13 +0200312 int clpf_strength, int clpf_damping, int dering_damping,
313 int coeff_shift, int skip_dering, int hbd) {
Jean-Marc Valin3e44bcc2016-10-11 16:53:59 -0400314 int bi;
Yaowu Xu253c0012016-08-15 10:27:19 -0700315 int bx;
316 int by;
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200317 int bsize, bsizex, bsizey;
Steinar Midtskogen233ef942017-03-24 10:40:18 +0100318
Steinar Midtskogendaab3482017-04-05 20:45:02 +0200319 int threshold = (level >> 1) << coeff_shift;
Jean-Marc Valin70f0e5e2017-04-12 02:33:50 -0400320 int filter_skip = get_filter_skip(level);
321 if (level == 1) threshold = 31 << coeff_shift;
Steinar Midtskogen233ef942017-03-24 10:40:18 +0100322
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200323 od_filter_dering_direction_func filter_dering_direction[] = {
Yaowu Xu3e90f842016-11-02 08:22:02 -0700324 od_filter_dering_direction_4x4, od_filter_dering_direction_8x8
Michael Bebenita7227b652016-10-06 14:27:34 -0700325 };
Steinar Midtskogen0c966a52017-04-18 14:38:13 +0200326 clpf_damping += coeff_shift - (pli != AOM_PLANE_Y);
327 dering_damping += coeff_shift - (pli != AOM_PLANE_Y);
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200328 bsize =
329 ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
330 bsizex = 3 - xdec;
331 bsizey = 3 - ydec;
332
Jean-Marc Valin5bd2d2f2017-03-25 01:45:23 -0400333 if (!skip_dering) {
334 if (pli == 0) {
335 if (!dirinit || !*dirinit) {
336 for (bi = 0; bi < dering_count; bi++) {
337 by = dlist[bi].by;
338 bx = dlist[bi].bx;
339 dir[by][bx] =
Jingning Hana4ecb1b2017-03-31 11:52:53 -0700340 od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx],
Jean-Marc Valin5bd2d2f2017-03-25 01:45:23 -0400341 OD_FILT_BSTRIDE, &var[by][bx], coeff_shift);
342 }
343 if (dirinit) *dirinit = 1;
344 }
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200345 }
346 // Only run dering for non-zero threshold (which is always the case for
347 // 4:2:2 or 4:4:0). If we don't dering, we still need to eventually write
348 // something out in y[] later.
349 if (threshold != 0) {
350 assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
Steinar Midtskogen5cedcd82017-03-24 12:44:53 +0100351 for (bi = 0; bi < dering_count; bi++) {
Steinar Midtskogen7b069a52017-04-11 07:38:04 +0200352 int t = !filter_skip && dlist[bi].skip ? 0 : threshold;
Steinar Midtskogen5cedcd82017-03-24 12:44:53 +0100353 by = dlist[bi].by;
354 bx = dlist[bi].bx;
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200355 (filter_dering_direction[bsize == BLOCK_8X8])(
356 &y[bi << (bsizex + bsizey)], 1 << bsizex,
357 &in[(by * OD_FILT_BSTRIDE << bsizey) + (bx << bsizex)],
Steinar Midtskogen7b069a52017-04-11 07:38:04 +0200358 pli ? t : od_adjust_thresh(t, var[by][bx]), dir[by][bx],
359 dering_damping);
Jean-Marc Valin5bd2d2f2017-03-25 01:45:23 -0400360 }
Yaowu Xu253c0012016-08-15 10:27:19 -0700361 }
362 }
Jingning Hanc86e7aa2017-03-28 14:27:03 -0700363
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200364 if (clpf_strength) {
365 if (threshold && !skip_dering)
366 copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
Jingning Hanba502322017-04-03 10:56:24 -0700367 bsize);
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200368 for (bi = 0; bi < dering_count; bi++) {
369 by = dlist[bi].by;
370 bx = dlist[bi].bx;
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200371 int py = by << bsizey;
372 int px = bx << bsizex;
Steinar Midtskogena9d41e82017-03-17 12:48:15 +0100373
Steinar Midtskogen7b069a52017-04-11 07:38:04 +0200374 if (!filter_skip && dlist[bi].skip) continue;
Steinar Midtskogen73aa77c2017-03-27 17:50:30 +0200375 if (!dst || hbd) {
376 // 16 bit destination if high bitdepth or 8 bit destination not given
377 (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
378 : aom_clpf_hblock_hbd)(
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200379 dst ? (uint16_t *)dst + py * dstride + px
380 : &y[bi << (bsizex + bsizey)],
381 in + py * OD_FILT_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
382 OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey,
Steinar Midtskogen8ff52fc2017-04-04 12:29:19 +0200383 clpf_strength << coeff_shift, clpf_damping);
Steinar Midtskogen73aa77c2017-03-27 17:50:30 +0200384 } else {
385 // Do clpf and write the result to an 8 bit destination
386 (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
387 : aom_clpf_hblock)(
388 dst + py * dstride + px, in + py * OD_FILT_BSTRIDE + px, dstride,
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200389 OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey,
Steinar Midtskogen8ff52fc2017-04-04 12:29:19 +0200390 clpf_strength << coeff_shift, clpf_damping);
Steinar Midtskogen73aa77c2017-03-27 17:50:30 +0200391 }
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200392 }
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200393 } else if (threshold != 0) {
Steinar Midtskogen73aa77c2017-03-27 17:50:30 +0200394 // No clpf, so copy instead
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200395 if (hbd) {
396 copy_dering_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist,
Jingning Hanba502322017-04-03 10:56:24 -0700397 dering_count, bsize);
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200398 } else {
399 copy_dering_16bit_to_8bit(dst, dstride, y, dlist, dering_count, bsize);
400 }
Steinar Midtskogen95a2f862017-04-07 09:24:02 +0200401 } else if (dirinit) {
402 // If we're here, both dering and clpf are off, and we still haven't written
403 // anything to y[] yet, so we just copy the input to y[]. This is necessary
404 // only for av1_cdef_search() and only av1_cdef_search() sets dirinit.
405 for (bi = 0; bi < dering_count; bi++) {
406 by = dlist[bi].by;
407 bx = dlist[bi].bx;
408 int iy, ix;
409 // TODO(stemidts/jmvalin): SIMD optimisations
410 for (iy = 0; iy < 1 << bsizey; iy++)
411 for (ix = 0; ix < 1 << bsizex; ix++)
412 y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
413 in[((by << bsizey) + iy) * OD_FILT_BSTRIDE + (bx << bsizex) + ix];
414 }
Steinar Midtskogena9d41e82017-03-17 12:48:15 +0100415 }
Yaowu Xu253c0012016-08-15 10:27:19 -0700416}