blob: 1833d33ede5b02e89c1ece9a85566cd4a5dc68cf [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
Steinar Midtskogena9d41e82017-03-17 12:48:15 +010011
12#include <math.h>
13#include <stdlib.h>
14
Yaowu Xu253c0012016-08-15 10:27:19 -070015#ifdef HAVE_CONFIG_H
Steinar Midtskogena9d41e82017-03-17 12:48:15 +010016#include "./config.h"
Yaowu Xu253c0012016-08-15 10:27:19 -070017#endif
18
Steinar Midtskogena9d41e82017-03-17 12:48:15 +010019#include "./aom_dsp_rtcd.h"
Michael Bebenita7227b652016-10-06 14:27:34 -070020#include "./av1_rtcd.h"
Steinar Midtskogena9d41e82017-03-17 12:48:15 +010021#include "./cdef.h"
Yaowu Xu253c0012016-08-15 10:27:19 -070022
23/* Generated from gen_filter_tables.c. */
24const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
clang-format21a0c2c2016-08-18 15:10:22 -070025 { -1 * OD_FILT_BSTRIDE + 1, -2 * OD_FILT_BSTRIDE + 2,
26 -3 * OD_FILT_BSTRIDE + 3 },
27 { 0 * OD_FILT_BSTRIDE + 1, -1 * OD_FILT_BSTRIDE + 2,
28 -1 * OD_FILT_BSTRIDE + 3 },
29 { 0 * OD_FILT_BSTRIDE + 1, 0 * OD_FILT_BSTRIDE + 2, 0 * OD_FILT_BSTRIDE + 3 },
30 { 0 * OD_FILT_BSTRIDE + 1, 1 * OD_FILT_BSTRIDE + 2, 1 * OD_FILT_BSTRIDE + 3 },
31 { 1 * OD_FILT_BSTRIDE + 1, 2 * OD_FILT_BSTRIDE + 2, 3 * OD_FILT_BSTRIDE + 3 },
32 { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 1, 3 * OD_FILT_BSTRIDE + 1 },
33 { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 0, 3 * OD_FILT_BSTRIDE + 0 },
34 { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE - 1, 3 * OD_FILT_BSTRIDE - 1 },
Yaowu Xu253c0012016-08-15 10:27:19 -070035};
36
/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
   The search minimizes the weighted variance along all the lines in a
   particular direction, i.e. the squared error between the input and a
   "predicted" block where each pixel is replaced by the average along a line
   in a particular direction. Since each direction has the same sum(x^2) term,
   that term is never computed. See Section 2, step 2, of:
   http://jmvalin.ca/notes/intra_paint.pdf

   img:         top-left sample of the 8x8 block to analyse.
   stride:      distance, in elements, between vertically adjacent samples.
   var:         output. Receives (best cost - cost of the direction 4 steps
                away, i.e. the orthogonal one) >> 10.
   coeff_shift: right shift applied to every sample before 128 is subtracted.
                NOTE(review): presumably bit depth - 8, so that the shifted
                samples are 8-bit; the 32-bit cost accumulators are sized for
                that range -- confirm against callers.

   Returns the index (0..7) of the direction with the largest cost. Ties go
   to the lowest index, so a block whose costs are all zero yields 0. */
int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var,
                   int coeff_shift) {
  int i;
  int32_t cost[8] = { 0 };
  int partial[8][15] = { { 0 } };
  int32_t best_cost = 0;
  int best_dir = 0;
  /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
     The output is then 840 times larger, but we don't care for finding
     the max. */
  static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
  /* Accumulate, for each of the 8 directions, the sum of the samples lying
     on each line of that direction. */
  for (i = 0; i < 8; i++) {
    int j;
    for (j = 0; j < 8; j++) {
      int x;
      /* We subtract 128 here to reduce the maximum range of the squared
         partial sums. */
      x = (img[i * stride + j] >> coeff_shift) - 128;
      partial[0][i + j] += x;
      partial[1][i + j / 2] += x;
      partial[2][i] += x;
      partial[3][3 + i - j / 2] += x;
      partial[4][7 + i - j] += x;
      partial[5][3 - i / 2 + j] += x;
      partial[6][j] += x;
      partial[7][i / 2 + j] += x;
    }
  }
  /* Horizontal (2) and vertical (6): eight lines of eight samples each. */
  for (i = 0; i < 8; i++) {
    cost[2] += partial[2][i] * partial[2][i];
    cost[6] += partial[6][i] * partial[6][i];
  }
  cost[2] *= div_table[8];
  cost[6] *= div_table[8];
  /* The two 45-degree diagonals (0 and 4): line i and its mirror 14 - i both
     hold i + 1 samples; the centre line (7) holds eight. */
  for (i = 0; i < 7; i++) {
    cost[0] += (partial[0][i] * partial[0][i] +
                partial[0][14 - i] * partial[0][14 - i]) *
               div_table[i + 1];
    cost[4] += (partial[4][i] * partial[4][i] +
                partial[4][14 - i] * partial[4][14 - i]) *
               div_table[i + 1];
  }
  cost[0] += partial[0][7] * partial[0][7] * div_table[8];
  cost[4] += partial[4][7] * partial[4][7] * div_table[8];
  /* Odd directions: lines 3..7 hold eight samples; lines j and 10 - j
     (j = 0..2) hold 2 * j + 2 samples. */
  for (i = 1; i < 8; i += 2) {
    int j;
    for (j = 0; j < 4 + 1; j++) {
      cost[i] += partial[i][3 + j] * partial[i][3 + j];
    }
    cost[i] *= div_table[8];
    for (j = 0; j < 4 - 1; j++) {
      cost[i] += (partial[i][j] * partial[i][j] +
                  partial[i][10 - j] * partial[i][10 - j]) *
                 div_table[2 * j + 2];
    }
  }
  /* Strict '>' keeps the first (lowest-index) maximum. */
  for (i = 0; i < 8; i++) {
    if (cost[i] > best_cost) {
      best_cost = cost[i];
      best_dir = i;
    }
  }
  /* Difference between the optimal variance and the variance along the
     orthogonal direction. Again, the sum(x^2) terms cancel out. */
  *var = best_cost - cost[(best_dir + 4) & 7];
  /* We'd normally divide by 840, but dividing by 1024 is close enough
     for what we're going to do with this. */
  *var >>= 10;
  return best_dir;
}
114
Yaowu Xu253c0012016-08-15 10:27:19 -0700115/* Smooth in the direction detected. */
Steinar Midtskogena9d41e82017-03-17 12:48:15 +0100116int od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
117 const uint16_t *in, int threshold,
118 int dir) {
Yaowu Xu253c0012016-08-15 10:27:19 -0700119 int i;
120 int j;
121 int k;
Yaowu Xu9c323bc2016-09-01 11:35:16 -0700122 static const int taps[3] = { 3, 2, 1 };
Jean-Marc Valin4713d8d2016-09-16 11:06:50 -0400123 int total_abs = 0;
Jean-Marc Valinea64c342016-09-15 16:23:12 -0400124 for (i = 0; i < 8; i++) {
125 for (j = 0; j < 8; j++) {
Yaowu Xu253c0012016-08-15 10:27:19 -0700126 int16_t sum;
127 int16_t xx;
128 int16_t yy;
clang-format21a0c2c2016-08-18 15:10:22 -0700129 xx = in[i * OD_FILT_BSTRIDE + j];
130 sum = 0;
Yaowu Xu253c0012016-08-15 10:27:19 -0700131 for (k = 0; k < 3; k++) {
132 int16_t p0;
133 int16_t p1;
clang-format21a0c2c2016-08-18 15:10:22 -0700134 p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
135 xx;
136 p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
137 xx;
138 if (abs(p0) < threshold) sum += taps[k] * p0;
139 if (abs(p1) < threshold) sum += taps[k] * p1;
Yaowu Xu253c0012016-08-15 10:27:19 -0700140 }
Jean-Marc Valin4713d8d2016-09-16 11:06:50 -0400141 sum = (sum + 8) >> 4;
142 total_abs += abs(sum);
143 yy = xx + sum;
clang-format21a0c2c2016-08-18 15:10:22 -0700144 y[i * ystride + j] = yy;
Yaowu Xu253c0012016-08-15 10:27:19 -0700145 }
146 }
Jean-Marc Valin4713d8d2016-09-16 11:06:50 -0400147 return (total_abs + 8) >> 4;
Yaowu Xu253c0012016-08-15 10:27:19 -0700148}
149
Jean-Marc Valinea64c342016-09-15 16:23:12 -0400150/* Smooth in the direction detected. */
Steinar Midtskogena9d41e82017-03-17 12:48:15 +0100151int od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
152 const uint16_t *in, int threshold,
153 int dir) {
Jean-Marc Valinea64c342016-09-15 16:23:12 -0400154 int i;
155 int j;
156 int k;
157 static const int taps[2] = { 4, 1 };
Jean-Marc Valin4713d8d2016-09-16 11:06:50 -0400158 int total_abs = 0;
Jean-Marc Valinea64c342016-09-15 16:23:12 -0400159 for (i = 0; i < 4; i++) {
160 for (j = 0; j < 4; j++) {
161 int16_t sum;
162 int16_t xx;
163 int16_t yy;
164 xx = in[i * OD_FILT_BSTRIDE + j];
165 sum = 0;
166 for (k = 0; k < 2; k++) {
167 int16_t p0;
168 int16_t p1;
169 p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
170 xx;
171 p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
172 xx;
173 if (abs(p0) < threshold) sum += taps[k] * p0;
174 if (abs(p1) < threshold) sum += taps[k] * p1;
175 }
Jean-Marc Valin4713d8d2016-09-16 11:06:50 -0400176 sum = (sum + 8) >> 4;
177 total_abs += abs(sum);
178 yy = xx + sum;
Jean-Marc Valinea64c342016-09-15 16:23:12 -0400179 y[i * ystride + j] = yy;
180 }
181 }
Jean-Marc Valin4713d8d2016-09-16 11:06:50 -0400182 return (total_abs + 2) >> 2;
Yaowu Xu253c0012016-08-15 10:27:19 -0700183}
184
/* This table approximates x^0.16 with the index being log2(x). It is clamped
   to [.5, 3] and stored in Q8 (so 128 is 0.5 and 768 is 3). The table is
   computed as:
   round(256*min(3, max(.5, 1.08*(sqrt(2)*2.^([0:17]+8)/256/256).^.16))) */
static const int16_t OD_THRESH_TABLE_Q8[18] = {
  128, 134, 150, 168, 188, 210, 234, 262, 292,
  327, 365, 408, 455, 509, 569, 635, 710, 768,
};
192
Jean-Marc Valina8ce2c92016-10-07 18:10:19 -0400193/* Compute deringing filter threshold for an 8x8 block based on the
Yaowu Xu253c0012016-08-15 10:27:19 -0700194 directional variance difference. A high variance difference means that we
195 have a highly directional pattern (e.g. a high contrast edge), so we can
196 apply more deringing. A low variance means that we either have a low
197 contrast edge, or a non-directional texture, so we want to be careful not
198 to blur. */
Jean-Marc Valina8ce2c92016-10-07 18:10:19 -0400199static INLINE int od_adjust_thresh(int threshold, int32_t var) {
200 int v1;
201 /* We use the variance of 8x8 blocks to adjust the threshold. */
202 v1 = OD_MINI(32767, var >> 6);
203 return (threshold * OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
Yaowu Xu253c0012016-08-15 10:27:19 -0700204}
205
Steinar Midtskogena9d41e82017-03-17 12:48:15 +0100206static INLINE void copy_8x8_16bit_to_16bit(uint16_t *dst, int dstride,
207 uint16_t *src, int sstride) {
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400208 int i, j;
209 for (i = 0; i < 8; i++)
Jean-Marc Valin39d92a02016-11-02 02:33:46 -0400210 for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400211}
212
Steinar Midtskogena9d41e82017-03-17 12:48:15 +0100213static INLINE void copy_4x4_16bit_to_16bit(uint16_t *dst, int dstride,
214 uint16_t *src, int sstride) {
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400215 int i, j;
216 for (i = 0; i < 4; i++)
Jean-Marc Valin39d92a02016-11-02 02:33:46 -0400217 for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400218}
219
220/* TODO: Optimize this function for SSE. */
Steinar Midtskogena9d41e82017-03-17 12:48:15 +0100221void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
Jean-Marc Valin39d92a02016-11-02 02:33:46 -0400222 dering_list *dlist, int dering_count,
Jingning Hanc86e7aa2017-03-28 14:27:03 -0700223 BLOCK_SIZE bsize) {
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400224 int bi, bx, by;
Jingning Hanb06af202017-03-28 15:39:44 -0700225 const int mi_size_l2 = (bsize == BLOCK_8X8) ? MI_SIZE_LOG2 : MI_SIZE_LOG2 - 1;
Jingning Hanc86e7aa2017-03-28 14:27:03 -0700226
227 if (bsize == BLOCK_8X8) {
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400228 for (bi = 0; bi < dering_count; bi++) {
Jean-Marc Valine0465032016-10-18 15:56:37 -0400229 by = dlist[bi].by;
230 bx = dlist[bi].bx;
Jingning Hanb06af202017-03-28 15:39:44 -0700231 copy_8x8_16bit_to_16bit(
232 &dst[(by << mi_size_l2) * dstride + (bx << mi_size_l2)], dstride,
233 &src[bi << (2 * 3)], 8);
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400234 }
235 } else {
236 for (bi = 0; bi < dering_count; bi++) {
Jean-Marc Valine0465032016-10-18 15:56:37 -0400237 by = dlist[bi].by;
238 bx = dlist[bi].bx;
Jingning Hanb06af202017-03-28 15:39:44 -0700239 copy_4x4_16bit_to_16bit(
240 &dst[(by << mi_size_l2) * dstride + (bx << mi_size_l2)], dstride,
241 &src[bi << (2 * 2)], 4);
Jean-Marc Valincf23aef2016-10-11 17:47:36 -0400242 }
243 }
244}
245
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200246static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride,
247 uint16_t *src, int sstride) {
248 int i, j;
249 for (i = 0; i < 8; i++)
250 for (j = 0; j < 8; j++)
251 dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
252}
253
254static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride,
255 uint16_t *src, int sstride) {
256 int i, j;
257 for (i = 0; i < 4; i++)
258 for (j = 0; j < 4; j++)
259 dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
260}
261
262/* TODO: Optimize this function for SSE. */
263static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, uint16_t *src,
264 dering_list *dlist, int dering_count,
265 int bsize) {
266 int bi, bx, by;
267 if (bsize == 3) {
268 for (bi = 0; bi < dering_count; bi++) {
269 by = dlist[bi].by;
270 bx = dlist[bi].bx;
271 copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
272 &src[bi << 2 * bsize], 1 << bsize);
273 }
274 } else {
275 for (bi = 0; bi < dering_count; bi++) {
276 by = dlist[bi].by;
277 bx = dlist[bi].bx;
278 copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
279 &src[bi << 2 * bsize], 1 << bsize);
280 }
281 }
282}
283
284void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
Steinar Midtskogen5cedcd82017-03-24 12:44:53 +0100285 int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int *dirinit,
286 int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
Steinar Midtskogen233ef942017-03-24 10:40:18 +0100287 dering_list *dlist, int dering_count, int level,
Jean-Marc Valin5bd2d2f2017-03-25 01:45:23 -0400288 int clpf_strength, int clpf_damping, int coeff_shift,
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200289 int skip_dering, int hbd) {
Jean-Marc Valin3e44bcc2016-10-11 16:53:59 -0400290 int bi;
Yaowu Xu253c0012016-08-15 10:27:19 -0700291 int bx;
292 int by;
Yaowu Xu3e90f842016-11-02 08:22:02 -0700293 int bsize;
Steinar Midtskogen233ef942017-03-24 10:40:18 +0100294
295 // TODO(stemidts): We might be good with fewer strengths and different
296 // strengths for chroma. Perhaps reduce CDEF_STRENGTH_BITS to 5 and
297 // DERING_STRENGTHS to 8 and use the following tables:
298 // static int level_table[DERING_STRENGTHS] = {0, 1, 3, 7, 14, 24, 39, 63};
299 // static int level_table_uv[DERING_STRENGTHS] = {0, 1, 2, 5, 8, 12, 18, 25};
300 // For now, use 21 strengths and the same for luma and chroma.
301 static int level_table[DERING_STRENGTHS] = {
302 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, 24, 28, 33, 39, 46, 54, 63
303 };
304 static int level_table_uv[DERING_STRENGTHS] = {
305 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, 24, 28, 33, 39, 46, 54, 63
306 };
307
308 int threshold = (pli ? level_table_uv : level_table)[level] << coeff_shift;
Jingning Hanb06af202017-03-28 15:39:44 -0700309 const int mi_size_l2 = xdec ? MI_SIZE_LOG2 - 1 : MI_SIZE_LOG2;
Michael Bebenita7227b652016-10-06 14:27:34 -0700310 od_filter_dering_direction_func filter_dering_direction[OD_DERINGSIZES] = {
Yaowu Xu3e90f842016-11-02 08:22:02 -0700311 od_filter_dering_direction_4x4, od_filter_dering_direction_8x8
Michael Bebenita7227b652016-10-06 14:27:34 -0700312 };
Jean-Marc Valine2542412016-10-26 01:26:25 -0400313 bsize = OD_DERING_SIZE_LOG2 - xdec;
Jean-Marc Valin5bd2d2f2017-03-25 01:45:23 -0400314 if (!skip_dering) {
315 if (pli == 0) {
316 if (!dirinit || !*dirinit) {
317 for (bi = 0; bi < dering_count; bi++) {
318 by = dlist[bi].by;
319 bx = dlist[bi].bx;
320 dir[by][bx] =
Jingning Hanb06af202017-03-28 15:39:44 -0700321 od_dir_find8(&in[MI_SIZE * by * OD_FILT_BSTRIDE + MI_SIZE * bx],
Jean-Marc Valin5bd2d2f2017-03-25 01:45:23 -0400322 OD_FILT_BSTRIDE, &var[by][bx], coeff_shift);
323 }
324 if (dirinit) *dirinit = 1;
325 }
Steinar Midtskogen5cedcd82017-03-24 12:44:53 +0100326 for (bi = 0; bi < dering_count; bi++) {
327 by = dlist[bi].by;
328 bx = dlist[bi].bx;
Jean-Marc Valin5bd2d2f2017-03-25 01:45:23 -0400329 /* Deringing orthogonal to the direction uses a tighter threshold
330 because we want to be conservative. We've presumably already
331 achieved some deringing, so the amount of change is expected
332 to be low. Also, since we might be filtering across an edge, we
333 want to make sure not to blur it. That being said, we might want
334 to be a little bit more aggressive on pure horizontal/vertical
335 since the ringing there tends to be directional, so it doesn't
336 get removed by the directional filtering. */
337 (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
338 &y[bi << 2 * bsize], 1 << bsize,
Jingning Hanb06af202017-03-28 15:39:44 -0700339 &in[(by * OD_FILT_BSTRIDE << mi_size_l2) + (bx << mi_size_l2)],
Jean-Marc Valin5bd2d2f2017-03-25 01:45:23 -0400340 od_adjust_thresh(threshold, var[by][bx]), dir[by][bx]);
Steinar Midtskogen5cedcd82017-03-24 12:44:53 +0100341 }
Jean-Marc Valin5bd2d2f2017-03-25 01:45:23 -0400342 } else {
343 for (bi = 0; bi < dering_count; bi++) {
344 by = dlist[bi].by;
345 bx = dlist[bi].bx;
346 (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
347 &y[bi << 2 * bsize], 1 << bsize,
Jingning Hanb06af202017-03-28 15:39:44 -0700348 &in[(by * OD_FILT_BSTRIDE << mi_size_l2) + (bx << mi_size_l2)],
349 threshold, dir[by][bx]);
Jean-Marc Valin5bd2d2f2017-03-25 01:45:23 -0400350 }
Yaowu Xu253c0012016-08-15 10:27:19 -0700351 }
352 }
Jingning Hanc86e7aa2017-03-28 14:27:03 -0700353
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200354 if (clpf_strength) {
355 if (threshold && !skip_dering)
356 copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
Jingning Hanc86e7aa2017-03-28 14:27:03 -0700357 xdec ? BLOCK_4X4 : BLOCK_8X8);
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200358 for (bi = 0; bi < dering_count; bi++) {
359 by = dlist[bi].by;
360 bx = dlist[bi].bx;
Steinar Midtskogen73aa77c2017-03-27 17:50:30 +0200361 int py = by << mi_size_l2;
362 int px = bx << mi_size_l2;
Steinar Midtskogena9d41e82017-03-17 12:48:15 +0100363
Steinar Midtskogen73aa77c2017-03-27 17:50:30 +0200364 if (!dst || hbd) {
365 // 16 bit destination if high bitdepth or 8 bit destination not given
366 (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
367 : aom_clpf_hblock_hbd)(
368 dst ? (uint16_t *)dst + py * dstride + px : &y[bi << 2 * bsize],
369 in + py * OD_FILT_BSTRIDE + px, dst && hbd ? dstride : 1 << bsize,
370 OD_FILT_BSTRIDE, 1 << bsize, 1 << bsize,
371 clpf_strength << coeff_shift, clpf_damping + coeff_shift);
372 } else {
373 // Do clpf and write the result to an 8 bit destination
374 (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
375 : aom_clpf_hblock)(
376 dst + py * dstride + px, in + py * OD_FILT_BSTRIDE + px, dstride,
377 OD_FILT_BSTRIDE, 1 << bsize, 1 << bsize,
378 clpf_strength << coeff_shift, clpf_damping + coeff_shift);
379 }
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200380 }
Steinar Midtskogen73aa77c2017-03-27 17:50:30 +0200381 } else {
382 // No clpf, so copy instead
Steinar Midtskogen302d2c12017-03-27 04:21:15 +0200383 if (hbd) {
384 copy_dering_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist,
385 dering_count, 3 - xdec);
386 } else {
387 copy_dering_16bit_to_8bit(dst, dstride, y, dlist, dering_count, bsize);
388 }
Steinar Midtskogena9d41e82017-03-17 12:48:15 +0100389 }
Yaowu Xu253c0012016-08-15 10:27:19 -0700390}