/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
#include "av1/common/loopfilter.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/reconinter.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"

#include "av1/common/seg_common.h"

// 64 bit masks for left transform size. Each 1 represents a position where
// we should apply a loop filter across the left border of an 8x8 block
// boundary.
//
// In the case of TX_16X16 (low order byte first) we end up with
// a mask that looks like this:
//
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//
// A loopfilter should be applied to every other 8x8 horizontally.
static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
  0xffffffffffffffffULL,  // TX_4X4
  0xffffffffffffffffULL,  // TX_8x8
  0x5555555555555555ULL,  // TX_16x16
  0x1111111111111111ULL,  // TX_32x32
};
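
// Worked example (derived from the table above): bit (row * 8 + col) of a
// mask corresponds to the 8x8 block at (row, col) inside the 64x64
// superblock, so the left edge of the 8x8 block at (row, col) is filtered
// for a given transform size exactly when
//
//   (left_64x64_txform_mask[tx_size] >> (row * 8 + col)) & 1
//
// is nonzero. With TX_16X16 (0x5555555555555555ULL) this is 1 whenever col
// is even, i.e. on every other 8x8 column, matching the picture above.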

// 64 bit masks for above transform size. Each 1 represents a position where
// we should apply a loop filter across the top border of an 8x8 block
// boundary.
//
// In the case of TX_32X32 (low order byte first) we end up with
// a mask that looks like this:
//
//    11111111
//    00000000
//    00000000
//    00000000
//    11111111
//    00000000
//    00000000
//    00000000
//
// A loopfilter should be applied to every 4th row vertically.
static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
  0xffffffffffffffffULL,  // TX_4X4
  0xffffffffffffffffULL,  // TX_8x8
  0x00ff00ff00ff00ffULL,  // TX_16x16
  0x000000ff000000ffULL,  // TX_32x32
};

// 64 bit masks for prediction sizes (left). Each 1 represents a position
// on the left border of an 8x8 block. These are aligned to the right-most
// appropriate bit, and then shifted into place.
//
// In the case of BLOCK_16X32 (low order byte first) we end up with
// a mask that looks like this:
//
//    10000000
//    10000000
//    10000000
//    10000000
//    00000000
//    00000000
//    00000000
//    00000000
static const uint64_t left_prediction_mask[BLOCK_SIZES] = {
  0x0000000000000001ULL,  // BLOCK_4X4,
  0x0000000000000001ULL,  // BLOCK_4X8,
  0x0000000000000001ULL,  // BLOCK_8X4,
  0x0000000000000001ULL,  // BLOCK_8X8,
  0x0000000000000101ULL,  // BLOCK_8X16,
  0x0000000000000001ULL,  // BLOCK_16X8,
  0x0000000000000101ULL,  // BLOCK_16X16,
  0x0000000001010101ULL,  // BLOCK_16X32,
  0x0000000000000101ULL,  // BLOCK_32X16,
  0x0000000001010101ULL,  // BLOCK_32X32,
  0x0101010101010101ULL,  // BLOCK_32X64,
  0x0000000001010101ULL,  // BLOCK_64X32,
  0x0101010101010101ULL,  // BLOCK_64X64
};

// 64 bit mask to shift and set for each prediction size.
static const uint64_t above_prediction_mask[BLOCK_SIZES] = {
  0x0000000000000001ULL,  // BLOCK_4X4
  0x0000000000000001ULL,  // BLOCK_4X8
  0x0000000000000001ULL,  // BLOCK_8X4
  0x0000000000000001ULL,  // BLOCK_8X8
  0x0000000000000001ULL,  // BLOCK_8X16,
  0x0000000000000003ULL,  // BLOCK_16X8
  0x0000000000000003ULL,  // BLOCK_16X16
  0x0000000000000003ULL,  // BLOCK_16X32,
  0x000000000000000fULL,  // BLOCK_32X16,
  0x000000000000000fULL,  // BLOCK_32X32,
  0x000000000000000fULL,  // BLOCK_32X64,
  0x00000000000000ffULL,  // BLOCK_64X32,
  0x00000000000000ffULL,  // BLOCK_64X64
};
// 64 bit mask to shift and set for each prediction size. A bit is set for
// each 8x8 block that would be in the top-left-most block of the given
// block size in the 64x64 block.
static const uint64_t size_mask[BLOCK_SIZES] = {
  0x0000000000000001ULL,  // BLOCK_4X4
  0x0000000000000001ULL,  // BLOCK_4X8
  0x0000000000000001ULL,  // BLOCK_8X4
  0x0000000000000001ULL,  // BLOCK_8X8
  0x0000000000000101ULL,  // BLOCK_8X16,
  0x0000000000000003ULL,  // BLOCK_16X8
  0x0000000000000303ULL,  // BLOCK_16X16
  0x0000000003030303ULL,  // BLOCK_16X32,
  0x0000000000000f0fULL,  // BLOCK_32X16,
  0x000000000f0f0f0fULL,  // BLOCK_32X32,
  0x0f0f0f0f0f0f0f0fULL,  // BLOCK_32X64,
  0x00000000ffffffffULL,  // BLOCK_64X32,
  0xffffffffffffffffULL,  // BLOCK_64X64
};
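
// As a worked example of how these tables combine: build_masks() below
// computes, for a block whose top-left 8x8 cell is at (row, col) and whose
// shift_y is row * 8 + col,
//
//   *above_y |= (size_mask[block_size] &
//                above_64x64_txform_mask[tx_size_y]) << shift_y;
//
// size_mask[BLOCK_16X16] (0x0303) covers the block's 2x2 group of 8x8
// cells, the transform mask keeps only the edges that transform size
// needs, and the shift moves the result over the block's actual position.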

// These are used for masking the left and above borders.
static const uint64_t left_border = 0x1111111111111111ULL;
static const uint64_t above_border = 0x000000ff000000ffULL;

// 16 bit masks for uv transform sizes.
static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
  0xffff,  // TX_4X4
  0xffff,  // TX_8x8
  0x5555,  // TX_16x16
  0x1111,  // TX_32x32
};

static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
  0xffff,  // TX_4X4
  0xffff,  // TX_8x8
  0x0f0f,  // TX_16x16
  0x000f,  // TX_32x32
};

// 16 bit left mask to shift and set for each uv prediction size.
static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4,
  0x0001,  // BLOCK_4X8,
  0x0001,  // BLOCK_8X4,
  0x0001,  // BLOCK_8X8,
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8,
  0x0001,  // BLOCK_16X16,
  0x0011,  // BLOCK_16X32,
  0x0001,  // BLOCK_32X16,
  0x0011,  // BLOCK_32X32,
  0x1111,  // BLOCK_32X64
  0x0011,  // BLOCK_64X32,
  0x1111,  // BLOCK_64X64
};
// 16 bit above mask to shift and set for each uv prediction size.
static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4
  0x0001,  // BLOCK_4X8
  0x0001,  // BLOCK_8X4
  0x0001,  // BLOCK_8X8
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8
  0x0001,  // BLOCK_16X16
  0x0001,  // BLOCK_16X32,
  0x0003,  // BLOCK_32X16,
  0x0003,  // BLOCK_32X32,
  0x0003,  // BLOCK_32X64,
  0x000f,  // BLOCK_64X32,
  0x000f,  // BLOCK_64X64
};

// 16 bit mask to shift and set for each uv prediction size.
static const uint16_t size_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4
  0x0001,  // BLOCK_4X8
  0x0001,  // BLOCK_8X4
  0x0001,  // BLOCK_8X8
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8
  0x0001,  // BLOCK_16X16
  0x0011,  // BLOCK_16X32,
  0x0003,  // BLOCK_32X16,
  0x0033,  // BLOCK_32X32,
  0x3333,  // BLOCK_32X64,
  0x00ff,  // BLOCK_64X32,
  0xffff,  // BLOCK_64X64
};
static const uint16_t left_border_uv = 0x1111;
static const uint16_t above_border_uv = 0x000f;

static const int mode_lf_lut[MB_MODE_COUNT] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // INTRA_MODES
  1, 1, 0, 1                     // INTER_MODES (ZEROMV == 0)
#if CONFIG_EXT_INTER
  ,
  1,                             // NEWFROMNEARMV mode
  1, 1, 1, 1, 1, 1, 1, 1, 0, 1   // INTER_COMPOUND_MODES (ZERO_ZEROMV == 0)
#endif  // CONFIG_EXT_INTER
};
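
// mode_lf_lut maps each prediction mode to the per-mode loop filter delta
// it uses: intra modes, ZEROMV, and ZERO_ZEROMV get delta index 0, every
// other inter mode gets delta index 1. get_filter_level() below uses it to
// index lfi_n->lvl[segment_id][ref_frame][mode_lf_lut[mode]].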

static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  int lvl;

  // For each possible value for the loop filter fill out limits
  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
    // Set loop filter parameters that control sharpness.
    int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));

    if (sharpness_lvl > 0) {
      if (block_inside_limit > (9 - sharpness_lvl))
        block_inside_limit = (9 - sharpness_lvl);
    }

    if (block_inside_limit < 1) block_inside_limit = 1;

    memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
    memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
           SIMD_WIDTH);
  }
}
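
// Worked example: with sharpness_lvl == 5 and lvl == 32 the initial inside
// limit is 32 >> (1 + 1) == 8, which the clamp then lowers to 9 - 5 == 4,
// so lim == 4 while mblim == 2 * (32 + 2) + 4 == 72. Raising sharpness
// tightens the inside limit even though mblim keeps growing with the
// filter level.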

static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
                                const MB_MODE_INFO *mbmi) {
#if CONFIG_SUPERTX
  const int segment_id = AOMMIN(mbmi->segment_id, mbmi->segment_id_supertx);
  assert(
      IMPLIES(supertx_enabled(mbmi), mbmi->segment_id_supertx != MAX_SEGMENTS));
  assert(IMPLIES(supertx_enabled(mbmi),
                 mbmi->segment_id_supertx <= mbmi->segment_id));
#else
  const int segment_id = mbmi->segment_id;
#endif  // CONFIG_SUPERTX
  return lfi_n->lvl[segment_id][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
}

void av1_loop_filter_init(AV1_COMMON *cm) {
  loop_filter_info_n *lfi = &cm->lf_info;
  struct loopfilter *lf = &cm->lf;
  int lvl;

  // init limits for given sharpness
  update_sharpness(lfi, lf->sharpness_level);
  lf->last_sharpness_level = lf->sharpness_level;

  // init hev threshold const vectors
  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
    memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
}
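
// The high edge variance threshold is simply lvl >> 4: filter levels 0..15
// get hev_thr 0, 16..31 get 1, 32..47 get 2, and 48..63 get 3.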

void av1_loop_filter_frame_init(AV1_COMMON *cm, int default_filt_lvl) {
  int seg_id;
  // scale is the multiplier applied to lf_deltas: 1 when default_filt_lvl
  // is between 0 and 31, 2 when it is between 32 and 63.
  const int scale = 1 << (default_filt_lvl >> 5);
  loop_filter_info_n *const lfi = &cm->lf_info;
  struct loopfilter *const lf = &cm->lf;
  const struct segmentation *const seg = &cm->seg;

  // update limits if sharpness has changed
  if (lf->last_sharpness_level != lf->sharpness_level) {
    update_sharpness(lfi, lf->sharpness_level);
    lf->last_sharpness_level = lf->sharpness_level;
  }

  for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
    int lvl_seg = default_filt_lvl;
    if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
      const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
      lvl_seg = clamp(
          seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data, 0,
          MAX_LOOP_FILTER);
    }

    if (!lf->mode_ref_delta_enabled) {
      // we could get rid of this if we assume that deltas are set to
      // zero when not in use; encoder always uses deltas
      memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
    } else {
      int ref, mode;
      const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
      lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);

      for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
        for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
          const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
                                lf->mode_deltas[mode] * scale;
          lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
        }
      }
    }
  }
}
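
// Worked example of the delta scaling above: with default_filt_lvl == 40,
// scale == 1 << (40 >> 5) == 2, so a ref_delta of -2 and a mode_delta of 1
// give an effective level of 40 + (-2) * 2 + 1 * 2 == 38 before clamping
// to [0, MAX_LOOP_FILTER].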

static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s,
                                         int pitch, unsigned int mask_16x16_l,
                                         unsigned int mask_8x8_l,
                                         unsigned int mask_4x4_l,
                                         unsigned int mask_4x4_int_l,
                                         const loop_filter_info_n *lfi_n,
                                         const uint8_t *lfl) {
  const int mask_shift = subsampling_factor ? 4 : 8;
  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
  const int lfl_forward = subsampling_factor ? 4 : 8;

  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
  unsigned int mask;

  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
              mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
       mask; mask >>= 1) {
    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);

    if (mask & 1) {
      if ((mask_16x16_0 | mask_16x16_1) & 1) {
        if ((mask_16x16_0 & mask_16x16_1) & 1) {
          aom_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                   lfi0->hev_thr);
        } else if (mask_16x16_0 & 1) {
          aom_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
        } else {
          aom_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
                              lfi1->hev_thr);
        }
      }

      if ((mask_8x8_0 | mask_8x8_1) & 1) {
        if ((mask_8x8_0 & mask_8x8_1) & 1) {
          aom_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_8x8_0 & 1) {
          aom_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
        } else {
          aom_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr);
        }
      }

      if ((mask_4x4_0 | mask_4x4_1) & 1) {
        if ((mask_4x4_0 & mask_4x4_1) & 1) {
          aom_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_4x4_0 & 1) {
          aom_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
        } else {
          aom_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr);
        }
      }

      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
          aom_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_4x4_int_0 & 1) {
          aom_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
                             lfi0->hev_thr);
        } else {
          aom_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr);
        }
      }
    }

    s += 8;
    lfl += 1;
    mask_16x16_0 >>= 1;
    mask_8x8_0 >>= 1;
    mask_4x4_0 >>= 1;
    mask_4x4_int_0 >>= 1;
    mask_16x16_1 >>= 1;
    mask_8x8_1 >>= 1;
    mask_4x4_1 >>= 1;
    mask_4x4_int_1 >>= 1;
  }
}

#if CONFIG_AOM_HIGHBITDEPTH
static void highbd_filter_selectively_vert_row2(
    int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16_l,
    unsigned int mask_8x8_l, unsigned int mask_4x4_l,
    unsigned int mask_4x4_int_l, const loop_filter_info_n *lfi_n,
    const uint8_t *lfl, int bd) {
  const int mask_shift = subsampling_factor ? 4 : 8;
  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
  const int lfl_forward = subsampling_factor ? 4 : 8;

  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
  unsigned int mask;

  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
              mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
       mask; mask >>= 1) {
    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);

    if (mask & 1) {
      if ((mask_16x16_0 | mask_16x16_1) & 1) {
        if ((mask_16x16_0 & mask_16x16_1) & 1) {
          aom_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                          lfi0->hev_thr, bd);
        } else if (mask_16x16_0 & 1) {
          aom_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
                                     lfi0->hev_thr, bd);
        } else {
          aom_highbd_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim,
                                     lfi1->lim, lfi1->hev_thr, bd);
        }
      }

      if ((mask_8x8_0 | mask_8x8_1) & 1) {
        if ((mask_8x8_0 & mask_8x8_1) & 1) {
          aom_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                         lfi1->hev_thr, bd);
        } else if (mask_8x8_0 & 1) {
          aom_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, bd);
        } else {
          aom_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
                                    lfi1->lim, lfi1->hev_thr, bd);
        }
      }

      if ((mask_4x4_0 | mask_4x4_1) & 1) {
        if ((mask_4x4_0 & mask_4x4_1) & 1) {
          aom_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                         lfi1->hev_thr, bd);
        } else if (mask_4x4_0 & 1) {
          aom_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, bd);
        } else {
          aom_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
                                    lfi1->lim, lfi1->hev_thr, bd);
        }
      }

      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
          aom_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                         lfi1->hev_thr, bd);
        } else if (mask_4x4_int_0 & 1) {
          aom_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, bd);
        } else {
          aom_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
                                    lfi1->lim, lfi1->hev_thr, bd);
        }
      }
    }

    s += 8;
    lfl += 1;
    mask_16x16_0 >>= 1;
    mask_8x8_0 >>= 1;
    mask_4x4_0 >>= 1;
    mask_4x4_int_0 >>= 1;
    mask_16x16_1 >>= 1;
    mask_8x8_1 >>= 1;
    mask_4x4_1 >>= 1;
    mask_4x4_int_1 >>= 1;
  }
}
#endif  // CONFIG_AOM_HIGHBITDEPTH

static void filter_selectively_horiz(
    uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_info_n *lfi_n, const uint8_t *lfl) {
  unsigned int mask;
  int count;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
       mask >>= count) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    count = 1;
    if (mask & 1) {
      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
          aom_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
                                     lfi->hev_thr);
          count = 2;
        } else {
          aom_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr);
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          aom_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);

          if ((mask_4x4_int & 3) == 3) {
            aom_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr);
            else if (mask_4x4_int & 2)
              aom_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr);
          }
          count = 2;
        } else {
          aom_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);

          if (mask_4x4_int & 1)
            aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr);
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          aom_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);
          if ((mask_4x4_int & 3) == 3) {
            aom_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr);
            else if (mask_4x4_int & 2)
              aom_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr);
          }
          count = 2;
        } else {
          aom_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);

          if (mask_4x4_int & 1)
            aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr);
        }
      } else if (mask_4x4_int & 1) {
        aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                             lfi->hev_thr);
      }
    }
    s += 8 * count;
    lfl += count;
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
  }
}
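
// Note on count above: when two horizontally adjacent 8x8 blocks need the
// same filter ((mask & 3) == 3), a *_dual variant filters both edges in one
// call and count becomes 2, so the loop then advances the source pointer,
// the filter-level pointer, and every mask by two positions instead of one.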

#if CONFIG_AOM_HIGHBITDEPTH
static void highbd_filter_selectively_horiz(
    uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
  unsigned int mask;
  int count;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
       mask >>= count) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    count = 1;
    if (mask & 1) {
      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
          aom_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
                                            lfi->hev_thr, bd);
          count = 2;
        } else {
          aom_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
                                           lfi->hev_thr, bd);
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          aom_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                           lfi->hev_thr, lfin->mblim, lfin->lim,
                                           lfin->hev_thr, bd);

          if ((mask_4x4_int & 3) == 3) {
            aom_highbd_lpf_horizontal_4_dual(
                s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                lfin->mblim, lfin->lim, lfin->hev_thr, bd);
          } else {
            if (mask_4x4_int & 1) {
              aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                          lfi->lim, lfi->hev_thr, bd);
            } else if (mask_4x4_int & 2) {
              aom_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                          lfin->lim, lfin->hev_thr, bd);
            }
          }
          count = 2;
        } else {
          aom_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, bd);

          if (mask_4x4_int & 1) {
            aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                        lfi->lim, lfi->hev_thr, bd);
          }
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          aom_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                           lfi->hev_thr, lfin->mblim, lfin->lim,
                                           lfin->hev_thr, bd);
          if ((mask_4x4_int & 3) == 3) {
            aom_highbd_lpf_horizontal_4_dual(
                s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                lfin->mblim, lfin->lim, lfin->hev_thr, bd);
          } else {
            if (mask_4x4_int & 1) {
              aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                          lfi->lim, lfi->hev_thr, bd);
            } else if (mask_4x4_int & 2) {
              aom_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                          lfin->lim, lfin->hev_thr, bd);
            }
          }
          count = 2;
        } else {
          aom_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, bd);

          if (mask_4x4_int & 1) {
            aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                        lfi->lim, lfi->hev_thr, bd);
          }
        }
      } else if (mask_4x4_int & 1) {
        aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, bd);
      }
    }
    s += 8 * count;
    lfl += count;
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
  }
}
#endif  // CONFIG_AOM_HIGHBITDEPTH

// This function ORs into the current lfm structure where to apply loop
// filters for the specific mi we are looking at. It uses information
// including the block_size_type (32x16, 32x32, etc.), the transform size,
// whether there were any coefficients encoded, and the loop filter strength
// of the block we are currently looking at. Shift is used to position the
// 1's we produce.
// TODO(JBB) Need another function for different resolution color..
static void build_masks(const loop_filter_info_n *const lfi_n,
                        const MODE_INFO *mi, const int shift_y,
                        const int shift_uv, LOOP_FILTER_MASK *lfm) {
  const MB_MODE_INFO *mbmi = &mi->mbmi;
  const BLOCK_SIZE block_size = mbmi->sb_type;
  // TODO(debargha): Check if masks can be setup correctly when
  // rectangular transforms are used with the EXT_TX expt.
  const TX_SIZE tx_size_y = txsize_sqr_up_map[mbmi->tx_size];
  const TX_SIZE tx_size_uv =
      txsize_sqr_up_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
  const int filter_level = get_filter_level(lfi_n, mbmi);
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
  uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
  uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
  uint16_t *const int_4x4_uv = &lfm->left_int_4x4_uv;
  int i;

  // If filter level is 0 we don't loop filter.
  if (!filter_level) {
    return;
  } else {
    const int w = num_8x8_blocks_wide_lookup[block_size];
    const int h = num_8x8_blocks_high_lookup[block_size];
    const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
    const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);

    for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
  }

  // These set 1 in the current block size for the block size edges.
  // For instance if the block size is 32x16, we'll set:
  //    above =   1111
  //              0000
  //    and
  //    left  =   1000
  //              1000
  // NOTE: In this example the low bit is left-most, so ( 1000 ) is stored
  // as 1, not 8...
  //
  // U and V set things on a 16 bit scale.
  //
  *above_y |= above_prediction_mask[block_size] << shift_y;
  *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
  *left_y |= left_prediction_mask[block_size] << shift_y;
  *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;

  // If the block has no coefficients and is not intra we skip applying
  // the loop filter on block edges.
  if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return;

  // Here we are adding a mask for the transform size. The transform
  // size mask is set to be correct for a 64x64 prediction block size. We
  // mask to match the size of the block we are working on and then shift it
  // into place.
  *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
              << shift_y;
  *above_uv |=
      (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv])
      << shift_uv;

  *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
             << shift_y;
  *left_uv |= (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv])
              << shift_uv;

  // Here we are trying to determine what to do with the internal 4x4 block
  // boundaries. These differ from the 4x4 boundaries on the outside edge of
  // an 8x8 in that the internal ones can be skipped and don't depend on
  // the prediction block size.
  if (tx_size_y == TX_4X4)
    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;

  if (tx_size_uv == TX_4X4)
    *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
}
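
// Worked example for the y masks: a BLOCK_32X16 whose top-left 8x8 block
// sits at (row 2, col 4) has shift_y == 2 * 8 + 4 == 20, so
// above_prediction_mask[BLOCK_32X16] (0b1111) shifted by 20 marks the top
// edge of the four 8x8 columns it spans, while
// left_prediction_mask[BLOCK_32X16] (0x0101) marks the left edge of both of
// its 8x8 rows.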

// This function does the same thing as the one above with the exception that
// it only affects the y masks. It exists because for blocks < 16x16 in size,
// we only update u and v masks on the first block.
static void build_y_mask(const loop_filter_info_n *const lfi_n,
                         const MODE_INFO *mi, const int shift_y,
#if CONFIG_SUPERTX
                         int supertx_enabled,
#endif  // CONFIG_SUPERTX
                         LOOP_FILTER_MASK *lfm) {
  const MB_MODE_INFO *mbmi = &mi->mbmi;
  const TX_SIZE tx_size_y = txsize_sqr_up_map[mbmi->tx_size];
#if CONFIG_SUPERTX
  const BLOCK_SIZE block_size =
      supertx_enabled ? (BLOCK_SIZE)(3 * tx_size_y) : mbmi->sb_type;
#else
  const BLOCK_SIZE block_size = mbmi->sb_type;
#endif
  const int filter_level = get_filter_level(lfi_n, mbmi);
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
  int i;

  if (!filter_level) {
    return;
  } else {
    const int w = num_8x8_blocks_wide_lookup[block_size];
    const int h = num_8x8_blocks_high_lookup[block_size];
    const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
    const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);

    for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
  }

  *above_y |= above_prediction_mask[block_size] << shift_y;
  *left_y |= left_prediction_mask[block_size] << shift_y;

  if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return;

  *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
              << shift_y;

  *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
             << shift_y;

  if (tx_size_y == TX_4X4)
    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
}

// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
// TODO(JBB): This function only works for yv12.
void av1_setup_mask(AV1_COMMON *const cm, const int mi_row, const int mi_col,
                    MODE_INFO **mi, const int mode_info_stride,
                    LOOP_FILTER_MASK *lfm) {
  int idx_32, idx_16, idx_8;
  const loop_filter_info_n *const lfi_n = &cm->lf_info;
  MODE_INFO **mip = mi;
  MODE_INFO **mip2 = mi;

  // These are offsets to the next mi in the 64x64 block. It is what gets
  // added to the mi ptr as we go through each loop. It helps us to avoid
  // setting up special row and column counters for each index. The last step
  // brings us back out to the starting position.
  const int offset_32[] = { 4, (mode_info_stride << 2) - 4, 4,
                            -(mode_info_stride << 2) - 4 };
  const int offset_16[] = { 2, (mode_info_stride << 1) - 2, 2,
                            -(mode_info_stride << 1) - 2 };
  const int offset[] = { 1, mode_info_stride - 1, 1, -mode_info_stride - 1 };

  // Following variables represent shifts to position the current block
  // mask over the appropriate block. A shift of 36 to the left will move
  // the bits for the final 32 by 32 block in the 64x64 down 4 rows and
  // right 4 columns to the appropriate spot.
  const int shift_32_y[] = { 0, 4, 32, 36 };
  const int shift_16_y[] = { 0, 2, 16, 18 };
  const int shift_8_y[] = { 0, 1, 8, 9 };
  const int shift_32_uv[] = { 0, 2, 8, 10 };
  const int shift_16_uv[] = { 0, 1, 4, 5 };
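
  // For example, idx_32 == 3 is the bottom-right 32x32 quadrant, whose first
  // 8x8 block sits at (row 4, col 4) of the superblock, so
  // shift_32_y[3] == 4 * 8 + 4 == 36. The uv tables work on a 4x4 grid of
  // chroma cells, so the same quadrant gets shift_32_uv[3] == 2 * 4 + 2 == 10.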
  int i;
  const int max_rows = AOMMIN(cm->mi_rows - mi_row, MAX_MIB_SIZE);
  const int max_cols = AOMMIN(cm->mi_cols - mi_col, MAX_MIB_SIZE);
#if CONFIG_EXT_PARTITION
  assert(0 && "Not yet updated");
#endif  // CONFIG_EXT_PARTITION

  av1_zero(*lfm);
  assert(mip[0] != NULL);

  // TODO(jimbankoski): Try moving most of the following code into decode
  // loop and storing lfm in the mbmi structure so that we don't have to go
  // through the recursive loop structure multiple times.
  switch (mip[0]->mbmi.sb_type) {
    case BLOCK_64X64: build_masks(lfi_n, mip[0], 0, 0, lfm); break;
    case BLOCK_64X32:
      build_masks(lfi_n, mip[0], 0, 0, lfm);
      mip2 = mip + mode_info_stride * 4;
      if (4 >= max_rows) break;
      build_masks(lfi_n, mip2[0], 32, 8, lfm);
      break;
    case BLOCK_32X64:
      build_masks(lfi_n, mip[0], 0, 0, lfm);
      mip2 = mip + 4;
      if (4 >= max_cols) break;
      build_masks(lfi_n, mip2[0], 4, 2, lfm);
      break;
    default:
      for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
        const int shift_y = shift_32_y[idx_32];
        const int shift_uv = shift_32_uv[idx_32];
        const int mi_32_col_offset = ((idx_32 & 1) << 2);
        const int mi_32_row_offset = ((idx_32 >> 1) << 2);
        if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
          continue;
        switch (mip[0]->mbmi.sb_type) {
          case BLOCK_32X32:
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
            break;
          case BLOCK_32X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
#if CONFIG_SUPERTX
            if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
            if (mi_32_row_offset + 2 >= max_rows) continue;
            mip2 = mip + mode_info_stride * 2;
            build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
            break;
          case BLOCK_16X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
#if CONFIG_SUPERTX
            if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
            if (mi_32_col_offset + 2 >= max_cols) continue;
            mip2 = mip + 2;
            build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
            break;
          default:
#if CONFIG_SUPERTX
            if (mip[0]->mbmi.tx_size == TX_32X32) {
              build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
              break;
            }
#endif
            for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
              const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
              const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
              const int mi_16_col_offset =
                  mi_32_col_offset + ((idx_16 & 1) << 1);
              const int mi_16_row_offset =
                  mi_32_row_offset + ((idx_16 >> 1) << 1);

              if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
                continue;

              switch (mip[0]->mbmi.sb_type) {
                case BLOCK_16X16:
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  break;
                case BLOCK_16X8:
#if CONFIG_SUPERTX
                  if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  if (mi_16_row_offset + 1 >= max_rows) continue;
                  mip2 = mip + mode_info_stride;
                  build_y_mask(lfi_n, mip2[0], shift_y + 8,
#if CONFIG_SUPERTX
                               0,
#endif
                               lfm);
                  break;
                case BLOCK_8X16:
#if CONFIG_SUPERTX
                  if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  if (mi_16_col_offset + 1 >= max_cols) continue;
                  mip2 = mip + 1;
                  build_y_mask(lfi_n, mip2[0], shift_y + 1,
#if CONFIG_SUPERTX
                               0,
#endif
                               lfm);
                  break;
                default: {
                  const int shift_y =
                      shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0];
#if CONFIG_SUPERTX
                  if (mip[0]->mbmi.tx_size == TX_16X16) {
                    build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                    break;
                  }
#endif
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  mip += offset[0];
                  for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
                    const int shift_y = shift_32_y[idx_32] +
                                        shift_16_y[idx_16] + shift_8_y[idx_8];
                    const int mi_8_col_offset =
                        mi_16_col_offset + ((idx_8 & 1));
                    const int mi_8_row_offset =
                        mi_16_row_offset + ((idx_8 >> 1));

                    if (mi_8_col_offset >= max_cols ||
                        mi_8_row_offset >= max_rows)
                      continue;
                    build_y_mask(lfi_n, mip[0], shift_y,
#if CONFIG_SUPERTX
                                 supertx_enabled(&mip[0]->mbmi),
#endif
                                 lfm);
                  }
                  break;
                }
              }
            }
            break;
        }
      }
      break;
  }
  // The largest loopfilter we have is 16x16 so we use the 16x16 mask
  // for 32x32 transforms also.
  lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
  lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
  lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
  lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];

  // We apply at least an 8 tap filter on every 32x32 boundary, even if the
  // transform size is 4x4. So if the 4x4 is set on a border pixel add it to
  // the 8x8 and remove it from the 4x4.
  lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
  lfm->left_y[TX_4X4] &= ~left_border;
  lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
  lfm->above_y[TX_4X4] &= ~above_border;
  lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
  lfm->left_uv[TX_4X4] &= ~left_border_uv;
  lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
  lfm->above_uv[TX_4X4] &= ~above_border_uv;

  // We do some special edge handling.
  if (mi_row + MAX_MIB_SIZE > cm->mi_rows) {
    const uint64_t rows = cm->mi_rows - mi_row;

    // Each pixel inside the border gets a 1.
    const uint64_t mask_y = (((uint64_t)1 << (rows << MAX_MIB_SIZE_LOG2)) - 1);
    const uint16_t mask_uv =
        (((uint16_t)1 << (((rows + 1) >> 1) << (MAX_MIB_SIZE_LOG2 - 1))) - 1);

    // Remove values completely outside our border.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->above_int_4x4_uv = lfm->left_int_4x4_uv & mask_uv;

    // We don't apply a wide loop filter on the last uv block row. If set
    // apply the shorter one instead.
    if (rows == 1) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
      lfm->above_uv[TX_16X16] = 0;
    }
    if (rows == 5) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
      lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
    }
  }

  if (mi_col + MAX_MIB_SIZE > cm->mi_cols) {
    const uint64_t columns = cm->mi_cols - mi_col;

    // Each pixel inside the border gets a 1; the multiply copies the border
    // to where we need it.
    const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL;
    const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;

    // Internal edges are not applied on the last column of the image so
    // we mask 1 more for the internal edges.
    const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;

    // Remove the bits outside the image edge.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->left_int_4x4_uv &= mask_uv_int;

    // We don't apply a wide loop filter on the last uv column. If set
    // apply the shorter one instead.
    if (columns == 1) {
      lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
      lfm->left_uv[TX_16X16] = 0;
    }
    if (columns == 5) {
      lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
      lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
    }
  }
  // We don't apply a loop filter on the first column in the image; mask that
  // out.
  if (mi_col == 0) {
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= 0xfefefefefefefefeULL;
      lfm->left_uv[i] &= 0xeeee;
    }
  }

  // Assert if we try to apply 2 different loop filters at the same position.
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
  assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8]));
  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
  assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16]));
}
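
// A caller is expected to walk the frame in 64x64 superblocks, build the
// masks once per superblock, and then run the per-plane fast paths.
// Roughly (a hypothetical sketch; the real driver loop lives in this file's
// callers, and the mi-grid field name here is an assumption):
//
//   LOOP_FILTER_MASK lfm;
//   int mi_row, mi_col;
//   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MAX_MIB_SIZE)
//     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
//       av1_setup_mask(cm, mi_row, mi_col,
//                      cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col,
//                      cm->mi_stride, &lfm);
//       av1_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
//     }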

static void filter_selectively_vert(
    uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_info_n *lfi_n, const uint8_t *lfl) {
  unsigned int mask;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
       mask >>= 1) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    if (mask & 1) {
      if (mask_16x16 & 1) {
        aom_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
      } else if (mask_8x8 & 1) {
        aom_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
      } else if (mask_4x4 & 1) {
        aom_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
      }
    }
    if (mask_4x4_int & 1)
      aom_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
    s += 8;
    lfl += 1;
    mask_16x16 >>= 1;
    mask_8x8 >>= 1;
    mask_4x4 >>= 1;
    mask_4x4_int >>= 1;
  }
}

#if CONFIG_AOM_HIGHBITDEPTH
static void highbd_filter_selectively_vert(
    uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
  unsigned int mask;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
       mask >>= 1) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    if (mask & 1) {
      if (mask_16x16 & 1) {
        aom_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                                   bd);
      } else if (mask_8x8 & 1) {
        aom_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                                  bd);
      } else if (mask_4x4 & 1) {
        aom_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                                  bd);
      }
    }
    if (mask_4x4_int & 1)
      aom_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
                                lfi->hev_thr, bd);
    s += 8;
    lfl += 1;
    mask_16x16 >>= 1;
    mask_8x8 >>= 1;
    mask_4x4 >>= 1;
    mask_4x4_int >>= 1;
  }
}
#endif  // CONFIG_AOM_HIGHBITDEPTH

void av1_filter_block_plane_non420(AV1_COMMON *cm,
                                   struct macroblockd_plane *plane,
                                   MODE_INFO **mib, int mi_row, int mi_col) {
  const int ss_x = plane->subsampling_x;
  const int ss_y = plane->subsampling_y;
  const int row_step = 1 << ss_y;
  const int col_step = 1 << ss_x;
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  unsigned int mask_16x16[MAX_MIB_SIZE] = { 0 };
  unsigned int mask_8x8[MAX_MIB_SIZE] = { 0 };
  unsigned int mask_4x4[MAX_MIB_SIZE] = { 0 };
  unsigned int mask_4x4_int[MAX_MIB_SIZE] = { 0 };
  uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE];
  int r, c;

  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) {
    unsigned int mask_16x16_c = 0;
    unsigned int mask_8x8_c = 0;
    unsigned int mask_4x4_c = 0;
    unsigned int border_mask;

    // Determine the vertical edges that need filtering
    for (c = 0; c < cm->mib_size && mi_col + c < cm->mi_cols; c += col_step) {
      const MODE_INFO *mi = mib[c];
      const MB_MODE_INFO *mbmi = &mi[0].mbmi;
      const BLOCK_SIZE sb_type = mbmi->sb_type;
      const int skip_this = mbmi->skip && is_inter_block(mbmi);
      const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
      const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);

      // left edge of current unit is block/partition edge -> no skip
      const int block_edge_left =
          (num_4x4_blocks_wide_lookup[sb_type] > 1) ? !blk_col : 1;
      const int skip_this_c = skip_this && !block_edge_left;
      // top edge of current unit is block/partition edge -> no skip
      const int block_edge_above =
          (num_4x4_blocks_high_lookup[sb_type] > 1) ? !blk_row : 1;
      const int skip_this_r = skip_this && !block_edge_above;

#if CONFIG_VAR_TX
      TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
                            ? get_uv_tx_size(mbmi, plane)
                            : mbmi->tx_size;
#else
      const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
                                  ? get_uv_tx_size(mbmi, plane)
                                  : mbmi->tx_size;
#endif

      const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
      const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;

      TX_SIZE tx_size_c = num_4x4_blocks_wide_txsize_log2_lookup[tx_size];
      TX_SIZE tx_size_r = num_4x4_blocks_high_txsize_log2_lookup[tx_size];

      int tx_size_mask = 0;
      // Filter level can vary per MI
      if (!(lfl[r][c >> ss_x] = get_filter_level(&cm->lf_info, mbmi))) continue;

      if (txsize_sqr_up_map[tx_size] == TX_32X32)
        tx_size_mask = 3;
      else if (txsize_sqr_up_map[tx_size] == TX_16X16)
        tx_size_mask = 1;
      else
        tx_size_mask = 0;

#if CONFIG_VAR_TX
      if (is_inter_block(mbmi) && !mbmi->skip) {
#if CONFIG_EXT_TX && CONFIG_RECT_TX
        TX_SIZE mb_tx_size = is_rect_tx(mbmi->tx_size)
                                 ? mbmi->tx_size
                                 : mbmi->inter_tx_size[blk_row][blk_col];
#else
        TX_SIZE mb_tx_size = mbmi->inter_tx_size[blk_row][blk_col];
#endif
        tx_size = (plane->plane_type == PLANE_TYPE_UV)
                      ? uv_txsize_lookup[sb_type][mb_tx_size][ss_x][ss_y]
                      : mb_tx_size;
      }

#if CONFIG_EXT_TX && CONFIG_RECT_TX
      tx_size_r =
          AOMMIN(txsize_horz_map[tx_size], cm->above_txfm_context[mi_col + c]);
      tx_size_c = AOMMIN(txsize_vert_map[tx_size],
                         cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK]);

      cm->above_txfm_context[mi_col + c] = txsize_horz_map[tx_size];
      cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK] =
          txsize_vert_map[tx_size];
#else
      tx_size_r = AOMMIN(tx_size, cm->above_txfm_context[mi_col + c]);
      tx_size_c =
          AOMMIN(tx_size, cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK]);

      cm->above_txfm_context[mi_col + c] = tx_size;
      cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK] = tx_size;
#endif
#endif
1279
1280 // Build masks based on the transform size of each block
1281 // handle vertical mask
1282 if (tx_size_c == TX_32X32) {
1283 if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
1284 if (!skip_border_4x4_c)
1285 mask_16x16_c |= 1 << (c >> ss_x);
1286 else
1287 mask_8x8_c |= 1 << (c >> ss_x);
1288 }
1289 } else if (tx_size_c == TX_16X16) {
1290 if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
1291 if (!skip_border_4x4_c)
1292 mask_16x16_c |= 1 << (c >> ss_x);
1293 else
1294 mask_8x8_c |= 1 << (c >> ss_x);
1295 }
1296 } else {
1297 // force 8x8 filtering on 32x32 boundaries
1298 if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
1299 if (tx_size_c == TX_8X8 || ((c >> ss_x) & 3) == 0)
1300 mask_8x8_c |= 1 << (c >> ss_x);
1301 else
1302 mask_4x4_c |= 1 << (c >> ss_x);
1303 }
1304
1305 if (!skip_this && tx_size_c < TX_8X8 && !skip_border_4x4_c &&
1306 ((c >> ss_x) & tx_size_mask) == 0)
1307 mask_4x4_int[r] |= 1 << (c >> ss_x);
1308 }
1309
1310 // set horizontal mask
1311 if (tx_size_r == TX_32X32) {
1312 if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
1313 if (!skip_border_4x4_r)
1314 mask_16x16[r] |= 1 << (c >> ss_x);
1315 else
1316 mask_8x8[r] |= 1 << (c >> ss_x);
1317 }
1318 } else if (tx_size_r == TX_16X16) {
1319 if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
1320 if (!skip_border_4x4_r)
1321 mask_16x16[r] |= 1 << (c >> ss_x);
1322 else
1323 mask_8x8[r] |= 1 << (c >> ss_x);
1324 }
1325 } else {
1326 // force 8x8 filtering on 32x32 boundaries
1327 if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
1328 if (tx_size_r == TX_8X8 || ((r >> ss_y) & 3) == 0)
1329 mask_8x8[r] |= 1 << (c >> ss_x);
1330 else
1331 mask_4x4[r] |= 1 << (c >> ss_x);
1332 }
1333
1334 if (!skip_this && tx_size_r < TX_8X8 && !skip_border_4x4_c &&
1335 ((r >> ss_y) & tx_size_mask) == 0)
1336 mask_4x4_int[r] |= 1 << (c >> ss_x);
1337 }
1338 }
1339
1340 // Disable filtering on the leftmost column
1341 border_mask = ~(mi_col == 0);
#if CONFIG_AOM_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_vert(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
          mask_16x16_c & border_mask, mask_8x8_c & border_mask,
          mask_4x4_c & border_mask, mask_4x4_int[r], &cm->lf_info, &lfl[r][0],
          (int)cm->bit_depth);
    } else {
      filter_selectively_vert(dst->buf, dst->stride,
                              mask_16x16_c & border_mask,
                              mask_8x8_c & border_mask,
                              mask_4x4_c & border_mask, mask_4x4_int[r],
                              &cm->lf_info, &lfl[r][0]);
    }
#else
    filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask,
                            mask_8x8_c & border_mask, mask_4x4_c & border_mask,
                            mask_4x4_int[r], &cm->lf_info, &lfl[r][0]);
#endif  // CONFIG_AOM_HIGHBITDEPTH
    dst->buf += MI_SIZE * dst->stride;
    mib += row_step * cm->mi_stride;
  }

  // Now do the horizontal pass.
  dst->buf = dst0;
  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) {
    const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
    const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];

    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      mask_16x16_r = mask_16x16[r];
      mask_8x8_r = mask_8x8[r];
      mask_4x4_r = mask_4x4[r];
    }
#if CONFIG_AOM_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
                                      dst->stride, mask_16x16_r, mask_8x8_r,
                                      mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
                                      &lfl[r][0], (int)cm->bit_depth);
    } else {
      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                               mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
                               &lfl[r][0]);
    }
#else
    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                             mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
                             &lfl[r][0]);
#endif  // CONFIG_AOM_HIGHBITDEPTH
    dst->buf += MI_SIZE * dst->stride;
  }
}
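
/* Note on pass ordering (editor's sketch of the rationale): within each
   superblock row, all vertical edges are filtered first, then dst->buf is
   rewound to dst0 and the horizontal edges are filtered, since the
   horizontal filter should read pixels the vertical filter has already
   smoothed. The mi_row + r == 0 guard above keeps the top frame border
   unfiltered, as there is no neighbor above it to read. */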

void av1_filter_block_plane_ss00(AV1_COMMON *const cm,
                                 struct macroblockd_plane *const plane,
                                 int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  int r;
  uint64_t mask_16x16 = lfm->left_y[TX_16X16];
  uint64_t mask_8x8 = lfm->left_y[TX_8X8];
  uint64_t mask_4x4 = lfm->left_y[TX_4X4];
  uint64_t mask_4x4_int = lfm->int_4x4_y;

  assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);

  // Vertical pass: filter two rows of 8x8 blocks at a time.
  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
    unsigned int mask_16x16_l = mask_16x16 & 0xffff;
    unsigned int mask_8x8_l = mask_8x8 & 0xffff;
    unsigned int mask_4x4_l = mask_4x4 & 0xffff;
    unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;

// Disable filtering on the leftmost column.
#if CONFIG_AOM_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_vert_row2(
          plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
          mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
          &lfm->lfl_y[r][0], (int)cm->bit_depth);
    } else {
      filter_selectively_vert_row2(
          plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l,
          mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
          &lfm->lfl_y[r][0]);
    }
#else
    filter_selectively_vert_row2(
        plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
        mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r][0]);
#endif  // CONFIG_AOM_HIGHBITDEPTH
    dst->buf += 2 * MI_SIZE * dst->stride;
    mask_16x16 >>= 2 * MI_SIZE;
    mask_8x8 >>= 2 * MI_SIZE;
    mask_4x4 >>= 2 * MI_SIZE;
    mask_4x4_int >>= 2 * MI_SIZE;
  }
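
  /* Mask geometry (worked example, editor's illustration): each 64-bit
     left_y mask holds one bit per 8x8 block of the 64x64 superblock, eight
     bits per mi row. The low 16 bits therefore cover the two rows handled
     by one row2 call, matching the 2 * MI_SIZE = 16-bit shift after each
     iteration. */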

  // Horizontal pass
  dst->buf = dst0;
  mask_16x16 = lfm->above_y[TX_16X16];
  mask_8x8 = lfm->above_y[TX_8X8];
  mask_4x4 = lfm->above_y[TX_4X4];
  mask_4x4_int = lfm->int_4x4_y;

  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r++) {
    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      mask_16x16_r = mask_16x16 & 0xff;
      mask_8x8_r = mask_8x8 & 0xff;
      mask_4x4_r = mask_4x4 & 0xff;
    }

#if CONFIG_AOM_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
          mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r][0],
          (int)cm->bit_depth);
    } else {
      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                               mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
                               &lfm->lfl_y[r][0]);
    }
#else
    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                             mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
                             &lfm->lfl_y[r][0]);
#endif  // CONFIG_AOM_HIGHBITDEPTH

    dst->buf += MI_SIZE * dst->stride;
    mask_16x16 >>= MI_SIZE;
    mask_8x8 >>= MI_SIZE;
    mask_4x4 >>= MI_SIZE;
    mask_4x4_int >>= MI_SIZE;
  }
}
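
/* Worked example (editor's illustration): the horizontal pass consumes one
   mi row per iteration (mask & 0xff, then mask >>= MI_SIZE, i.e. 8 bits).
   For the first row of the frame (mi_row + r == 0) all masks are forced to
   zero, so the top frame border is never filtered. */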

void av1_filter_block_plane_ss11(AV1_COMMON *const cm,
                                 struct macroblockd_plane *const plane,
                                 int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  int r, c;

  uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
  uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
  uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
  uint16_t mask_4x4_int = lfm->left_int_4x4_uv;

  assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
  assert(plane->plane_type == PLANE_TYPE_UV);

  // Vertical pass: filter two chroma rows (four luma mi rows) at a time.
  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 4) {
    for (c = 0; c < (cm->mib_size >> 1); c++) {
      lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1];
      lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1];
    }
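
    /* Worked example (editor's illustration): for r = 0, c = 0 this copies
         lfl_uv[0][0] = lfl_y[0][0] and lfl_uv[1][0] = lfl_y[2][0],
       i.e. each chroma 8x8 block inherits the filter level of the top-left
       of the four luma 8x8 blocks it covers. */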

    {
      unsigned int mask_16x16_l = mask_16x16 & 0xff;
      unsigned int mask_8x8_l = mask_8x8 & 0xff;
      unsigned int mask_4x4_l = mask_4x4 & 0xff;
      unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;

// Disable filtering on the leftmost column.
#if CONFIG_AOM_HIGHBITDEPTH
      if (cm->use_highbitdepth) {
        highbd_filter_selectively_vert_row2(
            plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
            mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
            &lfm->lfl_uv[r >> 1][0], (int)cm->bit_depth);
      } else {
        filter_selectively_vert_row2(plane->subsampling_x, dst->buf,
                                     dst->stride, mask_16x16_l, mask_8x8_l,
                                     mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
                                     &lfm->lfl_uv[r >> 1][0]);
      }
#else
      filter_selectively_vert_row2(
          plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l,
          mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
          &lfm->lfl_uv[r >> 1][0]);
#endif  // CONFIG_AOM_HIGHBITDEPTH

      dst->buf += 2 * MI_SIZE * dst->stride;
      mask_16x16 >>= MI_SIZE;
      mask_8x8 >>= MI_SIZE;
      mask_4x4 >>= MI_SIZE;
      mask_4x4_int >>= MI_SIZE;
    }
  }
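
  /* Chroma mask geometry (worked example, editor's illustration): the
     16-bit uv masks hold one bit per chroma 8x8 block, four bits per chroma
     row. One row2 call consumes the low 8 bits (two chroma rows), matching
     the MI_SIZE = 8-bit shift above; the horizontal pass below advances one
     chroma row at a time, hence its MI_SIZE / 2 = 4-bit shifts. */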

  // Horizontal pass
  dst->buf = dst0;
  mask_16x16 = lfm->above_uv[TX_16X16];
  mask_8x8 = lfm->above_uv[TX_8X8];
  mask_4x4 = lfm->above_uv[TX_4X4];
  mask_4x4_int = lfm->above_int_4x4_uv;

  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
    const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
    const unsigned int mask_4x4_int_r =
        skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      mask_16x16_r = mask_16x16 & 0xf;
      mask_8x8_r = mask_8x8 & 0xf;
      mask_4x4_r = mask_4x4 & 0xf;
    }

#if CONFIG_AOM_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
          mask_4x4_r, mask_4x4_int_r, &cm->lf_info, &lfm->lfl_uv[r >> 1][0],
          (int)cm->bit_depth);
    } else {
      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                               mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
                               &lfm->lfl_uv[r >> 1][0]);
    }
#else
    filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                             mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
                             &lfm->lfl_uv[r >> 1][0]);
#endif  // CONFIG_AOM_HIGHBITDEPTH

    dst->buf += MI_SIZE * dst->stride;
    mask_16x16 >>= MI_SIZE / 2;
    mask_8x8 >>= MI_SIZE / 2;
    mask_4x4 >>= MI_SIZE / 2;
    mask_4x4_int >>= MI_SIZE / 2;
  }
}

void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
                          struct macroblockd_plane planes[MAX_MB_PLANE],
                          int start, int stop, int y_only) {
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
  int mi_row, mi_col;

#if CONFIG_VAR_TX
  memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
#endif  // CONFIG_VAR_TX
  for (mi_row = start; mi_row < stop; mi_row += cm->mib_size) {
    MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
#if CONFIG_VAR_TX
    memset(cm->left_txfm_context, TX_SIZES, MAX_MIB_SIZE);
#endif  // CONFIG_VAR_TX
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
      int plane;

      av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);

      for (plane = 0; plane < num_planes; ++plane)
        av1_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row,
                                      mi_col);
    }
  }
#else
  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
  int mi_row, mi_col;
  enum lf_path path;
  LOOP_FILTER_MASK lfm;

  if (y_only)
    path = LF_PATH_444;
  else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
    path = LF_PATH_420;
  else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
    path = LF_PATH_444;
  else
    path = LF_PATH_SLOW;
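
  /* Path selection by chroma subsampling (editor's illustrative summary):
       4:2:0 (ss_x = 1, ss_y = 1) -> LF_PATH_420, precomputed-mask chroma path
       4:4:4 (ss_x = 0, ss_y = 0) -> LF_PATH_444, same fast path as luma
       anything else (e.g. 4:2:2) -> LF_PATH_SLOW, per-block non420 fallback */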

  for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
    MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
      int plane;

      av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);

      // TODO(JBB): Make setup_mask work for non 420.
      av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);

      av1_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
      for (plane = 1; plane < num_planes; ++plane) {
        switch (path) {
          case LF_PATH_420:
            av1_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
            break;
          case LF_PATH_444:
            av1_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
            break;
          case LF_PATH_SLOW:
            av1_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
                                          mi_row, mi_col);
            break;
        }
      }
    }
  }
#endif  // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
}

void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                           MACROBLOCKD *xd, int frame_filter_level, int y_only,
                           int partial_frame) {
  int start_mi_row, end_mi_row, mi_rows_to_filter;
  if (!frame_filter_level) return;
  start_mi_row = 0;
  mi_rows_to_filter = cm->mi_rows;
  if (partial_frame && cm->mi_rows > 8) {
    start_mi_row = cm->mi_rows >> 1;
    start_mi_row &= 0xfffffff8;
    mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
  }
  end_mi_row = start_mi_row + mi_rows_to_filter;
  av1_loop_filter_frame_init(cm, frame_filter_level);
  av1_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only);
}
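
/* Usage sketch (editor's illustration, not part of the build; assumes cm, xd
   and frame are already initialized and that the frame header carried a
   nonzero level in cm->lf.filter_level). partial_frame restricts filtering
   to a middle strip: e.g. with cm->mi_rows = 100,
     start_mi_row = (100 >> 1) & ~7 = 48,
     mi_rows_to_filter = AOMMAX(100 / 8, 8) = 12,
   so only mi rows 48..59 are filtered -- a cheaper pass suited to the
   encoder's filter-level search. */
#if 0
av1_loop_filter_frame(frame, cm, xd, cm->lf.filter_level, /*y_only=*/0,
                      /*partial_frame=*/0);
#endif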

void av1_loop_filter_data_reset(
    LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
    struct AV1Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) {
  lf_data->frame_buffer = frame_buffer;
  lf_data->cm = cm;
  lf_data->start = 0;
  lf_data->stop = 0;
  lf_data->y_only = 0;
  memcpy(lf_data->planes, planes, sizeof(lf_data->planes));
}

int av1_loop_filter_worker(LFWorkerData *const lf_data, void *unused) {
  (void)unused;
  av1_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
                       lf_data->start, lf_data->stop, lf_data->y_only);
  return 1;
}
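
/* Single-threaded usage sketch (editor's illustration, not part of the
   build; frame_buffer, cm and planes are assumed to be set up elsewhere):
   reset the worker data, widen its row range to the whole frame, and invoke
   the hook directly. A threaded caller would instead give each worker its
   own start/stop rows. */
#if 0
LFWorkerData lf_data;
av1_loop_filter_data_reset(&lf_data, frame_buffer, cm, planes);
lf_data.stop = cm->mi_rows;
av1_loop_filter_worker(&lf_data, /*unused=*/NULL);
#endif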