/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "aom/aom_image.h"
#include "config/aom_config.h"
#include "config/aom_scale_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
#include "av1/common/av1_loopfilter.h"
#include "av1/common/entropymode.h"
#include "av1/common/thread_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"

// Set up nsync by width.
static INLINE int get_sync_range(int width) {
  // nsync numbers are picked by testing. For example, for 4k
  // video, using 4 gives best performance.
  if (width < 640)
    return 1;
  else if (width <= 1280)
    return 2;
  else if (width <= 4096)
    return 4;
  else
    return 8;
}

static INLINE int get_lr_sync_range(int width) {
#if 0
  // nsync numbers are picked by testing. For example, for 4k
  // video, using 4 gives best performance.
  if (width < 640)
    return 1;
  else if (width <= 1280)
    return 2;
  else if (width <= 4096)
    return 4;
  else
    return 8;
#else
  (void)width;
  return 1;
#endif
}

// Allocate memory for lf row synchronization
void av1_loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows,
                           int width, int num_workers) {
  lf_sync->rows = rows;
  lf_sync->lf_mt_exit = false;
#if CONFIG_MULTITHREAD
  {
    int i, j;

    for (j = 0; j < MAX_MB_PLANE; j++) {
      CHECK_MEM_ERROR(cm, lf_sync->mutex_[j],
                      aom_malloc(sizeof(*(lf_sync->mutex_[j])) * rows));
      if (lf_sync->mutex_[j]) {
        for (i = 0; i < rows; ++i) {
          pthread_mutex_init(&lf_sync->mutex_[j][i], NULL);
        }
      }

      CHECK_MEM_ERROR(cm, lf_sync->cond_[j],
                      aom_malloc(sizeof(*(lf_sync->cond_[j])) * rows));
      if (lf_sync->cond_[j]) {
        for (i = 0; i < rows; ++i) {
          pthread_cond_init(&lf_sync->cond_[j][i], NULL);
        }
      }
    }

    CHECK_MEM_ERROR(cm, lf_sync->job_mutex,
                    aom_malloc(sizeof(*(lf_sync->job_mutex))));
    if (lf_sync->job_mutex) {
      pthread_mutex_init(lf_sync->job_mutex, NULL);
    }
  }
#endif  // CONFIG_MULTITHREAD
  CHECK_MEM_ERROR(cm, lf_sync->lfdata,
                  aom_malloc(num_workers * sizeof(*(lf_sync->lfdata))));
  lf_sync->num_workers = num_workers;

  for (int j = 0; j < MAX_MB_PLANE; j++) {
    CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col[j],
                    aom_malloc(sizeof(*(lf_sync->cur_sb_col[j])) * rows));
  }
  CHECK_MEM_ERROR(
      cm, lf_sync->job_queue,
      aom_malloc(sizeof(*(lf_sync->job_queue)) * rows * MAX_MB_PLANE * 2));
  // Set up nsync.
  lf_sync->sync_range = get_sync_range(width);
}

// Deallocate lf synchronization related mutex and data
void av1_loop_filter_dealloc(AV1LfSync *lf_sync) {
  if (lf_sync != NULL) {
    int j;
#if CONFIG_MULTITHREAD
    int i;
    for (j = 0; j < MAX_MB_PLANE; j++) {
      if (lf_sync->mutex_[j] != NULL) {
        for (i = 0; i < lf_sync->rows; ++i) {
          pthread_mutex_destroy(&lf_sync->mutex_[j][i]);
        }
        aom_free(lf_sync->mutex_[j]);
      }
      if (lf_sync->cond_[j] != NULL) {
        for (i = 0; i < lf_sync->rows; ++i) {
          pthread_cond_destroy(&lf_sync->cond_[j][i]);
        }
        aom_free(lf_sync->cond_[j]);
      }
    }
    if (lf_sync->job_mutex != NULL) {
      pthread_mutex_destroy(lf_sync->job_mutex);
      aom_free(lf_sync->job_mutex);
    }
#endif  // CONFIG_MULTITHREAD
    aom_free(lf_sync->lfdata);
    for (j = 0; j < MAX_MB_PLANE; j++) {
      aom_free(lf_sync->cur_sb_col[j]);
    }

    aom_free(lf_sync->job_queue);
    // clear the structure as the source of this call may be a resize in which
    // case this call will be followed by an _alloc() which may fail.
    av1_zero(*lf_sync);
  }
}

void av1_alloc_cdef_sync(AV1_COMMON *const cm, AV1CdefSync *cdef_sync,
                         int num_workers) {
  if (num_workers < 1) return;
#if CONFIG_MULTITHREAD
  if (cdef_sync->mutex_ == NULL) {
    CHECK_MEM_ERROR(cm, cdef_sync->mutex_,
                    aom_malloc(sizeof(*(cdef_sync->mutex_))));
    if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
  }
#else
  (void)cm;
  (void)cdef_sync;
#endif  // CONFIG_MULTITHREAD
}

void av1_free_cdef_sync(AV1CdefSync *cdef_sync) {
  if (cdef_sync == NULL) return;
#if CONFIG_MULTITHREAD
  if (cdef_sync->mutex_ != NULL) {
    pthread_mutex_destroy(cdef_sync->mutex_);
    aom_free(cdef_sync->mutex_);
  }
#endif  // CONFIG_MULTITHREAD
}

static INLINE void cdef_row_mt_sync_read(AV1CdefSync *const cdef_sync,
                                         int row) {
  if (!row) return;
#if CONFIG_MULTITHREAD
  AV1CdefRowSync *const cdef_row_mt = cdef_sync->cdef_row_mt;
  pthread_mutex_lock(cdef_row_mt[row - 1].row_mutex_);
  while (cdef_row_mt[row - 1].is_row_done != 1)
    pthread_cond_wait(cdef_row_mt[row - 1].row_cond_,
                      cdef_row_mt[row - 1].row_mutex_);
  cdef_row_mt[row - 1].is_row_done = 0;
  pthread_mutex_unlock(cdef_row_mt[row - 1].row_mutex_);
#else
  (void)cdef_sync;
#endif  // CONFIG_MULTITHREAD
}

static INLINE void cdef_row_mt_sync_write(AV1CdefSync *const cdef_sync,
                                          int row) {
#if CONFIG_MULTITHREAD
  AV1CdefRowSync *const cdef_row_mt = cdef_sync->cdef_row_mt;
  pthread_mutex_lock(cdef_row_mt[row].row_mutex_);
  pthread_cond_signal(cdef_row_mt[row].row_cond_);
  cdef_row_mt[row].is_row_done = 1;
  pthread_mutex_unlock(cdef_row_mt[row].row_mutex_);
#else
  (void)cdef_sync;
  (void)row;
#endif  // CONFIG_MULTITHREAD
}

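// Loop filter row synchronization: before filtering superblock column 'c' of
// row 'r', wait until row 'r - 1' has filtered at least 'c + sync_range'
// superblock columns of the same plane.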
static INLINE void sync_read(AV1LfSync *const lf_sync, int r, int c,
                             int plane) {
#if CONFIG_MULTITHREAD
  const int nsync = lf_sync->sync_range;

  if (r && !(c & (nsync - 1))) {
    pthread_mutex_t *const mutex = &lf_sync->mutex_[plane][r - 1];
    pthread_mutex_lock(mutex);

    while (c > lf_sync->cur_sb_col[plane][r - 1] - nsync) {
      pthread_cond_wait(&lf_sync->cond_[plane][r - 1], mutex);
    }
    pthread_mutex_unlock(mutex);
  }
#else
  (void)lf_sync;
  (void)r;
  (void)c;
  (void)plane;
#endif  // CONFIG_MULTITHREAD
}

static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
                              const int sb_cols, int plane) {
#if CONFIG_MULTITHREAD
  const int nsync = lf_sync->sync_range;
  int cur;
  // Only signal when there are enough filtered SBs for the next row to run.
  int sig = 1;

  if (c < sb_cols - 1) {
    cur = c;
    if (c % nsync) sig = 0;
  } else {
    cur = sb_cols + nsync;
  }

  if (sig) {
    pthread_mutex_lock(&lf_sync->mutex_[plane][r]);

    lf_sync->cur_sb_col[plane][r] = cur;

    pthread_cond_broadcast(&lf_sync->cond_[plane][r]);
    pthread_mutex_unlock(&lf_sync->mutex_[plane][r]);
  }
#else
  (void)lf_sync;
  (void)r;
  (void)c;
  (void)sb_cols;
  (void)plane;
#endif  // CONFIG_MULTITHREAD
}

// One job of row loopfiltering: filter one superblock row of one plane in the
// given direction (dir == 0: vertical edges, dir == 1: horizontal edges).
void av1_thread_loop_filter_rows(
    const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
    struct macroblockd_plane *planes, MACROBLOCKD *xd, int mi_row, int plane,
    int dir, int lpf_opt_level, AV1LfSync *const lf_sync,
    struct aom_internal_error_info *error_info,
    AV1_DEBLOCKING_PARAMETERS *params_buf, TX_SIZE *tx_buf,
    int num_mis_in_lpf_unit_height_log2) {
  // TODO(aomedia:3276): Pass error_info to the low-level functions as required
  // in future to handle error propagation.
  (void)error_info;
  const int sb_cols =
      CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, MAX_MIB_SIZE_LOG2);
  const int r = mi_row >> num_mis_in_lpf_unit_height_log2;
  int mi_col, c;

  const bool joint_filter_chroma = (lpf_opt_level == 2) && plane > AOM_PLANE_Y;
  const int num_planes = joint_filter_chroma ? 2 : 1;
  assert(IMPLIES(joint_filter_chroma, plane == AOM_PLANE_U));

  if (dir == 0) {
    for (mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += MAX_MIB_SIZE) {
      c = mi_col >> MAX_MIB_SIZE_LOG2;

      av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer,
                           mi_row, mi_col, plane, plane + num_planes);
      if (lpf_opt_level) {
        if (plane == AOM_PLANE_Y) {
          av1_filter_block_plane_vert_opt(cm, xd, &planes[plane], mi_row,
                                          mi_col, params_buf, tx_buf,
                                          num_mis_in_lpf_unit_height_log2);
        } else {
          av1_filter_block_plane_vert_opt_chroma(
              cm, xd, &planes[plane], mi_row, mi_col, params_buf, tx_buf, plane,
              joint_filter_chroma, num_mis_in_lpf_unit_height_log2);
        }
      } else {
        av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row,
                                    mi_col);
      }
      if (lf_sync != NULL) {
        sync_write(lf_sync, r, c, sb_cols, plane);
      }
    }
  } else if (dir == 1) {
    for (mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += MAX_MIB_SIZE) {
      c = mi_col >> MAX_MIB_SIZE_LOG2;

      if (lf_sync != NULL) {
        // Wait for vertical edge filtering of the top-right block to be
        // completed
        sync_read(lf_sync, r, c, plane);

        // Wait for vertical edge filtering of the right block to be completed
        sync_read(lf_sync, r + 1, c, plane);
      }

#if CONFIG_MULTITHREAD
      if (lf_sync && lf_sync->num_workers > 1) {
        pthread_mutex_lock(lf_sync->job_mutex);
        const bool lf_mt_exit = lf_sync->lf_mt_exit;
        pthread_mutex_unlock(lf_sync->job_mutex);
        // Exit in case any worker has encountered an error.
        if (lf_mt_exit) return;
      }
#endif

      av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer,
                           mi_row, mi_col, plane, plane + num_planes);
      if (lpf_opt_level) {
        if (plane == AOM_PLANE_Y) {
          av1_filter_block_plane_horz_opt(cm, xd, &planes[plane], mi_row,
                                          mi_col, params_buf, tx_buf,
                                          num_mis_in_lpf_unit_height_log2);
        } else {
          av1_filter_block_plane_horz_opt_chroma(
              cm, xd, &planes[plane], mi_row, mi_col, params_buf, tx_buf, plane,
              joint_filter_chroma, num_mis_in_lpf_unit_height_log2);
        }
      } else {
        av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row,
                                    mi_col);
      }
    }
  }
}

void av1_set_vert_loop_filter_done(AV1_COMMON *cm, AV1LfSync *lf_sync,
                                   int num_mis_in_lpf_unit_height_log2) {
  int plane, sb_row;
  const int sb_cols =
      CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, num_mis_in_lpf_unit_height_log2);
  const int sb_rows =
      CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, num_mis_in_lpf_unit_height_log2);

  // In case of loopfilter row-multithreading, the worker on an SB row waits
  // for the vertical edge filtering of the right and top-right SBs. Hence, if
  // a thread (main/worker) encounters an error, mark the vertical
  // loopfiltering of every SB row in the frame as complete in order to avoid
  // dependent workers waiting indefinitely.
  for (sb_row = 0; sb_row < sb_rows; ++sb_row)
    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
      sync_write(lf_sync, sb_row, sb_cols - 1, sb_cols, plane);
}

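// Waits for all loop filter workers to finish and, if any worker (including
// the main thread) failed, reports the error through cm->error.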
static AOM_INLINE void sync_lf_workers(AVxWorker *const workers,
                                       AV1_COMMON *const cm, int num_workers) {
  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  int had_error = workers[0].had_error;
  struct aom_internal_error_info error_info;

  // Read the error_info of the main thread.
  if (had_error) {
    AVxWorker *const worker = &workers[0];
    error_info = ((LFWorkerData *)worker->data2)->error_info;
  }

  // Wait till all rows are finished.
  for (int i = num_workers - 1; i > 0; --i) {
    AVxWorker *const worker = &workers[i];
    if (!winterface->sync(worker)) {
      had_error = 1;
      error_info = ((LFWorkerData *)worker->data2)->error_info;
    }
  }
  if (had_error)
    aom_internal_error(cm->error, error_info.error_code, "%s",
                       error_info.detail);
}

// Row-based multi-threaded loopfilter hook
static int loop_filter_row_worker(void *arg1, void *arg2) {
  AV1LfSync *const lf_sync = (AV1LfSync *)arg1;
  LFWorkerData *const lf_data = (LFWorkerData *)arg2;
  AV1LfMTInfo *cur_job_info;

#if CONFIG_MULTITHREAD
  pthread_mutex_t *job_mutex_ = lf_sync->job_mutex;
#endif

  struct aom_internal_error_info *const error_info = &lf_data->error_info;

  // The jmp_buf is valid only for the duration of the function that calls
  // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
  // before it returns.
  if (setjmp(error_info->jmp)) {
    error_info->setjmp = 0;
#if CONFIG_MULTITHREAD
    pthread_mutex_lock(job_mutex_);
    lf_sync->lf_mt_exit = true;
    pthread_mutex_unlock(job_mutex_);
#endif
    av1_set_vert_loop_filter_done(lf_data->cm, lf_sync, MAX_MIB_SIZE_LOG2);
    return 0;
  }
  error_info->setjmp = 1;

  while ((cur_job_info = get_lf_job_info(lf_sync)) != NULL) {
    const int lpf_opt_level = cur_job_info->lpf_opt_level;
    av1_thread_loop_filter_rows(
        lf_data->frame_buffer, lf_data->cm, lf_data->planes, lf_data->xd,
        cur_job_info->mi_row, cur_job_info->plane, cur_job_info->dir,
        lpf_opt_level, lf_sync, error_info, lf_data->params_buf,
        lf_data->tx_buf, MAX_MIB_SIZE_LOG2);
  }
  error_info->setjmp = 0;
  return 1;
}

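// Sets up per-worker loop filter data, runs loop_filter_row_worker() on each
// worker (worker 0 executes on the calling thread) and waits for all of them
// to finish.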
static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                                MACROBLOCKD *xd, int start, int stop,
                                const int planes_to_lf[MAX_MB_PLANE],
                                AVxWorker *workers, int num_workers,
                                AV1LfSync *lf_sync, int lpf_opt_level) {
  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  int i;
  loop_filter_frame_mt_init(cm, start, stop, planes_to_lf, num_workers, lf_sync,
                            lpf_opt_level, MAX_MIB_SIZE_LOG2);

  // Set up loopfilter thread data.
  for (i = num_workers - 1; i >= 0; --i) {
    AVxWorker *const worker = &workers[i];
    LFWorkerData *const lf_data = &lf_sync->lfdata[i];

    worker->hook = loop_filter_row_worker;
    worker->data1 = lf_sync;
    worker->data2 = lf_data;

    // Loopfilter data
    loop_filter_data_reset(lf_data, frame, cm, xd);

    // Start loopfiltering
    worker->had_error = 0;
    if (i == 0) {
      winterface->execute(worker);
    } else {
      winterface->launch(worker);
    }
  }

  sync_lf_workers(workers, cm, num_workers);
}

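// Single-threaded path: filters rows [start, stop) of all requested planes on
// the calling thread.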
static void loop_filter_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                             MACROBLOCKD *xd, int start, int stop,
                             const int planes_to_lf[MAX_MB_PLANE],
                             int lpf_opt_level) {
  // Filter top rows of all planes first, in case the output can be partially
  // reconstructed row by row.
  int mi_row, plane, dir;

  AV1_DEBLOCKING_PARAMETERS params_buf[MAX_MIB_SIZE];
  TX_SIZE tx_buf[MAX_MIB_SIZE];
  for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
      if (skip_loop_filter_plane(planes_to_lf, plane, lpf_opt_level)) {
        continue;
      }

      for (dir = 0; dir < 2; ++dir) {
        av1_thread_loop_filter_rows(frame, cm, xd->plane, xd, mi_row, plane,
                                    dir, lpf_opt_level, /*lf_sync=*/NULL,
                                    xd->error_info, params_buf, tx_buf,
                                    MAX_MIB_SIZE_LOG2);
      }
    }
  }
}

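// Entry point for loop filtering a frame: filters the whole frame, or only its
// central rows when 'partial_frame' is set, using the worker pool when more
// than one worker is available.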
void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                              MACROBLOCKD *xd, int plane_start, int plane_end,
                              int partial_frame, AVxWorker *workers,
                              int num_workers, AV1LfSync *lf_sync,
                              int lpf_opt_level) {
  int start_mi_row, end_mi_row, mi_rows_to_filter;
  int planes_to_lf[MAX_MB_PLANE];

  if (!check_planes_to_loop_filter(&cm->lf, planes_to_lf, plane_start,
                                   plane_end))
    return;

  start_mi_row = 0;
  mi_rows_to_filter = cm->mi_params.mi_rows;
  if (partial_frame && cm->mi_params.mi_rows > 8) {
    start_mi_row = cm->mi_params.mi_rows >> 1;
    start_mi_row &= 0xfffffff8;
    mi_rows_to_filter = AOMMAX(cm->mi_params.mi_rows / 8, 8);
  }
  end_mi_row = start_mi_row + mi_rows_to_filter;
  av1_loop_filter_frame_init(cm, plane_start, plane_end);

  if (num_workers > 1) {
    // Enqueue and execute loopfiltering jobs.
    loop_filter_rows_mt(frame, cm, xd, start_mi_row, end_mi_row, planes_to_lf,
                        workers, num_workers, lf_sync, lpf_opt_level);
  } else {
    // Directly filter in the main thread.
    loop_filter_rows(frame, cm, xd, start_mi_row, end_mi_row, planes_to_lf,
                     lpf_opt_level);
  }
}

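// Loop restoration row synchronization, analogous to sync_read()/sync_write()
// above: a unit row waits until the row above it has processed enough unit
// columns before proceeding.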
static INLINE void lr_sync_read(void *const lr_sync, int r, int c, int plane) {
#if CONFIG_MULTITHREAD
  AV1LrSync *const loop_res_sync = (AV1LrSync *)lr_sync;
  const int nsync = loop_res_sync->sync_range;

  if (r && !(c & (nsync - 1))) {
    pthread_mutex_t *const mutex = &loop_res_sync->mutex_[plane][r - 1];
    pthread_mutex_lock(mutex);

    while (c > loop_res_sync->cur_sb_col[plane][r - 1] - nsync) {
      pthread_cond_wait(&loop_res_sync->cond_[plane][r - 1], mutex);
    }
    pthread_mutex_unlock(mutex);
  }
#else
  (void)lr_sync;
  (void)r;
  (void)c;
  (void)plane;
#endif  // CONFIG_MULTITHREAD
}

static INLINE void lr_sync_write(void *const lr_sync, int r, int c,
                                 const int sb_cols, int plane) {
#if CONFIG_MULTITHREAD
  AV1LrSync *const loop_res_sync = (AV1LrSync *)lr_sync;
  const int nsync = loop_res_sync->sync_range;
  int cur;
  // Only signal when there are enough filtered SBs for the next row to run.
  int sig = 1;

  if (c < sb_cols - 1) {
    cur = c;
    if (c % nsync) sig = 0;
  } else {
    cur = sb_cols + nsync;
  }

  if (sig) {
    pthread_mutex_lock(&loop_res_sync->mutex_[plane][r]);

    loop_res_sync->cur_sb_col[plane][r] = cur;

    pthread_cond_broadcast(&loop_res_sync->cond_[plane][r]);
    pthread_mutex_unlock(&loop_res_sync->mutex_[plane][r]);
  }
#else
  (void)lr_sync;
  (void)r;
  (void)c;
  (void)sb_cols;
  (void)plane;
#endif  // CONFIG_MULTITHREAD
}

// Allocate memory for loop restoration row synchronization
void av1_loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm,
                                int num_workers, int num_rows_lr,
                                int num_planes, int width) {
  lr_sync->rows = num_rows_lr;
  lr_sync->num_planes = num_planes;
#if CONFIG_MULTITHREAD
  {
    int i, j;

    for (j = 0; j < num_planes; j++) {
      CHECK_MEM_ERROR(cm, lr_sync->mutex_[j],
                      aom_malloc(sizeof(*(lr_sync->mutex_[j])) * num_rows_lr));
      if (lr_sync->mutex_[j]) {
        for (i = 0; i < num_rows_lr; ++i) {
          pthread_mutex_init(&lr_sync->mutex_[j][i], NULL);
        }
      }

      CHECK_MEM_ERROR(cm, lr_sync->cond_[j],
                      aom_malloc(sizeof(*(lr_sync->cond_[j])) * num_rows_lr));
      if (lr_sync->cond_[j]) {
        for (i = 0; i < num_rows_lr; ++i) {
          pthread_cond_init(&lr_sync->cond_[j][i], NULL);
        }
      }
    }

    CHECK_MEM_ERROR(cm, lr_sync->job_mutex,
                    aom_malloc(sizeof(*(lr_sync->job_mutex))));
    if (lr_sync->job_mutex) {
      pthread_mutex_init(lr_sync->job_mutex, NULL);
    }
  }
#endif  // CONFIG_MULTITHREAD
  CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata,
                  aom_malloc(num_workers * sizeof(*(lr_sync->lrworkerdata))));

  for (int worker_idx = 0; worker_idx < num_workers; ++worker_idx) {
    if (worker_idx < num_workers - 1) {
      CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata[worker_idx].rst_tmpbuf,
                      (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE));
      CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata[worker_idx].rlbs,
                      aom_malloc(sizeof(RestorationLineBuffers)));

    } else {
      lr_sync->lrworkerdata[worker_idx].rst_tmpbuf = cm->rst_tmpbuf;
      lr_sync->lrworkerdata[worker_idx].rlbs = cm->rlbs;
    }
  }

  lr_sync->num_workers = num_workers;
  lr_sync->lr_mt_exit = false;

  for (int j = 0; j < num_planes; j++) {
    CHECK_MEM_ERROR(
        cm, lr_sync->cur_sb_col[j],
        aom_malloc(sizeof(*(lr_sync->cur_sb_col[j])) * num_rows_lr));
  }
  CHECK_MEM_ERROR(
      cm, lr_sync->job_queue,
      aom_malloc(sizeof(*(lr_sync->job_queue)) * num_rows_lr * num_planes));
  // Set up nsync.
  lr_sync->sync_range = get_lr_sync_range(width);
}

// Deallocate loop restoration synchronization related mutex and data
void av1_loop_restoration_dealloc(AV1LrSync *lr_sync) {
  if (lr_sync != NULL) {
    int j;
#if CONFIG_MULTITHREAD
    int i;
    for (j = 0; j < MAX_MB_PLANE; j++) {
      if (lr_sync->mutex_[j] != NULL) {
        for (i = 0; i < lr_sync->rows; ++i) {
          pthread_mutex_destroy(&lr_sync->mutex_[j][i]);
        }
        aom_free(lr_sync->mutex_[j]);
      }
      if (lr_sync->cond_[j] != NULL) {
        for (i = 0; i < lr_sync->rows; ++i) {
          pthread_cond_destroy(&lr_sync->cond_[j][i]);
        }
        aom_free(lr_sync->cond_[j]);
      }
    }
    if (lr_sync->job_mutex != NULL) {
      pthread_mutex_destroy(lr_sync->job_mutex);
      aom_free(lr_sync->job_mutex);
    }
#endif  // CONFIG_MULTITHREAD
    for (j = 0; j < MAX_MB_PLANE; j++) {
      aom_free(lr_sync->cur_sb_col[j]);
    }

    aom_free(lr_sync->job_queue);

    if (lr_sync->lrworkerdata) {
      for (int worker_idx = 0; worker_idx < lr_sync->num_workers - 1;
           worker_idx++) {
        LRWorkerData *const workerdata_data =
            lr_sync->lrworkerdata + worker_idx;

        aom_free(workerdata_data->rst_tmpbuf);
        aom_free(workerdata_data->rlbs);
      }
      aom_free(lr_sync->lrworkerdata);
    }

    // clear the structure as the source of this call may be a resize in which
    // case this call will be followed by an _alloc() which may fail.
    av1_zero(*lr_sync);
  }
}

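// Splits each restoration plane into unit rows and enqueues one job per row.
// Even unit rows are queued ahead of odd ones: even rows only signal
// completion (sync_mode == 0) while odd rows wait on their even neighbors
// (sync_mode == 1).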
static void enqueue_lr_jobs(AV1LrSync *lr_sync, AV1LrStruct *lr_ctxt,
                            AV1_COMMON *cm) {
  FilterFrameCtxt *ctxt = lr_ctxt->ctxt;

  const int num_planes = av1_num_planes(cm);
  AV1LrMTInfo *lr_job_queue = lr_sync->job_queue;
  int32_t lr_job_counter[2], num_even_lr_jobs = 0;
  lr_sync->jobs_enqueued = 0;
  lr_sync->jobs_dequeued = 0;

  for (int plane = 0; plane < num_planes; plane++) {
    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
    num_even_lr_jobs =
        num_even_lr_jobs + ((ctxt[plane].rsi->vert_units + 1) >> 1);
  }
  lr_job_counter[0] = 0;
  lr_job_counter[1] = num_even_lr_jobs;

  for (int plane = 0; plane < num_planes; plane++) {
    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
    const int is_uv = plane > 0;
    const int ss_y = is_uv && cm->seq_params->subsampling_y;
    const int unit_size = ctxt[plane].rsi->restoration_unit_size;
    const int plane_h = ctxt[plane].plane_h;
    const int ext_size = unit_size * 3 / 2;

    int y0 = 0, i = 0;
    while (y0 < plane_h) {
      int remaining_h = plane_h - y0;
      int h = (remaining_h < ext_size) ? remaining_h : unit_size;

      RestorationTileLimits limits;
      limits.v_start = y0;
      limits.v_end = y0 + h;
      assert(limits.v_end <= plane_h);
      // Offset upwards to align with the restoration processing stripe
      const int voffset = RESTORATION_UNIT_OFFSET >> ss_y;
      limits.v_start = AOMMAX(0, limits.v_start - voffset);
      if (limits.v_end < plane_h) limits.v_end -= voffset;

      assert(lr_job_counter[0] <= num_even_lr_jobs);

      lr_job_queue[lr_job_counter[i & 1]].lr_unit_row = i;
      lr_job_queue[lr_job_counter[i & 1]].plane = plane;
      lr_job_queue[lr_job_counter[i & 1]].v_start = limits.v_start;
      lr_job_queue[lr_job_counter[i & 1]].v_end = limits.v_end;
      lr_job_queue[lr_job_counter[i & 1]].sync_mode = i & 1;
      if ((i & 1) == 0) {
        lr_job_queue[lr_job_counter[i & 1]].v_copy_start =
            limits.v_start + RESTORATION_BORDER;
        lr_job_queue[lr_job_counter[i & 1]].v_copy_end =
            limits.v_end - RESTORATION_BORDER;
        if (i == 0) {
          assert(limits.v_start == 0);
          lr_job_queue[lr_job_counter[i & 1]].v_copy_start = 0;
        }
        if (i == (ctxt[plane].rsi->vert_units - 1)) {
          assert(limits.v_end == plane_h);
          lr_job_queue[lr_job_counter[i & 1]].v_copy_end = plane_h;
        }
      } else {
        lr_job_queue[lr_job_counter[i & 1]].v_copy_start =
            AOMMAX(limits.v_start - RESTORATION_BORDER, 0);
        lr_job_queue[lr_job_counter[i & 1]].v_copy_end =
            AOMMIN(limits.v_end + RESTORATION_BORDER, plane_h);
      }
      lr_job_counter[i & 1]++;
      lr_sync->jobs_enqueued++;

      y0 += h;
      ++i;
    }
  }
}

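// Dequeues the next loop restoration job, or returns NULL when the queue is
// empty or a worker has signalled an error.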
static AV1LrMTInfo *get_lr_job_info(AV1LrSync *lr_sync) {
  AV1LrMTInfo *cur_job_info = NULL;

#if CONFIG_MULTITHREAD
  pthread_mutex_lock(lr_sync->job_mutex);

  if (!lr_sync->lr_mt_exit && lr_sync->jobs_dequeued < lr_sync->jobs_enqueued) {
    cur_job_info = lr_sync->job_queue + lr_sync->jobs_dequeued;
    lr_sync->jobs_dequeued++;
  }

  pthread_mutex_unlock(lr_sync->job_mutex);
#else
  (void)lr_sync;
#endif

  return cur_job_info;
}

static void set_loop_restoration_done(AV1LrSync *const lr_sync,
                                      FilterFrameCtxt *const ctxt) {
  for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
    if (ctxt[plane].rsi->frame_restoration_type == RESTORE_NONE) continue;
    int y0 = 0, row_number = 0;
    const int unit_size = ctxt[plane].rsi->restoration_unit_size;
    const int plane_h = ctxt[plane].plane_h;
    const int ext_size = unit_size * 3 / 2;
    const int hnum_rest_units = ctxt[plane].rsi->horz_units;
    while (y0 < plane_h) {
      const int remaining_h = plane_h - y0;
      const int h = (remaining_h < ext_size) ? remaining_h : unit_size;
      lr_sync_write(lr_sync, row_number, hnum_rest_units - 1, hnum_rest_units,
                    plane);
      y0 += h;
      ++row_number;
    }
  }
}

// Implement row loop restoration for each thread.
static int loop_restoration_row_worker(void *arg1, void *arg2) {
  AV1LrSync *const lr_sync = (AV1LrSync *)arg1;
  LRWorkerData *lrworkerdata = (LRWorkerData *)arg2;
  AV1LrStruct *lr_ctxt = (AV1LrStruct *)lrworkerdata->lr_ctxt;
  FilterFrameCtxt *ctxt = lr_ctxt->ctxt;
  int lr_unit_row;
  int plane;
  int plane_w;
#if CONFIG_MULTITHREAD
  pthread_mutex_t *job_mutex_ = lr_sync->job_mutex;
#endif
  struct aom_internal_error_info *const error_info = &lrworkerdata->error_info;

  // The jmp_buf is valid only for the duration of the function that calls
  // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
  // before it returns.
  if (setjmp(error_info->jmp)) {
    error_info->setjmp = 0;
#if CONFIG_MULTITHREAD
    pthread_mutex_lock(job_mutex_);
    lr_sync->lr_mt_exit = true;
    pthread_mutex_unlock(job_mutex_);
#endif
    // In case of loop restoration multithreading, the worker on an even lr
    // block row waits for the completion of the filtering of the top-right and
    // bottom-right blocks. Hence, if a thread (main/worker) encounters an
    // error, mark the filtering of every row in the frame as complete so that
    // dependent workers do not wait indefinitely.
    set_loop_restoration_done(lr_sync, lr_ctxt->ctxt);
    return 0;
  }
  error_info->setjmp = 1;

  typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src_ybc,
                           YV12_BUFFER_CONFIG *dst_ybc, int hstart, int hend,
                           int vstart, int vend);
  static const copy_fun copy_funs[MAX_MB_PLANE] = {
    aom_yv12_partial_coloc_copy_y, aom_yv12_partial_coloc_copy_u,
    aom_yv12_partial_coloc_copy_v
  };

  while (1) {
    AV1LrMTInfo *cur_job_info = get_lr_job_info(lr_sync);
    if (cur_job_info != NULL) {
      RestorationTileLimits limits;
      sync_read_fn_t on_sync_read;
      sync_write_fn_t on_sync_write;
      limits.v_start = cur_job_info->v_start;
      limits.v_end = cur_job_info->v_end;
      lr_unit_row = cur_job_info->lr_unit_row;
      plane = cur_job_info->plane;
      plane_w = ctxt[plane].plane_w;

      // sync_mode == 1 implies only a sync read is required in LR
      // multi-threading; sync_mode == 0 implies only a sync write is required.
      on_sync_read =
          cur_job_info->sync_mode == 1 ? lr_sync_read : av1_lr_sync_read_dummy;
      on_sync_write = cur_job_info->sync_mode == 0 ? lr_sync_write
                                                   : av1_lr_sync_write_dummy;

      av1_foreach_rest_unit_in_row(
          &limits, plane_w, lr_ctxt->on_rest_unit, lr_unit_row,
          ctxt[plane].rsi->restoration_unit_size, ctxt[plane].rsi->horz_units,
          ctxt[plane].rsi->vert_units, plane, &ctxt[plane],
          lrworkerdata->rst_tmpbuf, lrworkerdata->rlbs, on_sync_read,
          on_sync_write, lr_sync, error_info);

      copy_funs[plane](lr_ctxt->dst, lr_ctxt->frame, 0, plane_w,
                       cur_job_info->v_copy_start, cur_job_info->v_copy_end);

      if (lrworkerdata->do_extend_border) {
        aom_extend_frame_borders_plane_row(lr_ctxt->frame, plane,
                                           cur_job_info->v_copy_start,
                                           cur_job_info->v_copy_end);
      }
    } else {
      break;
    }
  }
  error_info->setjmp = 0;
  return 1;
}

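// Same as sync_lf_workers(), but for the loop restoration worker pool.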
static AOM_INLINE void sync_lr_workers(AVxWorker *const workers,
                                       AV1_COMMON *const cm, int num_workers) {
  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  int had_error = workers[0].had_error;
  struct aom_internal_error_info error_info;

  // Read the error_info of the main thread.
  if (had_error) {
    AVxWorker *const worker = &workers[0];
    error_info = ((LRWorkerData *)worker->data2)->error_info;
  }

  // Wait till all rows are finished.
  for (int i = num_workers - 1; i > 0; --i) {
    AVxWorker *const worker = &workers[i];
    if (!winterface->sync(worker)) {
      had_error = 1;
      error_info = ((LRWorkerData *)worker->data2)->error_info;
    }
  }
  if (had_error)
    aom_internal_error(cm->error, error_info.error_code, "%s",
                       error_info.detail);
}

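// (Re)allocates the loop restoration sync object if needed, enqueues one job
// per restoration unit row and runs the jobs on the workers; worker 0 executes
// on the calling thread.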
static void foreach_rest_unit_in_planes_mt(AV1LrStruct *lr_ctxt,
                                           AVxWorker *workers, int num_workers,
                                           AV1LrSync *lr_sync, AV1_COMMON *cm,
                                           int do_extend_border) {
  FilterFrameCtxt *ctxt = lr_ctxt->ctxt;

  const int num_planes = av1_num_planes(cm);

  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  int num_rows_lr = 0;

  for (int plane = 0; plane < num_planes; plane++) {
    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;

    const int plane_h = ctxt[plane].plane_h;
    const int unit_size = cm->rst_info[plane].restoration_unit_size;

    num_rows_lr = AOMMAX(num_rows_lr, av1_lr_count_units(unit_size, plane_h));
  }

  int i;
  assert(MAX_MB_PLANE == 3);

  if (!lr_sync->sync_range || num_rows_lr > lr_sync->rows ||
      num_workers > lr_sync->num_workers || num_planes > lr_sync->num_planes) {
    av1_loop_restoration_dealloc(lr_sync);
    av1_loop_restoration_alloc(lr_sync, cm, num_workers, num_rows_lr,
                               num_planes, cm->width);
  }

  // Initialize cur_sb_col to -1 for all SB rows.
  for (i = 0; i < num_planes; i++) {
    memset(lr_sync->cur_sb_col[i], -1,
           sizeof(*(lr_sync->cur_sb_col[i])) * num_rows_lr);
  }

  enqueue_lr_jobs(lr_sync, lr_ctxt, cm);

  // Set up loop restoration thread data.
  for (i = num_workers - 1; i >= 0; --i) {
    AVxWorker *const worker = &workers[i];
    lr_sync->lrworkerdata[i].lr_ctxt = (void *)lr_ctxt;
    lr_sync->lrworkerdata[i].do_extend_border = do_extend_border;
    worker->hook = loop_restoration_row_worker;
    worker->data1 = lr_sync;
    worker->data2 = &lr_sync->lrworkerdata[i];

    // Start loop restoration
    worker->had_error = 0;
    if (i == 0) {
      winterface->execute(worker);
    } else {
      winterface->launch(worker);
    }
  }

  sync_lr_workers(workers, cm, num_workers);
}

void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
                                          AV1_COMMON *cm, int optimized_lr,
                                          AVxWorker *workers, int num_workers,
                                          AV1LrSync *lr_sync, void *lr_ctxt,
                                          int do_extend_border) {
  assert(!cm->features.all_lossless);

  const int num_planes = av1_num_planes(cm);

  AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt;

  av1_loop_restoration_filter_frame_init(loop_rest_ctxt, frame, cm,
                                         optimized_lr, num_planes);

  foreach_rest_unit_in_planes_mt(loop_rest_ctxt, workers, num_workers, lr_sync,
                                 cm, do_extend_border);
}

// Initializes cdef_sync parameters.
static AOM_INLINE void reset_cdef_job_info(AV1CdefSync *const cdef_sync) {
  cdef_sync->end_of_frame = 0;
  cdef_sync->fbr = 0;
  cdef_sync->fbc = 0;
}

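// Launches the CDEF workers; worker 0 runs synchronously on the calling
// thread.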
static AOM_INLINE void launch_cdef_workers(AVxWorker *const workers,
                                           int num_workers) {
  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  for (int i = num_workers - 1; i >= 0; i--) {
    AVxWorker *const worker = &workers[i];
    worker->had_error = 0;
    if (i == 0)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }
}

static AOM_INLINE void sync_cdef_workers(AVxWorker *const workers,
                                         AV1_COMMON *const cm,
                                         int num_workers) {
  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  int had_error = workers[0].had_error;
  struct aom_internal_error_info error_info;

  // Read the error_info of the main thread.
  if (had_error) {
    AVxWorker *const worker = &workers[0];
    error_info = ((AV1CdefWorkerData *)worker->data2)->error_info;
  }

  // Wait till all rows are finished.
  for (int i = num_workers - 1; i > 0; --i) {
    AVxWorker *const worker = &workers[i];
    if (!winterface->sync(worker)) {
      had_error = 1;
      error_info = ((AV1CdefWorkerData *)worker->data2)->error_info;
    }
  }
  if (had_error)
    aom_internal_error(cm->error, error_info.error_code, "%s",
                       error_info.detail);
}

// Updates the row index of the next job to be processed.
// Also updates end_of_frame flag when the processing of all rows is complete.
static void update_cdef_row_next_job_info(AV1CdefSync *const cdef_sync,
                                          const int nvfb) {
  cdef_sync->fbr++;
  if (cdef_sync->fbr == nvfb) {
    cdef_sync->end_of_frame = 1;
  }
}

// Checks if a job is available. If a job is available, populates the next job
// information and returns 1; otherwise returns 0.
static AOM_INLINE int get_cdef_row_next_job(AV1CdefSync *const cdef_sync,
                                            volatile int *cur_fbr,
                                            const int nvfb) {
#if CONFIG_MULTITHREAD
  pthread_mutex_lock(cdef_sync->mutex_);
#endif  // CONFIG_MULTITHREAD
  int do_next_row = 0;
  // Populates information needed for the current job and updates the row
  // index of the next row to be processed.
  if (!cdef_sync->cdef_mt_exit && cdef_sync->end_of_frame == 0) {
    do_next_row = 1;
    *cur_fbr = cdef_sync->fbr;
    update_cdef_row_next_job_info(cdef_sync, nvfb);
  }
#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(cdef_sync->mutex_);
#endif  // CONFIG_MULTITHREAD
  return do_next_row;
}

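// Marks the CDEF line buffer copy of every filter block row as complete. Used
// on the error path so that workers waiting in cdef_row_mt_sync_read() are
// released.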
static void set_cdef_init_fb_row_done(AV1CdefSync *const cdef_sync, int nvfb) {
  for (int fbr = 0; fbr < nvfb; fbr++) cdef_row_mt_sync_write(cdef_sync, fbr);
}

// Hook function for each thread in CDEF multi-threading.
static int cdef_sb_row_worker_hook(void *arg1, void *arg2) {
  AV1CdefSync *const cdef_sync = (AV1CdefSync *)arg1;
  AV1CdefWorkerData *const cdef_worker = (AV1CdefWorkerData *)arg2;
  AV1_COMMON *cm = cdef_worker->cm;
  const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;

#if CONFIG_MULTITHREAD
  pthread_mutex_t *job_mutex_ = cdef_sync->mutex_;
#endif
  struct aom_internal_error_info *const error_info = &cdef_worker->error_info;

  // The jmp_buf is valid only for the duration of the function that calls
  // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
  // before it returns.
  if (setjmp(error_info->jmp)) {
    error_info->setjmp = 0;
#if CONFIG_MULTITHREAD
    pthread_mutex_lock(job_mutex_);
    cdef_sync->cdef_mt_exit = true;
    pthread_mutex_unlock(job_mutex_);
#endif
    // In case of cdef row-multithreading, the worker on a filter block row
    // (fbr) waits for the line buffer (top and bottom) copy of the row above.
    // Hence, if a thread (main/worker) encounters an error before copying the
    // line buffers, mark the line buffer copy as complete in order to avoid
    // dependent workers waiting indefinitely.
    set_cdef_init_fb_row_done(cdef_sync, nvfb);
    return 0;
  }
  error_info->setjmp = 1;

  volatile int cur_fbr;
  const int num_planes = av1_num_planes(cm);
  while (get_cdef_row_next_job(cdef_sync, &cur_fbr, nvfb)) {
    MACROBLOCKD *xd = cdef_worker->xd;
    av1_cdef_fb_row(cm, xd, cdef_worker->linebuf, cdef_worker->colbuf,
                    cdef_worker->srcbuf, cur_fbr,
                    cdef_worker->cdef_init_fb_row_fn, cdef_sync, error_info);
    if (cdef_worker->do_extend_border) {
      for (int plane = 0; plane < num_planes; ++plane) {
        const YV12_BUFFER_CONFIG *ybf = &cm->cur_frame->buf;
        const int is_uv = plane > 0;
        const int mi_high = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
        const int unit_height = MI_SIZE_64X64 << mi_high;
        const int v_start = cur_fbr * unit_height;
        const int v_end =
            AOMMIN(v_start + unit_height, ybf->crop_heights[is_uv]);
        aom_extend_frame_borders_plane_row(ybf, plane, v_start, v_end);
      }
    }
  }
  error_info->setjmp = 0;
  return 1;
}

// Assigns CDEF hook function and thread data to each worker.
static void prepare_cdef_frame_workers(
    AV1_COMMON *const cm, MACROBLOCKD *xd, AV1CdefWorkerData *const cdef_worker,
    AVxWorkerHook hook, AVxWorker *const workers, AV1CdefSync *const cdef_sync,
    int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
    int do_extend_border) {
  const int num_planes = av1_num_planes(cm);

  cdef_worker[0].srcbuf = cm->cdef_info.srcbuf;
  for (int plane = 0; plane < num_planes; plane++)
    cdef_worker[0].colbuf[plane] = cm->cdef_info.colbuf[plane];
  for (int i = num_workers - 1; i >= 0; i--) {
    AVxWorker *const worker = &workers[i];
    cdef_worker[i].cm = cm;
    cdef_worker[i].xd = xd;
    cdef_worker[i].cdef_init_fb_row_fn = cdef_init_fb_row_fn;
    cdef_worker[i].do_extend_border = do_extend_border;
    for (int plane = 0; plane < num_planes; plane++)
      cdef_worker[i].linebuf[plane] = cm->cdef_info.linebuf[plane];

    worker->hook = hook;
    worker->data1 = cdef_sync;
    worker->data2 = &cdef_worker[i];
  }
}

// Initializes row-level parameters for CDEF frame.
void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
                             const MACROBLOCKD *const xd,
                             CdefBlockInfo *const fb_info,
                             uint16_t **const linebuf, uint16_t *const src,
                             struct AV1CdefSyncData *const cdef_sync, int fbr) {
  const int num_planes = av1_num_planes(cm);
  const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int luma_stride =
      ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols << MI_SIZE_LOG2, 4);

  // For the current filter block, its top-left corner mi structure (mi_tl)
  // is first accessed to check whether the top and left boundaries are
  // frame boundaries. Then bottom-left and top-right mi structures are
  // accessed to check whether the bottom and right boundaries
  // (respectively) are frame boundaries.
  //
  // Note that we can't just check the bottom-right mi structure - e.g. if
  // we're at the right-hand edge of the frame but not the bottom, then
  // the bottom-right mi is NULL but the bottom-left is not.
  fb_info->frame_boundary[TOP] = (MI_SIZE_64X64 * fbr == 0) ? 1 : 0;
  if (fbr != nvfb - 1)
    fb_info->frame_boundary[BOTTOM] =
        (MI_SIZE_64X64 * (fbr + 1) == cm->mi_params.mi_rows) ? 1 : 0;
  else
    fb_info->frame_boundary[BOTTOM] = 1;

  fb_info->src = src;
  fb_info->damping = cm->cdef_info.cdef_damping;
  fb_info->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
  av1_zero(fb_info->dir);
  av1_zero(fb_info->var);

  for (int plane = 0; plane < num_planes; plane++) {
    const int stride = luma_stride >> xd->plane[plane].subsampling_x;
    uint16_t *top_linebuf = &linebuf[plane][0];
    uint16_t *bot_linebuf = &linebuf[plane][nvfb * CDEF_VBORDER * stride];
    {
      const int mi_high_l2 = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
      const int top_offset = MI_SIZE_64X64 * (fbr + 1) << mi_high_l2;
      const int bot_offset = MI_SIZE_64X64 * (fbr + 1) << mi_high_l2;

      if (fbr != nvfb - 1)  // top line buffer copy
        av1_cdef_copy_sb8_16(
            cm, &top_linebuf[(fbr + 1) * CDEF_VBORDER * stride], stride,
            xd->plane[plane].dst.buf, top_offset - CDEF_VBORDER, 0,
            xd->plane[plane].dst.stride, CDEF_VBORDER, stride);
      if (fbr != nvfb - 1)  // bottom line buffer copy
        av1_cdef_copy_sb8_16(cm, &bot_linebuf[fbr * CDEF_VBORDER * stride],
                             stride, xd->plane[plane].dst.buf, bot_offset, 0,
                             xd->plane[plane].dst.stride, CDEF_VBORDER, stride);
    }

    fb_info->top_linebuf[plane] = &linebuf[plane][fbr * CDEF_VBORDER * stride];
    fb_info->bot_linebuf[plane] =
        &linebuf[plane]
                [nvfb * CDEF_VBORDER * stride + (fbr * CDEF_VBORDER * stride)];
  }

  cdef_row_mt_sync_write(cdef_sync, fbr);
  cdef_row_mt_sync_read(cdef_sync, fbr);
}

// Implements multi-threading for CDEF.
// Performs CDEF on the input frame.
// Inputs:
//   frame: Pointer to input frame buffer.
//   cm: Pointer to common structure.
//   xd: Pointer to common current coding block structure.
// Returns:
//   Nothing will be returned.
void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
                       AV1CdefWorkerData *const cdef_worker,
                       AVxWorker *const workers, AV1CdefSync *const cdef_sync,
                       int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
                       int do_extend_border) {
  YV12_BUFFER_CONFIG *frame = &cm->cur_frame->buf;
  const int num_planes = av1_num_planes(cm);

  av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
                       num_planes);

  reset_cdef_job_info(cdef_sync);
  prepare_cdef_frame_workers(cm, xd, cdef_worker, cdef_sb_row_worker_hook,
                             workers, cdef_sync, num_workers,
                             cdef_init_fb_row_fn, do_extend_border);
  launch_cdef_workers(workers, num_workers);
  sync_cdef_workers(workers, cm, num_workers);
}

int av1_get_intrabc_extra_top_right_sb_delay(const AV1_COMMON *cm) {
  // No additional top-right delay when the intraBC tool is not enabled.
  if (!av1_allow_intrabc(cm)) return 0;
  // Due to the hardware constraints on processing the intraBC tool with row
  // multithreading, a top-right delay of 3 superblocks of size 128x128 or 5
  // superblocks of size 64x64 is mandated. However, a minimum top-right delay
  // of 1 superblock is assured with 'sync_range'. Hence return only the
  // additional superblock delay when the intraBC tool is enabled.
  return cm->seq_params->sb_size == BLOCK_128X128 ? 2 : 4;
}