Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1 | /* |
Yaowu Xu | 2ab7ff0 | 2016-09-02 12:04:54 -0700 | [diff] [blame] | 2 | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 3 | * |
Yaowu Xu | 2ab7ff0 | 2016-09-02 12:04:54 -0700 | [diff] [blame] | 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 10 | */ |
| 11 | |
chiyotsai | 122e499 | 2022-02-18 12:04:02 -0800 | [diff] [blame] | 12 | #include "aom/aom_image.h" |
Tom Finegan | 60e653d | 2018-05-22 11:34:58 -0700 | [diff] [blame] | 13 | #include "config/aom_config.h" |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 14 | #include "config/aom_scale_rtcd.h" |
Tom Finegan | 60e653d | 2018-05-22 11:34:58 -0700 | [diff] [blame] | 15 | |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 16 | #include "aom_dsp/aom_dsp_common.h" |
| 17 | #include "aom_mem/aom_mem.h" |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 18 | #include "av1/common/av1_loopfilter.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 19 | #include "av1/common/entropymode.h" |
| 20 | #include "av1/common/thread_common.h" |
| 21 | #include "av1/common/reconinter.h" |
Jayasanker J | 34ec50a | 2022-07-01 19:00:02 +0530 | [diff] [blame] | 22 | #include "av1/common/reconintra.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 23 | |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 24 | // Set up nsync by width. |
| 25 | static INLINE int get_sync_range(int width) { |
| 26 | // nsync numbers are picked by testing. For example, for 4k |
| 27 | // video, using 4 gives best performance. |
| 28 | if (width < 640) |
| 29 | return 1; |
| 30 | else if (width <= 1280) |
| 31 | return 2; |
| 32 | else if (width <= 4096) |
| 33 | return 4; |
| 34 | else |
| 35 | return 8; |
| 36 | } |
| 37 | |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 38 | static INLINE int get_lr_sync_range(int width) { |
| 39 | #if 0 |
| 40 | // nsync numbers are picked by testing. For example, for 4k |
| 41 | // video, using 4 gives best performance. |
| 42 | if (width < 640) |
| 43 | return 1; |
| 44 | else if (width <= 1280) |
| 45 | return 2; |
| 46 | else if (width <= 4096) |
| 47 | return 4; |
| 48 | else |
| 49 | return 8; |
| 50 | #else |
| 51 | (void)width; |
| 52 | return 1; |
| 53 | #endif |
| 54 | } |
| 55 | |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 56 | // Allocate memory for lf row synchronization |
Nithya V S | 709ccb3 | 2021-08-18 11:05:46 +0530 | [diff] [blame] | 57 | void av1_loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows, |
| 58 | int width, int num_workers) { |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 59 | lf_sync->rows = rows; |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 60 | lf_sync->lf_mt_exit = false; |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 61 | #if CONFIG_MULTITHREAD |
| 62 | { |
| 63 | int i, j; |
| 64 | |
| 65 | for (j = 0; j < MAX_MB_PLANE; j++) { |
| 66 | CHECK_MEM_ERROR(cm, lf_sync->mutex_[j], |
| 67 | aom_malloc(sizeof(*(lf_sync->mutex_[j])) * rows)); |
| 68 | if (lf_sync->mutex_[j]) { |
| 69 | for (i = 0; i < rows; ++i) { |
| 70 | pthread_mutex_init(&lf_sync->mutex_[j][i], NULL); |
| 71 | } |
| 72 | } |
| 73 | |
| 74 | CHECK_MEM_ERROR(cm, lf_sync->cond_[j], |
| 75 | aom_malloc(sizeof(*(lf_sync->cond_[j])) * rows)); |
| 76 | if (lf_sync->cond_[j]) { |
| 77 | for (i = 0; i < rows; ++i) { |
| 78 | pthread_cond_init(&lf_sync->cond_[j][i], NULL); |
| 79 | } |
| 80 | } |
| 81 | } |
| 82 | |
| 83 | CHECK_MEM_ERROR(cm, lf_sync->job_mutex, |
| 84 | aom_malloc(sizeof(*(lf_sync->job_mutex)))); |
| 85 | if (lf_sync->job_mutex) { |
| 86 | pthread_mutex_init(lf_sync->job_mutex, NULL); |
| 87 | } |
| 88 | } |
| 89 | #endif // CONFIG_MULTITHREAD |
| 90 | CHECK_MEM_ERROR(cm, lf_sync->lfdata, |
| 91 | aom_malloc(num_workers * sizeof(*(lf_sync->lfdata)))); |
| 92 | lf_sync->num_workers = num_workers; |
| 93 | |
| 94 | for (int j = 0; j < MAX_MB_PLANE; j++) { |
| 95 | CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col[j], |
| 96 | aom_malloc(sizeof(*(lf_sync->cur_sb_col[j])) * rows)); |
| 97 | } |
| 98 | CHECK_MEM_ERROR( |
| 99 | cm, lf_sync->job_queue, |
| 100 | aom_malloc(sizeof(*(lf_sync->job_queue)) * rows * MAX_MB_PLANE * 2)); |
| 101 | // Set up nsync. |
| 102 | lf_sync->sync_range = get_sync_range(width); |
| 103 | } |
| 104 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 105 | // Deallocate lf synchronization related mutex and data |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 106 | void av1_loop_filter_dealloc(AV1LfSync *lf_sync) { |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 107 | if (lf_sync != NULL) { |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 108 | int j; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 109 | #if CONFIG_MULTITHREAD |
| 110 | int i; |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 111 | for (j = 0; j < MAX_MB_PLANE; j++) { |
| 112 | if (lf_sync->mutex_[j] != NULL) { |
| 113 | for (i = 0; i < lf_sync->rows; ++i) { |
| 114 | pthread_mutex_destroy(&lf_sync->mutex_[j][i]); |
| 115 | } |
| 116 | aom_free(lf_sync->mutex_[j]); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 117 | } |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 118 | if (lf_sync->cond_[j] != NULL) { |
| 119 | for (i = 0; i < lf_sync->rows; ++i) { |
| 120 | pthread_cond_destroy(&lf_sync->cond_[j][i]); |
| 121 | } |
| 122 | aom_free(lf_sync->cond_[j]); |
| 123 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 124 | } |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 125 | if (lf_sync->job_mutex != NULL) { |
| 126 | pthread_mutex_destroy(lf_sync->job_mutex); |
| 127 | aom_free(lf_sync->job_mutex); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 128 | } |
| 129 | #endif // CONFIG_MULTITHREAD |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 130 | aom_free(lf_sync->lfdata); |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 131 | for (j = 0; j < MAX_MB_PLANE; j++) { |
| 132 | aom_free(lf_sync->cur_sb_col[j]); |
| 133 | } |
| 134 | |
| 135 | aom_free(lf_sync->job_queue); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 136 | // clear the structure as the source of this call may be a resize in which |
| 137 | // case this call will be followed by an _alloc() which may fail. |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 138 | av1_zero(*lf_sync); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 139 | } |
| 140 | } |
| 141 | |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 142 | void av1_alloc_cdef_sync(AV1_COMMON *const cm, AV1CdefSync *cdef_sync, |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 143 | int num_workers) { |
| 144 | if (num_workers < 1) return; |
| 145 | #if CONFIG_MULTITHREAD |
| 146 | if (cdef_sync->mutex_ == NULL) { |
| 147 | CHECK_MEM_ERROR(cm, cdef_sync->mutex_, |
| 148 | aom_malloc(sizeof(*(cdef_sync->mutex_)))); |
| 149 | if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL); |
| 150 | } |
| 151 | #else |
| 152 | (void)cm; |
| 153 | (void)cdef_sync; |
| 154 | #endif // CONFIG_MULTITHREAD |
| 155 | } |
| 156 | |
| 157 | void av1_free_cdef_sync(AV1CdefSync *cdef_sync) { |
| 158 | if (cdef_sync == NULL) return; |
| 159 | #if CONFIG_MULTITHREAD |
| 160 | if (cdef_sync->mutex_ != NULL) { |
| 161 | pthread_mutex_destroy(cdef_sync->mutex_); |
| 162 | aom_free(cdef_sync->mutex_); |
| 163 | } |
| 164 | #endif // CONFIG_MULTITHREAD |
| 165 | } |
| 166 | |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 167 | static INLINE void cdef_row_mt_sync_read(AV1CdefSync *const cdef_sync, |
| 168 | int row) { |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 169 | if (!row) return; |
| 170 | #if CONFIG_MULTITHREAD |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 171 | AV1CdefRowSync *const cdef_row_mt = cdef_sync->cdef_row_mt; |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 172 | pthread_mutex_lock(cdef_row_mt[row - 1].row_mutex_); |
| 173 | while (cdef_row_mt[row - 1].is_row_done != 1) |
| 174 | pthread_cond_wait(cdef_row_mt[row - 1].row_cond_, |
| 175 | cdef_row_mt[row - 1].row_mutex_); |
| 176 | cdef_row_mt[row - 1].is_row_done = 0; |
| 177 | pthread_mutex_unlock(cdef_row_mt[row - 1].row_mutex_); |
| 178 | #else |
| 179 | (void)cdef_sync; |
| 180 | #endif // CONFIG_MULTITHREAD |
| 181 | } |
| 182 | |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 183 | static INLINE void cdef_row_mt_sync_write(AV1CdefSync *const cdef_sync, |
| 184 | int row) { |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 185 | #if CONFIG_MULTITHREAD |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 186 | AV1CdefRowSync *const cdef_row_mt = cdef_sync->cdef_row_mt; |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 187 | pthread_mutex_lock(cdef_row_mt[row].row_mutex_); |
| 188 | pthread_cond_signal(cdef_row_mt[row].row_cond_); |
| 189 | cdef_row_mt[row].is_row_done = 1; |
| 190 | pthread_mutex_unlock(cdef_row_mt[row].row_mutex_); |
| 191 | #else |
| 192 | (void)cdef_sync; |
| 193 | (void)row; |
| 194 | #endif // CONFIG_MULTITHREAD |
| 195 | } |
| 196 | |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 197 | static INLINE void sync_read(AV1LfSync *const lf_sync, int r, int c, |
| 198 | int plane) { |
| 199 | #if CONFIG_MULTITHREAD |
| 200 | const int nsync = lf_sync->sync_range; |
| 201 | |
| 202 | if (r && !(c & (nsync - 1))) { |
| 203 | pthread_mutex_t *const mutex = &lf_sync->mutex_[plane][r - 1]; |
| 204 | pthread_mutex_lock(mutex); |
| 205 | |
| 206 | while (c > lf_sync->cur_sb_col[plane][r - 1] - nsync) { |
| 207 | pthread_cond_wait(&lf_sync->cond_[plane][r - 1], mutex); |
| 208 | } |
| 209 | pthread_mutex_unlock(mutex); |
| 210 | } |
| 211 | #else |
| 212 | (void)lf_sync; |
| 213 | (void)r; |
| 214 | (void)c; |
| 215 | (void)plane; |
| 216 | #endif // CONFIG_MULTITHREAD |
| 217 | } |
| 218 | |
| 219 | static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c, |
| 220 | const int sb_cols, int plane) { |
| 221 | #if CONFIG_MULTITHREAD |
| 222 | const int nsync = lf_sync->sync_range; |
| 223 | int cur; |
| 224 | // Only signal when there are enough filtered SB for next row to run. |
| 225 | int sig = 1; |
| 226 | |
| 227 | if (c < sb_cols - 1) { |
| 228 | cur = c; |
| 229 | if (c % nsync) sig = 0; |
| 230 | } else { |
| 231 | cur = sb_cols + nsync; |
| 232 | } |
| 233 | |
| 234 | if (sig) { |
| 235 | pthread_mutex_lock(&lf_sync->mutex_[plane][r]); |
| 236 | |
| 237 | lf_sync->cur_sb_col[plane][r] = cur; |
| 238 | |
| 239 | pthread_cond_broadcast(&lf_sync->cond_[plane][r]); |
| 240 | pthread_mutex_unlock(&lf_sync->mutex_[plane][r]); |
| 241 | } |
| 242 | #else |
| 243 | (void)lf_sync; |
| 244 | (void)r; |
| 245 | (void)c; |
| 246 | (void)sb_cols; |
| 247 | (void)plane; |
| 248 | #endif // CONFIG_MULTITHREAD |
| 249 | } |
| 250 | |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 251 | // One job of row loopfiltering. |
Deepa K G | 4015384 | 2022-09-12 14:48:14 +0530 | [diff] [blame] | 252 | void av1_thread_loop_filter_rows( |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 253 | const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm, |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 254 | struct macroblockd_plane *planes, MACROBLOCKD *xd, int mi_row, int plane, |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 255 | int dir, int lpf_opt_level, AV1LfSync *const lf_sync, |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 256 | struct aom_internal_error_info *error_info, |
Deepa K G | e48a6d1 | 2022-10-24 23:33:51 +0530 | [diff] [blame] | 257 | AV1_DEBLOCKING_PARAMETERS *params_buf, TX_SIZE *tx_buf, |
| 258 | int num_mis_in_lpf_unit_height_log2) { |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 259 | // TODO(aomedia:3276): Pass error_info to the low-level functions as required |
| 260 | // in future to handle error propagation. |
| 261 | (void)error_info; |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 262 | const int sb_cols = |
Mudassir Galagnath | 77f3160 | 2022-04-19 16:28:25 +0530 | [diff] [blame] | 263 | CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, MAX_MIB_SIZE_LOG2); |
Deepa K G | e48a6d1 | 2022-10-24 23:33:51 +0530 | [diff] [blame] | 264 | const int r = mi_row >> num_mis_in_lpf_unit_height_log2; |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 265 | int mi_col, c; |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 266 | |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 267 | const bool joint_filter_chroma = (lpf_opt_level == 2) && plane > AOM_PLANE_Y; |
chiyotsai | 122e499 | 2022-02-18 12:04:02 -0800 | [diff] [blame] | 268 | const int num_planes = joint_filter_chroma ? 2 : 1; |
chiyotsai | 122e499 | 2022-02-18 12:04:02 -0800 | [diff] [blame] | 269 | assert(IMPLIES(joint_filter_chroma, plane == AOM_PLANE_U)); |
chiyotsai | 122e499 | 2022-02-18 12:04:02 -0800 | [diff] [blame] | 270 | |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 271 | if (dir == 0) { |
| 272 | for (mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += MAX_MIB_SIZE) { |
| 273 | c = mi_col >> MAX_MIB_SIZE_LOG2; |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 274 | |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 275 | av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer, |
chiyotsai | 122e499 | 2022-02-18 12:04:02 -0800 | [diff] [blame] | 276 | mi_row, mi_col, plane, plane + num_planes); |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 277 | if (lpf_opt_level) { |
chiyotsai | 095582c | 2022-02-08 12:30:42 -0800 | [diff] [blame] | 278 | if (plane == AOM_PLANE_Y) { |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 279 | av1_filter_block_plane_vert_opt(cm, xd, &planes[plane], mi_row, |
Deepa K G | 4015384 | 2022-09-12 14:48:14 +0530 | [diff] [blame] | 280 | mi_col, params_buf, tx_buf, |
Deepa K G | e48a6d1 | 2022-10-24 23:33:51 +0530 | [diff] [blame] | 281 | num_mis_in_lpf_unit_height_log2); |
chiyotsai | 095582c | 2022-02-08 12:30:42 -0800 | [diff] [blame] | 282 | } else { |
Deepa K G | 4015384 | 2022-09-12 14:48:14 +0530 | [diff] [blame] | 283 | av1_filter_block_plane_vert_opt_chroma( |
| 284 | cm, xd, &planes[plane], mi_row, mi_col, params_buf, tx_buf, plane, |
Deepa K G | e48a6d1 | 2022-10-24 23:33:51 +0530 | [diff] [blame] | 285 | joint_filter_chroma, num_mis_in_lpf_unit_height_log2); |
chiyotsai | 095582c | 2022-02-08 12:30:42 -0800 | [diff] [blame] | 286 | } |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 287 | } else { |
| 288 | av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row, |
chiyotsai | 095582c | 2022-02-08 12:30:42 -0800 | [diff] [blame] | 289 | mi_col); |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 290 | } |
chiyotsai | 122e499 | 2022-02-18 12:04:02 -0800 | [diff] [blame] | 291 | if (lf_sync != NULL) { |
| 292 | sync_write(lf_sync, r, c, sb_cols, plane); |
| 293 | } |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 294 | } |
| 295 | } else if (dir == 1) { |
| 296 | for (mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += MAX_MIB_SIZE) { |
| 297 | c = mi_col >> MAX_MIB_SIZE_LOG2; |
| 298 | |
| 299 | if (lf_sync != NULL) { |
| 300 | // Wait for vertical edge filtering of the top-right block to be |
| 301 | // completed |
| 302 | sync_read(lf_sync, r, c, plane); |
| 303 | |
| 304 | // Wait for vertical edge filtering of the right block to be completed |
| 305 | sync_read(lf_sync, r + 1, c, plane); |
| 306 | } |
| 307 | |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 308 | #if CONFIG_MULTITHREAD |
| 309 | if (lf_sync && lf_sync->num_workers > 1) { |
| 310 | pthread_mutex_lock(lf_sync->job_mutex); |
| 311 | const bool lf_mt_exit = lf_sync->lf_mt_exit; |
| 312 | pthread_mutex_unlock(lf_sync->job_mutex); |
| 313 | // Exit in case any worker has encountered an error. |
| 314 | if (lf_mt_exit) return; |
| 315 | } |
| 316 | #endif |
| 317 | |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 318 | av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer, |
chiyotsai | 122e499 | 2022-02-18 12:04:02 -0800 | [diff] [blame] | 319 | mi_row, mi_col, plane, plane + num_planes); |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 320 | if (lpf_opt_level) { |
chiyotsai | 095582c | 2022-02-08 12:30:42 -0800 | [diff] [blame] | 321 | if (plane == AOM_PLANE_Y) { |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 322 | av1_filter_block_plane_horz_opt(cm, xd, &planes[plane], mi_row, |
Deepa K G | 4015384 | 2022-09-12 14:48:14 +0530 | [diff] [blame] | 323 | mi_col, params_buf, tx_buf, |
Deepa K G | e48a6d1 | 2022-10-24 23:33:51 +0530 | [diff] [blame] | 324 | num_mis_in_lpf_unit_height_log2); |
chiyotsai | 095582c | 2022-02-08 12:30:42 -0800 | [diff] [blame] | 325 | } else { |
Deepa K G | 4015384 | 2022-09-12 14:48:14 +0530 | [diff] [blame] | 326 | av1_filter_block_plane_horz_opt_chroma( |
| 327 | cm, xd, &planes[plane], mi_row, mi_col, params_buf, tx_buf, plane, |
Deepa K G | e48a6d1 | 2022-10-24 23:33:51 +0530 | [diff] [blame] | 328 | joint_filter_chroma, num_mis_in_lpf_unit_height_log2); |
chiyotsai | 095582c | 2022-02-08 12:30:42 -0800 | [diff] [blame] | 329 | } |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 330 | } else { |
| 331 | av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row, |
chiyotsai | 095582c | 2022-02-08 12:30:42 -0800 | [diff] [blame] | 332 | mi_col); |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 333 | } |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 334 | } |
| 335 | } |
| 336 | } |
| 337 | |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 338 | void av1_set_vert_loop_filter_done(AV1_COMMON *cm, AV1LfSync *lf_sync, |
| 339 | int num_mis_in_lpf_unit_height_log2) { |
| 340 | int plane, sb_row; |
| 341 | const int sb_cols = |
| 342 | CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, num_mis_in_lpf_unit_height_log2); |
| 343 | const int sb_rows = |
| 344 | CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, num_mis_in_lpf_unit_height_log2); |
| 345 | |
| 346 | // In case of loopfilter row-multithreading, the worker on an SB row waits for |
| 347 | // the vertical edge filtering of the right and top-right SBs. Hence, in case |
| 348 | // a thread (main/worker) encounters an error, update that vertical |
| 349 | // loopfiltering of every SB row in the frame is complete in order to avoid |
| 350 | // dependent workers waiting indefinitely. |
| 351 | for (sb_row = 0; sb_row < sb_rows; ++sb_row) |
| 352 | for (plane = 0; plane < MAX_MB_PLANE; ++plane) |
| 353 | sync_write(lf_sync, sb_row, sb_cols - 1, sb_cols, plane); |
| 354 | } |
| 355 | |
| 356 | static AOM_INLINE void sync_lf_workers(AVxWorker *const workers, |
| 357 | AV1_COMMON *const cm, int num_workers) { |
| 358 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
| 359 | int had_error = workers[0].had_error; |
| 360 | struct aom_internal_error_info error_info; |
| 361 | |
| 362 | // Read the error_info of main thread. |
| 363 | if (had_error) { |
| 364 | AVxWorker *const worker = &workers[0]; |
| 365 | error_info = ((LFWorkerData *)worker->data2)->error_info; |
| 366 | } |
| 367 | |
| 368 | // Wait till all rows are finished. |
| 369 | for (int i = num_workers - 1; i > 0; --i) { |
| 370 | AVxWorker *const worker = &workers[i]; |
| 371 | if (!winterface->sync(worker)) { |
| 372 | had_error = 1; |
| 373 | error_info = ((LFWorkerData *)worker->data2)->error_info; |
| 374 | } |
| 375 | } |
| 376 | if (had_error) |
| 377 | aom_internal_error(cm->error, error_info.error_code, "%s", |
| 378 | error_info.detail); |
| 379 | } |
| 380 | |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 381 | // Row-based multi-threaded loopfilter hook |
Wan-Teh Chang | 8d2f577 | 2018-09-12 15:44:59 -0700 | [diff] [blame] | 382 | static int loop_filter_row_worker(void *arg1, void *arg2) { |
| 383 | AV1LfSync *const lf_sync = (AV1LfSync *)arg1; |
| 384 | LFWorkerData *const lf_data = (LFWorkerData *)arg2; |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 385 | AV1LfMTInfo *cur_job_info; |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 386 | |
| 387 | #if CONFIG_MULTITHREAD |
| 388 | pthread_mutex_t *job_mutex_ = lf_sync->job_mutex; |
| 389 | #endif |
| 390 | |
| 391 | struct aom_internal_error_info *const error_info = &lf_data->error_info; |
| 392 | |
| 393 | // The jmp_buf is valid only for the duration of the function that calls |
| 394 | // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 |
| 395 | // before it returns. |
| 396 | if (setjmp(error_info->jmp)) { |
| 397 | error_info->setjmp = 0; |
| 398 | #if CONFIG_MULTITHREAD |
| 399 | pthread_mutex_lock(job_mutex_); |
| 400 | lf_sync->lf_mt_exit = true; |
| 401 | pthread_mutex_unlock(job_mutex_); |
| 402 | #endif |
| 403 | av1_set_vert_loop_filter_done(lf_data->cm, lf_sync, MAX_MIB_SIZE_LOG2); |
| 404 | return 0; |
| 405 | } |
| 406 | error_info->setjmp = 1; |
| 407 | |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 408 | while ((cur_job_info = get_lf_job_info(lf_sync)) != NULL) { |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 409 | const int lpf_opt_level = cur_job_info->lpf_opt_level; |
Deepa K G | 4015384 | 2022-09-12 14:48:14 +0530 | [diff] [blame] | 410 | av1_thread_loop_filter_rows( |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 411 | lf_data->frame_buffer, lf_data->cm, lf_data->planes, lf_data->xd, |
| 412 | cur_job_info->mi_row, cur_job_info->plane, cur_job_info->dir, |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 413 | lpf_opt_level, lf_sync, error_info, lf_data->params_buf, |
| 414 | lf_data->tx_buf, MAX_MIB_SIZE_LOG2); |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 415 | } |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 416 | error_info->setjmp = 0; |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 417 | return 1; |
| 418 | } |
| 419 | |
| 420 | static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, |
| 421 | MACROBLOCKD *xd, int start, int stop, |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 422 | const int planes_to_lf[MAX_MB_PLANE], |
| 423 | AVxWorker *workers, int num_workers, |
| 424 | AV1LfSync *lf_sync, int lpf_opt_level) { |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 425 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 426 | int i; |
Deepa K G | 4015384 | 2022-09-12 14:48:14 +0530 | [diff] [blame] | 427 | loop_filter_frame_mt_init(cm, start, stop, planes_to_lf, num_workers, lf_sync, |
| 428 | lpf_opt_level, MAX_MIB_SIZE_LOG2); |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 429 | |
| 430 | // Set up loopfilter thread data. |
Deepa K G | 5786774 | 2021-01-19 12:52:41 -0800 | [diff] [blame] | 431 | for (i = num_workers - 1; i >= 0; --i) { |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 432 | AVxWorker *const worker = &workers[i]; |
| 433 | LFWorkerData *const lf_data = &lf_sync->lfdata[i]; |
| 434 | |
Wan-Teh Chang | 8d2f577 | 2018-09-12 15:44:59 -0700 | [diff] [blame] | 435 | worker->hook = loop_filter_row_worker; |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 436 | worker->data1 = lf_sync; |
| 437 | worker->data2 = lf_data; |
| 438 | |
| 439 | // Loopfilter data |
| 440 | loop_filter_data_reset(lf_data, frame, cm, xd); |
| 441 | |
| 442 | // Start loopfiltering |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 443 | worker->had_error = 0; |
Deepa K G | 5786774 | 2021-01-19 12:52:41 -0800 | [diff] [blame] | 444 | if (i == 0) { |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 445 | winterface->execute(worker); |
| 446 | } else { |
| 447 | winterface->launch(worker); |
| 448 | } |
| 449 | } |
| 450 | |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 451 | sync_lf_workers(workers, cm, num_workers); |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 452 | } |
| 453 | |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 454 | static void loop_filter_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, |
| 455 | MACROBLOCKD *xd, int start, int stop, |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 456 | const int planes_to_lf[MAX_MB_PLANE], |
| 457 | int lpf_opt_level) { |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 458 | // Filter top rows of all planes first, in case the output can be partially |
| 459 | // reconstructed row by row. |
| 460 | int mi_row, plane, dir; |
chiyotsai | 095582c | 2022-02-08 12:30:42 -0800 | [diff] [blame] | 461 | |
| 462 | AV1_DEBLOCKING_PARAMETERS params_buf[MAX_MIB_SIZE]; |
| 463 | TX_SIZE tx_buf[MAX_MIB_SIZE]; |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 464 | for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 465 | for (plane = 0; plane < MAX_MB_PLANE; ++plane) { |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 466 | if (skip_loop_filter_plane(planes_to_lf, plane, lpf_opt_level)) { |
chiyotsai | 122e499 | 2022-02-18 12:04:02 -0800 | [diff] [blame] | 467 | continue; |
| 468 | } |
| 469 | |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 470 | for (dir = 0; dir < 2; ++dir) { |
Deepa K G | 4015384 | 2022-09-12 14:48:14 +0530 | [diff] [blame] | 471 | av1_thread_loop_filter_rows(frame, cm, xd->plane, xd, mi_row, plane, |
| 472 | dir, lpf_opt_level, /*lf_sync=*/NULL, |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 473 | xd->error_info, params_buf, tx_buf, |
| 474 | MAX_MIB_SIZE_LOG2); |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 475 | } |
| 476 | } |
| 477 | } |
| 478 | } |
| 479 | |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 480 | void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, |
| 481 | MACROBLOCKD *xd, int plane_start, int plane_end, |
Yannis Guyon | ee564d2 | 2021-06-15 13:25:50 +0200 | [diff] [blame] | 482 | int partial_frame, AVxWorker *workers, |
| 483 | int num_workers, AV1LfSync *lf_sync, |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 484 | int lpf_opt_level) { |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 485 | int start_mi_row, end_mi_row, mi_rows_to_filter; |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 486 | int planes_to_lf[MAX_MB_PLANE]; |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 487 | |
Deepa K G | 4015384 | 2022-09-12 14:48:14 +0530 | [diff] [blame] | 488 | if (!check_planes_to_loop_filter(&cm->lf, planes_to_lf, plane_start, |
| 489 | plane_end)) |
| 490 | return; |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 491 | |
| 492 | start_mi_row = 0; |
Urvang Joshi | 9dc909d | 2020-03-23 16:07:02 -0700 | [diff] [blame] | 493 | mi_rows_to_filter = cm->mi_params.mi_rows; |
| 494 | if (partial_frame && cm->mi_params.mi_rows > 8) { |
| 495 | start_mi_row = cm->mi_params.mi_rows >> 1; |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 496 | start_mi_row &= 0xfffffff8; |
Urvang Joshi | 9dc909d | 2020-03-23 16:07:02 -0700 | [diff] [blame] | 497 | mi_rows_to_filter = AOMMAX(cm->mi_params.mi_rows / 8, 8); |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 498 | } |
| 499 | end_mi_row = start_mi_row + mi_rows_to_filter; |
| 500 | av1_loop_filter_frame_init(cm, plane_start, plane_end); |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 501 | |
| 502 | if (num_workers > 1) { |
| 503 | // Enqueue and execute loopfiltering jobs. |
| 504 | loop_filter_rows_mt(frame, cm, xd, start_mi_row, end_mi_row, planes_to_lf, |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 505 | workers, num_workers, lf_sync, lpf_opt_level); |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 506 | } else { |
| 507 | // Directly filter in the main thread. |
| 508 | loop_filter_rows(frame, cm, xd, start_mi_row, end_mi_row, planes_to_lf, |
Deepa K G | 3be0007 | 2022-04-12 22:10:54 +0530 | [diff] [blame] | 509 | lpf_opt_level); |
Yannis Guyon | 27bdea5 | 2021-07-21 10:41:02 +0200 | [diff] [blame] | 510 | } |
Deepa K G | 964e72e | 2018-05-16 16:56:01 +0530 | [diff] [blame] | 511 | } |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 512 | |
| 513 | static INLINE void lr_sync_read(void *const lr_sync, int r, int c, int plane) { |
| 514 | #if CONFIG_MULTITHREAD |
| 515 | AV1LrSync *const loop_res_sync = (AV1LrSync *)lr_sync; |
| 516 | const int nsync = loop_res_sync->sync_range; |
| 517 | |
| 518 | if (r && !(c & (nsync - 1))) { |
| 519 | pthread_mutex_t *const mutex = &loop_res_sync->mutex_[plane][r - 1]; |
| 520 | pthread_mutex_lock(mutex); |
| 521 | |
| 522 | while (c > loop_res_sync->cur_sb_col[plane][r - 1] - nsync) { |
| 523 | pthread_cond_wait(&loop_res_sync->cond_[plane][r - 1], mutex); |
| 524 | } |
| 525 | pthread_mutex_unlock(mutex); |
| 526 | } |
| 527 | #else |
| 528 | (void)lr_sync; |
| 529 | (void)r; |
| 530 | (void)c; |
| 531 | (void)plane; |
| 532 | #endif // CONFIG_MULTITHREAD |
| 533 | } |
| 534 | |
| 535 | static INLINE void lr_sync_write(void *const lr_sync, int r, int c, |
| 536 | const int sb_cols, int plane) { |
| 537 | #if CONFIG_MULTITHREAD |
| 538 | AV1LrSync *const loop_res_sync = (AV1LrSync *)lr_sync; |
| 539 | const int nsync = loop_res_sync->sync_range; |
| 540 | int cur; |
| 541 | // Only signal when there are enough filtered SB for next row to run. |
| 542 | int sig = 1; |
| 543 | |
| 544 | if (c < sb_cols - 1) { |
| 545 | cur = c; |
| 546 | if (c % nsync) sig = 0; |
| 547 | } else { |
| 548 | cur = sb_cols + nsync; |
| 549 | } |
| 550 | |
| 551 | if (sig) { |
| 552 | pthread_mutex_lock(&loop_res_sync->mutex_[plane][r]); |
| 553 | |
| 554 | loop_res_sync->cur_sb_col[plane][r] = cur; |
| 555 | |
| 556 | pthread_cond_broadcast(&loop_res_sync->cond_[plane][r]); |
| 557 | pthread_mutex_unlock(&loop_res_sync->mutex_[plane][r]); |
| 558 | } |
| 559 | #else |
| 560 | (void)lr_sync; |
| 561 | (void)r; |
| 562 | (void)c; |
| 563 | (void)sb_cols; |
| 564 | (void)plane; |
| 565 | #endif // CONFIG_MULTITHREAD |
| 566 | } |
| 567 | |
| 568 | // Allocate memory for loop restoration row synchronization |
Nithya V S | 709ccb3 | 2021-08-18 11:05:46 +0530 | [diff] [blame] | 569 | void av1_loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm, |
| 570 | int num_workers, int num_rows_lr, |
| 571 | int num_planes, int width) { |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 572 | lr_sync->rows = num_rows_lr; |
| 573 | lr_sync->num_planes = num_planes; |
| 574 | #if CONFIG_MULTITHREAD |
| 575 | { |
| 576 | int i, j; |
| 577 | |
| 578 | for (j = 0; j < num_planes; j++) { |
| 579 | CHECK_MEM_ERROR(cm, lr_sync->mutex_[j], |
| 580 | aom_malloc(sizeof(*(lr_sync->mutex_[j])) * num_rows_lr)); |
| 581 | if (lr_sync->mutex_[j]) { |
| 582 | for (i = 0; i < num_rows_lr; ++i) { |
| 583 | pthread_mutex_init(&lr_sync->mutex_[j][i], NULL); |
| 584 | } |
| 585 | } |
| 586 | |
| 587 | CHECK_MEM_ERROR(cm, lr_sync->cond_[j], |
| 588 | aom_malloc(sizeof(*(lr_sync->cond_[j])) * num_rows_lr)); |
| 589 | if (lr_sync->cond_[j]) { |
| 590 | for (i = 0; i < num_rows_lr; ++i) { |
| 591 | pthread_cond_init(&lr_sync->cond_[j][i], NULL); |
| 592 | } |
| 593 | } |
| 594 | } |
| 595 | |
| 596 | CHECK_MEM_ERROR(cm, lr_sync->job_mutex, |
| 597 | aom_malloc(sizeof(*(lr_sync->job_mutex)))); |
| 598 | if (lr_sync->job_mutex) { |
| 599 | pthread_mutex_init(lr_sync->job_mutex, NULL); |
| 600 | } |
| 601 | } |
| 602 | #endif // CONFIG_MULTITHREAD |
| 603 | CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata, |
| 604 | aom_malloc(num_workers * sizeof(*(lr_sync->lrworkerdata)))); |
| 605 | |
| 606 | for (int worker_idx = 0; worker_idx < num_workers; ++worker_idx) { |
| 607 | if (worker_idx < num_workers - 1) { |
| 608 | CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata[worker_idx].rst_tmpbuf, |
| 609 | (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE)); |
| 610 | CHECK_MEM_ERROR(cm, lr_sync->lrworkerdata[worker_idx].rlbs, |
| 611 | aom_malloc(sizeof(RestorationLineBuffers))); |
| 612 | |
| 613 | } else { |
| 614 | lr_sync->lrworkerdata[worker_idx].rst_tmpbuf = cm->rst_tmpbuf; |
| 615 | lr_sync->lrworkerdata[worker_idx].rlbs = cm->rlbs; |
| 616 | } |
| 617 | } |
| 618 | |
| 619 | lr_sync->num_workers = num_workers; |
Mudassir Galaganath | 7bdc776 | 2023-10-04 19:12:11 +0530 | [diff] [blame] | 620 | lr_sync->lr_mt_exit = false; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 621 | |
| 622 | for (int j = 0; j < num_planes; j++) { |
| 623 | CHECK_MEM_ERROR( |
| 624 | cm, lr_sync->cur_sb_col[j], |
| 625 | aom_malloc(sizeof(*(lr_sync->cur_sb_col[j])) * num_rows_lr)); |
| 626 | } |
| 627 | CHECK_MEM_ERROR( |
| 628 | cm, lr_sync->job_queue, |
| 629 | aom_malloc(sizeof(*(lr_sync->job_queue)) * num_rows_lr * num_planes)); |
| 630 | // Set up nsync. |
| 631 | lr_sync->sync_range = get_lr_sync_range(width); |
| 632 | } |
| 633 | |
| 634 | // Deallocate loop restoration synchronization related mutex and data |
Cheng Chen | 28b4f28 | 2023-10-04 22:34:33 -0700 | [diff] [blame] | 635 | void av1_loop_restoration_dealloc(AV1LrSync *lr_sync) { |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 636 | if (lr_sync != NULL) { |
| 637 | int j; |
| 638 | #if CONFIG_MULTITHREAD |
| 639 | int i; |
| 640 | for (j = 0; j < MAX_MB_PLANE; j++) { |
| 641 | if (lr_sync->mutex_[j] != NULL) { |
| 642 | for (i = 0; i < lr_sync->rows; ++i) { |
| 643 | pthread_mutex_destroy(&lr_sync->mutex_[j][i]); |
| 644 | } |
| 645 | aom_free(lr_sync->mutex_[j]); |
| 646 | } |
| 647 | if (lr_sync->cond_[j] != NULL) { |
| 648 | for (i = 0; i < lr_sync->rows; ++i) { |
| 649 | pthread_cond_destroy(&lr_sync->cond_[j][i]); |
| 650 | } |
| 651 | aom_free(lr_sync->cond_[j]); |
| 652 | } |
| 653 | } |
| 654 | if (lr_sync->job_mutex != NULL) { |
| 655 | pthread_mutex_destroy(lr_sync->job_mutex); |
| 656 | aom_free(lr_sync->job_mutex); |
| 657 | } |
| 658 | #endif // CONFIG_MULTITHREAD |
| 659 | for (j = 0; j < MAX_MB_PLANE; j++) { |
| 660 | aom_free(lr_sync->cur_sb_col[j]); |
| 661 | } |
| 662 | |
| 663 | aom_free(lr_sync->job_queue); |
| 664 | |
| 665 | if (lr_sync->lrworkerdata) { |
Cheng Chen | 28b4f28 | 2023-10-04 22:34:33 -0700 | [diff] [blame] | 666 | for (int worker_idx = 0; worker_idx < lr_sync->num_workers - 1; |
| 667 | worker_idx++) { |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 668 | LRWorkerData *const workerdata_data = |
| 669 | lr_sync->lrworkerdata + worker_idx; |
| 670 | |
| 671 | aom_free(workerdata_data->rst_tmpbuf); |
| 672 | aom_free(workerdata_data->rlbs); |
| 673 | } |
| 674 | aom_free(lr_sync->lrworkerdata); |
| 675 | } |
| 676 | |
| 677 | // clear the structure as the source of this call may be a resize in which |
| 678 | // case this call will be followed by an _alloc() which may fail. |
| 679 | av1_zero(*lr_sync); |
| 680 | } |
| 681 | } |
| 682 | |
| 683 | static void enqueue_lr_jobs(AV1LrSync *lr_sync, AV1LrStruct *lr_ctxt, |
| 684 | AV1_COMMON *cm) { |
| 685 | FilterFrameCtxt *ctxt = lr_ctxt->ctxt; |
| 686 | |
| 687 | const int num_planes = av1_num_planes(cm); |
| 688 | AV1LrMTInfo *lr_job_queue = lr_sync->job_queue; |
Ravi Chaudhary | 686f02c | 2018-06-07 12:39:44 +0530 | [diff] [blame] | 689 | int32_t lr_job_counter[2], num_even_lr_jobs = 0; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 690 | lr_sync->jobs_enqueued = 0; |
| 691 | lr_sync->jobs_dequeued = 0; |
| 692 | |
| 693 | for (int plane = 0; plane < num_planes; plane++) { |
| 694 | if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue; |
Ravi Chaudhary | 686f02c | 2018-06-07 12:39:44 +0530 | [diff] [blame] | 695 | num_even_lr_jobs = |
Rachel Barker | 0483cbe | 2023-07-05 23:31:38 +0000 | [diff] [blame] | 696 | num_even_lr_jobs + ((ctxt[plane].rsi->vert_units + 1) >> 1); |
Ravi Chaudhary | 686f02c | 2018-06-07 12:39:44 +0530 | [diff] [blame] | 697 | } |
| 698 | lr_job_counter[0] = 0; |
| 699 | lr_job_counter[1] = num_even_lr_jobs; |
| 700 | |
| 701 | for (int plane = 0; plane < num_planes; plane++) { |
| 702 | if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 703 | const int is_uv = plane > 0; |
Tarundeep Singh | 4243e62 | 2021-04-20 16:10:22 +0530 | [diff] [blame] | 704 | const int ss_y = is_uv && cm->seq_params->subsampling_y; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 705 | const int unit_size = ctxt[plane].rsi->restoration_unit_size; |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 706 | const int plane_h = ctxt[plane].plane_h; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 707 | const int ext_size = unit_size * 3 / 2; |
| 708 | |
| 709 | int y0 = 0, i = 0; |
Rachel Barker | 0483cbe | 2023-07-05 23:31:38 +0000 | [diff] [blame] | 710 | while (y0 < plane_h) { |
| 711 | int remaining_h = plane_h - y0; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 712 | int h = (remaining_h < ext_size) ? remaining_h : unit_size; |
| 713 | |
| 714 | RestorationTileLimits limits; |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 715 | limits.v_start = y0; |
| 716 | limits.v_end = y0 + h; |
| 717 | assert(limits.v_end <= plane_h); |
Rachel Barker | 0483cbe | 2023-07-05 23:31:38 +0000 | [diff] [blame] | 718 | // Offset upwards to align with the restoration processing stripe |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 719 | const int voffset = RESTORATION_UNIT_OFFSET >> ss_y; |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 720 | limits.v_start = AOMMAX(0, limits.v_start - voffset); |
| 721 | if (limits.v_end < plane_h) limits.v_end -= voffset; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 722 | |
Ravi Chaudhary | 686f02c | 2018-06-07 12:39:44 +0530 | [diff] [blame] | 723 | assert(lr_job_counter[0] <= num_even_lr_jobs); |
| 724 | |
| 725 | lr_job_queue[lr_job_counter[i & 1]].lr_unit_row = i; |
| 726 | lr_job_queue[lr_job_counter[i & 1]].plane = plane; |
| 727 | lr_job_queue[lr_job_counter[i & 1]].v_start = limits.v_start; |
| 728 | lr_job_queue[lr_job_counter[i & 1]].v_end = limits.v_end; |
| 729 | lr_job_queue[lr_job_counter[i & 1]].sync_mode = i & 1; |
| 730 | if ((i & 1) == 0) { |
| 731 | lr_job_queue[lr_job_counter[i & 1]].v_copy_start = |
| 732 | limits.v_start + RESTORATION_BORDER; |
| 733 | lr_job_queue[lr_job_counter[i & 1]].v_copy_end = |
| 734 | limits.v_end - RESTORATION_BORDER; |
| 735 | if (i == 0) { |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 736 | assert(limits.v_start == 0); |
| 737 | lr_job_queue[lr_job_counter[i & 1]].v_copy_start = 0; |
Ravi Chaudhary | 686f02c | 2018-06-07 12:39:44 +0530 | [diff] [blame] | 738 | } |
Rachel Barker | 0483cbe | 2023-07-05 23:31:38 +0000 | [diff] [blame] | 739 | if (i == (ctxt[plane].rsi->vert_units - 1)) { |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 740 | assert(limits.v_end == plane_h); |
| 741 | lr_job_queue[lr_job_counter[i & 1]].v_copy_end = plane_h; |
Ravi Chaudhary | 686f02c | 2018-06-07 12:39:44 +0530 | [diff] [blame] | 742 | } |
| 743 | } else { |
| 744 | lr_job_queue[lr_job_counter[i & 1]].v_copy_start = |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 745 | AOMMAX(limits.v_start - RESTORATION_BORDER, 0); |
Ravi Chaudhary | 686f02c | 2018-06-07 12:39:44 +0530 | [diff] [blame] | 746 | lr_job_queue[lr_job_counter[i & 1]].v_copy_end = |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 747 | AOMMIN(limits.v_end + RESTORATION_BORDER, plane_h); |
Ravi Chaudhary | 686f02c | 2018-06-07 12:39:44 +0530 | [diff] [blame] | 748 | } |
| 749 | lr_job_counter[i & 1]++; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 750 | lr_sync->jobs_enqueued++; |
| 751 | |
| 752 | y0 += h; |
| 753 | ++i; |
| 754 | } |
| 755 | } |
| 756 | } |
| 757 | |
Sarah Parker | 427e3b1 | 2018-10-12 12:28:44 -0700 | [diff] [blame] | 758 | static AV1LrMTInfo *get_lr_job_info(AV1LrSync *lr_sync) { |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 759 | AV1LrMTInfo *cur_job_info = NULL; |
| 760 | |
| 761 | #if CONFIG_MULTITHREAD |
| 762 | pthread_mutex_lock(lr_sync->job_mutex); |
| 763 | |
Mudassir Galaganath | 7bdc776 | 2023-10-04 19:12:11 +0530 | [diff] [blame] | 764 | if (!lr_sync->lr_mt_exit && lr_sync->jobs_dequeued < lr_sync->jobs_enqueued) { |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 765 | cur_job_info = lr_sync->job_queue + lr_sync->jobs_dequeued; |
| 766 | lr_sync->jobs_dequeued++; |
| 767 | } |
| 768 | |
| 769 | pthread_mutex_unlock(lr_sync->job_mutex); |
| 770 | #else |
| 771 | (void)lr_sync; |
| 772 | #endif |
| 773 | |
| 774 | return cur_job_info; |
| 775 | } |
| 776 | |
Mudassir Galaganath | 7bdc776 | 2023-10-04 19:12:11 +0530 | [diff] [blame] | 777 | static void set_loop_restoration_done(AV1LrSync *const lr_sync, |
| 778 | FilterFrameCtxt *const ctxt) { |
| 779 | for (int plane = 0; plane < MAX_MB_PLANE; ++plane) { |
| 780 | if (ctxt[plane].rsi->frame_restoration_type == RESTORE_NONE) continue; |
| 781 | int y0 = 0, row_number = 0; |
| 782 | const int unit_size = ctxt[plane].rsi->restoration_unit_size; |
| 783 | const int plane_h = ctxt[plane].plane_h; |
| 784 | const int ext_size = unit_size * 3 / 2; |
| 785 | const int hnum_rest_units = ctxt[plane].rsi->horz_units; |
| 786 | while (y0 < plane_h) { |
| 787 | const int remaining_h = plane_h - y0; |
| 788 | const int h = (remaining_h < ext_size) ? remaining_h : unit_size; |
| 789 | lr_sync_write(lr_sync, row_number, hnum_rest_units - 1, hnum_rest_units, |
| 790 | plane); |
| 791 | y0 += h; |
| 792 | ++row_number; |
| 793 | } |
| 794 | } |
| 795 | } |
| 796 | |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 797 | // Implement row loop restoration for each thread. |
Wan-Teh Chang | 8d2f577 | 2018-09-12 15:44:59 -0700 | [diff] [blame] | 798 | static int loop_restoration_row_worker(void *arg1, void *arg2) { |
| 799 | AV1LrSync *const lr_sync = (AV1LrSync *)arg1; |
| 800 | LRWorkerData *lrworkerdata = (LRWorkerData *)arg2; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 801 | AV1LrStruct *lr_ctxt = (AV1LrStruct *)lrworkerdata->lr_ctxt; |
| 802 | FilterFrameCtxt *ctxt = lr_ctxt->ctxt; |
| 803 | int lr_unit_row; |
| 804 | int plane; |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 805 | int plane_w; |
Mudassir Galaganath | 7bdc776 | 2023-10-04 19:12:11 +0530 | [diff] [blame] | 806 | #if CONFIG_MULTITHREAD |
| 807 | pthread_mutex_t *job_mutex_ = lr_sync->job_mutex; |
| 808 | #endif |
| 809 | struct aom_internal_error_info *const error_info = &lrworkerdata->error_info; |
| 810 | |
| 811 | // The jmp_buf is valid only for the duration of the function that calls |
| 812 | // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 |
| 813 | // before it returns. |
| 814 | if (setjmp(error_info->jmp)) { |
| 815 | error_info->setjmp = 0; |
| 816 | #if CONFIG_MULTITHREAD |
| 817 | pthread_mutex_lock(job_mutex_); |
| 818 | lr_sync->lr_mt_exit = true; |
| 819 | pthread_mutex_unlock(job_mutex_); |
| 820 | #endif |
| 821 | // In case of loop restoration multithreading, the worker on an even lr |
| 822 | // block row waits for the completion of the filtering of the top-right and |
| 823 | // bottom-right blocks. Hence, in case a thread (main/worker) encounters an |
| 824 | // error, update that filtering of every row in the frame is complete in |
| 825 | // order to avoid the dependent workers from waiting indefinitely. |
| 826 | set_loop_restoration_done(lr_sync, lr_ctxt->ctxt); |
| 827 | return 0; |
| 828 | } |
| 829 | error_info->setjmp = 1; |
| 830 | |
Ravi Chaudhary | a9ee6e2 | 2018-06-06 14:11:13 +0530 | [diff] [blame] | 831 | typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src_ybc, |
| 832 | YV12_BUFFER_CONFIG *dst_ybc, int hstart, int hend, |
| 833 | int vstart, int vend); |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 834 | static const copy_fun copy_funs[MAX_MB_PLANE] = { |
| 835 | aom_yv12_partial_coloc_copy_y, aom_yv12_partial_coloc_copy_u, |
| 836 | aom_yv12_partial_coloc_copy_v |
| 837 | }; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 838 | |
| 839 | while (1) { |
| 840 | AV1LrMTInfo *cur_job_info = get_lr_job_info(lr_sync); |
| 841 | if (cur_job_info != NULL) { |
| 842 | RestorationTileLimits limits; |
Ravi Chaudhary | 686f02c | 2018-06-07 12:39:44 +0530 | [diff] [blame] | 843 | sync_read_fn_t on_sync_read; |
| 844 | sync_write_fn_t on_sync_write; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 845 | limits.v_start = cur_job_info->v_start; |
| 846 | limits.v_end = cur_job_info->v_end; |
| 847 | lr_unit_row = cur_job_info->lr_unit_row; |
| 848 | plane = cur_job_info->plane; |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 849 | plane_w = ctxt[plane].plane_w; |
Ravi Chaudhary | 686f02c | 2018-06-07 12:39:44 +0530 | [diff] [blame] | 850 | |
| 851 | // sync_mode == 1 implies only sync read is required in LR Multi-threading |
| 852 | // sync_mode == 0 implies only sync write is required. |
| 853 | on_sync_read = |
| 854 | cur_job_info->sync_mode == 1 ? lr_sync_read : av1_lr_sync_read_dummy; |
| 855 | on_sync_write = cur_job_info->sync_mode == 0 ? lr_sync_write |
| 856 | : av1_lr_sync_write_dummy; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 857 | |
| 858 | av1_foreach_rest_unit_in_row( |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 859 | &limits, plane_w, lr_ctxt->on_rest_unit, lr_unit_row, |
| 860 | ctxt[plane].rsi->restoration_unit_size, ctxt[plane].rsi->horz_units, |
| 861 | ctxt[plane].rsi->vert_units, plane, &ctxt[plane], |
| 862 | lrworkerdata->rst_tmpbuf, lrworkerdata->rlbs, on_sync_read, |
Mudassir Galaganath | 7bdc776 | 2023-10-04 19:12:11 +0530 | [diff] [blame] | 863 | on_sync_write, lr_sync, error_info); |
Ravi Chaudhary | a9ee6e2 | 2018-06-06 14:11:13 +0530 | [diff] [blame] | 864 | |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 865 | copy_funs[plane](lr_ctxt->dst, lr_ctxt->frame, 0, plane_w, |
| 866 | cur_job_info->v_copy_start, cur_job_info->v_copy_end); |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 867 | |
| 868 | if (lrworkerdata->do_extend_border) { |
| 869 | aom_extend_frame_borders_plane_row(lr_ctxt->frame, plane, |
| 870 | cur_job_info->v_copy_start, |
| 871 | cur_job_info->v_copy_end); |
| 872 | } |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 873 | } else { |
| 874 | break; |
| 875 | } |
| 876 | } |
Mudassir Galaganath | 7bdc776 | 2023-10-04 19:12:11 +0530 | [diff] [blame] | 877 | error_info->setjmp = 0; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 878 | return 1; |
| 879 | } |
| 880 | |
Mudassir Galaganath | 7bdc776 | 2023-10-04 19:12:11 +0530 | [diff] [blame] | 881 | static AOM_INLINE void sync_lr_workers(AVxWorker *const workers, |
| 882 | AV1_COMMON *const cm, int num_workers) { |
| 883 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
| 884 | int had_error = workers[0].had_error; |
| 885 | struct aom_internal_error_info error_info; |
| 886 | |
| 887 | // Read the error_info of main thread. |
| 888 | if (had_error) { |
| 889 | AVxWorker *const worker = &workers[0]; |
| 890 | error_info = ((LRWorkerData *)worker->data2)->error_info; |
| 891 | } |
| 892 | |
| 893 | // Wait till all rows are finished. |
| 894 | for (int i = num_workers - 1; i > 0; --i) { |
| 895 | AVxWorker *const worker = &workers[i]; |
| 896 | if (!winterface->sync(worker)) { |
| 897 | had_error = 1; |
| 898 | error_info = ((LRWorkerData *)worker->data2)->error_info; |
| 899 | } |
| 900 | } |
| 901 | if (had_error) |
| 902 | aom_internal_error(cm->error, error_info.error_code, "%s", |
| 903 | error_info.detail); |
| 904 | } |
| 905 | |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 906 | static void foreach_rest_unit_in_planes_mt(AV1LrStruct *lr_ctxt, |
Wan-Teh Chang | 7f5f6b9 | 2023-11-02 11:12:58 -0700 | [diff] [blame] | 907 | AVxWorker *workers, int num_workers, |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 908 | AV1LrSync *lr_sync, AV1_COMMON *cm, |
| 909 | int do_extend_border) { |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 910 | FilterFrameCtxt *ctxt = lr_ctxt->ctxt; |
| 911 | |
| 912 | const int num_planes = av1_num_planes(cm); |
| 913 | |
| 914 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
| 915 | int num_rows_lr = 0; |
| 916 | |
| 917 | for (int plane = 0; plane < num_planes; plane++) { |
Ravi Chaudhary | 3201774 | 2018-09-14 14:04:31 +0530 | [diff] [blame] | 918 | if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue; |
| 919 | |
Rachel Barker | ed54a86 | 2023-07-07 19:04:55 +0000 | [diff] [blame] | 920 | const int plane_h = ctxt[plane].plane_h; |
Ravi Chaudhary | 3201774 | 2018-09-14 14:04:31 +0530 | [diff] [blame] | 921 | const int unit_size = cm->rst_info[plane].restoration_unit_size; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 922 | |
Rachel Barker | 0483cbe | 2023-07-05 23:31:38 +0000 | [diff] [blame] | 923 | num_rows_lr = AOMMAX(num_rows_lr, av1_lr_count_units(unit_size, plane_h)); |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 924 | } |
| 925 | |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 926 | int i; |
| 927 | assert(MAX_MB_PLANE == 3); |
| 928 | |
Tarundeep Singh | 6238e8a | 2021-08-25 23:51:49 +0530 | [diff] [blame] | 929 | if (!lr_sync->sync_range || num_rows_lr > lr_sync->rows || |
| 930 | num_workers > lr_sync->num_workers || num_planes > lr_sync->num_planes) { |
Cheng Chen | 28b4f28 | 2023-10-04 22:34:33 -0700 | [diff] [blame] | 931 | av1_loop_restoration_dealloc(lr_sync); |
Nithya V S | 709ccb3 | 2021-08-18 11:05:46 +0530 | [diff] [blame] | 932 | av1_loop_restoration_alloc(lr_sync, cm, num_workers, num_rows_lr, |
| 933 | num_planes, cm->width); |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 934 | } |
| 935 | |
| 936 | // Initialize cur_sb_col to -1 for all SB rows. |
| 937 | for (i = 0; i < num_planes; i++) { |
| 938 | memset(lr_sync->cur_sb_col[i], -1, |
| 939 | sizeof(*(lr_sync->cur_sb_col[i])) * num_rows_lr); |
| 940 | } |
| 941 | |
| 942 | enqueue_lr_jobs(lr_sync, lr_ctxt, cm); |
| 943 | |
| 944 | // Set up looprestoration thread data. |
Deepa K G | 5786774 | 2021-01-19 12:52:41 -0800 | [diff] [blame] | 945 | for (i = num_workers - 1; i >= 0; --i) { |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 946 | AVxWorker *const worker = &workers[i]; |
| 947 | lr_sync->lrworkerdata[i].lr_ctxt = (void *)lr_ctxt; |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 948 | lr_sync->lrworkerdata[i].do_extend_border = do_extend_border; |
Wan-Teh Chang | 8d2f577 | 2018-09-12 15:44:59 -0700 | [diff] [blame] | 949 | worker->hook = loop_restoration_row_worker; |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 950 | worker->data1 = lr_sync; |
| 951 | worker->data2 = &lr_sync->lrworkerdata[i]; |
| 952 | |
Deepa K G | 5786774 | 2021-01-19 12:52:41 -0800 | [diff] [blame] | 953 | // Start loop restoration |
Mudassir Galaganath | 7bdc776 | 2023-10-04 19:12:11 +0530 | [diff] [blame] | 954 | worker->had_error = 0; |
Deepa K G | 5786774 | 2021-01-19 12:52:41 -0800 | [diff] [blame] | 955 | if (i == 0) { |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 956 | winterface->execute(worker); |
| 957 | } else { |
| 958 | winterface->launch(worker); |
| 959 | } |
| 960 | } |
| 961 | |
Mudassir Galaganath | 7bdc776 | 2023-10-04 19:12:11 +0530 | [diff] [blame] | 962 | sync_lr_workers(workers, cm, num_workers); |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 963 | } |
| 964 | |
| 965 | void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame, |
| 966 | AV1_COMMON *cm, int optimized_lr, |
| 967 | AVxWorker *workers, int num_workers, |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 968 | AV1LrSync *lr_sync, void *lr_ctxt, |
| 969 | int do_extend_border) { |
Urvang Joshi | b6409e9 | 2020-03-23 11:23:27 -0700 | [diff] [blame] | 970 | assert(!cm->features.all_lossless); |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 971 | |
| 972 | const int num_planes = av1_num_planes(cm); |
| 973 | |
| 974 | AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt; |
| 975 | |
| 976 | av1_loop_restoration_filter_frame_init(loop_rest_ctxt, frame, cm, |
| 977 | optimized_lr, num_planes); |
| 978 | |
| 979 | foreach_rest_unit_in_planes_mt(loop_rest_ctxt, workers, num_workers, lr_sync, |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 980 | cm, do_extend_border); |
Ravi Chaudhary | e2aa401 | 2018-06-04 14:20:00 +0530 | [diff] [blame] | 981 | } |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 982 | |
| 983 | // Initializes cdef_sync parameters. |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 984 | static AOM_INLINE void reset_cdef_job_info(AV1CdefSync *const cdef_sync) { |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 985 | cdef_sync->end_of_frame = 0; |
| 986 | cdef_sync->fbr = 0; |
| 987 | cdef_sync->fbc = 0; |
| 988 | } |
| 989 | |
| 990 | static AOM_INLINE void launch_cdef_workers(AVxWorker *const workers, |
| 991 | int num_workers) { |
| 992 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
| 993 | for (int i = num_workers - 1; i >= 0; i--) { |
| 994 | AVxWorker *const worker = &workers[i]; |
Mudassir Galaganath | 24d4b29 | 2023-09-16 15:51:13 +0530 | [diff] [blame] | 995 | worker->had_error = 0; |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 996 | if (i == 0) |
| 997 | winterface->execute(worker); |
| 998 | else |
| 999 | winterface->launch(worker); |
| 1000 | } |
| 1001 | } |
| 1002 | |
| 1003 | static AOM_INLINE void sync_cdef_workers(AVxWorker *const workers, |
| 1004 | AV1_COMMON *const cm, |
| 1005 | int num_workers) { |
| 1006 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
Mudassir Galaganath | 24d4b29 | 2023-09-16 15:51:13 +0530 | [diff] [blame] | 1007 | int had_error = workers[0].had_error; |
| 1008 | struct aom_internal_error_info error_info; |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1009 | |
Mudassir Galaganath | 24d4b29 | 2023-09-16 15:51:13 +0530 | [diff] [blame] | 1010 | // Read the error_info of main thread. |
| 1011 | if (had_error) { |
| 1012 | AVxWorker *const worker = &workers[0]; |
| 1013 | error_info = ((AV1CdefWorkerData *)worker->data2)->error_info; |
| 1014 | } |
| 1015 | |
| 1016 | // Wait till all rows are finished. |
| 1017 | for (int i = num_workers - 1; i > 0; --i) { |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1018 | AVxWorker *const worker = &workers[i]; |
Mudassir Galaganath | 24d4b29 | 2023-09-16 15:51:13 +0530 | [diff] [blame] | 1019 | if (!winterface->sync(worker)) { |
| 1020 | had_error = 1; |
| 1021 | error_info = ((AV1CdefWorkerData *)worker->data2)->error_info; |
| 1022 | } |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1023 | } |
| 1024 | if (had_error) |
Mudassir Galaganath | 24d4b29 | 2023-09-16 15:51:13 +0530 | [diff] [blame] | 1025 | aom_internal_error(cm->error, error_info.error_code, "%s", |
| 1026 | error_info.detail); |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1027 | } |
| 1028 | |
| 1029 | // Updates the row index of the next job to be processed. |
| 1030 | // Also updates end_of_frame flag when the processing of all rows is complete. |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 1031 | static void update_cdef_row_next_job_info(AV1CdefSync *const cdef_sync, |
| 1032 | const int nvfb) { |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1033 | cdef_sync->fbr++; |
| 1034 | if (cdef_sync->fbr == nvfb) { |
| 1035 | cdef_sync->end_of_frame = 1; |
| 1036 | } |
| 1037 | } |
| 1038 | |
| 1039 | // Checks if a job is available. If job is available, |
| 1040 | // populates next job information and returns 1, else returns 0. |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 1041 | static AOM_INLINE int get_cdef_row_next_job(AV1CdefSync *const cdef_sync, |
Mudassir Galaganath | 24d4b29 | 2023-09-16 15:51:13 +0530 | [diff] [blame] | 1042 | volatile int *cur_fbr, |
| 1043 | const int nvfb) { |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1044 | #if CONFIG_MULTITHREAD |
| 1045 | pthread_mutex_lock(cdef_sync->mutex_); |
| 1046 | #endif // CONFIG_MULTITHREAD |
| 1047 | int do_next_row = 0; |
| 1048 | // Populates information needed for current job and update the row |
| 1049 | // index of the next row to be processed. |
Mudassir Galaganath | 24d4b29 | 2023-09-16 15:51:13 +0530 | [diff] [blame] | 1050 | if (!cdef_sync->cdef_mt_exit && cdef_sync->end_of_frame == 0) { |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1051 | do_next_row = 1; |
| 1052 | *cur_fbr = cdef_sync->fbr; |
| 1053 | update_cdef_row_next_job_info(cdef_sync, nvfb); |
| 1054 | } |
| 1055 | #if CONFIG_MULTITHREAD |
| 1056 | pthread_mutex_unlock(cdef_sync->mutex_); |
| 1057 | #endif // CONFIG_MULTITHREAD |
| 1058 | return do_next_row; |
| 1059 | } |
| 1060 | |
Mudassir Galaganath | 24d4b29 | 2023-09-16 15:51:13 +0530 | [diff] [blame] | 1061 | static void set_cdef_init_fb_row_done(AV1CdefSync *const cdef_sync, int nvfb) { |
| 1062 | for (int fbr = 0; fbr < nvfb; fbr++) cdef_row_mt_sync_write(cdef_sync, fbr); |
| 1063 | } |
| 1064 | |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1065 | // Hook function for each thread in CDEF multi-threading. |
| 1066 | static int cdef_sb_row_worker_hook(void *arg1, void *arg2) { |
| 1067 | AV1CdefSync *const cdef_sync = (AV1CdefSync *)arg1; |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 1068 | AV1CdefWorkerData *const cdef_worker = (AV1CdefWorkerData *)arg2; |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 1069 | AV1_COMMON *cm = cdef_worker->cm; |
| 1070 | const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
Mudassir Galaganath | 24d4b29 | 2023-09-16 15:51:13 +0530 | [diff] [blame] | 1071 | |
| 1072 | #if CONFIG_MULTITHREAD |
| 1073 | pthread_mutex_t *job_mutex_ = cdef_sync->mutex_; |
| 1074 | #endif |
| 1075 | struct aom_internal_error_info *const error_info = &cdef_worker->error_info; |
| 1076 | |
| 1077 | // The jmp_buf is valid only for the duration of the function that calls |
| 1078 | // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 |
| 1079 | // before it returns. |
| 1080 | if (setjmp(error_info->jmp)) { |
| 1081 | error_info->setjmp = 0; |
| 1082 | #if CONFIG_MULTITHREAD |
| 1083 | pthread_mutex_lock(job_mutex_); |
| 1084 | cdef_sync->cdef_mt_exit = true; |
| 1085 | pthread_mutex_unlock(job_mutex_); |
| 1086 | #endif |
| 1087 | // In case of cdef row-multithreading, the worker on a filter block row |
| 1088 | // (fbr) waits for the line buffers (top and bottom) copy of the above row. |
| 1089 | // Hence, in case a thread (main/worker) encounters an error before copying |
| 1090 | // of the line buffers, update that line buffer copy is complete in order to |
| 1091 | // avoid dependent workers waiting indefinitely. |
| 1092 | set_cdef_init_fb_row_done(cdef_sync, nvfb); |
| 1093 | return 0; |
| 1094 | } |
| 1095 | error_info->setjmp = 1; |
| 1096 | |
| 1097 | volatile int cur_fbr; |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 1098 | const int num_planes = av1_num_planes(cm); |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1099 | while (get_cdef_row_next_job(cdef_sync, &cur_fbr, nvfb)) { |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 1100 | MACROBLOCKD *xd = cdef_worker->xd; |
| 1101 | av1_cdef_fb_row(cm, xd, cdef_worker->linebuf, cdef_worker->colbuf, |
| 1102 | cdef_worker->srcbuf, cur_fbr, |
Mudassir Galaganath | 24d4b29 | 2023-09-16 15:51:13 +0530 | [diff] [blame] | 1103 | cdef_worker->cdef_init_fb_row_fn, cdef_sync, error_info); |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 1104 | if (cdef_worker->do_extend_border) { |
| 1105 | for (int plane = 0; plane < num_planes; ++plane) { |
| 1106 | const YV12_BUFFER_CONFIG *ybf = &cm->cur_frame->buf; |
| 1107 | const int is_uv = plane > 0; |
| 1108 | const int mi_high = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y; |
| 1109 | const int unit_height = MI_SIZE_64X64 << mi_high; |
| 1110 | const int v_start = cur_fbr * unit_height; |
| 1111 | const int v_end = |
| 1112 | AOMMIN(v_start + unit_height, ybf->crop_heights[is_uv]); |
| 1113 | aom_extend_frame_borders_plane_row(ybf, plane, v_start, v_end); |
| 1114 | } |
| 1115 | } |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1116 | } |
Mudassir Galaganath | 24d4b29 | 2023-09-16 15:51:13 +0530 | [diff] [blame] | 1117 | error_info->setjmp = 0; |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1118 | return 1; |
| 1119 | } |
| 1120 | |
| 1121 | // Assigns CDEF hook function and thread data to each worker. |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 1122 | static void prepare_cdef_frame_workers( |
| 1123 | AV1_COMMON *const cm, MACROBLOCKD *xd, AV1CdefWorkerData *const cdef_worker, |
| 1124 | AVxWorkerHook hook, AVxWorker *const workers, AV1CdefSync *const cdef_sync, |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 1125 | int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn, |
| 1126 | int do_extend_border) { |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1127 | const int num_planes = av1_num_planes(cm); |
| 1128 | |
| 1129 | cdef_worker[0].srcbuf = cm->cdef_info.srcbuf; |
| 1130 | for (int plane = 0; plane < num_planes; plane++) |
| 1131 | cdef_worker[0].colbuf[plane] = cm->cdef_info.colbuf[plane]; |
| 1132 | for (int i = num_workers - 1; i >= 0; i--) { |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 1133 | AVxWorker *const worker = &workers[i]; |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1134 | cdef_worker[i].cm = cm; |
| 1135 | cdef_worker[i].xd = xd; |
| 1136 | cdef_worker[i].cdef_init_fb_row_fn = cdef_init_fb_row_fn; |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 1137 | cdef_worker[i].do_extend_border = do_extend_border; |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1138 | for (int plane = 0; plane < num_planes; plane++) |
| 1139 | cdef_worker[i].linebuf[plane] = cm->cdef_info.linebuf[plane]; |
| 1140 | |
| 1141 | worker->hook = hook; |
| 1142 | worker->data1 = cdef_sync; |
| 1143 | worker->data2 = &cdef_worker[i]; |
| 1144 | } |
| 1145 | } |
| 1146 | |
| 1147 | // Initializes row-level parameters for CDEF frame. |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 1148 | void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm, |
| 1149 | const MACROBLOCKD *const xd, |
| 1150 | CdefBlockInfo *const fb_info, |
| 1151 | uint16_t **const linebuf, uint16_t *const src, |
| 1152 | struct AV1CdefSyncData *const cdef_sync, int fbr) { |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1153 | const int num_planes = av1_num_planes(cm); |
| 1154 | const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; |
| 1155 | const int luma_stride = |
| 1156 | ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols << MI_SIZE_LOG2, 4); |
| 1157 | |
| 1158 | // for the current filter block, it's top left corner mi structure (mi_tl) |
| 1159 | // is first accessed to check whether the top and left boundaries are |
| 1160 | // frame boundaries. Then bottom-left and top-right mi structures are |
| 1161 | // accessed to check whether the bottom and right boundaries |
| 1162 | // (respectively) are frame boundaries. |
| 1163 | // |
| 1164 | // Note that we can't just check the bottom-right mi structure - eg. if |
| 1165 | // we're at the right-hand edge of the frame but not the bottom, then |
| 1166 | // the bottom-right mi is NULL but the bottom-left is not. |
| 1167 | fb_info->frame_boundary[TOP] = (MI_SIZE_64X64 * fbr == 0) ? 1 : 0; |
| 1168 | if (fbr != nvfb - 1) |
| 1169 | fb_info->frame_boundary[BOTTOM] = |
| 1170 | (MI_SIZE_64X64 * (fbr + 1) == cm->mi_params.mi_rows) ? 1 : 0; |
| 1171 | else |
| 1172 | fb_info->frame_boundary[BOTTOM] = 1; |
| 1173 | |
| 1174 | fb_info->src = src; |
| 1175 | fb_info->damping = cm->cdef_info.cdef_damping; |
Tarundeep Singh | 4243e62 | 2021-04-20 16:10:22 +0530 | [diff] [blame] | 1176 | fb_info->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0); |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1177 | av1_zero(fb_info->dir); |
| 1178 | av1_zero(fb_info->var); |
| 1179 | |
| 1180 | for (int plane = 0; plane < num_planes; plane++) { |
| 1181 | const int stride = luma_stride >> xd->plane[plane].subsampling_x; |
| 1182 | uint16_t *top_linebuf = &linebuf[plane][0]; |
| 1183 | uint16_t *bot_linebuf = &linebuf[plane][nvfb * CDEF_VBORDER * stride]; |
| 1184 | { |
| 1185 | const int mi_high_l2 = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y; |
| 1186 | const int top_offset = MI_SIZE_64X64 * (fbr + 1) << mi_high_l2; |
| 1187 | const int bot_offset = MI_SIZE_64X64 * (fbr + 1) << mi_high_l2; |
| 1188 | |
| 1189 | if (fbr != nvfb - 1) // if (fbr != 0) // top line buffer copy |
| 1190 | av1_cdef_copy_sb8_16( |
| 1191 | cm, &top_linebuf[(fbr + 1) * CDEF_VBORDER * stride], stride, |
| 1192 | xd->plane[plane].dst.buf, top_offset - CDEF_VBORDER, 0, |
| 1193 | xd->plane[plane].dst.stride, CDEF_VBORDER, stride); |
| 1194 | if (fbr != nvfb - 1) // bottom line buffer copy |
| 1195 | av1_cdef_copy_sb8_16(cm, &bot_linebuf[fbr * CDEF_VBORDER * stride], |
| 1196 | stride, xd->plane[plane].dst.buf, bot_offset, 0, |
| 1197 | xd->plane[plane].dst.stride, CDEF_VBORDER, stride); |
| 1198 | } |
| 1199 | |
| 1200 | fb_info->top_linebuf[plane] = &linebuf[plane][fbr * CDEF_VBORDER * stride]; |
| 1201 | fb_info->bot_linebuf[plane] = |
| 1202 | &linebuf[plane] |
| 1203 | [nvfb * CDEF_VBORDER * stride + (fbr * CDEF_VBORDER * stride)]; |
| 1204 | } |
| 1205 | |
| 1206 | cdef_row_mt_sync_write(cdef_sync, fbr); |
| 1207 | cdef_row_mt_sync_read(cdef_sync, fbr); |
| 1208 | } |
| 1209 | |
| 1210 | // Implements multi-threading for CDEF. |
| 1211 | // Perform CDEF on input frame. |
| 1212 | // Inputs: |
| 1213 | // frame: Pointer to input frame buffer. |
| 1214 | // cm: Pointer to common structure. |
| 1215 | // xd: Pointer to common current coding block structure. |
| 1216 | // Returns: |
| 1217 | // Nothing will be returned. |
Vishnu Teja Manyam | 8c40b02 | 2021-04-25 14:36:09 +0530 | [diff] [blame] | 1218 | void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd, |
| 1219 | AV1CdefWorkerData *const cdef_worker, |
| 1220 | AVxWorker *const workers, AV1CdefSync *const cdef_sync, |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 1221 | int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn, |
| 1222 | int do_extend_border) { |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1223 | YV12_BUFFER_CONFIG *frame = &cm->cur_frame->buf; |
| 1224 | const int num_planes = av1_num_planes(cm); |
| 1225 | |
Tarundeep Singh | 4243e62 | 2021-04-20 16:10:22 +0530 | [diff] [blame] | 1226 | av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0, |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1227 | num_planes); |
| 1228 | |
| 1229 | reset_cdef_job_info(cdef_sync); |
| 1230 | prepare_cdef_frame_workers(cm, xd, cdef_worker, cdef_sb_row_worker_hook, |
| 1231 | workers, cdef_sync, num_workers, |
Deepa K G | aeee77c | 2022-05-28 14:00:56 +0530 | [diff] [blame] | 1232 | cdef_init_fb_row_fn, do_extend_border); |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 1233 | launch_cdef_workers(workers, num_workers); |
| 1234 | sync_cdef_workers(workers, cm, num_workers); |
| 1235 | } |
Jayasanker J | 34ec50a | 2022-07-01 19:00:02 +0530 | [diff] [blame] | 1236 | |
| 1237 | int av1_get_intrabc_extra_top_right_sb_delay(const AV1_COMMON *cm) { |
| 1238 | // No additional top-right delay when intraBC tool is not enabled. |
| 1239 | if (!av1_allow_intrabc(cm)) return 0; |
| 1240 | // Due to the hardware constraints on processing the intraBC tool with row |
| 1241 | // multithreading, a top-right delay of 3 superblocks of size 128x128 or 5 |
| 1242 | // superblocks of size 64x64 is mandated. However, a minimum top-right delay |
| 1243 | // of 1 superblock is assured with 'sync_range'. Hence return only the |
| 1244 | // additional superblock delay when the intraBC tool is enabled. |
| 1245 | return cm->seq_params->sb_size == BLOCK_128X128 ? 2 : 4; |
| 1246 | } |