Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1 | /* |
Yaowu Xu | bde4ac8 | 2016-11-28 15:26:06 -0800 | [diff] [blame] | 2 | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 3 | * |
Yaowu Xu | bde4ac8 | 2016-11-28 15:26:06 -0800 | [diff] [blame] | 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 10 | */ |
| 11 | |
Wan-Teh Chang | 245d30e | 2023-11-02 11:01:27 -0700 | [diff] [blame] | 12 | #include <assert.h> |
| 13 | |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 14 | #include "av1/common/warped_motion.h" |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 15 | #include "av1/common/thread_common.h" |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 16 | |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 17 | #include "av1/encoder/allintra_vis.h" |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 18 | #include "av1/encoder/bitstream.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 19 | #include "av1/encoder/encodeframe.h" |
Remya Prakasan | 0badba3 | 2023-08-17 16:44:52 +0530 | [diff] [blame] | 20 | #include "av1/encoder/encodeframe_utils.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 21 | #include "av1/encoder/encoder.h" |
Satish Kumar Suman | d3caa0d | 2020-06-16 14:02:50 +0530 | [diff] [blame] | 22 | #include "av1/encoder/encoder_alloc.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 23 | #include "av1/encoder/ethread.h" |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 24 | #if !CONFIG_REALTIME_ONLY |
| 25 | #include "av1/encoder/firstpass.h" |
| 26 | #endif |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 27 | #include "av1/encoder/global_motion.h" |
Remya | efd09fa | 2020-05-07 15:05:34 +0530 | [diff] [blame] | 28 | #include "av1/encoder/global_motion_facade.h" |
Jayasanker J | ee22526 | 2021-10-09 22:17:34 +0530 | [diff] [blame] | 29 | #include "av1/encoder/intra_mode_search_utils.h" |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 30 | #include "av1/encoder/picklpf.h" |
Ravi Chaudhary | 95ba1fa | 2018-10-11 11:42:04 +0530 | [diff] [blame] | 31 | #include "av1/encoder/rdopt.h" |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 32 | #include "aom_dsp/aom_dsp_common.h" |
Jayasanker J | 0761c69 | 2020-11-13 12:36:40 +0530 | [diff] [blame] | 33 | #include "av1/encoder/temporal_filter.h" |
Sachin Kumar Garg | 137265e | 2020-04-27 12:56:51 +0530 | [diff] [blame] | 34 | #include "av1/encoder/tpl_model.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 35 | |
Elliott Karpilovsky | 18fcd6a | 2019-09-16 15:15:06 -0700 | [diff] [blame] | 36 | static AOM_INLINE void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { |
Yaowu Xu | a0cc9aa | 2017-06-21 17:45:31 -0700 | [diff] [blame] | 37 | td->rd_counts.compound_ref_used_flag |= |
| 38 | td_t->rd_counts.compound_ref_used_flag; |
Zoe Liu | 8a5d343 | 2017-11-30 16:33:44 -0800 | [diff] [blame] | 39 | td->rd_counts.skip_mode_used_flag |= td_t->rd_counts.skip_mode_used_flag; |
Yunqing Wang | db70bf4 | 2019-08-19 09:28:11 -0700 | [diff] [blame] | 40 | |
Yunqing Wang | 7ad3aca | 2019-11-01 13:06:23 -0700 | [diff] [blame] | 41 | for (int i = 0; i < TX_SIZES_ALL; i++) { |
| 42 | for (int j = 0; j < TX_TYPES; j++) |
| 43 | td->rd_counts.tx_type_used[i][j] += td_t->rd_counts.tx_type_used[i][j]; |
Yunqing Wang | db70bf4 | 2019-08-19 09:28:11 -0700 | [diff] [blame] | 44 | } |
Yunqing Wang | 5f74dc2 | 2019-10-29 10:35:20 -0700 | [diff] [blame] | 45 | |
| 46 | for (int i = 0; i < BLOCK_SIZES_ALL; i++) { |
| 47 | for (int j = 0; j < 2; j++) { |
| 48 | td->rd_counts.obmc_used[i][j] += td_t->rd_counts.obmc_used[i][j]; |
| 49 | } |
| 50 | } |
Yunqing Wang | a09a46b | 2019-12-09 13:16:24 -0800 | [diff] [blame] | 51 | |
| 52 | for (int i = 0; i < 2; i++) { |
| 53 | td->rd_counts.warped_used[i] += td_t->rd_counts.warped_used[i]; |
| 54 | } |
Fyodor Kyslov | f5fb11f | 2021-10-29 12:12:46 -0700 | [diff] [blame] | 55 | |
Jingning Han | 564fe2c | 2022-03-08 23:35:01 -0800 | [diff] [blame] | 56 | td->rd_counts.seg_tmp_pred_cost[0] += td_t->rd_counts.seg_tmp_pred_cost[0]; |
| 57 | td->rd_counts.seg_tmp_pred_cost[1] += td_t->rd_counts.seg_tmp_pred_cost[1]; |
| 58 | |
Fyodor Kyslov | f5fb11f | 2021-10-29 12:12:46 -0700 | [diff] [blame] | 59 | td->rd_counts.newmv_or_intra_blocks += td_t->rd_counts.newmv_or_intra_blocks; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 60 | } |
| 61 | |
Elliott Karpilovsky | 18fcd6a | 2019-09-16 15:15:06 -0700 | [diff] [blame] | 62 | static AOM_INLINE void update_delta_lf_for_row_mt(AV1_COMP *cpi) { |
Ravi Chaudhary | 99e31c1 | 2018-11-09 10:43:53 +0530 | [diff] [blame] | 63 | AV1_COMMON *cm = &cpi->common; |
| 64 | MACROBLOCKD *xd = &cpi->td.mb.e_mbd; |
Tarundeep Singh | 4243e62 | 2021-04-20 16:10:22 +0530 | [diff] [blame] | 65 | const int mib_size = cm->seq_params->mib_size; |
Ravi Chaudhary | 99e31c1 | 2018-11-09 10:43:53 +0530 | [diff] [blame] | 66 | const int frame_lf_count = |
| 67 | av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2; |
Urvang Joshi | 54ffae7 | 2020-03-23 13:37:10 -0700 | [diff] [blame] | 68 | for (int row = 0; row < cm->tiles.rows; row++) { |
| 69 | for (int col = 0; col < cm->tiles.cols; col++) { |
| 70 | TileDataEnc *tile_data = &cpi->tile_data[row * cm->tiles.cols + col]; |
Ravi Chaudhary | 99e31c1 | 2018-11-09 10:43:53 +0530 | [diff] [blame] | 71 | const TileInfo *const tile_info = &tile_data->tile_info; |
| 72 | for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; |
| 73 | mi_row += mib_size) { |
| 74 | if (mi_row == tile_info->mi_row_start) |
| 75 | av1_reset_loop_filter_delta(xd, av1_num_planes(cm)); |
| 76 | for (int mi_col = tile_info->mi_col_start; |
| 77 | mi_col < tile_info->mi_col_end; mi_col += mib_size) { |
Urvang Joshi | 9dc909d | 2020-03-23 16:07:02 -0700 | [diff] [blame] | 78 | const int idx_str = cm->mi_params.mi_stride * mi_row + mi_col; |
| 79 | MB_MODE_INFO **mi = cm->mi_params.mi_grid_base + idx_str; |
Ravi Chaudhary | 99e31c1 | 2018-11-09 10:43:53 +0530 | [diff] [blame] | 80 | MB_MODE_INFO *mbmi = mi[0]; |
Tarundeep Singh | 4243e62 | 2021-04-20 16:10:22 +0530 | [diff] [blame] | 81 | if (mbmi->skip_txfm == 1 && |
| 82 | (mbmi->bsize == cm->seq_params->sb_size)) { |
Ravi Chaudhary | 99e31c1 | 2018-11-09 10:43:53 +0530 | [diff] [blame] | 83 | for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) |
| 84 | mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id]; |
| 85 | mbmi->delta_lf_from_base = xd->delta_lf_from_base; |
| 86 | } else { |
David Turner | ebf96f4 | 2018-11-14 16:57:57 +0000 | [diff] [blame] | 87 | if (cm->delta_q_info.delta_lf_multi) { |
Ravi Chaudhary | 99e31c1 | 2018-11-09 10:43:53 +0530 | [diff] [blame] | 88 | for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) |
| 89 | xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id]; |
| 90 | } else { |
| 91 | xd->delta_lf_from_base = mbmi->delta_lf_from_base; |
| 92 | } |
| 93 | } |
| 94 | } |
| 95 | } |
| 96 | } |
| 97 | } |
| 98 | } |
| 99 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 100 | void av1_row_mt_sync_read_dummy(AV1EncRowMultiThreadSync *row_mt_sync, int r, |
| 101 | int c) { |
Ravi Chaudhary | 40cdf13 | 2018-10-08 11:04:16 +0530 | [diff] [blame] | 102 | (void)row_mt_sync; |
| 103 | (void)r; |
| 104 | (void)c; |
Ravi Chaudhary | 40cdf13 | 2018-10-08 11:04:16 +0530 | [diff] [blame] | 105 | } |
| 106 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 107 | void av1_row_mt_sync_write_dummy(AV1EncRowMultiThreadSync *row_mt_sync, int r, |
| 108 | int c, int cols) { |
Ravi Chaudhary | 40cdf13 | 2018-10-08 11:04:16 +0530 | [diff] [blame] | 109 | (void)row_mt_sync; |
| 110 | (void)r; |
| 111 | (void)c; |
| 112 | (void)cols; |
Ravi Chaudhary | 40cdf13 | 2018-10-08 11:04:16 +0530 | [diff] [blame] | 113 | } |
| 114 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 115 | void av1_row_mt_sync_read(AV1EncRowMultiThreadSync *row_mt_sync, int r, int c) { |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 116 | #if CONFIG_MULTITHREAD |
| 117 | const int nsync = row_mt_sync->sync_range; |
| 118 | |
| 119 | if (r) { |
| 120 | pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1]; |
| 121 | pthread_mutex_lock(mutex); |
| 122 | |
Jayasanker J | 55ac31c | 2022-06-22 19:15:01 +0530 | [diff] [blame] | 123 | while (c > row_mt_sync->num_finished_cols[r - 1] - nsync - |
| 124 | row_mt_sync->intrabc_extra_top_right_sb_delay) { |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 125 | pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex); |
| 126 | } |
| 127 | pthread_mutex_unlock(mutex); |
| 128 | } |
| 129 | #else |
| 130 | (void)row_mt_sync; |
| 131 | (void)r; |
| 132 | (void)c; |
| 133 | #endif // CONFIG_MULTITHREAD |
| 134 | } |
| 135 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 136 | void av1_row_mt_sync_write(AV1EncRowMultiThreadSync *row_mt_sync, int r, int c, |
| 137 | int cols) { |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 138 | #if CONFIG_MULTITHREAD |
| 139 | const int nsync = row_mt_sync->sync_range; |
| 140 | int cur; |
| 141 | // Only signal when there are enough encoded blocks for next row to run. |
| 142 | int sig = 1; |
| 143 | |
| 144 | if (c < cols - 1) { |
| 145 | cur = c; |
| 146 | if (c % nsync) sig = 0; |
| 147 | } else { |
Jayasanker J | 55ac31c | 2022-06-22 19:15:01 +0530 | [diff] [blame] | 148 | cur = cols + nsync + row_mt_sync->intrabc_extra_top_right_sb_delay; |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 149 | } |
| 150 | |
| 151 | if (sig) { |
| 152 | pthread_mutex_lock(&row_mt_sync->mutex_[r]); |
| 153 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 154 | row_mt_sync->num_finished_cols[r] = cur; |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 155 | |
| 156 | pthread_cond_signal(&row_mt_sync->cond_[r]); |
| 157 | pthread_mutex_unlock(&row_mt_sync->mutex_[r]); |
| 158 | } |
| 159 | #else |
| 160 | (void)row_mt_sync; |
| 161 | (void)r; |
| 162 | (void)c; |
| 163 | (void)cols; |
| 164 | #endif // CONFIG_MULTITHREAD |
| 165 | } |
| 166 | |
| 167 | // Allocate memory for row synchronization |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 168 | static void row_mt_sync_mem_alloc(AV1EncRowMultiThreadSync *row_mt_sync, |
| 169 | AV1_COMMON *cm, int rows) { |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 170 | #if CONFIG_MULTITHREAD |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 171 | int i; |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 172 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 173 | CHECK_MEM_ERROR(cm, row_mt_sync->mutex_, |
| 174 | aom_malloc(sizeof(*row_mt_sync->mutex_) * rows)); |
| 175 | if (row_mt_sync->mutex_) { |
| 176 | for (i = 0; i < rows; ++i) { |
| 177 | pthread_mutex_init(&row_mt_sync->mutex_[i], NULL); |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 178 | } |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 179 | } |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 180 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 181 | CHECK_MEM_ERROR(cm, row_mt_sync->cond_, |
| 182 | aom_malloc(sizeof(*row_mt_sync->cond_) * rows)); |
| 183 | if (row_mt_sync->cond_) { |
| 184 | for (i = 0; i < rows; ++i) { |
| 185 | pthread_cond_init(&row_mt_sync->cond_[i], NULL); |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 186 | } |
| 187 | } |
| 188 | #endif // CONFIG_MULTITHREAD |
| 189 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 190 | CHECK_MEM_ERROR(cm, row_mt_sync->num_finished_cols, |
| 191 | aom_malloc(sizeof(*row_mt_sync->num_finished_cols) * rows)); |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 192 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 193 | row_mt_sync->rows = rows; |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 194 | // Set up nsync. |
Ravi Chaudhary | e2cf962 | 2018-12-21 18:19:07 +0530 | [diff] [blame] | 195 | row_mt_sync->sync_range = 1; |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 196 | } |
| 197 | |
| 198 | // Deallocate row based multi-threading synchronization related mutex and data |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 199 | static void row_mt_sync_mem_dealloc(AV1EncRowMultiThreadSync *row_mt_sync) { |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 200 | if (row_mt_sync != NULL) { |
| 201 | #if CONFIG_MULTITHREAD |
| 202 | int i; |
| 203 | |
| 204 | if (row_mt_sync->mutex_ != NULL) { |
| 205 | for (i = 0; i < row_mt_sync->rows; ++i) { |
| 206 | pthread_mutex_destroy(&row_mt_sync->mutex_[i]); |
| 207 | } |
| 208 | aom_free(row_mt_sync->mutex_); |
| 209 | } |
| 210 | if (row_mt_sync->cond_ != NULL) { |
| 211 | for (i = 0; i < row_mt_sync->rows; ++i) { |
| 212 | pthread_cond_destroy(&row_mt_sync->cond_[i]); |
| 213 | } |
| 214 | aom_free(row_mt_sync->cond_); |
| 215 | } |
| 216 | #endif // CONFIG_MULTITHREAD |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 217 | aom_free(row_mt_sync->num_finished_cols); |
| 218 | |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 219 | // clear the structure as the source of this call may be dynamic change |
| 220 | // in tiles in which case this call will be followed by an _alloc() |
| 221 | // which may fail. |
| 222 | av1_zero(*row_mt_sync); |
| 223 | } |
| 224 | } |
| 225 | |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 226 | static AOM_INLINE int get_sb_rows_in_frame(AV1_COMMON *cm) { |
| 227 | return CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, |
| 228 | cm->seq_params->mib_size_log2); |
| 229 | } |
| 230 | |
Deepa K G | 14df51d | 2020-07-03 13:00:50 +0530 | [diff] [blame] | 231 | static void row_mt_mem_alloc(AV1_COMP *cpi, int max_rows, int max_cols, |
Deepa K G | 68985d0 | 2020-06-21 09:42:14 +0530 | [diff] [blame] | 232 | int alloc_row_ctx) { |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 233 | struct AV1Common *cm = &cpi->common; |
| 234 | AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt; |
| 235 | const int tile_cols = cm->tiles.cols; |
| 236 | const int tile_rows = cm->tiles.rows; |
| 237 | int tile_col, tile_row; |
| 238 | |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 239 | av1_row_mt_mem_dealloc(cpi); |
| 240 | |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 241 | // Allocate memory for row based multi-threading |
| 242 | for (tile_row = 0; tile_row < tile_rows; tile_row++) { |
| 243 | for (tile_col = 0; tile_col < tile_cols; tile_col++) { |
| 244 | int tile_index = tile_row * tile_cols + tile_col; |
| 245 | TileDataEnc *const this_tile = &cpi->tile_data[tile_index]; |
| 246 | |
Deepa K G | 14df51d | 2020-07-03 13:00:50 +0530 | [diff] [blame] | 247 | row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, max_rows); |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 248 | |
Deepa K G | 68985d0 | 2020-06-21 09:42:14 +0530 | [diff] [blame] | 249 | this_tile->row_ctx = NULL; |
| 250 | if (alloc_row_ctx) { |
Deepa K G | 14df51d | 2020-07-03 13:00:50 +0530 | [diff] [blame] | 251 | assert(max_cols > 0); |
| 252 | const int num_row_ctx = AOMMAX(1, (max_cols - 1)); |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 253 | CHECK_MEM_ERROR(cm, this_tile->row_ctx, |
| 254 | (FRAME_CONTEXT *)aom_memalign( |
| 255 | 16, num_row_ctx * sizeof(*this_tile->row_ctx))); |
| 256 | } |
| 257 | } |
| 258 | } |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 259 | const int sb_rows = get_sb_rows_in_frame(cm); |
| 260 | CHECK_MEM_ERROR( |
| 261 | cm, enc_row_mt->num_tile_cols_done, |
| 262 | aom_malloc(sizeof(*enc_row_mt->num_tile_cols_done) * sb_rows)); |
| 263 | |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 264 | enc_row_mt->allocated_tile_cols = tile_cols; |
| 265 | enc_row_mt->allocated_tile_rows = tile_rows; |
Deepa K G | 14df51d | 2020-07-03 13:00:50 +0530 | [diff] [blame] | 266 | enc_row_mt->allocated_rows = max_rows; |
| 267 | enc_row_mt->allocated_cols = max_cols - 1; |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 268 | enc_row_mt->allocated_sb_rows = sb_rows; |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 269 | enc_row_mt->row_mt_exit = false; |
Remya Prakasan | f4341c8 | 2023-08-17 19:18:16 +0530 | [diff] [blame] | 270 | enc_row_mt->firstpass_mt_exit = false; |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 271 | } |
| 272 | |
| 273 | void av1_row_mt_mem_dealloc(AV1_COMP *cpi) { |
| 274 | AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt; |
| 275 | const int tile_cols = enc_row_mt->allocated_tile_cols; |
| 276 | const int tile_rows = enc_row_mt->allocated_tile_rows; |
| 277 | int tile_col, tile_row; |
| 278 | |
| 279 | // Free row based multi-threading sync memory |
| 280 | for (tile_row = 0; tile_row < tile_rows; tile_row++) { |
| 281 | for (tile_col = 0; tile_col < tile_cols; tile_col++) { |
| 282 | int tile_index = tile_row * tile_cols + tile_col; |
| 283 | TileDataEnc *const this_tile = &cpi->tile_data[tile_index]; |
| 284 | |
| 285 | row_mt_sync_mem_dealloc(&this_tile->row_mt_sync); |
| 286 | |
Vishesh | a36c8d0 | 2020-06-30 15:42:35 +0530 | [diff] [blame] | 287 | if (cpi->oxcf.algo_cfg.cdf_update_mode) aom_free(this_tile->row_ctx); |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 288 | } |
| 289 | } |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 290 | aom_free(enc_row_mt->num_tile_cols_done); |
James Zern | 9183800 | 2022-11-01 17:14:06 -0700 | [diff] [blame] | 291 | enc_row_mt->num_tile_cols_done = NULL; |
Deepa K G | 14df51d | 2020-07-03 13:00:50 +0530 | [diff] [blame] | 292 | enc_row_mt->allocated_rows = 0; |
| 293 | enc_row_mt->allocated_cols = 0; |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 294 | enc_row_mt->allocated_tile_cols = 0; |
| 295 | enc_row_mt->allocated_tile_rows = 0; |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 296 | enc_row_mt->allocated_sb_rows = 0; |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 297 | } |
| 298 | |
// Fills thread_id_to_tile_id[0 .. num_workers - 1] with starting tile ids,
// handing tiles out in increasing order and wrapping back to tile 0 after
// tile num_tiles - 1.
static AOM_INLINE void assign_tile_to_thread(int *thread_id_to_tile_id,
                                             int num_tiles, int num_workers) {
  int next_tile = 0;

  for (int worker = 0; worker < num_workers; ++worker) {
    thread_id_to_tile_id[worker] = next_tile;
    ++next_tile;
    if (next_tile == num_tiles) next_tile = 0;
  }
}
| 309 | |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 310 | static AOM_INLINE int get_next_job(TileDataEnc *const tile_data, |
| 311 | int *current_mi_row, int mib_size) { |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 312 | AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync; |
| 313 | const int mi_row_end = tile_data->tile_info.mi_row_end; |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 314 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 315 | if (row_mt_sync->next_mi_row < mi_row_end) { |
| 316 | *current_mi_row = row_mt_sync->next_mi_row; |
| 317 | row_mt_sync->num_threads_working++; |
| 318 | row_mt_sync->next_mi_row += mib_size; |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 319 | return 1; |
| 320 | } |
| 321 | return 0; |
| 322 | } |
| 323 | |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 324 | static AOM_INLINE void switch_tile_and_get_next_job( |
| 325 | AV1_COMMON *const cm, TileDataEnc *const tile_data, int *cur_tile_id, |
Cheng Chen | fd2c0cf | 2020-11-30 16:28:37 -0800 | [diff] [blame] | 326 | int *current_mi_row, int *end_of_frame, int is_firstpass, |
| 327 | const BLOCK_SIZE fp_block_size) { |
Urvang Joshi | 54ffae7 | 2020-03-23 13:37:10 -0700 | [diff] [blame] | 328 | const int tile_cols = cm->tiles.cols; |
| 329 | const int tile_rows = cm->tiles.rows; |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 330 | |
| 331 | int tile_id = -1; // Stores the tile ID with minimum proc done |
| 332 | int max_mis_to_encode = 0; |
| 333 | int min_num_threads_working = INT_MAX; |
| 334 | |
| 335 | for (int tile_row = 0; tile_row < tile_rows; tile_row++) { |
| 336 | for (int tile_col = 0; tile_col < tile_cols; tile_col++) { |
| 337 | int tile_index = tile_row * tile_cols + tile_col; |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 338 | TileDataEnc *const this_tile = &tile_data[tile_index]; |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 339 | AV1EncRowMultiThreadSync *const row_mt_sync = &this_tile->row_mt_sync; |
| 340 | |
Jerome Jiang | f1656cd | 2020-10-19 16:29:40 -0700 | [diff] [blame] | 341 | #if CONFIG_REALTIME_ONLY |
| 342 | int num_b_rows_in_tile = |
Wan-Teh Chang | 5350a5e | 2022-01-29 11:50:17 -0800 | [diff] [blame] | 343 | av1_get_sb_rows_in_tile(cm, &this_tile->tile_info); |
Jerome Jiang | f1656cd | 2020-10-19 16:29:40 -0700 | [diff] [blame] | 344 | int num_b_cols_in_tile = |
Wan-Teh Chang | 5350a5e | 2022-01-29 11:50:17 -0800 | [diff] [blame] | 345 | av1_get_sb_cols_in_tile(cm, &this_tile->tile_info); |
Jerome Jiang | f1656cd | 2020-10-19 16:29:40 -0700 | [diff] [blame] | 346 | #else |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 347 | int num_b_rows_in_tile = |
Cheng Chen | fd2c0cf | 2020-11-30 16:28:37 -0800 | [diff] [blame] | 348 | is_firstpass |
Wan-Teh Chang | 5350a5e | 2022-01-29 11:50:17 -0800 | [diff] [blame] | 349 | ? av1_get_unit_rows_in_tile(&this_tile->tile_info, fp_block_size) |
| 350 | : av1_get_sb_rows_in_tile(cm, &this_tile->tile_info); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 351 | int num_b_cols_in_tile = |
Cheng Chen | fd2c0cf | 2020-11-30 16:28:37 -0800 | [diff] [blame] | 352 | is_firstpass |
Wan-Teh Chang | 5350a5e | 2022-01-29 11:50:17 -0800 | [diff] [blame] | 353 | ? av1_get_unit_cols_in_tile(&this_tile->tile_info, fp_block_size) |
| 354 | : av1_get_sb_cols_in_tile(cm, &this_tile->tile_info); |
Jerome Jiang | f1656cd | 2020-10-19 16:29:40 -0700 | [diff] [blame] | 355 | #endif |
Ravi Chaudhary | 41debdc | 2019-02-08 17:21:52 +0530 | [diff] [blame] | 356 | int theoretical_limit_on_threads = |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 357 | AOMMIN((num_b_cols_in_tile + 1) >> 1, num_b_rows_in_tile); |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 358 | int num_threads_working = row_mt_sync->num_threads_working; |
| 359 | |
Ravi Chaudhary | c692de3 | 2019-01-11 15:13:55 +0530 | [diff] [blame] | 360 | if (num_threads_working < theoretical_limit_on_threads) { |
| 361 | int num_mis_to_encode = |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 362 | this_tile->tile_info.mi_row_end - row_mt_sync->next_mi_row; |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 363 | |
Ravi Chaudhary | c692de3 | 2019-01-11 15:13:55 +0530 | [diff] [blame] | 364 | // Tile to be processed by this thread is selected on the basis of |
| 365 | // availability of jobs: |
| 366 | // 1) If jobs are available, tile to be processed is chosen on the |
| 367 | // basis of minimum number of threads working for that tile. If two or |
| 368 | // more tiles have same number of threads working for them, then the |
| 369 | // tile with maximum number of jobs available will be chosen. |
| 370 | // 2) If no jobs are available, then end_of_frame is reached. |
| 371 | if (num_mis_to_encode > 0) { |
| 372 | if (num_threads_working < min_num_threads_working) { |
| 373 | min_num_threads_working = num_threads_working; |
| 374 | max_mis_to_encode = 0; |
| 375 | } |
| 376 | if (num_threads_working == min_num_threads_working && |
| 377 | num_mis_to_encode > max_mis_to_encode) { |
| 378 | tile_id = tile_index; |
| 379 | max_mis_to_encode = num_mis_to_encode; |
| 380 | } |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 381 | } |
| 382 | } |
| 383 | } |
| 384 | } |
| 385 | if (tile_id == -1) { |
| 386 | *end_of_frame = 1; |
| 387 | } else { |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 388 | // Update the current tile id to the tile id that will be processed next, |
| 389 | // which will be the least processed tile. |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 390 | *cur_tile_id = tile_id; |
Cheng Chen | fd2c0cf | 2020-11-30 16:28:37 -0800 | [diff] [blame] | 391 | const int unit_height = mi_size_high[fp_block_size]; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 392 | get_next_job(&tile_data[tile_id], current_mi_row, |
Tarundeep Singh | 4243e62 | 2021-04-20 16:10:22 +0530 | [diff] [blame] | 393 | is_firstpass ? unit_height : cm->seq_params->mib_size); |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 394 | } |
| 395 | } |
| 396 | |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 397 | #if !CONFIG_REALTIME_ONLY |
Remya Prakasan | f4341c8 | 2023-08-17 19:18:16 +0530 | [diff] [blame] | 398 | static void set_firstpass_encode_done(AV1_COMP *cpi) { |
| 399 | AV1_COMMON *const cm = &cpi->common; |
| 400 | AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt; |
| 401 | const int tile_cols = cm->tiles.cols; |
| 402 | const int tile_rows = cm->tiles.rows; |
| 403 | const BLOCK_SIZE fp_block_size = cpi->fp_block_size; |
| 404 | const int unit_height = mi_size_high[fp_block_size]; |
| 405 | |
| 406 | // In case of multithreading of firstpass encode, due to top-right |
| 407 | // dependency, the worker on a firstpass row waits for the completion of the |
| 408 | // firstpass processing of the top and top-right fp_blocks. Hence, in case a |
| 409 | // thread (main/worker) encounters an error, update the firstpass processing |
| 410 | // of every row in the frame to indicate that it is complete in order to avoid |
| 411 | // dependent workers waiting indefinitely. |
| 412 | for (int tile_row = 0; tile_row < tile_rows; ++tile_row) { |
| 413 | for (int tile_col = 0; tile_col < tile_cols; ++tile_col) { |
| 414 | TileDataEnc *const tile_data = |
| 415 | &cpi->tile_data[tile_row * tile_cols + tile_col]; |
| 416 | TileInfo *tile = &tile_data->tile_info; |
| 417 | AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync; |
| 418 | const int unit_cols_in_tile = |
| 419 | av1_get_unit_cols_in_tile(tile, fp_block_size); |
| 420 | for (int mi_row = tile->mi_row_start, unit_row_in_tile = 0; |
| 421 | mi_row < tile->mi_row_end; |
| 422 | mi_row += unit_height, unit_row_in_tile++) { |
| 423 | enc_row_mt->sync_write_ptr(row_mt_sync, unit_row_in_tile, |
| 424 | unit_cols_in_tile - 1, unit_cols_in_tile); |
| 425 | } |
| 426 | } |
| 427 | } |
| 428 | } |
| 429 | |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 430 | static int fp_enc_row_mt_worker_hook(void *arg1, void *unused) { |
| 431 | EncWorkerData *const thread_data = (EncWorkerData *)arg1; |
| 432 | AV1_COMP *const cpi = thread_data->cpi; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 433 | int thread_id = thread_data->thread_id; |
| 434 | AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 435 | #if CONFIG_MULTITHREAD |
| 436 | pthread_mutex_t *enc_row_mt_mutex_ = enc_row_mt->mutex_; |
| 437 | #endif |
| 438 | (void)unused; |
Remya Prakasan | f4341c8 | 2023-08-17 19:18:16 +0530 | [diff] [blame] | 439 | struct aom_internal_error_info *const error_info = &thread_data->error_info; |
| 440 | MACROBLOCKD *const xd = &thread_data->td->mb.e_mbd; |
| 441 | xd->error_info = error_info; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 442 | |
Remya Prakasan | f4341c8 | 2023-08-17 19:18:16 +0530 | [diff] [blame] | 443 | // The jmp_buf is valid only for the duration of the function that calls |
| 444 | // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 |
| 445 | // before it returns. |
| 446 | if (setjmp(error_info->jmp)) { |
| 447 | error_info->setjmp = 0; |
| 448 | #if CONFIG_MULTITHREAD |
| 449 | pthread_mutex_lock(enc_row_mt_mutex_); |
| 450 | enc_row_mt->firstpass_mt_exit = true; |
| 451 | pthread_mutex_unlock(enc_row_mt_mutex_); |
| 452 | #endif |
| 453 | set_firstpass_encode_done(cpi); |
| 454 | return 0; |
| 455 | } |
| 456 | error_info->setjmp = 1; |
| 457 | |
| 458 | AV1_COMMON *const cm = &cpi->common; |
| 459 | int cur_tile_id = enc_row_mt->thread_id_to_tile_id[thread_id]; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 460 | assert(cur_tile_id != -1); |
| 461 | |
Cheng Chen | fd2c0cf | 2020-11-30 16:28:37 -0800 | [diff] [blame] | 462 | const BLOCK_SIZE fp_block_size = cpi->fp_block_size; |
| 463 | const int unit_height = mi_size_high[fp_block_size]; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 464 | int end_of_frame = 0; |
| 465 | while (1) { |
| 466 | int current_mi_row = -1; |
| 467 | #if CONFIG_MULTITHREAD |
| 468 | pthread_mutex_lock(enc_row_mt_mutex_); |
| 469 | #endif |
Remya Prakasan | f4341c8 | 2023-08-17 19:18:16 +0530 | [diff] [blame] | 470 | bool firstpass_mt_exit = enc_row_mt->firstpass_mt_exit; |
| 471 | if (!firstpass_mt_exit && !get_next_job(&cpi->tile_data[cur_tile_id], |
| 472 | ¤t_mi_row, unit_height)) { |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 473 | // No jobs are available for the current tile. Query for the status of |
| 474 | // other tiles and get the next job if available |
| 475 | switch_tile_and_get_next_job(cm, cpi->tile_data, &cur_tile_id, |
Cheng Chen | fd2c0cf | 2020-11-30 16:28:37 -0800 | [diff] [blame] | 476 | ¤t_mi_row, &end_of_frame, 1, |
| 477 | fp_block_size); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 478 | } |
| 479 | #if CONFIG_MULTITHREAD |
| 480 | pthread_mutex_unlock(enc_row_mt_mutex_); |
| 481 | #endif |
Remya Prakasan | f4341c8 | 2023-08-17 19:18:16 +0530 | [diff] [blame] | 482 | // When firstpass_mt_exit is set to true, other workers need not pursue any |
| 483 | // further jobs. |
Mudassir Galaganath | 17c3321 | 2023-09-04 15:06:17 +0530 | [diff] [blame] | 484 | if (firstpass_mt_exit || end_of_frame) break; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 485 | |
| 486 | TileDataEnc *const this_tile = &cpi->tile_data[cur_tile_id]; |
| 487 | AV1EncRowMultiThreadSync *const row_mt_sync = &this_tile->row_mt_sync; |
| 488 | ThreadData *td = thread_data->td; |
| 489 | |
| 490 | assert(current_mi_row != -1 && |
Wan-Teh Chang | 0e4ed9b | 2022-01-29 09:49:43 -0800 | [diff] [blame] | 491 | current_mi_row < this_tile->tile_info.mi_row_end); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 492 | |
Cheng Chen | fd2c0cf | 2020-11-30 16:28:37 -0800 | [diff] [blame] | 493 | const int unit_height_log2 = mi_size_high_log2[fp_block_size]; |
| 494 | av1_first_pass_row(cpi, td, this_tile, current_mi_row >> unit_height_log2, |
| 495 | fp_block_size); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 496 | #if CONFIG_MULTITHREAD |
| 497 | pthread_mutex_lock(enc_row_mt_mutex_); |
| 498 | #endif |
| 499 | row_mt_sync->num_threads_working--; |
| 500 | #if CONFIG_MULTITHREAD |
| 501 | pthread_mutex_unlock(enc_row_mt_mutex_); |
| 502 | #endif |
| 503 | } |
Remya Prakasan | f4341c8 | 2023-08-17 19:18:16 +0530 | [diff] [blame] | 504 | error_info->setjmp = 0; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 505 | return 1; |
| 506 | } |
| 507 | #endif |
| 508 | |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 509 | static void launch_loop_filter_rows(AV1_COMMON *cm, EncWorkerData *thread_data, |
| 510 | AV1EncRowMultiThreadInfo *enc_row_mt, |
| 511 | int mib_size_log2) { |
| 512 | AV1LfSync *const lf_sync = (AV1LfSync *)thread_data->lf_sync; |
| 513 | const int sb_rows = get_sb_rows_in_frame(cm); |
| 514 | AV1LfMTInfo *cur_job_info; |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 515 | bool row_mt_exit = false; |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 516 | (void)enc_row_mt; |
| 517 | #if CONFIG_MULTITHREAD |
| 518 | pthread_mutex_t *enc_row_mt_mutex_ = enc_row_mt->mutex_; |
| 519 | #endif |
| 520 | |
| 521 | while ((cur_job_info = get_lf_job_info(lf_sync)) != NULL) { |
| 522 | LFWorkerData *const lf_data = (LFWorkerData *)thread_data->lf_data; |
| 523 | const int lpf_opt_level = cur_job_info->lpf_opt_level; |
| 524 | (void)sb_rows; |
| 525 | #if CONFIG_MULTITHREAD |
| 526 | const int cur_sb_row = cur_job_info->mi_row >> mib_size_log2; |
| 527 | const int next_sb_row = AOMMIN(sb_rows - 1, cur_sb_row + 1); |
| 528 | // Wait for current and next superblock row to finish encoding. |
| 529 | pthread_mutex_lock(enc_row_mt_mutex_); |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 530 | while (!enc_row_mt->row_mt_exit && |
| 531 | (enc_row_mt->num_tile_cols_done[cur_sb_row] < cm->tiles.cols || |
| 532 | enc_row_mt->num_tile_cols_done[next_sb_row] < cm->tiles.cols)) { |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 533 | pthread_cond_wait(enc_row_mt->cond_, enc_row_mt_mutex_); |
| 534 | } |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 535 | row_mt_exit = enc_row_mt->row_mt_exit; |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 536 | pthread_mutex_unlock(enc_row_mt_mutex_); |
| 537 | #endif |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 538 | if (row_mt_exit) return; |
| 539 | |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 540 | av1_thread_loop_filter_rows( |
| 541 | lf_data->frame_buffer, lf_data->cm, lf_data->planes, lf_data->xd, |
| 542 | cur_job_info->mi_row, cur_job_info->plane, cur_job_info->dir, |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 543 | lpf_opt_level, lf_sync, &thread_data->error_info, lf_data->params_buf, |
| 544 | lf_data->tx_buf, mib_size_log2); |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 545 | } |
| 546 | } |
| 547 | |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 548 | static void set_encoding_done(AV1_COMP *cpi) { |
| 549 | AV1_COMMON *const cm = &cpi->common; |
| 550 | const int tile_cols = cm->tiles.cols; |
| 551 | const int tile_rows = cm->tiles.rows; |
| 552 | AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt; |
| 553 | const int mib_size = cm->seq_params->mib_size; |
| 554 | |
| 555 | // In case of row-multithreading, due to top-right dependency, the worker on |
| 556 | // an SB row waits for the completion of the encode of the top and top-right |
| 557 | // SBs. Hence, in case a thread (main/worker) encounters an error, update that |
| 558 | // encoding of every SB row in the frame is complete in order to avoid the |
| 559 | // dependent workers of every tile from waiting indefinitely. |
| 560 | for (int tile_row = 0; tile_row < tile_rows; tile_row++) { |
| 561 | for (int tile_col = 0; tile_col < tile_cols; tile_col++) { |
| 562 | TileDataEnc *const this_tile = |
| 563 | &cpi->tile_data[tile_row * tile_cols + tile_col]; |
| 564 | const TileInfo *const tile_info = &this_tile->tile_info; |
| 565 | AV1EncRowMultiThreadSync *const row_mt_sync = &this_tile->row_mt_sync; |
| 566 | const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info); |
| 567 | for (int mi_row = tile_info->mi_row_start, sb_row_in_tile = 0; |
| 568 | mi_row < tile_info->mi_row_end; |
| 569 | mi_row += mib_size, sb_row_in_tile++) { |
| 570 | enc_row_mt->sync_write_ptr(row_mt_sync, sb_row_in_tile, |
| 571 | sb_cols_in_tile - 1, sb_cols_in_tile); |
| 572 | } |
| 573 | } |
| 574 | } |
| 575 | } |
| 576 | |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 577 | static int enc_row_mt_worker_hook(void *arg1, void *unused) { |
| 578 | EncWorkerData *const thread_data = (EncWorkerData *)arg1; |
| 579 | AV1_COMP *const cpi = thread_data->cpi; |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 580 | int thread_id = thread_data->thread_id; |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 581 | AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt; |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 582 | #if CONFIG_MULTITHREAD |
| 583 | pthread_mutex_t *enc_row_mt_mutex_ = enc_row_mt->mutex_; |
| 584 | #endif |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 585 | (void)unused; |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 586 | |
| 587 | struct aom_internal_error_info *const error_info = &thread_data->error_info; |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 588 | AV1LfSync *const lf_sync = thread_data->lf_sync; |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 589 | MACROBLOCKD *const xd = &thread_data->td->mb.e_mbd; |
| 590 | xd->error_info = error_info; |
| 591 | |
| 592 | // The jmp_buf is valid only for the duration of the function that calls |
| 593 | // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 |
| 594 | // before it returns. |
| 595 | if (setjmp(error_info->jmp)) { |
| 596 | error_info->setjmp = 0; |
| 597 | #if CONFIG_MULTITHREAD |
| 598 | pthread_mutex_lock(enc_row_mt_mutex_); |
| 599 | enc_row_mt->row_mt_exit = true; |
| 600 | // Wake up all the workers waiting in launch_loop_filter_rows() to exit in |
| 601 | // case of an error. |
| 602 | pthread_cond_broadcast(enc_row_mt->cond_); |
| 603 | pthread_mutex_unlock(enc_row_mt_mutex_); |
| 604 | #endif |
| 605 | set_encoding_done(cpi); |
Mudassir Galaganath | af905f1 | 2023-09-01 14:56:15 +0530 | [diff] [blame] | 606 | |
| 607 | if (cpi->mt_info.pipeline_lpf_mt_with_enc) { |
| 608 | #if CONFIG_MULTITHREAD |
| 609 | pthread_mutex_lock(lf_sync->job_mutex); |
| 610 | lf_sync->lf_mt_exit = true; |
| 611 | pthread_mutex_unlock(lf_sync->job_mutex); |
| 612 | #endif |
| 613 | av1_set_vert_loop_filter_done(&cpi->common, lf_sync, |
| 614 | cpi->common.seq_params->mib_size_log2); |
| 615 | } |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 616 | return 0; |
| 617 | } |
| 618 | error_info->setjmp = 1; |
| 619 | |
| 620 | AV1_COMMON *const cm = &cpi->common; |
| 621 | const int mib_size_log2 = cm->seq_params->mib_size_log2; |
| 622 | int cur_tile_id = enc_row_mt->thread_id_to_tile_id[thread_id]; |
| 623 | |
Deepa K G | a95eff4 | 2022-09-15 12:25:45 +0530 | [diff] [blame] | 624 | // Preallocate the pc_tree for realtime coding to reduce the cost of memory |
| 625 | // allocation. |
Mudassir Galaganath | 3a79057 | 2023-10-16 12:30:23 +0530 | [diff] [blame] | 626 | if (cpi->sf.rt_sf.use_nonrd_pick_mode) { |
| 627 | thread_data->td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size); |
| 628 | if (!thread_data->td->pc_root) |
| 629 | aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, |
| 630 | "Failed to allocate PC_TREE"); |
| 631 | } else { |
| 632 | thread_data->td->pc_root = NULL; |
| 633 | } |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 634 | |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 635 | assert(cur_tile_id != -1); |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 636 | |
Cheng Chen | fd2c0cf | 2020-11-30 16:28:37 -0800 | [diff] [blame] | 637 | const BLOCK_SIZE fp_block_size = cpi->fp_block_size; |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 638 | int end_of_frame = 0; |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 639 | bool row_mt_exit = false; |
venkat sanampudi | 3e65b48 | 2021-05-07 08:49:28 +0530 | [diff] [blame] | 640 | |
| 641 | // When master thread does not have a valid job to process, xd->tile_ctx |
| 642 | // is not set and it contains NULL pointer. This can result in NULL pointer |
| 643 | // access violation if accessed beyond the encode stage. Hence, updating |
| 644 | // thread_data->td->mb.e_mbd.tile_ctx is initialized with common frame |
| 645 | // context to avoid NULL pointer access in subsequent stages. |
| 646 | thread_data->td->mb.e_mbd.tile_ctx = cm->fc; |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 647 | while (1) { |
| 648 | int current_mi_row = -1; |
| 649 | #if CONFIG_MULTITHREAD |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 650 | pthread_mutex_lock(enc_row_mt_mutex_); |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 651 | #endif |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 652 | row_mt_exit = enc_row_mt->row_mt_exit; |
Mudassir Galaganath | 0874cac | 2023-07-07 09:01:37 +0530 | [diff] [blame] | 653 | // row_mt_exit check here can be avoided as it is checked after |
| 654 | // sync_read_ptr() in encode_sb_row(). However, checking row_mt_exit here, |
| 655 | // tries to return before calling the function get_next_job(). |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 656 | if (!row_mt_exit && |
| 657 | !get_next_job(&cpi->tile_data[cur_tile_id], ¤t_mi_row, |
Tarundeep Singh | 4243e62 | 2021-04-20 16:10:22 +0530 | [diff] [blame] | 658 | cm->seq_params->mib_size)) { |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 659 | // No jobs are available for the current tile. Query for the status of |
| 660 | // other tiles and get the next job if available |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 661 | switch_tile_and_get_next_job(cm, cpi->tile_data, &cur_tile_id, |
Cheng Chen | fd2c0cf | 2020-11-30 16:28:37 -0800 | [diff] [blame] | 662 | ¤t_mi_row, &end_of_frame, 0, |
| 663 | fp_block_size); |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 664 | } |
| 665 | #if CONFIG_MULTITHREAD |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 666 | pthread_mutex_unlock(enc_row_mt_mutex_); |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 667 | #endif |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 668 | // When row_mt_exit is set to true, other workers need not pursue any |
| 669 | // further jobs. |
Mudassir Galaganath | 17c3321 | 2023-09-04 15:06:17 +0530 | [diff] [blame] | 670 | if (row_mt_exit) { |
| 671 | error_info->setjmp = 0; |
| 672 | return 1; |
| 673 | } |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 674 | |
| 675 | if (end_of_frame) break; |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 676 | |
| 677 | TileDataEnc *const this_tile = &cpi->tile_data[cur_tile_id]; |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 678 | AV1EncRowMultiThreadSync *const row_mt_sync = &this_tile->row_mt_sync; |
| 679 | const TileInfo *const tile_info = &this_tile->tile_info; |
| 680 | const int tile_row = tile_info->tile_row; |
| 681 | const int tile_col = tile_info->tile_col; |
Ravi Chaudhary | 1cf7d16 | 2018-10-09 17:00:43 +0530 | [diff] [blame] | 682 | ThreadData *td = thread_data->td; |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 683 | const int sb_row = current_mi_row >> mib_size_log2; |
Ravi Chaudhary | 1cf7d16 | 2018-10-09 17:00:43 +0530 | [diff] [blame] | 684 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 685 | assert(current_mi_row != -1 && current_mi_row <= tile_info->mi_row_end); |
| 686 | |
Ravi Chaudhary | 1cf7d16 | 2018-10-09 17:00:43 +0530 | [diff] [blame] | 687 | td->mb.e_mbd.tile_ctx = td->tctx; |
Ravi Chaudhary | 982ac04 | 2018-11-02 14:30:29 +0530 | [diff] [blame] | 688 | td->mb.tile_pb_ctx = &this_tile->tctx; |
Vishesh | 16d6f73 | 2021-05-10 17:57:36 +0530 | [diff] [blame] | 689 | td->abs_sum_level = 0; |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 690 | |
Deepa K G | e71afbf | 2021-03-02 11:05:18 +0530 | [diff] [blame] | 691 | if (this_tile->allow_update_cdf) { |
Ravi Chaudhary | 39502fd | 2018-12-25 10:27:31 +0530 | [diff] [blame] | 692 | td->mb.row_ctx = this_tile->row_ctx; |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 693 | if (current_mi_row == tile_info->mi_row_start) |
Ravi Chaudhary | 39502fd | 2018-12-25 10:27:31 +0530 | [diff] [blame] | 694 | memcpy(td->mb.e_mbd.tile_ctx, &this_tile->tctx, sizeof(FRAME_CONTEXT)); |
| 695 | } else { |
Ravi Chaudhary | 6bd5ccb | 2018-11-02 17:26:39 +0530 | [diff] [blame] | 696 | memcpy(td->mb.e_mbd.tile_ctx, &this_tile->tctx, sizeof(FRAME_CONTEXT)); |
Ravi Chaudhary | 39502fd | 2018-12-25 10:27:31 +0530 | [diff] [blame] | 697 | } |
| 698 | |
Urvang Joshi | 5c8625a | 2020-03-30 13:16:37 -0700 | [diff] [blame] | 699 | av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row, |
| 700 | &td->mb.e_mbd); |
Ravi Chaudhary | 95ba1fa | 2018-10-11 11:42:04 +0530 | [diff] [blame] | 701 | |
Tarundeep Singh | 4243e62 | 2021-04-20 16:10:22 +0530 | [diff] [blame] | 702 | cfl_init(&td->mb.e_mbd.cfl, cm->seq_params); |
venkat sanampudi | e61793d | 2021-10-13 14:26:05 +0530 | [diff] [blame] | 703 | if (td->mb.txfm_search_info.mb_rd_record != NULL) { |
Fyodor Kyslov | 677cc01 | 2021-01-27 18:35:07 -0800 | [diff] [blame] | 704 | av1_crc32c_calculator_init( |
venkat sanampudi | e61793d | 2021-10-13 14:26:05 +0530 | [diff] [blame] | 705 | &td->mb.txfm_search_info.mb_rd_record->crc_calculator); |
Fyodor Kyslov | 677cc01 | 2021-01-27 18:35:07 -0800 | [diff] [blame] | 706 | } |
Ravi Chaudhary | 95ba1fa | 2018-10-11 11:42:04 +0530 | [diff] [blame] | 707 | |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 708 | av1_encode_sb_row(cpi, td, tile_row, tile_col, current_mi_row); |
| 709 | #if CONFIG_MULTITHREAD |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 710 | pthread_mutex_lock(enc_row_mt_mutex_); |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 711 | #endif |
Vishesh | 16d6f73 | 2021-05-10 17:57:36 +0530 | [diff] [blame] | 712 | this_tile->abs_sum_level += td->abs_sum_level; |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 713 | row_mt_sync->num_threads_working--; |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 714 | enc_row_mt->num_tile_cols_done[sb_row]++; |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 715 | #if CONFIG_MULTITHREAD |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 716 | pthread_cond_broadcast(enc_row_mt->cond_); |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 717 | pthread_mutex_unlock(enc_row_mt_mutex_); |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 718 | #endif |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 719 | } |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 720 | if (cpi->mt_info.pipeline_lpf_mt_with_enc && |
| 721 | (cm->lf.filter_level[PLANE_TYPE_Y] || |
| 722 | cm->lf.filter_level[PLANE_TYPE_UV])) { |
| 723 | // Loop-filter a superblock row if encoding of the current and next |
| 724 | // superblock row is complete. |
| 725 | // TODO(deepa.kg @ittiam.com) Evaluate encoder speed by interleaving |
| 726 | // encoding and loop filter stage. |
| 727 | launch_loop_filter_rows(cm, thread_data, enc_row_mt, mib_size_log2); |
| 728 | } |
Mudassir Galaganath | 1aad206 | 2023-10-16 12:32:23 +0530 | [diff] [blame] | 729 | av1_free_pc_tree_recursive(thread_data->td->pc_root, av1_num_planes(cm), 0, 0, |
| 730 | cpi->sf.part_sf.partition_search_type); |
| 731 | thread_data->td->pc_root = NULL; |
Mudassir Galaganath | 2f4df98 | 2023-06-12 17:20:43 +0530 | [diff] [blame] | 732 | error_info->setjmp = 0; |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 733 | return 1; |
| 734 | } |
| 735 | |
Wan-Teh Chang | 8d2f577 | 2018-09-12 15:44:59 -0700 | [diff] [blame] | 736 | static int enc_worker_hook(void *arg1, void *unused) { |
| 737 | EncWorkerData *const thread_data = (EncWorkerData *)arg1; |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 738 | AV1_COMP *const cpi = thread_data->cpi; |
Mudassir Galaganath | 25f6296 | 2023-05-25 12:57:59 +0530 | [diff] [blame] | 739 | MACROBLOCKD *const xd = &thread_data->td->mb.e_mbd; |
| 740 | struct aom_internal_error_info *const error_info = &thread_data->error_info; |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 741 | const AV1_COMMON *const cm = &cpi->common; |
Urvang Joshi | 54ffae7 | 2020-03-23 13:37:10 -0700 | [diff] [blame] | 742 | const int tile_cols = cm->tiles.cols; |
| 743 | const int tile_rows = cm->tiles.rows; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 744 | int t; |
| 745 | |
| 746 | (void)unused; |
Mudassir Galaganath | 25f6296 | 2023-05-25 12:57:59 +0530 | [diff] [blame] | 747 | |
| 748 | xd->error_info = error_info; |
| 749 | |
| 750 | // The jmp_buf is valid only for the duration of the function that calls |
| 751 | // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 |
| 752 | // before it returns. |
| 753 | if (setjmp(error_info->jmp)) { |
| 754 | error_info->setjmp = 0; |
| 755 | return 0; |
| 756 | } |
| 757 | error_info->setjmp = 1; |
| 758 | |
Deepa K G | a95eff4 | 2022-09-15 12:25:45 +0530 | [diff] [blame] | 759 | // Preallocate the pc_tree for realtime coding to reduce the cost of memory |
| 760 | // allocation. |
Mudassir Galaganath | 3a79057 | 2023-10-16 12:30:23 +0530 | [diff] [blame] | 761 | if (cpi->sf.rt_sf.use_nonrd_pick_mode) { |
| 762 | thread_data->td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size); |
| 763 | if (!thread_data->td->pc_root) |
| 764 | aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, |
| 765 | "Failed to allocate PC_TREE"); |
| 766 | } else { |
| 767 | thread_data->td->pc_root = NULL; |
| 768 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 769 | |
| 770 | for (t = thread_data->start; t < tile_rows * tile_cols; |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 771 | t += cpi->mt_info.num_workers) { |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 772 | int tile_row = t / tile_cols; |
| 773 | int tile_col = t % tile_cols; |
| 774 | |
Ravi Chaudhary | 84a280a | 2018-09-24 16:09:48 +0530 | [diff] [blame] | 775 | TileDataEnc *const this_tile = |
Urvang Joshi | 54ffae7 | 2020-03-23 13:37:10 -0700 | [diff] [blame] | 776 | &cpi->tile_data[tile_row * cm->tiles.cols + tile_col]; |
Ravi Chaudhary | bf0a6f9 | 2018-12-07 17:47:20 +0530 | [diff] [blame] | 777 | thread_data->td->mb.e_mbd.tile_ctx = &this_tile->tctx; |
| 778 | thread_data->td->mb.tile_pb_ctx = &this_tile->tctx; |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 779 | av1_encode_tile(cpi, thread_data->td, tile_row, tile_col); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 780 | } |
| 781 | |
Mudassir Galaganath | 1aad206 | 2023-10-16 12:32:23 +0530 | [diff] [blame] | 782 | av1_free_pc_tree_recursive(thread_data->td->pc_root, av1_num_planes(cm), 0, 0, |
| 783 | cpi->sf.part_sf.partition_search_type); |
| 784 | thread_data->td->pc_root = NULL; |
Mudassir Galaganath | 25f6296 | 2023-05-25 12:57:59 +0530 | [diff] [blame] | 785 | error_info->setjmp = 0; |
Wan-Teh Chang | 3f0cbf1 | 2018-07-03 14:59:18 -0700 | [diff] [blame] | 786 | return 1; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 787 | } |
| 788 | |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 789 | void av1_init_frame_mt(AV1_PRIMARY *ppi, AV1_COMP *cpi) { |
| 790 | cpi->mt_info.workers = ppi->p_mt_info.workers; |
| 791 | cpi->mt_info.num_workers = ppi->p_mt_info.num_workers; |
| 792 | cpi->mt_info.tile_thr_data = ppi->p_mt_info.tile_thr_data; |
| 793 | int i; |
| 794 | for (i = MOD_FP; i < NUM_MT_MODULES; i++) { |
| 795 | cpi->mt_info.num_mod_workers[i] = |
| 796 | AOMMIN(cpi->mt_info.num_workers, ppi->p_mt_info.num_mod_workers[i]); |
| 797 | } |
| 798 | } |
| 799 | |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 800 | void av1_init_cdef_worker(AV1_COMP *cpi) { |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 801 | // The allocation is done only for level 0 parallel frames. No change |
| 802 | // in config is supported in the middle of a parallel encode set, since the |
| 803 | // rest of the MT modules also do not support dynamic change of config. |
| 804 | if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) return; |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 805 | PrimaryMultiThreadInfo *const p_mt_info = &cpi->ppi->p_mt_info; |
| 806 | int num_cdef_workers = av1_get_num_mod_workers_for_alloc(p_mt_info, MOD_CDEF); |
| 807 | |
| 808 | av1_alloc_cdef_buffers(&cpi->common, &p_mt_info->cdef_worker, |
| 809 | &cpi->mt_info.cdef_sync, num_cdef_workers, 1); |
Yannis Guyon | be2690e | 2022-04-05 15:11:33 +0200 | [diff] [blame] | 810 | cpi->mt_info.cdef_worker = p_mt_info->cdef_worker; |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 811 | } |
| 812 | |
| 813 | #if !CONFIG_REALTIME_ONLY |
| 814 | void av1_init_lr_mt_buffers(AV1_COMP *cpi) { |
| 815 | AV1_COMMON *const cm = &cpi->common; |
| 816 | AV1LrSync *lr_sync = &cpi->mt_info.lr_row_sync; |
| 817 | if (lr_sync->sync_range) { |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 818 | if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) |
| 819 | return; |
Wan-Teh Chang | 245d30e | 2023-11-02 11:01:27 -0700 | [diff] [blame] | 820 | int num_lr_workers = |
| 821 | av1_get_num_mod_workers_for_alloc(&cpi->ppi->p_mt_info, MOD_LR); |
| 822 | assert(num_lr_workers <= lr_sync->num_workers); |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 823 | lr_sync->lrworkerdata[num_lr_workers - 1].rst_tmpbuf = cm->rst_tmpbuf; |
| 824 | lr_sync->lrworkerdata[num_lr_workers - 1].rlbs = cm->rlbs; |
| 825 | } |
| 826 | } |
| 827 | #endif |
| 828 | |
#if CONFIG_MULTITHREAD
// Creates, on first use, the synchronization objects needed by the
// multi-threaded encoder modules.
//
// The row-MT mutex and condition variable are set up for the first pass and
// whenever row-MT is enabled. All remaining objects (global motion, temporal
// filter, CDEF, loop filter, TPL, loop restoration, pack bitstream) are only
// set up when 'is_first_pass' is zero. Objects that already exist are left
// untouched; loop filter / loop restoration sync data is reallocated only when
// the current dimensions or worker counts no longer fit.
void av1_init_mt_sync(AV1_COMP *cpi, int is_first_pass) {
  AV1_COMMON *const cm = &cpi->common;
  MultiThreadInfo *const mt_info = &cpi->mt_info;

  // Enc row MT object.
  const int needs_enc_row_mt = is_first_pass || cpi->oxcf.row_mt == 1;
  if (needs_enc_row_mt) {
    AV1EncRowMultiThreadInfo *enc_row_mt = &mt_info->enc_row_mt;
    if (!enc_row_mt->mutex_) {
      CHECK_MEM_ERROR(cm, enc_row_mt->mutex_,
                      aom_malloc(sizeof(*(enc_row_mt->mutex_))));
      if (enc_row_mt->mutex_) pthread_mutex_init(enc_row_mt->mutex_, NULL);
    }
    if (!enc_row_mt->cond_) {
      CHECK_MEM_ERROR(cm, enc_row_mt->cond_,
                      aom_malloc(sizeof(*(enc_row_mt->cond_))));
      if (enc_row_mt->cond_) pthread_cond_init(enc_row_mt->cond_, NULL);
    }
  }

  // Everything below is skipped for the first pass.
  if (is_first_pass) return;

  // Global motion MT object.
  AV1GlobalMotionSync *gm_sync = &mt_info->gm_sync;
  if (!gm_sync->mutex_) {
    CHECK_MEM_ERROR(cm, gm_sync->mutex_,
                    aom_malloc(sizeof(*(gm_sync->mutex_))));
    if (gm_sync->mutex_) pthread_mutex_init(gm_sync->mutex_, NULL);
  }
#if !CONFIG_REALTIME_ONLY
  // Temporal filtering MT object.
  AV1TemporalFilterSync *tf_sync = &mt_info->tf_sync;
  if (!tf_sync->mutex_) {
    CHECK_MEM_ERROR(cm, tf_sync->mutex_, aom_malloc(sizeof(*tf_sync->mutex_)));
    if (tf_sync->mutex_) pthread_mutex_init(tf_sync->mutex_, NULL);
  }
#endif  // !CONFIG_REALTIME_ONLY
  // CDEF MT object.
  AV1CdefSync *cdef_sync = &mt_info->cdef_sync;
  if (!cdef_sync->mutex_) {
    CHECK_MEM_ERROR(cm, cdef_sync->mutex_,
                    aom_malloc(sizeof(*(cdef_sync->mutex_))));
    if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
  }

  // Loop filter MT object.
  AV1LfSync *lf_sync = &mt_info->lf_row_sync;
  // Number of superblock rows, computed with the largest superblock size.
  const int lf_sb_rows =
      CEIL_POWER_OF_TWO(cm->height >> MI_SIZE_LOG2, MAX_MIB_SIZE_LOG2);
  PrimaryMultiThreadInfo *const p_mt_info = &cpi->ppi->p_mt_info;
  int lf_workers = av1_get_num_mod_workers_for_alloc(p_mt_info, MOD_LPF);

  const bool lf_needs_realloc = !lf_sync->sync_range ||
                                lf_sb_rows != lf_sync->rows ||
                                lf_workers > lf_sync->num_workers;
  if (lf_needs_realloc) {
    av1_loop_filter_dealloc(lf_sync);
    av1_loop_filter_alloc(lf_sync, cm, lf_sb_rows, cm->width, lf_workers);
  }

  // TPL MT object. The exit flag is cleared on every call.
  AV1TplRowMultiThreadInfo *tpl_row_mt = &mt_info->tpl_row_mt;
  if (!tpl_row_mt->mutex_) {
    CHECK_MEM_ERROR(cm, tpl_row_mt->mutex_,
                    aom_malloc(sizeof(*(tpl_row_mt->mutex_))));
    if (tpl_row_mt->mutex_) pthread_mutex_init(tpl_row_mt->mutex_, NULL);
  }
  tpl_row_mt->tpl_mt_exit = false;

#if !CONFIG_REALTIME_ONLY
  if (is_restoration_used(cm)) {
    // Loop restoration MT object.
    AV1LrSync *lr_sync = &mt_info->lr_row_sync;
    const int lr_unit_size = cpi->sf.lpf_sf.min_lr_unit_size;
    const int lr_unit_rows = av1_lr_count_units(lr_unit_size, cm->height);
    const int lr_workers =
        av1_get_num_mod_workers_for_alloc(p_mt_info, MOD_LR);

    const bool lr_needs_realloc = !lr_sync->sync_range ||
                                  lr_unit_rows > lr_sync->rows ||
                                  lr_workers > lr_sync->num_workers ||
                                  MAX_MB_PLANE > lr_sync->num_planes;
    if (lr_needs_realloc) {
      av1_loop_restoration_dealloc(lr_sync);
      av1_loop_restoration_alloc(lr_sync, cm, lr_workers, lr_unit_rows,
                                 MAX_MB_PLANE, cm->width);
    }
  }
#endif

  // Pack bitstream MT object.
  AV1EncPackBSSync *pack_bs_sync = &mt_info->pack_bs_sync;
  if (!pack_bs_sync->mutex_) {
    CHECK_MEM_ERROR(cm, pack_bs_sync->mutex_,
                    aom_malloc(sizeof(*pack_bs_sync->mutex_)));
    if (pack_bs_sync->mutex_) pthread_mutex_init(pack_bs_sync->mutex_, NULL);
  }
}
#endif  // CONFIG_MULTITHREAD
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 924 | |
Nithya V S | e6f0b27 | 2021-08-18 10:55:10 +0530 | [diff] [blame] | 925 | // Computes the number of workers to be considered while allocating memory for a |
| 926 | // multi-threaded module under FPMT. |
Wan-Teh Chang | 085fb70 | 2023-10-09 17:00:23 -0700 | [diff] [blame] | 927 | int av1_get_num_mod_workers_for_alloc(const PrimaryMultiThreadInfo *p_mt_info, |
Nithya V S | e6f0b27 | 2021-08-18 10:55:10 +0530 | [diff] [blame] | 928 | MULTI_THREADED_MODULES mod_name) { |
| 929 | int num_mod_workers = p_mt_info->num_mod_workers[mod_name]; |
| 930 | if (p_mt_info->num_mod_workers[MOD_FRAME_ENC] > 1) { |
| 931 | // TODO(anyone): Change num_mod_workers to num_mod_workers[MOD_FRAME_ENC]. |
| 932 | // As frame parallel jobs will only perform multi-threading for the encode |
| 933 | // stage, we can limit the allocations according to num_enc_workers per |
| 934 | // frame parallel encode(a.k.a num_mod_workers[MOD_FRAME_ENC]). |
| 935 | num_mod_workers = p_mt_info->num_workers; |
| 936 | } |
| 937 | return num_mod_workers; |
| 938 | } |
| 939 | |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 940 | void av1_init_tile_thread_data(AV1_PRIMARY *ppi, int is_first_pass) { |
| 941 | PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info; |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 942 | |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 943 | assert(p_mt_info->workers != NULL); |
| 944 | assert(p_mt_info->tile_thr_data != NULL); |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 945 | |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 946 | int num_workers = p_mt_info->num_workers; |
Nithya V S | e6f0b27 | 2021-08-18 10:55:10 +0530 | [diff] [blame] | 947 | int num_enc_workers = av1_get_num_mod_workers_for_alloc(p_mt_info, MOD_ENC); |
Remya Prakasan | 61fcda0 | 2023-05-08 15:03:27 +0530 | [diff] [blame] | 948 | assert(num_enc_workers <= num_workers); |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 949 | for (int i = num_workers - 1; i >= 0; i--) { |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 950 | EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[i]; |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 951 | |
| 952 | if (i > 0) { |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 953 | // Allocate thread data. |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 954 | AOM_CHECK_MEM_ERROR(&ppi->error, thread_data->td, |
| 955 | aom_memalign(32, sizeof(*thread_data->td))); |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 956 | av1_zero(*thread_data->td); |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 957 | thread_data->original_td = thread_data->td; |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 958 | |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 959 | // Set up shared coeff buffers. |
Cherma Rajan A | 04f3e64 | 2021-08-13 12:47:49 +0530 | [diff] [blame] | 960 | av1_setup_shared_coeff_buffer( |
Cherma Rajan A | b15d416 | 2021-08-17 11:34:13 +0530 | [diff] [blame] | 961 | &ppi->seq_params, &thread_data->td->shared_coeff_buf, &ppi->error); |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 962 | AOM_CHECK_MEM_ERROR( |
| 963 | &ppi->error, thread_data->td->tmp_conv_dst, |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 964 | aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE * |
| 965 | sizeof(*thread_data->td->tmp_conv_dst))); |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 966 | |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 967 | if (i < p_mt_info->num_mod_workers[MOD_FP]) { |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 968 | // Set up firstpass PICK_MODE_CONTEXT. |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 969 | thread_data->td->firstpass_ctx = av1_alloc_pmc( |
| 970 | ppi->cpi, BLOCK_16X16, &thread_data->td->shared_coeff_buf); |
Samuthirika S | 69a86e2 | 2023-09-12 15:33:39 +0530 | [diff] [blame] | 971 | if (!thread_data->td->firstpass_ctx) |
| 972 | aom_internal_error(&ppi->error, AOM_CODEC_MEM_ERROR, |
| 973 | "Failed to allocate PICK_MODE_CONTEXT"); |
Urvang Joshi | 0a4cfad | 2018-09-07 11:10:39 -0700 | [diff] [blame] | 974 | } |
| 975 | |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 976 | if (!is_first_pass && i < num_enc_workers) { |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 977 | // Set up sms_tree. |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 978 | av1_setup_sms_tree(ppi->cpi, thread_data->td); |
Jayasanker J | 759b320 | 2021-03-24 19:12:32 +0530 | [diff] [blame] | 979 | |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 980 | for (int x = 0; x < 2; x++) |
| 981 | for (int y = 0; y < 2; y++) |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 982 | AOM_CHECK_MEM_ERROR( |
| 983 | &ppi->error, thread_data->td->hash_value_buffer[x][y], |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 984 | (uint32_t *)aom_malloc( |
| 985 | AOM_BUFFER_SIZE_FOR_BLOCK_HASH * |
| 986 | sizeof(*thread_data->td->hash_value_buffer[0][0]))); |
| 987 | |
| 988 | // Allocate frame counters in thread data. |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 989 | AOM_CHECK_MEM_ERROR(&ppi->error, thread_data->td->counts, |
| 990 | aom_calloc(1, sizeof(*thread_data->td->counts))); |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 991 | |
| 992 | // Allocate buffers used by palette coding mode. |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 993 | AOM_CHECK_MEM_ERROR( |
| 994 | &ppi->error, thread_data->td->palette_buffer, |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 995 | aom_memalign(16, sizeof(*thread_data->td->palette_buffer))); |
| 996 | |
Mudassir Galagnath | 804a0c3 | 2021-10-13 13:54:17 +0530 | [diff] [blame] | 997 | // The buffers 'tmp_pred_bufs[]', 'comp_rd_buffer' and 'obmc_buffer' are |
| 998 | // used in inter frames to store intermediate inter mode prediction |
| 999 | // results and are not required for allintra encoding mode. Hence, the |
| 1000 | // memory allocations for these buffers are avoided for allintra |
| 1001 | // encoding mode. |
Aniket Wanare | 9812abf | 2021-08-20 17:19:15 +0530 | [diff] [blame] | 1002 | if (ppi->cpi->oxcf.kf_cfg.key_freq_max != 0) { |
Mudassir Galagnath | 804a0c3 | 2021-10-13 13:54:17 +0530 | [diff] [blame] | 1003 | alloc_obmc_buffers(&thread_data->td->obmc_buffer, &ppi->error); |
| 1004 | |
| 1005 | alloc_compound_type_rd_buffers(&ppi->error, |
| 1006 | &thread_data->td->comp_rd_buffer); |
| 1007 | |
Aniket Wanare | 9812abf | 2021-08-20 17:19:15 +0530 | [diff] [blame] | 1008 | for (int j = 0; j < 2; ++j) { |
| 1009 | AOM_CHECK_MEM_ERROR( |
| 1010 | &ppi->error, thread_data->td->tmp_pred_bufs[j], |
| 1011 | aom_memalign(32, |
| 1012 | 2 * MAX_MB_PLANE * MAX_SB_SQUARE * |
| 1013 | sizeof(*thread_data->td->tmp_pred_bufs[j]))); |
| 1014 | } |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 1015 | } |
| 1016 | |
Jayasanker J | ee22526 | 2021-10-09 22:17:34 +0530 | [diff] [blame] | 1017 | if (is_gradient_caching_for_hog_enabled(ppi->cpi)) { |
chiyotsai | d0a2dc9 | 2021-08-24 15:03:00 -0700 | [diff] [blame] | 1018 | const int plane_types = PLANE_TYPES >> ppi->seq_params.monochrome; |
| 1019 | AOM_CHECK_MEM_ERROR( |
| 1020 | &ppi->error, thread_data->td->pixel_gradient_info, |
| 1021 | aom_malloc(sizeof(*thread_data->td->pixel_gradient_info) * |
| 1022 | plane_types * MAX_SB_SQUARE)); |
| 1023 | } |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 1024 | |
Mudassir Galagnath | ad26836 | 2021-12-27 10:57:44 +0530 | [diff] [blame] | 1025 | if (is_src_var_for_4x4_sub_blocks_caching_enabled(ppi->cpi)) { |
| 1026 | const BLOCK_SIZE sb_size = ppi->cpi->common.seq_params->sb_size; |
| 1027 | const int mi_count_in_sb = |
| 1028 | mi_size_wide[sb_size] * mi_size_high[sb_size]; |
| 1029 | |
| 1030 | AOM_CHECK_MEM_ERROR( |
| 1031 | &ppi->error, thread_data->td->src_var_info_of_4x4_sub_blocks, |
| 1032 | aom_malloc( |
| 1033 | sizeof(*thread_data->td->src_var_info_of_4x4_sub_blocks) * |
| 1034 | mi_count_in_sb)); |
| 1035 | } |
| 1036 | |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1037 | if (ppi->cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION) { |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 1038 | const int num_64x64_blocks = |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1039 | (ppi->seq_params.sb_size == BLOCK_64X64) ? 1 : 4; |
| 1040 | AOM_CHECK_MEM_ERROR( |
| 1041 | &ppi->error, thread_data->td->vt64x64, |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 1042 | aom_malloc(sizeof(*thread_data->td->vt64x64) * num_64x64_blocks)); |
| 1043 | } |
Fyodor Kyslov | 166648a | 2020-03-18 16:15:27 -0700 | [diff] [blame] | 1044 | } |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1045 | } |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 1046 | |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1047 | if (!is_first_pass && ppi->cpi->oxcf.row_mt == 1 && i < num_enc_workers) { |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1048 | if (i == 0) { |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1049 | for (int j = 0; j < ppi->num_fp_contexts; j++) { |
| 1050 | AOM_CHECK_MEM_ERROR(&ppi->error, ppi->parallel_cpi[j]->td.tctx, |
| 1051 | (FRAME_CONTEXT *)aom_memalign( |
| 1052 | 16, sizeof(*ppi->parallel_cpi[j]->td.tctx))); |
| 1053 | } |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1054 | } else { |
| 1055 | AOM_CHECK_MEM_ERROR( |
| 1056 | &ppi->error, thread_data->td->tctx, |
| 1057 | (FRAME_CONTEXT *)aom_memalign(16, sizeof(*thread_data->td->tctx))); |
| 1058 | } |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 1059 | } |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1060 | } |
Remya Prakasan | 61fcda0 | 2023-05-08 15:03:27 +0530 | [diff] [blame] | 1061 | |
| 1062 | // Record the number of workers in encode stage multi-threading for which |
| 1063 | // allocation is done. |
| 1064 | p_mt_info->prev_num_enc_workers = num_enc_workers; |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1065 | } |
| 1066 | |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1067 | void av1_create_workers(AV1_PRIMARY *ppi, int num_workers) { |
| 1068 | PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info; |
Sachin Kumar Garg | 800e70a | 2020-05-15 19:19:51 +0530 | [diff] [blame] | 1069 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1070 | |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1071 | AOM_CHECK_MEM_ERROR(&ppi->error, p_mt_info->workers, |
| 1072 | aom_malloc(num_workers * sizeof(*p_mt_info->workers))); |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 1073 | |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1074 | AOM_CHECK_MEM_ERROR( |
| 1075 | &ppi->error, p_mt_info->tile_thr_data, |
| 1076 | aom_calloc(num_workers, sizeof(*p_mt_info->tile_thr_data))); |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 1077 | |
Sachin Kumar Garg | 800e70a | 2020-05-15 19:19:51 +0530 | [diff] [blame] | 1078 | for (int i = num_workers - 1; i >= 0; i--) { |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1079 | AVxWorker *const worker = &p_mt_info->workers[i]; |
| 1080 | EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[i]; |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 1081 | |
Sachin Kumar Garg | 800e70a | 2020-05-15 19:19:51 +0530 | [diff] [blame] | 1082 | winterface->init(worker); |
| 1083 | worker->thread_name = "aom enc worker"; |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 1084 | |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1085 | thread_data->thread_id = i; |
venkat sanampudi | 162bc48 | 2020-11-25 15:04:15 +0530 | [diff] [blame] | 1086 | // Set the starting tile for each thread. |
| 1087 | thread_data->start = i; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1088 | |
| 1089 | if (i > 0) { |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 1090 | // Create threads |
| 1091 | if (!winterface->reset(worker)) |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1092 | aom_internal_error(&ppi->error, AOM_CODEC_ERROR, |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 1093 | "Tile encoder thread creation failed"); |
| 1094 | } |
| 1095 | winterface->sync(worker); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1096 | |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1097 | ++p_mt_info->num_workers; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1098 | } |
| 1099 | } |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1100 | |
Mufaddal Chakera | f59b2ba | 2021-07-02 17:04:44 +0530 | [diff] [blame] | 1101 | // This function returns 1 if frame parallel encode is supported for |
| 1102 | // the current configuration. Returns 0 otherwise. |
Tarundeep Singh | 6107e33 | 2021-09-24 11:56:15 +0530 | [diff] [blame] | 1103 | static AOM_INLINE int is_fpmt_config(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf) { |
Aasaipriya | 5255e8b | 2021-08-12 11:36:14 +0530 | [diff] [blame] | 1104 | // FPMT is enabled for AOM_Q and AOM_VBR. |
Tarundeep Singh | 0192bca | 2021-08-18 23:44:38 +0530 | [diff] [blame] | 1105 | // TODO(Tarun): Test and enable resize config. |
Aasaipriya | 5255e8b | 2021-08-12 11:36:14 +0530 | [diff] [blame] | 1106 | if (oxcf->rc_cfg.mode == AOM_CBR || oxcf->rc_cfg.mode == AOM_CQ) { |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 1107 | return 0; |
| 1108 | } |
| 1109 | if (ppi->use_svc) { |
| 1110 | return 0; |
| 1111 | } |
Tarundeep Singh | 6b16c15 | 2021-09-24 11:50:57 +0530 | [diff] [blame] | 1112 | if (oxcf->tile_cfg.enable_large_scale_tile) { |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 1113 | return 0; |
| 1114 | } |
| 1115 | if (oxcf->dec_model_cfg.timing_info_present) { |
| 1116 | return 0; |
| 1117 | } |
| 1118 | if (oxcf->mode != GOOD) { |
| 1119 | return 0; |
| 1120 | } |
| 1121 | if (oxcf->tool_cfg.error_resilient_mode) { |
| 1122 | return 0; |
| 1123 | } |
| 1124 | if (oxcf->resize_cfg.resize_mode) { |
| 1125 | return 0; |
| 1126 | } |
Tarundeep Singh | 0192bca | 2021-08-18 23:44:38 +0530 | [diff] [blame] | 1127 | if (oxcf->pass != AOM_RC_SECOND_PASS) { |
| 1128 | return 0; |
| 1129 | } |
| 1130 | if (oxcf->max_threads < 2) { |
Remya Prakasan | fd8d2c0 | 2021-08-06 19:53:57 +0530 | [diff] [blame] | 1131 | return 0; |
| 1132 | } |
Remya Prakasan | 93c1c37 | 2021-10-25 14:36:07 +0530 | [diff] [blame] | 1133 | if (!oxcf->fp_mt) { |
| 1134 | return 0; |
| 1135 | } |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 1136 | |
| 1137 | return 1; |
| 1138 | } |
| 1139 | |
Tarundeep Singh | 0192bca | 2021-08-18 23:44:38 +0530 | [diff] [blame] | 1140 | int av1_check_fpmt_config(AV1_PRIMARY *const ppi, |
| 1141 | AV1EncoderConfig *const oxcf) { |
| 1142 | if (is_fpmt_config(ppi, oxcf)) return 1; |
| 1143 | // Reset frame parallel configuration for unsupported config |
| 1144 | if (ppi->num_fp_contexts > 1) { |
| 1145 | for (int i = 1; i < ppi->num_fp_contexts; i++) { |
| 1146 | // Release the previously-used frame-buffer |
| 1147 | if (ppi->parallel_cpi[i]->common.cur_frame != NULL) { |
| 1148 | --ppi->parallel_cpi[i]->common.cur_frame->ref_count; |
| 1149 | ppi->parallel_cpi[i]->common.cur_frame = NULL; |
| 1150 | } |
| 1151 | } |
| 1152 | |
| 1153 | int cur_gf_index = ppi->cpi->gf_frame_index; |
| 1154 | int reset_size = AOMMAX(0, ppi->gf_group.size - cur_gf_index); |
| 1155 | av1_zero_array(&ppi->gf_group.frame_parallel_level[cur_gf_index], |
| 1156 | reset_size); |
| 1157 | av1_zero_array(&ppi->gf_group.is_frame_non_ref[cur_gf_index], reset_size); |
| 1158 | av1_zero_array(&ppi->gf_group.src_offset[cur_gf_index], reset_size); |
Tarundeep Singh | 0192bca | 2021-08-18 23:44:38 +0530 | [diff] [blame] | 1159 | memset(&ppi->gf_group.skip_frame_refresh[cur_gf_index][0], INVALID_IDX, |
| 1160 | sizeof(ppi->gf_group.skip_frame_refresh[cur_gf_index][0]) * |
| 1161 | reset_size * REF_FRAMES); |
| 1162 | memset(&ppi->gf_group.skip_frame_as_ref[cur_gf_index], INVALID_IDX, |
| 1163 | sizeof(ppi->gf_group.skip_frame_as_ref[cur_gf_index]) * reset_size); |
Tarundeep Singh | 0192bca | 2021-08-18 23:44:38 +0530 | [diff] [blame] | 1164 | ppi->num_fp_contexts = 1; |
| 1165 | } |
| 1166 | return 0; |
| 1167 | } |
| 1168 | |
// Deliberately large thread count, used when computing the maximum possible
// num_enc_workers for a given resolution.
#define MAX_THREADS 100
| 1172 | |
Tarundeep Singh | 7c55c1e | 2021-12-15 13:22:00 +0530 | [diff] [blame] | 1173 | // Computes the max number of enc workers possible for each resolution. |
| 1174 | static AOM_INLINE int compute_max_num_enc_workers( |
| 1175 | CommonModeInfoParams *const mi_params, int mib_size_log2) { |
Mudassir Galagnath | 77f3160 | 2022-04-19 16:28:25 +0530 | [diff] [blame] | 1176 | int num_sb_rows = CEIL_POWER_OF_TWO(mi_params->mi_rows, mib_size_log2); |
| 1177 | int num_sb_cols = CEIL_POWER_OF_TWO(mi_params->mi_cols, mib_size_log2); |
Tarundeep Singh | 7c55c1e | 2021-12-15 13:22:00 +0530 | [diff] [blame] | 1178 | |
| 1179 | return AOMMIN((num_sb_cols + 1) >> 1, num_sb_rows); |
| 1180 | } |
| 1181 | |
Mufaddal Chakera | f59b2ba | 2021-07-02 17:04:44 +0530 | [diff] [blame] | 1182 | // Computes the number of frame parallel(fp) contexts to be created |
| 1183 | // based on the number of max_enc_workers. |
Remya Prakasan | 077c847 | 2021-08-30 12:02:28 +0530 | [diff] [blame] | 1184 | int av1_compute_num_fp_contexts(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf) { |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1185 | ppi->p_mt_info.num_mod_workers[MOD_FRAME_ENC] = 0; |
Tarundeep Singh | 0192bca | 2021-08-18 23:44:38 +0530 | [diff] [blame] | 1186 | if (!av1_check_fpmt_config(ppi, oxcf)) { |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 1187 | return 1; |
| 1188 | } |
Tarundeep Singh | 7c55c1e | 2021-12-15 13:22:00 +0530 | [diff] [blame] | 1189 | int max_num_enc_workers = compute_max_num_enc_workers( |
| 1190 | &ppi->cpi->common.mi_params, ppi->cpi->common.seq_params->mib_size_log2); |
Remya Prakasan | b2b845b | 2021-11-22 14:51:36 +0530 | [diff] [blame] | 1191 | // Scaling factors and rounding factors used to tune worker_per_frame |
| 1192 | // computation. |
| 1193 | int rounding_factor[2] = { 2, 4 }; |
| 1194 | int scaling_factor[2] = { 4, 8 }; |
| 1195 | int is_480p_or_lesser = |
| 1196 | AOMMIN(oxcf->frm_dim_cfg.width, oxcf->frm_dim_cfg.height) <= 480; |
| 1197 | int is_sb_64 = 0; |
| 1198 | if (ppi->cpi != NULL) |
| 1199 | is_sb_64 = ppi->cpi->common.seq_params->sb_size == BLOCK_64X64; |
| 1200 | // A parallel frame encode has at least 1/4th the |
| 1201 | // theoretical limit of max enc workers in default case. For resolutions |
| 1202 | // larger than 480p, if SB size is 64x64, optimal performance is obtained with |
| 1203 | // limit of 1/8. |
| 1204 | int index = (!is_480p_or_lesser && is_sb_64) ? 1 : 0; |
| 1205 | int workers_per_frame = |
| 1206 | AOMMAX(1, (max_num_enc_workers + rounding_factor[index]) / |
| 1207 | scaling_factor[index]); |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 1208 | int max_threads = oxcf->max_threads; |
| 1209 | int num_fp_contexts = max_threads / workers_per_frame; |
Tarundeep Singh | 7c55c1e | 2021-12-15 13:22:00 +0530 | [diff] [blame] | 1210 | // Based on empirical results, FPMT gains with multi-tile are significant when |
| 1211 | // more parallel frames are available. Use FPMT with multi-tile encode only |
| 1212 | // when sufficient threads are available for parallel encode of |
| 1213 | // MAX_PARALLEL_FRAMES frames. |
| 1214 | if (oxcf->tile_cfg.tile_columns > 0 || oxcf->tile_cfg.tile_rows > 0) { |
| 1215 | if (num_fp_contexts < MAX_PARALLEL_FRAMES) num_fp_contexts = 1; |
| 1216 | } |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 1217 | |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1218 | num_fp_contexts = AOMMAX(1, AOMMIN(num_fp_contexts, MAX_PARALLEL_FRAMES)); |
Tarundeep Singh | 0192bca | 2021-08-18 23:44:38 +0530 | [diff] [blame] | 1219 | // Limit recalculated num_fp_contexts to ppi->num_fp_contexts. |
| 1220 | num_fp_contexts = (ppi->num_fp_contexts == 1) |
| 1221 | ? num_fp_contexts |
| 1222 | : AOMMIN(num_fp_contexts, ppi->num_fp_contexts); |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1223 | if (num_fp_contexts > 1) { |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1224 | ppi->p_mt_info.num_mod_workers[MOD_FRAME_ENC] = |
| 1225 | AOMMIN(max_num_enc_workers * num_fp_contexts, oxcf->max_threads); |
| 1226 | } |
| 1227 | return num_fp_contexts; |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 1228 | } |
| 1229 | |
Nithya V S | 5e7282e | 2021-08-18 10:51:39 +0530 | [diff] [blame] | 1230 | // Computes the number of workers to process each of the parallel frames. |
| 1231 | static AOM_INLINE int compute_num_workers_per_frame( |
| 1232 | const int num_workers, const int parallel_frame_count) { |
Remya Prakasan | 8287a5f | 2021-09-01 19:54:44 +0530 | [diff] [blame] | 1233 | // Number of level 2 workers per frame context (floor division). |
| 1234 | int workers_per_frame = (num_workers / parallel_frame_count); |
Nithya V S | 5e7282e | 2021-08-18 10:51:39 +0530 | [diff] [blame] | 1235 | return workers_per_frame; |
| 1236 | } |
| 1237 | |
Mufaddal Chakera | 42a400a | 2021-06-22 14:47:43 +0530 | [diff] [blame] | 1238 | // Prepare level 1 workers. This function is only called for |
| 1239 | // parallel_frame_count > 1. This function populates the mt_info structure of |
| 1240 | // frame level contexts appropriately by dividing the total number of available |
| 1241 | // workers amongst the frames as level 2 workers. It also populates the hook and |
| 1242 | // data members of level 1 workers. |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1243 | static AOM_INLINE void prepare_fpmt_workers(AV1_PRIMARY *ppi, |
| 1244 | AV1_COMP_DATA *first_cpi_data, |
| 1245 | AVxWorkerHook hook, |
| 1246 | int parallel_frame_count) { |
Mufaddal Chakera | 42a400a | 2021-06-22 14:47:43 +0530 | [diff] [blame] | 1247 | assert(parallel_frame_count <= ppi->num_fp_contexts && |
| 1248 | parallel_frame_count > 1); |
| 1249 | |
| 1250 | PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info; |
| 1251 | int num_workers = p_mt_info->num_workers; |
| 1252 | |
Mufaddal Chakera | 42a400a | 2021-06-22 14:47:43 +0530 | [diff] [blame] | 1253 | int frame_idx = 0; |
Remya Prakasan | 8287a5f | 2021-09-01 19:54:44 +0530 | [diff] [blame] | 1254 | int i = 0; |
| 1255 | while (i < num_workers) { |
Mufaddal Chakera | 42a400a | 2021-06-22 14:47:43 +0530 | [diff] [blame] | 1256 | // Assign level 1 worker |
| 1257 | AVxWorker *frame_worker = p_mt_info->p_workers[frame_idx] = |
| 1258 | &p_mt_info->workers[i]; |
Nithya V S | 5e7282e | 2021-08-18 10:51:39 +0530 | [diff] [blame] | 1259 | AV1_COMP *cur_cpi = ppi->parallel_cpi[frame_idx]; |
| 1260 | MultiThreadInfo *mt_info = &cur_cpi->mt_info; |
venkat sanampudi | 0b928fe | 2021-10-08 13:19:00 +0530 | [diff] [blame] | 1261 | AV1_COMMON *const cm = &cur_cpi->common; |
| 1262 | const int num_planes = av1_num_planes(cm); |
Mufaddal Chakera | 42a400a | 2021-06-22 14:47:43 +0530 | [diff] [blame] | 1263 | |
| 1264 | // Assign start of level 2 worker pool |
Nithya V S | 5e7282e | 2021-08-18 10:51:39 +0530 | [diff] [blame] | 1265 | mt_info->workers = &p_mt_info->workers[i]; |
| 1266 | mt_info->tile_thr_data = &p_mt_info->tile_thr_data[i]; |
Remya Prakasan | 8287a5f | 2021-09-01 19:54:44 +0530 | [diff] [blame] | 1267 | // Assign number of workers for each frame in the parallel encode set. |
| 1268 | mt_info->num_workers = compute_num_workers_per_frame( |
| 1269 | num_workers - i, parallel_frame_count - frame_idx); |
Mufaddal Chakera | 42a400a | 2021-06-22 14:47:43 +0530 | [diff] [blame] | 1270 | for (int j = MOD_FP; j < NUM_MT_MODULES; j++) { |
Nithya V S | 5e7282e | 2021-08-18 10:51:39 +0530 | [diff] [blame] | 1271 | mt_info->num_mod_workers[j] = |
Nithya V S | 8bc6412 | 2021-08-18 10:53:10 +0530 | [diff] [blame] | 1272 | AOMMIN(mt_info->num_workers, ppi->p_mt_info.num_mod_workers[j]); |
Mufaddal Chakera | 42a400a | 2021-06-22 14:47:43 +0530 | [diff] [blame] | 1273 | } |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 1274 | if (ppi->p_mt_info.cdef_worker != NULL) { |
| 1275 | mt_info->cdef_worker = &ppi->p_mt_info.cdef_worker[i]; |
| 1276 | |
| 1277 | // Back up the original cdef_worker pointers. |
| 1278 | mt_info->restore_state_buf.cdef_srcbuf = mt_info->cdef_worker->srcbuf; |
| 1279 | for (int plane = 0; plane < num_planes; plane++) |
| 1280 | mt_info->restore_state_buf.cdef_colbuf[plane] = |
| 1281 | mt_info->cdef_worker->colbuf[plane]; |
| 1282 | } |
| 1283 | #if !CONFIG_REALTIME_ONLY |
venkat sanampudi | 0b928fe | 2021-10-08 13:19:00 +0530 | [diff] [blame] | 1284 | if (is_restoration_used(cm)) { |
| 1285 | // Back up the original LR buffers before update. |
| 1286 | int idx = i + mt_info->num_workers - 1; |
Wan-Teh Chang | 245d30e | 2023-11-02 11:01:27 -0700 | [diff] [blame] | 1287 | assert(idx < mt_info->lr_row_sync.num_workers); |
venkat sanampudi | 0b928fe | 2021-10-08 13:19:00 +0530 | [diff] [blame] | 1288 | mt_info->restore_state_buf.rst_tmpbuf = |
| 1289 | mt_info->lr_row_sync.lrworkerdata[idx].rst_tmpbuf; |
| 1290 | mt_info->restore_state_buf.rlbs = |
| 1291 | mt_info->lr_row_sync.lrworkerdata[idx].rlbs; |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 1292 | |
venkat sanampudi | 0b928fe | 2021-10-08 13:19:00 +0530 | [diff] [blame] | 1293 | // Update LR buffers. |
| 1294 | mt_info->lr_row_sync.lrworkerdata[idx].rst_tmpbuf = cm->rst_tmpbuf; |
| 1295 | mt_info->lr_row_sync.lrworkerdata[idx].rlbs = cm->rlbs; |
| 1296 | } |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 1297 | #endif |
| 1298 | |
| 1299 | // At this stage, the thread specific CDEF buffers for the current frame's |
| 1300 | // 'common' and 'cdef_sync' only need to be allocated. 'cdef_worker' has |
| 1301 | // already been allocated across parallel frames. |
venkat sanampudi | 0b928fe | 2021-10-08 13:19:00 +0530 | [diff] [blame] | 1302 | av1_alloc_cdef_buffers(cm, &p_mt_info->cdef_worker, &mt_info->cdef_sync, |
| 1303 | p_mt_info->num_workers, 0); |
Mufaddal Chakera | 42a400a | 2021-06-22 14:47:43 +0530 | [diff] [blame] | 1304 | |
| 1305 | frame_worker->hook = hook; |
Nithya V S | 5e7282e | 2021-08-18 10:51:39 +0530 | [diff] [blame] | 1306 | frame_worker->data1 = cur_cpi; |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1307 | frame_worker->data2 = (frame_idx == 0) |
| 1308 | ? first_cpi_data |
| 1309 | : &ppi->parallel_frames_data[frame_idx - 1]; |
Mufaddal Chakera | 42a400a | 2021-06-22 14:47:43 +0530 | [diff] [blame] | 1310 | frame_idx++; |
Remya Prakasan | 8287a5f | 2021-09-01 19:54:44 +0530 | [diff] [blame] | 1311 | i += mt_info->num_workers; |
Mufaddal Chakera | 42a400a | 2021-06-22 14:47:43 +0530 | [diff] [blame] | 1312 | } |
| 1313 | p_mt_info->p_num_workers = parallel_frame_count; |
| 1314 | } |
| 1315 | |
| 1316 | // Launch level 1 workers to perform frame parallel encode. |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1317 | static AOM_INLINE void launch_fpmt_workers(AV1_PRIMARY *ppi) { |
Mufaddal Chakera | 42a400a | 2021-06-22 14:47:43 +0530 | [diff] [blame] | 1318 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
| 1319 | int num_workers = ppi->p_mt_info.p_num_workers; |
| 1320 | |
| 1321 | for (int i = num_workers - 1; i >= 0; i--) { |
| 1322 | AVxWorker *const worker = ppi->p_mt_info.p_workers[i]; |
| 1323 | if (i == 0) |
| 1324 | winterface->execute(worker); |
| 1325 | else |
| 1326 | winterface->launch(worker); |
| 1327 | } |
| 1328 | } |
| 1329 | |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 1330 | // Restore worker states after parallel encode. |
| 1331 | static AOM_INLINE void restore_workers_after_fpmt(AV1_PRIMARY *ppi, |
| 1332 | int parallel_frame_count) { |
| 1333 | assert(parallel_frame_count <= ppi->num_fp_contexts && |
| 1334 | parallel_frame_count > 1); |
Remya Prakasan | 8287a5f | 2021-09-01 19:54:44 +0530 | [diff] [blame] | 1335 | (void)parallel_frame_count; |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 1336 | |
| 1337 | PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info; |
| 1338 | int num_workers = p_mt_info->num_workers; |
| 1339 | |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 1340 | int frame_idx = 0; |
Remya Prakasan | 8287a5f | 2021-09-01 19:54:44 +0530 | [diff] [blame] | 1341 | int i = 0; |
| 1342 | while (i < num_workers) { |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 1343 | AV1_COMP *cur_cpi = ppi->parallel_cpi[frame_idx]; |
| 1344 | MultiThreadInfo *mt_info = &cur_cpi->mt_info; |
venkat sanampudi | 0b928fe | 2021-10-08 13:19:00 +0530 | [diff] [blame] | 1345 | const AV1_COMMON *const cm = &cur_cpi->common; |
| 1346 | const int num_planes = av1_num_planes(cm); |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 1347 | |
| 1348 | // Restore the original cdef_worker pointers. |
| 1349 | if (ppi->p_mt_info.cdef_worker != NULL) { |
| 1350 | mt_info->cdef_worker->srcbuf = mt_info->restore_state_buf.cdef_srcbuf; |
| 1351 | for (int plane = 0; plane < num_planes; plane++) |
| 1352 | mt_info->cdef_worker->colbuf[plane] = |
| 1353 | mt_info->restore_state_buf.cdef_colbuf[plane]; |
| 1354 | } |
| 1355 | #if !CONFIG_REALTIME_ONLY |
venkat sanampudi | 0b928fe | 2021-10-08 13:19:00 +0530 | [diff] [blame] | 1356 | if (is_restoration_used(cm)) { |
| 1357 | // Restore the original LR buffers. |
| 1358 | int idx = i + mt_info->num_workers - 1; |
Wan-Teh Chang | 245d30e | 2023-11-02 11:01:27 -0700 | [diff] [blame] | 1359 | assert(idx < mt_info->lr_row_sync.num_workers); |
venkat sanampudi | 0b928fe | 2021-10-08 13:19:00 +0530 | [diff] [blame] | 1360 | mt_info->lr_row_sync.lrworkerdata[idx].rst_tmpbuf = |
| 1361 | mt_info->restore_state_buf.rst_tmpbuf; |
| 1362 | mt_info->lr_row_sync.lrworkerdata[idx].rlbs = |
| 1363 | mt_info->restore_state_buf.rlbs; |
| 1364 | } |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 1365 | #endif |
| 1366 | |
| 1367 | frame_idx++; |
Remya Prakasan | 8287a5f | 2021-09-01 19:54:44 +0530 | [diff] [blame] | 1368 | i += mt_info->num_workers; |
Nithya V S | 75d5d07 | 2021-08-18 11:09:13 +0530 | [diff] [blame] | 1369 | } |
| 1370 | } |
| 1371 | |
Mudassir Galaganath | f32b973 | 2023-08-17 19:44:03 +0530 | [diff] [blame] | 1372 | // Synchronize level 1 workers. |
| 1373 | static AOM_INLINE void sync_fpmt_workers(AV1_PRIMARY *ppi, |
| 1374 | int frames_in_parallel_set) { |
| 1375 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
| 1376 | int num_workers = ppi->p_mt_info.p_num_workers; |
| 1377 | int had_error = 0; |
| 1378 | // Points to error in the earliest display order frame in the parallel set. |
| 1379 | const struct aom_internal_error_info *error; |
| 1380 | |
| 1381 | // Encoding ends. |
| 1382 | for (int i = num_workers - 1; i >= 0; --i) { |
| 1383 | AVxWorker *const worker = ppi->p_mt_info.p_workers[i]; |
| 1384 | if (!winterface->sync(worker)) { |
| 1385 | had_error = 1; |
| 1386 | error = ppi->parallel_cpi[i]->common.error; |
| 1387 | } |
| 1388 | } |
| 1389 | |
| 1390 | restore_workers_after_fpmt(ppi, frames_in_parallel_set); |
| 1391 | |
| 1392 | if (had_error) |
| 1393 | aom_internal_error(&ppi->error, error->error_code, "%s", error->detail); |
| 1394 | } |
| 1395 | |
Mufaddal Chakera | d777f7b | 2021-07-26 02:13:20 +0530 | [diff] [blame] | 1396 | static int get_compressed_data_hook(void *arg1, void *arg2) { |
| 1397 | AV1_COMP *cpi = (AV1_COMP *)arg1; |
| 1398 | AV1_COMP_DATA *cpi_data = (AV1_COMP_DATA *)arg2; |
Tarundeep Singh | 99fdcd7 | 2021-07-28 21:17:36 +0530 | [diff] [blame] | 1399 | int status = av1_get_compressed_data(cpi, cpi_data); |
Mufaddal Chakera | d777f7b | 2021-07-26 02:13:20 +0530 | [diff] [blame] | 1400 | |
Tarundeep Singh | 99fdcd7 | 2021-07-28 21:17:36 +0530 | [diff] [blame] | 1401 | // AOM_CODEC_OK(0) means no error. |
| 1402 | return !status; |
Mufaddal Chakera | d777f7b | 2021-07-26 02:13:20 +0530 | [diff] [blame] | 1403 | } |
| 1404 | |
| 1405 | // This function encodes the raw frame data for each frame in parallel encode |
| 1406 | // set, and outputs the frame bit stream to the designated buffers. |
| 1407 | int av1_compress_parallel_frames(AV1_PRIMARY *const ppi, |
| 1408 | AV1_COMP_DATA *const first_cpi_data) { |
Remya Prakasan | f46244b | 2021-08-03 14:21:03 +0530 | [diff] [blame] | 1409 | // Bitmask for the frame buffers referenced by cpi->scaled_ref_buf |
| 1410 | // corresponding to frames in the current parallel encode set. |
| 1411 | int ref_buffers_used_map = 0; |
| 1412 | int frames_in_parallel_set = av1_init_parallel_frame_context( |
| 1413 | first_cpi_data, ppi, &ref_buffers_used_map); |
Mufaddal Chakera | d777f7b | 2021-07-26 02:13:20 +0530 | [diff] [blame] | 1414 | prepare_fpmt_workers(ppi, first_cpi_data, get_compressed_data_hook, |
| 1415 | frames_in_parallel_set); |
| 1416 | launch_fpmt_workers(ppi); |
Mudassir Galaganath | f32b973 | 2023-08-17 19:44:03 +0530 | [diff] [blame] | 1417 | sync_fpmt_workers(ppi, frames_in_parallel_set); |
Mufaddal Chakera | d777f7b | 2021-07-26 02:13:20 +0530 | [diff] [blame] | 1418 | |
Remya Prakasan | f46244b | 2021-08-03 14:21:03 +0530 | [diff] [blame] | 1419 | // Release cpi->scaled_ref_buf corresponding to frames in the current parallel |
| 1420 | // encode set. |
| 1421 | for (int i = 0; i < frames_in_parallel_set; ++i) { |
| 1422 | av1_release_scaled_references_fpmt(ppi->parallel_cpi[i]); |
| 1423 | } |
| 1424 | av1_decrement_ref_counts_fpmt(ppi->cpi->common.buffer_pool, |
| 1425 | ref_buffers_used_map); |
Mufaddal Chakera | d777f7b | 2021-07-26 02:13:20 +0530 | [diff] [blame] | 1426 | return AOM_CODEC_OK; |
| 1427 | } |
Mufaddal Chakera | d777f7b | 2021-07-26 02:13:20 +0530 | [diff] [blame] | 1428 | |
venkat sanampudi | 162bc48 | 2020-11-25 15:04:15 +0530 | [diff] [blame] | 1429 | static AOM_INLINE void launch_workers(MultiThreadInfo *const mt_info, |
| 1430 | int num_workers) { |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1431 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
Ravi Chaudhary | 1f58dd8 | 2018-12-07 17:24:15 +0530 | [diff] [blame] | 1432 | for (int i = num_workers - 1; i >= 0; i--) { |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1433 | AVxWorker *const worker = &mt_info->workers[i]; |
Mudassir Galaganath | 17c3321 | 2023-09-04 15:06:17 +0530 | [diff] [blame] | 1434 | worker->had_error = 0; |
Ravi Chaudhary | 1f58dd8 | 2018-12-07 17:24:15 +0530 | [diff] [blame] | 1435 | if (i == 0) |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1436 | winterface->execute(worker); |
| 1437 | else |
| 1438 | winterface->launch(worker); |
| 1439 | } |
| 1440 | } |
| 1441 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1442 | static AOM_INLINE void sync_enc_workers(MultiThreadInfo *const mt_info, |
| 1443 | AV1_COMMON *const cm, int num_workers) { |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1444 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
Mudassir Galaganath | 23f023c | 2023-10-11 13:37:50 +0530 | [diff] [blame] | 1445 | const AVxWorker *const worker_main = &mt_info->workers[0]; |
| 1446 | int had_error = worker_main->had_error; |
Mudassir Galaganath | 25f6296 | 2023-05-25 12:57:59 +0530 | [diff] [blame] | 1447 | struct aom_internal_error_info error_info; |
| 1448 | |
| 1449 | // Read the error_info of main thread. |
| 1450 | if (had_error) { |
Mudassir Galaganath | 23f023c | 2023-10-11 13:37:50 +0530 | [diff] [blame] | 1451 | error_info = ((EncWorkerData *)worker_main->data1)->error_info; |
Mudassir Galaganath | 25f6296 | 2023-05-25 12:57:59 +0530 | [diff] [blame] | 1452 | } |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1453 | |
| 1454 | // Encoding ends. |
Mufaddal Chakera | 6c44401 | 2021-06-20 14:28:22 +0530 | [diff] [blame] | 1455 | for (int i = num_workers - 1; i > 0; i--) { |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1456 | AVxWorker *const worker = &mt_info->workers[i]; |
Mudassir Galaganath | 25f6296 | 2023-05-25 12:57:59 +0530 | [diff] [blame] | 1457 | if (!winterface->sync(worker)) { |
| 1458 | had_error = 1; |
| 1459 | error_info = ((EncWorkerData *)worker->data1)->error_info; |
| 1460 | } |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1461 | } |
Wan-Teh Chang | e45fa2d | 2018-10-19 11:02:22 -0700 | [diff] [blame] | 1462 | |
| 1463 | if (had_error) |
Mudassir Galaganath | 25f6296 | 2023-05-25 12:57:59 +0530 | [diff] [blame] | 1464 | aom_internal_error(cm->error, error_info.error_code, "%s", |
| 1465 | error_info.detail); |
Mudassir Galaganath | 23f023c | 2023-10-11 13:37:50 +0530 | [diff] [blame] | 1466 | |
| 1467 | // Restore xd->error_info of the main thread back to cm->error so that the |
| 1468 | // multithreaded code, when executed using a single thread, has a valid |
| 1469 | // xd->error_info. |
| 1470 | MACROBLOCKD *const xd = &((EncWorkerData *)worker_main->data1)->td->mb.e_mbd; |
| 1471 | xd->error_info = cm->error; |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1472 | } |
| 1473 | |
Elliott Karpilovsky | 18fcd6a | 2019-09-16 15:15:06 -0700 | [diff] [blame] | 1474 | static AOM_INLINE void accumulate_counters_enc_workers(AV1_COMP *cpi, |
| 1475 | int num_workers) { |
Ravi Chaudhary | 1f58dd8 | 2018-12-07 17:24:15 +0530 | [diff] [blame] | 1476 | for (int i = num_workers - 1; i >= 0; i--) { |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1477 | AVxWorker *const worker = &cpi->mt_info.workers[i]; |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1478 | EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; |
Ravi Chaudhary | 00525ef | 2018-10-31 19:52:42 +0530 | [diff] [blame] | 1479 | cpi->intrabc_used |= thread_data->td->intrabc_used; |
Yue Chen | c87d749 | 2019-05-30 17:22:49 -0700 | [diff] [blame] | 1480 | cpi->deltaq_used |= thread_data->td->deltaq_used; |
Marco Paniconi | 60e4e24 | 2022-07-24 23:32:02 -0700 | [diff] [blame] | 1481 | // Accumulate rtc counters. |
| 1482 | if (!frame_is_intra_only(&cpi->common)) |
| 1483 | av1_accumulate_rtc_counters(cpi, &thread_data->td->mb); |
Lin Zheng | 4c3c2c1 | 2023-03-02 01:49:58 +0000 | [diff] [blame] | 1484 | cpi->palette_pixel_num += thread_data->td->mb.palette_pixels; |
chiyotsai | ad4d3ea | 2021-03-17 13:53:01 -0700 | [diff] [blame] | 1485 | if (thread_data->td != &cpi->td) { |
Jayasanker J | fb4f4ef | 2022-04-13 14:00:55 +0530 | [diff] [blame] | 1486 | // Keep these conditional expressions in sync with the corresponding ones |
| 1487 | // in prepare_enc_workers(). |
venkat sanampudi | 7a6c8b9 | 2021-11-09 23:19:02 +0530 | [diff] [blame] | 1488 | if (cpi->sf.inter_sf.mv_cost_upd_level != INTERNAL_COST_UPD_OFF) { |
Samuthirika S | 794dabd | 2023-10-05 18:49:52 +0530 | [diff] [blame] | 1489 | aom_free(thread_data->td->mv_costs_alloc); |
| 1490 | thread_data->td->mv_costs_alloc = NULL; |
chiyotsai | ad4d3ea | 2021-03-17 13:53:01 -0700 | [diff] [blame] | 1491 | } |
Jayasanker J | fb4f4ef | 2022-04-13 14:00:55 +0530 | [diff] [blame] | 1492 | if (cpi->sf.intra_sf.dv_cost_upd_level != INTERNAL_COST_UPD_OFF) { |
Samuthirika S | 794dabd | 2023-10-05 18:49:52 +0530 | [diff] [blame] | 1493 | aom_free(thread_data->td->dv_costs_alloc); |
| 1494 | thread_data->td->dv_costs_alloc = NULL; |
chiyotsai | ad4d3ea | 2021-03-17 13:53:01 -0700 | [diff] [blame] | 1495 | } |
chiyotsai | 41fd15c | 2021-03-15 14:12:02 -0700 | [diff] [blame] | 1496 | } |
Satheesh Kumar | 631e2f1 | 2023-09-25 14:42:31 +0530 | [diff] [blame] | 1497 | av1_dealloc_mb_data(&thread_data->td->mb, av1_num_planes(&cpi->common)); |
Yue Chen | c87d749 | 2019-05-30 17:22:49 -0700 | [diff] [blame] | 1498 | |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1499 | // Accumulate counters. |
Ravi Chaudhary | 1f58dd8 | 2018-12-07 17:24:15 +0530 | [diff] [blame] | 1500 | if (i > 0) { |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1501 | av1_accumulate_frame_counts(&cpi->counts, thread_data->td->counts); |
| 1502 | accumulate_rd_opt(&cpi->td, thread_data->td); |
chiyotsai | 4c1e5c6 | 2020-04-30 17:54:14 -0700 | [diff] [blame] | 1503 | cpi->td.mb.txfm_search_info.txb_split_count += |
| 1504 | thread_data->td->mb.txfm_search_info.txb_split_count; |
Debargha Mukherjee | 0857e66 | 2019-01-04 16:22:09 -0800 | [diff] [blame] | 1505 | #if CONFIG_SPEED_STATS |
chiyotsai | 4c1e5c6 | 2020-04-30 17:54:14 -0700 | [diff] [blame] | 1506 | cpi->td.mb.txfm_search_info.tx_search_count += |
| 1507 | thread_data->td->mb.txfm_search_info.tx_search_count; |
Debargha Mukherjee | 0857e66 | 2019-01-04 16:22:09 -0800 | [diff] [blame] | 1508 | #endif // CONFIG_SPEED_STATS |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1509 | } |
| 1510 | } |
| 1511 | } |
| 1512 | |
Elliott Karpilovsky | 18fcd6a | 2019-09-16 15:15:06 -0700 | [diff] [blame] | 1513 | static AOM_INLINE void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook, |
| 1514 | int num_workers) { |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1515 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
Fyodor Kyslov | 648c650 | 2021-02-02 18:41:10 -0800 | [diff] [blame] | 1516 | AV1_COMMON *const cm = &cpi->common; |
Ravi Chaudhary | 1f58dd8 | 2018-12-07 17:24:15 +0530 | [diff] [blame] | 1517 | for (int i = num_workers - 1; i >= 0; i--) { |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1518 | AVxWorker *const worker = &mt_info->workers[i]; |
| 1519 | EncWorkerData *const thread_data = &mt_info->tile_thr_data[i]; |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1520 | |
| 1521 | worker->hook = hook; |
Wan-Teh Chang | 3f0cbf1 | 2018-07-03 14:59:18 -0700 | [diff] [blame] | 1522 | worker->data1 = thread_data; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1523 | worker->data2 = NULL; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1524 | |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1525 | thread_data->thread_id = i; |
| 1526 | // Set the starting tile for each thread. |
| 1527 | thread_data->start = i; |
| 1528 | |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 1529 | thread_data->cpi = cpi; |
| 1530 | if (i == 0) { |
| 1531 | thread_data->td = &cpi->td; |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1532 | } else { |
| 1533 | thread_data->td = thread_data->original_td; |
| 1534 | } |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 1535 | |
Ravi Chaudhary | 00525ef | 2018-10-31 19:52:42 +0530 | [diff] [blame] | 1536 | thread_data->td->intrabc_used = 0; |
Yue Chen | c87d749 | 2019-05-30 17:22:49 -0700 | [diff] [blame] | 1537 | thread_data->td->deltaq_used = 0; |
Vishesh | 16d6f73 | 2021-05-10 17:57:36 +0530 | [diff] [blame] | 1538 | thread_data->td->abs_sum_level = 0; |
Jingning Han | 564fe2c | 2022-03-08 23:35:01 -0800 | [diff] [blame] | 1539 | thread_data->td->rd_counts.seg_tmp_pred_cost[0] = 0; |
| 1540 | thread_data->td->rd_counts.seg_tmp_pred_cost[1] = 0; |
Ravi Chaudhary | 00525ef | 2018-10-31 19:52:42 +0530 | [diff] [blame] | 1541 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1542 | // Before encoding a frame, copy the thread data from cpi. |
| 1543 | if (thread_data->td != &cpi->td) { |
| 1544 | thread_data->td->mb = cpi->td.mb; |
| 1545 | thread_data->td->rd_counts = cpi->td.rd_counts; |
chiyotsai | d2b1221 | 2020-04-28 20:57:19 -0700 | [diff] [blame] | 1546 | thread_data->td->mb.obmc_buffer = thread_data->td->obmc_buffer; |
wenyao.liu | 22d8ab3 | 2018-10-16 09:11:29 +0800 | [diff] [blame] | 1547 | |
Urvang Joshi | 0a4cfad | 2018-09-07 11:10:39 -0700 | [diff] [blame] | 1548 | for (int x = 0; x < 2; x++) { |
Ravi Chaudhary | 783d6a3 | 2018-08-28 18:21:02 +0530 | [diff] [blame] | 1549 | for (int y = 0; y < 2; y++) { |
| 1550 | memcpy(thread_data->td->hash_value_buffer[x][y], |
chiyotsai | 82f36c9 | 2020-04-09 16:18:02 -0700 | [diff] [blame] | 1551 | cpi->td.mb.intrabc_hash_info.hash_value_buffer[x][y], |
Ravi Chaudhary | 783d6a3 | 2018-08-28 18:21:02 +0530 | [diff] [blame] | 1552 | AOM_BUFFER_SIZE_FOR_BLOCK_HASH * |
| 1553 | sizeof(*thread_data->td->hash_value_buffer[0][0])); |
chiyotsai | 82f36c9 | 2020-04-09 16:18:02 -0700 | [diff] [blame] | 1554 | thread_data->td->mb.intrabc_hash_info.hash_value_buffer[x][y] = |
Ravi Chaudhary | 783d6a3 | 2018-08-28 18:21:02 +0530 | [diff] [blame] | 1555 | thread_data->td->hash_value_buffer[x][y]; |
| 1556 | } |
Urvang Joshi | 0a4cfad | 2018-09-07 11:10:39 -0700 | [diff] [blame] | 1557 | } |
Jayasanker J | fb4f4ef | 2022-04-13 14:00:55 +0530 | [diff] [blame] | 1558 | // Keep these conditional expressions in sync with the corresponding ones |
| 1559 | // in accumulate_counters_enc_workers(). |
venkat sanampudi | 7a6c8b9 | 2021-11-09 23:19:02 +0530 | [diff] [blame] | 1560 | if (cpi->sf.inter_sf.mv_cost_upd_level != INTERNAL_COST_UPD_OFF) { |
Samuthirika S | 794dabd | 2023-10-05 18:49:52 +0530 | [diff] [blame] | 1561 | CHECK_MEM_ERROR( |
| 1562 | cm, thread_data->td->mv_costs_alloc, |
| 1563 | (MvCosts *)aom_malloc(sizeof(*thread_data->td->mv_costs_alloc))); |
| 1564 | thread_data->td->mb.mv_costs = thread_data->td->mv_costs_alloc; |
Fyodor Kyslov | 648c650 | 2021-02-02 18:41:10 -0800 | [diff] [blame] | 1565 | memcpy(thread_data->td->mb.mv_costs, cpi->td.mb.mv_costs, |
| 1566 | sizeof(MvCosts)); |
| 1567 | } |
Jayasanker J | fb4f4ef | 2022-04-13 14:00:55 +0530 | [diff] [blame] | 1568 | if (cpi->sf.intra_sf.dv_cost_upd_level != INTERNAL_COST_UPD_OFF) { |
| 1569 | // Reset dv_costs to NULL for worker threads when dv cost update is |
| 1570 | // enabled so that only dv_cost_upd_level needs to be checked before the |
| 1571 | // aom_free() call for the same. |
| 1572 | thread_data->td->mb.dv_costs = NULL; |
| 1573 | if (av1_need_dv_costs(cpi)) { |
Samuthirika S | 794dabd | 2023-10-05 18:49:52 +0530 | [diff] [blame] | 1574 | CHECK_MEM_ERROR(cm, thread_data->td->dv_costs_alloc, |
| 1575 | (IntraBCMVCosts *)aom_malloc( |
| 1576 | sizeof(*thread_data->td->dv_costs_alloc))); |
| 1577 | thread_data->td->mb.dv_costs = thread_data->td->dv_costs_alloc; |
Jayasanker J | fb4f4ef | 2022-04-13 14:00:55 +0530 | [diff] [blame] | 1578 | memcpy(thread_data->td->mb.dv_costs, cpi->td.mb.dv_costs, |
| 1579 | sizeof(IntraBCMVCosts)); |
| 1580 | } |
chiyotsai | 41fd15c | 2021-03-15 14:12:02 -0700 | [diff] [blame] | 1581 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1582 | } |
Mudassir Galagnath | b9bdc59 | 2022-03-30 22:05:27 +0530 | [diff] [blame] | 1583 | av1_alloc_mb_data(cpi, &thread_data->td->mb); |
Chethan Kumar R E | 75cf98e | 2021-03-23 11:36:32 +0530 | [diff] [blame] | 1584 | |
Marco Paniconi | 60e4e24 | 2022-07-24 23:32:02 -0700 | [diff] [blame] | 1585 | // Reset rtc counters. |
| 1586 | av1_init_rtc_counters(&thread_data->td->mb); |
Cherma Rajan A | d27a147 | 2021-02-19 11:35:40 +0530 | [diff] [blame] | 1587 | |
Lin Zheng | 4c3c2c1 | 2023-03-02 01:49:58 +0000 | [diff] [blame] | 1588 | thread_data->td->mb.palette_pixels = 0; |
| 1589 | |
Yue Chen | cc6a6ef | 2018-05-21 16:21:05 -0700 | [diff] [blame] | 1590 | if (thread_data->td->counts != &cpi->counts) { |
| 1591 | memcpy(thread_data->td->counts, &cpi->counts, sizeof(cpi->counts)); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1592 | } |
| 1593 | |
Ravi Chaudhary | 1f58dd8 | 2018-12-07 17:24:15 +0530 | [diff] [blame] | 1594 | if (i > 0) { |
hui su | 5d49314 | 2017-05-08 12:06:12 -0700 | [diff] [blame] | 1595 | thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer; |
Hui Su | 38711e7 | 2019-06-11 10:49:47 -0700 | [diff] [blame] | 1596 | thread_data->td->mb.comp_rd_buffer = thread_data->td->comp_rd_buffer; |
Urvang Joshi | 0a4cfad | 2018-09-07 11:10:39 -0700 | [diff] [blame] | 1597 | thread_data->td->mb.tmp_conv_dst = thread_data->td->tmp_conv_dst; |
| 1598 | for (int j = 0; j < 2; ++j) { |
chiyotsai | 2a897eb | 2020-04-28 19:22:13 -0700 | [diff] [blame] | 1599 | thread_data->td->mb.tmp_pred_bufs[j] = |
| 1600 | thread_data->td->tmp_pred_bufs[j]; |
Urvang Joshi | 0a4cfad | 2018-09-07 11:10:39 -0700 | [diff] [blame] | 1601 | } |
Jayasanker J | 759b320 | 2021-03-24 19:12:32 +0530 | [diff] [blame] | 1602 | thread_data->td->mb.pixel_gradient_info = |
| 1603 | thread_data->td->pixel_gradient_info; |
Urvang Joshi | e58f6eca | 2018-09-10 15:10:12 -0700 | [diff] [blame] | 1604 | |
Mudassir Galagnath | ad26836 | 2021-12-27 10:57:44 +0530 | [diff] [blame] | 1605 | thread_data->td->mb.src_var_info_of_4x4_sub_blocks = |
| 1606 | thread_data->td->src_var_info_of_4x4_sub_blocks; |
| 1607 | |
Urvang Joshi | e58f6eca | 2018-09-10 15:10:12 -0700 | [diff] [blame] | 1608 | thread_data->td->mb.e_mbd.tmp_conv_dst = thread_data->td->mb.tmp_conv_dst; |
| 1609 | for (int j = 0; j < 2; ++j) { |
| 1610 | thread_data->td->mb.e_mbd.tmp_obmc_bufs[j] = |
chiyotsai | 2a897eb | 2020-04-28 19:22:13 -0700 | [diff] [blame] | 1611 | thread_data->td->mb.tmp_pred_bufs[j]; |
Urvang Joshi | e58f6eca | 2018-09-10 15:10:12 -0700 | [diff] [blame] | 1612 | } |
Urvang Joshi | 0a4cfad | 2018-09-07 11:10:39 -0700 | [diff] [blame] | 1613 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1614 | } |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1615 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1616 | |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1617 | #if !CONFIG_REALTIME_ONLY |
| 1618 | static AOM_INLINE void fp_prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook, |
| 1619 | int num_workers) { |
Fyodor Kyslov | 648c650 | 2021-02-02 18:41:10 -0800 | [diff] [blame] | 1620 | AV1_COMMON *const cm = &cpi->common; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1621 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
| 1622 | for (int i = num_workers - 1; i >= 0; i--) { |
| 1623 | AVxWorker *const worker = &mt_info->workers[i]; |
| 1624 | EncWorkerData *const thread_data = &mt_info->tile_thr_data[i]; |
| 1625 | |
| 1626 | worker->hook = hook; |
| 1627 | worker->data1 = thread_data; |
| 1628 | worker->data2 = NULL; |
| 1629 | |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1630 | thread_data->thread_id = i; |
| 1631 | // Set the starting tile for each thread. |
| 1632 | thread_data->start = i; |
| 1633 | |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 1634 | thread_data->cpi = cpi; |
| 1635 | if (i == 0) { |
| 1636 | thread_data->td = &cpi->td; |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 1637 | } else { |
| 1638 | thread_data->td = thread_data->original_td; |
| 1639 | } |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 1640 | |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1641 | if (thread_data->td != &cpi->td) { |
Remya Prakasan | 0badba3 | 2023-08-17 16:44:52 +0530 | [diff] [blame] | 1642 | // Before encoding a frame, copy the thread data from cpi. |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1643 | thread_data->td->mb = cpi->td.mb; |
Remya Prakasan | 0badba3 | 2023-08-17 16:44:52 +0530 | [diff] [blame] | 1644 | av1_alloc_src_diff_buf(cm, &thread_data->td->mb); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1645 | } |
| 1646 | } |
| 1647 | } |
| 1648 | #endif |
| 1649 | |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1650 | // Computes the number of workers for row multi-threading of encoding stage |
Wan-Teh Chang | c8c2c61 | 2023-10-10 16:41:23 -0700 | [diff] [blame] | 1651 | static AOM_INLINE int compute_num_enc_row_mt_workers(const AV1_COMMON *cm, |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1652 | int max_threads) { |
| 1653 | TileInfo tile_info; |
| 1654 | const int tile_cols = cm->tiles.cols; |
| 1655 | const int tile_rows = cm->tiles.rows; |
| 1656 | int total_num_threads_row_mt = 0; |
| 1657 | for (int row = 0; row < tile_rows; row++) { |
| 1658 | for (int col = 0; col < tile_cols; col++) { |
| 1659 | av1_tile_init(&tile_info, cm, row, col); |
Wan-Teh Chang | 5350a5e | 2022-01-29 11:50:17 -0800 | [diff] [blame] | 1660 | const int num_sb_rows_in_tile = av1_get_sb_rows_in_tile(cm, &tile_info); |
| 1661 | const int num_sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, &tile_info); |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1662 | total_num_threads_row_mt += |
| 1663 | AOMMIN((num_sb_cols_in_tile + 1) >> 1, num_sb_rows_in_tile); |
| 1664 | } |
| 1665 | } |
| 1666 | return AOMMIN(max_threads, total_num_threads_row_mt); |
| 1667 | } |
| 1668 | |
| 1669 | // Computes the number of workers for tile multi-threading of encoding stage |
Wan-Teh Chang | c8c2c61 | 2023-10-10 16:41:23 -0700 | [diff] [blame] | 1670 | static AOM_INLINE int compute_num_enc_tile_mt_workers(const AV1_COMMON *cm, |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1671 | int max_threads) { |
| 1672 | const int tile_cols = cm->tiles.cols; |
| 1673 | const int tile_rows = cm->tiles.rows; |
| 1674 | return AOMMIN(max_threads, tile_cols * tile_rows); |
| 1675 | } |
| 1676 | |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 1677 | // Find max worker of all MT stages |
Angie Chiang | 09543c1 | 2022-05-18 10:14:41 -0700 | [diff] [blame] | 1678 | int av1_get_max_num_workers(const AV1_COMP *cpi) { |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 1679 | int max_num_workers = 0; |
| 1680 | for (int i = MOD_FP; i < NUM_MT_MODULES; i++) |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 1681 | max_num_workers = |
| 1682 | AOMMAX(cpi->ppi->p_mt_info.num_mod_workers[i], max_num_workers); |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 1683 | assert(max_num_workers >= 1); |
| 1684 | return AOMMIN(max_num_workers, cpi->oxcf.max_threads); |
| 1685 | } |
| 1686 | |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1687 | // Computes the number of workers for encoding stage (row/tile multi-threading) |
Wan-Teh Chang | c8c2c61 | 2023-10-10 16:41:23 -0700 | [diff] [blame] | 1688 | int av1_compute_num_enc_workers(const AV1_COMP *cpi, int max_workers) { |
Deepa K G | a822b3c | 2020-06-27 17:02:12 +0530 | [diff] [blame] | 1689 | if (max_workers <= 1) return 1; |
Wan-Teh Chang | 0c0d971 | 2020-08-03 17:17:39 -0700 | [diff] [blame] | 1690 | if (cpi->oxcf.row_mt) |
Deepa K G | a822b3c | 2020-06-27 17:02:12 +0530 | [diff] [blame] | 1691 | return compute_num_enc_row_mt_workers(&cpi->common, max_workers); |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1692 | else |
Deepa K G | a822b3c | 2020-06-27 17:02:12 +0530 | [diff] [blame] | 1693 | return compute_num_enc_tile_mt_workers(&cpi->common, max_workers); |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1694 | } |
| 1695 | |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1696 | void av1_encode_tiles_mt(AV1_COMP *cpi) { |
| 1697 | AV1_COMMON *const cm = &cpi->common; |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1698 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
Urvang Joshi | 54ffae7 | 2020-03-23 13:37:10 -0700 | [diff] [blame] | 1699 | const int tile_cols = cm->tiles.cols; |
| 1700 | const int tile_rows = cm->tiles.rows; |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 1701 | int num_workers = mt_info->num_mod_workers[MOD_ENC]; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1702 | |
Vishesh | 7096410 | 2020-04-17 18:09:41 +0530 | [diff] [blame] | 1703 | assert(IMPLIES(cpi->tile_data == NULL, |
| 1704 | cpi->allocated_tiles < tile_cols * tile_rows)); |
| 1705 | if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi); |
Ravi Chaudhary | a497eb4 | 2018-09-07 12:38:08 +0530 | [diff] [blame] | 1706 | |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1707 | av1_init_tile_data(cpi); |
Mufaddal Chakera | 756a2a0 | 2021-05-31 17:10:57 +0530 | [diff] [blame] | 1708 | num_workers = AOMMIN(num_workers, mt_info->num_workers); |
| 1709 | |
Wan-Teh Chang | 8d2f577 | 2018-09-12 15:44:59 -0700 | [diff] [blame] | 1710 | prepare_enc_workers(cpi, enc_worker_hook, num_workers); |
venkat sanampudi | 162bc48 | 2020-11-25 15:04:15 +0530 | [diff] [blame] | 1711 | launch_workers(&cpi->mt_info, num_workers); |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1712 | sync_enc_workers(&cpi->mt_info, cm, num_workers); |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 1713 | accumulate_counters_enc_workers(cpi, num_workers); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1714 | } |
Yue Chen | cc6a6ef | 2018-05-21 16:21:05 -0700 | [diff] [blame] | 1715 | |
| 1716 | // Accumulate frame counts. FRAME_COUNTS consist solely of 'unsigned int' |
| 1717 | // members, so we treat it as an array, and sum over the whole length. |
| 1718 | void av1_accumulate_frame_counts(FRAME_COUNTS *acc_counts, |
| 1719 | const FRAME_COUNTS *counts) { |
| 1720 | unsigned int *const acc = (unsigned int *)acc_counts; |
| 1721 | const unsigned int *const cnt = (const unsigned int *)counts; |
| 1722 | |
| 1723 | const unsigned int n_counts = sizeof(FRAME_COUNTS) / sizeof(unsigned int); |
| 1724 | |
| 1725 | for (unsigned int i = 0; i < n_counts; i++) acc[i] += cnt[i]; |
| 1726 | } |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 1727 | |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1728 | // Computes the maximum number of sb rows and sb_cols across tiles which are |
| 1729 | // used to allocate memory for multi-threaded encoding with row-mt=1. |
| 1730 | static AOM_INLINE void compute_max_sb_rows_cols(const AV1_COMMON *cm, |
| 1731 | int *max_sb_rows_in_tile, |
| 1732 | int *max_sb_cols_in_tile) { |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1733 | const int tile_rows = cm->tiles.rows; |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1734 | const int mib_size_log2 = cm->seq_params->mib_size_log2; |
| 1735 | const int num_mi_rows = cm->mi_params.mi_rows; |
| 1736 | const int *const row_start_sb = cm->tiles.row_start_sb; |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1737 | for (int row = 0; row < tile_rows; row++) { |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1738 | const int mi_row_start = row_start_sb[row] << mib_size_log2; |
| 1739 | const int mi_row_end = |
| 1740 | AOMMIN(row_start_sb[row + 1] << mib_size_log2, num_mi_rows); |
| 1741 | const int num_sb_rows_in_tile = |
| 1742 | CEIL_POWER_OF_TWO(mi_row_end - mi_row_start, mib_size_log2); |
| 1743 | *max_sb_rows_in_tile = AOMMAX(*max_sb_rows_in_tile, num_sb_rows_in_tile); |
| 1744 | } |
| 1745 | |
| 1746 | const int tile_cols = cm->tiles.cols; |
| 1747 | const int num_mi_cols = cm->mi_params.mi_cols; |
| 1748 | const int *const col_start_sb = cm->tiles.col_start_sb; |
| 1749 | for (int col = 0; col < tile_cols; col++) { |
| 1750 | const int mi_col_start = col_start_sb[col] << mib_size_log2; |
| 1751 | const int mi_col_end = |
| 1752 | AOMMIN(col_start_sb[col + 1] << mib_size_log2, num_mi_cols); |
| 1753 | const int num_sb_cols_in_tile = |
| 1754 | CEIL_POWER_OF_TWO(mi_col_end - mi_col_start, mib_size_log2); |
| 1755 | *max_sb_cols_in_tile = AOMMAX(*max_sb_cols_in_tile, num_sb_cols_in_tile); |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1756 | } |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1757 | } |
| 1758 | |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1759 | #if !CONFIG_REALTIME_ONLY |
| 1760 | // Computes the number of workers for firstpass stage (row/tile multi-threading) |
Mufaddal Chakera | a5e3f02 | 2020-05-20 13:03:38 +0530 | [diff] [blame] | 1761 | int av1_fp_compute_num_enc_workers(AV1_COMP *cpi) { |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1762 | AV1_COMMON *cm = &cpi->common; |
| 1763 | const int tile_cols = cm->tiles.cols; |
| 1764 | const int tile_rows = cm->tiles.rows; |
| 1765 | int total_num_threads_row_mt = 0; |
Mufaddal Chakera | a5e3f02 | 2020-05-20 13:03:38 +0530 | [diff] [blame] | 1766 | TileInfo tile_info; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1767 | |
| 1768 | if (cpi->oxcf.max_threads <= 1) return 1; |
| 1769 | |
| 1770 | for (int row = 0; row < tile_rows; row++) { |
| 1771 | for (int col = 0; col < tile_cols; col++) { |
Mufaddal Chakera | a5e3f02 | 2020-05-20 13:03:38 +0530 | [diff] [blame] | 1772 | av1_tile_init(&tile_info, cm, row, col); |
Cheng Chen | fd2c0cf | 2020-11-30 16:28:37 -0800 | [diff] [blame] | 1773 | const int num_mb_rows_in_tile = |
Wan-Teh Chang | 5350a5e | 2022-01-29 11:50:17 -0800 | [diff] [blame] | 1774 | av1_get_unit_rows_in_tile(&tile_info, cpi->fp_block_size); |
Cheng Chen | fd2c0cf | 2020-11-30 16:28:37 -0800 | [diff] [blame] | 1775 | const int num_mb_cols_in_tile = |
Wan-Teh Chang | 5350a5e | 2022-01-29 11:50:17 -0800 | [diff] [blame] | 1776 | av1_get_unit_cols_in_tile(&tile_info, cpi->fp_block_size); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1777 | total_num_threads_row_mt += |
| 1778 | AOMMIN((num_mb_cols_in_tile + 1) >> 1, num_mb_rows_in_tile); |
| 1779 | } |
| 1780 | } |
| 1781 | return AOMMIN(cpi->oxcf.max_threads, total_num_threads_row_mt); |
| 1782 | } |
| 1783 | |
| 1784 | // Computes the maximum number of mb_rows for row multi-threading of firstpass |
| 1785 | // stage |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1786 | static AOM_INLINE int fp_compute_max_mb_rows(const AV1_COMMON *cm, |
| 1787 | BLOCK_SIZE fp_block_size) { |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1788 | const int tile_rows = cm->tiles.rows; |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1789 | const int unit_height_log2 = mi_size_high_log2[fp_block_size]; |
| 1790 | const int mib_size_log2 = cm->seq_params->mib_size_log2; |
| 1791 | const int num_mi_rows = cm->mi_params.mi_rows; |
| 1792 | const int *const row_start_sb = cm->tiles.row_start_sb; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1793 | int max_mb_rows = 0; |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1794 | |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1795 | for (int row = 0; row < tile_rows; row++) { |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1796 | const int mi_row_start = row_start_sb[row] << mib_size_log2; |
| 1797 | const int mi_row_end = |
| 1798 | AOMMIN(row_start_sb[row + 1] << mib_size_log2, num_mi_rows); |
| 1799 | const int num_mb_rows_in_tile = |
| 1800 | CEIL_POWER_OF_TWO(mi_row_end - mi_row_start, unit_height_log2); |
| 1801 | max_mb_rows = AOMMAX(max_mb_rows, num_mb_rows_in_tile); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1802 | } |
| 1803 | return max_mb_rows; |
| 1804 | } |
| 1805 | #endif |
| 1806 | |
Deepa K G | 826ce59 | 2023-04-28 12:48:10 +0530 | [diff] [blame] | 1807 | static void lpf_pipeline_mt_init(AV1_COMP *cpi, int num_workers) { |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 1808 | // Pipelining of loop-filtering after encoding is enabled when loop-filter |
| 1809 | // level is chosen based on quantizer and frame type. It is disabled in case |
| 1810 | // of 'LOOPFILTER_SELECTIVELY' as the stats collected during encoding stage |
| 1811 | // decides the filter level. Loop-filtering is disabled in case |
| 1812 | // of non-reference frames and for frames with intra block copy tool enabled. |
| 1813 | AV1_COMMON *cm = &cpi->common; |
Mudassir Galagnath | 58f157b | 2022-10-17 17:36:36 +0530 | [diff] [blame] | 1814 | const int use_loopfilter = is_loopfilter_used(cm); |
| 1815 | const int use_superres = av1_superres_scaled(cm); |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 1816 | const int use_cdef = is_cdef_used(cm); |
| 1817 | const int use_restoration = is_restoration_used(cm); |
Deepa K G | 826ce59 | 2023-04-28 12:48:10 +0530 | [diff] [blame] | 1818 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
| 1819 | MACROBLOCKD *xd = &cpi->td.mb.e_mbd; |
Mudassir Galagnath | 58f157b | 2022-10-17 17:36:36 +0530 | [diff] [blame] | 1820 | |
| 1821 | const unsigned int skip_apply_postproc_filters = |
| 1822 | derive_skip_apply_postproc_filters(cpi, use_loopfilter, use_cdef, |
| 1823 | use_superres, use_restoration); |
Deepa K G | 826ce59 | 2023-04-28 12:48:10 +0530 | [diff] [blame] | 1824 | mt_info->pipeline_lpf_mt_with_enc = |
Deepa K G | 493603c | 2022-10-18 23:37:46 +0530 | [diff] [blame] | 1825 | (cpi->oxcf.mode == REALTIME) && (cpi->oxcf.speed >= 5) && |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 1826 | (cpi->sf.lpf_sf.lpf_pick == LPF_PICK_FROM_Q) && |
| 1827 | (cpi->oxcf.algo_cfg.loopfilter_control != LOOPFILTER_SELECTIVELY) && |
| 1828 | !cpi->ppi->rtc_ref.non_reference_frame && !cm->features.allow_intrabc && |
Mudassir Galagnath | 58f157b | 2022-10-17 17:36:36 +0530 | [diff] [blame] | 1829 | ((skip_apply_postproc_filters & SKIP_APPLY_LOOPFILTER) == 0); |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 1830 | |
Deepa K G | 826ce59 | 2023-04-28 12:48:10 +0530 | [diff] [blame] | 1831 | if (!mt_info->pipeline_lpf_mt_with_enc) return; |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 1832 | |
| 1833 | set_postproc_filter_default_params(cm); |
| 1834 | |
Mudassir Galagnath | 58f157b | 2022-10-17 17:36:36 +0530 | [diff] [blame] | 1835 | if (!use_loopfilter) return; |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 1836 | |
| 1837 | const LPF_PICK_METHOD method = cpi->sf.lpf_sf.lpf_pick; |
| 1838 | assert(method == LPF_PICK_FROM_Q); |
| 1839 | assert(cpi->oxcf.algo_cfg.loopfilter_control != LOOPFILTER_SELECTIVELY); |
| 1840 | |
| 1841 | av1_pick_filter_level(cpi->source, cpi, method); |
| 1842 | |
| 1843 | struct loopfilter *lf = &cm->lf; |
| 1844 | const int plane_start = 0; |
| 1845 | const int plane_end = av1_num_planes(cm); |
| 1846 | int planes_to_lf[MAX_MB_PLANE]; |
| 1847 | if ((lf->filter_level[PLANE_TYPE_Y] || lf->filter_level[PLANE_TYPE_UV]) && |
| 1848 | check_planes_to_loop_filter(lf, planes_to_lf, plane_start, plane_end)) { |
| 1849 | int lpf_opt_level = get_lpf_opt_level(&cpi->sf); |
| 1850 | assert(lpf_opt_level == 2); |
| 1851 | |
| 1852 | const int start_mi_row = 0; |
| 1853 | const int end_mi_row = start_mi_row + cm->mi_params.mi_rows; |
| 1854 | |
| 1855 | av1_loop_filter_frame_init(cm, plane_start, plane_end); |
| 1856 | |
Deepa K G | 826ce59 | 2023-04-28 12:48:10 +0530 | [diff] [blame] | 1857 | assert(mt_info->num_mod_workers[MOD_ENC] == |
| 1858 | mt_info->num_mod_workers[MOD_LPF]); |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 1859 | loop_filter_frame_mt_init(cm, start_mi_row, end_mi_row, planes_to_lf, |
Deepa K G | 826ce59 | 2023-04-28 12:48:10 +0530 | [diff] [blame] | 1860 | mt_info->num_mod_workers[MOD_LPF], |
| 1861 | &mt_info->lf_row_sync, lpf_opt_level, |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 1862 | cm->seq_params->mib_size_log2); |
Deepa K G | 826ce59 | 2023-04-28 12:48:10 +0530 | [diff] [blame] | 1863 | |
| 1864 | for (int i = num_workers - 1; i >= 0; i--) { |
| 1865 | EncWorkerData *const thread_data = &mt_info->tile_thr_data[i]; |
| 1866 | // Initialize loopfilter data |
| 1867 | thread_data->lf_sync = &mt_info->lf_row_sync; |
| 1868 | thread_data->lf_data = &thread_data->lf_sync->lfdata[i]; |
| 1869 | loop_filter_data_reset(thread_data->lf_data, &cm->cur_frame->buf, cm, xd); |
| 1870 | } |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 1871 | } |
| 1872 | } |
| 1873 | |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 1874 | void av1_encode_tiles_row_mt(AV1_COMP *cpi) { |
| 1875 | AV1_COMMON *const cm = &cpi->common; |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1876 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
| 1877 | AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt; |
Urvang Joshi | 54ffae7 | 2020-03-23 13:37:10 -0700 | [diff] [blame] | 1878 | const int tile_cols = cm->tiles.cols; |
| 1879 | const int tile_rows = cm->tiles.rows; |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1880 | const int sb_rows_in_frame = get_sb_rows_in_frame(cm); |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1881 | int *thread_id_to_tile_id = enc_row_mt->thread_id_to_tile_id; |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1882 | int max_sb_rows_in_tile = 0, max_sb_cols_in_tile = 0; |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 1883 | int num_workers = mt_info->num_mod_workers[MOD_ENC]; |
Sachin Kumar Garg | 6f46f9f | 2020-04-28 13:26:22 +0530 | [diff] [blame] | 1884 | |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1885 | compute_max_sb_rows_cols(cm, &max_sb_rows_in_tile, &max_sb_cols_in_tile); |
| 1886 | const bool alloc_row_mt_mem = |
| 1887 | (enc_row_mt->allocated_tile_cols != tile_cols || |
| 1888 | enc_row_mt->allocated_tile_rows != tile_rows || |
| 1889 | enc_row_mt->allocated_rows != max_sb_rows_in_tile || |
| 1890 | enc_row_mt->allocated_cols != (max_sb_cols_in_tile - 1) || |
| 1891 | enc_row_mt->allocated_sb_rows != sb_rows_in_frame); |
| 1892 | const bool alloc_tile_data = cpi->allocated_tiles < tile_cols * tile_rows; |
| 1893 | |
| 1894 | assert(IMPLIES(cpi->tile_data == NULL, alloc_tile_data)); |
| 1895 | if (alloc_tile_data) { |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 1896 | av1_alloc_tile_data(cpi); |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 1897 | } |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 1898 | |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1899 | assert(IMPLIES(alloc_tile_data, alloc_row_mt_mem)); |
| 1900 | if (alloc_row_mt_mem) { |
| 1901 | row_mt_mem_alloc(cpi, max_sb_rows_in_tile, max_sb_cols_in_tile, |
| 1902 | cpi->oxcf.algo_cfg.cdf_update_mode); |
| 1903 | } |
| 1904 | |
Deepa K G | 826ce59 | 2023-04-28 12:48:10 +0530 | [diff] [blame] | 1905 | num_workers = AOMMIN(num_workers, mt_info->num_workers); |
Deepa K G | 826ce59 | 2023-04-28 12:48:10 +0530 | [diff] [blame] | 1906 | lpf_pipeline_mt_init(cpi, num_workers); |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 1907 | |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 1908 | av1_init_tile_data(cpi); |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 1909 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1910 | memset(thread_id_to_tile_id, -1, |
| 1911 | sizeof(*thread_id_to_tile_id) * MAX_NUM_THREADS); |
Deepa K G | 1a38c9c | 2022-09-23 15:53:18 +0530 | [diff] [blame] | 1912 | memset(enc_row_mt->num_tile_cols_done, 0, |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1913 | sizeof(*enc_row_mt->num_tile_cols_done) * sb_rows_in_frame); |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 1914 | |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 1915 | for (int tile_row = 0; tile_row < tile_rows; tile_row++) { |
| 1916 | for (int tile_col = 0; tile_col < tile_cols; tile_col++) { |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1917 | int tile_index = tile_row * tile_cols + tile_col; |
| 1918 | TileDataEnc *const this_tile = &cpi->tile_data[tile_index]; |
| 1919 | AV1EncRowMultiThreadSync *const row_mt_sync = &this_tile->row_mt_sync; |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 1920 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1921 | // Initialize num_finished_cols to -1 for all rows. |
| 1922 | memset(row_mt_sync->num_finished_cols, -1, |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1923 | sizeof(*row_mt_sync->num_finished_cols) * max_sb_rows_in_tile); |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1924 | row_mt_sync->next_mi_row = this_tile->tile_info.mi_row_start; |
| 1925 | row_mt_sync->num_threads_working = 0; |
Jayasanker J | 55ac31c | 2022-06-22 19:15:01 +0530 | [diff] [blame] | 1926 | row_mt_sync->intrabc_extra_top_right_sb_delay = |
Jayasanker J | 34ec50a | 2022-07-01 19:00:02 +0530 | [diff] [blame] | 1927 | av1_get_intrabc_extra_top_right_sb_delay(cm); |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 1928 | |
Ravi Chaudhary | 95ba1fa | 2018-10-11 11:42:04 +0530 | [diff] [blame] | 1929 | av1_inter_mode_data_init(this_tile); |
Ravi Chaudhary | 95ba1fa | 2018-10-11 11:42:04 +0530 | [diff] [blame] | 1930 | av1_zero_above_context(cm, &cpi->td.mb.e_mbd, |
| 1931 | this_tile->tile_info.mi_col_start, |
| 1932 | this_tile->tile_info.mi_col_end, tile_row); |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 1933 | } |
| 1934 | } |
| 1935 | |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1936 | assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows, |
| 1937 | num_workers); |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 1938 | prepare_enc_workers(cpi, enc_row_mt_worker_hook, num_workers); |
venkat sanampudi | 162bc48 | 2020-11-25 15:04:15 +0530 | [diff] [blame] | 1939 | launch_workers(&cpi->mt_info, num_workers); |
Deepa K G | 74de2a0 | 2020-04-11 13:09:11 +0530 | [diff] [blame] | 1940 | sync_enc_workers(&cpi->mt_info, cm, num_workers); |
David Turner | ebf96f4 | 2018-11-14 16:57:57 +0000 | [diff] [blame] | 1941 | if (cm->delta_q_info.delta_lf_present_flag) update_delta_lf_for_row_mt(cpi); |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 1942 | accumulate_counters_enc_workers(cpi, num_workers); |
| 1943 | } |
Sachin Kumar Garg | 137265e | 2020-04-27 12:56:51 +0530 | [diff] [blame] | 1944 | |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1945 | #if !CONFIG_REALTIME_ONLY |
Remya Prakasan | 0badba3 | 2023-08-17 16:44:52 +0530 | [diff] [blame] | 1946 | static void dealloc_thread_data_src_diff_buf(AV1_COMP *cpi, int num_workers) { |
| 1947 | for (int i = num_workers - 1; i >= 0; --i) { |
| 1948 | EncWorkerData *const thread_data = &cpi->mt_info.tile_thr_data[i]; |
| 1949 | if (thread_data->td != &cpi->td) |
| 1950 | av1_dealloc_src_diff_buf(&thread_data->td->mb, |
| 1951 | av1_num_planes(&cpi->common)); |
| 1952 | } |
| 1953 | } |
| 1954 | |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1955 | void av1_fp_encode_tiles_row_mt(AV1_COMP *cpi) { |
| 1956 | AV1_COMMON *const cm = &cpi->common; |
| 1957 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
| 1958 | AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt; |
| 1959 | const int tile_cols = cm->tiles.cols; |
| 1960 | const int tile_rows = cm->tiles.rows; |
| 1961 | int *thread_id_to_tile_id = enc_row_mt->thread_id_to_tile_id; |
| 1962 | int num_workers = 0; |
| 1963 | int max_mb_rows = 0; |
| 1964 | |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1965 | max_mb_rows = fp_compute_max_mb_rows(cm, cpi->fp_block_size); |
| 1966 | const bool alloc_row_mt_mem = enc_row_mt->allocated_tile_cols != tile_cols || |
| 1967 | enc_row_mt->allocated_tile_rows != tile_rows || |
| 1968 | enc_row_mt->allocated_rows != max_mb_rows; |
| 1969 | const bool alloc_tile_data = cpi->allocated_tiles < tile_cols * tile_rows; |
| 1970 | |
| 1971 | assert(IMPLIES(cpi->tile_data == NULL, alloc_tile_data)); |
| 1972 | if (alloc_tile_data) { |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1973 | av1_alloc_tile_data(cpi); |
| 1974 | } |
| 1975 | |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1976 | assert(IMPLIES(alloc_tile_data, alloc_row_mt_mem)); |
| 1977 | if (alloc_row_mt_mem) { |
| 1978 | row_mt_mem_alloc(cpi, max_mb_rows, -1, 0); |
| 1979 | } |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1980 | |
Deepa K G | f7d3b86 | 2022-11-02 13:45:53 +0530 | [diff] [blame] | 1981 | av1_init_tile_data(cpi); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1982 | |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 1983 | // For pass = 1, compute the no. of workers needed. For single-pass encode |
| 1984 | // (pass = 0), no. of workers are already computed. |
| 1985 | if (mt_info->num_mod_workers[MOD_FP] == 0) |
| 1986 | num_workers = av1_fp_compute_num_enc_workers(cpi); |
| 1987 | else |
| 1988 | num_workers = mt_info->num_mod_workers[MOD_FP]; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1989 | |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 1990 | memset(thread_id_to_tile_id, -1, |
| 1991 | sizeof(*thread_id_to_tile_id) * MAX_NUM_THREADS); |
| 1992 | |
| 1993 | for (int tile_row = 0; tile_row < tile_rows; tile_row++) { |
| 1994 | for (int tile_col = 0; tile_col < tile_cols; tile_col++) { |
| 1995 | int tile_index = tile_row * tile_cols + tile_col; |
| 1996 | TileDataEnc *const this_tile = &cpi->tile_data[tile_index]; |
| 1997 | AV1EncRowMultiThreadSync *const row_mt_sync = &this_tile->row_mt_sync; |
| 1998 | |
| 1999 | // Initialize num_finished_cols to -1 for all rows. |
| 2000 | memset(row_mt_sync->num_finished_cols, -1, |
| 2001 | sizeof(*row_mt_sync->num_finished_cols) * max_mb_rows); |
| 2002 | row_mt_sync->next_mi_row = this_tile->tile_info.mi_row_start; |
| 2003 | row_mt_sync->num_threads_working = 0; |
Jayasanker J | 55ac31c | 2022-06-22 19:15:01 +0530 | [diff] [blame] | 2004 | |
| 2005 | // intraBC mode is not evaluated during first-pass encoding. Hence, no |
| 2006 | // additional top-right delay is required. |
| 2007 | row_mt_sync->intrabc_extra_top_right_sb_delay = 0; |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 2008 | } |
| 2009 | } |
| 2010 | |
Sachin Kumar Garg | 800e70a | 2020-05-15 19:19:51 +0530 | [diff] [blame] | 2011 | num_workers = AOMMIN(num_workers, mt_info->num_workers); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 2012 | assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows, |
| 2013 | num_workers); |
| 2014 | fp_prepare_enc_workers(cpi, fp_enc_row_mt_worker_hook, num_workers); |
venkat sanampudi | 162bc48 | 2020-11-25 15:04:15 +0530 | [diff] [blame] | 2015 | launch_workers(&cpi->mt_info, num_workers); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 2016 | sync_enc_workers(&cpi->mt_info, cm, num_workers); |
Remya Prakasan | 0badba3 | 2023-08-17 16:44:52 +0530 | [diff] [blame] | 2017 | dealloc_thread_data_src_diff_buf(cpi, num_workers); |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 2018 | } |
Mufaddal Chakera | 65b6910 | 2020-05-01 04:07:13 +0530 | [diff] [blame] | 2019 | |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2020 | void av1_tpl_row_mt_sync_read_dummy(AV1TplRowMultiThreadSync *tpl_mt_sync, |
Sachin Kumar Garg | 47f069a | 2020-04-29 10:54:37 +0530 | [diff] [blame] | 2021 | int r, int c) { |
Sachin Kumar Garg | 137265e | 2020-04-27 12:56:51 +0530 | [diff] [blame] | 2022 | (void)tpl_mt_sync; |
| 2023 | (void)r; |
| 2024 | (void)c; |
Sachin Kumar Garg | 137265e | 2020-04-27 12:56:51 +0530 | [diff] [blame] | 2025 | } |
| 2026 | |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2027 | void av1_tpl_row_mt_sync_write_dummy(AV1TplRowMultiThreadSync *tpl_mt_sync, |
| 2028 | int r, int c, int cols) { |
Sachin Kumar Garg | 137265e | 2020-04-27 12:56:51 +0530 | [diff] [blame] | 2029 | (void)tpl_mt_sync; |
| 2030 | (void)r; |
| 2031 | (void)c; |
| 2032 | (void)cols; |
Sachin Kumar Garg | 137265e | 2020-04-27 12:56:51 +0530 | [diff] [blame] | 2033 | } |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2034 | |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2035 | void av1_tpl_row_mt_sync_read(AV1TplRowMultiThreadSync *tpl_row_mt_sync, int r, |
| 2036 | int c) { |
| 2037 | #if CONFIG_MULTITHREAD |
| 2038 | int nsync = tpl_row_mt_sync->sync_range; |
| 2039 | |
| 2040 | if (r) { |
| 2041 | pthread_mutex_t *const mutex = &tpl_row_mt_sync->mutex_[r - 1]; |
| 2042 | pthread_mutex_lock(mutex); |
| 2043 | |
| 2044 | while (c > tpl_row_mt_sync->num_finished_cols[r - 1] - nsync) |
| 2045 | pthread_cond_wait(&tpl_row_mt_sync->cond_[r - 1], mutex); |
| 2046 | pthread_mutex_unlock(mutex); |
| 2047 | } |
| 2048 | #else |
| 2049 | (void)tpl_row_mt_sync; |
| 2050 | (void)r; |
| 2051 | (void)c; |
| 2052 | #endif // CONFIG_MULTITHREAD |
| 2053 | } |
| 2054 | |
| 2055 | void av1_tpl_row_mt_sync_write(AV1TplRowMultiThreadSync *tpl_row_mt_sync, int r, |
| 2056 | int c, int cols) { |
| 2057 | #if CONFIG_MULTITHREAD |
| 2058 | int nsync = tpl_row_mt_sync->sync_range; |
| 2059 | int cur; |
| 2060 | // Only signal when there are enough encoded blocks for next row to run. |
| 2061 | int sig = 1; |
| 2062 | |
| 2063 | if (c < cols - 1) { |
| 2064 | cur = c; |
| 2065 | if (c % nsync) sig = 0; |
| 2066 | } else { |
| 2067 | cur = cols + nsync; |
| 2068 | } |
| 2069 | |
| 2070 | if (sig) { |
| 2071 | pthread_mutex_lock(&tpl_row_mt_sync->mutex_[r]); |
| 2072 | |
| 2073 | tpl_row_mt_sync->num_finished_cols[r] = cur; |
| 2074 | |
| 2075 | pthread_cond_signal(&tpl_row_mt_sync->cond_[r]); |
| 2076 | pthread_mutex_unlock(&tpl_row_mt_sync->mutex_[r]); |
| 2077 | } |
| 2078 | #else |
| 2079 | (void)tpl_row_mt_sync; |
| 2080 | (void)r; |
| 2081 | (void)c; |
| 2082 | (void)cols; |
| 2083 | #endif // CONFIG_MULTITHREAD |
| 2084 | } |
| 2085 | |
Mudassir Galaganath | e3bf24b | 2023-07-27 15:09:26 +0530 | [diff] [blame] | 2086 | static AOM_INLINE void set_mode_estimation_done(AV1_COMP *cpi) { |
| 2087 | const CommonModeInfoParams *const mi_params = &cpi->common.mi_params; |
| 2088 | TplParams *const tpl_data = &cpi->ppi->tpl_data; |
| 2089 | const BLOCK_SIZE bsize = |
| 2090 | convert_length_to_bsize(cpi->ppi->tpl_data.tpl_bsize_1d); |
| 2091 | const int mi_height = mi_size_high[bsize]; |
| 2092 | AV1TplRowMultiThreadInfo *const tpl_row_mt = &cpi->mt_info.tpl_row_mt; |
| 2093 | const int tplb_cols_in_tile = |
| 2094 | ROUND_POWER_OF_TWO(mi_params->mi_cols, mi_size_wide_log2[bsize]); |
| 2095 | // In case of tpl row-multithreading, due to top-right dependency, the worker |
| 2096 | // on an mb_row waits for the completion of the tpl processing of the top and |
| 2097 | // top-right blocks. Hence, in case a thread (main/worker) encounters an |
| 2098 | // error, update that the tpl processing of every mb_row in the frame is |
| 2099 | // complete in order to avoid dependent workers waiting indefinitely. |
| 2100 | for (int mi_row = 0, tplb_row = 0; mi_row < mi_params->mi_rows; |
| 2101 | mi_row += mi_height, tplb_row++) { |
| 2102 | (*tpl_row_mt->sync_write_ptr)(&tpl_data->tpl_mt_sync, tplb_row, |
| 2103 | tplb_cols_in_tile - 1, tplb_cols_in_tile); |
| 2104 | } |
| 2105 | } |
| 2106 | |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2107 | // Each worker calls tpl_worker_hook() and computes the tpl data. |
| 2108 | static int tpl_worker_hook(void *arg1, void *unused) { |
| 2109 | (void)unused; |
| 2110 | EncWorkerData *thread_data = (EncWorkerData *)arg1; |
| 2111 | AV1_COMP *cpi = thread_data->cpi; |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2112 | AV1_COMMON *cm = &cpi->common; |
| 2113 | MACROBLOCK *x = &thread_data->td->mb; |
| 2114 | MACROBLOCKD *xd = &x->e_mbd; |
Mudassir Galagnath | b845a7c | 2021-03-11 11:12:17 +0530 | [diff] [blame] | 2115 | TplTxfmStats *tpl_txfm_stats = &thread_data->td->tpl_txfm_stats; |
Deepa K G | b0a0a9c | 2023-06-20 11:09:04 +0530 | [diff] [blame] | 2116 | TplBuffers *tpl_tmp_buffers = &thread_data->td->tpl_tmp_buffers; |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2117 | CommonModeInfoParams *mi_params = &cm->mi_params; |
Mudassir Galaganath | e3bf24b | 2023-07-27 15:09:26 +0530 | [diff] [blame] | 2118 | int num_active_workers = cpi->ppi->tpl_data.tpl_mt_sync.num_threads_working; |
| 2119 | |
| 2120 | struct aom_internal_error_info *const error_info = &thread_data->error_info; |
| 2121 | xd->error_info = error_info; |
| 2122 | AV1TplRowMultiThreadInfo *const tpl_row_mt = &cpi->mt_info.tpl_row_mt; |
| 2123 | (void)tpl_row_mt; |
| 2124 | #if CONFIG_MULTITHREAD |
| 2125 | pthread_mutex_t *tpl_error_mutex_ = tpl_row_mt->mutex_; |
| 2126 | #endif |
| 2127 | |
| 2128 | // The jmp_buf is valid only for the duration of the function that calls |
| 2129 | // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 |
| 2130 | // before it returns. |
| 2131 | if (setjmp(error_info->jmp)) { |
| 2132 | error_info->setjmp = 0; |
| 2133 | #if CONFIG_MULTITHREAD |
| 2134 | pthread_mutex_lock(tpl_error_mutex_); |
| 2135 | tpl_row_mt->tpl_mt_exit = true; |
| 2136 | pthread_mutex_unlock(tpl_error_mutex_); |
| 2137 | #endif |
| 2138 | set_mode_estimation_done(cpi); |
| 2139 | return 0; |
| 2140 | } |
| 2141 | error_info->setjmp = 1; |
| 2142 | |
Mufaddal Chakera | 7260d14 | 2021-04-12 01:03:40 +0530 | [diff] [blame] | 2143 | BLOCK_SIZE bsize = convert_length_to_bsize(cpi->ppi->tpl_data.tpl_bsize_1d); |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2144 | TX_SIZE tx_size = max_txsize_lookup[bsize]; |
| 2145 | int mi_height = mi_size_high[bsize]; |
Mudassir Galagnath | b845a7c | 2021-03-11 11:12:17 +0530 | [diff] [blame] | 2146 | |
Angie Chiang | 63719b8 | 2021-05-18 08:56:23 -0700 | [diff] [blame] | 2147 | av1_init_tpl_txfm_stats(tpl_txfm_stats); |
Mudassir Galagnath | b845a7c | 2021-03-11 11:12:17 +0530 | [diff] [blame] | 2148 | |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2149 | for (int mi_row = thread_data->start * mi_height; mi_row < mi_params->mi_rows; |
| 2150 | mi_row += num_active_workers * mi_height) { |
| 2151 | // Motion estimation row boundary |
| 2152 | av1_set_mv_row_limits(mi_params, &x->mv_limits, mi_row, mi_height, |
| 2153 | cpi->oxcf.border_in_pixels); |
| 2154 | xd->mb_to_top_edge = -GET_MV_SUBPEL(mi_row * MI_SIZE); |
| 2155 | xd->mb_to_bottom_edge = |
| 2156 | GET_MV_SUBPEL((mi_params->mi_rows - mi_height - mi_row) * MI_SIZE); |
Deepa K G | b0a0a9c | 2023-06-20 11:09:04 +0530 | [diff] [blame] | 2157 | av1_mc_flow_dispenser_row(cpi, tpl_txfm_stats, tpl_tmp_buffers, x, mi_row, |
| 2158 | bsize, tx_size); |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2159 | } |
Mudassir Galaganath | e3bf24b | 2023-07-27 15:09:26 +0530 | [diff] [blame] | 2160 | error_info->setjmp = 0; |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2161 | return 1; |
| 2162 | } |
| 2163 | |
| 2164 | // Deallocate tpl synchronization related mutex and data. |
| 2165 | void av1_tpl_dealloc(AV1TplRowMultiThreadSync *tpl_sync) { |
| 2166 | assert(tpl_sync != NULL); |
| 2167 | |
| 2168 | #if CONFIG_MULTITHREAD |
| 2169 | if (tpl_sync->mutex_ != NULL) { |
| 2170 | for (int i = 0; i < tpl_sync->rows; ++i) |
| 2171 | pthread_mutex_destroy(&tpl_sync->mutex_[i]); |
| 2172 | aom_free(tpl_sync->mutex_); |
| 2173 | } |
| 2174 | if (tpl_sync->cond_ != NULL) { |
| 2175 | for (int i = 0; i < tpl_sync->rows; ++i) |
| 2176 | pthread_cond_destroy(&tpl_sync->cond_[i]); |
| 2177 | aom_free(tpl_sync->cond_); |
| 2178 | } |
| 2179 | #endif // CONFIG_MULTITHREAD |
| 2180 | |
| 2181 | aom_free(tpl_sync->num_finished_cols); |
| 2182 | // clear the structure as the source of this call may be a resize in which |
| 2183 | // case this call will be followed by an _alloc() which may fail. |
| 2184 | av1_zero(*tpl_sync); |
| 2185 | } |
| 2186 | |
| 2187 | // Allocate memory for tpl row synchronization. |
| 2188 | void av1_tpl_alloc(AV1TplRowMultiThreadSync *tpl_sync, AV1_COMMON *cm, |
Deepa K G | 14df51d | 2020-07-03 13:00:50 +0530 | [diff] [blame] | 2189 | int mb_rows) { |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2190 | tpl_sync->rows = mb_rows; |
| 2191 | #if CONFIG_MULTITHREAD |
| 2192 | { |
| 2193 | CHECK_MEM_ERROR(cm, tpl_sync->mutex_, |
| 2194 | aom_malloc(sizeof(*tpl_sync->mutex_) * mb_rows)); |
| 2195 | if (tpl_sync->mutex_) { |
| 2196 | for (int i = 0; i < mb_rows; ++i) |
| 2197 | pthread_mutex_init(&tpl_sync->mutex_[i], NULL); |
| 2198 | } |
| 2199 | |
| 2200 | CHECK_MEM_ERROR(cm, tpl_sync->cond_, |
| 2201 | aom_malloc(sizeof(*tpl_sync->cond_) * mb_rows)); |
| 2202 | if (tpl_sync->cond_) { |
| 2203 | for (int i = 0; i < mb_rows; ++i) |
| 2204 | pthread_cond_init(&tpl_sync->cond_[i], NULL); |
| 2205 | } |
| 2206 | } |
| 2207 | #endif // CONFIG_MULTITHREAD |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2208 | CHECK_MEM_ERROR(cm, tpl_sync->num_finished_cols, |
| 2209 | aom_malloc(sizeof(*tpl_sync->num_finished_cols) * mb_rows)); |
| 2210 | |
| 2211 | // Set up nsync. |
| 2212 | tpl_sync->sync_range = 1; |
| 2213 | } |
| 2214 | |
| 2215 | // Each worker is prepared by assigning the hook function and individual thread |
| 2216 | // data. |
| 2217 | static AOM_INLINE void prepare_tpl_workers(AV1_COMP *cpi, AVxWorkerHook hook, |
| 2218 | int num_workers) { |
| 2219 | MultiThreadInfo *mt_info = &cpi->mt_info; |
| 2220 | for (int i = num_workers - 1; i >= 0; i--) { |
| 2221 | AVxWorker *worker = &mt_info->workers[i]; |
| 2222 | EncWorkerData *thread_data = &mt_info->tile_thr_data[i]; |
| 2223 | |
| 2224 | worker->hook = hook; |
| 2225 | worker->data1 = thread_data; |
| 2226 | worker->data2 = NULL; |
| 2227 | |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 2228 | thread_data->thread_id = i; |
| 2229 | // Set the starting tile for each thread. |
| 2230 | thread_data->start = i; |
| 2231 | |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 2232 | thread_data->cpi = cpi; |
| 2233 | if (i == 0) { |
| 2234 | thread_data->td = &cpi->td; |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 2235 | } else { |
| 2236 | thread_data->td = thread_data->original_td; |
| 2237 | } |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 2238 | |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2239 | // Before encoding a frame, copy the thread data from cpi. |
| 2240 | if (thread_data->td != &cpi->td) { |
| 2241 | thread_data->td->mb = cpi->td.mb; |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 2242 | // OBMC buffers are used only to init MS params and remain unused when |
| 2243 | // called from tpl, hence set the buffers to defaults. |
| 2244 | av1_init_obmc_buffer(&thread_data->td->mb.obmc_buffer); |
Deepa K G | b0a0a9c | 2023-06-20 11:09:04 +0530 | [diff] [blame] | 2245 | if (!tpl_alloc_temp_buffers(&thread_data->td->tpl_tmp_buffers, |
| 2246 | cpi->ppi->tpl_data.tpl_bsize_1d)) { |
| 2247 | aom_internal_error(cpi->common.error, AOM_CODEC_MEM_ERROR, |
| 2248 | "Error allocating tpl data"); |
| 2249 | } |
Mudassir Galagnath | b845a7c | 2021-03-11 11:12:17 +0530 | [diff] [blame] | 2250 | thread_data->td->mb.tmp_conv_dst = thread_data->td->tmp_conv_dst; |
| 2251 | thread_data->td->mb.e_mbd.tmp_conv_dst = thread_data->td->mb.tmp_conv_dst; |
| 2252 | } |
| 2253 | } |
| 2254 | } |
| 2255 | |
Deepa K G | e802b78 | 2023-05-29 15:41:19 +0530 | [diff] [blame] | 2256 | #if CONFIG_BITRATE_ACCURACY |
Mudassir Galagnath | b845a7c | 2021-03-11 11:12:17 +0530 | [diff] [blame] | 2257 | // Accumulate transform stats after tpl. |
Angie Chiang | 3972211 | 2021-05-18 11:20:34 -0700 | [diff] [blame] | 2258 | static void tpl_accumulate_txfm_stats(ThreadData *main_td, |
| 2259 | const MultiThreadInfo *mt_info, |
| 2260 | int num_workers) { |
| 2261 | TplTxfmStats *accumulated_stats = &main_td->tpl_txfm_stats; |
Mudassir Galagnath | b845a7c | 2021-03-11 11:12:17 +0530 | [diff] [blame] | 2262 | for (int i = num_workers - 1; i >= 0; i--) { |
Angie Chiang | 3972211 | 2021-05-18 11:20:34 -0700 | [diff] [blame] | 2263 | AVxWorker *const worker = &mt_info->workers[i]; |
Mudassir Galagnath | b845a7c | 2021-03-11 11:12:17 +0530 | [diff] [blame] | 2264 | EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; |
| 2265 | ThreadData *td = thread_data->td; |
Angie Chiang | 3972211 | 2021-05-18 11:20:34 -0700 | [diff] [blame] | 2266 | if (td != main_td) { |
| 2267 | const TplTxfmStats *tpl_txfm_stats = &td->tpl_txfm_stats; |
| 2268 | av1_accumulate_tpl_txfm_stats(tpl_txfm_stats, accumulated_stats); |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2269 | } |
| 2270 | } |
| 2271 | } |
Deepa K G | e802b78 | 2023-05-29 15:41:19 +0530 | [diff] [blame] | 2272 | #endif // CONFIG_BITRATE_ACCURACY |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2273 | |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2274 | // Implements multi-threading for tpl. |
| 2275 | void av1_mc_flow_dispenser_mt(AV1_COMP *cpi) { |
| 2276 | AV1_COMMON *cm = &cpi->common; |
| 2277 | CommonModeInfoParams *mi_params = &cm->mi_params; |
| 2278 | MultiThreadInfo *mt_info = &cpi->mt_info; |
Mufaddal Chakera | 7260d14 | 2021-04-12 01:03:40 +0530 | [diff] [blame] | 2279 | TplParams *tpl_data = &cpi->ppi->tpl_data; |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2280 | AV1TplRowMultiThreadSync *tpl_sync = &tpl_data->tpl_mt_sync; |
| 2281 | int mb_rows = mi_params->mb_rows; |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 2282 | int num_workers = |
| 2283 | AOMMIN(mt_info->num_mod_workers[MOD_TPL], mt_info->num_workers); |
Deepa K G | bbaba3a | 2020-07-03 13:31:14 +0530 | [diff] [blame] | 2284 | |
Deepa K G | 14df51d | 2020-07-03 13:00:50 +0530 | [diff] [blame] | 2285 | if (mb_rows != tpl_sync->rows) { |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2286 | av1_tpl_dealloc(tpl_sync); |
Deepa K G | 14df51d | 2020-07-03 13:00:50 +0530 | [diff] [blame] | 2287 | av1_tpl_alloc(tpl_sync, cm, mb_rows); |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2288 | } |
| 2289 | tpl_sync->num_threads_working = num_workers; |
| 2290 | |
| 2291 | // Initialize cur_mb_col to -1 for all MB rows. |
| 2292 | memset(tpl_sync->num_finished_cols, -1, |
| 2293 | sizeof(*tpl_sync->num_finished_cols) * mb_rows); |
| 2294 | |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2295 | prepare_tpl_workers(cpi, tpl_worker_hook, num_workers); |
venkat sanampudi | 162bc48 | 2020-11-25 15:04:15 +0530 | [diff] [blame] | 2296 | launch_workers(&cpi->mt_info, num_workers); |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2297 | sync_enc_workers(&cpi->mt_info, cm, num_workers); |
Deepa K G | e802b78 | 2023-05-29 15:41:19 +0530 | [diff] [blame] | 2298 | #if CONFIG_BITRATE_ACCURACY |
Angie Chiang | 3972211 | 2021-05-18 11:20:34 -0700 | [diff] [blame] | 2299 | tpl_accumulate_txfm_stats(&cpi->td, &cpi->mt_info, num_workers); |
Deepa K G | e802b78 | 2023-05-29 15:41:19 +0530 | [diff] [blame] | 2300 | #endif // CONFIG_BITRATE_ACCURACY |
Deepa K G | b0a0a9c | 2023-06-20 11:09:04 +0530 | [diff] [blame] | 2301 | for (int i = num_workers - 1; i >= 0; i--) { |
| 2302 | EncWorkerData *thread_data = &mt_info->tile_thr_data[i]; |
| 2303 | ThreadData *td = thread_data->td; |
| 2304 | if (td != &cpi->td) tpl_dealloc_temp_buffers(&td->tpl_tmp_buffers); |
| 2305 | } |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2306 | } |
Sachin Kumar Garg | 194c57b | 2020-05-06 08:50:13 +0530 | [diff] [blame] | 2307 | |
Jayasanker J | 0761c69 | 2020-11-13 12:36:40 +0530 | [diff] [blame] | 2308 | // Deallocate memory for temporal filter multi-thread synchronization. |
| 2309 | void av1_tf_mt_dealloc(AV1TemporalFilterSync *tf_sync) { |
| 2310 | assert(tf_sync != NULL); |
| 2311 | #if CONFIG_MULTITHREAD |
| 2312 | if (tf_sync->mutex_ != NULL) { |
| 2313 | pthread_mutex_destroy(tf_sync->mutex_); |
| 2314 | aom_free(tf_sync->mutex_); |
| 2315 | } |
| 2316 | #endif // CONFIG_MULTITHREAD |
| 2317 | tf_sync->next_tf_row = 0; |
| 2318 | } |
| 2319 | |
| 2320 | // Checks if a job is available. If job is available, |
| 2321 | // populates next_tf_row and returns 1, else returns 0. |
| 2322 | static AOM_INLINE int tf_get_next_job(AV1TemporalFilterSync *tf_mt_sync, |
| 2323 | int *current_mb_row, int mb_rows) { |
| 2324 | int do_next_row = 0; |
| 2325 | #if CONFIG_MULTITHREAD |
| 2326 | pthread_mutex_t *tf_mutex_ = tf_mt_sync->mutex_; |
| 2327 | pthread_mutex_lock(tf_mutex_); |
| 2328 | #endif |
Mudassir Galaganath | 0874cac | 2023-07-07 09:01:37 +0530 | [diff] [blame] | 2329 | if (!tf_mt_sync->tf_mt_exit && tf_mt_sync->next_tf_row < mb_rows) { |
Jayasanker J | 0761c69 | 2020-11-13 12:36:40 +0530 | [diff] [blame] | 2330 | *current_mb_row = tf_mt_sync->next_tf_row; |
| 2331 | tf_mt_sync->next_tf_row++; |
| 2332 | do_next_row = 1; |
| 2333 | } |
| 2334 | #if CONFIG_MULTITHREAD |
| 2335 | pthread_mutex_unlock(tf_mutex_); |
| 2336 | #endif |
| 2337 | return do_next_row; |
| 2338 | } |
| 2339 | |
| 2340 | // Hook function for each thread in temporal filter multi-threading. |
| 2341 | static int tf_worker_hook(void *arg1, void *unused) { |
| 2342 | (void)unused; |
| 2343 | EncWorkerData *thread_data = (EncWorkerData *)arg1; |
| 2344 | AV1_COMP *cpi = thread_data->cpi; |
| 2345 | ThreadData *td = thread_data->td; |
| 2346 | TemporalFilterCtx *tf_ctx = &cpi->tf_ctx; |
| 2347 | AV1TemporalFilterSync *tf_sync = &cpi->mt_info.tf_sync; |
| 2348 | const struct scale_factors *scale = &cpi->tf_ctx.sf; |
Mudassir Galaganath | 0874cac | 2023-07-07 09:01:37 +0530 | [diff] [blame] | 2349 | |
| 2350 | #if CONFIG_MULTITHREAD |
| 2351 | pthread_mutex_t *tf_mutex_ = tf_sync->mutex_; |
| 2352 | #endif |
| 2353 | MACROBLOCKD *const xd = &thread_data->td->mb.e_mbd; |
| 2354 | struct aom_internal_error_info *const error_info = &thread_data->error_info; |
| 2355 | xd->error_info = error_info; |
| 2356 | |
| 2357 | // The jmp_buf is valid only for the duration of the function that calls |
| 2358 | // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 |
| 2359 | // before it returns. |
| 2360 | if (setjmp(error_info->jmp)) { |
| 2361 | error_info->setjmp = 0; |
| 2362 | #if CONFIG_MULTITHREAD |
| 2363 | pthread_mutex_lock(tf_mutex_); |
| 2364 | tf_sync->tf_mt_exit = true; |
| 2365 | pthread_mutex_unlock(tf_mutex_); |
| 2366 | #endif |
| 2367 | return 0; |
| 2368 | } |
| 2369 | error_info->setjmp = 1; |
| 2370 | |
Jayasanker J | 0761c69 | 2020-11-13 12:36:40 +0530 | [diff] [blame] | 2371 | const int num_planes = av1_num_planes(&cpi->common); |
| 2372 | assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE); |
| 2373 | |
| 2374 | MACROBLOCKD *mbd = &td->mb.e_mbd; |
| 2375 | uint8_t *input_buffer[MAX_MB_PLANE]; |
| 2376 | MB_MODE_INFO **input_mb_mode_info; |
| 2377 | tf_save_state(mbd, &input_mb_mode_info, input_buffer, num_planes); |
| 2378 | tf_setup_macroblockd(mbd, &td->tf_data, scale); |
| 2379 | |
| 2380 | int current_mb_row = -1; |
| 2381 | |
| 2382 | while (tf_get_next_job(tf_sync, ¤t_mb_row, tf_ctx->mb_rows)) |
| 2383 | av1_tf_do_filtering_row(cpi, td, current_mb_row); |
| 2384 | |
| 2385 | tf_restore_state(mbd, input_mb_mode_info, input_buffer, num_planes); |
| 2386 | |
Mudassir Galaganath | 0874cac | 2023-07-07 09:01:37 +0530 | [diff] [blame] | 2387 | error_info->setjmp = 0; |
Jayasanker J | 0761c69 | 2020-11-13 12:36:40 +0530 | [diff] [blame] | 2388 | return 1; |
| 2389 | } |
| 2390 | |
| 2391 | // Assigns temporal filter hook function and thread data to each worker. |
| 2392 | static void prepare_tf_workers(AV1_COMP *cpi, AVxWorkerHook hook, |
| 2393 | int num_workers, int is_highbitdepth) { |
| 2394 | MultiThreadInfo *mt_info = &cpi->mt_info; |
| 2395 | mt_info->tf_sync.next_tf_row = 0; |
Mudassir Galaganath | 0874cac | 2023-07-07 09:01:37 +0530 | [diff] [blame] | 2396 | mt_info->tf_sync.tf_mt_exit = false; |
Jayasanker J | 0761c69 | 2020-11-13 12:36:40 +0530 | [diff] [blame] | 2397 | for (int i = num_workers - 1; i >= 0; i--) { |
| 2398 | AVxWorker *worker = &mt_info->workers[i]; |
| 2399 | EncWorkerData *thread_data = &mt_info->tile_thr_data[i]; |
| 2400 | |
| 2401 | worker->hook = hook; |
| 2402 | worker->data1 = thread_data; |
| 2403 | worker->data2 = NULL; |
| 2404 | |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 2405 | thread_data->thread_id = i; |
| 2406 | // Set the starting tile for each thread. |
| 2407 | thread_data->start = i; |
| 2408 | |
Jayasanker J | 0761c69 | 2020-11-13 12:36:40 +0530 | [diff] [blame] | 2409 | thread_data->cpi = cpi; |
| 2410 | if (i == 0) { |
| 2411 | thread_data->td = &cpi->td; |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 2412 | } else { |
| 2413 | thread_data->td = thread_data->original_td; |
| 2414 | } |
Jayasanker J | 0761c69 | 2020-11-13 12:36:40 +0530 | [diff] [blame] | 2415 | |
| 2416 | // Before encoding a frame, copy the thread data from cpi. |
| 2417 | if (thread_data->td != &cpi->td) { |
| 2418 | thread_data->td->mb = cpi->td.mb; |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 2419 | // OBMC buffers are used only to init MS params and remain unused when |
| 2420 | // called from tf, hence set the buffers to defaults. |
| 2421 | av1_init_obmc_buffer(&thread_data->td->mb.obmc_buffer); |
James Zern | 9b70b52 | 2022-04-28 13:11:27 -0700 | [diff] [blame] | 2422 | if (!tf_alloc_and_reset_data(&thread_data->td->tf_data, |
| 2423 | cpi->tf_ctx.num_pels, is_highbitdepth)) { |
| 2424 | aom_internal_error(cpi->common.error, AOM_CODEC_MEM_ERROR, |
| 2425 | "Error allocating temporal filter data"); |
| 2426 | } |
Jayasanker J | 0761c69 | 2020-11-13 12:36:40 +0530 | [diff] [blame] | 2427 | } |
| 2428 | } |
| 2429 | } |
| 2430 | |
| 2431 | // Deallocate thread specific data for temporal filter. |
| 2432 | static void tf_dealloc_thread_data(AV1_COMP *cpi, int num_workers, |
| 2433 | int is_highbitdepth) { |
| 2434 | MultiThreadInfo *mt_info = &cpi->mt_info; |
| 2435 | for (int i = num_workers - 1; i >= 0; i--) { |
| 2436 | EncWorkerData *thread_data = &mt_info->tile_thr_data[i]; |
| 2437 | ThreadData *td = thread_data->td; |
| 2438 | if (td != &cpi->td) tf_dealloc_data(&td->tf_data, is_highbitdepth); |
| 2439 | } |
| 2440 | } |
| 2441 | |
| 2442 | // Accumulate sse and sum after temporal filtering. |
| 2443 | static void tf_accumulate_frame_diff(AV1_COMP *cpi, int num_workers) { |
| 2444 | FRAME_DIFF *total_diff = &cpi->td.tf_data.diff; |
| 2445 | for (int i = num_workers - 1; i >= 0; i--) { |
| 2446 | AVxWorker *const worker = &cpi->mt_info.workers[i]; |
| 2447 | EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; |
| 2448 | ThreadData *td = thread_data->td; |
| 2449 | FRAME_DIFF *diff = &td->tf_data.diff; |
| 2450 | if (td != &cpi->td) { |
| 2451 | total_diff->sse += diff->sse; |
| 2452 | total_diff->sum += diff->sum; |
| 2453 | } |
| 2454 | } |
| 2455 | } |
| 2456 | |
| 2457 | // Implements multi-threading for temporal filter. |
| 2458 | void av1_tf_do_filtering_mt(AV1_COMP *cpi) { |
| 2459 | AV1_COMMON *cm = &cpi->common; |
| 2460 | MultiThreadInfo *mt_info = &cpi->mt_info; |
| 2461 | const int is_highbitdepth = cpi->tf_ctx.is_highbitdepth; |
| 2462 | |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 2463 | int num_workers = |
| 2464 | AOMMIN(mt_info->num_mod_workers[MOD_TF], mt_info->num_workers); |
Jayasanker J | 0761c69 | 2020-11-13 12:36:40 +0530 | [diff] [blame] | 2465 | |
| 2466 | prepare_tf_workers(cpi, tf_worker_hook, num_workers, is_highbitdepth); |
venkat sanampudi | 162bc48 | 2020-11-25 15:04:15 +0530 | [diff] [blame] | 2467 | launch_workers(mt_info, num_workers); |
Jayasanker J | 0761c69 | 2020-11-13 12:36:40 +0530 | [diff] [blame] | 2468 | sync_enc_workers(mt_info, cm, num_workers); |
| 2469 | tf_accumulate_frame_diff(cpi, num_workers); |
| 2470 | tf_dealloc_thread_data(cpi, num_workers, is_highbitdepth); |
| 2471 | } |
| 2472 | |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2473 | // Checks if a job is available in the current direction. If a job is available, |
| 2474 | // frame_idx will be populated and returns 1, else returns 0. |
| 2475 | static AOM_INLINE int get_next_gm_job(AV1_COMP *cpi, int *frame_idx, |
| 2476 | int cur_dir) { |
| 2477 | GlobalMotionInfo *gm_info = &cpi->gm_info; |
| 2478 | JobInfo *job_info = &cpi->mt_info.gm_sync.job_info; |
| 2479 | |
| 2480 | int total_refs = gm_info->num_ref_frames[cur_dir]; |
| 2481 | int8_t cur_frame_to_process = job_info->next_frame_to_process[cur_dir]; |
| 2482 | |
| 2483 | if (cur_frame_to_process < total_refs && !job_info->early_exit[cur_dir]) { |
| 2484 | *frame_idx = gm_info->reference_frames[cur_dir][cur_frame_to_process].frame; |
| 2485 | job_info->next_frame_to_process[cur_dir] += 1; |
| 2486 | return 1; |
| 2487 | } |
| 2488 | return 0; |
| 2489 | } |
| 2490 | |
| 2491 | // Switches the current direction and calls the function get_next_gm_job() if |
| 2492 | // the speed feature 'prune_ref_frame_for_gm_search' is not set. |
| 2493 | static AOM_INLINE void switch_direction(AV1_COMP *cpi, int *frame_idx, |
| 2494 | int *cur_dir) { |
| 2495 | if (cpi->sf.gm_sf.prune_ref_frame_for_gm_search) return; |
| 2496 | // Switch the direction and get next job |
| 2497 | *cur_dir = !(*cur_dir); |
| 2498 | get_next_gm_job(cpi, frame_idx, *(cur_dir)); |
| 2499 | } |
| 2500 | |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2501 | // Hook function for each thread in global motion multi-threading. |
| 2502 | static int gm_mt_worker_hook(void *arg1, void *unused) { |
| 2503 | (void)unused; |
| 2504 | |
| 2505 | EncWorkerData *thread_data = (EncWorkerData *)arg1; |
| 2506 | AV1_COMP *cpi = thread_data->cpi; |
| 2507 | GlobalMotionInfo *gm_info = &cpi->gm_info; |
Mudassir Galaganath | 440749f | 2023-08-01 16:51:27 +0530 | [diff] [blame] | 2508 | AV1GlobalMotionSync *gm_sync = &cpi->mt_info.gm_sync; |
| 2509 | JobInfo *job_info = &gm_sync->job_info; |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2510 | int thread_id = thread_data->thread_id; |
Mudassir Galaganath | cfffbda | 2023-08-03 18:01:35 +0530 | [diff] [blame] | 2511 | GlobalMotionData *gm_thread_data = &thread_data->td->gm_data; |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2512 | #if CONFIG_MULTITHREAD |
Mudassir Galaganath | 440749f | 2023-08-01 16:51:27 +0530 | [diff] [blame] | 2513 | pthread_mutex_t *gm_mt_mutex_ = gm_sync->mutex_; |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2514 | #endif |
| 2515 | |
Mudassir Galaganath | 440749f | 2023-08-01 16:51:27 +0530 | [diff] [blame] | 2516 | MACROBLOCKD *const xd = &thread_data->td->mb.e_mbd; |
| 2517 | struct aom_internal_error_info *const error_info = &thread_data->error_info; |
| 2518 | xd->error_info = error_info; |
| 2519 | |
| 2520 | // The jmp_buf is valid only for the duration of the function that calls |
| 2521 | // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 |
| 2522 | // before it returns. |
| 2523 | if (setjmp(error_info->jmp)) { |
| 2524 | error_info->setjmp = 0; |
| 2525 | #if CONFIG_MULTITHREAD |
| 2526 | pthread_mutex_lock(gm_mt_mutex_); |
| 2527 | gm_sync->gm_mt_exit = true; |
| 2528 | pthread_mutex_unlock(gm_mt_mutex_); |
| 2529 | #endif |
| 2530 | return 0; |
| 2531 | } |
| 2532 | error_info->setjmp = 1; |
| 2533 | |
| 2534 | int cur_dir = job_info->thread_id_to_dir[thread_id]; |
| 2535 | bool gm_mt_exit = false; |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2536 | while (1) { |
Remya | a51aee1 | 2020-05-09 13:46:10 +0530 | [diff] [blame] | 2537 | int ref_buf_idx = -1; |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2538 | |
| 2539 | #if CONFIG_MULTITHREAD |
| 2540 | pthread_mutex_lock(gm_mt_mutex_); |
| 2541 | #endif |
| 2542 | |
Mudassir Galaganath | 440749f | 2023-08-01 16:51:27 +0530 | [diff] [blame] | 2543 | gm_mt_exit = gm_sync->gm_mt_exit; |
Remya | a51aee1 | 2020-05-09 13:46:10 +0530 | [diff] [blame] | 2544 | // Populates ref_buf_idx(the reference frame type) for which global motion |
| 2545 | // estimation will be done. |
Mudassir Galaganath | 440749f | 2023-08-01 16:51:27 +0530 | [diff] [blame] | 2546 | if (!gm_mt_exit && !get_next_gm_job(cpi, &ref_buf_idx, cur_dir)) { |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2547 | // No jobs are available for the current direction. Switch |
| 2548 | // to other direction and get the next job, if available. |
Remya | a51aee1 | 2020-05-09 13:46:10 +0530 | [diff] [blame] | 2549 | switch_direction(cpi, &ref_buf_idx, &cur_dir); |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2550 | } |
| 2551 | |
| 2552 | #if CONFIG_MULTITHREAD |
| 2553 | pthread_mutex_unlock(gm_mt_mutex_); |
| 2554 | #endif |
| 2555 | |
Mudassir Galaganath | 440749f | 2023-08-01 16:51:27 +0530 | [diff] [blame] | 2556 | // When gm_mt_exit is set to true, other workers need not pursue any |
| 2557 | // further jobs. |
Mudassir Galaganath | 17c3321 | 2023-09-04 15:06:17 +0530 | [diff] [blame] | 2558 | if (gm_mt_exit || ref_buf_idx == -1) break; |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2559 | |
Remya | a51aee1 | 2020-05-09 13:46:10 +0530 | [diff] [blame] | 2560 | // Compute global motion for the given ref_buf_idx. |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2561 | av1_compute_gm_for_valid_ref_frames( |
Wan-Teh Chang | 4829841 | 2023-09-21 13:18:04 -0700 | [diff] [blame] | 2562 | cpi, error_info, gm_info->ref_buf, ref_buf_idx, |
| 2563 | gm_thread_data->motion_models, gm_thread_data->segment_map, |
| 2564 | gm_info->segment_map_w, gm_info->segment_map_h); |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2565 | |
| 2566 | #if CONFIG_MULTITHREAD |
| 2567 | pthread_mutex_lock(gm_mt_mutex_); |
| 2568 | #endif |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2569 | // If global motion w.r.t. current ref frame is |
| 2570 | // INVALID/TRANSLATION/IDENTITY, skip the evaluation of global motion w.r.t |
Rachel Barker | 277c8ce | 2023-01-25 16:11:07 +0000 | [diff] [blame] | 2571 | // the remaining ref frames in that direction. |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2572 | if (cpi->sf.gm_sf.prune_ref_frame_for_gm_search && |
Rachel Barker | 8892c51 | 2023-03-13 22:41:24 +0000 | [diff] [blame] | 2573 | cpi->common.global_motion[ref_buf_idx].wmtype <= TRANSLATION) |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2574 | job_info->early_exit[cur_dir] = 1; |
| 2575 | |
| 2576 | #if CONFIG_MULTITHREAD |
| 2577 | pthread_mutex_unlock(gm_mt_mutex_); |
| 2578 | #endif |
| 2579 | } |
Mudassir Galaganath | 440749f | 2023-08-01 16:51:27 +0530 | [diff] [blame] | 2580 | error_info->setjmp = 0; |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2581 | return 1; |
| 2582 | } |
| 2583 | |
| 2584 | // Assigns global motion hook function and thread data to each worker. |
| 2585 | static AOM_INLINE void prepare_gm_workers(AV1_COMP *cpi, AVxWorkerHook hook, |
| 2586 | int num_workers) { |
| 2587 | MultiThreadInfo *mt_info = &cpi->mt_info; |
Mudassir Galaganath | 440749f | 2023-08-01 16:51:27 +0530 | [diff] [blame] | 2588 | mt_info->gm_sync.gm_mt_exit = false; |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2589 | for (int i = num_workers - 1; i >= 0; i--) { |
| 2590 | AVxWorker *worker = &mt_info->workers[i]; |
| 2591 | EncWorkerData *thread_data = &mt_info->tile_thr_data[i]; |
| 2592 | |
| 2593 | worker->hook = hook; |
| 2594 | worker->data1 = thread_data; |
| 2595 | worker->data2 = NULL; |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 2596 | |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 2597 | thread_data->thread_id = i; |
| 2598 | // Set the starting tile for each thread. |
| 2599 | thread_data->start = i; |
| 2600 | |
Mufaddal Chakera | 9157925 | 2020-05-18 17:41:46 +0530 | [diff] [blame] | 2601 | thread_data->cpi = cpi; |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 2602 | if (i == 0) { |
| 2603 | thread_data->td = &cpi->td; |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 2604 | } else { |
| 2605 | thread_data->td = thread_data->original_td; |
| 2606 | } |
Mudassir Galaganath | cfffbda | 2023-08-03 18:01:35 +0530 | [diff] [blame] | 2607 | |
| 2608 | if (thread_data->td != &cpi->td) |
| 2609 | gm_alloc_data(cpi, &thread_data->td->gm_data); |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2610 | } |
| 2611 | } |
| 2612 | |
| 2613 | // Assigns available threads to past/future direction. |
| 2614 | static AOM_INLINE void assign_thread_to_dir(int8_t *thread_id_to_dir, |
| 2615 | int num_workers) { |
| 2616 | int8_t frame_dir_idx = 0; |
| 2617 | |
| 2618 | for (int i = 0; i < num_workers; i++) { |
| 2619 | thread_id_to_dir[i] = frame_dir_idx++; |
| 2620 | if (frame_dir_idx == MAX_DIRECTIONS) frame_dir_idx = 0; |
| 2621 | } |
| 2622 | } |
| 2623 | |
| 2624 | // Computes number of workers for global motion multi-threading. |
| 2625 | static AOM_INLINE int compute_gm_workers(const AV1_COMP *cpi) { |
| 2626 | int total_refs = |
| 2627 | cpi->gm_info.num_ref_frames[0] + cpi->gm_info.num_ref_frames[1]; |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 2628 | int num_gm_workers = cpi->sf.gm_sf.prune_ref_frame_for_gm_search |
| 2629 | ? AOMMIN(MAX_DIRECTIONS, total_refs) |
| 2630 | : total_refs; |
Mufaddal Chakera | b8fd9b3 | 2021-01-13 14:07:09 +0530 | [diff] [blame] | 2631 | num_gm_workers = AOMMIN(num_gm_workers, cpi->mt_info.num_workers); |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 2632 | return (num_gm_workers); |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2633 | } |
| 2634 | |
| 2635 | // Frees the memory allocated for each worker in global motion multi-threading. |
Mudassir Galaganath | cfffbda | 2023-08-03 18:01:35 +0530 | [diff] [blame] | 2636 | static AOM_INLINE void gm_dealloc_thread_data(AV1_COMP *cpi, int num_workers) { |
| 2637 | MultiThreadInfo *mt_info = &cpi->mt_info; |
| 2638 | for (int j = 0; j < num_workers; j++) { |
| 2639 | EncWorkerData *thread_data = &mt_info->tile_thr_data[j]; |
| 2640 | ThreadData *td = thread_data->td; |
| 2641 | if (td != &cpi->td) gm_dealloc_data(&td->gm_data); |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2642 | } |
| 2643 | } |
| 2644 | |
| 2645 | // Implements multi-threading for global motion. |
| 2646 | void av1_global_motion_estimation_mt(AV1_COMP *cpi) { |
Mudassir Galaganath | cfffbda | 2023-08-03 18:01:35 +0530 | [diff] [blame] | 2647 | JobInfo *job_info = &cpi->mt_info.gm_sync.job_info; |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2648 | |
| 2649 | av1_zero(*job_info); |
| 2650 | |
| 2651 | int num_workers = compute_gm_workers(cpi); |
| 2652 | |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2653 | assign_thread_to_dir(job_info->thread_id_to_dir, num_workers); |
| 2654 | prepare_gm_workers(cpi, gm_mt_worker_hook, num_workers); |
venkat sanampudi | 162bc48 | 2020-11-25 15:04:15 +0530 | [diff] [blame] | 2655 | launch_workers(&cpi->mt_info, num_workers); |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2656 | sync_enc_workers(&cpi->mt_info, &cpi->common, num_workers); |
Mudassir Galaganath | cfffbda | 2023-08-03 18:01:35 +0530 | [diff] [blame] | 2657 | gm_dealloc_thread_data(cpi, num_workers); |
Remya | 1a090d5 | 2020-05-04 11:52:10 +0530 | [diff] [blame] | 2658 | } |
Jerome Jiang | fbbef17 | 2020-10-28 11:32:59 -0700 | [diff] [blame] | 2659 | #endif // !CONFIG_REALTIME_ONLY |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 2660 | |
Cheng Chen | 5e9d457 | 2023-02-02 16:40:52 -0800 | [diff] [blame] | 2661 | static AOM_INLINE int get_next_job_allintra( |
| 2662 | AV1EncRowMultiThreadSync *const row_mt_sync, const int mi_row_end, |
| 2663 | int *current_mi_row, int mib_size) { |
| 2664 | if (row_mt_sync->next_mi_row < mi_row_end) { |
| 2665 | *current_mi_row = row_mt_sync->next_mi_row; |
| 2666 | row_mt_sync->num_threads_working++; |
| 2667 | row_mt_sync->next_mi_row += mib_size; |
| 2668 | return 1; |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2669 | } |
Cheng Chen | 5e9d457 | 2023-02-02 16:40:52 -0800 | [diff] [blame] | 2670 | return 0; |
Cheng Chen | e142124 | 2023-01-31 12:42:50 -0800 | [diff] [blame] | 2671 | } |
| 2672 | |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2673 | static AOM_INLINE void prepare_wiener_var_workers(AV1_COMP *const cpi, |
| 2674 | AVxWorkerHook hook, |
| 2675 | const int num_workers) { |
| 2676 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
| 2677 | for (int i = num_workers - 1; i >= 0; i--) { |
| 2678 | AVxWorker *const worker = &mt_info->workers[i]; |
| 2679 | EncWorkerData *const thread_data = &mt_info->tile_thr_data[i]; |
| 2680 | |
| 2681 | worker->hook = hook; |
| 2682 | worker->data1 = thread_data; |
| 2683 | worker->data2 = NULL; |
| 2684 | |
| 2685 | thread_data->thread_id = i; |
Cheng Chen | e142124 | 2023-01-31 12:42:50 -0800 | [diff] [blame] | 2686 | // Set the starting tile for each thread, in this case the preprocessing |
| 2687 | // stage does not need tiles. So we set it to 0. |
| 2688 | thread_data->start = 0; |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2689 | |
| 2690 | thread_data->cpi = cpi; |
Cheng Chen | e142124 | 2023-01-31 12:42:50 -0800 | [diff] [blame] | 2691 | if (i == 0) { |
| 2692 | thread_data->td = &cpi->td; |
| 2693 | } else { |
| 2694 | thread_data->td = thread_data->original_td; |
| 2695 | } |
| 2696 | |
| 2697 | if (thread_data->td != &cpi->td) { |
| 2698 | thread_data->td->mb = cpi->td.mb; |
| 2699 | } |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2700 | } |
| 2701 | } |
| 2702 | |
| 2703 | static int cal_mb_wiener_var_hook(void *arg1, void *unused) { |
| 2704 | (void)unused; |
| 2705 | EncWorkerData *const thread_data = (EncWorkerData *)arg1; |
| 2706 | AV1_COMP *const cpi = thread_data->cpi; |
Cheng Chen | e142124 | 2023-01-31 12:42:50 -0800 | [diff] [blame] | 2707 | MACROBLOCK *x = &thread_data->td->mb; |
| 2708 | MACROBLOCKD *xd = &x->e_mbd; |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2709 | const BLOCK_SIZE bsize = cpi->weber_bsize; |
| 2710 | const int mb_step = mi_size_wide[bsize]; |
Cheng Chen | 5e9d457 | 2023-02-02 16:40:52 -0800 | [diff] [blame] | 2711 | AV1EncRowMultiThreadSync *const intra_row_mt_sync = |
| 2712 | &cpi->ppi->intra_row_mt_sync; |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2713 | AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt; |
| 2714 | (void)enc_row_mt; |
| 2715 | #if CONFIG_MULTITHREAD |
| 2716 | pthread_mutex_t *enc_row_mt_mutex_ = enc_row_mt->mutex_; |
| 2717 | #endif |
| 2718 | DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]); |
| 2719 | DECLARE_ALIGNED(32, tran_low_t, coeff[32 * 32]); |
| 2720 | DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]); |
| 2721 | DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]); |
| 2722 | double sum_rec_distortion = 0; |
| 2723 | double sum_est_rate = 0; |
| 2724 | int has_jobs = 1; |
| 2725 | while (has_jobs) { |
| 2726 | int current_mi_row = -1; |
| 2727 | #if CONFIG_MULTITHREAD |
| 2728 | pthread_mutex_lock(enc_row_mt_mutex_); |
| 2729 | #endif |
Cheng Chen | 5e9d457 | 2023-02-02 16:40:52 -0800 | [diff] [blame] | 2730 | has_jobs = |
| 2731 | get_next_job_allintra(intra_row_mt_sync, cpi->common.mi_params.mi_rows, |
| 2732 | ¤t_mi_row, mb_step); |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2733 | #if CONFIG_MULTITHREAD |
| 2734 | pthread_mutex_unlock(enc_row_mt_mutex_); |
| 2735 | #endif |
| 2736 | if (!has_jobs) break; |
| 2737 | // TODO(chengchen): properly accumulate the distortion and rate. |
Cheng Chen | e142124 | 2023-01-31 12:42:50 -0800 | [diff] [blame] | 2738 | av1_calc_mb_wiener_var_row(cpi, x, xd, current_mi_row, src_diff, coeff, |
| 2739 | qcoeff, dqcoeff, &sum_rec_distortion, |
| 2740 | &sum_est_rate); |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2741 | #if CONFIG_MULTITHREAD |
| 2742 | pthread_mutex_lock(enc_row_mt_mutex_); |
| 2743 | #endif |
Cheng Chen | 5e9d457 | 2023-02-02 16:40:52 -0800 | [diff] [blame] | 2744 | intra_row_mt_sync->num_threads_working--; |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2745 | #if CONFIG_MULTITHREAD |
| 2746 | pthread_mutex_unlock(enc_row_mt_mutex_); |
| 2747 | #endif |
| 2748 | } |
| 2749 | return 1; |
| 2750 | } |
| 2751 | |
| 2752 | // This function is the multi-threading version of computing the wiener |
| 2753 | // variance. |
| 2754 | // Note that the wiener variance is used for allintra mode (1 pass) and its |
| 2755 | // computation is before the frame encoding, so we don't need to consider |
| 2756 | // the number of tiles, instead we allocate all available threads to |
| 2757 | // the computation. |
Wan-Teh Chang | 75fe580 | 2022-12-02 15:08:58 -0800 | [diff] [blame] | 2758 | void av1_calc_mb_wiener_var_mt(AV1_COMP *cpi, int num_workers, |
| 2759 | double *sum_rec_distortion, |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2760 | double *sum_est_rate) { |
| 2761 | (void)sum_rec_distortion; |
| 2762 | (void)sum_est_rate; |
| 2763 | AV1_COMMON *const cm = &cpi->common; |
| 2764 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
Cheng Chen | 5e9d457 | 2023-02-02 16:40:52 -0800 | [diff] [blame] | 2765 | AV1EncRowMultiThreadSync *const intra_row_mt_sync = |
| 2766 | &cpi->ppi->intra_row_mt_sync; |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2767 | |
| 2768 | // TODO(chengchen): the memory usage could be improved. |
| 2769 | const int mi_rows = cm->mi_params.mi_rows; |
Cheng Chen | 5e9d457 | 2023-02-02 16:40:52 -0800 | [diff] [blame] | 2770 | row_mt_sync_mem_alloc(intra_row_mt_sync, cm, mi_rows); |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2771 | |
Cheng Chen | 5e9d457 | 2023-02-02 16:40:52 -0800 | [diff] [blame] | 2772 | intra_row_mt_sync->intrabc_extra_top_right_sb_delay = 0; |
| 2773 | intra_row_mt_sync->num_threads_working = num_workers; |
| 2774 | intra_row_mt_sync->next_mi_row = 0; |
| 2775 | memset(intra_row_mt_sync->num_finished_cols, -1, |
Wan-Teh Chang | 2ff9db5 | 2023-03-02 11:39:19 -0800 | [diff] [blame] | 2776 | sizeof(*intra_row_mt_sync->num_finished_cols) * mi_rows); |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2777 | |
| 2778 | prepare_wiener_var_workers(cpi, cal_mb_wiener_var_hook, num_workers); |
Wan-Teh Chang | 75fe580 | 2022-12-02 15:08:58 -0800 | [diff] [blame] | 2779 | launch_workers(mt_info, num_workers); |
| 2780 | sync_enc_workers(mt_info, cm, num_workers); |
Cheng Chen | e142124 | 2023-01-31 12:42:50 -0800 | [diff] [blame] | 2781 | |
Cheng Chen | 5e9d457 | 2023-02-02 16:40:52 -0800 | [diff] [blame] | 2782 | row_mt_sync_mem_dealloc(intra_row_mt_sync); |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 2783 | } |
| 2784 | |
Vishesh | 16d6f73 | 2021-05-10 17:57:36 +0530 | [diff] [blame] | 2785 | // Compare and order tiles based on absolute sum of tx coeffs. |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2786 | static int compare_tile_order(const void *a, const void *b) { |
| 2787 | const PackBSTileOrder *const tile_a = (const PackBSTileOrder *)a; |
| 2788 | const PackBSTileOrder *const tile_b = (const PackBSTileOrder *)b; |
| 2789 | |
Vishesh | 16d6f73 | 2021-05-10 17:57:36 +0530 | [diff] [blame] | 2790 | if (tile_a->abs_sum_level > tile_b->abs_sum_level) |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2791 | return -1; |
Vishesh | 16d6f73 | 2021-05-10 17:57:36 +0530 | [diff] [blame] | 2792 | else if (tile_a->abs_sum_level == tile_b->abs_sum_level) |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2793 | return (tile_a->tile_idx > tile_b->tile_idx ? 1 : -1); |
| 2794 | else |
| 2795 | return 1; |
| 2796 | } |
| 2797 | |
| 2798 | // Get next tile index to be processed for pack bitstream |
| 2799 | static AOM_INLINE int get_next_pack_bs_tile_idx( |
| 2800 | AV1EncPackBSSync *const pack_bs_sync, const int num_tiles) { |
| 2801 | assert(pack_bs_sync->next_job_idx <= num_tiles); |
| 2802 | if (pack_bs_sync->next_job_idx == num_tiles) return -1; |
| 2803 | |
| 2804 | return pack_bs_sync->pack_bs_tile_order[pack_bs_sync->next_job_idx++] |
| 2805 | .tile_idx; |
| 2806 | } |
| 2807 | |
| 2808 | // Calculates bitstream chunk size based on total buffer size and tile or tile |
| 2809 | // group size. |
| 2810 | static AOM_INLINE size_t get_bs_chunk_size(int tg_or_tile_size, |
| 2811 | const int frame_or_tg_size, |
| 2812 | size_t *remain_buf_size, |
| 2813 | size_t max_buf_size, |
| 2814 | int is_last_chunk) { |
| 2815 | size_t this_chunk_size; |
| 2816 | assert(*remain_buf_size > 0); |
| 2817 | if (is_last_chunk) { |
| 2818 | this_chunk_size = *remain_buf_size; |
| 2819 | *remain_buf_size = 0; |
| 2820 | } else { |
Cherma Rajan A | 0e17c87 | 2021-04-30 15:39:19 +0530 | [diff] [blame] | 2821 | const uint64_t size_scale = (uint64_t)max_buf_size * tg_or_tile_size; |
| 2822 | this_chunk_size = (size_t)(size_scale / frame_or_tg_size); |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2823 | *remain_buf_size -= this_chunk_size; |
| 2824 | assert(*remain_buf_size > 0); |
| 2825 | } |
| 2826 | assert(this_chunk_size > 0); |
| 2827 | return this_chunk_size; |
| 2828 | } |
| 2829 | |
| 2830 | // Initializes params required for pack bitstream tile. |
| 2831 | static void init_tile_pack_bs_params(AV1_COMP *const cpi, uint8_t *const dst, |
| 2832 | struct aom_write_bit_buffer *saved_wb, |
| 2833 | PackBSParams *const pack_bs_params_arr, |
| 2834 | uint8_t obu_extn_header) { |
Vishesh | 8bd59d9 | 2021-04-27 11:34:17 +0530 | [diff] [blame] | 2835 | MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2836 | AV1_COMMON *const cm = &cpi->common; |
| 2837 | const CommonTileParams *const tiles = &cm->tiles; |
| 2838 | const int num_tiles = tiles->cols * tiles->rows; |
| 2839 | // Fixed size tile groups for the moment |
| 2840 | const int num_tg_hdrs = cpi->num_tg; |
| 2841 | // Tile group size in terms of number of tiles. |
| 2842 | const int tg_size_in_tiles = (num_tiles + num_tg_hdrs - 1) / num_tg_hdrs; |
| 2843 | uint8_t *tile_dst = dst; |
| 2844 | uint8_t *tile_data_curr = dst; |
| 2845 | // Max tile group count can not be more than MAX_TILES. |
| 2846 | int tg_size_mi[MAX_TILES] = { 0 }; // Size of tile group in mi units |
| 2847 | int tile_idx; |
| 2848 | int tg_idx = 0; |
| 2849 | int tile_count_in_tg = 0; |
| 2850 | int new_tg = 1; |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2851 | |
| 2852 | // Populate pack bitstream params of all tiles. |
| 2853 | for (tile_idx = 0; tile_idx < num_tiles; tile_idx++) { |
| 2854 | const TileInfo *const tile_info = &cpi->tile_data[tile_idx].tile_info; |
| 2855 | PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx]; |
| 2856 | // Calculate tile size in mi units. |
| 2857 | const int tile_size_mi = (tile_info->mi_col_end - tile_info->mi_col_start) * |
| 2858 | (tile_info->mi_row_end - tile_info->mi_row_start); |
| 2859 | int is_last_tile_in_tg = 0; |
| 2860 | tile_count_in_tg++; |
| 2861 | if (tile_count_in_tg == tg_size_in_tiles || tile_idx == (num_tiles - 1)) |
| 2862 | is_last_tile_in_tg = 1; |
| 2863 | |
| 2864 | // Populate pack bitstream params of this tile. |
| 2865 | pack_bs_params->curr_tg_hdr_size = 0; |
| 2866 | pack_bs_params->obu_extn_header = obu_extn_header; |
| 2867 | pack_bs_params->saved_wb = saved_wb; |
| 2868 | pack_bs_params->obu_header_size = 0; |
| 2869 | pack_bs_params->is_last_tile_in_tg = is_last_tile_in_tg; |
| 2870 | pack_bs_params->new_tg = new_tg; |
| 2871 | pack_bs_params->tile_col = tile_info->tile_col; |
| 2872 | pack_bs_params->tile_row = tile_info->tile_row; |
| 2873 | pack_bs_params->tile_size_mi = tile_size_mi; |
| 2874 | tg_size_mi[tg_idx] += tile_size_mi; |
| 2875 | |
| 2876 | if (new_tg) new_tg = 0; |
| 2877 | if (is_last_tile_in_tg) { |
| 2878 | tile_count_in_tg = 0; |
| 2879 | new_tg = 1; |
| 2880 | tg_idx++; |
| 2881 | } |
| 2882 | } |
| 2883 | |
| 2884 | assert(cpi->available_bs_size > 0); |
| 2885 | size_t tg_buf_size[MAX_TILES] = { 0 }; |
| 2886 | size_t max_buf_size = cpi->available_bs_size; |
| 2887 | size_t remain_buf_size = max_buf_size; |
| 2888 | const int frame_size_mi = cm->mi_params.mi_rows * cm->mi_params.mi_cols; |
| 2889 | |
| 2890 | tile_idx = 0; |
| 2891 | // Prepare obu, tile group and frame header of each tile group. |
| 2892 | for (tg_idx = 0; tg_idx < cpi->num_tg; tg_idx++) { |
| 2893 | PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx]; |
| 2894 | int is_last_tg = tg_idx == cpi->num_tg - 1; |
| 2895 | // Prorate bitstream buffer size based on tile group size and available |
| 2896 | // buffer size. This buffer will be used to store headers and tile data. |
| 2897 | tg_buf_size[tg_idx] = |
| 2898 | get_bs_chunk_size(tg_size_mi[tg_idx], frame_size_mi, &remain_buf_size, |
| 2899 | max_buf_size, is_last_tg); |
| 2900 | |
| 2901 | pack_bs_params->dst = tile_dst; |
| 2902 | pack_bs_params->tile_data_curr = tile_dst; |
| 2903 | |
| 2904 | // Write obu, tile group and frame header at first tile in the tile |
| 2905 | // group. |
Vishesh | 8bd59d9 | 2021-04-27 11:34:17 +0530 | [diff] [blame] | 2906 | av1_write_obu_tg_tile_headers(cpi, xd, pack_bs_params, tile_idx); |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2907 | tile_dst += tg_buf_size[tg_idx]; |
| 2908 | |
| 2909 | // Exclude headers from tile group buffer size. |
| 2910 | tg_buf_size[tg_idx] -= pack_bs_params->curr_tg_hdr_size; |
| 2911 | tile_idx += tg_size_in_tiles; |
| 2912 | } |
| 2913 | |
| 2914 | tg_idx = 0; |
| 2915 | // Calculate bitstream buffer size of each tile in the tile group. |
| 2916 | for (tile_idx = 0; tile_idx < num_tiles; tile_idx++) { |
| 2917 | PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx]; |
| 2918 | |
| 2919 | if (pack_bs_params->new_tg) { |
| 2920 | max_buf_size = tg_buf_size[tg_idx]; |
| 2921 | remain_buf_size = max_buf_size; |
| 2922 | } |
| 2923 | |
| 2924 | // Prorate bitstream buffer size of this tile based on tile size and |
| 2925 | // available buffer size. For this proration, header size is not accounted. |
| 2926 | const size_t tile_buf_size = get_bs_chunk_size( |
| 2927 | pack_bs_params->tile_size_mi, tg_size_mi[tg_idx], &remain_buf_size, |
| 2928 | max_buf_size, pack_bs_params->is_last_tile_in_tg); |
| 2929 | pack_bs_params->tile_buf_size = tile_buf_size; |
| 2930 | |
| 2931 | // Update base address of bitstream buffer for tile and tile group. |
| 2932 | if (pack_bs_params->new_tg) { |
| 2933 | tile_dst = pack_bs_params->dst; |
| 2934 | tile_data_curr = pack_bs_params->tile_data_curr; |
| 2935 | // Account header size in first tile of a tile group. |
| 2936 | pack_bs_params->tile_buf_size += pack_bs_params->curr_tg_hdr_size; |
| 2937 | } else { |
| 2938 | pack_bs_params->dst = tile_dst; |
| 2939 | pack_bs_params->tile_data_curr = tile_data_curr; |
| 2940 | } |
| 2941 | |
| 2942 | if (pack_bs_params->is_last_tile_in_tg) tg_idx++; |
| 2943 | tile_dst += pack_bs_params->tile_buf_size; |
| 2944 | } |
| 2945 | } |
| 2946 | |
| 2947 | // Worker hook function of pack bitsteam multithreading. |
| 2948 | static int pack_bs_worker_hook(void *arg1, void *arg2) { |
| 2949 | EncWorkerData *const thread_data = (EncWorkerData *)arg1; |
| 2950 | PackBSParams *const pack_bs_params = (PackBSParams *)arg2; |
| 2951 | AV1_COMP *const cpi = thread_data->cpi; |
| 2952 | AV1_COMMON *const cm = &cpi->common; |
| 2953 | AV1EncPackBSSync *const pack_bs_sync = &cpi->mt_info.pack_bs_sync; |
| 2954 | const CommonTileParams *const tiles = &cm->tiles; |
| 2955 | const int num_tiles = tiles->cols * tiles->rows; |
| 2956 | |
Mudassir Galaganath | 0402d9a | 2023-10-09 12:30:07 +0530 | [diff] [blame] | 2957 | #if CONFIG_MULTITHREAD |
| 2958 | pthread_mutex_t *const pack_bs_mutex = pack_bs_sync->mutex_; |
| 2959 | #endif |
| 2960 | MACROBLOCKD *const xd = &thread_data->td->mb.e_mbd; |
| 2961 | struct aom_internal_error_info *const error_info = &thread_data->error_info; |
| 2962 | xd->error_info = error_info; |
| 2963 | |
| 2964 | // The jmp_buf is valid only for the duration of the function that calls |
| 2965 | // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 |
| 2966 | // before it returns. |
| 2967 | if (setjmp(error_info->jmp)) { |
| 2968 | error_info->setjmp = 0; |
| 2969 | #if CONFIG_MULTITHREAD |
| 2970 | pthread_mutex_lock(pack_bs_mutex); |
| 2971 | pack_bs_sync->pack_bs_mt_exit = true; |
| 2972 | pthread_mutex_unlock(pack_bs_mutex); |
| 2973 | #endif |
| 2974 | return 0; |
| 2975 | } |
| 2976 | error_info->setjmp = 1; |
| 2977 | |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2978 | while (1) { |
| 2979 | #if CONFIG_MULTITHREAD |
Mudassir Galaganath | 0402d9a | 2023-10-09 12:30:07 +0530 | [diff] [blame] | 2980 | pthread_mutex_lock(pack_bs_mutex); |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2981 | #endif |
Mudassir Galaganath | 0402d9a | 2023-10-09 12:30:07 +0530 | [diff] [blame] | 2982 | const int tile_idx = |
| 2983 | pack_bs_sync->pack_bs_mt_exit |
| 2984 | ? -1 |
| 2985 | : get_next_pack_bs_tile_idx(pack_bs_sync, num_tiles); |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2986 | #if CONFIG_MULTITHREAD |
Mudassir Galaganath | 0402d9a | 2023-10-09 12:30:07 +0530 | [diff] [blame] | 2987 | pthread_mutex_unlock(pack_bs_mutex); |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2988 | #endif |
Mudassir Galaganath | 0402d9a | 2023-10-09 12:30:07 +0530 | [diff] [blame] | 2989 | // When pack_bs_mt_exit is set to true, other workers need not pursue any |
| 2990 | // further jobs. |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2991 | if (tile_idx == -1) break; |
| 2992 | TileDataEnc *this_tile = &cpi->tile_data[tile_idx]; |
| 2993 | thread_data->td->mb.e_mbd.tile_ctx = &this_tile->tctx; |
| 2994 | |
| 2995 | av1_pack_tile_info(cpi, thread_data->td, &pack_bs_params[tile_idx]); |
| 2996 | } |
| 2997 | |
Mudassir Galaganath | 0402d9a | 2023-10-09 12:30:07 +0530 | [diff] [blame] | 2998 | error_info->setjmp = 0; |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 2999 | return 1; |
| 3000 | } |
| 3001 | |
| 3002 | // Prepares thread data and workers of pack bitsteam multithreading. |
| 3003 | static void prepare_pack_bs_workers(AV1_COMP *const cpi, |
| 3004 | PackBSParams *const pack_bs_params, |
| 3005 | AVxWorkerHook hook, const int num_workers) { |
| 3006 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
| 3007 | for (int i = num_workers - 1; i >= 0; i--) { |
| 3008 | AVxWorker *worker = &mt_info->workers[i]; |
| 3009 | EncWorkerData *const thread_data = &mt_info->tile_thr_data[i]; |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 3010 | if (i == 0) { |
| 3011 | thread_data->td = &cpi->td; |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 3012 | } else { |
| 3013 | thread_data->td = thread_data->original_td; |
| 3014 | } |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 3015 | |
| 3016 | if (thread_data->td != &cpi->td) thread_data->td->mb = cpi->td.mb; |
| 3017 | |
| 3018 | thread_data->cpi = cpi; |
| 3019 | thread_data->start = i; |
| 3020 | thread_data->thread_id = i; |
| 3021 | av1_reset_pack_bs_thread_data(thread_data->td); |
| 3022 | |
| 3023 | worker->hook = hook; |
| 3024 | worker->data1 = thread_data; |
| 3025 | worker->data2 = pack_bs_params; |
| 3026 | } |
| 3027 | |
| 3028 | AV1_COMMON *const cm = &cpi->common; |
| 3029 | AV1EncPackBSSync *const pack_bs_sync = &mt_info->pack_bs_sync; |
| 3030 | const uint16_t num_tiles = cm->tiles.rows * cm->tiles.cols; |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 3031 | pack_bs_sync->next_job_idx = 0; |
| 3032 | |
| 3033 | PackBSTileOrder *const pack_bs_tile_order = pack_bs_sync->pack_bs_tile_order; |
| 3034 | // Reset tile order data of pack bitstream |
| 3035 | av1_zero_array(pack_bs_tile_order, num_tiles); |
| 3036 | |
| 3037 | // Populate pack bitstream tile order structure |
| 3038 | for (uint16_t tile_idx = 0; tile_idx < num_tiles; tile_idx++) { |
Vishesh | 16d6f73 | 2021-05-10 17:57:36 +0530 | [diff] [blame] | 3039 | pack_bs_tile_order[tile_idx].abs_sum_level = |
| 3040 | cpi->tile_data[tile_idx].abs_sum_level; |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 3041 | pack_bs_tile_order[tile_idx].tile_idx = tile_idx; |
| 3042 | } |
| 3043 | |
| 3044 | // Sort tiles in descending order based on tile area. |
Vishesh | 3ef57df | 2021-04-29 16:24:02 +0530 | [diff] [blame] | 3045 | qsort(pack_bs_tile_order, num_tiles, sizeof(*pack_bs_tile_order), |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 3046 | compare_tile_order); |
| 3047 | } |
| 3048 | |
| 3049 | // Accumulates data after pack bitsteam processing. |
| 3050 | static void accumulate_pack_bs_data( |
| 3051 | AV1_COMP *const cpi, const PackBSParams *const pack_bs_params_arr, |
| 3052 | uint8_t *const dst, uint32_t *total_size, const FrameHeaderInfo *fh_info, |
| 3053 | int *const largest_tile_id, unsigned int *max_tile_size, |
| 3054 | uint32_t *const obu_header_size, uint8_t **tile_data_start, |
| 3055 | const int num_workers) { |
| 3056 | const AV1_COMMON *const cm = &cpi->common; |
| 3057 | const CommonTileParams *const tiles = &cm->tiles; |
| 3058 | const int tile_count = tiles->cols * tiles->rows; |
| 3059 | // Fixed size tile groups for the moment |
| 3060 | size_t curr_tg_data_size = 0; |
| 3061 | int is_first_tg = 1; |
| 3062 | uint8_t *curr_tg_start = dst; |
| 3063 | size_t src_offset = 0; |
| 3064 | size_t dst_offset = 0; |
| 3065 | |
| 3066 | for (int tile_idx = 0; tile_idx < tile_count; tile_idx++) { |
| 3067 | // PackBSParams stores all parameters required to pack tile and header |
| 3068 | // info. |
| 3069 | const PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx]; |
| 3070 | uint32_t tile_size = 0; |
| 3071 | |
| 3072 | if (pack_bs_params->new_tg) { |
| 3073 | curr_tg_start = dst + *total_size; |
| 3074 | curr_tg_data_size = pack_bs_params->curr_tg_hdr_size; |
| 3075 | *tile_data_start += pack_bs_params->curr_tg_hdr_size; |
| 3076 | *obu_header_size = pack_bs_params->obu_header_size; |
| 3077 | } |
| 3078 | curr_tg_data_size += |
| 3079 | pack_bs_params->buf.size + (pack_bs_params->is_last_tile_in_tg ? 0 : 4); |
| 3080 | |
| 3081 | if (pack_bs_params->buf.size > *max_tile_size) { |
| 3082 | *largest_tile_id = tile_idx; |
| 3083 | *max_tile_size = (unsigned int)pack_bs_params->buf.size; |
| 3084 | } |
| 3085 | tile_size += |
| 3086 | (uint32_t)pack_bs_params->buf.size + *pack_bs_params->total_size; |
| 3087 | |
| 3088 | // Pack all the chunks of tile bitstreams together |
| 3089 | if (tile_idx != 0) memmove(dst + dst_offset, dst + src_offset, tile_size); |
| 3090 | |
| 3091 | if (pack_bs_params->is_last_tile_in_tg) |
| 3092 | av1_write_last_tile_info( |
| 3093 | cpi, fh_info, pack_bs_params->saved_wb, &curr_tg_data_size, |
| 3094 | curr_tg_start, &tile_size, tile_data_start, largest_tile_id, |
| 3095 | &is_first_tg, *obu_header_size, pack_bs_params->obu_extn_header); |
| 3096 | src_offset += pack_bs_params->tile_buf_size; |
| 3097 | dst_offset += tile_size; |
| 3098 | *total_size += tile_size; |
| 3099 | } |
| 3100 | |
| 3101 | // Accumulate thread data |
| 3102 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
| 3103 | for (int idx = num_workers - 1; idx >= 0; idx--) { |
| 3104 | ThreadData const *td = mt_info->tile_thr_data[idx].td; |
| 3105 | av1_accumulate_pack_bs_thread_data(cpi, td); |
| 3106 | } |
| 3107 | } |
| 3108 | |
| 3109 | void av1_write_tile_obu_mt( |
| 3110 | AV1_COMP *const cpi, uint8_t *const dst, uint32_t *total_size, |
| 3111 | struct aom_write_bit_buffer *saved_wb, uint8_t obu_extn_header, |
| 3112 | const FrameHeaderInfo *fh_info, int *const largest_tile_id, |
| 3113 | unsigned int *max_tile_size, uint32_t *const obu_header_size, |
Cherma Rajan A | 248e8d3 | 2021-06-15 09:06:38 +0530 | [diff] [blame] | 3114 | uint8_t **tile_data_start, const int num_workers) { |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 3115 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 3116 | |
| 3117 | PackBSParams pack_bs_params[MAX_TILES]; |
| 3118 | uint32_t tile_size[MAX_TILES] = { 0 }; |
| 3119 | |
| 3120 | for (int tile_idx = 0; tile_idx < MAX_TILES; tile_idx++) |
| 3121 | pack_bs_params[tile_idx].total_size = &tile_size[tile_idx]; |
| 3122 | |
| 3123 | init_tile_pack_bs_params(cpi, dst, saved_wb, pack_bs_params, obu_extn_header); |
| 3124 | prepare_pack_bs_workers(cpi, pack_bs_params, pack_bs_worker_hook, |
| 3125 | num_workers); |
| 3126 | launch_workers(mt_info, num_workers); |
| 3127 | sync_enc_workers(mt_info, &cpi->common, num_workers); |
| 3128 | accumulate_pack_bs_data(cpi, pack_bs_params, dst, total_size, fh_info, |
| 3129 | largest_tile_id, max_tile_size, obu_header_size, |
| 3130 | tile_data_start, num_workers); |
| 3131 | } |
| 3132 | |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3133 | // Deallocate memory for CDEF search multi-thread synchronization. |
| 3134 | void av1_cdef_mt_dealloc(AV1CdefSync *cdef_sync) { |
| 3135 | (void)cdef_sync; |
| 3136 | assert(cdef_sync != NULL); |
| 3137 | #if CONFIG_MULTITHREAD |
| 3138 | if (cdef_sync->mutex_ != NULL) { |
| 3139 | pthread_mutex_destroy(cdef_sync->mutex_); |
| 3140 | aom_free(cdef_sync->mutex_); |
| 3141 | } |
| 3142 | #endif // CONFIG_MULTITHREAD |
| 3143 | } |
| 3144 | |
| 3145 | // Updates the row and column indices of the next job to be processed. |
| 3146 | // Also updates end_of_frame flag when the processing of all blocks is complete. |
| 3147 | static void update_next_job_info(AV1CdefSync *cdef_sync, int nvfb, int nhfb) { |
| 3148 | cdef_sync->fbc++; |
| 3149 | if (cdef_sync->fbc == nhfb) { |
| 3150 | cdef_sync->fbr++; |
| 3151 | if (cdef_sync->fbr == nvfb) { |
| 3152 | cdef_sync->end_of_frame = 1; |
| 3153 | } else { |
| 3154 | cdef_sync->fbc = 0; |
| 3155 | } |
| 3156 | } |
| 3157 | } |
| 3158 | |
| 3159 | // Initializes cdef_sync parameters. |
| 3160 | static AOM_INLINE void cdef_reset_job_info(AV1CdefSync *cdef_sync) { |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 3161 | #if CONFIG_MULTITHREAD |
| 3162 | if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL); |
| 3163 | #endif // CONFIG_MULTITHREAD |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3164 | cdef_sync->end_of_frame = 0; |
| 3165 | cdef_sync->fbr = 0; |
| 3166 | cdef_sync->fbc = 0; |
| 3167 | } |
| 3168 | |
| 3169 | // Checks if a job is available. If job is available, |
| 3170 | // populates next job information and returns 1, else returns 0. |
| 3171 | static AOM_INLINE int cdef_get_next_job(AV1CdefSync *cdef_sync, |
| 3172 | CdefSearchCtx *cdef_search_ctx, |
Mudassir Galaganath | 753a732 | 2023-09-12 16:16:31 +0530 | [diff] [blame] | 3173 | volatile int *cur_fbr, |
| 3174 | volatile int *cur_fbc, |
| 3175 | volatile int *sb_count) { |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3176 | #if CONFIG_MULTITHREAD |
| 3177 | pthread_mutex_lock(cdef_sync->mutex_); |
| 3178 | #endif // CONFIG_MULTITHREAD |
| 3179 | int do_next_block = 0; |
| 3180 | const int nvfb = cdef_search_ctx->nvfb; |
| 3181 | const int nhfb = cdef_search_ctx->nhfb; |
| 3182 | |
| 3183 | // If a block is skip, do not process the block and |
| 3184 | // check the skip condition for the next block. |
Mudassir Galaganath | 71ff9ae | 2023-08-25 12:37:11 +0530 | [diff] [blame] | 3185 | while (!cdef_sync->cdef_mt_exit && !cdef_sync->end_of_frame && |
| 3186 | cdef_sb_skip(cdef_search_ctx->mi_params, cdef_sync->fbr, |
| 3187 | cdef_sync->fbc)) { |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3188 | update_next_job_info(cdef_sync, nvfb, nhfb); |
| 3189 | } |
| 3190 | |
| 3191 | // Populates information needed for current job and update the row, |
| 3192 | // column indices of the next block to be processed. |
Mudassir Galaganath | 71ff9ae | 2023-08-25 12:37:11 +0530 | [diff] [blame] | 3193 | if (!cdef_sync->cdef_mt_exit && cdef_sync->end_of_frame == 0) { |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3194 | do_next_block = 1; |
| 3195 | *cur_fbr = cdef_sync->fbr; |
| 3196 | *cur_fbc = cdef_sync->fbc; |
| 3197 | *sb_count = cdef_search_ctx->sb_count; |
| 3198 | cdef_search_ctx->sb_count++; |
| 3199 | update_next_job_info(cdef_sync, nvfb, nhfb); |
| 3200 | } |
| 3201 | #if CONFIG_MULTITHREAD |
| 3202 | pthread_mutex_unlock(cdef_sync->mutex_); |
| 3203 | #endif // CONFIG_MULTITHREAD |
| 3204 | return do_next_block; |
| 3205 | } |
| 3206 | |
| 3207 | // Hook function for each thread in CDEF search multi-threading. |
| 3208 | static int cdef_filter_block_worker_hook(void *arg1, void *arg2) { |
Mudassir Galaganath | 71ff9ae | 2023-08-25 12:37:11 +0530 | [diff] [blame] | 3209 | EncWorkerData *thread_data = (EncWorkerData *)arg1; |
| 3210 | AV1CdefSync *const cdef_sync = (AV1CdefSync *)arg2; |
| 3211 | |
| 3212 | #if CONFIG_MULTITHREAD |
| 3213 | pthread_mutex_t *cdef_mutex_ = cdef_sync->mutex_; |
| 3214 | #endif |
| 3215 | struct aom_internal_error_info *const error_info = &thread_data->error_info; |
| 3216 | CdefSearchCtx *cdef_search_ctx = thread_data->cpi->cdef_search_ctx; |
| 3217 | |
| 3218 | // The jmp_buf is valid only for the duration of the function that calls |
| 3219 | // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 |
| 3220 | // before it returns. |
| 3221 | if (setjmp(error_info->jmp)) { |
| 3222 | error_info->setjmp = 0; |
| 3223 | #if CONFIG_MULTITHREAD |
| 3224 | pthread_mutex_lock(cdef_mutex_); |
| 3225 | cdef_sync->cdef_mt_exit = true; |
| 3226 | pthread_mutex_unlock(cdef_mutex_); |
| 3227 | #endif |
| 3228 | return 0; |
| 3229 | } |
| 3230 | error_info->setjmp = 1; |
| 3231 | |
Mudassir Galaganath | 753a732 | 2023-09-12 16:16:31 +0530 | [diff] [blame] | 3232 | volatile int cur_fbr, cur_fbc, sb_count; |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3233 | while (cdef_get_next_job(cdef_sync, cdef_search_ctx, &cur_fbr, &cur_fbc, |
| 3234 | &sb_count)) { |
Mudassir Galaganath | 71ff9ae | 2023-08-25 12:37:11 +0530 | [diff] [blame] | 3235 | av1_cdef_mse_calc_block(cdef_search_ctx, error_info, cur_fbr, cur_fbc, |
| 3236 | sb_count); |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3237 | } |
Mudassir Galaganath | 71ff9ae | 2023-08-25 12:37:11 +0530 | [diff] [blame] | 3238 | error_info->setjmp = 0; |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3239 | return 1; |
| 3240 | } |
| 3241 | |
| 3242 | // Assigns CDEF search hook function and thread data to each worker. |
Mudassir Galaganath | 71ff9ae | 2023-08-25 12:37:11 +0530 | [diff] [blame] | 3243 | static void prepare_cdef_workers(AV1_COMP *cpi, AVxWorkerHook hook, |
| 3244 | int num_workers) { |
| 3245 | MultiThreadInfo *mt_info = &cpi->mt_info; |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3246 | for (int i = num_workers - 1; i >= 0; i--) { |
| 3247 | AVxWorker *worker = &mt_info->workers[i]; |
Mudassir Galaganath | 71ff9ae | 2023-08-25 12:37:11 +0530 | [diff] [blame] | 3248 | EncWorkerData *thread_data = &mt_info->tile_thr_data[i]; |
| 3249 | |
| 3250 | thread_data->cpi = cpi; |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3251 | worker->hook = hook; |
Mudassir Galaganath | 71ff9ae | 2023-08-25 12:37:11 +0530 | [diff] [blame] | 3252 | worker->data1 = thread_data; |
| 3253 | worker->data2 = &mt_info->cdef_sync; |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3254 | } |
| 3255 | } |
| 3256 | |
| 3257 | // Implements multi-threading for CDEF search. |
Mudassir Galaganath | 71ff9ae | 2023-08-25 12:37:11 +0530 | [diff] [blame] | 3258 | void av1_cdef_mse_calc_frame_mt(AV1_COMP *cpi) { |
| 3259 | MultiThreadInfo *mt_info = &cpi->mt_info; |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3260 | AV1CdefSync *cdef_sync = &mt_info->cdef_sync; |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3261 | const int num_workers = mt_info->num_mod_workers[MOD_CDEF_SEARCH]; |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3262 | |
| 3263 | cdef_reset_job_info(cdef_sync); |
Mudassir Galaganath | 71ff9ae | 2023-08-25 12:37:11 +0530 | [diff] [blame] | 3264 | prepare_cdef_workers(cpi, cdef_filter_block_worker_hook, num_workers); |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3265 | launch_workers(mt_info, num_workers); |
Mudassir Galaganath | 71ff9ae | 2023-08-25 12:37:11 +0530 | [diff] [blame] | 3266 | sync_enc_workers(mt_info, &cpi->common, num_workers); |
venkat sanampudi | d7bf550 | 2020-10-21 10:55:34 +0530 | [diff] [blame] | 3267 | } |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3268 | |
| 3269 | // Computes num_workers for temporal filter multi-threading. |
Wan-Teh Chang | c8c2c61 | 2023-10-10 16:41:23 -0700 | [diff] [blame] | 3270 | static AOM_INLINE int compute_num_tf_workers(const AV1_COMP *cpi) { |
Nithya V S | e2d1a42 | 2020-12-29 22:17:26 +0530 | [diff] [blame] | 3271 | // For single-pass encode, using no. of workers as per tf block size was not |
| 3272 | // found to improve speed. Hence the thread assignment for single-pass encode |
| 3273 | // is kept based on compute_num_enc_workers(). |
Bohan Li | 445fdf6 | 2021-06-03 16:16:00 -0700 | [diff] [blame] | 3274 | if (cpi->oxcf.pass < AOM_RC_SECOND_PASS) |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 3275 | return (av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads)); |
Nithya V S | e2d1a42 | 2020-12-29 22:17:26 +0530 | [diff] [blame] | 3276 | |
| 3277 | if (cpi->oxcf.max_threads <= 1) return 1; |
| 3278 | |
| 3279 | const int frame_height = cpi->common.height; |
| 3280 | const BLOCK_SIZE block_size = TF_BLOCK_SIZE; |
| 3281 | const int mb_height = block_size_high[block_size]; |
| 3282 | const int mb_rows = get_num_blocks(frame_height, mb_height); |
| 3283 | return AOMMIN(cpi->oxcf.max_threads, mb_rows); |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3284 | } |
| 3285 | |
| 3286 | // Computes num_workers for tpl multi-threading. |
| 3287 | static AOM_INLINE int compute_num_tpl_workers(AV1_COMP *cpi) { |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 3288 | return av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads); |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3289 | } |
| 3290 | |
| 3291 | // Computes num_workers for loop filter multi-threading. |
| 3292 | static AOM_INLINE int compute_num_lf_workers(AV1_COMP *cpi) { |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 3293 | return av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads); |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3294 | } |
| 3295 | |
| 3296 | // Computes num_workers for cdef multi-threading. |
| 3297 | static AOM_INLINE int compute_num_cdef_workers(AV1_COMP *cpi) { |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 3298 | return av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads); |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3299 | } |
| 3300 | |
| 3301 | // Computes num_workers for loop-restoration multi-threading. |
| 3302 | static AOM_INLINE int compute_num_lr_workers(AV1_COMP *cpi) { |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 3303 | return av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads); |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3304 | } |
| 3305 | |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 3306 | // Computes num_workers for pack bitstream multi-threading. |
| 3307 | static AOM_INLINE int compute_num_pack_bs_workers(AV1_COMP *cpi) { |
| 3308 | if (cpi->oxcf.max_threads <= 1) return 1; |
| 3309 | return compute_num_enc_tile_mt_workers(&cpi->common, cpi->oxcf.max_threads); |
| 3310 | } |
| 3311 | |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 3312 | // Computes num_workers for all intra multi-threading. |
| 3313 | static AOM_INLINE int compute_num_ai_workers(AV1_COMP *cpi) { |
| 3314 | if (cpi->oxcf.max_threads <= 1) return 1; |
Cheng Chen | 51b883e | 2023-04-28 11:45:34 -0700 | [diff] [blame] | 3315 | // The multi-threading implementation of deltaq-mode = 3 in allintra |
| 3316 | // mode is based on row multi threading. |
| 3317 | if (!cpi->oxcf.row_mt) return 1; |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 3318 | cpi->weber_bsize = BLOCK_8X8; |
| 3319 | const BLOCK_SIZE bsize = cpi->weber_bsize; |
| 3320 | const int mb_step = mi_size_wide[bsize]; |
| 3321 | const int num_mb_rows = cpi->common.mi_params.mi_rows / mb_step; |
| 3322 | return AOMMIN(num_mb_rows, cpi->oxcf.max_threads); |
| 3323 | } |
| 3324 | |
Cheng Chen | 28b4f28 | 2023-10-04 22:34:33 -0700 | [diff] [blame] | 3325 | static int compute_num_mod_workers(AV1_COMP *cpi, |
| 3326 | MULTI_THREADED_MODULES mod_name) { |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3327 | int num_mod_workers = 0; |
| 3328 | switch (mod_name) { |
| 3329 | case MOD_FP: |
Bohan Li | 445fdf6 | 2021-06-03 16:16:00 -0700 | [diff] [blame] | 3330 | if (cpi->oxcf.pass >= AOM_RC_SECOND_PASS) |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3331 | num_mod_workers = 0; |
| 3332 | else |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 3333 | num_mod_workers = |
| 3334 | av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads); |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3335 | break; |
| 3336 | case MOD_TF: num_mod_workers = compute_num_tf_workers(cpi); break; |
| 3337 | case MOD_TPL: num_mod_workers = compute_num_tpl_workers(cpi); break; |
| 3338 | case MOD_GME: num_mod_workers = 1; break; |
| 3339 | case MOD_ENC: |
Mufaddal Chakera | 92fbfed | 2021-06-25 10:18:05 +0530 | [diff] [blame] | 3340 | num_mod_workers = av1_compute_num_enc_workers(cpi, cpi->oxcf.max_threads); |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3341 | break; |
| 3342 | case MOD_LPF: num_mod_workers = compute_num_lf_workers(cpi); break; |
| 3343 | case MOD_CDEF_SEARCH: |
| 3344 | num_mod_workers = compute_num_cdef_workers(cpi); |
| 3345 | break; |
Vishnu Teja Manyam | f194981 | 2021-04-08 19:07:51 +0530 | [diff] [blame] | 3346 | case MOD_CDEF: num_mod_workers = compute_num_cdef_workers(cpi); break; |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3347 | case MOD_LR: num_mod_workers = compute_num_lr_workers(cpi); break; |
Cherma Rajan A | ce0c423 | 2021-04-23 21:29:51 +0530 | [diff] [blame] | 3348 | case MOD_PACK_BS: num_mod_workers = compute_num_pack_bs_workers(cpi); break; |
Mufaddal Chakera | bf4f358 | 2021-07-25 19:32:40 +0530 | [diff] [blame] | 3349 | case MOD_FRAME_ENC: |
| 3350 | num_mod_workers = cpi->ppi->p_mt_info.num_mod_workers[MOD_FRAME_ENC]; |
| 3351 | break; |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 3352 | case MOD_AI: |
| 3353 | if (cpi->oxcf.pass == AOM_RC_ONE_PASS) { |
| 3354 | num_mod_workers = compute_num_ai_workers(cpi); |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 3355 | } else { |
| 3356 | num_mod_workers = 0; |
Cheng Chen | ef49e87 | 2022-11-07 23:15:51 -0800 | [diff] [blame] | 3357 | } |
Wan-Teh Chang | c8c2c61 | 2023-10-10 16:41:23 -0700 | [diff] [blame] | 3358 | break; |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3359 | default: assert(0); break; |
| 3360 | } |
| 3361 | return (num_mod_workers); |
| 3362 | } |
| 3363 | // Computes the number of workers for each MT modules in the encoder |
| 3364 | void av1_compute_num_workers_for_mt(AV1_COMP *cpi) { |
Cheng Chen | 28b4f28 | 2023-10-04 22:34:33 -0700 | [diff] [blame] | 3365 | for (int i = MOD_FP; i < NUM_MT_MODULES; i++) { |
Mufaddal Chakera | 8c2d517 | 2021-06-09 01:33:48 +0530 | [diff] [blame] | 3366 | cpi->ppi->p_mt_info.num_mod_workers[i] = |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3367 | compute_num_mod_workers(cpi, (MULTI_THREADED_MODULES)i); |
Cheng Chen | 28b4f28 | 2023-10-04 22:34:33 -0700 | [diff] [blame] | 3368 | } |
Nithya V S | b6871a7 | 2020-12-25 23:05:48 +0530 | [diff] [blame] | 3369 | } |