Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1 | /* |
Yaowu Xu | bde4ac8 | 2016-11-28 15:26:06 -0800 | [diff] [blame] | 2 | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 3 | * |
Yaowu Xu | bde4ac8 | 2016-11-28 15:26:06 -0800 | [diff] [blame] | 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 10 | */ |
| 11 | |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 12 | #include "av1/encoder/av1_multi_thread.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 13 | #include "av1/encoder/encodeframe.h" |
| 14 | #include "av1/encoder/encoder.h" |
| 15 | #include "av1/encoder/ethread.h" |
Ravi Chaudhary | 95ba1fa | 2018-10-11 11:42:04 +0530 | [diff] [blame] | 16 | #include "av1/encoder/rdopt.h" |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 17 | #include "aom_dsp/aom_dsp_common.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 18 | |
| 19 | static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { |
hui su | b53682f | 2017-08-01 17:09:18 -0700 | [diff] [blame] | 20 | for (int i = 0; i < REFERENCE_MODES; i++) |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 21 | td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i]; |
| 22 | |
Zoe Liu | 27deb38 | 2018-03-27 15:13:56 -0700 | [diff] [blame] | 23 | for (int i = 0; i < REF_FRAMES; i++) |
Debargha Mukherjee | a575d23 | 2017-04-28 17:46:47 -0700 | [diff] [blame] | 24 | td->rd_counts.global_motion_used[i] += |
| 25 | td_t->rd_counts.global_motion_used[i]; |
Debargha Mukherjee | a575d23 | 2017-04-28 17:46:47 -0700 | [diff] [blame] | 26 | |
Yaowu Xu | a0cc9aa | 2017-06-21 17:45:31 -0700 | [diff] [blame] | 27 | td->rd_counts.compound_ref_used_flag |= |
| 28 | td_t->rd_counts.compound_ref_used_flag; |
Zoe Liu | 8a5d343 | 2017-11-30 16:33:44 -0800 | [diff] [blame] | 29 | td->rd_counts.skip_mode_used_flag |= td_t->rd_counts.skip_mode_used_flag; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 30 | } |
| 31 | |
// Walk every tile's superblocks in raster order and refresh the per-block
// delta loop-filter values, replaying the running delta-lf state the way a
// sequential scan would. NOTE(review): this appears to fix up state that can
// be stale after out-of-order row-multithreaded encoding — confirm with the
// caller of this helper.
static void update_delta_lf_for_row_mt(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
  const int mib_size = cm->seq_params.mib_size;
  // Monochrome input uses two fewer loop-filter levels (no chroma planes).
  const int frame_lf_count =
      av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
  for (int row = 0; row < cm->tile_rows; row++) {
    for (int col = 0; col < cm->tile_cols; col++) {
      TileDataEnc *tile_data = &cpi->tile_data[row * cm->tile_cols + col];
      const TileInfo *const tile_info = &tile_data->tile_info;
      for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
           mi_row += mib_size) {
        // The running delta-lf state restarts at the top of each tile.
        if (mi_row == tile_info->mi_row_start)
          av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
        for (int mi_col = tile_info->mi_col_start;
             mi_col < tile_info->mi_col_end; mi_col += mib_size) {
          const int idx_str = cm->mi_stride * mi_row + mi_col;
          MB_MODE_INFO **mi = cm->mi_grid_visible + idx_str;
          MB_MODE_INFO *mbmi = mi[0];
          if (mbmi->skip == 1 && (mbmi->sb_type == cm->seq_params.sb_size)) {
            // A fully-skipped superblock signals no delta of its own, so it
            // inherits the current running values.
            for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id)
              mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
            mbmi->delta_lf_from_base = xd->delta_lf_from_base;
          } else {
            // Otherwise this superblock carried a delta: advance the running
            // state, either per-filter (multi) or from the single base value.
            if (cm->delta_q_info.delta_lf_multi) {
              for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id)
                xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
            } else {
              xd->delta_lf_from_base = mbmi->delta_lf_from_base;
            }
          }
        }
      }
    }
  }
}
| 68 | |
// No-op read hook installed when row-based multi-threading synchronization
// is not needed.
void av1_row_mt_sync_read_dummy(struct AV1RowMTSyncData *const row_mt_sync,
                                int r, int c) {
  (void)row_mt_sync;
  (void)r;
  (void)c;
}
| 76 | |
// No-op write hook installed when row-based multi-threading synchronization
// is not needed.
void av1_row_mt_sync_write_dummy(struct AV1RowMTSyncData *const row_mt_sync,
                                 int r, int c, const int cols) {
  (void)row_mt_sync;
  (void)r;
  (void)c;
  (void)cols;
}
| 85 | |
// Block until the superblock row above (r - 1) has encoded at least
// sync_range columns ahead of column c, so row r may safely consume its
// top-neighbor context.
void av1_row_mt_sync_read(AV1RowMTSync *const row_mt_sync, int r, int c) {
#if CONFIG_MULTITHREAD
  const int nsync = row_mt_sync->sync_range;

  // The first row has no dependency above it and never waits.
  if (r) {
    pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1];
    pthread_mutex_lock(mutex);

    // cur_col[r - 1] is the progress marker published by the row above in
    // av1_row_mt_sync_write().
    while (c > row_mt_sync->cur_col[r - 1] - nsync) {
      pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex);
    }
    pthread_mutex_unlock(mutex);
  }
#else
  (void)row_mt_sync;
  (void)r;
  (void)c;
#endif  // CONFIG_MULTITHREAD
}
| 105 | |
// Publish encoding progress for superblock row r (column c just finished)
// and wake any thread blocked in av1_row_mt_sync_read() on this row.
void av1_row_mt_sync_write(AV1RowMTSync *const row_mt_sync, int r, int c,
                           const int cols) {
#if CONFIG_MULTITHREAD
  const int nsync = row_mt_sync->sync_range;
  int cur;
  // Only signal when there are enough encoded blocks for next row to run.
  int sig = 1;

  if (c < cols - 1) {
    cur = c;
    // Throttle wakeups: only signal at multiples of nsync.
    if (c % nsync) sig = 0;
  } else {
    // Last column of the row: push the marker past the end so the row below
    // can run to completion without further waits.
    cur = cols + nsync;
  }

  if (sig) {
    pthread_mutex_lock(&row_mt_sync->mutex_[r]);

    row_mt_sync->cur_col[r] = cur;

    pthread_cond_signal(&row_mt_sync->cond_[r]);
    pthread_mutex_unlock(&row_mt_sync->mutex_[r]);
  }
#else
  (void)row_mt_sync;
  (void)r;
  (void)c;
  (void)cols;
#endif  // CONFIG_MULTITHREAD
}
| 136 | |
| 137 | // Allocate memory for row synchronization |
| 138 | void av1_row_mt_sync_mem_alloc(AV1RowMTSync *row_mt_sync, AV1_COMMON *cm, |
| 139 | int rows) { |
| 140 | row_mt_sync->rows = rows; |
| 141 | #if CONFIG_MULTITHREAD |
| 142 | { |
| 143 | int i; |
| 144 | |
| 145 | CHECK_MEM_ERROR(cm, row_mt_sync->mutex_, |
| 146 | aom_malloc(sizeof(*row_mt_sync->mutex_) * rows)); |
| 147 | if (row_mt_sync->mutex_) { |
| 148 | for (i = 0; i < rows; ++i) { |
| 149 | pthread_mutex_init(&row_mt_sync->mutex_[i], NULL); |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | CHECK_MEM_ERROR(cm, row_mt_sync->cond_, |
| 154 | aom_malloc(sizeof(*row_mt_sync->cond_) * rows)); |
| 155 | if (row_mt_sync->cond_) { |
| 156 | for (i = 0; i < rows; ++i) { |
| 157 | pthread_cond_init(&row_mt_sync->cond_[i], NULL); |
| 158 | } |
| 159 | } |
| 160 | } |
| 161 | #endif // CONFIG_MULTITHREAD |
| 162 | |
| 163 | CHECK_MEM_ERROR(cm, row_mt_sync->cur_col, |
| 164 | aom_malloc(sizeof(*row_mt_sync->cur_col) * rows)); |
| 165 | |
| 166 | // Set up nsync. |
Ravi Chaudhary | e2cf962 | 2018-12-21 18:19:07 +0530 | [diff] [blame] | 167 | row_mt_sync->sync_range = 1; |
Ravi Chaudhary | c5e7469 | 2018-10-08 16:05:38 +0530 | [diff] [blame] | 168 | } |
| 169 | |
| 170 | // Deallocate row based multi-threading synchronization related mutex and data |
| 171 | void av1_row_mt_sync_mem_dealloc(AV1RowMTSync *row_mt_sync) { |
| 172 | if (row_mt_sync != NULL) { |
| 173 | #if CONFIG_MULTITHREAD |
| 174 | int i; |
| 175 | |
| 176 | if (row_mt_sync->mutex_ != NULL) { |
| 177 | for (i = 0; i < row_mt_sync->rows; ++i) { |
| 178 | pthread_mutex_destroy(&row_mt_sync->mutex_[i]); |
| 179 | } |
| 180 | aom_free(row_mt_sync->mutex_); |
| 181 | } |
| 182 | if (row_mt_sync->cond_ != NULL) { |
| 183 | for (i = 0; i < row_mt_sync->rows; ++i) { |
| 184 | pthread_cond_destroy(&row_mt_sync->cond_[i]); |
| 185 | } |
| 186 | aom_free(row_mt_sync->cond_); |
| 187 | } |
| 188 | #endif // CONFIG_MULTITHREAD |
| 189 | aom_free(row_mt_sync->cur_col); |
| 190 | // clear the structure as the source of this call may be dynamic change |
| 191 | // in tiles in which case this call will be followed by an _alloc() |
| 192 | // which may fail. |
| 193 | av1_zero(*row_mt_sync); |
| 194 | } |
| 195 | } |
| 196 | |
Sarah Parker | 427e3b1 | 2018-10-12 12:28:44 -0700 | [diff] [blame] | 197 | static void assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt, |
| 198 | int num_tiles, int num_workers) { |
Ravi Chaudhary | 90a15f4 | 2018-10-11 18:56:35 +0530 | [diff] [blame] | 199 | int tile_id = 0; |
| 200 | int i; |
| 201 | |
| 202 | for (i = 0; i < num_workers; i++) { |
| 203 | multi_thread_ctxt->thread_id_to_tile_id[i] = tile_id++; |
| 204 | if (tile_id == num_tiles) tile_id = 0; |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | static int get_next_job(AV1_COMP *const cpi, int *current_mi_row, |
| 209 | int cur_tile_id) { |
| 210 | AV1_COMMON *const cm = &cpi->common; |
| 211 | TileDataEnc *const this_tile = &cpi->tile_data[cur_tile_id]; |
| 212 | AV1RowMTInfo *row_mt_info = &this_tile->row_mt_info; |
| 213 | |
| 214 | if (row_mt_info->current_mi_row < this_tile->tile_info.mi_row_end) { |
| 215 | *current_mi_row = row_mt_info->current_mi_row; |
| 216 | row_mt_info->num_threads_working++; |
| 217 | row_mt_info->current_mi_row += cm->seq_params.mib_size; |
| 218 | return 1; |
| 219 | } |
| 220 | return 0; |
| 221 | } |
| 222 | |
// When the current tile has no rows left, scan all tiles for the best one to
// switch to and claim a job from it; sets *end_of_frame when every tile is
// exhausted. Caller must hold the row-MT mutex.
static void switch_tile_and_get_next_job(AV1_COMP *const cpi, int *cur_tile_id,
                                         int *current_mi_row,
                                         int *end_of_frame) {
  AV1_COMMON *const cm = &cpi->common;
  const int tile_cols = cm->tile_cols;
  const int tile_rows = cm->tile_rows;

  int tile_id = -1;  // Stores the tile ID with minimum proc done
  int max_mis_to_encode = 0;
  int min_num_threads_working = INT_MAX;

  for (int tile_row = 0; tile_row < tile_rows; tile_row++) {
    for (int tile_col = 0; tile_col < tile_cols; tile_col++) {
      int tile_index = tile_row * tile_cols + tile_col;
      TileDataEnc *this_tile = &cpi->tile_data[tile_index];
      AV1RowMTInfo *row_mt_info = &this_tile->row_mt_info;
      // Remaining work in this tile, in mode-info rows.
      int num_mis_to_encode =
          this_tile->tile_info.mi_row_end - row_mt_info->current_mi_row;

      // Tile to be processed by this thread is selected on the basis of
      // availability of jobs:
      // 1) If jobs are available, tile to be processed is chosen on the
      // basis of minimum number of threads working for that tile. If two or
      // more tiles have same number of threads working for them, then the tile
      // with maximum number of jobs available will be chosen.
      // 2) If no jobs are available, then end_of_frame is reached.
      if (num_mis_to_encode > 0) {
        int num_threads_working = row_mt_info->num_threads_working;
        if (num_threads_working < min_num_threads_working) {
          min_num_threads_working = num_threads_working;
          max_mis_to_encode = 0;  // New best thread count: restart tie-break.
        }
        if (num_threads_working == min_num_threads_working &&
            num_mis_to_encode > max_mis_to_encode) {
          tile_id = tile_index;
          max_mis_to_encode = num_mis_to_encode;
        }
      }
    }
  }
  if (tile_id == -1) {
    *end_of_frame = 1;
  } else {
    // Update the cur ID to the next tile ID that will be processed,
    // which will be the least processed tile
    *cur_tile_id = tile_id;
    get_next_job(cpi, current_mi_row, *cur_tile_id);
  }
}
| 272 | |
// Worker hook for row-based multi-threading. Repeatedly claims a superblock
// row (switching tiles when the current one runs dry) and encodes it, until
// no tile has work left. Job claiming is serialized by cpi->row_mt_mutex_.
static int enc_row_mt_worker_hook(void *arg1, void *unused) {
  EncWorkerData *const thread_data = (EncWorkerData *)arg1;
  AV1_COMP *const cpi = thread_data->cpi;
  AV1_COMMON *const cm = &cpi->common;

  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  int thread_id = thread_data->thread_id;
  // Starting tile for this thread, assigned in assign_tile_to_thread().
  int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
  (void)unused;

  assert(cur_tile_id != -1);

  int end_of_frame = 0;
  while (1) {
    int current_mi_row = -1;
#if CONFIG_MULTITHREAD
    pthread_mutex_lock(cpi->row_mt_mutex_);
#endif
    if (!get_next_job(cpi, &current_mi_row, cur_tile_id)) {
      // No jobs are available for the current tile. Query for the status of
      // other tiles and get the next job if available
      switch_tile_and_get_next_job(cpi, &cur_tile_id, &current_mi_row,
                                   &end_of_frame);
    }
#if CONFIG_MULTITHREAD
    pthread_mutex_unlock(cpi->row_mt_mutex_);
#endif
    if (end_of_frame == 1) break;

    TileDataEnc *const this_tile = &cpi->tile_data[cur_tile_id];
    int tile_row = this_tile->tile_info.tile_row;
    int tile_col = this_tile->tile_info.tile_col;

    assert(current_mi_row != -1 &&
           current_mi_row <= this_tile->tile_info.mi_row_end);

    ThreadData *td = thread_data->td;

    td->mb.e_mbd.tile_ctx = td->tctx;
    td->mb.tile_pb_ctx = &this_tile->tctx;
    // Seed the thread's entropy context: when CDF updates are allowed, only
    // the tile's first row starts from the tile context (later rows use the
    // per-row contexts); otherwise every row starts from the tile context.
    if (this_tile->allow_update_cdf) {
      td->mb.row_ctx = this_tile->row_ctx;
      if (current_mi_row == this_tile->tile_info.mi_row_start)
        memcpy(td->mb.e_mbd.tile_ctx, &this_tile->tctx, sizeof(FRAME_CONTEXT));
    } else {
      memcpy(td->mb.e_mbd.tile_ctx, &this_tile->tctx, sizeof(FRAME_CONTEXT));
    }

    av1_init_above_context(cm, &td->mb.e_mbd, tile_row);

    // Disable exhaustive search speed features for row based multi-threading of
    // encoder.
    td->mb.m_search_count_ptr = NULL;
    td->mb.ex_search_count_ptr = NULL;

    cfl_init(&td->mb.e_mbd.cfl, &cm->seq_params);
    av1_crc32c_calculator_init(&td->mb.mb_rd_record.crc_calculator);

    av1_encode_sb_row(cpi, td, tile_row, tile_col, current_mi_row);
    // Row done: drop this tile's working-thread count under the job mutex.
#if CONFIG_MULTITHREAD
    pthread_mutex_lock(cpi->row_mt_mutex_);
#endif
    this_tile->row_mt_info.num_threads_working--;
#if CONFIG_MULTITHREAD
    pthread_mutex_unlock(cpi->row_mt_mutex_);
#endif
  }

  return 1;
}
| 343 | |
Wan-Teh Chang | 8d2f577 | 2018-09-12 15:44:59 -0700 | [diff] [blame] | 344 | static int enc_worker_hook(void *arg1, void *unused) { |
| 345 | EncWorkerData *const thread_data = (EncWorkerData *)arg1; |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 346 | AV1_COMP *const cpi = thread_data->cpi; |
| 347 | const AV1_COMMON *const cm = &cpi->common; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 348 | const int tile_cols = cm->tile_cols; |
| 349 | const int tile_rows = cm->tile_rows; |
| 350 | int t; |
| 351 | |
| 352 | (void)unused; |
| 353 | |
| 354 | for (t = thread_data->start; t < tile_rows * tile_cols; |
| 355 | t += cpi->num_workers) { |
| 356 | int tile_row = t / tile_cols; |
| 357 | int tile_col = t % tile_cols; |
| 358 | |
Ravi Chaudhary | 84a280a | 2018-09-24 16:09:48 +0530 | [diff] [blame] | 359 | TileDataEnc *const this_tile = |
| 360 | &cpi->tile_data[tile_row * cm->tile_cols + tile_col]; |
Ravi Chaudhary | bf0a6f9 | 2018-12-07 17:47:20 +0530 | [diff] [blame] | 361 | thread_data->td->mb.e_mbd.tile_ctx = &this_tile->tctx; |
| 362 | thread_data->td->mb.tile_pb_ctx = &this_tile->tctx; |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 363 | av1_encode_tile(cpi, thread_data->td, tile_row, tile_col); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 364 | } |
| 365 | |
Wan-Teh Chang | 3f0cbf1 | 2018-07-03 14:59:18 -0700 | [diff] [blame] | 366 | return 1; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 367 | } |
| 368 | |
// One-time creation of the encoder worker pool: allocates the worker array
// and per-thread data, then for each worker (0 is the main thread) allocates
// all per-thread scratch buffers and spawns the thread. Allocation failures
// raise a codec error via CHECK_MEM_ERROR.
static void create_enc_workers(AV1_COMP *cpi, int num_workers) {
  AV1_COMMON *const cm = &cpi->common;
  const AVxWorkerInterface *const winterface = aom_get_worker_interface();

  CHECK_MEM_ERROR(cm, cpi->workers,
                  aom_malloc(num_workers * sizeof(*cpi->workers)));

  CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                  aom_calloc(num_workers, sizeof(*cpi->tile_thr_data)));

#if CONFIG_MULTITHREAD
  // Row-based MT additionally needs the shared job mutex; create it once.
  if (cpi->row_mt == 1) {
    if (cpi->row_mt_mutex_ == NULL) {
      CHECK_MEM_ERROR(cm, cpi->row_mt_mutex_,
                      aom_malloc(sizeof(*(cpi->row_mt_mutex_))));
      if (cpi->row_mt_mutex_) pthread_mutex_init(cpi->row_mt_mutex_, NULL);
    }
  }
#endif

  for (int i = num_workers - 1; i >= 0; i--) {
    AVxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = &cpi->tile_thr_data[i];

    ++cpi->num_workers;
    winterface->init(worker);
    worker->thread_name = "aom enc worker";

    thread_data->cpi = cpi;
    thread_data->thread_id = i;

    if (i > 0) {
      // Allocate thread data.
      CHECK_MEM_ERROR(cm, thread_data->td,
                      aom_memalign(32, sizeof(*thread_data->td)));
      av1_zero(*thread_data->td);

      // Set up pc_tree.
      thread_data->td->pc_tree = NULL;
      av1_setup_pc_tree(cm, thread_data->td);

      // Per-thread prediction scratch buffers.
      CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf,
                      (uint8_t *)aom_memalign(
                          16, MAX_MB_PLANE * MAX_SB_SQUARE *
                                  sizeof(*thread_data->td->above_pred_buf)));
      CHECK_MEM_ERROR(cm, thread_data->td->left_pred_buf,
                      (uint8_t *)aom_memalign(
                          16, MAX_MB_PLANE * MAX_SB_SQUARE *
                                  sizeof(*thread_data->td->left_pred_buf)));

      CHECK_MEM_ERROR(
          cm, thread_data->td->wsrc_buf,
          (int32_t *)aom_memalign(
              16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf)));

#if CONFIG_COLLECT_INTER_MODE_RD_STATS
      CHECK_MEM_ERROR(cm, thread_data->td->inter_modes_info,
                      (InterModesInfo *)aom_malloc(
                          sizeof(*thread_data->td->inter_modes_info)));
#endif

      // Hash buffers used by block-hash motion search.
      for (int x = 0; x < 2; x++)
        for (int y = 0; y < 2; y++)
          CHECK_MEM_ERROR(
              cm, thread_data->td->hash_value_buffer[x][y],
              (uint32_t *)aom_malloc(
                  AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
                  sizeof(*thread_data->td->hash_value_buffer[0][0])));

      CHECK_MEM_ERROR(
          cm, thread_data->td->mask_buf,
          (int32_t *)aom_memalign(
              16, MAX_SB_SQUARE * sizeof(*thread_data->td->mask_buf)));
      // Allocate frame counters in thread data.
      CHECK_MEM_ERROR(cm, thread_data->td->counts,
                      aom_calloc(1, sizeof(*thread_data->td->counts)));

      // Allocate buffers used by palette coding mode.
      CHECK_MEM_ERROR(
          cm, thread_data->td->palette_buffer,
          aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));

      CHECK_MEM_ERROR(
          cm, thread_data->td->tmp_conv_dst,
          aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
                               sizeof(*thread_data->td->tmp_conv_dst)));
      for (int j = 0; j < 2; ++j) {
        CHECK_MEM_ERROR(
            cm, thread_data->td->tmp_obmc_bufs[j],
            aom_memalign(32, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
                                 sizeof(*thread_data->td->tmp_obmc_bufs[j])));
      }

      // Create threads
      if (!winterface->reset(worker))
        aom_internal_error(&cm->error, AOM_CODEC_ERROR,
                           "Tile encoder thread creation failed");
    } else {
      // Main thread acts as a worker and uses the thread data in cpi.
      thread_data->td = &cpi->td;
    }
    // Row-based MT gives every worker (including the main thread) its own
    // tile entropy context.
    if (cpi->row_mt == 1)
      CHECK_MEM_ERROR(
          cm, thread_data->td->tctx,
          (FRAME_CONTEXT *)aom_memalign(16, sizeof(*thread_data->td->tctx)));
    winterface->sync(worker);
  }
}
| 477 | |
| 478 | static void launch_enc_workers(AV1_COMP *cpi, int num_workers) { |
| 479 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
| 480 | // Encode a frame |
Ravi Chaudhary | 1f58dd8 | 2018-12-07 17:24:15 +0530 | [diff] [blame] | 481 | for (int i = num_workers - 1; i >= 0; i--) { |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 482 | AVxWorker *const worker = &cpi->workers[i]; |
| 483 | EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; |
| 484 | |
| 485 | // Set the starting tile for each thread. |
| 486 | thread_data->start = i; |
| 487 | |
Ravi Chaudhary | 1f58dd8 | 2018-12-07 17:24:15 +0530 | [diff] [blame] | 488 | if (i == 0) |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 489 | winterface->execute(worker); |
| 490 | else |
| 491 | winterface->launch(worker); |
| 492 | } |
| 493 | } |
| 494 | |
| 495 | static void sync_enc_workers(AV1_COMP *cpi, int num_workers) { |
| 496 | const AVxWorkerInterface *const winterface = aom_get_worker_interface(); |
Wan-Teh Chang | e45fa2d | 2018-10-19 11:02:22 -0700 | [diff] [blame] | 497 | int had_error = 0; |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 498 | |
| 499 | // Encoding ends. |
Ravi Chaudhary | 1f58dd8 | 2018-12-07 17:24:15 +0530 | [diff] [blame] | 500 | for (int i = num_workers - 1; i >= 0; i--) { |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 501 | AVxWorker *const worker = &cpi->workers[i]; |
Wan-Teh Chang | e45fa2d | 2018-10-19 11:02:22 -0700 | [diff] [blame] | 502 | had_error |= !winterface->sync(worker); |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 503 | } |
Wan-Teh Chang | e45fa2d | 2018-10-19 11:02:22 -0700 | [diff] [blame] | 504 | |
| 505 | if (had_error) |
| 506 | aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, |
| 507 | "Failed to encode tile data"); |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 508 | } |
| 509 | |
| 510 | static void accumulate_counters_enc_workers(AV1_COMP *cpi, int num_workers) { |
Ravi Chaudhary | 1f58dd8 | 2018-12-07 17:24:15 +0530 | [diff] [blame] | 511 | for (int i = num_workers - 1; i >= 0; i--) { |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 512 | AVxWorker *const worker = &cpi->workers[i]; |
| 513 | EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; |
Ravi Chaudhary | 00525ef | 2018-10-31 19:52:42 +0530 | [diff] [blame] | 514 | cpi->intrabc_used |= thread_data->td->intrabc_used; |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 515 | // Accumulate counters. |
Ravi Chaudhary | 1f58dd8 | 2018-12-07 17:24:15 +0530 | [diff] [blame] | 516 | if (i > 0) { |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 517 | av1_accumulate_frame_counts(&cpi->counts, thread_data->td->counts); |
| 518 | accumulate_rd_opt(&cpi->td, thread_data->td); |
| 519 | cpi->td.mb.txb_split_count += thread_data->td->mb.txb_split_count; |
Debargha Mukherjee | 0857e66 | 2019-01-04 16:22:09 -0800 | [diff] [blame^] | 520 | #if CONFIG_SPEED_STATS |
| 521 | cpi->td.mb.tx_search_count += thread_data->td->mb.tx_search_count; |
| 522 | #endif // CONFIG_SPEED_STATS |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 523 | } |
| 524 | } |
| 525 | } |
| 526 | |
// Per-frame worker setup: install the hook, copy the main thread's coding
// state into each spawned thread, then re-point the copied MACROBLOCK's
// buffer pointers at that thread's own scratch buffers (the struct copy
// would otherwise leave them aliasing the main thread's buffers).
static void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
                                int num_workers) {
  for (int i = num_workers - 1; i >= 0; i--) {
    AVxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = &cpi->tile_thr_data[i];

    worker->hook = hook;
    worker->data1 = thread_data;
    worker->data2 = NULL;

    thread_data->td->intrabc_used = 0;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
      // Restore this thread's own buffer pointers after the struct copy.
      thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf;
      thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf;
      thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf;

#if CONFIG_COLLECT_INTER_MODE_RD_STATS
      thread_data->td->mb.inter_modes_info = thread_data->td->inter_modes_info;
#endif
      // Copy the hash buffers' contents, then point at the local copies.
      for (int x = 0; x < 2; x++) {
        for (int y = 0; y < 2; y++) {
          memcpy(thread_data->td->hash_value_buffer[x][y],
                 cpi->td.mb.hash_value_buffer[x][y],
                 AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
                     sizeof(*thread_data->td->hash_value_buffer[0][0]));
          thread_data->td->mb.hash_value_buffer[x][y] =
              thread_data->td->hash_value_buffer[x][y];
        }
      }
      thread_data->td->mb.mask_buf = thread_data->td->mask_buf;
    }
    if (thread_data->td->counts != &cpi->counts) {
      memcpy(thread_data->td->counts, &cpi->counts, sizeof(cpi->counts));
    }

    if (i > 0) {
      thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer;
      thread_data->td->mb.tmp_conv_dst = thread_data->td->tmp_conv_dst;
      for (int j = 0; j < 2; ++j) {
        thread_data->td->mb.tmp_obmc_bufs[j] =
            thread_data->td->tmp_obmc_bufs[j];
      }

      thread_data->td->mb.e_mbd.tmp_conv_dst = thread_data->td->mb.tmp_conv_dst;
      for (int j = 0; j < 2; ++j) {
        thread_data->td->mb.e_mbd.tmp_obmc_bufs[j] =
            thread_data->td->mb.tmp_obmc_bufs[j];
      }
    }
  }
}
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 582 | |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 583 | void av1_encode_tiles_mt(AV1_COMP *cpi) { |
| 584 | AV1_COMMON *const cm = &cpi->common; |
| 585 | const int tile_cols = cm->tile_cols; |
| 586 | const int tile_rows = cm->tile_rows; |
| 587 | int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols * tile_rows); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 588 | |
Ravi Chaudhary | a497eb4 | 2018-09-07 12:38:08 +0530 | [diff] [blame] | 589 | if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) |
| 590 | av1_alloc_tile_data(cpi); |
| 591 | |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 592 | av1_init_tile_data(cpi); |
| 593 | // Only run once to create threads and allocate thread data. |
| 594 | if (cpi->num_workers == 0) { |
| 595 | create_enc_workers(cpi, num_workers); |
| 596 | } else { |
| 597 | num_workers = AOMMIN(num_workers, cpi->num_workers); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 598 | } |
Wan-Teh Chang | 8d2f577 | 2018-09-12 15:44:59 -0700 | [diff] [blame] | 599 | prepare_enc_workers(cpi, enc_worker_hook, num_workers); |
Ravi Chaudhary | 9ff9050 | 2018-08-31 15:46:26 +0530 | [diff] [blame] | 600 | launch_enc_workers(cpi, num_workers); |
| 601 | sync_enc_workers(cpi, num_workers); |
| 602 | accumulate_counters_enc_workers(cpi, num_workers); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 603 | } |
Yue Chen | cc6a6ef | 2018-05-21 16:21:05 -0700 | [diff] [blame] | 604 | |
| 605 | // Accumulate frame counts. FRAME_COUNTS consist solely of 'unsigned int' |
| 606 | // members, so we treat it as an array, and sum over the whole length. |
| 607 | void av1_accumulate_frame_counts(FRAME_COUNTS *acc_counts, |
| 608 | const FRAME_COUNTS *counts) { |
| 609 | unsigned int *const acc = (unsigned int *)acc_counts; |
| 610 | const unsigned int *const cnt = (const unsigned int *)counts; |
| 611 | |
| 612 | const unsigned int n_counts = sizeof(FRAME_COUNTS) / sizeof(unsigned int); |
| 613 | |
| 614 | for (unsigned int i = 0; i < n_counts; i++) acc[i] += cnt[i]; |
| 615 | } |
Ravi Chaudhary | da4c872 | 2018-10-05 17:55:20 +0530 | [diff] [blame] | 616 | |
// Row-level multithreaded encode: workers cooperate on superblock rows
// within tiles, synchronizing per-row rather than per-tile.
void av1_encode_tiles_row_mt(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int tile_cols = cm->tile_cols;
  const int tile_rows = cm->tile_rows;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  int num_workers = 0;
  int total_num_sb_rows = 0;
  int max_sb_rows = 0;

  // (Re)allocate per-tile encoder data when absent or undersized. The
  // row-MT sync buffers hang off the tile data, so release them first.
  if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
    av1_row_mt_mem_dealloc(cpi);
    av1_alloc_tile_data(cpi);
  }

  av1_init_tile_data(cpi);

  // Count superblock rows across all tiles: the total bounds how many
  // workers can be useful, and the per-tile maximum sizes the row-sync
  // arrays allocated below.
  for (int row = 0; row < tile_rows; row++) {
    for (int col = 0; col < tile_cols; col++) {
      TileDataEnc *tile_data = &cpi->tile_data[row * cm->tile_cols + col];
      int num_sb_rows_in_tile =
          av1_get_sb_rows_in_tile(cm, tile_data->tile_info);
      total_num_sb_rows += num_sb_rows_in_tile;
      max_sb_rows = AOMMAX(max_sb_rows, num_sb_rows_in_tile);
    }
  }
  num_workers = AOMMIN(cpi->oxcf.max_threads, total_num_sb_rows);

  // Reallocate row-MT bookkeeping whenever the tile layout or the tallest
  // tile changed since the previous frame.
  if (multi_thread_ctxt->allocated_tile_cols != tile_cols ||
      multi_thread_ctxt->allocated_tile_rows != tile_rows ||
      multi_thread_ctxt->allocated_sb_rows != max_sb_rows) {
    av1_row_mt_mem_dealloc(cpi);
    av1_row_mt_mem_alloc(cpi, max_sb_rows);
  }

  // -1 marks "no tile assigned"; memset with -1 sets every byte to 0xFF,
  // which yields -1 for each int element.
  memset(multi_thread_ctxt->thread_id_to_tile_id, -1,
         sizeof(*multi_thread_ctxt->thread_id_to_tile_id) * MAX_NUM_THREADS);

  // Reset per-tile row-MT synchronization state for this frame.
  for (int tile_row = 0; tile_row < tile_rows; tile_row++) {
    for (int tile_col = 0; tile_col < tile_cols; tile_col++) {
      int tile_id = tile_row * tile_cols + tile_col;
      TileDataEnc *this_tile = &cpi->tile_data[tile_id];

      // Initialize cur_col to -1 for all rows.
      memset(this_tile->row_mt_sync.cur_col, -1,
             sizeof(*this_tile->row_mt_sync.cur_col) * max_sb_rows);
      this_tile->row_mt_info.current_mi_row = this_tile->tile_info.mi_row_start;
      this_tile->row_mt_info.num_threads_working = 0;

#if CONFIG_COLLECT_INTER_MODE_RD_STATS
      av1_inter_mode_data_init(this_tile);
#endif
      // Clear the above-row context so each tile starts from a clean
      // prediction context.
      av1_zero_above_context(cm, &cpi->td.mb.e_mbd,
                             this_tile->tile_info.mi_col_start,
                             this_tile->tile_info.mi_col_end, tile_row);
      this_tile->m_search_count = 0;   // Count of motion search hits.
      this_tile->ex_search_count = 0;  // Exhaustive mesh search hits.
    }
  }

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    create_enc_workers(cpi, num_workers);
  } else {
    num_workers = AOMMIN(num_workers, cpi->num_workers);
  }
  assign_tile_to_thread(multi_thread_ctxt, tile_cols * tile_rows, num_workers);
  prepare_enc_workers(cpi, enc_row_mt_worker_hook, num_workers);
  launch_enc_workers(cpi, num_workers);
  sync_enc_workers(cpi, num_workers);
  // Delta loop-filter levels are accumulated per worker; merge them after
  // all workers have finished.
  if (cm->delta_q_info.delta_lf_present_flag) update_delta_lf_for_row_mt(cpi);
  accumulate_counters_enc_workers(cpi, num_workers);
}