Blame - av1/common/thread_common.c - avm

blob: d515912312f119388052bcb1366dc04bd5f5748a [file] [log] [blame]

Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	1	/*
Yaowu Xu	2ab7ff0	2016-09-02 12:04:54 -0700	[diff] [blame]	2	* Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	3	*
Yaowu Xu	2ab7ff0	2016-09-02 12:04:54 -0700	[diff] [blame]	4	* This source code is subject to the terms of the BSD 2 Clause License and
				5	* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
				6	* was not distributed with this source code in the LICENSE file, you can
				7	* obtain it at www.aomedia.org/license/software. If the Alliance for Open
				8	* Media Patent License 1.0 was not distributed with this source code in the
				9	* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	10	*/
				11
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	12	#include "./aom_config.h"
				13	#include "aom_dsp/aom_dsp_common.h"
				14	#include "aom_mem/aom_mem.h"
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	15	#include "av1/common/entropymode.h"
				16	#include "av1/common/thread_common.h"
				17	#include "av1/common/reconinter.h"
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	18
				19	#if CONFIG_MULTITHREAD
				20	static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
				21	const int kMaxTryLocks = 4000;
				22	int locked = 0;
				23	int i;
				24
				25	for (i = 0; i < kMaxTryLocks; ++i) {
				26	if (!pthread_mutex_trylock(mutex)) {
				27	locked = 1;
				28	break;
				29	}
				30	}
				31
				32	if (!locked) pthread_mutex_lock(mutex);
				33	}
				34	#endif // CONFIG_MULTITHREAD
				35
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	36	static INLINE void sync_read(AV1LfSync *const lf_sync, int r, int c) {
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	37	#if CONFIG_MULTITHREAD
				38	const int nsync = lf_sync->sync_range;
				39
				40	if (r && !(c & (nsync - 1))) {
				41	pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1];
				42	mutex_lock(mutex);
				43
				44	while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
				45	pthread_cond_wait(&lf_sync->cond_[r - 1], mutex);
				46	}
				47	pthread_mutex_unlock(mutex);
				48	}
				49	#else
				50	(void)lf_sync;
				51	(void)r;
				52	(void)c;
				53	#endif // CONFIG_MULTITHREAD
				54	}
				55
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	56	static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	57	const int sb_cols) {
				58	#if CONFIG_MULTITHREAD
				59	const int nsync = lf_sync->sync_range;
				60	int cur;
				61	// Only signal when there are enough filtered SB for next row to run.
				62	int sig = 1;
				63
				64	if (c < sb_cols - 1) {
				65	cur = c;
				66	if (c % nsync) sig = 0;
				67	} else {
				68	cur = sb_cols + nsync;
				69	}
				70
				71	if (sig) {
				72	mutex_lock(&lf_sync->mutex_[r]);
				73
				74	lf_sync->cur_sb_col[r] = cur;
				75
				76	pthread_cond_signal(&lf_sync->cond_[r]);
				77	pthread_mutex_unlock(&lf_sync->mutex_[r]);
				78	}
				79	#else
				80	(void)lf_sync;
				81	(void)r;
				82	(void)c;
				83	(void)sb_cols;
				84	#endif // CONFIG_MULTITHREAD
				85	}
				86
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	87	#if !CONFIG_EXT_PARTITION_TYPES
				88	static INLINE enum lf_path get_loop_filter_path(
				89	int y_only, struct macroblockd_plane planes[MAX_MB_PLANE]) {
				90	if (y_only)
				91	return LF_PATH_444;
				92	else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
				93	return LF_PATH_420;
				94	else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
				95	return LF_PATH_444;
				96	else
				97	return LF_PATH_SLOW;
				98	}
				99
				100	static INLINE void loop_filter_block_plane_ver(
				101	AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
				102	MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
				103	LOOP_FILTER_MASK *lfm) {
				104	if (plane == 0) {
				105	av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, lfm);
				106	} else {
				107	switch (path) {
				108	case LF_PATH_420:
				109	av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, lfm);
				110	break;
				111	case LF_PATH_444:
				112	av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, lfm);
				113	break;
				114	case LF_PATH_SLOW:
Ryan Lei	6f8c1a7	2016-10-26 10:52:12 -0700	[diff] [blame]	115	av1_filter_block_plane_non420_ver(cm, &planes[plane], mi, mi_row,
Jingning Han	6e4955d	2017-05-30 22:54:48 -0700	[diff] [blame]	116	mi_col, plane);
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	117	break;
				118	}
				119	}
				120	}
				121
				122	static INLINE void loop_filter_block_plane_hor(
				123	AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
Ryan Lei	6f8c1a7	2016-10-26 10:52:12 -0700	[diff] [blame]	124	MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
				125	LOOP_FILTER_MASK *lfm) {
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	126	if (plane == 0) {
				127	av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, lfm);
				128	} else {
				129	switch (path) {
				130	case LF_PATH_420:
				131	av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, lfm);
				132	break;
				133	case LF_PATH_444:
				134	av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, lfm);
				135	break;
				136	case LF_PATH_SLOW:
Ryan Lei	6f8c1a7	2016-10-26 10:52:12 -0700	[diff] [blame]	137	av1_filter_block_plane_non420_hor(cm, &planes[plane], mi, mi_row,
Jingning Han	6e4955d	2017-05-30 22:54:48 -0700	[diff] [blame]	138	mi_col, plane);
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	139	break;
				140	}
				141	}
				142	}
				143	#endif
				144	// Row-based multi-threaded loopfilter hook
				145	#if CONFIG_PARALLEL_DEBLOCKING
				146	static int loop_filter_ver_row_worker(AV1LfSync *const lf_sync,
				147	LFWorkerData *const lf_data) {
				148	const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	149	int mi_row, mi_col;
				150	#if !CONFIG_EXT_PARTITION_TYPES
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	151	enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
				152	#endif
				153	for (mi_row = lf_data->start; mi_row < lf_data->stop;
				154	mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
				155	MODE_INFO **const mi =
				156	lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
				157
				158	for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
				159	mi_col += lf_data->cm->mib_size) {
				160	LOOP_FILTER_MASK lfm;
				161	int plane;
				162
Jingning Han	91d9a79	2017-04-18 12:01:52 -0700	[diff] [blame]	163	av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
				164	lf_data->frame_buffer, mi_row, mi_col);
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	165	av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
				166	lf_data->cm->mi_stride, &lfm);
				167
				168	#if CONFIG_EXT_PARTITION_TYPES
				169	for (plane = 0; plane < num_planes; ++plane)
				170	av1_filter_block_plane_non420_ver(lf_data->cm, &lf_data->planes[plane],
Jingning Han	6e4955d	2017-05-30 22:54:48 -0700	[diff] [blame]	171	mi + mi_col, mi_row, mi_col, plane);
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	172	#else
				173
				174	for (plane = 0; plane < num_planes; ++plane)
Ryan Lei	6f8c1a7	2016-10-26 10:52:12 -0700	[diff] [blame]	175	loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane,
				176	mi + mi_col, mi_row, mi_col, path, &lfm);
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	177	#endif
				178	}
				179	}
				180	return 1;
				181	}
				182
				183	static int loop_filter_hor_row_worker(AV1LfSync *const lf_sync,
				184	LFWorkerData *const lf_data) {
				185	const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
				186	const int sb_cols =
				187	mi_cols_aligned_to_sb(lf_data->cm) >> lf_data->cm->mib_size_log2;
				188	int mi_row, mi_col;
				189	#if !CONFIG_EXT_PARTITION_TYPES
				190	enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
				191	#endif
				192
				193	for (mi_row = lf_data->start; mi_row < lf_data->stop;
				194	mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
				195	MODE_INFO **const mi =
				196	lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
				197
				198	for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
				199	mi_col += lf_data->cm->mib_size) {
				200	const int r = mi_row >> lf_data->cm->mib_size_log2;
				201	const int c = mi_col >> lf_data->cm->mib_size_log2;
				202	LOOP_FILTER_MASK lfm;
				203	int plane;
				204
				205	// TODO(wenhao.zhang@intel.com): For better parallelization, reorder
				206	// the outer loop to column-based and remove the synchronizations here.
				207	sync_read(lf_sync, r, c);
				208
Jingning Han	91d9a79	2017-04-18 12:01:52 -0700	[diff] [blame]	209	av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
				210	lf_data->frame_buffer, mi_row, mi_col);
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	211	av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
				212	lf_data->cm->mi_stride, &lfm);
				213	#if CONFIG_EXT_PARTITION_TYPES
				214	for (plane = 0; plane < num_planes; ++plane)
				215	av1_filter_block_plane_non420_hor(lf_data->cm, &lf_data->planes[plane],
Jingning Han	6e4955d	2017-05-30 22:54:48 -0700	[diff] [blame]	216	mi + mi_col, mi_row, mi_col, plane);
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	217	#else
				218	for (plane = 0; plane < num_planes; ++plane)
Ryan Lei	6f8c1a7	2016-10-26 10:52:12 -0700	[diff] [blame]	219	loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane,
				220	mi + mi_col, mi_row, mi_col, path, &lfm);
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	221	#endif
				222	sync_write(lf_sync, r, c, sb_cols);
				223	}
				224	}
				225	return 1;
				226	}
				227	#else // CONFIG_PARALLEL_DEBLOCKING
				228	static int loop_filter_row_worker(AV1LfSync *const lf_sync,
				229	LFWorkerData *const lf_data) {
				230	const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
				231	const int sb_cols =
				232	mi_cols_aligned_to_sb(lf_data->cm) >> lf_data->cm->mib_size_log2;
				233	int mi_row, mi_col;
				234	#if !CONFIG_EXT_PARTITION_TYPES
				235	enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	236	#endif // !CONFIG_EXT_PARTITION_TYPES
				237
				238	#if CONFIG_EXT_PARTITION
				239	printf(
				240	"STOPPING: This code has not been modified to work with the "
				241	"extended coding unit size experiment");
				242	exit(EXIT_FAILURE);
				243	#endif // CONFIG_EXT_PARTITION
				244
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	245	for (mi_row = lf_data->start; mi_row < lf_data->stop;
				246	mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
				247	MODE_INFO **const mi =
				248	lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	249
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	250	for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
				251	mi_col += lf_data->cm->mib_size) {
				252	const int r = mi_row >> lf_data->cm->mib_size_log2;
				253	const int c = mi_col >> lf_data->cm->mib_size_log2;
				254	#if !CONFIG_EXT_PARTITION_TYPES
				255	LOOP_FILTER_MASK lfm;
				256	#endif
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	257	int plane;
				258
				259	sync_read(lf_sync, r, c);
				260
Jingning Han	91d9a79	2017-04-18 12:01:52 -0700	[diff] [blame]	261	av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
				262	lf_data->frame_buffer, mi_row, mi_col);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	263	#if CONFIG_EXT_PARTITION_TYPES
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	264	for (plane = 0; plane < num_planes; ++plane) {
				265	av1_filter_block_plane_non420_ver(lf_data->cm, &lf_data->planes[plane],
Jingning Han	6e4955d	2017-05-30 22:54:48 -0700	[diff] [blame]	266	mi + mi_col, mi_row, mi_col, plane);
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	267	av1_filter_block_plane_non420_hor(lf_data->cm, &lf_data->planes[plane],
Jingning Han	6e4955d	2017-05-30 22:54:48 -0700	[diff] [blame]	268	mi + mi_col, mi_row, mi_col, plane);
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	269	}
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	270	#else
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	271	av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
				272	lf_data->cm->mi_stride, &lfm);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	273
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	274	for (plane = 0; plane < num_planes; ++plane) {
Ryan Lei	6f8c1a7	2016-10-26 10:52:12 -0700	[diff] [blame]	275	loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane,
				276	mi + mi_col, mi_row, mi_col, path, &lfm);
				277	loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane,
				278	mi + mi_col, mi_row, mi_col, path, &lfm);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	279	}
				280	#endif // CONFIG_EXT_PARTITION_TYPES
				281	sync_write(lf_sync, r, c, sb_cols);
				282	}
				283	}
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	284	return 1;
				285	}
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	286	#endif // CONFIG_PARALLEL_DEBLOCKING
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	287
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	288	static void loop_filter_rows_mt(YV12_BUFFER_CONFIG frame, AV1_COMMON cm,
Yaowu Xu	989dd5b	2017-10-11 21:59:46 -0700	[diff] [blame]	289	struct macroblockd_plane *planes, int start,
				290	int stop, int y_only, AVxWorker *workers,
				291	int nworkers, AV1LfSync *lf_sync) {
Debargha Mukherjee	e36a08c	2017-10-08 21:17:31 -0700	[diff] [blame]	292	#if CONFIG_EXT_PARTITION
				293	printf(
				294	"STOPPING: This code has not been modified to work with the "
				295	"extended coding unit size experiment");
				296	exit(EXIT_FAILURE);
				297	#endif // CONFIG_EXT_PARTITION
				298
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	299	const AVxWorkerInterface *const winterface = aom_get_worker_interface();
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	300	// Number of superblock rows and cols
				301	const int sb_rows = mi_rows_aligned_to_sb(cm) >> cm->mib_size_log2;
				302	// Decoder may allocate more threads than number of tiles based on user's
				303	// input.
				304	const int tile_cols = cm->tile_cols;
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	305	const int num_workers = AOMMIN(nworkers, tile_cols);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	306	int i;
				307
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	308	if (!lf_sync->sync_range \|\| sb_rows != lf_sync->rows \|\|
				309	num_workers > lf_sync->num_workers) {
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	310	av1_loop_filter_dealloc(lf_sync);
				311	av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	312	}
				313
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	314	// Set up loopfilter thread data.
				315	// The decoder is capping num_workers because it has been observed that using
				316	// more threads on the loopfilter than there are cores will hurt performance
				317	// on Android. This is because the system will only schedule the tile decode
				318	// workers on cores equal to the number of tile columns. Then if the decoder
				319	// tries to use more threads for the loopfilter, it will hurt performance
				320	// because of contention. If the multithreading code changes in the future
				321	// then the number of workers used by the loopfilter should be revisited.
				322
				323	#if CONFIG_PARALLEL_DEBLOCKING
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	324	// Initialize cur_sb_col to -1 for all SB rows.
				325	memset(lf_sync->cur_sb_col, -1, sizeof(lf_sync->cur_sb_col) sb_rows);
				326
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	327	// Filter all the vertical edges in the whole frame
				328	for (i = 0; i < num_workers; ++i) {
				329	AVxWorker *const worker = &workers[i];
				330	LFWorkerData *const lf_data = &lf_sync->lfdata[i];
				331
				332	worker->hook = (AVxWorkerHook)loop_filter_ver_row_worker;
				333	worker->data1 = lf_sync;
				334	worker->data2 = lf_data;
				335
				336	// Loopfilter data
				337	av1_loop_filter_data_reset(lf_data, frame, cm, planes);
				338	lf_data->start = start + i * cm->mib_size;
				339	lf_data->stop = stop;
				340	lf_data->y_only = y_only;
				341
				342	// Start loopfiltering
				343	if (i == num_workers - 1) {
				344	winterface->execute(worker);
				345	} else {
				346	winterface->launch(worker);
				347	}
				348	}
				349
				350	// Wait till all rows are finished
				351	for (i = 0; i < num_workers; ++i) {
				352	winterface->sync(&workers[i]);
				353	}
				354
				355	memset(lf_sync->cur_sb_col, -1, sizeof(lf_sync->cur_sb_col) sb_rows);
				356	// Filter all the horizontal edges in the whole frame
				357	for (i = 0; i < num_workers; ++i) {
				358	AVxWorker *const worker = &workers[i];
				359	LFWorkerData *const lf_data = &lf_sync->lfdata[i];
				360
				361	worker->hook = (AVxWorkerHook)loop_filter_hor_row_worker;
				362	worker->data1 = lf_sync;
				363	worker->data2 = lf_data;
				364
				365	// Loopfilter data
				366	av1_loop_filter_data_reset(lf_data, frame, cm, planes);
				367	lf_data->start = start + i * cm->mib_size;
				368	lf_data->stop = stop;
				369	lf_data->y_only = y_only;
				370
				371	// Start loopfiltering
				372	if (i == num_workers - 1) {
				373	winterface->execute(worker);
				374	} else {
				375	winterface->launch(worker);
				376	}
				377	}
				378
				379	// Wait till all rows are finished
				380	for (i = 0; i < num_workers; ++i) {
				381	winterface->sync(&workers[i]);
				382	}
				383	#else // CONFIG_PARALLEL_DEBLOCKING
				384	// Initialize cur_sb_col to -1 for all SB rows.
				385	memset(lf_sync->cur_sb_col, -1, sizeof(lf_sync->cur_sb_col) sb_rows);
				386
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	387	for (i = 0; i < num_workers; ++i) {
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	388	AVxWorker *const worker = &workers[i];
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	389	LFWorkerData *const lf_data = &lf_sync->lfdata[i];
				390
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	391	worker->hook = (AVxWorkerHook)loop_filter_row_worker;
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	392	worker->data1 = lf_sync;
				393	worker->data2 = lf_data;
				394
				395	// Loopfilter data
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	396	av1_loop_filter_data_reset(lf_data, frame, cm, planes);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	397	lf_data->start = start + i * cm->mib_size;
				398	lf_data->stop = stop;
				399	lf_data->y_only = y_only;
				400
				401	// Start loopfiltering
				402	if (i == num_workers - 1) {
				403	winterface->execute(worker);
				404	} else {
				405	winterface->launch(worker);
				406	}
				407	}
				408
				409	// Wait till all rows are finished
				410	for (i = 0; i < num_workers; ++i) {
				411	winterface->sync(&workers[i]);
				412	}
Ryan Lei	1514948	2016-10-25 18:48:43 -0700	[diff] [blame]	413	#endif // CONFIG_PARALLEL_DEBLOCKING
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	414	}
				415
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	416	void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG frame, AV1_COMMON cm,
Yaowu Xu	989dd5b	2017-10-11 21:59:46 -0700	[diff] [blame]	417	struct macroblockd_plane *planes,
Cheng Chen	179479f	2017-08-04 10:56:39 -0700	[diff] [blame]	418	int frame_filter_level,
Cheng Chen	13fc819	2017-08-19 11:49:28 -0700	[diff] [blame]	419	#if CONFIG_LOOPFILTER_LEVEL
Cheng Chen	179479f	2017-08-04 10:56:39 -0700	[diff] [blame]	420	int frame_filter_level_r,
				421	#endif
				422	int y_only, int partial_frame, AVxWorker *workers,
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	423	int num_workers, AV1LfSync *lf_sync) {
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	424	int start_mi_row, end_mi_row, mi_rows_to_filter;
				425
				426	if (!frame_filter_level) return;
				427
				428	start_mi_row = 0;
				429	mi_rows_to_filter = cm->mi_rows;
				430	if (partial_frame && cm->mi_rows > 8) {
				431	start_mi_row = cm->mi_rows >> 1;
				432	start_mi_row &= 0xfffffff8;
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	433	mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	434	}
				435	end_mi_row = start_mi_row + mi_rows_to_filter;
Cheng Chen	13fc819	2017-08-19 11:49:28 -0700	[diff] [blame]	436	#if CONFIG_LOOPFILTER_LEVEL
Cheng Chen	d8184da	2017-09-26 18:15:22 -0700	[diff] [blame]	437	av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level_r,
				438	y_only);
Cheng Chen	179479f	2017-08-04 10:56:39 -0700	[diff] [blame]	439	#else
				440	av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level);
Cheng Chen	13fc819	2017-08-19 11:49:28 -0700	[diff] [blame]	441	#endif // CONFIG_LOOPFILTER_LEVEL
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	442	loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, y_only,
				443	workers, num_workers, lf_sync);
				444	}
				445
				446	// Set up nsync by width.
				447	static INLINE int get_sync_range(int width) {
				448	// nsync numbers are picked by testing. For example, for 4k
				449	// video, using 4 gives best performance.
				450	if (width < 640)
				451	return 1;
				452	else if (width <= 1280)
				453	return 2;
				454	else if (width <= 4096)
				455	return 4;
				456	else
				457	return 8;
				458	}
				459
				460	// Allocate memory for lf row synchronization
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	461	void av1_loop_filter_alloc(AV1LfSync lf_sync, AV1_COMMON cm, int rows,
				462	int width, int num_workers) {
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	463	lf_sync->rows = rows;
				464	#if CONFIG_MULTITHREAD
				465	{
				466	int i;
				467
				468	CHECK_MEM_ERROR(cm, lf_sync->mutex_,
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	469	aom_malloc(sizeof(lf_sync->mutex_) rows));
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	470	if (lf_sync->mutex_) {
				471	for (i = 0; i < rows; ++i) {
				472	pthread_mutex_init(&lf_sync->mutex_[i], NULL);
				473	}
				474	}
				475
				476	CHECK_MEM_ERROR(cm, lf_sync->cond_,
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	477	aom_malloc(sizeof(lf_sync->cond_) rows));
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	478	if (lf_sync->cond_) {
				479	for (i = 0; i < rows; ++i) {
				480	pthread_cond_init(&lf_sync->cond_[i], NULL);
				481	}
				482	}
				483	}
				484	#endif // CONFIG_MULTITHREAD
				485
				486	CHECK_MEM_ERROR(cm, lf_sync->lfdata,
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	487	aom_malloc(num_workers * sizeof(*lf_sync->lfdata)));
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	488	lf_sync->num_workers = num_workers;
				489
				490	CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	491	aom_malloc(sizeof(lf_sync->cur_sb_col) rows));
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	492
				493	// Set up nsync.
				494	lf_sync->sync_range = get_sync_range(width);
				495	}
				496
				497	// Deallocate lf synchronization related mutex and data
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	498	void av1_loop_filter_dealloc(AV1LfSync *lf_sync) {
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	499	if (lf_sync != NULL) {
				500	#if CONFIG_MULTITHREAD
				501	int i;
				502
				503	if (lf_sync->mutex_ != NULL) {
				504	for (i = 0; i < lf_sync->rows; ++i) {
				505	pthread_mutex_destroy(&lf_sync->mutex_[i]);
				506	}
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	507	aom_free(lf_sync->mutex_);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	508	}
				509	if (lf_sync->cond_ != NULL) {
				510	for (i = 0; i < lf_sync->rows; ++i) {
				511	pthread_cond_destroy(&lf_sync->cond_[i]);
				512	}
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	513	aom_free(lf_sync->cond_);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	514	}
				515	#endif // CONFIG_MULTITHREAD
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	516	aom_free(lf_sync->lfdata);
				517	aom_free(lf_sync->cur_sb_col);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	518	// clear the structure as the source of this call may be a resize in which
				519	// case this call will be followed by an _alloc() which may fail.
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	520	av1_zero(*lf_sync);
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	521	}
				522	}
				523
				524	// Accumulate frame counts. FRAME_COUNTS consist solely of 'unsigned int'
				525	// members, so we treat it as an array, and sum over the whole length.
Debargha Mukherjee	5802ebe	2016-12-21 04:17:24 -0800	[diff] [blame]	526	void av1_accumulate_frame_counts(FRAME_COUNTS *acc_counts,
				527	FRAME_COUNTS *counts) {
				528	unsigned int const acc = (unsigned int )acc_counts;
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	529	const unsigned int const cnt = (unsigned int )counts;
				530
				531	const unsigned int n_counts = sizeof(FRAME_COUNTS) / sizeof(unsigned int);
				532	unsigned int i;
				533
				534	for (i = 0; i < n_counts; i++) acc[i] += cnt[i];
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	535	}