/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AOM_AV1_COMMON_THREAD_COMMON_H_
#define AOM_AV1_COMMON_THREAD_COMMON_H_
#include "config/aom_config.h"
#include "av1/common/av1_loopfilter.h"
#include "av1/common/cdef.h"
#include "aom_util/aom_thread.h"
#ifdef __cplusplus
extern "C" {
#endif
struct AV1Common;
typedef struct AV1LfMTInfo {
  int mi_row;
  int plane;
  int dir;
  int lpf_opt_level;
} AV1LfMTInfo;
// Loopfilter row synchronization
typedef struct AV1LfSyncData {
#if CONFIG_MULTITHREAD
  pthread_mutex_t *mutex_[MAX_MB_PLANE];
  pthread_cond_t *cond_[MAX_MB_PLANE];
#endif
  // Per-plane arrays storing, for each superblock row, the index of the last
  // loop-filtered superblock column.
  int *cur_sb_col[MAX_MB_PLANE];
  // The optimal sync_range for different resolution and platform should be
  // determined by testing. Currently, it is chosen to be a power-of-2 number.
  int sync_range;
  int rows;
  // Row-based parallel loopfilter data
  LFWorkerData *lfdata;
  int num_workers;
#if CONFIG_MULTITHREAD
  pthread_mutex_t *job_mutex;
#endif
  AV1LfMTInfo *job_queue;
  int jobs_enqueued;
  int jobs_dequeued;
} AV1LfSync;
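// Illustrative sketch (an assumption about typical usage, not the actual
// helpers) of how the row-sync state above is consumed: before filtering
// superblock column c of row r, a worker waits until the row above has
// advanced at least sync_range columns past c, e.g.
//
//   pthread_mutex_lock(&lf_sync->mutex_[plane][r - 1]);
//   while (c > lf_sync->cur_sb_col[plane][r - 1] - lf_sync->sync_range)
//     pthread_cond_wait(&lf_sync->cond_[plane][r - 1],
//                       &lf_sync->mutex_[plane][r - 1]);
//   pthread_mutex_unlock(&lf_sync->mutex_[plane][r - 1]);
//
// After finishing column c of row r, the worker updates cur_sb_col[plane][r]
// under the same kind of lock and signals the condition variable so that the
// row below can make progress.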
typedef struct AV1LrMTInfo {
  int v_start;
  int v_end;
  int lr_unit_row;
  int plane;
  int sync_mode;
  int v_copy_start;
  int v_copy_end;
} AV1LrMTInfo;
typedef struct LoopRestorationWorkerData {
  int32_t *rst_tmpbuf;
  void *rlbs;
  void *lr_ctxt;
  int do_extend_border;
} LRWorkerData;
// Loop restoration row synchronization
typedef struct AV1LrSyncData {
#if CONFIG_MULTITHREAD
  pthread_mutex_t *mutex_[MAX_MB_PLANE];
  pthread_cond_t *cond_[MAX_MB_PLANE];
#endif
  // Per-plane arrays storing, for each row, the index of the last
  // loop-restored block column.
  int *cur_sb_col[MAX_MB_PLANE];
  // The optimal sync_range for different resolution and platform should be
  // determined by testing. Currently, it is chosen to be a power-of-2 number.
  int sync_range;
  int rows;
  int num_planes;
  int num_workers;
#if CONFIG_MULTITHREAD
  pthread_mutex_t *job_mutex;
#endif
  // Row-based parallel loop restoration data
  LRWorkerData *lrworkerdata;
  AV1LrMTInfo *job_queue;
  int jobs_enqueued;
  int jobs_dequeued;
} AV1LrSync;
typedef struct AV1CdefWorker {
  AV1_COMMON *cm;
  MACROBLOCKD *xd;
  uint16_t *colbuf[MAX_MB_PLANE];
  uint16_t *srcbuf;
  uint16_t *linebuf[MAX_MB_PLANE];
  cdef_init_fb_row_t cdef_init_fb_row_fn;
  int do_extend_border;
} AV1CdefWorkerData;
typedef struct AV1CdefRowSync {
#if CONFIG_MULTITHREAD
  pthread_mutex_t *row_mutex_;
  pthread_cond_t *row_cond_;
#endif  // CONFIG_MULTITHREAD
  int is_row_done;
} AV1CdefRowSync;
// Data related to CDEF multi-thread synchronization.
typedef struct AV1CdefSyncData {
#if CONFIG_MULTITHREAD
  // Mutex lock used while dispatching jobs.
  pthread_mutex_t *mutex_;
#endif  // CONFIG_MULTITHREAD
  // Row-level multi-thread sync information for CDEF
  AV1CdefRowSync *cdef_row_mt;
  // Flag to indicate all blocks are processed and end of frame is reached
  int end_of_frame;
  // Row index in units of 64x64 block
  int fbr;
  // Column index in units of 64x64 block
  int fbc;
} AV1CdefSync;
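// Rough sketch (an assumption about typical usage, not the exact dispatch
// logic) of how the fields above hand out CDEF work: under mutex_, a worker
// claims the unit at (fbr, fbc), advances the pair in raster order over the
// frame's 64x64 units, and sets end_of_frame once the last unit is claimed.
// nhfb/nvfb below are hypothetical names for the number of 64x64 units per
// row and per column:
//
//   pthread_mutex_lock(cdef_sync->mutex_);
//   if (!cdef_sync->end_of_frame) {
//     row = cdef_sync->fbr;
//     col = cdef_sync->fbc;
//     if (++cdef_sync->fbc == nhfb) {
//       cdef_sync->fbc = 0;
//       if (++cdef_sync->fbr == nvfb) cdef_sync->end_of_frame = 1;
//     }
//   }
//   pthread_mutex_unlock(cdef_sync->mutex_);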
void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
                       AV1CdefWorkerData *const cdef_worker,
                       AVxWorker *const workers, AV1CdefSync *const cdef_sync,
                       int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
                       int do_extend_border);
void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
                             const MACROBLOCKD *const xd,
                             CdefBlockInfo *const fb_info,
                             uint16_t **const linebuf, uint16_t *const src,
                             struct AV1CdefSyncData *const cdef_sync, int fbr);
void av1_cdef_copy_sb8_16(const AV1_COMMON *const cm, uint16_t *const dst,
                          int dstride, const uint8_t *src, int src_voffset,
                          int src_hoffset, int sstride, int vsize, int hsize);
void av1_cdef_copy_sb8_16_lowbd(uint16_t *const dst, int dstride,
                                const uint8_t *src, int src_voffset,
                                int src_hoffset, int sstride, int vsize,
                                int hsize);
void av1_cdef_copy_sb8_16_highbd(uint16_t *const dst, int dstride,
                                 const uint8_t *src, int src_voffset,
                                 int src_hoffset, int sstride, int vsize,
                                 int hsize);
void av1_alloc_cdef_sync(AV1_COMMON *const cm, AV1CdefSync *cdef_sync,
                         int num_workers);
void av1_free_cdef_sync(AV1CdefSync *cdef_sync);
// Deallocate loopfilter synchronization related mutex and data.
void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
void av1_loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows,
                           int width, int num_workers);
void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
                              struct macroblockd *xd, int plane_start,
                              int plane_end, int partial_frame,
                              AVxWorker *workers, int num_workers,
                              AV1LfSync *lf_sync, int lpf_opt_level);
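// Example call (a sketch assuming cm, xd, workers and lf_sync have already
// been set up by the caller): filter all planes of the current frame with
// the default, non-optimized path:
//
//   av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, xd,
//                            /*plane_start=*/0, /*plane_end=*/MAX_MB_PLANE,
//                            /*partial_frame=*/0, workers, num_workers,
//                            &lf_sync, /*lpf_opt_level=*/0);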
void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
                                          struct AV1Common *cm,
                                          int optimized_lr, AVxWorker *workers,
                                          int num_workers, AV1LrSync *lr_sync,
                                          void *lr_ctxt, int do_extend_border);
void av1_loop_restoration_dealloc(AV1LrSync *lr_sync, int num_workers);
void av1_loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm,
                                int num_workers, int num_rows_lr,
                                int num_planes, int width);
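// Returns the extra top-right superblock delay, in superblock units, that
// row-level synchronization should account for when intra block copy is
// allowed.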
int av1_get_intrabc_extra_top_right_sb_delay(const AV1_COMMON *cm);
void av1_thread_loop_filter_rows(
    const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
    struct macroblockd_plane *planes, MACROBLOCKD *xd, int mi_row, int plane,
    int dir, int lpf_opt_level, AV1LfSync *const lf_sync,
    AV1_DEBLOCKING_PARAMETERS *params_buf, TX_SIZE *tx_buf, int mib_size_log2);
static AOM_FORCE_INLINE bool skip_loop_filter_plane(const int planes_to_lf[3],
                                                    int plane,
                                                    int lpf_opt_level) {
  // If LPF_PICK_METHOD is LPF_PICK_FROM_Q, we have the option to filter both
  // chroma planes together
  if (lpf_opt_level == 2) {
    if (plane == AOM_PLANE_Y) {
      return !planes_to_lf[plane];
    }
    if (plane == AOM_PLANE_U) {
      // U and V are handled together
      return !planes_to_lf[1] && !planes_to_lf[2];
    }
    assert(plane == AOM_PLANE_V);
    if (plane == AOM_PLANE_V) {
      // V is handled when U is filtered
      return true;
    }
  }
  // Normal operation mode
  return !planes_to_lf[plane];
}
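// For example, with lpf_opt_level == 2 and planes_to_lf == {1, 0, 1}, the Y
// job is kept, the U job is kept (because V still needs filtering and the
// chroma planes are filtered together), and the V job is always skipped.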
static AOM_INLINE void enqueue_lf_jobs(AV1LfSync *lf_sync, int start, int stop,
                                       const int planes_to_lf[3],
                                       int lpf_opt_level,
                                       int num_mis_in_lpf_unit_height) {
  int mi_row, plane, dir;
  AV1LfMTInfo *lf_job_queue = lf_sync->job_queue;
  lf_sync->jobs_enqueued = 0;
  lf_sync->jobs_dequeued = 0;
  // Launch all vertical jobs first, as they are blocking the horizontal ones.
  // Launch top row jobs for all planes first, in case the output can be
  // partially reconstructed row by row.
  for (dir = 0; dir < 2; ++dir) {
    for (mi_row = start; mi_row < stop; mi_row += num_mis_in_lpf_unit_height) {
      for (plane = 0; plane < 3; ++plane) {
        if (skip_loop_filter_plane(planes_to_lf, plane, lpf_opt_level)) {
          continue;
        }
        if (!planes_to_lf[plane]) continue;
        lf_job_queue->mi_row = mi_row;
        lf_job_queue->plane = plane;
        lf_job_queue->dir = dir;
        lf_job_queue->lpf_opt_level = lpf_opt_level;
        lf_job_queue++;
        lf_sync->jobs_enqueued++;
      }
    }
  }
}
static AOM_INLINE void loop_filter_frame_mt_init(
    AV1_COMMON *cm, int start_mi_row, int end_mi_row, const int planes_to_lf[3],
    int num_workers, AV1LfSync *lf_sync, int lpf_opt_level,
    int num_mis_in_lpf_unit_height_log2) {
  // Number of superblock rows
  const int sb_rows =
      CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, num_mis_in_lpf_unit_height_log2);
  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
      num_workers > lf_sync->num_workers) {
    av1_loop_filter_dealloc(lf_sync);
    av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
  }
  // Initialize cur_sb_col to -1 for all SB rows.
  for (int i = 0; i < MAX_MB_PLANE; i++) {
    memset(lf_sync->cur_sb_col[i], -1,
           sizeof(*(lf_sync->cur_sb_col[i])) * sb_rows);
  }
  enqueue_lf_jobs(lf_sync, start_mi_row, end_mi_row, planes_to_lf,
                  lpf_opt_level, (1 << num_mis_in_lpf_unit_height_log2));
}
static AOM_INLINE AV1LfMTInfo *get_lf_job_info(AV1LfSync *lf_sync) {
  AV1LfMTInfo *cur_job_info = NULL;
#if CONFIG_MULTITHREAD
  pthread_mutex_lock(lf_sync->job_mutex);
  if (lf_sync->jobs_dequeued < lf_sync->jobs_enqueued) {
    cur_job_info = lf_sync->job_queue + lf_sync->jobs_dequeued;
    lf_sync->jobs_dequeued++;
  }
  pthread_mutex_unlock(lf_sync->job_mutex);
#else
  (void)lf_sync;
#endif
  return cur_job_info;
}
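// A worker hook would typically drain the queue with a loop along these
// lines (sketch; frame, planes, xd, params_buf, tx_buf and mib_size_log2 are
// assumed to come from the worker's LFWorkerData):
//
//   AV1LfMTInfo *job;
//   while ((job = get_lf_job_info(lf_sync)) != NULL) {
//     av1_thread_loop_filter_rows(frame, cm, planes, xd, job->mi_row,
//                                 job->plane, job->dir, job->lpf_opt_level,
//                                 lf_sync, params_buf, tx_buf,
//                                 mib_size_log2);
//   }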
static AOM_INLINE void loop_filter_data_reset(LFWorkerData *lf_data,
                                              YV12_BUFFER_CONFIG *frame_buffer,
                                              struct AV1Common *cm,
                                              MACROBLOCKD *xd) {
  struct macroblockd_plane *pd = xd->plane;
  lf_data->frame_buffer = frame_buffer;
  lf_data->cm = cm;
  lf_data->xd = xd;
  for (int i = 0; i < MAX_MB_PLANE; i++) {
    memcpy(&lf_data->planes[i].dst, &pd[i].dst, sizeof(lf_data->planes[i].dst));
    lf_data->planes[i].subsampling_x = pd[i].subsampling_x;
    lf_data->planes[i].subsampling_y = pd[i].subsampling_y;
  }
}
static AOM_INLINE int check_planes_to_loop_filter(const struct loopfilter *lf,
                                                  int *planes_to_lf,
                                                  int plane_start,
                                                  int plane_end) {
  // For each luma and chroma plane, whether to filter it or not.
  planes_to_lf[0] = (lf->filter_level[0] || lf->filter_level[1]) &&
                    plane_start <= 0 && 0 < plane_end;
  planes_to_lf[1] = lf->filter_level_u && plane_start <= 1 && 1 < plane_end;
  planes_to_lf[2] = lf->filter_level_v && plane_start <= 2 && 2 < plane_end;
  // If the luma plane is purposely not filtered, neither are the chroma
  // planes.
  if (!planes_to_lf[0] && plane_start <= 0 && 0 < plane_end) return 0;
  // Early exit.
  if (!planes_to_lf[0] && !planes_to_lf[1] && !planes_to_lf[2]) return 0;
  return 1;
}
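// For example, with lf->filter_level[0] = 1, lf->filter_level[1] = 1,
// filter_level_u = 0, filter_level_v = 3, plane_start = 0 and plane_end = 3,
// this sets planes_to_lf to {1, 0, 1} and returns 1; if both luma filter
// levels were zero, it would return 0 and nothing would be filtered.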
#ifdef __cplusplus
} // extern "C"
#endif
#endif // AOM_AV1_COMMON_THREAD_COMMON_H_