Modify memory allocations in cdef frame to facilitate MT
This CL modifies the memory allocation of colbuf and srcbuf
to suit row-level multi-threading.
Change-Id: I3761cf4ea23ffd0ea83fefd4ba68c841b2e02330
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index d8594ed..acc6280 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -53,10 +53,16 @@
}
void av1_free_cdef_linebuf(AV1_COMMON *const cm) {
+ if (cm->cdef_info.srcbuf != NULL) aom_free(cm->cdef_info.srcbuf);
+ cm->cdef_info.srcbuf = NULL;
for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
if (cm->cdef_info.linebuf[plane] != NULL)
aom_free(cm->cdef_info.linebuf[plane]);
cm->cdef_info.linebuf[plane] = NULL;
+
+ if (cm->cdef_info.colbuf[plane] != NULL)
+ aom_free(cm->cdef_info.colbuf[plane]);
+ cm->cdef_info.colbuf[plane] = NULL;
}
}
@@ -103,6 +109,22 @@
(CDEF_VBORDER << 1) * stride));
}
}
+
+ if (cm->cdef_info.srcbuf == NULL)
+ CHECK_MEM_ERROR(
+ cm, cm->cdef_info.srcbuf,
+ aom_malloc(sizeof(*cm->cdef_info.srcbuf) * CDEF_INBUF_SIZE));
+
+ for (int plane = 0; plane < num_planes; plane++) {
+ const int shift = plane == AOM_PLANE_Y ? 0 : cm->seq_params.subsampling_x;
+ const int block_height =
+ (CDEF_BLOCKSIZE << (MI_SIZE_LOG2 - shift)) * 2 * CDEF_VBORDER;
+
+ if (cm->cdef_info.colbuf[plane] == NULL)
+ CHECK_MEM_ERROR(cm, cm->cdef_info.colbuf[plane],
+ aom_malloc(sizeof(*cm->cdef_info.colbuf[plane]) *
+ block_height * CDEF_HBORDER));
+ }
}
#if !CONFIG_REALTIME_ONLY
diff --git a/av1/common/cdef.c b/av1/common/cdef.c
index c7ba6c1..97c643b 100644
--- a/av1/common/cdef.c
+++ b/av1/common/cdef.c
@@ -28,7 +28,6 @@
uint16_t *src;
uint16_t *top_linebuf[MAX_MB_PLANE];
uint8_t *dst;
- uint16_t *colbuf[MAX_MB_PLANE];
cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
int xdec;
@@ -152,17 +151,18 @@
// Inputs:
// cm: Pointer to common structure.
// fb_info: Pointer to the CDEF block-level parameter structure.
+// colbuf: Left column buffer for CDEF.
// cdef_left: Left block is filtered or not.
// fbc, fbr: col and row index of a block.
// plane: plane index Y/CB/CR.
// Returns:
// Nothing will be returned.
static void cdef_prepare_fb(AV1_COMMON *cm, CdefBlockInfo *fb_info,
- const int *cdef_left, int fbc, int fbr,
- uint8_t plane) {
+ uint16_t **const colbuf, const int *cdef_left,
+ int fbc, int fbr, uint8_t plane) {
const CommonModeInfoParams *const mi_params = &cm->mi_params;
uint16_t *src = fb_info->src;
- const int stride = mi_params->mi_cols << MI_SIZE_LOG2;
+ const int luma_stride = mi_params->mi_cols << MI_SIZE_LOG2;
const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
int cstart = 0;
@@ -173,6 +173,8 @@
int hsize = nhb << fb_info->mi_wide_l2;
int vsize = nvb << fb_info->mi_high_l2;
const uint16_t *top_linebuf = fb_info->top_linebuf[plane];
+ const int stride =
+ luma_stride >> (plane == AOM_PLANE_Y ? 0 : cm->seq_params.subsampling_x);
if (fbc == nhfb - 1)
cend = hsize;
@@ -221,12 +223,12 @@
if (*cdef_left) {
/* If we deringed the superblock on the left then we need to copy in
saved pixels. */
- copy_rect(src, CDEF_BSTRIDE, fb_info->colbuf[plane], CDEF_HBORDER,
+ copy_rect(src, CDEF_BSTRIDE, colbuf[plane], CDEF_HBORDER,
rend + CDEF_VBORDER, CDEF_HBORDER);
}
/* Saving pixels in case we need to dering the superblock on the
right. */
- copy_rect(fb_info->colbuf[plane], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE,
+ copy_rect(colbuf[plane], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE,
rend + CDEF_VBORDER, CDEF_HBORDER);
if (fb_info->frame_boundary[LEFT]) {
@@ -298,7 +300,8 @@
}
static void cdef_fb_col(AV1_COMMON *cm, MACROBLOCKD *xd, CdefBlockInfo *fb_info,
- int *cdef_left, int fbc, int fbr) {
+ uint16_t **const colbuf, int *cdef_left, int fbc,
+ int fbr) {
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int mbmi_cdef_strength =
mi_params
@@ -323,18 +326,19 @@
*cdef_left = 0;
return;
}
- cdef_prepare_fb(cm, fb_info, cdef_left, fbc, fbr, plane);
+ cdef_prepare_fb(cm, fb_info, colbuf, cdef_left, fbc, fbr, plane);
cdef_filter_fb(fb_info, plane, cm->seq_params.use_highbitdepth);
}
*cdef_left = 1;
}
-static INLINE void cdef_init_fb_row(AV1_COMMON *cm, const MACROBLOCKD *const xd,
- CdefBlockInfo *const fb_info,
- uint16_t **const linebuf, int fbr) {
+static INLINE void cdef_init_fb_row(AV1_COMMON *cm, MACROBLOCKD *const xd,
+ CdefBlockInfo *fb_info,
+ uint16_t **const linebuf,
+ uint16_t *const src, int fbr) {
const int num_planes = av1_num_planes(cm);
const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- const int stride = cm->mi_params.mi_cols << MI_SIZE_LOG2;
+ const int luma_stride = cm->mi_params.mi_cols << MI_SIZE_LOG2;
const bool ping_pong = fbr & 1;
// for the current filter block, it's top left corner mi structure (mi_tl)
// is first accessed to check whether the top and left boundaries are
@@ -352,9 +356,16 @@
else
fb_info->frame_boundary[BOTTOM] = 1;
+ fb_info->src = src;
+ fb_info->damping = cm->cdef_info.cdef_damping;
+ fb_info->coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
+ av1_zero(fb_info->dir);
+ av1_zero(fb_info->var);
+
for (uint8_t plane = 0; plane < num_planes; plane++) {
const int mi_high_l2 = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
const int offset = MI_SIZE_64X64 * (fbr + 1) << mi_high_l2;
+ const int stride = luma_stride >> xd->plane[plane].subsampling_x;
// here ping-pong buffers are maintained for top linebuf
// to avoid linebuf over-write by consecutive row.
uint16_t *const top_linebuf =
@@ -369,64 +380,25 @@
}
}
-static void cdef_fb_row(AV1_COMMON *cm, MACROBLOCKD *xd, CdefBlockInfo *fb_info,
- uint16_t **const linebuf, int fbr) {
+static void cdef_fb_row(AV1_COMMON *cm, MACROBLOCKD *xd,
+ uint16_t **const linebuf, uint16_t **const colbuf,
+ uint16_t *const src, int fbr) {
+ CdefBlockInfo fb_info;
int cdef_left = 1;
const int nhfb = (cm->mi_params.mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- cdef_init_fb_row(cm, xd, fb_info, linebuf, fbr);
+ cdef_init_fb_row(cm, xd, &fb_info, linebuf, src, fbr);
for (int fbc = 0; fbc < nhfb; fbc++) {
- fb_info->frame_boundary[LEFT] = (MI_SIZE_64X64 * fbc == 0) ? 1 : 0;
+ fb_info.frame_boundary[LEFT] = (MI_SIZE_64X64 * fbc == 0) ? 1 : 0;
if (fbc != nhfb - 1)
- fb_info->frame_boundary[RIGHT] =
+ fb_info.frame_boundary[RIGHT] =
(MI_SIZE_64X64 * (fbc + 1) == cm->mi_params.mi_cols) ? 1 : 0;
else
- fb_info->frame_boundary[RIGHT] = 1;
- cdef_fb_col(cm, xd, fb_info, &cdef_left, fbc, fbr);
+ fb_info.frame_boundary[RIGHT] = 1;
+ cdef_fb_col(cm, xd, &fb_info, colbuf, &cdef_left, fbc, fbr);
}
}
-// Initialize the frame-level CDEF parameters.
-// Inputs:
-// frame: Pointer to input frame buffer.
-// cm: Pointer to common structure.
-// xd: Pointer to common current coding block structure.
-// fb_info: Pointer to the CDEF block-level parameter structure.
-// src: Intermediate input buffer for CDEF.
-// colbuf: Left line buffer for CDEF.
-// Returns:
-// Nothing will be returned.
-static void cdef_prepare_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- MACROBLOCKD *xd, CdefBlockInfo *fb_info,
- uint16_t *src, uint16_t **colbuf) {
- const int num_planes = av1_num_planes(cm);
- av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
- num_planes);
-
- for (uint8_t plane = 0; plane < num_planes; plane++) {
- const int mi_high_l2 = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
- const int block_height = (MI_SIZE_64X64 << mi_high_l2) + 2 * CDEF_VBORDER;
- colbuf[plane] = aom_malloc(
- sizeof(*colbuf) *
- ((CDEF_BLOCKSIZE << (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) +
- 2 * CDEF_VBORDER) *
- CDEF_HBORDER);
- fill_rect(colbuf[plane], CDEF_HBORDER, block_height, CDEF_HBORDER,
- CDEF_VERY_LARGE);
- fb_info->colbuf[plane] = colbuf[plane];
- }
-
- fb_info->src = src;
- fb_info->damping = cm->cdef_info.cdef_damping;
- fb_info->coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
- memset(fb_info->dir, 0, sizeof(fb_info->dir));
- memset(fb_info->var, 0, sizeof(fb_info->var));
-}
-
-static void cdef_free(uint16_t **colbuf, const int num_planes) {
- for (uint8_t plane = 0; plane < num_planes; plane++) aom_free(colbuf[plane]);
-}
-
// Perform CDEF on input frame.
// Inputs:
// frame: Pointer to input frame buffer.
@@ -436,16 +408,13 @@
// Nothing will be returned.
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd) {
- DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
- uint16_t *colbuf[MAX_MB_PLANE] = { NULL };
- CdefBlockInfo fb_info;
const int num_planes = av1_num_planes(cm);
const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- cdef_prepare_frame(frame, cm, xd, &fb_info, src, colbuf);
+ av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
+ num_planes);
for (int fbr = 0; fbr < nvfb; fbr++)
- cdef_fb_row(cm, xd, &fb_info, cm->cdef_info.linebuf, fbr);
-
- cdef_free(colbuf, num_planes);
+ cdef_fb_row(cm, xd, cm->cdef_info.linebuf, cm->cdef_info.colbuf,
+ cm->cdef_info.srcbuf, fbr);
}