Facilitate multi-threading in loop restoration
Add partial-plane copy functions (aom_yv12_partial_copy_y/u/v) and
row-level sync read/write hooks, currently wired to no-op dummies, to
facilitate multi-threading in loop restoration.
Change-Id: I4b45f9671c8b4e2add3038670096c32f1a4e032d
diff --git a/aom_scale/aom_scale_rtcd.pl b/aom_scale/aom_scale_rtcd.pl
index 6d1255a..c5990b1 100644
--- a/aom_scale/aom_scale_rtcd.pl
+++ b/aom_scale/aom_scale_rtcd.pl
@@ -36,6 +36,12 @@
add_proto qw/void aom_yv12_copy_v/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc";
+add_proto qw/void aom_yv12_partial_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc, int hstart, int hend, int vstart, int vend";
+
+add_proto qw/void aom_yv12_partial_copy_u/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend";
+
+add_proto qw/void aom_yv12_partial_copy_v/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend";
+
add_proto qw/void aom_extend_frame_borders/, "struct yv12_buffer_config *ybf, const int num_planes";
specialize qw/aom_extend_frame_borders dspr2/;
diff --git a/aom_scale/generic/yv12extend.c b/aom_scale/generic/yv12extend.c
index 9d6eb76..ba18352 100644
--- a/aom_scale/generic/yv12extend.c
+++ b/aom_scale/generic/yv12extend.c
@@ -320,3 +320,92 @@
dst += dst_bc->uv_stride;
}
}
+
+void aom_yv12_partial_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc,  // Copies a window of the Y plane from src_ybc into dst_ybc:
+ YV12_BUFFER_CONFIG *dst_ybc, int hstart,  // columns [hstart, hend) of every row in [vstart, vend).
+ int hend, int vstart, int vend) {  // Expects hend >= hstart (the difference is the memcpy length); no rows are copied when vend <= vstart.
+ int row;
+ const uint8_t *src = src_ybc->y_buffer;
+ uint8_t *dst = dst_ybc->y_buffer;
+
+ if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) {  // 16-bit path. NOTE(review): only src's flags are tested; dst is presumably the same depth - confirm.
+ const uint16_t *src16 =  // NOTE(review): CONVERT_TO_SHORTPTR is defined elsewhere; presumably yields the real uint16_t address - confirm.
+ CONVERT_TO_SHORTPTR(src + vstart * src_ybc->y_stride + hstart);
+ uint16_t *dst16 =
+ CONVERT_TO_SHORTPTR(dst + vstart * dst_ybc->y_stride + hstart);
+ for (row = vstart; row < vend; ++row) {
+ memcpy(dst16, src16, (hend - hstart) * sizeof(uint16_t));  // one row of (hend - hstart) 16-bit samples
+ src16 += src_ybc->y_stride;  // pointer is uint16_t*, so the stride advances in 16-bit elements
+ dst16 += dst_ybc->y_stride;
+ }
+ return;  // skip the 8-bit path below
+ }
+ src = (src + vstart * src_ybc->y_stride + hstart);  // 8-bit path: move to the window's top-left sample
+ dst = (dst + vstart * dst_ybc->y_stride + hstart);
+
+ for (row = vstart; row < vend; ++row) {
+ memcpy(dst, src, (hend - hstart));  // one row of (hend - hstart) bytes
+ src += src_ybc->y_stride;
+ dst += dst_ybc->y_stride;
+ }
+}
+
+void aom_yv12_partial_copy_u_c(const YV12_BUFFER_CONFIG *src_bc,  // Copies a window of the U plane from src_bc into dst_bc:
+ YV12_BUFFER_CONFIG *dst_bc, int hstart, int hend,  // columns [hstart, hend) of every row in [vstart, vend).
+ int vstart, int vend) {  // Expects hend >= hstart (the difference is the memcpy length); no rows are copied when vend <= vstart.
+ int row;
+ const uint8_t *src = src_bc->u_buffer;
+ uint8_t *dst = dst_bc->u_buffer;
+
+ if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {  // 16-bit path. NOTE(review): only src's flags are tested; dst is presumably the same depth - confirm.
+ const uint16_t *src16 =  // NOTE(review): CONVERT_TO_SHORTPTR is defined elsewhere; presumably yields the real uint16_t address - confirm.
+ CONVERT_TO_SHORTPTR(src + vstart * src_bc->uv_stride + hstart);
+ uint16_t *dst16 =
+ CONVERT_TO_SHORTPTR(dst + vstart * dst_bc->uv_stride + hstart);
+ for (row = vstart; row < vend; ++row) {
+ memcpy(dst16, src16, (hend - hstart) * sizeof(uint16_t));  // one row of (hend - hstart) 16-bit samples
+ src16 += src_bc->uv_stride;  // pointer is uint16_t*, so the stride advances in 16-bit elements
+ dst16 += dst_bc->uv_stride;
+ }
+ return;  // skip the 8-bit path below
+ }
+
+ src = (src + vstart * src_bc->uv_stride + hstart);  // 8-bit path: move to the window's top-left sample
+ dst = (dst + vstart * dst_bc->uv_stride + hstart);
+
+ for (row = vstart; row < vend; ++row) {
+ memcpy(dst, src, (hend - hstart));  // one row of (hend - hstart) bytes
+ src += src_bc->uv_stride;
+ dst += dst_bc->uv_stride;
+ }
+}
+
+void aom_yv12_partial_copy_v_c(const YV12_BUFFER_CONFIG *src_bc,  // Copies a window of the V plane from src_bc into dst_bc:
+ YV12_BUFFER_CONFIG *dst_bc, int hstart, int hend,  // columns [hstart, hend) of every row in [vstart, vend).
+ int vstart, int vend) {  // Expects hend >= hstart (the difference is the memcpy length); no rows are copied when vend <= vstart.
+ int row;
+ const uint8_t *src = src_bc->v_buffer;
+ uint8_t *dst = dst_bc->v_buffer;
+
+ if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {  // 16-bit path. NOTE(review): only src's flags are tested; dst is presumably the same depth - confirm.
+ const uint16_t *src16 =  // NOTE(review): CONVERT_TO_SHORTPTR is defined elsewhere; presumably yields the real uint16_t address - confirm.
+ CONVERT_TO_SHORTPTR(src + vstart * src_bc->uv_stride + hstart);
+ uint16_t *dst16 =
+ CONVERT_TO_SHORTPTR(dst + vstart * dst_bc->uv_stride + hstart);
+ for (row = vstart; row < vend; ++row) {
+ memcpy(dst16, src16, (hend - hstart) * sizeof(uint16_t));  // one row of (hend - hstart) 16-bit samples
+ src16 += src_bc->uv_stride;  // pointer is uint16_t*, so the stride advances in 16-bit elements
+ dst16 += dst_bc->uv_stride;
+ }
+ return;  // skip the 8-bit path below
+ }
+
+ src = (src + vstart * src_bc->uv_stride + hstart);  // 8-bit path: move to the window's top-left sample
+ dst = (dst + vstart * dst_bc->uv_stride + hstart);
+
+ for (row = vstart; row < vend; ++row) {
+ memcpy(dst, src, (hend - hstart));  // one row of (hend - hstart) bytes
+ src += src_bc->uv_stride;
+ dst += dst_bc->uv_stride;
+ }
+}
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index 411d89e..2e37ac7 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -59,7 +59,7 @@
// restoration unit can extend to up to 150% its normal width or height. The
// max with 1 is to deal with tiles that are smaller than half of a restoration
// unit.
-static int count_units_in_tile(int unit_size, int tile_size) {
+int av1_lr_count_units_in_tile(int unit_size, int tile_size) {
return AOMMAX((tile_size + (unit_size >> 1)) / unit_size, 1);
}
@@ -82,8 +82,8 @@
// max with 1 is to deal with tiles that are smaller than half of a
// restoration unit.
const int unit_size = rsi->restoration_unit_size;
- const int hpertile = count_units_in_tile(unit_size, max_tile_w);
- const int vpertile = count_units_in_tile(unit_size, max_tile_h);
+ const int hpertile = av1_lr_count_units_in_tile(unit_size, max_tile_w);
+ const int vpertile = av1_lr_count_units_in_tile(unit_size, max_tile_h);
rsi->units_per_tile = hpertile * vpertile;
rsi->horz_units_per_tile = hpertile;
@@ -1189,20 +1189,24 @@
lr_plane_ctxt->data_stride = frame->strides[is_uv];
lr_plane_ctxt->dst_stride = lr_ctxt->dst->strides[is_uv];
lr_plane_ctxt->tile_rect = av1_whole_frame_rect(cm, is_uv);
- filter_frame_on_tile(0, 0, lr_plane_ctxt, cm);
+ filter_frame_on_tile(LR_TILE_ROW, LR_TILE_COL, lr_plane_ctxt, cm);
}
}
void av1_loop_restoration_copy_planes(AV1LrStruct *loop_rest_ctxt,
AV1_COMMON *cm, int num_planes) {
- typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst);
- static const copy_fun copy_funs[3] = { aom_yv12_copy_y, aom_yv12_copy_u,
- aom_yv12_copy_v };
+ typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src_ybc,
+ YV12_BUFFER_CONFIG *dst_ybc, int hstart, int hend,
+ int vstart, int vend);
+ static const copy_fun copy_funs[3] = {
+ aom_yv12_partial_copy_y, aom_yv12_partial_copy_u, aom_yv12_partial_copy_v
+ };
for (int plane = 0; plane < num_planes; ++plane) {
if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
- copy_funs[plane](loop_rest_ctxt->dst, loop_rest_ctxt->frame);
+ AV1PixelRect tile_rect = loop_rest_ctxt->ctxt[plane].tile_rect;
+ copy_funs[plane](loop_rest_ctxt->dst, loop_rest_ctxt->frame, tile_rect.left,
+ tile_rect.right, tile_rect.top, tile_rect.bottom);
}
}
@@ -1241,9 +1245,10 @@
const AV1PixelRect *tile_rect,
rest_unit_visitor_t on_rest_unit,
int row_number, int unit_size, int unit_idx0,
- int hunits_per_tile, void *priv,
- int32_t *tmpbuf,
- RestorationLineBuffers *rlbs) {
+ int hunits_per_tile, int plane, void *priv,
+ int32_t *tmpbuf, RestorationLineBuffers *rlbs,
+ sync_read_fn_t on_sync_read,
+ sync_write_fn_t on_sync_write) {
const int tile_w = tile_rect->right - tile_rect->left;
const int ext_size = unit_size * 3 / 2;
int x0 = 0, j = 0;
@@ -1257,16 +1262,37 @@
const int unit_idx = unit_idx0 + row_number * hunits_per_tile + j;
+ on_sync_read(NULL, row_number, j, plane);
+
on_rest_unit(limits, tile_rect, unit_idx, priv, tmpbuf, rlbs);
+
+ on_sync_write(NULL, row_number, j, hunits_per_tile, plane);
+
x0 += w;
++j;
}
}
+void av1_lr_sync_read_dummy(void *const lr_sync, int r, int c, int plane) {  // No-op read-sync hook: every argument is ignored.
+ (void)lr_sync;  // the (void) casts mark each parameter as intentionally unused
+ (void)r;
+ (void)c;
+ (void)plane;
+}
+
+void av1_lr_sync_write_dummy(void *const lr_sync, int r, int c,  // No-op write-sync hook: every argument is ignored.
+ const int sb_cols, int plane) {
+ (void)lr_sync;  // the (void) casts mark each parameter as intentionally unused
+ (void)r;
+ (void)c;
+ (void)sb_cols;
+ (void)plane;
+}
+
static void foreach_rest_unit_in_tile(const AV1PixelRect *tile_rect,
int tile_row, int tile_col, int tile_cols,
int hunits_per_tile, int units_per_tile,
- int unit_size, int ss_y,
+ int unit_size, int ss_y, int plane,
rest_unit_visitor_t on_rest_unit,
void *priv, int32_t *tmpbuf,
RestorationLineBuffers *rlbs) {
@@ -1291,8 +1317,9 @@
if (limits.v_end < tile_rect->bottom) limits.v_end -= voffset;
av1_foreach_rest_unit_in_row(&limits, tile_rect, on_rest_unit, i, unit_size,
- unit_idx0, hunits_per_tile, priv, tmpbuf,
- rlbs);
+ unit_idx0, hunits_per_tile, plane, priv,
+ tmpbuf, rlbs, av1_lr_sync_read_dummy,
+ av1_lr_sync_write_dummy);
y0 += h;
++i;
@@ -1309,9 +1336,10 @@
const RestorationInfo *rsi = &cm->rst_info[plane];
- foreach_rest_unit_in_tile(tile_rect, 0, 0, 1, rsi->horz_units_per_tile,
- rsi->units_per_tile, rsi->restoration_unit_size,
- ss_y, on_rest_unit, priv, tmpbuf, rlbs);
+ foreach_rest_unit_in_tile(tile_rect, LR_TILE_ROW, LR_TILE_COL, LR_TILE_COLS,
+ rsi->horz_units_per_tile, rsi->units_per_tile,
+ rsi->restoration_unit_size, ss_y, plane,
+ on_rest_unit, priv, tmpbuf, rlbs);
}
int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane,
@@ -1346,8 +1374,8 @@
// Calculate the number of restoration units in this tile (which might be
// strictly less than rsi->horz_units_per_tile and rsi->vert_units_per_tile)
- const int horz_units = count_units_in_tile(size, tile_w);
- const int vert_units = count_units_in_tile(size, tile_h);
+ const int horz_units = av1_lr_count_units_in_tile(size, tile_w);
+ const int vert_units = av1_lr_count_units_in_tile(size, tile_h);
// The size of an MI-unit on this plane of the image
const int ss_x = is_uv && cm->subsampling_x;
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
index 48087c2..81a03e3 100644
--- a/av1/common/restoration.h
+++ b/av1/common/restoration.h
@@ -171,6 +171,10 @@
#error "Wiener filter currently only works if WIENER_FILT_PREC_BITS == 7"
#endif
+#define LR_TILE_ROW 0  // tile_row value passed by the loop-restoration callers in place of a literal 0
+#define LR_TILE_COL 0  // tile_col value passed by the same callers in place of a literal 0
+#define LR_TILE_COLS 1  // tile_cols value; NOTE(review): presumably the whole frame is treated as a single tile - confirm
+
typedef struct {
int r[2]; // radii
int s[2]; // sgr parameters for r[0] and r[1], based on GenSgrprojVtable()
@@ -317,6 +321,11 @@
typedef void (*rest_tile_start_visitor_t)(int tile_row, int tile_col,
void *priv);
+typedef void (*sync_read_fn_t)(void *const lr_sync, int r, int c, int plane);  // Hook invoked by av1_foreach_rest_unit_in_row before each unit, with (row_number, unit column, plane).
+
+typedef void (*sync_write_fn_t)(void *const lr_sync, int r, int c,  // Hook invoked by av1_foreach_rest_unit_in_row after each unit;
+ const int sb_cols, int plane);  // sb_cols receives hunits_per_tile at that call site.
+
// Call on_rest_unit for each loop restoration unit in the plane.
void av1_foreach_rest_unit_in_plane(const struct AV1Common *cm, int plane,
rest_unit_visitor_t on_rest_unit,
@@ -351,10 +360,15 @@
const AV1PixelRect *tile_rect,
rest_unit_visitor_t on_rest_unit,
int row_number, int unit_size, int unit_idx0,
- int hunits_per_tile, void *priv,
- int32_t *tmpbuf,
- RestorationLineBuffers *rlbs);
+ int hunits_per_tile, int plane, void *priv,
+ int32_t *tmpbuf, RestorationLineBuffers *rlbs,
+ sync_read_fn_t on_sync_read,
+ sync_write_fn_t on_sync_write);
AV1PixelRect av1_whole_frame_rect(const struct AV1Common *cm, int is_uv);
+int av1_lr_count_units_in_tile(int unit_size, int tile_size);
+void av1_lr_sync_read_dummy(void *const lr_sync, int r, int c, int plane);
+void av1_lr_sync_write_dummy(void *const lr_sync, int r, int c,
+ const int sb_cols, int plane);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index d3a1b74..0dbc489 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -1243,7 +1243,7 @@
};
reset_rsc(rsc);
- rsc_on_tile(0, 0, rsc);
+ rsc_on_tile(LR_TILE_ROW, LR_TILE_COL, rsc);
av1_foreach_rest_unit_in_plane(rsc->cm, rsc->plane, funs[rtype], rsc,
&rsc->tile_rect, rsc->cm->rst_tmpbuf, NULL);
return RDCOST_DBL(rsc->x->rdmult, rsc->bits >> 4, rsc->sse);