Optimize av1_loop_restoration_filter_frame
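1. Avoid allocating and freeing the restoration output frame buffer on
   every frame: reuse a persistent buffer (cm->rst_frame), which is
   freed in alloccommon.c instead of at the end of each call.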
2. Remove the unnecessary plane copies when a plane's
   frame_restoration_type is RESTORE_NONE: only planes that were
   actually filtered are copied back to the frame.
Decoder speedup: ~5%
Change-Id: If49027e04244174e7f34bb1bb90aa53438a82df0
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index 410fd6c..f8eca2c 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -181,6 +181,8 @@
boundaries->stripe_boundary_above = NULL;
boundaries->stripe_boundary_below = NULL;
}
+
+ aom_free_frame_buffer(&cm->rst_frame);
}
#if LOOP_FILTER_BITMASK
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index 290918a..c44e9a2 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -437,6 +437,9 @@
// Pointer to a scratch buffer used by self-guided restoration
int32_t *rst_tmpbuf;
+ // Output of loop restoration
+ YV12_BUFFER_CONFIG rst_frame;
+
// Flag signaling how frame contexts should be updated at the end of
// a frame decode
REFRESH_FRAME_CONTEXT_MODE refresh_frame_context;
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index 2e81c41..64a0cf3 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -1358,11 +1358,11 @@
static const copy_fun copy_funs[3] = { aom_yv12_copy_y, aom_yv12_copy_u,
aom_yv12_copy_v };
- YV12_BUFFER_CONFIG dst;
- memset(&dst, 0, sizeof(dst));
+ YV12_BUFFER_CONFIG *dst = &cm->rst_frame;
+
const int frame_width = frame->crop_widths[0];
const int frame_height = frame->crop_heights[0];
- if (aom_realloc_frame_buffer(&dst, frame_width, frame_height,
+ if (aom_realloc_frame_buffer(dst, frame_width, frame_height,
cm->subsampling_x, cm->subsampling_y,
cm->use_highbitdepth, AOM_BORDER_IN_PIXELS,
cm->byte_alignment, NULL, NULL, NULL) < 0)
@@ -1377,7 +1377,6 @@
const RestorationInfo *rsi = &cm->rst_info[plane];
RestorationType rtype = rsi->frame_restoration_type;
if (rtype == RESTORE_NONE) {
- copy_funs[plane](frame, &dst);
continue;
}
@@ -1398,19 +1397,16 @@
ctxt.highbd = highbd;
ctxt.bit_depth = bit_depth;
ctxt.data8 = frame->buffers[plane];
- ctxt.dst8 = dst.buffers[plane];
+ ctxt.dst8 = dst->buffers[plane];
ctxt.data_stride = frame->strides[is_uv];
- ctxt.dst_stride = dst.strides[is_uv];
+ ctxt.dst_stride = dst->strides[is_uv];
ctxt.tmpbuf = cm->rst_tmpbuf;
av1_foreach_rest_unit_in_frame(cm, plane, filter_frame_on_tile,
filter_frame_on_unit, &ctxt);
- }
- for (int plane = 0; plane < num_planes; ++plane) {
- copy_funs[plane](&dst, frame);
+ copy_funs[plane](dst, frame);
}
- aom_free_frame_buffer(&dst);
}
static void foreach_rest_unit_in_tile(const AV1PixelRect *tile_rect,