Reducing copies in deringing filter
Only copy the modified pixels from the first filter back into the input of the
second filter.
Change-Id: Ifb9094c33c876a8c6caa0f68771fc7ef59c78b53
diff --git a/av1/common/od_dering.c b/av1/common/od_dering.c
index c15fd11..a99c86b 100644
--- a/av1/common/od_dering.c
+++ b/av1/common/od_dering.c
@@ -262,6 +262,44 @@
return (threshold * OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
}
+/* Copies one 8x8 block of 16-bit samples from src to dst. dstride and sstride
+   are the distances, in int16_t elements, between the starts of consecutive
+   rows of dst and src respectively. */
+static INLINE void copy_8x8_16bit(int16_t *dst, int dstride, int16_t *src,
+                                  int sstride) {
+  int row;
+  for (row = 0; row < 8; row++) {
+    int16_t *out = &dst[row * dstride];
+    int16_t *in = &src[row * sstride];
+    int col;
+    for (col = 0; col < 8; col++) out[col] = in[col];
+  }
+}
+
+/* Copies one 4x4 block of 16-bit samples from src to dst. dstride and sstride
+   are the distances, in int16_t elements, between the starts of consecutive
+   rows of dst and src respectively. */
+static INLINE void copy_4x4_16bit(int16_t *dst, int dstride, int16_t *src,
+                                  int sstride) {
+  int row;
+  for (row = 0; row < 4; row++) {
+    int16_t *out = &dst[row * dstride];
+    int16_t *in = &src[row * sstride];
+    int col;
+    for (col = 0; col < 4; col++) out[col] = in[col];
+  }
+}
+
+/* Copies only the blocks listed in bskip from src to dst; samples of dst
+   outside those blocks are left untouched. Each of the first dering_count
+   entries of bskip holds one block's {row, column} position in block units.
+   Blocks are 8x8 samples when bsize is 3 and 4x4 samples for any other bsize.
+   dstride and sstride are row pitches in int16_t elements.
+   TODO: Optimize this function for SSE. */
+void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src, int sstride,
+                       unsigned char (*bskip)[2], int dering_count, int bsize) {
+  const int is_8x8 = (bsize == 3);
+  /* log2 of the edge length of the blocks being copied. */
+  const int shift = is_8x8 ? 3 : 2;
+  int n;
+  for (n = 0; n < dering_count; n++) {
+    /* Convert the block position into a sample offset. */
+    const int row0 = bskip[n][0] << shift;
+    const int col0 = bskip[n][1] << shift;
+    int16_t *to = &dst[row0 * dstride + col0];
+    int16_t *from = &src[row0 * sstride + col0];
+    if (is_8x8) {
+      copy_8x8_16bit(to, dstride, from, sstride);
+    } else {
+      copy_4x4_16bit(to, dstride, from, sstride);
+    }
+  }
+}
+
void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
int nhb, int nvb, int sbx, int sby, int nhsb, int nvsb, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
@@ -332,11 +370,8 @@
dir[by][bx]);
}
}
- for (i = 0; i < nvb << bsize; i++) {
- for (j = 0; j < nhb << bsize; j++) {
- in[i * OD_FILT_BSTRIDE + j] = y[i * ystride + j];
- }
- }
+ copy_blocks_16bit(in, OD_FILT_BSTRIDE, y, ystride, bskip, dering_count,
+ bsize);
for (bi = 0; bi < dering_count; bi++) {
by = bskip[bi][0];
bx = bskip[bi][1];