Only copy the deringed blocks back into the buffer No change in output Change-Id: I2ddc2d70c6534e7cfd315d66e838410677f91356
diff --git a/av1/common/dering.c b/av1/common/dering.c index 547baf4..e67d1f1 100644 --- a/av1/common/dering.c +++ b/av1/common/dering.c
@@ -75,6 +75,44 @@ return skip; } +static INLINE void copy_8x8_16_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride) { + int i, j; + for (i = 0; i < 8; i++) + for (j = 0; j < 8; j++) + dst[i * dstride + j] = src[i * sstride + j]; +} + +static INLINE void copy_4x4_16_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride) { + int i, j; + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + dst[i * dstride + j] = src[i * sstride + j]; +} + +/* TODO: Optimize this function for SSE. */ +void copy_blocks_16_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride, + unsigned char (*bskip)[2], int dering_count, int bsize) +{ + int bi, bx, by; + if (bsize == 3) { + for (bi = 0; bi < dering_count; bi++) { + by = bskip[bi][0]; + bx = bskip[bi][1]; + copy_8x8_16_8bit(&dst[(by << 3) * dstride + (bx << 3)], + dstride, + &src[(by << 3) * sstride + (bx << 3)], sstride); + } + } else { + for (bi = 0; bi < dering_count; bi++) { + by = bskip[bi][0]; + bx = bskip[bi][1]; + copy_4x4_16_8bit(&dst[(by << 2) * dstride + (bx << 2)], + dstride, + &src[(by << 2) * sstride + (bx << 2)], sstride); + } + } +} + void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd, int global_level) { int r, c; @@ -149,26 +187,26 @@ sbc * bsize[pli] * MAX_MIB_SIZE], stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli, bskip, dering_count, threshold, coeff_shift); - for (r = 0; r < bsize[pli] * nvb; ++r) { - for (c = 0; c < bsize[pli] * nhb; ++c) { #if CONFIG_AOM_HIGHBITDEPTH - if (cm->use_highbitdepth) { - CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf) - [xd->plane[pli].dst.stride * - (bsize[pli] * MAX_MIB_SIZE * sbr + r) + - sbc * bsize[pli] * MAX_MIB_SIZE + c] = - dst[r * MAX_MIB_SIZE * bsize[pli] + c]; - } else { + if (cm->use_highbitdepth) { + copy_blocks_16bit( + (int16_t*)&CONVERT_TO_SHORTPTR( + xd->plane[pli].dst.buf)[xd->plane[pli].dst.stride * + (bsize[pli] * MAX_MIB_SIZE * sbr) + + sbc * bsize[pli] * MAX_MIB_SIZE], + xd->plane[pli].dst.stride, dst, MAX_MIB_SIZE * bsize[pli], bskip, + dering_count, 3 - dec[pli]); + } else { #endif - xd->plane[pli].dst.buf[xd->plane[pli].dst.stride * - (bsize[pli] * MAX_MIB_SIZE * sbr + r) + - sbc * bsize[pli] * MAX_MIB_SIZE + c] = - dst[r * MAX_MIB_SIZE * bsize[pli] + c]; + copy_blocks_16_8bit( + &xd->plane[pli].dst.buf[xd->plane[pli].dst.stride * + (bsize[pli] * MAX_MIB_SIZE * sbr) + + sbc * bsize[pli] * MAX_MIB_SIZE], + xd->plane[pli].dst.stride, dst, MAX_MIB_SIZE * bsize[pli], bskip, + dering_count, 3 - dec[pli]); #if CONFIG_AOM_HIGHBITDEPTH - } -#endif - } } +#endif } } }
diff --git a/av1/common/od_dering.h b/av1/common/od_dering.h index d027f5b..97090e5 100644 --- a/av1/common/od_dering.h +++ b/av1/common/od_dering.h
@@ -36,6 +36,9 @@ typedef void (*od_filter_dering_orthogonal_func)(int16_t *y, int ystride, const int16_t *in, int threshold, int dir); +void copy_blocks_16bit(int16_t *dst, int dstride, int16_t *src, int sstride, + unsigned char (*bskip)[2], int dering_count, int bsize); + void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride, int nvb, int nhb, int sbx, int sby, int nhsb, int nvsb, int xdec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,