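Let od_dering() handle the 16-bit to 8-bit conversion

od_dering() now takes the destination buffer, its stride and an hbd
flag, and copies the filtered blocks into the destination itself
(as 8-bit or 16-bit samples) instead of leaving that copy to each caller.

Change-Id: Ief5df3d5b1b81f09190d34022a3cb7d500992da2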
diff --git a/av1/common/cdef.c b/av1/common/cdef.c
index 891c4bd..6faccdc 100644
--- a/av1/common/cdef.c
+++ b/av1/common/cdef.c
@@ -78,44 +78,6 @@
return count;
}
-static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride,
- uint16_t *src, int sstride) {
- int i, j;
- for (i = 0; i < 8; i++)
- for (j = 0; j < 8; j++)
- dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
-}
-
-static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride,
- uint16_t *src, int sstride) {
- int i, j;
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++)
- dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
-}
-
-/* TODO: Optimize this function for SSE. */
-void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, uint16_t *src,
- dering_list *dlist, int dering_count,
- int bsize) {
- int bi, bx, by;
- if (bsize == 3) {
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
- &src[bi << 2 * bsize], 1 << bsize);
- }
- } else {
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
- &src[bi << 2 * bsize], 1 << bsize);
- }
- }
-}
-
/* TODO: Optimize this function for SSE. */
static void copy_sb8_16(UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
const uint8_t *src, int src_voffset, int src_hoffset,
@@ -384,27 +346,28 @@
threshold = level << coeff_shift;
if (threshold == 0 && clpf_strength == 0) continue;
- od_dering(dst,
- &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
- dec[pli], dir, NULL, var, pli, dlist, dering_count, threshold,
- clpf_strength, clpf_damping, coeff_shift, 0);
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- copy_dering_16bit_to_16bit(
- &CONVERT_TO_SHORTPTR(
- xd->plane[pli]
- .dst.buf)[xd->plane[pli].dst.stride *
- (MAX_MIB_SIZE * sbr << bsize[pli]) +
- (sbc * MAX_MIB_SIZE << bsize[pli])],
- xd->plane[pli].dst.stride, dst, dlist, dering_count,
- 3 - dec[pli]);
+ od_dering((uint8_t *)&CONVERT_TO_SHORTPTR(
+ xd->plane[pli]
+ .dst.buf)[xd->plane[pli].dst.stride *
+ (MAX_MIB_SIZE * sbr << bsize[pli]) +
+ (sbc * MAX_MIB_SIZE << bsize[pli])],
+ xd->plane[pli].dst.stride, dst,
+ &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
+ dec[pli], dir, NULL, var, pli, dlist, dering_count,
+ threshold, clpf_strength, clpf_damping, coeff_shift, 0, 1);
} else {
#endif
- copy_dering_16bit_to_8bit(
+ od_dering(
&xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
(MAX_MIB_SIZE * sbr << bsize[pli]) +
(sbc * MAX_MIB_SIZE << bsize[pli])],
- xd->plane[pli].dst.stride, dst, dlist, dering_count, bsize[pli]);
+ xd->plane[pli].dst.stride, dst,
+ &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
+ dec[pli], dir, NULL, var, pli, dlist, dering_count, threshold,
+ clpf_strength, clpf_damping, coeff_shift, 0, 0);
+
#if CONFIG_AOM_HIGHBITDEPTH
}
#endif
diff --git a/av1/common/od_dering.c b/av1/common/od_dering.c
index 65ea4f2..6729676 100644
--- a/av1/common/od_dering.c
+++ b/av1/common/od_dering.c
@@ -239,12 +239,50 @@
}
}
-void od_dering(uint16_t *y, uint16_t *in, int xdec,
+static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride,
+ uint16_t *src, int sstride) {
+ int i, j;
+ for (i = 0; i < 8; i++)
+ for (j = 0; j < 8; j++)
+ dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
+}
+
+static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride,
+ uint16_t *src, int sstride) {
+ int i, j;
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 4; j++)
+ dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
+}
+
+/* TODO: Optimize this function for SSE. */
+static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, uint16_t *src,
+ dering_list *dlist, int dering_count,
+ int bsize) {
+ int bi, bx, by;
+ if (bsize == 3) {
+ for (bi = 0; bi < dering_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
+ &src[bi << 2 * bsize], 1 << bsize);
+ }
+ } else {
+ for (bi = 0; bi < dering_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
+ &src[bi << 2 * bsize], 1 << bsize);
+ }
+ }
+}
+
+void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int *dirinit,
int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
dering_list *dlist, int dering_count, int threshold,
int clpf_strength, int clpf_damping, int coeff_shift,
- int skip_dering) {
+ int skip_dering, int hbd) {
int bi;
int bx;
int by;
@@ -292,18 +330,27 @@
}
}
}
- if (!clpf_strength) return;
- if (threshold && !skip_dering)
- copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
- bsize);
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
+ if (clpf_strength) {
+ if (threshold && !skip_dering)
+ copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
+ bsize);
+ for (bi = 0; bi < dering_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
- (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
- : aom_clpf_hblock_hbd)(
- in, &y[((bi - by) << 2 * bsize) - (bx << bsize)], OD_FILT_BSTRIDE,
- 1 << bsize, bx << bsize, by << bsize, 1 << bsize, 1 << bsize,
- clpf_strength << coeff_shift, clpf_damping + coeff_shift);
+ (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
+ : aom_clpf_hblock_hbd)(
+ in, &y[((bi - by) << 2 * bsize) - (bx << bsize)], OD_FILT_BSTRIDE,
+ 1 << bsize, bx << bsize, by << bsize, 1 << bsize, 1 << bsize,
+ clpf_strength << coeff_shift, clpf_damping + coeff_shift);
+ }
+ }
+ if (dst) {
+ if (hbd) {
+ copy_dering_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist,
+ dering_count, 3 - xdec);
+ } else {
+ copy_dering_16bit_to_8bit(dst, dstride, y, dlist, dering_count, bsize);
+ }
}
}
diff --git a/av1/common/od_dering.h b/av1/common/od_dering.h
index a3efec2..9247785 100644
--- a/av1/common/od_dering.h
+++ b/av1/common/od_dering.h
@@ -46,12 +46,12 @@
dering_list *dlist, int dering_count,
int bsize);
-void od_dering(uint16_t *y, uint16_t *in, int xdec,
+void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int *dirinit,
int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
dering_list *dlist, int dering_count, int threshold,
int clpf_strength, int clpf_damping, int coeff_shift,
- int skip_dering);
+ int skip_dering, int hbd);
int od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
const uint16_t *in, int threshold,
int dir);
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index bcb82b7..f2099fd 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -263,14 +263,11 @@
src[pli], (sbr * MAX_MIB_SIZE << bsize[pli]) - yoff,
(sbc * MAX_MIB_SIZE << bsize[pli]) - xoff, stride[pli],
ysize, xsize);
- od_dering(tmp_dst, in, dec[pli], dir, &dirinit, var, pli, dlist,
+ od_dering(clpf_strength ? NULL : (uint8_t *)in, OD_FILT_BSTRIDE,
+ tmp_dst, in, dec[pli], dir, &dirinit, var, pli, dlist,
dering_count, threshold,
clpf_strength + (clpf_strength == 3), clpf_damping,
- coeff_shift, clpf_strength != 0);
- if (clpf_strength == 0) {
- copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, tmp_dst, dlist,
- dering_count, bsize[pli]);
- }
+ coeff_shift, clpf_strength != 0, 1);
mse[pli][sb_count][gi] = compute_dering_mse(
ref_coeff[pli] +
(sbr * MAX_MIB_SIZE << bsize[pli]) * stride[pli] +