Remove CDEF_SINGLEPASS defines
The experiment has been adopted and enabled by default for a while,
and the alternative (non-single-pass) code path has not been
maintained for a long time, so it is now removed.
Change-Id: Iaf22f2969b45b71b2bf67707e131ab4c439b7fa6
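
For reference, the retained single-pass path signals one shared 2-bit
damping field for both the primary and secondary filters (see the
decodeframe.c and bitstream.c hunks below), instead of the separate
1-bit and 2-bit fields used by the removed path. A minimal standalone
sketch of that signaling, using a hypothetical read_bits() stand-in
rather than the real aom_read_bit_buffer API:

    #include <assert.h>
    #include <stdio.h>

    /* Hypothetical stand-in for aom_rb_read_literal(): reads n bits
     * MSB-first from a plain int array. */
    static int read_bits(const int *bits, int *pos, int n) {
      int v = 0;
      while (n--) v = (v << 1) | bits[(*pos)++];
      return v;
    }

    int main(void) {
      /* Encoder side: damping in [3, 6] is coded as (damping - 3) in 2 bits. */
      const int pri_damping = 5;
      const int coded = pri_damping - 3;
      const int bits[2] = { (coded >> 1) & 1, coded & 1 };

      /* Decoder side: a single field sets both dampings. */
      int pos = 0;
      int cdef_pri_damping, cdef_sec_damping;
      cdef_pri_damping = cdef_sec_damping = read_bits(bits, &pos, 2) + 3;

      assert(cdef_pri_damping == 5 && cdef_sec_damping == 5);
      printf("cdef damping = %d\n", cdef_pri_damping);
      return 0;
    }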
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 107f852..b08086e 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -279,30 +279,6 @@
${AOM_AV1_COMMON_INTRIN_NEON}
"${AOM_ROOT}/av1/common/cdef_block_neon.c")
-if (NOT CONFIG_CDEF_SINGLEPASS)
- set(AOM_AV1_COMMON_SOURCES
- ${AOM_AV1_COMMON_SOURCES}
- "${AOM_ROOT}/av1/common/clpf.c"
- "${AOM_ROOT}/av1/common/clpf_simd.h"
- "${AOM_ROOT}/av1/common/cdef_block_simd.h")
-
- set(AOM_AV1_COMMON_INTRIN_SSE2
- ${AOM_AV1_COMMON_INTRIN_SSE2}
- "${AOM_ROOT}/av1/common/clpf_sse2.c")
-
- set(AOM_AV1_COMMON_INTRIN_SSSE3
- ${AOM_AV1_COMMON_INTRIN_SSSE3}
- "${AOM_ROOT}/av1/common/clpf_ssse3.c")
-
- set(AOM_AV1_COMMON_INTRIN_SSE4_1
- ${AOM_AV1_COMMON_INTRIN_SSE4_1}
- "${AOM_ROOT}/av1/common/clpf_sse4.c")
-
- set(AOM_AV1_COMMON_INTRIN_NEON
- ${AOM_AV1_COMMON_INTRIN_NEON}
- "${AOM_ROOT}/av1/common/clpf_neon.c")
-endif ()
-
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/x86/convolve_2d_sse2.c")
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index ad3ff2e..01a2e99 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -94,16 +94,7 @@
endif
AV1_COMMON_SRCS-yes += common/warped_motion.h
AV1_COMMON_SRCS-yes += common/warped_motion.c
-ifeq ($(CONFIG_CDEF_SINGLEPASS),yes)
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/cdef_block_avx2.c
-else
-AV1_COMMON_SRCS-yes += common/clpf.c
-AV1_COMMON_SRCS-yes += common/clpf_simd.h
-AV1_COMMON_SRCS-$(HAVE_SSE2) += common/clpf_sse2.c
-AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/clpf_ssse3.c
-AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4.c
-AV1_COMMON_SRCS-$(HAVE_NEON) += common/clpf_neon.c
-endif
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/cdef_block_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/cdef_block_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/cdef_block_sse4.c
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index b19fe1c..9c1cf66 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -499,20 +499,7 @@
# Deringing Functions
add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
-if (aom_config("CONFIG_CDEF_SINGLEPASS") ne "yes") {
- add_proto qw/void aom_clpf_block_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
- add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
- add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
- add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
- add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
- add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
-} else {
- add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift";
-}
+add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift";
add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h";
add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h";
@@ -521,28 +508,10 @@
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
- if (aom_config("CONFIG_CDEF_SINGLEPASS") eq "yes") {
- specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
- } else {
- specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
- specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
- specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
- specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
- specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
- specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
- specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/;
- specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/;
-
- specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
- }
+ specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
}
# WARPED_MOTION / GLOBAL_MOTION functions
diff --git a/av1/common/cdef.c b/av1/common/cdef.c
index c53b7a2..ed768cd 100644
--- a/av1/common/cdef.c
+++ b/av1/common/cdef.c
@@ -281,9 +281,6 @@
curr_row_cdef[fbc] = 1;
for (int pli = 0; pli < nplanes; pli++) {
-#if !CONFIG_CDEF_SINGLEPASS
- DECLARE_ALIGNED(16, uint16_t, dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE]);
-#endif
int coffset;
int rend, cend;
int pri_damping = cm->cdef_pri_damping;
@@ -399,27 +396,16 @@
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth) {
cdef_filter_fb(
-#if CONFIG_CDEF_SINGLEPASS
NULL,
- &CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
-#else
- (uint8_t *)&CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
-#endif
- [xd->plane[pli].dst.stride *
- (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
- (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
-#if CONFIG_CDEF_SINGLEPASS
+ &CONVERT_TO_SHORTPTR(
+ xd->plane[pli]
+ .dst.buf)[xd->plane[pli].dst.stride *
+ (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
+ (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
xd->plane[pli].dst.stride,
-#else
- xd->plane[pli].dst.stride, dst,
-#endif
&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
-#if CONFIG_CDEF_SINGLEPASS
sec_strength, pri_damping, sec_damping, coeff_shift);
-#else
- sec_strength, sec_damping, pri_damping, coeff_shift, 0, 1);
-#endif
} else {
#endif
cdef_filter_fb(
@@ -427,18 +413,10 @@
.dst.buf[xd->plane[pli].dst.stride *
(MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
-#if CONFIG_CDEF_SINGLEPASS
NULL, xd->plane[pli].dst.stride,
-#else
- xd->plane[pli].dst.stride, dst,
-#endif
&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
-#if CONFIG_CDEF_SINGLEPASS
sec_strength, pri_damping, sec_damping, coeff_shift);
-#else
- sec_strength, sec_damping, pri_damping, coeff_shift, 0, 0);
-#endif
#if CONFIG_HIGHBITDEPTH
}
diff --git a/av1/common/cdef_block.c b/av1/common/cdef_block.c
index 3b04d6a..be5d6bd 100644
--- a/av1/common/cdef_block.c
+++ b/av1/common/cdef_block.c
@@ -21,7 +21,7 @@
#include "./cdef.h"
/* Generated from gen_filter_tables.c. */
-#if !CONFIG_CDEF_SINGLEPASS || CDEF_FULL
+#if CDEF_FULL
DECLARE_ALIGNED(16, const int, cdef_directions[8][3]) = {
{ -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 },
{ 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 },
@@ -123,7 +123,6 @@
return best_dir;
}
-#if CONFIG_CDEF_SINGLEPASS
#if CDEF_FULL
const int cdef_pri_taps[2][3] = { { 3, 2, 1 }, { 2, 2, 2 } };
const int cdef_sec_taps[2][2] = { { 3, 1 }, { 3, 1 } };
@@ -209,67 +208,6 @@
}
}
-#else
-
-/* Smooth in the direction detected. */
-void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in,
- int threshold, int dir, int damping) {
- int i;
- int j;
- int k;
- static const int taps[3] = { 3, 2, 1 };
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- int16_t sum;
- int16_t xx;
- int16_t yy;
- xx = in[i * CDEF_BSTRIDE + j];
- sum = 0;
- for (k = 0; k < 3; k++) {
- int16_t p0;
- int16_t p1;
- p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
- p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
- sum += taps[k] * constrain(p0, threshold, damping);
- sum += taps[k] * constrain(p1, threshold, damping);
- }
- sum = (sum + 8) >> 4;
- yy = xx + sum;
- y[i * ystride + j] = yy;
- }
- }
-}
-
-/* Smooth in the direction detected. */
-void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in,
- int threshold, int dir, int damping) {
- int i;
- int j;
- int k;
- static const int taps[2] = { 4, 1 };
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++) {
- int16_t sum;
- int16_t xx;
- int16_t yy;
- xx = in[i * CDEF_BSTRIDE + j];
- sum = 0;
- for (k = 0; k < 2; k++) {
- int16_t p0;
- int16_t p1;
- p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
- p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
- sum += taps[k] * constrain(p0, threshold, damping);
- sum += taps[k] * constrain(p1, threshold, damping);
- }
- sum = (sum + 8) >> 4;
- yy = xx + sum;
- y[i * ystride + j] = yy;
- }
- }
-}
-#endif
-
/* Compute the primary filter strength for an 8x8 block based on the
directional variance difference. A high variance difference means
that we have a highly directional pattern (e.g. a high contrast
@@ -282,160 +220,26 @@
return var ? (strength * (4 + i) + 8) >> 4 : 0;
}
-#if !CONFIG_CDEF_SINGLEPASS
-void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 8; i++)
- for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
-}
-
-void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
-}
-
-static void copy_block_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
- cdef_list *dlist, int cdef_count,
- int bsize) {
- int bi, bx, by;
-
- if (bsize == BLOCK_8X8) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
- &src[bi << (3 + 3)], 8);
- }
- } else if (bsize == BLOCK_4X8) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
- &src[bi << (3 + 2)], 4);
- copy_4x4_16bit_to_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
- dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
- }
- } else if (bsize == BLOCK_8X4) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
- &src[bi << (2 + 3)], 8);
- copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4],
- dstride, &src[(bi << (2 + 3)) + 4], 8);
- }
- } else {
- assert(bsize == BLOCK_4X4);
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
- &src[bi << (2 + 2)], 4);
- }
- }
-}
-
-void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 8; i++)
- for (j = 0; j < 8; j++)
- dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
-}
-
-void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++)
- dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
-}
-
-static void copy_block_16bit_to_8bit(uint8_t *dst, int dstride,
- const uint16_t *src, cdef_list *dlist,
- int cdef_count, int bsize) {
- int bi, bx, by;
- if (bsize == BLOCK_8X8) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
- &src[bi << (3 + 3)], 8);
- }
- } else if (bsize == BLOCK_4X8) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
- &src[bi << (3 + 2)], 4);
- copy_4x4_16bit_to_8bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
- dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
- }
- } else if (bsize == BLOCK_8X4) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
- &src[bi << (2 + 3)], 8);
- copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
- &src[(bi << (2 + 3)) + 4], 8);
- }
- } else {
- assert(bsize == BLOCK_4X4);
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
- &src[bi << (2 * 2)], 4);
- }
- }
-}
-
-void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
- int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
- int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
- cdef_list *dlist, int cdef_count, int level,
- int sec_strength, int sec_damping, int pri_damping,
- int coeff_shift, int skip_dering, int hbd) {
-#else
-
void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int pri_damping, int sec_damping,
int coeff_shift) {
-#endif
int bi;
int bx;
int by;
int bsize, bsizex, bsizey;
-#if CONFIG_CDEF_SINGLEPASS
int pri_strength = level << coeff_shift;
sec_strength <<= coeff_shift;
-#else
- int threshold = level << coeff_shift;
-
- cdef_direction_func cdef_direction[] = { cdef_direction_4x4,
- cdef_direction_8x8 };
-#endif
sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
bsize =
ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
bsizex = 3 - xdec;
bsizey = 3 - ydec;
-#if CONFIG_CDEF_SINGLEPASS
- if (dirinit && pri_strength == 0 && sec_strength == 0)
-#else
- if (!skip_dering)
-#endif
- {
-#if CONFIG_CDEF_SINGLEPASS
+ if (dirinit && pri_strength == 0 && sec_strength == 0) {
// If we're here, both primary and secondary strengths are 0, and
// we still haven't written anything to y[] yet, so we just copy
// the input to y[]. This is necessary only for av1_cdef_search()
@@ -443,106 +247,16 @@
for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
-#else
- if (pli == 0) {
- if (!dirinit || !*dirinit) {
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
- CDEF_BSTRIDE, &var[by][bx], coeff_shift);
- }
- if (dirinit) *dirinit = 1;
- }
- }
- if (pli == 1 && xdec != ydec) {
- for (bi = 0; bi < cdef_count; bi++) {
- static const int conv422[8] = { 7, 0, 2, 4, 5, 6, 6, 6 };
- static const int conv440[8] = { 1, 2, 2, 2, 3, 4, 6, 0 };
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- dir[by][bx] = (xdec ? conv422 : conv440)[dir[by][bx]];
- }
- }
-
- // Only run dering for non-zero threshold. If we don't dering, we
- // still need to eventually write something out in y[] later.
- if (threshold != 0) {
- assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
- for (bi = 0; bi < cdef_count; bi++) {
- int t = dlist[bi].skip ? 0 : threshold;
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- (cdef_direction[bsize == BLOCK_8X8])(
- &y[bi << (bsizex + bsizey)], 1 << bsizex,
- &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
- pli ? t : adjust_strength(t, var[by][bx]), dir[by][bx],
- pri_damping);
- }
- }
- }
-
- if (sec_strength) {
- if (threshold && !skip_dering)
- copy_block_16bit_to_16bit(in, CDEF_BSTRIDE, y, dlist, cdef_count, bsize);
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- int py = by << bsizey;
- int px = bx << bsizex;
-
- if (dlist[bi].skip) continue;
- if (!dst || hbd) {
- // 16 bit destination if high bitdepth or 8 bit destination not given
- (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
- : aom_clpf_hblock_hbd)(
- dst ? (uint16_t *)dst + py * dstride + px
- : &y[bi << (bsizex + bsizey)],
- in + py * CDEF_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
- CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
- sec_damping);
- } else {
- // Do clpf and write the result to an 8 bit destination
- (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
- : aom_clpf_hblock)(
- dst + py * dstride + px, in + py * CDEF_BSTRIDE + px, dstride,
- CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
- sec_damping);
- }
- }
- } else if (threshold != 0) {
- // No clpf, so copy instead
- if (hbd) {
- copy_block_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist, cdef_count,
- bsize);
- } else {
- copy_block_16bit_to_8bit(dst, dstride, y, dlist, cdef_count, bsize);
- }
- } else if (dirinit) {
- // If we're here, both dering and clpf are off, and we still haven't written
- // anything to y[] yet, so we just copy the input to y[]. This is necessary
- // only for av1_cdef_search() and only av1_cdef_search() sets dirinit.
- for (bi = 0; bi < cdef_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
-#endif
int iy, ix;
// TODO(stemidts/jmvalin): SIMD optimisations
for (iy = 0; iy < 1 << bsizey; iy++)
for (ix = 0; ix < 1 << bsizex; ix++)
-#if CONFIG_CDEF_SINGLEPASS
dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
-#else
- y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
-#endif
in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
}
-#if CONFIG_CDEF_SINGLEPASS
return;
-#endif
}
-#if CONFIG_CDEF_SINGLEPASS
if (pli == 0) {
if (!dirinit || !*dirinit) {
for (bi = 0; bi < cdef_count; bi++) {
@@ -587,5 +301,4 @@
pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1,
coeff_shift);
}
-#endif
}
diff --git a/av1/common/cdef_block.h b/av1/common/cdef_block.h
index bf93802..14fea27 100644
--- a/av1/common/cdef_block.h
+++ b/av1/common/cdef_block.h
@@ -17,9 +17,7 @@
#define CDEF_BLOCKSIZE 64
#define CDEF_BLOCKSIZE_LOG2 6
#define CDEF_NBLOCKS ((1 << MAX_SB_SIZE_LOG2) / 8)
-#if CONFIG_CDEF_SINGLEPASS
#define CDEF_SB_SHIFT (MAX_SB_SIZE_LOG2 - CDEF_BLOCKSIZE_LOG2)
-#endif
/* We need to buffer three vertical lines. */
#define CDEF_VBORDER (3)
@@ -33,7 +31,6 @@
#define CDEF_INBUF_SIZE \
(CDEF_BSTRIDE * ((1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_VBORDER))
-#if CONFIG_CDEF_SINGLEPASS
// Filter configuration
#define CDEF_CAP 1 // 1 = Cap change to largest diff
#define CDEF_FULL 0 // 1 = 7x7 filter, 0 = 5x5 filter
@@ -48,17 +45,12 @@
DECLARE_ALIGNED(16, extern const int, cdef_directions[8][2]);
#endif
-#else // CONFIG_CDEF_SINGLEPASS
-DECLARE_ALIGNED(16, extern const int, cdef_directions[8][3]);
-#endif
-
typedef struct {
uint8_t by;
uint8_t bx;
uint8_t skip;
} cdef_list;
-#if CONFIG_CDEF_SINGLEPASS
typedef void (*cdef_filter_block_func)(uint8_t *dst8, uint16_t *dst16,
int dstride, const uint16_t *in,
int pri_strength, int sec_strength,
@@ -67,26 +59,11 @@
int coeff_shift);
void copy_cdef_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
cdef_list *dlist, int cdef_count, int bsize);
-#else
-typedef void (*cdef_direction_func)(uint16_t *y, int ystride,
- const uint16_t *in, int threshold, int dir,
- int damping);
-#endif
-
-#if CONFIG_CDEF_SINGLEPASS
void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int pri_damping, int sec_damping,
int coeff_shift);
-#else
-void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
- int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
- int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
- cdef_list *dlist, int cdef_count, int level,
- int sec_strength, int sec_damping, int pri_damping,
- int coeff_shift, int skip_dering, int hbd);
-#endif
#endif
diff --git a/av1/common/cdef_block_simd.h b/av1/common/cdef_block_simd.h
index afd2aed..5ee20f3 100644
--- a/av1/common/cdef_block_simd.h
+++ b/av1/common/cdef_block_simd.h
@@ -221,7 +221,6 @@
return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
}
-#if CONFIG_CDEF_SINGLEPASS
// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
unsigned int adjdamp) {
@@ -1081,153 +1080,6 @@
}
}
-#else
-
-void SIMD_FUNC(cdef_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in,
- int threshold, int dir, int damping) {
- int i;
- v128 p0, p1, sum, row, res;
- int o1 = cdef_directions[dir][0];
- int o2 = cdef_directions[dir][1];
-
- if (threshold) damping -= get_msb(threshold);
- for (i = 0; i < 4; i += 2) {
- sum = v128_zero();
- row = v128_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
- v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
-
- // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o1]));
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o1]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o1]));
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 4 * (p0 + p1)
- sum = v128_add_16(sum, v128_shl_n_16(v128_add_16(p0, p1), 2));
-
- // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o2]));
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o2]),
- v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o2]));
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 1 * (p0 + p1)
- sum = v128_add_16(sum, v128_add_16(p0, p1));
-
- // res = row + ((sum + 8) >> 4)
- res = v128_add_16(sum, v128_dup_16(8));
- res = v128_shr_n_s16(res, 4);
- res = v128_add_16(row, res);
- v64_store_aligned(&y[i * ystride], v128_high_v64(res));
- v64_store_aligned(&y[(i + 1) * ystride], v128_low_v64(res));
- }
-}
-
-void SIMD_FUNC(cdef_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in,
- int threshold, int dir, int damping) {
- int i;
- v128 sum, p0, p1, row, res;
- int o1 = cdef_directions[dir][0];
- int o2 = cdef_directions[dir][1];
- int o3 = cdef_directions[dir][2];
-
- if (threshold) damping -= get_msb(threshold);
- for (i = 0; i < 8; i++) {
- sum = v128_zero();
- row = v128_load_aligned(&in[i * CDEF_BSTRIDE]);
-
- // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o1]);
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o1]);
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 3 * (p0 + p1)
- p0 = v128_add_16(p0, p1);
- p0 = v128_add_16(p0, v128_shl_n_16(p0, 1));
- sum = v128_add_16(sum, p0);
-
- // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o2]);
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o2]);
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 2 * (p0 + p1)
- p0 = v128_shl_n_16(v128_add_16(p0, p1), 1);
- sum = v128_add_16(sum, p0);
-
- // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o3]);
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o3]);
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += (p0 + p1)
- p0 = v128_add_16(p0, p1);
- sum = v128_add_16(sum, p0);
-
- // res = row + ((sum + 8) >> 4)
- res = v128_add_16(sum, v128_dup_16(8));
- res = v128_shr_n_s16(res, 4);
- res = v128_add_16(row, res);
- v128_store_unaligned(&y[i * ystride], res);
- }
-}
-
-void SIMD_FUNC(copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 8; i++) {
- v128 row = v128_load_unaligned(&src[i * sstride]);
- row = v128_pack_s16_u8(row, row);
- v64_store_unaligned(&dst[i * dstride], v128_low_v64(row));
- }
-}
-
-void SIMD_FUNC(copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 4; i++) {
- v128 row = v128_load_unaligned(&src[i * sstride]);
- row = v128_pack_s16_u8(row, row);
- u32_store_unaligned(&dst[i * dstride], v128_low_u32(row));
- }
-}
-
-void SIMD_FUNC(copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 8; i++) {
- v128 row = v128_load_unaligned(&src[i * sstride]);
- v128_store_unaligned(&dst[i * dstride], row);
- }
-}
-
-void SIMD_FUNC(copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 4; i++) {
- v64 row = v64_load_unaligned(&src[i * sstride]);
- v64_store_unaligned(&dst[i * dstride], row);
- }
-}
-#endif
-
void SIMD_FUNC(copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride,
const uint8_t *src, int sstride, int v,
int h) {
diff --git a/av1/common/clpf.c b/av1/common/clpf.c
deleted file mode 100644
index d643236..0000000
--- a/av1/common/clpf.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "./av1_rtcd.h"
-#include "./cdef.h"
-#include "aom/aom_image.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-static int clpf_sample(int X, int A, int B, int C, int D, int E, int F, int G,
- int H, int s, unsigned int dmp) {
- int delta = 1 * constrain(A - X, s, dmp) + 3 * constrain(B - X, s, dmp) +
- 1 * constrain(C - X, s, dmp) + 3 * constrain(D - X, s, dmp) +
- 3 * constrain(E - X, s, dmp) + 1 * constrain(F - X, s, dmp) +
- 3 * constrain(G - X, s, dmp) + 1 * constrain(H - X, s, dmp);
- return (8 + delta - (delta < 0)) >> 4;
-}
-
-static int clpf_hsample(int X, int A, int B, int C, int D, int s,
- unsigned int dmp) {
- int delta = 1 * constrain(A - X, s, dmp) + 3 * constrain(B - X, s, dmp) +
- 3 * constrain(C - X, s, dmp) + 1 * constrain(D - X, s, dmp);
- return (4 + delta - (delta < 0)) >> 3;
-}
-
-void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey, unsigned int strength,
- unsigned int damping) {
- int x, y;
-
- for (y = 0; y < sizey; y++) {
- for (x = 0; x < sizex; x++) {
- const int X = src[y * sstride + x];
- const int A = src[(y - 2) * sstride + x];
- const int B = src[(y - 1) * sstride + x];
- const int C = src[y * sstride + x - 2];
- const int D = src[y * sstride + x - 1];
- const int E = src[y * sstride + x + 1];
- const int F = src[y * sstride + x + 2];
- const int G = src[(y + 1) * sstride + x];
- const int H = src[(y + 2) * sstride + x];
- const int delta =
- clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
- dst[y * dstride + x] = X + delta;
- }
- }
-}
-
-// Identical to aom_clpf_block_c() apart from "dst".
-void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int damping) {
- int x, y;
-
- for (y = 0; y < sizey; y++) {
- for (x = 0; x < sizex; x++) {
- const int X = src[y * sstride + x];
- const int A = src[(y - 2) * sstride + x];
- const int B = src[(y - 1) * sstride + x];
- const int C = src[y * sstride + x - 2];
- const int D = src[y * sstride + x - 1];
- const int E = src[y * sstride + x + 1];
- const int F = src[y * sstride + x + 2];
- const int G = src[(y + 1) * sstride + x];
- const int H = src[(y + 2) * sstride + x];
- const int delta =
- clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
- dst[y * dstride + x] = X + delta;
- }
- }
-}
-
-// Vertically restricted filter
-void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey, unsigned int strength,
- unsigned int damping) {
- int x, y;
-
- for (y = 0; y < sizey; y++) {
- for (x = 0; x < sizex; x++) {
- const int X = src[y * sstride + x];
- const int A = src[y * sstride + x - 2];
- const int B = src[y * sstride + x - 1];
- const int C = src[y * sstride + x + 1];
- const int D = src[y * sstride + x + 2];
- const int delta = clpf_hsample(X, A, B, C, D, strength, damping);
- dst[y * dstride + x] = X + delta;
- }
- }
-}
-
-void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int damping) {
- int x, y;
-
- for (y = 0; y < sizey; y++) {
- for (x = 0; x < sizex; x++) {
- const int X = src[y * sstride + x];
- const int A = src[y * sstride + x - 2];
- const int B = src[y * sstride + x - 1];
- const int C = src[y * sstride + x + 1];
- const int D = src[y * sstride + x + 2];
- const int delta = clpf_hsample(X, A, B, C, D, strength, damping);
- dst[y * dstride + x] = X + delta;
- }
- }
-}
diff --git a/av1/common/clpf_neon.c b/av1/common/clpf_neon.c
deleted file mode 100644
index f1a004c..0000000
--- a/av1/common/clpf_neon.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_neon
-#include "./clpf_simd.h"
diff --git a/av1/common/clpf_simd.h b/av1/common/clpf_simd.h
deleted file mode 100644
index c7ffc56..0000000
--- a/av1/common/clpf_simd.h
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "./av1_rtcd.h"
-#include "aom_ports/bitops.h"
-#include "aom_ports/mem.h"
-
-// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
-SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
- unsigned int adjdamp) {
- v128 diff = v128_sub_16(a, b);
- const v128 sign = v128_shr_n_s16(diff, 15);
- diff = v128_abs_s16(diff);
- const v128 s =
- v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp));
- return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
-}
-
-// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
-SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
- unsigned int adjdamp) {
- const v256 diff16 = v256_sub_16(a, b);
- v128 diff = v128_pack_s16_s8(v256_high_v128(diff16), v256_low_v128(diff16));
- const v128 sign = v128_cmplt_s8(diff, v128_zero());
- diff = v128_abs_s8(diff);
- return v128_xor(
- v128_add_8(sign,
- v128_min_u8(diff, v128_ssub_u8(v128_dup_8(strength),
- v128_shr_u8(diff, adjdamp)))),
- sign);
-}
-
-// delta = 1/16 * constrain(a, x, s, d) + 3/16 * constrain(b, x, s, d) +
-// 1/16 * constrain(c, x, s, d) + 3/16 * constrain(d, x, s, d) +
-// 3/16 * constrain(e, x, s, d) + 1/16 * constrain(f, x, s, d) +
-// 3/16 * constrain(g, x, s, d) + 1/16 * constrain(h, x, s, d)
-SIMD_INLINE v128 calc_delta(v256 x, v256 a, v256 b, v256 c, v256 d, v256 e,
- v256 f, v256 g, v256 h, unsigned int s,
- unsigned int dmp) {
- const v128 bdeg =
- v128_add_8(v128_add_8(constrain(b, x, s, dmp), constrain(d, x, s, dmp)),
- v128_add_8(constrain(e, x, s, dmp), constrain(g, x, s, dmp)));
- const v128 delta = v128_add_8(
- v128_add_8(v128_add_8(constrain(a, x, s, dmp), constrain(c, x, s, dmp)),
- v128_add_8(constrain(f, x, s, dmp), constrain(h, x, s, dmp))),
- v128_add_8(v128_add_8(bdeg, bdeg), bdeg));
- return v128_add_8(
- v128_pack_s16_u8(v256_high_v128(x), v256_low_v128(x)),
- v128_shr_s8(
- v128_add_8(v128_dup_8(8),
- v128_add_8(delta, v128_cmplt_s8(delta, v128_zero()))),
- 4));
-}
-
-// delta = 1/8 * constrain(a, x, s, d) + 3/8 * constrain(b, x, s, d) +
-// 3/8 * constrain(c, x, s, d) + 1/8 * constrain(d, x, s, d) +
-SIMD_INLINE v128 calc_hdelta(v256 x, v256 a, v256 b, v256 c, v256 d,
- unsigned int s, unsigned int dmp) {
- const v128 bc = v128_add_8(constrain(b, x, s, dmp), constrain(c, x, s, dmp));
- const v128 delta =
- v128_add_8(v128_add_8(constrain(a, x, s, dmp), constrain(d, x, s, dmp)),
- v128_add_8(v128_add_8(bc, bc), bc));
- return v128_add_8(
- v128_pack_s16_u8(v256_high_v128(x), v256_low_v128(x)),
- v128_shr_s8(
- v128_add_8(v128_dup_8(4),
- v128_add_8(delta, v128_cmplt_s8(delta, v128_zero()))),
- 3));
-}
-
-// Process blocks of width 8, two lines at a time, 8 bit.
-static void SIMD_FUNC(clpf_block8)(uint8_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 2) {
- const v128 l1 = v128_load_aligned(src);
- const v128 l2 = v128_load_aligned(src + sstride);
- const v128 l3 = v128_load_aligned(src - sstride);
- const v128 l4 = v128_load_aligned(src + 2 * sstride);
- const v256 a = v256_from_v128(v128_load_aligned(src - 2 * sstride), l3);
- const v256 b = v256_from_v128(l3, l1);
- const v256 g = v256_from_v128(l2, l4);
- const v256 h = v256_from_v128(l4, v128_load_aligned(src + 3 * sstride));
- const v256 c = v256_from_v128(v128_load_unaligned(src - 2),
- v128_load_unaligned(src - 2 + sstride));
- const v256 d = v256_from_v128(v128_load_unaligned(src - 1),
- v128_load_unaligned(src - 1 + sstride));
- const v256 e = v256_from_v128(v128_load_unaligned(src + 1),
- v128_load_unaligned(src + 1 + sstride));
- const v256 f = v256_from_v128(v128_load_unaligned(src + 2),
- v128_load_unaligned(src + 2 + sstride));
- const v128 o = calc_delta(v256_from_v128(l1, l2), a, b, c, d, e, f, g, h,
- strength, adjdamp);
-
- v64_store_aligned(dst, v128_high_v64(o));
- v64_store_aligned(dst + dstride, v128_low_v64(o));
- src += sstride * 2;
- dst += dstride * 2;
- }
-}
-
-// Process blocks of width 4, four lines at a time, 8 bit.
-static void SIMD_FUNC(clpf_block4)(uint8_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 4) {
- const v64 l0 = v64_load_aligned(src - 2 * sstride);
- const v64 l1 = v64_load_aligned(src - sstride);
- const v64 l2 = v64_load_aligned(src);
- const v64 l3 = v64_load_aligned(src + sstride);
- const v64 l4 = v64_load_aligned(src + 2 * sstride);
- const v64 l5 = v64_load_aligned(src + 3 * sstride);
- const v64 l6 = v64_load_aligned(src + 4 * sstride);
- const v64 l7 = v64_load_aligned(src + 5 * sstride);
- const v128 o =
- calc_delta(v256_from_v64(l2, l3, l4, l5), v256_from_v64(l0, l1, l2, l3),
- v256_from_v64(l1, l2, l3, l4),
- v256_from_v64(v64_load_unaligned(src - 2),
- v64_load_unaligned(src + sstride - 2),
- v64_load_unaligned(src + 2 * sstride - 2),
- v64_load_unaligned(src + 3 * sstride - 2)),
- v256_from_v64(v64_load_unaligned(src - 1),
- v64_load_unaligned(src + sstride - 1),
- v64_load_unaligned(src + 2 * sstride - 1),
- v64_load_unaligned(src + 3 * sstride - 1)),
- v256_from_v64(v64_load_unaligned(src + 1),
- v64_load_unaligned(src + sstride + 1),
- v64_load_unaligned(src + 2 * sstride + 1),
- v64_load_unaligned(src + 3 * sstride + 1)),
- v256_from_v64(v64_load_unaligned(src + 2),
- v64_load_unaligned(src + sstride + 2),
- v64_load_unaligned(src + 2 * sstride + 2),
- v64_load_unaligned(src + 3 * sstride + 2)),
- v256_from_v64(l3, l4, l5, l6), v256_from_v64(l4, l5, l6, l7),
- strength, adjdamp);
-
- u32_store_aligned(dst, v128_low_u32(v128_shr_n_byte(o, 12)));
- u32_store_aligned(dst + dstride, v128_low_u32(v128_shr_n_byte(o, 8)));
- u32_store_aligned(dst + 2 * dstride, v128_low_u32(v128_shr_n_byte(o, 4)));
- u32_store_aligned(dst + 3 * dstride, v128_low_u32(o));
-
- dst += 4 * dstride;
- src += 4 * sstride;
- }
-}
-
-static void SIMD_FUNC(clpf_hblock8)(uint8_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 2) {
- const v256 x = v256_from_v128(v128_load_aligned(src),
- v128_load_aligned(src + sstride));
- const v256 a = v256_from_v128(v128_load_unaligned(src - 2),
- v128_load_unaligned(src - 2 + sstride));
- const v256 b = v256_from_v128(v128_load_unaligned(src - 1),
- v128_load_unaligned(src - 1 + sstride));
- const v256 c = v256_from_v128(v128_load_unaligned(src + 1),
- v128_load_unaligned(src + 1 + sstride));
- const v256 d = v256_from_v128(v128_load_unaligned(src + 2),
- v128_load_unaligned(src + 2 + sstride));
- const v128 o = calc_hdelta(x, a, b, c, d, strength, adjdamp);
-
- v64_store_aligned(dst, v128_high_v64(o));
- v64_store_aligned(dst + dstride, v128_low_v64(o));
- src += sstride * 2;
- dst += dstride * 2;
- }
-}
-
-// Process blocks of width 4, four lines at a time, 8 bit.
-static void SIMD_FUNC(clpf_hblock4)(uint8_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 4) {
- const v256 a = v256_from_v64(v64_load_unaligned(src - 2),
- v64_load_unaligned(src + sstride - 2),
- v64_load_unaligned(src + 2 * sstride - 2),
- v64_load_unaligned(src + 3 * sstride - 2));
- const v256 b = v256_from_v64(v64_load_unaligned(src - 1),
- v64_load_unaligned(src + sstride - 1),
- v64_load_unaligned(src + 2 * sstride - 1),
- v64_load_unaligned(src + 3 * sstride - 1));
- const v256 c = v256_from_v64(v64_load_unaligned(src + 1),
- v64_load_unaligned(src + sstride + 1),
- v64_load_unaligned(src + 2 * sstride + 1),
- v64_load_unaligned(src + 3 * sstride + 1));
- const v256 d = v256_from_v64(v64_load_unaligned(src + 2),
- v64_load_unaligned(src + sstride + 2),
- v64_load_unaligned(src + 2 * sstride + 2),
- v64_load_unaligned(src + 3 * sstride + 2));
-
- const v128 o = calc_hdelta(
- v256_from_v64(v64_load_aligned(src), v64_load_aligned(src + sstride),
- v64_load_aligned(src + 2 * sstride),
- v64_load_aligned(src + 3 * sstride)),
- a, b, c, d, strength, adjdamp);
-
- u32_store_aligned(dst, v128_low_u32(v128_shr_n_byte(o, 12)));
- u32_store_aligned(dst + dstride, v128_low_u32(v128_shr_n_byte(o, 8)));
- u32_store_aligned(dst + 2 * dstride, v128_low_u32(v128_shr_n_byte(o, 4)));
- u32_store_aligned(dst + 3 * dstride, v128_low_u32(o));
-
- dst += 4 * dstride;
- src += 4 * sstride;
- }
-}
-
-void SIMD_FUNC(aom_clpf_block)(uint8_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int dmp) {
- if ((sizex != 4 && sizex != 8) || ((sizey & 3) && sizex == 4)) {
- // Fallback to C for odd sizes:
- // * block widths not 4 or 8
- // * block heights not a multiple of 4 if the block width is 4
- aom_clpf_block_c(dst, src, dstride, sstride, sizex, sizey, strength, dmp);
- } else {
- (sizex == 4 ? SIMD_FUNC(clpf_block4) : SIMD_FUNC(clpf_block8))(
- dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
- }
-}
-
-void SIMD_FUNC(aom_clpf_hblock)(uint8_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int dmp) {
- if ((sizex != 4 && sizex != 8) || ((sizey & 3) && sizex == 4)) {
- // Fallback to C for odd sizes:
- // * block widths not 4 or 8
- // * block heights not a multiple of 4 if the block width is 4
- aom_clpf_hblock_c(dst, src, dstride, sstride, sizex, sizey, strength, dmp);
- } else {
- (sizex == 4 ? SIMD_FUNC(clpf_hblock4) : SIMD_FUNC(clpf_hblock8))(
- dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
- }
-}
-
-// delta = 1/16 * constrain(a, x, s, d) + 3/16 * constrain(b, x, s, d) +
-// 1/16 * constrain(c, x, s, d) + 3/16 * constrain(d, x, s, d) +
-// 3/16 * constrain(e, x, s, d) + 1/16 * constrain(f, x, s, d) +
-// 3/16 * constrain(g, x, s, d) + 1/16 * constrain(h, x, s, d)
-SIMD_INLINE v128 calc_delta_hbd(v128 x, v128 a, v128 b, v128 c, v128 d, v128 e,
- v128 f, v128 g, v128 h, unsigned int s,
- unsigned int dmp) {
- const v128 bdeg = v128_add_16(
- v128_add_16(constrain16(b, x, s, dmp), constrain16(d, x, s, dmp)),
- v128_add_16(constrain16(e, x, s, dmp), constrain16(g, x, s, dmp)));
- const v128 delta = v128_add_16(
- v128_add_16(
- v128_add_16(constrain16(a, x, s, dmp), constrain16(c, x, s, dmp)),
- v128_add_16(constrain16(f, x, s, dmp), constrain16(h, x, s, dmp))),
- v128_add_16(v128_add_16(bdeg, bdeg), bdeg));
- return v128_add_16(
- x,
- v128_shr_s16(
- v128_add_16(v128_dup_16(8),
- v128_add_16(delta, v128_cmplt_s16(delta, v128_zero()))),
- 4));
-}
-
-static void calc_delta_hbd4(v128 o, v128 a, v128 b, v128 c, v128 d, v128 e,
- v128 f, v128 g, v128 h, uint16_t *dst,
- unsigned int s, unsigned int dmp, int dstride) {
- o = calc_delta_hbd(o, a, b, c, d, e, f, g, h, s, dmp);
- v64_store_aligned(dst, v128_high_v64(o));
- v64_store_aligned(dst + dstride, v128_low_v64(o));
-}
-
-static void calc_delta_hbd8(v128 o, v128 a, v128 b, v128 c, v128 d, v128 e,
- v128 f, v128 g, v128 h, uint16_t *dst,
- unsigned int s, unsigned int adjdamp) {
- v128_store_aligned(dst,
- calc_delta_hbd(o, a, b, c, d, e, f, g, h, s, adjdamp));
-}
-
-// delta = 1/16 * constrain(a, x, s, dmp) + 3/16 * constrain(b, x, s, dmp) +
-// 3/16 * constrain(c, x, s, dmp) + 1/16 * constrain(d, x, s, dmp)
-SIMD_INLINE v128 calc_hdelta_hbd(v128 x, v128 a, v128 b, v128 c, v128 d,
- unsigned int s, unsigned int dmp) {
- const v128 bc =
- v128_add_16(constrain16(b, x, s, dmp), constrain16(c, x, s, dmp));
- const v128 delta = v128_add_16(
- v128_add_16(constrain16(a, x, s, dmp), constrain16(d, x, s, dmp)),
- v128_add_16(v128_add_16(bc, bc), bc));
- return v128_add_16(
- x,
- v128_shr_s16(
- v128_add_16(v128_dup_16(4),
- v128_add_16(delta, v128_cmplt_s16(delta, v128_zero()))),
- 3));
-}
-
-static void calc_hdelta_hbd4(v128 o, v128 a, v128 b, v128 c, v128 d,
- uint16_t *dst, unsigned int s,
- unsigned int adjdamp, int dstride) {
- o = calc_hdelta_hbd(o, a, b, c, d, s, adjdamp);
- v64_store_aligned(dst, v128_high_v64(o));
- v64_store_aligned(dst + dstride, v128_low_v64(o));
-}
-
-static void calc_hdelta_hbd8(v128 o, v128 a, v128 b, v128 c, v128 d,
- uint16_t *dst, unsigned int s,
- unsigned int adjdamp) {
- v128_store_aligned(dst, calc_hdelta_hbd(o, a, b, c, d, s, adjdamp));
-}
-
-// Process blocks of width 4, two lines at time.
-static void SIMD_FUNC(clpf_block_hbd4)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 2) {
- const v64 l1 = v64_load_aligned(src);
- const v64 l2 = v64_load_aligned(src + sstride);
- const v64 l3 = v64_load_aligned(src - sstride);
- const v64 l4 = v64_load_aligned(src + 2 * sstride);
- const v128 a = v128_from_v64(v64_load_aligned(src - 2 * sstride), l3);
- const v128 b = v128_from_v64(l3, l1);
- const v128 g = v128_from_v64(l2, l4);
- const v128 h = v128_from_v64(l4, v64_load_aligned(src + 3 * sstride));
- const v128 c = v128_from_v64(v64_load_unaligned(src - 2),
- v64_load_unaligned(src - 2 + sstride));
- const v128 d = v128_from_v64(v64_load_unaligned(src - 1),
- v64_load_unaligned(src - 1 + sstride));
- const v128 e = v128_from_v64(v64_load_unaligned(src + 1),
- v64_load_unaligned(src + 1 + sstride));
- const v128 f = v128_from_v64(v64_load_unaligned(src + 2),
- v64_load_unaligned(src + 2 + sstride));
-
- calc_delta_hbd4(v128_from_v64(l1, l2), a, b, c, d, e, f, g, h, dst,
- strength, adjdamp, dstride);
- src += sstride * 2;
- dst += dstride * 2;
- }
-}
-
-// The most simple case. Start here if you need to understand the functions.
-static void SIMD_FUNC(clpf_block_hbd)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y++) {
- const v128 o = v128_load_aligned(src);
- const v128 a = v128_load_aligned(src - 2 * sstride);
- const v128 b = v128_load_aligned(src - 1 * sstride);
- const v128 g = v128_load_aligned(src + sstride);
- const v128 h = v128_load_aligned(src + 2 * sstride);
- const v128 c = v128_load_unaligned(src - 2);
- const v128 d = v128_load_unaligned(src - 1);
- const v128 e = v128_load_unaligned(src + 1);
- const v128 f = v128_load_unaligned(src + 2);
-
- calc_delta_hbd8(o, a, b, c, d, e, f, g, h, dst, strength, adjdamp);
- src += sstride;
- dst += dstride;
- }
-}
-
-// Process blocks of width 4, horizontal filter, two lines at time.
-static void SIMD_FUNC(clpf_hblock_hbd4)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y += 2) {
- const v128 a = v128_from_v64(v64_load_unaligned(src - 2),
- v64_load_unaligned(src - 2 + sstride));
- const v128 b = v128_from_v64(v64_load_unaligned(src - 1),
- v64_load_unaligned(src - 1 + sstride));
- const v128 c = v128_from_v64(v64_load_unaligned(src + 1),
- v64_load_unaligned(src + 1 + sstride));
- const v128 d = v128_from_v64(v64_load_unaligned(src + 2),
- v64_load_unaligned(src + 2 + sstride));
-
- calc_hdelta_hbd4(v128_from_v64(v64_load_unaligned(src),
- v64_load_unaligned(src + sstride)),
- a, b, c, d, dst, strength, adjdamp, dstride);
- src += sstride * 2;
- dst += dstride * 2;
- }
-}
-
-// Process blocks of width 8, horizontal filter, two lines at time.
-static void SIMD_FUNC(clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizey,
- unsigned int strength,
- unsigned int adjdamp) {
- int y;
-
- for (y = 0; y < sizey; y++) {
- const v128 o = v128_load_aligned(src);
- const v128 a = v128_load_unaligned(src - 2);
- const v128 b = v128_load_unaligned(src - 1);
- const v128 c = v128_load_unaligned(src + 1);
- const v128 d = v128_load_unaligned(src + 2);
-
- calc_hdelta_hbd8(o, a, b, c, d, dst, strength, adjdamp);
- src += sstride;
- dst += dstride;
- }
-}
-
-void SIMD_FUNC(aom_clpf_block_hbd)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizex,
- int sizey, unsigned int strength,
- unsigned int dmp) {
- if ((sizex != 4 && sizex != 8) || ((sizey & 1) && sizex == 4)) {
- // Fallback to C for odd sizes:
- // * block width not 4 or 8
- // * block heights not a multiple of 2 if the block width is 4
- aom_clpf_block_hbd_c(dst, src, dstride, sstride, sizex, sizey, strength,
- dmp);
- } else {
- (sizex == 4 ? SIMD_FUNC(clpf_block_hbd4) : SIMD_FUNC(clpf_block_hbd))(
- dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
- }
-}
-
-void SIMD_FUNC(aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizex,
- int sizey, unsigned int strength,
- unsigned int dmp) {
- if ((sizex != 4 && sizex != 8) || ((sizey & 1) && sizex == 4)) {
- // Fallback to C for odd sizes:
- // * block width not 4 or 8
- // * block heights not a multiple of 2 if the block width is 4
- aom_clpf_hblock_hbd_c(dst, src, dstride, sstride, sizex, sizey, strength,
- dmp);
- } else {
- (sizex == 4 ? SIMD_FUNC(clpf_hblock_hbd4) : SIMD_FUNC(clpf_hblock_hbd))(
- dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
- }
-}
diff --git a/av1/common/clpf_sse2.c b/av1/common/clpf_sse2.c
deleted file mode 100644
index e29c2ab..0000000
--- a/av1/common/clpf_sse2.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_sse2
-#include "./clpf_simd.h"
diff --git a/av1/common/clpf_sse4.c b/av1/common/clpf_sse4.c
deleted file mode 100644
index 537139f..0000000
--- a/av1/common/clpf_sse4.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_sse4_1
-#include "./clpf_simd.h"
diff --git a/av1/common/clpf_ssse3.c b/av1/common/clpf_ssse3.c
deleted file mode 100644
index d7ed8de..0000000
--- a/av1/common/clpf_ssse3.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_ssse3
-#include "./clpf_simd.h"
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 2a31d39..436506a 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1034,12 +1034,7 @@
#if CONFIG_INTRABC
if (cm->allow_intrabc && NO_FILTER_FOR_IBC) return;
#endif // CONFIG_INTRABC
-#if CONFIG_CDEF_SINGLEPASS
cm->cdef_pri_damping = cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
-#else
- cm->cdef_pri_damping = aom_rb_read_literal(rb, 1) + 5;
- cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
-#endif
cm->cdef_bits = aom_rb_read_literal(rb, 2);
cm->nb_cdef_strengths = 1 << cm->cdef_bits;
for (int i = 0; i < cm->nb_cdef_strengths; i++) {
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index a0f67e1..99680b3 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2501,13 +2501,8 @@
if (cm->allow_intrabc && NO_FILTER_FOR_IBC) return;
#endif // CONFIG_INTRABC
int i;
-#if CONFIG_CDEF_SINGLEPASS
aom_wb_write_literal(wb, cm->cdef_pri_damping - 3, 2);
assert(cm->cdef_pri_damping == cm->cdef_sec_damping);
-#else
- aom_wb_write_literal(wb, cm->cdef_pri_damping - 5, 1);
- aom_wb_write_literal(wb, cm->cdef_sec_damping - 3, 2);
-#endif
aom_wb_write_literal(wb, cm->cdef_bits, 2);
for (i = 0; i < cm->nb_cdef_strengths; i++) {
aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index 09fe563..be97b65 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -68,16 +68,11 @@
uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
int fast) {
uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
-#if !CONFIG_CDEF_SINGLEPASS
- const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
-#endif
int i, j;
uint64_t best_tot_mse = (uint64_t)1 << 63;
int best_id0 = 0;
int best_id1 = 0;
-#if CONFIG_CDEF_SINGLEPASS
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
-#endif
memset(tot_mse, 0, sizeof(tot_mse));
for (i = 0; i < sb_count; i++) {
int gi;
@@ -314,11 +309,7 @@
int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
uint64_t(*mse[2])[TOTAL_STRENGTHS];
-#if CONFIG_CDEF_SINGLEPASS
int pri_damping = 3 + (cm->base_qindex >> 6);
-#else
- int pri_damping = 6;
-#endif
int sec_damping = 3 + (cm->base_qindex >> 6);
int i;
int nb_strengths;
@@ -449,7 +440,6 @@
int xsize = (nhb << mi_wide_l2[pli]) +
CDEF_HBORDER * (fbc != nhfb - 1) + xoff;
sec_strength = gi % CDEF_SEC_STRENGTHS;
-#if CONFIG_CDEF_SINGLEPASS
copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
src[pli],
(fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
@@ -459,19 +449,6 @@
dir, &dirinit, var, pli, dlist, cdef_count, threshold,
sec_strength + (sec_strength == 3), pri_damping,
sec_damping, coeff_shift);
-#else
- if (sec_strength == 0)
- copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
- src[pli],
- (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
- (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
- stride[pli], ysize, xsize);
- cdef_filter_fb(sec_strength ? NULL : (uint8_t *)in, CDEF_BSTRIDE,
- tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var,
- pli, dlist, cdef_count, threshold,
- sec_strength + (sec_strength == 3), sec_damping,
- pri_damping, coeff_shift, sec_strength != 0, 1);
-#endif
curr_mse = compute_cdef_dist(
ref_coeff[pli] +
(fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 2bab3ec..6419acb 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -110,7 +110,6 @@
set(CONFIG_AMVR 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_AOM_QM 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_BGSPRITE 0 CACHE NUMBER "AV1 experiment flag.")
-set(CONFIG_CDEF_SINGLEPASS 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CFL 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_COLORSPACE_HEADERS 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DAALA_TX 0 CACHE NUMBER "AV1 experiment flag.")
diff --git a/configure b/configure
index 2ffeafe..ac58848 100755
--- a/configure
+++ b/configure
@@ -248,7 +248,6 @@
"
EXPERIMENT_LIST="
fp_mb_stats
- cdef_singlepass
rect_tx_ext
rect_tx_ext_intra
short_filter
@@ -491,7 +490,6 @@
# Enable adopted experiments by default
soft_enable adopted_experiments
if enabled adopted_experiments; then
- soft_enable cdef_singlepass
soft_enable ext_intra
soft_enable intra_edge
soft_enable mv_compress
diff --git a/test/clpf_test.cc b/test/clpf_test.cc
deleted file mode 100644
index ecb0428..0000000
--- a/test/clpf_test.cc
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/common/cdef_block.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-typedef void (*clpf_block_t)(uint8_t *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int bitdepth);
-
-typedef std::tr1::tuple<clpf_block_t, clpf_block_t, int, int>
- clpf_block_param_t;
-
-class CDEFClpfBlockTest : public ::testing::TestWithParam<clpf_block_param_t> {
- public:
- virtual ~CDEFClpfBlockTest() {}
- virtual void SetUp() {
- clpf = GET_PARAM(0);
- ref_clpf = GET_PARAM(1);
- sizex = GET_PARAM(2);
- sizey = GET_PARAM(3);
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- int sizex;
- int sizey;
- clpf_block_t clpf;
- clpf_block_t ref_clpf;
-};
-
-typedef CDEFClpfBlockTest CDEFClpfSpeedTest;
-
-#if CONFIG_HIGHBITDEPTH
-typedef void (*clpf_block_hbd_t)(uint16_t *dst, const uint16_t *src,
- int dstride, int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int bitdepth);
-
-typedef std::tr1::tuple<clpf_block_hbd_t, clpf_block_hbd_t, int, int>
- clpf_block_hbd_param_t;
-
-class CDEFClpfBlockHbdTest
- : public ::testing::TestWithParam<clpf_block_hbd_param_t> {
- public:
- virtual ~CDEFClpfBlockHbdTest() {}
- virtual void SetUp() {
- clpf = GET_PARAM(0);
- ref_clpf = GET_PARAM(1);
- sizex = GET_PARAM(2);
- sizey = GET_PARAM(3);
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- int sizex;
- int sizey;
- clpf_block_hbd_t clpf;
- clpf_block_hbd_t ref_clpf;
-};
-
-typedef CDEFClpfBlockHbdTest ClpfHbdSpeedTest;
-#endif
-
-template <typename pixel>
-void test_clpf(int w, int h, unsigned int depth, unsigned int iterations,
- void (*clpf)(pixel *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int bitdepth),
- void (*ref_clpf)(pixel *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int bitdepth)) {
- const int size = 24;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, s[size * size]);
- DECLARE_ALIGNED(16, pixel, d[size * size]);
- DECLARE_ALIGNED(16, pixel, ref_d[size * size]);
- memset(ref_d, 0, size * size * sizeof(*ref_d));
- memset(d, 0, size * size * sizeof(*d));
-
- int error = 0, pos = 0, xpos = 8, ypos = 8;
- unsigned int strength = 0, bits, level, count, damp = 0, boundary = 0;
-
- assert(size >= w + 16 && size >= h + 16);
- assert(depth >= 8);
-
- // Test every combination of:
- // * Input with up to <depth> bits of noise
- // * Noise level around every value from 0 to (1<<depth)-1
- // * All strengths
- // * All dampings
- // * Boundaries
- // If clpf and ref_clpf are the same, we're just testing speed
- for (boundary = 0; boundary < 16; boundary++) {
- for (count = 0; count < iterations; count++) {
- for (level = 0; level < (1U << depth) && !error;
- level += (1 + 4 * !!boundary) << (depth - 8)) {
- for (bits = 1; bits <= depth && !error; bits++) {
- for (damp = 4 + depth - 8; damp < depth - 1 && !error; damp++) {
- for (int i = 0; i < size * size; i++)
- s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
- (1 << depth) - 1);
- if (boundary) {
- if (boundary & 1) { // Left
- for (int i = 0; i < size; i++)
- for (int j = 0; j < xpos; j++)
- s[i * size + j] = CDEF_VERY_LARGE;
- }
- if (boundary & 2) { // Right
- for (int i = 0; i < size; i++)
- for (int j = xpos + w; j < size; j++)
- s[i * size + j] = CDEF_VERY_LARGE;
- }
- if (boundary & 4) { // Above
- for (int i = 0; i < ypos; i++)
- for (int j = 0; j < size; j++)
- s[i * size + j] = CDEF_VERY_LARGE;
- }
- if (boundary & 8) { // Below
- for (int i = ypos + h; i < size; i++)
- for (int j = 0; j < size; j++)
- s[i * size + j] = CDEF_VERY_LARGE;
- }
- }
- for (strength = depth - 8; strength < depth - 5 && !error;
- strength += !error) {
- ref_clpf(ref_d + ypos * size + xpos, s + ypos * size + xpos, size,
- size, w, h, 1 << strength, damp);
- if (clpf != ref_clpf)
- ASM_REGISTER_STATE_CHECK(clpf(d + ypos * size + xpos,
- s + ypos * size + xpos, size,
- size, w, h, 1 << strength, damp));
- if (ref_clpf != clpf) {
- for (pos = 0; pos < size * size && !error; pos++) {
- error = ref_d[pos] != d[pos];
- }
- }
- }
- }
- }
- }
- }
- }
-
- pos--;
- EXPECT_EQ(0, error)
- << "Error: CDEFClpfBlockTest, SIMD and C mismatch." << std::endl
- << "First error at " << pos % size << "," << pos / size << " ("
- << (int16_t)ref_d[pos] << " != " << (int16_t)d[pos] << ") " << std::endl
- << "strength: " << (1 << strength) << std::endl
- << "damping: " << damp << std::endl
- << "depth: " << depth << std::endl
- << "boundary: " << boundary << std::endl
- << "w: " << w << std::endl
- << "h: " << h << std::endl
- << "A=" << (pos > 2 * size ? (int16_t)s[pos - 2 * size] : -1) << std::endl
- << "B=" << (pos > size ? (int16_t)s[pos - size] : -1) << std::endl
- << "C=" << (pos % size - 2 >= 0 ? (int16_t)s[pos - 2] : -1) << std::endl
- << "D=" << (pos % size - 1 >= 0 ? (int16_t)s[pos - 1] : -1) << std::endl
- << "X=" << (int16_t)s[pos] << std::endl
- << "E=" << (pos % size + 1 < size ? (int16_t)s[pos + 1] : -1) << std::endl
- << "F=" << (pos % size + 2 < size ? (int16_t)s[pos + 2] : -1) << std::endl
- << "G=" << (pos + size < size * size ? (int16_t)s[pos + size] : -1)
- << std::endl
- << "H="
- << (pos + 2 * size < size * size ? (int16_t)s[pos + 2 * size] : -1)
- << std::endl;
-}
-
-template <typename pixel>
-void test_clpf_speed(int w, int h, unsigned int depth, unsigned int iterations,
- void (*clpf)(pixel *dst, const uint16_t *src, int dstride,
- int sstride, int sizex, int sizey,
- unsigned int strength, unsigned int bitdepth),
- void (*ref_clpf)(pixel *dst, const uint16_t *src,
- int dstride, int sstride, int sizex,
- int sizey, unsigned int strength,
- unsigned int bitdepth)) {
- aom_usec_timer ref_timer;
- aom_usec_timer timer;
-
- aom_usec_timer_start(&ref_timer);
- test_clpf(w, h, depth, iterations, ref_clpf, ref_clpf);
- aom_usec_timer_mark(&ref_timer);
- int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer_start(&timer);
- test_clpf(w, h, depth, iterations, clpf, clpf);
- aom_usec_timer_mark(&timer);
- int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
-
-#if 0
- std::cout << "[ ] C time = " << ref_elapsed_time / 1000
- << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
-#endif
-
- EXPECT_GT(ref_elapsed_time, elapsed_time)
- << "Error: CDEFClpfSpeedTest, SIMD slower than C." << std::endl
- << "C time: " << ref_elapsed_time << " us" << std::endl
- << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-TEST_P(CDEFClpfBlockTest, TestSIMDNoMismatch) {
- test_clpf(sizex, sizey, 8, 1, clpf, ref_clpf);
-}
-
-TEST_P(CDEFClpfSpeedTest, DISABLED_TestSpeed) {
- test_clpf_speed(sizex, sizey, 8, 16, clpf, ref_clpf);
-}
-
-#if CONFIG_HIGHBITDEPTH
-TEST_P(CDEFClpfBlockHbdTest, TestSIMDNoMismatch) {
- test_clpf(sizex, sizey, 12, 1, clpf, ref_clpf);
-}
-
-TEST_P(ClpfHbdSpeedTest, DISABLED_TestSpeed) {
- test_clpf_speed(sizex, sizey, 12, 4, clpf, ref_clpf);
-}
-#endif
-
-using std::tr1::make_tuple;
-
-// VS compiling for 32 bit targets does not support vector types in
-// structs as arguments, which makes the v256 type of the intrinsics
-// hard to support, so optimizations for this target are disabled.
-#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
-// Test all supported architectures and block sizes
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, CDEFClpfBlockTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 8),
- make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 4),
- make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4, 8),
- make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4, 4),
- make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8, 8),
- make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8, 4),
- make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 4, 8),
- make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, CDEFClpfBlockTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 8),
- make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 4),
- make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 8),
- make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 4),
- make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 8, 8),
- make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 8, 4),
- make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 4, 8),
- make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, CDEFClpfBlockTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 8),
- make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 4),
- make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 8),
- make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 4),
- make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 8, 8),
- make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 8, 4),
- make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 4, 8),
- make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, CDEFClpfBlockTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 8),
- make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 4),
- make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4, 8),
- make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4, 4),
- make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8, 8),
- make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8, 4),
- make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 4, 8),
- make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if CONFIG_HIGHBITDEPTH
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, CDEFClpfBlockHbdTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 8),
- make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 4),
- make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 8),
- make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 4),
- make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 8),
- make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 4),
- make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 4, 8),
- make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, CDEFClpfBlockHbdTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 8),
- make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 4),
- make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 8),
- make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 4),
- make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 8),
- make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 4),
- make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 4, 8),
- make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, CDEFClpfBlockHbdTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 8),
- make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 4),
- make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 8),
- make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 4),
- make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 8),
- make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 4),
- make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 4, 8),
- make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, CDEFClpfBlockHbdTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 8),
- make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 4),
- make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 8),
- make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 4),
- make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 8),
- make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 4),
- make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 4, 8),
- make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-#endif // CONFIG_HIGHBITDEPTH
-
-// Test speed for all supported architectures
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, CDEFClpfSpeedTest,
- ::testing::Values(make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 8),
- make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8,
- 8)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFClpfSpeedTest,
- ::testing::Values(make_tuple(&aom_clpf_block_ssse3,
- &aom_clpf_block_c, 8, 8),
- make_tuple(&aom_clpf_hblock_ssse3,
- &aom_clpf_hblock_c, 8,
- 8)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFClpfSpeedTest,
- ::testing::Values(make_tuple(&aom_clpf_block_sse4_1,
- &aom_clpf_block_c, 8, 8),
- make_tuple(&aom_clpf_hblock_sse4_1,
- &aom_clpf_hblock_c, 8,
- 8)));
-
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, CDEFClpfSpeedTest,
- ::testing::Values(make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 8),
- make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8,
- 8)));
-#endif
-
-#if CONFIG_HIGHBITDEPTH
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, ClpfHbdSpeedTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 8),
- make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, ClpfHbdSpeedTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 8),
- make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, ClpfHbdSpeedTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 8),
- make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, ClpfHbdSpeedTest,
- ::testing::Values(
- make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 8),
- make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-#endif // CONFIG_HIGHBITDEPTH
-#endif // defined(_WIN64) || !defined(_MSC_VER)
-
-} // namespace
diff --git a/test/dering_test.cc b/test/dering_test.cc
deleted file mode 100644
index 6b76561..0000000
--- a/test/dering_test.cc
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/common/cdef_block.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-typedef std::tr1::tuple<cdef_direction_func, cdef_direction_func, int>
- dering_dir_param_t;
-
-class CDEFDeringDirTest : public ::testing::TestWithParam<dering_dir_param_t> {
- public:
- virtual ~CDEFDeringDirTest() {}
- virtual void SetUp() {
- dering = GET_PARAM(0);
- ref_dering = GET_PARAM(1);
- bsize = GET_PARAM(2);
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- int bsize;
- cdef_direction_func dering;
- cdef_direction_func ref_dering;
-};
-
-typedef CDEFDeringDirTest CDEFDeringSpeedTest;
-
-void test_dering(int bsize, int iterations, cdef_direction_func dering,
- cdef_direction_func ref_dering) {
- const int size = 8;
- const int ysize = size + 2 * CDEF_VBORDER;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
- DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
- DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
- memset(ref_d, 0, sizeof(ref_d));
- memset(d, 0, sizeof(d));
-
- int error = 0, threshold = 0, dir;
- int boundary, damping, depth, bits, level, count,
- errdepth = 0, errthreshold = 0, errboundary = 0, errdamping = 0;
- unsigned int pos = 0;
-
- for (boundary = 0; boundary < 16; boundary++) {
- for (depth = 8; depth <= 12; depth += 2) {
- for (damping = 5 + depth - 8; damping < 7 + depth - 8; damping++) {
- for (count = 0; count < iterations; count++) {
- for (level = 0; level < (1 << depth) && !error;
- level += (1 + 4 * !!boundary) << (depth - 8)) {
- for (bits = 1; bits <= depth && !error; bits++) {
- for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
- s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
- (1 << depth) - 1);
- if (boundary) {
- if (boundary & 1) { // Left
- for (int i = 0; i < ysize; i++)
- for (int j = 0; j < CDEF_HBORDER; j++)
- s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
- }
- if (boundary & 2) { // Right
- for (int i = 0; i < ysize; i++)
- for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
- s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
- }
- if (boundary & 4) { // Above
- for (int i = 0; i < CDEF_VBORDER; i++)
- for (int j = 0; j < CDEF_BSTRIDE; j++)
- s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
- }
- if (boundary & 8) { // Below
- for (int i = CDEF_VBORDER + size; i < ysize; i++)
- for (int j = 0; j < CDEF_BSTRIDE; j++)
- s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
- }
- }
- for (dir = 0; dir < 8; dir++) {
- for (threshold = 0; threshold < 64 << (depth - 8) && !error;
- threshold += (1 + 4 * !!boundary) << (depth - 8)) {
- ref_dering(ref_d, size,
- s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
- threshold, dir, damping);
- // If dering and ref_dering are the same, we're just testing
- // speed
- if (dering != ref_dering)
- ASM_REGISTER_STATE_CHECK(dering(
- d, size, s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
- threshold, dir, damping));
- if (ref_dering != dering) {
- for (pos = 0; pos < sizeof(d) / sizeof(*d) && !error;
- pos++) {
- error = ref_d[pos] != d[pos];
- errdepth = depth;
- errthreshold = threshold;
- errboundary = boundary;
- errdamping = damping;
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
-
- pos--;
- EXPECT_EQ(0, error) << "Error: CDEFDeringDirTest, SIMD and C mismatch."
- << std::endl
- << "First error at " << pos % size << "," << pos / size
- << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
- << ") " << std::endl
- << "threshold: " << errthreshold << std::endl
- << "damping: " << errdamping << std::endl
- << "depth: " << errdepth << std::endl
- << "size: " << bsize << std::endl
- << "boundary: " << errboundary << std::endl
- << std::endl;
-}
-
-void test_dering_speed(int bsize, int iterations, cdef_direction_func dering,
- cdef_direction_func ref_dering) {
- aom_usec_timer ref_timer;
- aom_usec_timer timer;
-
- aom_usec_timer_start(&ref_timer);
- test_dering(bsize, iterations, ref_dering, ref_dering);
- aom_usec_timer_mark(&ref_timer);
- int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer_start(&timer);
- test_dering(bsize, iterations, dering, dering);
- aom_usec_timer_mark(&timer);
- int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
-
-#if 0
- std::cout << "[ ] C time = " << ref_elapsed_time / 1000
- << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
-#endif
-
- EXPECT_GT(ref_elapsed_time, elapsed_time)
- << "Error: CDEFDeringSpeedTest, SIMD slower than C." << std::endl
- << "C time: " << ref_elapsed_time << " us" << std::endl
- << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-typedef int (*find_dir_t)(const uint16_t *img, int stride, int32_t *var,
- int coeff_shift);
-
-typedef std::tr1::tuple<find_dir_t, find_dir_t> find_dir_param_t;
-
-class CDEFDeringFindDirTest
- : public ::testing::TestWithParam<find_dir_param_t> {
- public:
- virtual ~CDEFDeringFindDirTest() {}
- virtual void SetUp() {
- finddir = GET_PARAM(0);
- ref_finddir = GET_PARAM(1);
- }
-
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- find_dir_t finddir;
- find_dir_t ref_finddir;
-};
-
-typedef CDEFDeringFindDirTest CDEFDeringFindDirSpeedTest;
-
-void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
- int coeff_shift),
- int (*ref_finddir)(const uint16_t *img, int stride,
- int32_t *var, int coeff_shift)) {
- const int size = 8;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, s[size * size]);
-
- int error = 0;
- int depth, bits, level, count, errdepth = 0;
- int ref_res = 0, res = 0;
- int32_t ref_var = 0, var = 0;
-
- for (depth = 8; depth <= 12 && !error; depth += 2) {
- for (count = 0; count < 512 && !error; count++) {
- for (level = 0; level < (1 << depth) && !error;
- level += 1 << (depth - 8)) {
- for (bits = 1; bits <= depth && !error; bits++) {
- for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
- s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
- (1 << depth) - 1);
- for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
- ref_res = ref_finddir(s, size, &ref_var, depth - 8);
- if (finddir != ref_finddir)
- ASM_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
- if (ref_finddir != finddir) {
- if (res != ref_res || var != ref_var) error = 1;
- errdepth = depth;
- }
- }
- }
- }
- }
-
- EXPECT_EQ(0, error) << "Error: CDEFDeringFindDirTest, SIMD and C mismatch."
- << std::endl
- << "return: " << res << " : " << ref_res << std::endl
- << "var: " << var << " : " << ref_var << std::endl
- << "depth: " << errdepth << std::endl
- << std::endl;
-}
-
-void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
- int32_t *var, int coeff_shift),
- int (*ref_finddir)(const uint16_t *img, int stride,
- int32_t *var, int coeff_shift)) {
- aom_usec_timer ref_timer;
- aom_usec_timer timer;
-
- aom_usec_timer_start(&ref_timer);
- test_finddir(ref_finddir, ref_finddir);
- aom_usec_timer_mark(&ref_timer);
- int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
- aom_usec_timer_start(&timer);
- test_finddir(finddir, finddir);
- aom_usec_timer_mark(&timer);
- int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
-
-#if 0
- std::cout << "[ ] C time = " << ref_elapsed_time / 1000
- << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
-#endif
-
- EXPECT_GT(ref_elapsed_time, elapsed_time)
- << "Error: CDEFDeringFindDirSpeedTest, SIMD slower than C." << std::endl
- << "C time: " << ref_elapsed_time << " us" << std::endl
- << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-TEST_P(CDEFDeringDirTest, TestSIMDNoMismatch) {
- test_dering(bsize, 1, dering, ref_dering);
-}
-
-TEST_P(CDEFDeringSpeedTest, DISABLED_TestSpeed) {
- test_dering_speed(bsize, 4, dering, ref_dering);
-}
-
-TEST_P(CDEFDeringFindDirTest, TestSIMDNoMismatch) {
- test_finddir(finddir, ref_finddir);
-}
-
-TEST_P(CDEFDeringFindDirSpeedTest, DISABLED_TestSpeed) {
- test_finddir_speed(finddir, ref_finddir);
-}
-
-using std::tr1::make_tuple;
-
-// VS compiling for 32 bit targets does not support vector types in
-// structs as arguments, which makes the v256 type of the intrinsics
-// hard to support, so optimizations for this target are disabled.
-#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&cdef_direction_4x4_sse2,
- &cdef_direction_4x4_c, 4),
- make_tuple(&cdef_direction_8x8_sse2,
- &cdef_direction_8x8_c,
- 8)));
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&cdef_find_dir_sse2,
- &cdef_find_dir_c)));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&cdef_direction_4x4_ssse3,
- &cdef_direction_4x4_c, 4),
- make_tuple(&cdef_direction_8x8_ssse3,
- &cdef_direction_8x8_c,
- 8)));
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&cdef_direction_4x4_sse4_1,
- &cdef_direction_4x4_c, 4),
- make_tuple(&cdef_direction_8x8_sse4_1,
- &cdef_direction_8x8_c,
- 8)));
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&cdef_direction_4x4_neon,
- &cdef_direction_4x4_c, 4),
- make_tuple(&cdef_direction_8x8_neon,
- &cdef_direction_8x8_c,
- 8)));
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&cdef_find_dir_neon,
- &cdef_find_dir_c)));
-#endif
-
-// Test speed for all supported architectures
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&cdef_direction_4x4_sse2,
- &cdef_direction_4x4_c, 4),
- make_tuple(&cdef_direction_8x8_sse2,
- &cdef_direction_8x8_c,
- 8)));
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&cdef_find_dir_sse2,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&cdef_direction_4x4_ssse3,
- &cdef_direction_4x4_c, 4),
- make_tuple(&cdef_direction_8x8_ssse3,
- &cdef_direction_8x8_c,
- 8)));
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&cdef_direction_4x4_sse4_1,
- &cdef_direction_4x4_c, 4),
- make_tuple(&cdef_direction_8x8_sse4_1,
- &cdef_direction_8x8_c,
- 8)));
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
- &cdef_find_dir_c)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&cdef_direction_4x4_neon,
- &cdef_direction_4x4_c, 4),
- make_tuple(&cdef_direction_8x8_neon,
- &cdef_direction_8x8_c,
- 8)));
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&cdef_find_dir_neon,
- &cdef_find_dir_c)));
-#endif
-
-#endif // defined(_WIN64) || !defined(_MSC_VER)
-} // namespace
diff --git a/test/test.cmake b/test/test.cmake
index 2101800..4a66c20 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -130,16 +130,9 @@
${AOM_UNIT_TEST_ENCODER_SOURCES}
"${AOM_ROOT}/test/motion_vector_test.cc")
- if (CONFIG_CDEF_SINGLEPASS)
- set(AOM_UNIT_TEST_COMMON_SOURCES
- ${AOM_UNIT_TEST_COMMON_SOURCES}
- "${AOM_ROOT}/test/cdef_test.cc")
- else ()
- set(AOM_UNIT_TEST_COMMON_SOURCES
- ${AOM_UNIT_TEST_COMMON_SOURCES}
- "${AOM_ROOT}/test/clpf_test.cc"
- "${AOM_ROOT}/test/dering_test.cc")
- endif ()
+ set(AOM_UNIT_TEST_COMMON_SOURCES
+ ${AOM_UNIT_TEST_COMMON_SOURCES}
+ "${AOM_ROOT}/test/cdef_test.cc")
if (CONFIG_INTRABC)
set(AOM_UNIT_TEST_COMMON_SOURCES
diff --git a/test/test.mk b/test/test.mk
index 29722a7..4d0fad2 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -127,12 +127,7 @@
LIBAOM_TEST_SRCS-yes += convolve_test.cc
LIBAOM_TEST_SRCS-yes += lpf_test.cc
-ifeq ($(CONFIG_CDEF_SINGLEPASS),yes)
LIBAOM_TEST_SRCS-yes += cdef_test.cc
-else
-LIBAOM_TEST_SRCS-yes += dering_test.cc
-LIBAOM_TEST_SRCS-yes += clpf_test.cc
-endif
LIBAOM_TEST_SRCS-yes += simd_cmp_impl.h
LIBAOM_TEST_SRCS-$(HAVE_SSE2) += simd_cmp_sse2.cc
LIBAOM_TEST_SRCS-$(HAVE_SSSE3) += simd_cmp_ssse3.cc