Remove CDEF_SINGLEPASS defines

The experiment has been adopted and enabled by default for a while.
The alternative code path has not been maintained for a long time, so
it is now removed.

Change-Id: Iaf22f2969b45b71b2bf67707e131ab4c439b7fa6
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 107f852..b08086e 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -279,30 +279,6 @@
     ${AOM_AV1_COMMON_INTRIN_NEON}
     "${AOM_ROOT}/av1/common/cdef_block_neon.c")
 
-if (NOT CONFIG_CDEF_SINGLEPASS)
-  set(AOM_AV1_COMMON_SOURCES
-      ${AOM_AV1_COMMON_SOURCES}
-      "${AOM_ROOT}/av1/common/clpf.c"
-      "${AOM_ROOT}/av1/common/clpf_simd.h"
-      "${AOM_ROOT}/av1/common/cdef_block_simd.h")
-
-  set(AOM_AV1_COMMON_INTRIN_SSE2
-      ${AOM_AV1_COMMON_INTRIN_SSE2}
-      "${AOM_ROOT}/av1/common/clpf_sse2.c")
-
-  set(AOM_AV1_COMMON_INTRIN_SSSE3
-      ${AOM_AV1_COMMON_INTRIN_SSSE3}
-      "${AOM_ROOT}/av1/common/clpf_ssse3.c")
-
-  set(AOM_AV1_COMMON_INTRIN_SSE4_1
-      ${AOM_AV1_COMMON_INTRIN_SSE4_1}
-      "${AOM_ROOT}/av1/common/clpf_sse4.c")
-
-  set(AOM_AV1_COMMON_INTRIN_NEON
-      ${AOM_AV1_COMMON_INTRIN_NEON}
-      "${AOM_ROOT}/av1/common/clpf_neon.c")
-endif ()
-
 set(AOM_AV1_COMMON_INTRIN_SSE2
     ${AOM_AV1_COMMON_INTRIN_SSE2}
     "${AOM_ROOT}/av1/common/x86/convolve_2d_sse2.c")
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index ad3ff2e..01a2e99 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -94,16 +94,7 @@
 endif
 AV1_COMMON_SRCS-yes += common/warped_motion.h
 AV1_COMMON_SRCS-yes += common/warped_motion.c
-ifeq ($(CONFIG_CDEF_SINGLEPASS),yes)
 AV1_COMMON_SRCS-$(HAVE_AVX2) += common/cdef_block_avx2.c
-else
-AV1_COMMON_SRCS-yes += common/clpf.c
-AV1_COMMON_SRCS-yes += common/clpf_simd.h
-AV1_COMMON_SRCS-$(HAVE_SSE2) += common/clpf_sse2.c
-AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/clpf_ssse3.c
-AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4.c
-AV1_COMMON_SRCS-$(HAVE_NEON) += common/clpf_neon.c
-endif
 AV1_COMMON_SRCS-$(HAVE_SSE2) += common/cdef_block_sse2.c
 AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/cdef_block_ssse3.c
 AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/cdef_block_sse4.c
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index b19fe1c..9c1cf66 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -499,20 +499,7 @@
 # Deringing Functions
 
 add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
-if (aom_config("CONFIG_CDEF_SINGLEPASS") ne "yes") {
-  add_proto qw/void aom_clpf_block_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
-  add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
-  add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
-  add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
-  add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
-  add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
-  add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
-  add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
-  add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
-  add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
-} else {
-  add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift";
-}
+add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max, int coeff_shift";
 
 add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h";
 add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h";
@@ -521,28 +508,10 @@
 # structs as arguments, which makes the v256 type of the intrinsics
 # hard to support, so optimizations for this target are disabled.
 if ($opts{config} !~ /libs-x86-win32-vs.*/) {
-  if (aom_config("CONFIG_CDEF_SINGLEPASS") eq "yes") {
-    specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
-    specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
-    specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
-    specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
-  } else {
-    specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
-    specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
-    specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
-    specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
-    specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
-    specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
-    specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/;
-    specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/;
-
-    specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
-    specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
-    specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
-    specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
-    specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 neon/;
-    specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
-  }
+  specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
+  specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
+  specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
+  specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
 }
 
 # WARPED_MOTION / GLOBAL_MOTION functions
diff --git a/av1/common/cdef.c b/av1/common/cdef.c
index c53b7a2..ed768cd 100644
--- a/av1/common/cdef.c
+++ b/av1/common/cdef.c
@@ -281,9 +281,6 @@
 
       curr_row_cdef[fbc] = 1;
       for (int pli = 0; pli < nplanes; pli++) {
-#if !CONFIG_CDEF_SINGLEPASS
-        DECLARE_ALIGNED(16, uint16_t, dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE]);
-#endif
         int coffset;
         int rend, cend;
         int pri_damping = cm->cdef_pri_damping;
@@ -399,27 +396,16 @@
 #if CONFIG_HIGHBITDEPTH
         if (cm->use_highbitdepth) {
           cdef_filter_fb(
-#if CONFIG_CDEF_SINGLEPASS
               NULL,
-              &CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
-#else
-              (uint8_t *)&CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
-#endif
-                  [xd->plane[pli].dst.stride *
-                       (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
-                   (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
-#if CONFIG_CDEF_SINGLEPASS
+              &CONVERT_TO_SHORTPTR(
+                  xd->plane[pli]
+                      .dst.buf)[xd->plane[pli].dst.stride *
+                                    (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
+                                (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
               xd->plane[pli].dst.stride,
-#else
-              xd->plane[pli].dst.stride, dst,
-#endif
               &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
               ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
-#if CONFIG_CDEF_SINGLEPASS
               sec_strength, pri_damping, sec_damping, coeff_shift);
-#else
-              sec_strength, sec_damping, pri_damping, coeff_shift, 0, 1);
-#endif
         } else {
 #endif
           cdef_filter_fb(
@@ -427,18 +413,10 @@
                    .dst.buf[xd->plane[pli].dst.stride *
                                 (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
                             (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
-#if CONFIG_CDEF_SINGLEPASS
               NULL, xd->plane[pli].dst.stride,
-#else
-            xd->plane[pli].dst.stride, dst,
-#endif
               &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
               ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
-#if CONFIG_CDEF_SINGLEPASS
               sec_strength, pri_damping, sec_damping, coeff_shift);
-#else
-            sec_strength, sec_damping, pri_damping, coeff_shift, 0, 0);
-#endif
 
 #if CONFIG_HIGHBITDEPTH
         }
diff --git a/av1/common/cdef_block.c b/av1/common/cdef_block.c
index 3b04d6a..be5d6bd 100644
--- a/av1/common/cdef_block.c
+++ b/av1/common/cdef_block.c
@@ -21,7 +21,7 @@
 #include "./cdef.h"
 
 /* Generated from gen_filter_tables.c. */
-#if !CONFIG_CDEF_SINGLEPASS || CDEF_FULL
+#if CDEF_FULL
 DECLARE_ALIGNED(16, const int, cdef_directions[8][3]) = {
   { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 },
   { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 },
@@ -123,7 +123,6 @@
   return best_dir;
 }
 
-#if CONFIG_CDEF_SINGLEPASS
 #if CDEF_FULL
 const int cdef_pri_taps[2][3] = { { 3, 2, 1 }, { 2, 2, 2 } };
 const int cdef_sec_taps[2][2] = { { 3, 1 }, { 3, 1 } };
@@ -209,67 +208,6 @@
   }
 }
 
-#else
-
-/* Smooth in the direction detected. */
-void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in,
-                          int threshold, int dir, int damping) {
-  int i;
-  int j;
-  int k;
-  static const int taps[3] = { 3, 2, 1 };
-  for (i = 0; i < 8; i++) {
-    for (j = 0; j < 8; j++) {
-      int16_t sum;
-      int16_t xx;
-      int16_t yy;
-      xx = in[i * CDEF_BSTRIDE + j];
-      sum = 0;
-      for (k = 0; k < 3; k++) {
-        int16_t p0;
-        int16_t p1;
-        p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
-        p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
-        sum += taps[k] * constrain(p0, threshold, damping);
-        sum += taps[k] * constrain(p1, threshold, damping);
-      }
-      sum = (sum + 8) >> 4;
-      yy = xx + sum;
-      y[i * ystride + j] = yy;
-    }
-  }
-}
-
-/* Smooth in the direction detected. */
-void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in,
-                          int threshold, int dir, int damping) {
-  int i;
-  int j;
-  int k;
-  static const int taps[2] = { 4, 1 };
-  for (i = 0; i < 4; i++) {
-    for (j = 0; j < 4; j++) {
-      int16_t sum;
-      int16_t xx;
-      int16_t yy;
-      xx = in[i * CDEF_BSTRIDE + j];
-      sum = 0;
-      for (k = 0; k < 2; k++) {
-        int16_t p0;
-        int16_t p1;
-        p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
-        p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
-        sum += taps[k] * constrain(p0, threshold, damping);
-        sum += taps[k] * constrain(p1, threshold, damping);
-      }
-      sum = (sum + 8) >> 4;
-      yy = xx + sum;
-      y[i * ystride + j] = yy;
-    }
-  }
-}
-#endif
-
 /* Compute the primary filter strength for an 8x8 block based on the
    directional variance difference. A high variance difference means
    that we have a highly directional pattern (e.g. a high contrast
@@ -282,160 +220,26 @@
   return var ? (strength * (4 + i) + 8) >> 4 : 0;
 }
 
-#if !CONFIG_CDEF_SINGLEPASS
-void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
-                               int sstride) {
-  int i, j;
-  for (i = 0; i < 8; i++)
-    for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
-}
-
-void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
-                               int sstride) {
-  int i, j;
-  for (i = 0; i < 4; i++)
-    for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
-}
-
-static void copy_block_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
-                                      cdef_list *dlist, int cdef_count,
-                                      int bsize) {
-  int bi, bx, by;
-
-  if (bsize == BLOCK_8X8) {
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
-                              &src[bi << (3 + 3)], 8);
-    }
-  } else if (bsize == BLOCK_4X8) {
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
-                              &src[bi << (3 + 2)], 4);
-      copy_4x4_16bit_to_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
-                              dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
-    }
-  } else if (bsize == BLOCK_8X4) {
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
-                              &src[bi << (2 + 3)], 8);
-      copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4],
-                              dstride, &src[(bi << (2 + 3)) + 4], 8);
-    }
-  } else {
-    assert(bsize == BLOCK_4X4);
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
-                              &src[bi << (2 + 2)], 4);
-    }
-  }
-}
-
-void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
-                              int sstride) {
-  int i, j;
-  for (i = 0; i < 8; i++)
-    for (j = 0; j < 8; j++)
-      dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
-}
-
-void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
-                              int sstride) {
-  int i, j;
-  for (i = 0; i < 4; i++)
-    for (j = 0; j < 4; j++)
-      dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
-}
-
-static void copy_block_16bit_to_8bit(uint8_t *dst, int dstride,
-                                     const uint16_t *src, cdef_list *dlist,
-                                     int cdef_count, int bsize) {
-  int bi, bx, by;
-  if (bsize == BLOCK_8X8) {
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
-                             &src[bi << (3 + 3)], 8);
-    }
-  } else if (bsize == BLOCK_4X8) {
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
-                             &src[bi << (3 + 2)], 4);
-      copy_4x4_16bit_to_8bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
-                             dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
-    }
-  } else if (bsize == BLOCK_8X4) {
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
-                             &src[bi << (2 + 3)], 8);
-      copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
-                             &src[(bi << (2 + 3)) + 4], 8);
-    }
-  } else {
-    assert(bsize == BLOCK_4X4);
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
-                             &src[bi << (2 * 2)], 4);
-    }
-  }
-}
-
-void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
-                    int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
-                    int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
-                    cdef_list *dlist, int cdef_count, int level,
-                    int sec_strength, int sec_damping, int pri_damping,
-                    int coeff_shift, int skip_dering, int hbd) {
-#else
-
 void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
                     int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
                     int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
                     cdef_list *dlist, int cdef_count, int level,
                     int sec_strength, int pri_damping, int sec_damping,
                     int coeff_shift) {
-#endif
   int bi;
   int bx;
   int by;
   int bsize, bsizex, bsizey;
 
-#if CONFIG_CDEF_SINGLEPASS
   int pri_strength = level << coeff_shift;
   sec_strength <<= coeff_shift;
-#else
-  int threshold = level << coeff_shift;
-
-  cdef_direction_func cdef_direction[] = { cdef_direction_4x4,
-                                           cdef_direction_8x8 };
-#endif
   sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
   pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
   bsize =
       ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
   bsizex = 3 - xdec;
   bsizey = 3 - ydec;
-#if CONFIG_CDEF_SINGLEPASS
-  if (dirinit && pri_strength == 0 && sec_strength == 0)
-#else
-  if (!skip_dering)
-#endif
-  {
-#if CONFIG_CDEF_SINGLEPASS
+  if (dirinit && pri_strength == 0 && sec_strength == 0) {
     // If we're here, both primary and secondary strengths are 0, and
     // we still haven't written anything to y[] yet, so we just copy
     // the input to y[]. This is necessary only for av1_cdef_search()
@@ -443,106 +247,16 @@
     for (bi = 0; bi < cdef_count; bi++) {
       by = dlist[bi].by;
       bx = dlist[bi].bx;
-#else
-    if (pli == 0) {
-      if (!dirinit || !*dirinit) {
-        for (bi = 0; bi < cdef_count; bi++) {
-          by = dlist[bi].by;
-          bx = dlist[bi].bx;
-          dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
-                                      CDEF_BSTRIDE, &var[by][bx], coeff_shift);
-        }
-        if (dirinit) *dirinit = 1;
-      }
-    }
-    if (pli == 1 && xdec != ydec) {
-      for (bi = 0; bi < cdef_count; bi++) {
-        static const int conv422[8] = { 7, 0, 2, 4, 5, 6, 6, 6 };
-        static const int conv440[8] = { 1, 2, 2, 2, 3, 4, 6, 0 };
-        by = dlist[bi].by;
-        bx = dlist[bi].bx;
-        dir[by][bx] = (xdec ? conv422 : conv440)[dir[by][bx]];
-      }
-    }
-
-    // Only run dering for non-zero threshold. If we don't dering, we
-    // still need to eventually write something out in y[] later.
-    if (threshold != 0) {
-      assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
-      for (bi = 0; bi < cdef_count; bi++) {
-        int t = dlist[bi].skip ? 0 : threshold;
-        by = dlist[bi].by;
-        bx = dlist[bi].bx;
-        (cdef_direction[bsize == BLOCK_8X8])(
-            &y[bi << (bsizex + bsizey)], 1 << bsizex,
-            &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
-            pli ? t : adjust_strength(t, var[by][bx]), dir[by][bx],
-            pri_damping);
-      }
-    }
-  }
-
-  if (sec_strength) {
-    if (threshold && !skip_dering)
-      copy_block_16bit_to_16bit(in, CDEF_BSTRIDE, y, dlist, cdef_count, bsize);
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      int py = by << bsizey;
-      int px = bx << bsizex;
-
-      if (dlist[bi].skip) continue;
-      if (!dst || hbd) {
-        // 16 bit destination if high bitdepth or 8 bit destination not given
-        (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
-                                                        : aom_clpf_hblock_hbd)(
-            dst ? (uint16_t *)dst + py * dstride + px
-                : &y[bi << (bsizex + bsizey)],
-            in + py * CDEF_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
-            CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
-            sec_damping);
-      } else {
-        // Do clpf and write the result to an 8 bit destination
-        (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
-                                                        : aom_clpf_hblock)(
-            dst + py * dstride + px, in + py * CDEF_BSTRIDE + px, dstride,
-            CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
-            sec_damping);
-      }
-    }
-  } else if (threshold != 0) {
-    // No clpf, so copy instead
-    if (hbd) {
-      copy_block_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist, cdef_count,
-                                bsize);
-    } else {
-      copy_block_16bit_to_8bit(dst, dstride, y, dlist, cdef_count, bsize);
-    }
-  } else if (dirinit) {
-    // If we're here, both dering and clpf are off, and we still haven't written
-    // anything to y[] yet, so we just copy the input to y[]. This is necessary
-    // only for av1_cdef_search() and only av1_cdef_search() sets dirinit.
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-#endif
       int iy, ix;
       // TODO(stemidts/jmvalin): SIMD optimisations
       for (iy = 0; iy < 1 << bsizey; iy++)
         for (ix = 0; ix < 1 << bsizex; ix++)
-#if CONFIG_CDEF_SINGLEPASS
           dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
-#else
-          y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
-#endif
               in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
     }
-#if CONFIG_CDEF_SINGLEPASS
     return;
-#endif
   }
 
-#if CONFIG_CDEF_SINGLEPASS
   if (pli == 0) {
     if (!dirinit || !*dirinit) {
       for (bi = 0; bi < cdef_count; bi++) {
@@ -587,5 +301,4 @@
           pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1,
           coeff_shift);
   }
-#endif
 }
diff --git a/av1/common/cdef_block.h b/av1/common/cdef_block.h
index bf93802..14fea27 100644
--- a/av1/common/cdef_block.h
+++ b/av1/common/cdef_block.h
@@ -17,9 +17,7 @@
 #define CDEF_BLOCKSIZE 64
 #define CDEF_BLOCKSIZE_LOG2 6
 #define CDEF_NBLOCKS ((1 << MAX_SB_SIZE_LOG2) / 8)
-#if CONFIG_CDEF_SINGLEPASS
 #define CDEF_SB_SHIFT (MAX_SB_SIZE_LOG2 - CDEF_BLOCKSIZE_LOG2)
-#endif
 
 /* We need to buffer three vertical lines. */
 #define CDEF_VBORDER (3)
@@ -33,7 +31,6 @@
 #define CDEF_INBUF_SIZE \
   (CDEF_BSTRIDE * ((1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_VBORDER))
 
-#if CONFIG_CDEF_SINGLEPASS
 // Filter configuration
 #define CDEF_CAP 1   // 1 = Cap change to largest diff
 #define CDEF_FULL 0  // 1 = 7x7 filter, 0 = 5x5 filter
@@ -48,17 +45,12 @@
 DECLARE_ALIGNED(16, extern const int, cdef_directions[8][2]);
 #endif
 
-#else  // CONFIG_CDEF_SINGLEPASS
-DECLARE_ALIGNED(16, extern const int, cdef_directions[8][3]);
-#endif
-
 typedef struct {
   uint8_t by;
   uint8_t bx;
   uint8_t skip;
 } cdef_list;
 
-#if CONFIG_CDEF_SINGLEPASS
 typedef void (*cdef_filter_block_func)(uint8_t *dst8, uint16_t *dst16,
                                        int dstride, const uint16_t *in,
                                        int pri_strength, int sec_strength,
@@ -67,26 +59,11 @@
                                        int coeff_shift);
 void copy_cdef_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
                               cdef_list *dlist, int cdef_count, int bsize);
-#else
-typedef void (*cdef_direction_func)(uint16_t *y, int ystride,
-                                    const uint16_t *in, int threshold, int dir,
-                                    int damping);
 
-#endif
-
-#if CONFIG_CDEF_SINGLEPASS
 void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
                     int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
                     int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
                     cdef_list *dlist, int cdef_count, int level,
                     int sec_strength, int pri_damping, int sec_damping,
                     int coeff_shift);
-#else
-void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
-                    int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
-                    int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
-                    cdef_list *dlist, int cdef_count, int level,
-                    int sec_strength, int sec_damping, int pri_damping,
-                    int coeff_shift, int skip_dering, int hbd);
-#endif
 #endif
diff --git a/av1/common/cdef_block_simd.h b/av1/common/cdef_block_simd.h
index afd2aed..5ee20f3 100644
--- a/av1/common/cdef_block_simd.h
+++ b/av1/common/cdef_block_simd.h
@@ -221,7 +221,6 @@
   return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
 }
 
-#if CONFIG_CDEF_SINGLEPASS
 // sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
 SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
                            unsigned int adjdamp) {
@@ -1081,153 +1080,6 @@
   }
 }
 
-#else
-
-void SIMD_FUNC(cdef_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in,
-                                   int threshold, int dir, int damping) {
-  int i;
-  v128 p0, p1, sum, row, res;
-  int o1 = cdef_directions[dir][0];
-  int o2 = cdef_directions[dir][1];
-
-  if (threshold) damping -= get_msb(threshold);
-  for (i = 0; i < 4; i += 2) {
-    sum = v128_zero();
-    row = v128_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
-                        v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
-
-    // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
-    p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o1]),
-                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o1]));
-    p0 = constrain16(p0, row, threshold, damping);
-
-    // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
-    p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o1]),
-                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o1]));
-    p1 = constrain16(p1, row, threshold, damping);
-
-    // sum += 4 * (p0 + p1)
-    sum = v128_add_16(sum, v128_shl_n_16(v128_add_16(p0, p1), 2));
-
-    // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
-    p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o2]),
-                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o2]));
-    p0 = constrain16(p0, row, threshold, damping);
-
-    // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
-    p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o2]),
-                       v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o2]));
-    p1 = constrain16(p1, row, threshold, damping);
-
-    // sum += 1 * (p0 + p1)
-    sum = v128_add_16(sum, v128_add_16(p0, p1));
-
-    // res = row + ((sum + 8) >> 4)
-    res = v128_add_16(sum, v128_dup_16(8));
-    res = v128_shr_n_s16(res, 4);
-    res = v128_add_16(row, res);
-    v64_store_aligned(&y[i * ystride], v128_high_v64(res));
-    v64_store_aligned(&y[(i + 1) * ystride], v128_low_v64(res));
-  }
-}
-
-void SIMD_FUNC(cdef_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in,
-                                   int threshold, int dir, int damping) {
-  int i;
-  v128 sum, p0, p1, row, res;
-  int o1 = cdef_directions[dir][0];
-  int o2 = cdef_directions[dir][1];
-  int o3 = cdef_directions[dir][2];
-
-  if (threshold) damping -= get_msb(threshold);
-  for (i = 0; i < 8; i++) {
-    sum = v128_zero();
-    row = v128_load_aligned(&in[i * CDEF_BSTRIDE]);
-
-    // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
-    p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o1]);
-    p0 = constrain16(p0, row, threshold, damping);
-
-    // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
-    p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o1]);
-    p1 = constrain16(p1, row, threshold, damping);
-
-    // sum += 3 * (p0 + p1)
-    p0 = v128_add_16(p0, p1);
-    p0 = v128_add_16(p0, v128_shl_n_16(p0, 1));
-    sum = v128_add_16(sum, p0);
-
-    // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
-    p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o2]);
-    p0 = constrain16(p0, row, threshold, damping);
-
-    // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
-    p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o2]);
-    p1 = constrain16(p1, row, threshold, damping);
-
-    // sum += 2 * (p0 + p1)
-    p0 = v128_shl_n_16(v128_add_16(p0, p1), 1);
-    sum = v128_add_16(sum, p0);
-
-    // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
-    p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o3]);
-    p0 = constrain16(p0, row, threshold, damping);
-
-    // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
-    p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o3]);
-    p1 = constrain16(p1, row, threshold, damping);
-
-    // sum += (p0 + p1)
-    p0 = v128_add_16(p0, p1);
-    sum = v128_add_16(sum, p0);
-
-    // res = row + ((sum + 8) >> 4)
-    res = v128_add_16(sum, v128_dup_16(8));
-    res = v128_shr_n_s16(res, 4);
-    res = v128_add_16(row, res);
-    v128_store_unaligned(&y[i * ystride], res);
-  }
-}
-
-void SIMD_FUNC(copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride,
-                                       const uint16_t *src, int sstride) {
-  int i;
-  for (i = 0; i < 8; i++) {
-    v128 row = v128_load_unaligned(&src[i * sstride]);
-    row = v128_pack_s16_u8(row, row);
-    v64_store_unaligned(&dst[i * dstride], v128_low_v64(row));
-  }
-}
-
-void SIMD_FUNC(copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride,
-                                       const uint16_t *src, int sstride) {
-  int i;
-  for (i = 0; i < 4; i++) {
-    v128 row = v128_load_unaligned(&src[i * sstride]);
-    row = v128_pack_s16_u8(row, row);
-    u32_store_unaligned(&dst[i * dstride], v128_low_u32(row));
-  }
-}
-
-void SIMD_FUNC(copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride,
-                                        const uint16_t *src, int sstride) {
-  int i;
-  for (i = 0; i < 8; i++) {
-    v128 row = v128_load_unaligned(&src[i * sstride]);
-    v128_store_unaligned(&dst[i * dstride], row);
-  }
-}
-
-void SIMD_FUNC(copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride,
-                                        const uint16_t *src, int sstride) {
-  int i;
-  for (i = 0; i < 4; i++) {
-    v64 row = v64_load_unaligned(&src[i * sstride]);
-    v64_store_unaligned(&dst[i * dstride], row);
-  }
-}
-#endif
-
 void SIMD_FUNC(copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride,
                                          const uint8_t *src, int sstride, int v,
                                          int h) {
diff --git a/av1/common/clpf.c b/av1/common/clpf.c
deleted file mode 100644
index d643236..0000000
--- a/av1/common/clpf.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "./av1_rtcd.h"
-#include "./cdef.h"
-#include "aom/aom_image.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-static int clpf_sample(int X, int A, int B, int C, int D, int E, int F, int G,
-                       int H, int s, unsigned int dmp) {
-  int delta = 1 * constrain(A - X, s, dmp) + 3 * constrain(B - X, s, dmp) +
-              1 * constrain(C - X, s, dmp) + 3 * constrain(D - X, s, dmp) +
-              3 * constrain(E - X, s, dmp) + 1 * constrain(F - X, s, dmp) +
-              3 * constrain(G - X, s, dmp) + 1 * constrain(H - X, s, dmp);
-  return (8 + delta - (delta < 0)) >> 4;
-}
-
-static int clpf_hsample(int X, int A, int B, int C, int D, int s,
-                        unsigned int dmp) {
-  int delta = 1 * constrain(A - X, s, dmp) + 3 * constrain(B - X, s, dmp) +
-              3 * constrain(C - X, s, dmp) + 1 * constrain(D - X, s, dmp);
-  return (4 + delta - (delta < 0)) >> 3;
-}
-
-void aom_clpf_block_c(uint8_t *dst, const uint16_t *src, int dstride,
-                      int sstride, int sizex, int sizey, unsigned int strength,
-                      unsigned int damping) {
-  int x, y;
-
-  for (y = 0; y < sizey; y++) {
-    for (x = 0; x < sizex; x++) {
-      const int X = src[y * sstride + x];
-      const int A = src[(y - 2) * sstride + x];
-      const int B = src[(y - 1) * sstride + x];
-      const int C = src[y * sstride + x - 2];
-      const int D = src[y * sstride + x - 1];
-      const int E = src[y * sstride + x + 1];
-      const int F = src[y * sstride + x + 2];
-      const int G = src[(y + 1) * sstride + x];
-      const int H = src[(y + 2) * sstride + x];
-      const int delta =
-          clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
-      dst[y * dstride + x] = X + delta;
-    }
-  }
-}
-
-// Identical to aom_clpf_block_c() apart from "dst".
-void aom_clpf_block_hbd_c(uint16_t *dst, const uint16_t *src, int dstride,
-                          int sstride, int sizex, int sizey,
-                          unsigned int strength, unsigned int damping) {
-  int x, y;
-
-  for (y = 0; y < sizey; y++) {
-    for (x = 0; x < sizex; x++) {
-      const int X = src[y * sstride + x];
-      const int A = src[(y - 2) * sstride + x];
-      const int B = src[(y - 1) * sstride + x];
-      const int C = src[y * sstride + x - 2];
-      const int D = src[y * sstride + x - 1];
-      const int E = src[y * sstride + x + 1];
-      const int F = src[y * sstride + x + 2];
-      const int G = src[(y + 1) * sstride + x];
-      const int H = src[(y + 2) * sstride + x];
-      const int delta =
-          clpf_sample(X, A, B, C, D, E, F, G, H, strength, damping);
-      dst[y * dstride + x] = X + delta;
-    }
-  }
-}
-
-// Vertically restricted filter
-void aom_clpf_hblock_c(uint8_t *dst, const uint16_t *src, int dstride,
-                       int sstride, int sizex, int sizey, unsigned int strength,
-                       unsigned int damping) {
-  int x, y;
-
-  for (y = 0; y < sizey; y++) {
-    for (x = 0; x < sizex; x++) {
-      const int X = src[y * sstride + x];
-      const int A = src[y * sstride + x - 2];
-      const int B = src[y * sstride + x - 1];
-      const int C = src[y * sstride + x + 1];
-      const int D = src[y * sstride + x + 2];
-      const int delta = clpf_hsample(X, A, B, C, D, strength, damping);
-      dst[y * dstride + x] = X + delta;
-    }
-  }
-}
-
-void aom_clpf_hblock_hbd_c(uint16_t *dst, const uint16_t *src, int dstride,
-                           int sstride, int sizex, int sizey,
-                           unsigned int strength, unsigned int damping) {
-  int x, y;
-
-  for (y = 0; y < sizey; y++) {
-    for (x = 0; x < sizex; x++) {
-      const int X = src[y * sstride + x];
-      const int A = src[y * sstride + x - 2];
-      const int B = src[y * sstride + x - 1];
-      const int C = src[y * sstride + x + 1];
-      const int D = src[y * sstride + x + 2];
-      const int delta = clpf_hsample(X, A, B, C, D, strength, damping);
-      dst[y * dstride + x] = X + delta;
-    }
-  }
-}
diff --git a/av1/common/clpf_neon.c b/av1/common/clpf_neon.c
deleted file mode 100644
index f1a004c..0000000
--- a/av1/common/clpf_neon.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_neon
-#include "./clpf_simd.h"
diff --git a/av1/common/clpf_simd.h b/av1/common/clpf_simd.h
deleted file mode 100644
index c7ffc56..0000000
--- a/av1/common/clpf_simd.h
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "./av1_rtcd.h"
-#include "aom_ports/bitops.h"
-#include "aom_ports/mem.h"
-
-// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
-SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
-                             unsigned int adjdamp) {
-  v128 diff = v128_sub_16(a, b);
-  const v128 sign = v128_shr_n_s16(diff, 15);
-  diff = v128_abs_s16(diff);
-  const v128 s =
-      v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp));
-  return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
-}
-
-// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
-SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
-                           unsigned int adjdamp) {
-  const v256 diff16 = v256_sub_16(a, b);
-  v128 diff = v128_pack_s16_s8(v256_high_v128(diff16), v256_low_v128(diff16));
-  const v128 sign = v128_cmplt_s8(diff, v128_zero());
-  diff = v128_abs_s8(diff);
-  return v128_xor(
-      v128_add_8(sign,
-                 v128_min_u8(diff, v128_ssub_u8(v128_dup_8(strength),
-                                                v128_shr_u8(diff, adjdamp)))),
-      sign);
-}
-
-// delta = 1/16 * constrain(a, x, s, d) + 3/16 * constrain(b, x, s, d) +
-//         1/16 * constrain(c, x, s, d) + 3/16 * constrain(d, x, s, d) +
-//         3/16 * constrain(e, x, s, d) + 1/16 * constrain(f, x, s, d) +
-//         3/16 * constrain(g, x, s, d) + 1/16 * constrain(h, x, s, d)
-SIMD_INLINE v128 calc_delta(v256 x, v256 a, v256 b, v256 c, v256 d, v256 e,
-                            v256 f, v256 g, v256 h, unsigned int s,
-                            unsigned int dmp) {
-  const v128 bdeg =
-      v128_add_8(v128_add_8(constrain(b, x, s, dmp), constrain(d, x, s, dmp)),
-                 v128_add_8(constrain(e, x, s, dmp), constrain(g, x, s, dmp)));
-  const v128 delta = v128_add_8(
-      v128_add_8(v128_add_8(constrain(a, x, s, dmp), constrain(c, x, s, dmp)),
-                 v128_add_8(constrain(f, x, s, dmp), constrain(h, x, s, dmp))),
-      v128_add_8(v128_add_8(bdeg, bdeg), bdeg));
-  return v128_add_8(
-      v128_pack_s16_u8(v256_high_v128(x), v256_low_v128(x)),
-      v128_shr_s8(
-          v128_add_8(v128_dup_8(8),
-                     v128_add_8(delta, v128_cmplt_s8(delta, v128_zero()))),
-          4));
-}
-
-// delta = 1/8 * constrain(a, x, s, d) + 3/8 * constrain(b, x, s, d) +
-//         3/8 * constrain(c, x, s, d) + 1/8 * constrain(d, x, s, d) +
-SIMD_INLINE v128 calc_hdelta(v256 x, v256 a, v256 b, v256 c, v256 d,
-                             unsigned int s, unsigned int dmp) {
-  const v128 bc = v128_add_8(constrain(b, x, s, dmp), constrain(c, x, s, dmp));
-  const v128 delta =
-      v128_add_8(v128_add_8(constrain(a, x, s, dmp), constrain(d, x, s, dmp)),
-                 v128_add_8(v128_add_8(bc, bc), bc));
-  return v128_add_8(
-      v128_pack_s16_u8(v256_high_v128(x), v256_low_v128(x)),
-      v128_shr_s8(
-          v128_add_8(v128_dup_8(4),
-                     v128_add_8(delta, v128_cmplt_s8(delta, v128_zero()))),
-          3));
-}
-
-// Process blocks of width 8, two lines at a time, 8 bit.
-static void SIMD_FUNC(clpf_block8)(uint8_t *dst, const uint16_t *src,
-                                   int dstride, int sstride, int sizey,
-                                   unsigned int strength,
-                                   unsigned int adjdamp) {
-  int y;
-
-  for (y = 0; y < sizey; y += 2) {
-    const v128 l1 = v128_load_aligned(src);
-    const v128 l2 = v128_load_aligned(src + sstride);
-    const v128 l3 = v128_load_aligned(src - sstride);
-    const v128 l4 = v128_load_aligned(src + 2 * sstride);
-    const v256 a = v256_from_v128(v128_load_aligned(src - 2 * sstride), l3);
-    const v256 b = v256_from_v128(l3, l1);
-    const v256 g = v256_from_v128(l2, l4);
-    const v256 h = v256_from_v128(l4, v128_load_aligned(src + 3 * sstride));
-    const v256 c = v256_from_v128(v128_load_unaligned(src - 2),
-                                  v128_load_unaligned(src - 2 + sstride));
-    const v256 d = v256_from_v128(v128_load_unaligned(src - 1),
-                                  v128_load_unaligned(src - 1 + sstride));
-    const v256 e = v256_from_v128(v128_load_unaligned(src + 1),
-                                  v128_load_unaligned(src + 1 + sstride));
-    const v256 f = v256_from_v128(v128_load_unaligned(src + 2),
-                                  v128_load_unaligned(src + 2 + sstride));
-    const v128 o = calc_delta(v256_from_v128(l1, l2), a, b, c, d, e, f, g, h,
-                              strength, adjdamp);
-
-    v64_store_aligned(dst, v128_high_v64(o));
-    v64_store_aligned(dst + dstride, v128_low_v64(o));
-    src += sstride * 2;
-    dst += dstride * 2;
-  }
-}
-
-// Process blocks of width 4, four lines at a time, 8 bit.
-static void SIMD_FUNC(clpf_block4)(uint8_t *dst, const uint16_t *src,
-                                   int dstride, int sstride, int sizey,
-                                   unsigned int strength,
-                                   unsigned int adjdamp) {
-  int y;
-
-  for (y = 0; y < sizey; y += 4) {
-    const v64 l0 = v64_load_aligned(src - 2 * sstride);
-    const v64 l1 = v64_load_aligned(src - sstride);
-    const v64 l2 = v64_load_aligned(src);
-    const v64 l3 = v64_load_aligned(src + sstride);
-    const v64 l4 = v64_load_aligned(src + 2 * sstride);
-    const v64 l5 = v64_load_aligned(src + 3 * sstride);
-    const v64 l6 = v64_load_aligned(src + 4 * sstride);
-    const v64 l7 = v64_load_aligned(src + 5 * sstride);
-    const v128 o =
-        calc_delta(v256_from_v64(l2, l3, l4, l5), v256_from_v64(l0, l1, l2, l3),
-                   v256_from_v64(l1, l2, l3, l4),
-                   v256_from_v64(v64_load_unaligned(src - 2),
-                                 v64_load_unaligned(src + sstride - 2),
-                                 v64_load_unaligned(src + 2 * sstride - 2),
-                                 v64_load_unaligned(src + 3 * sstride - 2)),
-                   v256_from_v64(v64_load_unaligned(src - 1),
-                                 v64_load_unaligned(src + sstride - 1),
-                                 v64_load_unaligned(src + 2 * sstride - 1),
-                                 v64_load_unaligned(src + 3 * sstride - 1)),
-                   v256_from_v64(v64_load_unaligned(src + 1),
-                                 v64_load_unaligned(src + sstride + 1),
-                                 v64_load_unaligned(src + 2 * sstride + 1),
-                                 v64_load_unaligned(src + 3 * sstride + 1)),
-                   v256_from_v64(v64_load_unaligned(src + 2),
-                                 v64_load_unaligned(src + sstride + 2),
-                                 v64_load_unaligned(src + 2 * sstride + 2),
-                                 v64_load_unaligned(src + 3 * sstride + 2)),
-                   v256_from_v64(l3, l4, l5, l6), v256_from_v64(l4, l5, l6, l7),
-                   strength, adjdamp);
-
-    u32_store_aligned(dst, v128_low_u32(v128_shr_n_byte(o, 12)));
-    u32_store_aligned(dst + dstride, v128_low_u32(v128_shr_n_byte(o, 8)));
-    u32_store_aligned(dst + 2 * dstride, v128_low_u32(v128_shr_n_byte(o, 4)));
-    u32_store_aligned(dst + 3 * dstride, v128_low_u32(o));
-
-    dst += 4 * dstride;
-    src += 4 * sstride;
-  }
-}
-
-static void SIMD_FUNC(clpf_hblock8)(uint8_t *dst, const uint16_t *src,
-                                    int dstride, int sstride, int sizey,
-                                    unsigned int strength,
-                                    unsigned int adjdamp) {
-  int y;
-
-  for (y = 0; y < sizey; y += 2) {
-    const v256 x = v256_from_v128(v128_load_aligned(src),
-                                  v128_load_aligned(src + sstride));
-    const v256 a = v256_from_v128(v128_load_unaligned(src - 2),
-                                  v128_load_unaligned(src - 2 + sstride));
-    const v256 b = v256_from_v128(v128_load_unaligned(src - 1),
-                                  v128_load_unaligned(src - 1 + sstride));
-    const v256 c = v256_from_v128(v128_load_unaligned(src + 1),
-                                  v128_load_unaligned(src + 1 + sstride));
-    const v256 d = v256_from_v128(v128_load_unaligned(src + 2),
-                                  v128_load_unaligned(src + 2 + sstride));
-    const v128 o = calc_hdelta(x, a, b, c, d, strength, adjdamp);
-
-    v64_store_aligned(dst, v128_high_v64(o));
-    v64_store_aligned(dst + dstride, v128_low_v64(o));
-    src += sstride * 2;
-    dst += dstride * 2;
-  }
-}
-
-// Process blocks of width 4, four lines at a time, 8 bit.
-static void SIMD_FUNC(clpf_hblock4)(uint8_t *dst, const uint16_t *src,
-                                    int dstride, int sstride, int sizey,
-                                    unsigned int strength,
-                                    unsigned int adjdamp) {
-  int y;
-
-  for (y = 0; y < sizey; y += 4) {
-    const v256 a = v256_from_v64(v64_load_unaligned(src - 2),
-                                 v64_load_unaligned(src + sstride - 2),
-                                 v64_load_unaligned(src + 2 * sstride - 2),
-                                 v64_load_unaligned(src + 3 * sstride - 2));
-    const v256 b = v256_from_v64(v64_load_unaligned(src - 1),
-                                 v64_load_unaligned(src + sstride - 1),
-                                 v64_load_unaligned(src + 2 * sstride - 1),
-                                 v64_load_unaligned(src + 3 * sstride - 1));
-    const v256 c = v256_from_v64(v64_load_unaligned(src + 1),
-                                 v64_load_unaligned(src + sstride + 1),
-                                 v64_load_unaligned(src + 2 * sstride + 1),
-                                 v64_load_unaligned(src + 3 * sstride + 1));
-    const v256 d = v256_from_v64(v64_load_unaligned(src + 2),
-                                 v64_load_unaligned(src + sstride + 2),
-                                 v64_load_unaligned(src + 2 * sstride + 2),
-                                 v64_load_unaligned(src + 3 * sstride + 2));
-
-    const v128 o = calc_hdelta(
-        v256_from_v64(v64_load_aligned(src), v64_load_aligned(src + sstride),
-                      v64_load_aligned(src + 2 * sstride),
-                      v64_load_aligned(src + 3 * sstride)),
-        a, b, c, d, strength, adjdamp);
-
-    u32_store_aligned(dst, v128_low_u32(v128_shr_n_byte(o, 12)));
-    u32_store_aligned(dst + dstride, v128_low_u32(v128_shr_n_byte(o, 8)));
-    u32_store_aligned(dst + 2 * dstride, v128_low_u32(v128_shr_n_byte(o, 4)));
-    u32_store_aligned(dst + 3 * dstride, v128_low_u32(o));
-
-    dst += 4 * dstride;
-    src += 4 * sstride;
-  }
-}
-
-void SIMD_FUNC(aom_clpf_block)(uint8_t *dst, const uint16_t *src, int dstride,
-                               int sstride, int sizex, int sizey,
-                               unsigned int strength, unsigned int dmp) {
-  if ((sizex != 4 && sizex != 8) || ((sizey & 3) && sizex == 4)) {
-    // Fallback to C for odd sizes:
-    // * block widths not 4 or 8
-    // * block heights not a multiple of 4 if the block width is 4
-    aom_clpf_block_c(dst, src, dstride, sstride, sizex, sizey, strength, dmp);
-  } else {
-    (sizex == 4 ? SIMD_FUNC(clpf_block4) : SIMD_FUNC(clpf_block8))(
-        dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
-  }
-}
-
-void SIMD_FUNC(aom_clpf_hblock)(uint8_t *dst, const uint16_t *src, int dstride,
-                                int sstride, int sizex, int sizey,
-                                unsigned int strength, unsigned int dmp) {
-  if ((sizex != 4 && sizex != 8) || ((sizey & 3) && sizex == 4)) {
-    // Fallback to C for odd sizes:
-    // * block widths not 4 or 8
-    // * block heights not a multiple of 4 if the block width is 4
-    aom_clpf_hblock_c(dst, src, dstride, sstride, sizex, sizey, strength, dmp);
-  } else {
-    (sizex == 4 ? SIMD_FUNC(clpf_hblock4) : SIMD_FUNC(clpf_hblock8))(
-        dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
-  }
-}
-
-// delta = 1/16 * constrain(a, x, s, d) + 3/16 * constrain(b, x, s, d) +
-//         1/16 * constrain(c, x, s, d) + 3/16 * constrain(d, x, s, d) +
-//         3/16 * constrain(e, x, s, d) + 1/16 * constrain(f, x, s, d) +
-//         3/16 * constrain(g, x, s, d) + 1/16 * constrain(h, x, s, d)
-SIMD_INLINE v128 calc_delta_hbd(v128 x, v128 a, v128 b, v128 c, v128 d, v128 e,
-                                v128 f, v128 g, v128 h, unsigned int s,
-                                unsigned int dmp) {
-  const v128 bdeg = v128_add_16(
-      v128_add_16(constrain16(b, x, s, dmp), constrain16(d, x, s, dmp)),
-      v128_add_16(constrain16(e, x, s, dmp), constrain16(g, x, s, dmp)));
-  const v128 delta = v128_add_16(
-      v128_add_16(
-          v128_add_16(constrain16(a, x, s, dmp), constrain16(c, x, s, dmp)),
-          v128_add_16(constrain16(f, x, s, dmp), constrain16(h, x, s, dmp))),
-      v128_add_16(v128_add_16(bdeg, bdeg), bdeg));
-  return v128_add_16(
-      x,
-      v128_shr_s16(
-          v128_add_16(v128_dup_16(8),
-                      v128_add_16(delta, v128_cmplt_s16(delta, v128_zero()))),
-          4));
-}
-
-static void calc_delta_hbd4(v128 o, v128 a, v128 b, v128 c, v128 d, v128 e,
-                            v128 f, v128 g, v128 h, uint16_t *dst,
-                            unsigned int s, unsigned int dmp, int dstride) {
-  o = calc_delta_hbd(o, a, b, c, d, e, f, g, h, s, dmp);
-  v64_store_aligned(dst, v128_high_v64(o));
-  v64_store_aligned(dst + dstride, v128_low_v64(o));
-}
-
-static void calc_delta_hbd8(v128 o, v128 a, v128 b, v128 c, v128 d, v128 e,
-                            v128 f, v128 g, v128 h, uint16_t *dst,
-                            unsigned int s, unsigned int adjdamp) {
-  v128_store_aligned(dst,
-                     calc_delta_hbd(o, a, b, c, d, e, f, g, h, s, adjdamp));
-}
-
-// delta = 1/16 * constrain(a, x, s, dmp) + 3/16 * constrain(b, x, s, dmp) +
-//         3/16 * constrain(c, x, s, dmp) + 1/16 * constrain(d, x, s, dmp)
-SIMD_INLINE v128 calc_hdelta_hbd(v128 x, v128 a, v128 b, v128 c, v128 d,
-                                 unsigned int s, unsigned int dmp) {
-  const v128 bc =
-      v128_add_16(constrain16(b, x, s, dmp), constrain16(c, x, s, dmp));
-  const v128 delta = v128_add_16(
-      v128_add_16(constrain16(a, x, s, dmp), constrain16(d, x, s, dmp)),
-      v128_add_16(v128_add_16(bc, bc), bc));
-  return v128_add_16(
-      x,
-      v128_shr_s16(
-          v128_add_16(v128_dup_16(4),
-                      v128_add_16(delta, v128_cmplt_s16(delta, v128_zero()))),
-          3));
-}
-
-static void calc_hdelta_hbd4(v128 o, v128 a, v128 b, v128 c, v128 d,
-                             uint16_t *dst, unsigned int s,
-                             unsigned int adjdamp, int dstride) {
-  o = calc_hdelta_hbd(o, a, b, c, d, s, adjdamp);
-  v64_store_aligned(dst, v128_high_v64(o));
-  v64_store_aligned(dst + dstride, v128_low_v64(o));
-}
-
-static void calc_hdelta_hbd8(v128 o, v128 a, v128 b, v128 c, v128 d,
-                             uint16_t *dst, unsigned int s,
-                             unsigned int adjdamp) {
-  v128_store_aligned(dst, calc_hdelta_hbd(o, a, b, c, d, s, adjdamp));
-}
-
-// Process blocks of width 4, two lines at time.
-static void SIMD_FUNC(clpf_block_hbd4)(uint16_t *dst, const uint16_t *src,
-                                       int dstride, int sstride, int sizey,
-                                       unsigned int strength,
-                                       unsigned int adjdamp) {
-  int y;
-
-  for (y = 0; y < sizey; y += 2) {
-    const v64 l1 = v64_load_aligned(src);
-    const v64 l2 = v64_load_aligned(src + sstride);
-    const v64 l3 = v64_load_aligned(src - sstride);
-    const v64 l4 = v64_load_aligned(src + 2 * sstride);
-    const v128 a = v128_from_v64(v64_load_aligned(src - 2 * sstride), l3);
-    const v128 b = v128_from_v64(l3, l1);
-    const v128 g = v128_from_v64(l2, l4);
-    const v128 h = v128_from_v64(l4, v64_load_aligned(src + 3 * sstride));
-    const v128 c = v128_from_v64(v64_load_unaligned(src - 2),
-                                 v64_load_unaligned(src - 2 + sstride));
-    const v128 d = v128_from_v64(v64_load_unaligned(src - 1),
-                                 v64_load_unaligned(src - 1 + sstride));
-    const v128 e = v128_from_v64(v64_load_unaligned(src + 1),
-                                 v64_load_unaligned(src + 1 + sstride));
-    const v128 f = v128_from_v64(v64_load_unaligned(src + 2),
-                                 v64_load_unaligned(src + 2 + sstride));
-
-    calc_delta_hbd4(v128_from_v64(l1, l2), a, b, c, d, e, f, g, h, dst,
-                    strength, adjdamp, dstride);
-    src += sstride * 2;
-    dst += dstride * 2;
-  }
-}
-
-// The most simple case.  Start here if you need to understand the functions.
-static void SIMD_FUNC(clpf_block_hbd)(uint16_t *dst, const uint16_t *src,
-                                      int dstride, int sstride, int sizey,
-                                      unsigned int strength,
-                                      unsigned int adjdamp) {
-  int y;
-
-  for (y = 0; y < sizey; y++) {
-    const v128 o = v128_load_aligned(src);
-    const v128 a = v128_load_aligned(src - 2 * sstride);
-    const v128 b = v128_load_aligned(src - 1 * sstride);
-    const v128 g = v128_load_aligned(src + sstride);
-    const v128 h = v128_load_aligned(src + 2 * sstride);
-    const v128 c = v128_load_unaligned(src - 2);
-    const v128 d = v128_load_unaligned(src - 1);
-    const v128 e = v128_load_unaligned(src + 1);
-    const v128 f = v128_load_unaligned(src + 2);
-
-    calc_delta_hbd8(o, a, b, c, d, e, f, g, h, dst, strength, adjdamp);
-    src += sstride;
-    dst += dstride;
-  }
-}
-
-// Process blocks of width 4, horizontal filter, two lines at time.
-static void SIMD_FUNC(clpf_hblock_hbd4)(uint16_t *dst, const uint16_t *src,
-                                        int dstride, int sstride, int sizey,
-                                        unsigned int strength,
-                                        unsigned int adjdamp) {
-  int y;
-
-  for (y = 0; y < sizey; y += 2) {
-    const v128 a = v128_from_v64(v64_load_unaligned(src - 2),
-                                 v64_load_unaligned(src - 2 + sstride));
-    const v128 b = v128_from_v64(v64_load_unaligned(src - 1),
-                                 v64_load_unaligned(src - 1 + sstride));
-    const v128 c = v128_from_v64(v64_load_unaligned(src + 1),
-                                 v64_load_unaligned(src + 1 + sstride));
-    const v128 d = v128_from_v64(v64_load_unaligned(src + 2),
-                                 v64_load_unaligned(src + 2 + sstride));
-
-    calc_hdelta_hbd4(v128_from_v64(v64_load_unaligned(src),
-                                   v64_load_unaligned(src + sstride)),
-                     a, b, c, d, dst, strength, adjdamp, dstride);
-    src += sstride * 2;
-    dst += dstride * 2;
-  }
-}
-
-// Process blocks of width 8, horizontal filter, two lines at time.
-static void SIMD_FUNC(clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src,
-                                       int dstride, int sstride, int sizey,
-                                       unsigned int strength,
-                                       unsigned int adjdamp) {
-  int y;
-
-  for (y = 0; y < sizey; y++) {
-    const v128 o = v128_load_aligned(src);
-    const v128 a = v128_load_unaligned(src - 2);
-    const v128 b = v128_load_unaligned(src - 1);
-    const v128 c = v128_load_unaligned(src + 1);
-    const v128 d = v128_load_unaligned(src + 2);
-
-    calc_hdelta_hbd8(o, a, b, c, d, dst, strength, adjdamp);
-    src += sstride;
-    dst += dstride;
-  }
-}
-
-void SIMD_FUNC(aom_clpf_block_hbd)(uint16_t *dst, const uint16_t *src,
-                                   int dstride, int sstride, int sizex,
-                                   int sizey, unsigned int strength,
-                                   unsigned int dmp) {
-  if ((sizex != 4 && sizex != 8) || ((sizey & 1) && sizex == 4)) {
-    // Fallback to C for odd sizes:
-    // * block width not 4 or 8
-    // * block heights not a multiple of 2 if the block width is 4
-    aom_clpf_block_hbd_c(dst, src, dstride, sstride, sizex, sizey, strength,
-                         dmp);
-  } else {
-    (sizex == 4 ? SIMD_FUNC(clpf_block_hbd4) : SIMD_FUNC(clpf_block_hbd))(
-        dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
-  }
-}
-
-void SIMD_FUNC(aom_clpf_hblock_hbd)(uint16_t *dst, const uint16_t *src,
-                                    int dstride, int sstride, int sizex,
-                                    int sizey, unsigned int strength,
-                                    unsigned int dmp) {
-  if ((sizex != 4 && sizex != 8) || ((sizey & 1) && sizex == 4)) {
-    // Fallback to C for odd sizes:
-    // * block width not 4 or 8
-    // * block heights not a multiple of 2 if the block width is 4
-    aom_clpf_hblock_hbd_c(dst, src, dstride, sstride, sizex, sizey, strength,
-                          dmp);
-  } else {
-    (sizex == 4 ? SIMD_FUNC(clpf_hblock_hbd4) : SIMD_FUNC(clpf_hblock_hbd))(
-        dst, src, dstride, sstride, sizey, strength, dmp - get_msb(strength));
-  }
-}
diff --git a/av1/common/clpf_sse2.c b/av1/common/clpf_sse2.c
deleted file mode 100644
index e29c2ab..0000000
--- a/av1/common/clpf_sse2.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_sse2
-#include "./clpf_simd.h"
diff --git a/av1/common/clpf_sse4.c b/av1/common/clpf_sse4.c
deleted file mode 100644
index 537139f..0000000
--- a/av1/common/clpf_sse4.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_sse4_1
-#include "./clpf_simd.h"
diff --git a/av1/common/clpf_ssse3.c b/av1/common/clpf_ssse3.c
deleted file mode 100644
index d7ed8de..0000000
--- a/av1/common/clpf_ssse3.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_simd.h"
-#define SIMD_FUNC(name) name##_ssse3
-#include "./clpf_simd.h"
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 2a31d39..436506a 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1034,12 +1034,7 @@
 #if CONFIG_INTRABC
   if (cm->allow_intrabc && NO_FILTER_FOR_IBC) return;
 #endif  // CONFIG_INTRABC
-#if CONFIG_CDEF_SINGLEPASS
   cm->cdef_pri_damping = cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
-#else
-  cm->cdef_pri_damping = aom_rb_read_literal(rb, 1) + 5;
-  cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
-#endif
   cm->cdef_bits = aom_rb_read_literal(rb, 2);
   cm->nb_cdef_strengths = 1 << cm->cdef_bits;
   for (int i = 0; i < cm->nb_cdef_strengths; i++) {
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index a0f67e1..99680b3 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2501,13 +2501,8 @@
   if (cm->allow_intrabc && NO_FILTER_FOR_IBC) return;
 #endif  // CONFIG_INTRABC
   int i;
-#if CONFIG_CDEF_SINGLEPASS
   aom_wb_write_literal(wb, cm->cdef_pri_damping - 3, 2);
   assert(cm->cdef_pri_damping == cm->cdef_sec_damping);
-#else
-  aom_wb_write_literal(wb, cm->cdef_pri_damping - 5, 1);
-  aom_wb_write_literal(wb, cm->cdef_sec_damping - 3, 2);
-#endif
   aom_wb_write_literal(wb, cm->cdef_bits, 2);
   for (i = 0; i < cm->nb_cdef_strengths; i++) {
     aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index 09fe563..be97b65 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -68,16 +68,11 @@
                                 uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
                                 int fast) {
   uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
-#if !CONFIG_CDEF_SINGLEPASS
-  const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
-#endif
   int i, j;
   uint64_t best_tot_mse = (uint64_t)1 << 63;
   int best_id0 = 0;
   int best_id1 = 0;
-#if CONFIG_CDEF_SINGLEPASS
   const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
-#endif
   memset(tot_mse, 0, sizeof(tot_mse));
   for (i = 0; i < sb_count; i++) {
     int gi;
@@ -314,11 +309,7 @@
   int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
   int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
   uint64_t(*mse[2])[TOTAL_STRENGTHS];
-#if CONFIG_CDEF_SINGLEPASS
   int pri_damping = 3 + (cm->base_qindex >> 6);
-#else
-  int pri_damping = 6;
-#endif
   int sec_damping = 3 + (cm->base_qindex >> 6);
   int i;
   int nb_strengths;
@@ -449,7 +440,6 @@
           int xsize = (nhb << mi_wide_l2[pli]) +
                       CDEF_HBORDER * (fbc != nhfb - 1) + xoff;
           sec_strength = gi % CDEF_SEC_STRENGTHS;
-#if CONFIG_CDEF_SINGLEPASS
           copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
                        src[pli],
                        (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
@@ -459,19 +449,6 @@
                          dir, &dirinit, var, pli, dlist, cdef_count, threshold,
                          sec_strength + (sec_strength == 3), pri_damping,
                          sec_damping, coeff_shift);
-#else
-          if (sec_strength == 0)
-            copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
-                         src[pli],
-                         (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
-                         (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
-                         stride[pli], ysize, xsize);
-          cdef_filter_fb(sec_strength ? NULL : (uint8_t *)in, CDEF_BSTRIDE,
-                         tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var,
-                         pli, dlist, cdef_count, threshold,
-                         sec_strength + (sec_strength == 3), sec_damping,
-                         pri_damping, coeff_shift, sec_strength != 0, 1);
-#endif
           curr_mse = compute_cdef_dist(
               ref_coeff[pli] +
                   (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 2bab3ec..6419acb 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -110,7 +110,6 @@
 set(CONFIG_AMVR 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_AOM_QM 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_BGSPRITE 0 CACHE NUMBER "AV1 experiment flag.")
-set(CONFIG_CDEF_SINGLEPASS 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_CFL 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_COLORSPACE_HEADERS 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_DAALA_TX 0 CACHE NUMBER "AV1 experiment flag.")
diff --git a/configure b/configure
index 2ffeafe..ac58848 100755
--- a/configure
+++ b/configure
@@ -248,7 +248,6 @@
 "
 EXPERIMENT_LIST="
     fp_mb_stats
-    cdef_singlepass
     rect_tx_ext
     rect_tx_ext_intra
     short_filter
@@ -491,7 +490,6 @@
     # Enable adopted experiments by default
     soft_enable adopted_experiments
     if enabled adopted_experiments; then
-      soft_enable cdef_singlepass
       soft_enable ext_intra
       soft_enable intra_edge
       soft_enable mv_compress
diff --git a/test/clpf_test.cc b/test/clpf_test.cc
deleted file mode 100644
index ecb0428..0000000
--- a/test/clpf_test.cc
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/common/cdef_block.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-typedef void (*clpf_block_t)(uint8_t *dst, const uint16_t *src, int dstride,
-                             int sstride, int sizex, int sizey,
-                             unsigned int strength, unsigned int bitdepth);
-
-typedef std::tr1::tuple<clpf_block_t, clpf_block_t, int, int>
-    clpf_block_param_t;
-
-class CDEFClpfBlockTest : public ::testing::TestWithParam<clpf_block_param_t> {
- public:
-  virtual ~CDEFClpfBlockTest() {}
-  virtual void SetUp() {
-    clpf = GET_PARAM(0);
-    ref_clpf = GET_PARAM(1);
-    sizex = GET_PARAM(2);
-    sizey = GET_PARAM(3);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  int sizex;
-  int sizey;
-  clpf_block_t clpf;
-  clpf_block_t ref_clpf;
-};
-
-typedef CDEFClpfBlockTest CDEFClpfSpeedTest;
-
-#if CONFIG_HIGHBITDEPTH
-typedef void (*clpf_block_hbd_t)(uint16_t *dst, const uint16_t *src,
-                                 int dstride, int sstride, int sizex, int sizey,
-                                 unsigned int strength, unsigned int bitdepth);
-
-typedef std::tr1::tuple<clpf_block_hbd_t, clpf_block_hbd_t, int, int>
-    clpf_block_hbd_param_t;
-
-class CDEFClpfBlockHbdTest
-    : public ::testing::TestWithParam<clpf_block_hbd_param_t> {
- public:
-  virtual ~CDEFClpfBlockHbdTest() {}
-  virtual void SetUp() {
-    clpf = GET_PARAM(0);
-    ref_clpf = GET_PARAM(1);
-    sizex = GET_PARAM(2);
-    sizey = GET_PARAM(3);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  int sizex;
-  int sizey;
-  clpf_block_hbd_t clpf;
-  clpf_block_hbd_t ref_clpf;
-};
-
-typedef CDEFClpfBlockHbdTest ClpfHbdSpeedTest;
-#endif
-
-template <typename pixel>
-void test_clpf(int w, int h, unsigned int depth, unsigned int iterations,
-               void (*clpf)(pixel *dst, const uint16_t *src, int dstride,
-                            int sstride, int sizex, int sizey,
-                            unsigned int strength, unsigned int bitdepth),
-               void (*ref_clpf)(pixel *dst, const uint16_t *src, int dstride,
-                                int sstride, int sizex, int sizey,
-                                unsigned int strength, unsigned int bitdepth)) {
-  const int size = 24;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint16_t, s[size * size]);
-  DECLARE_ALIGNED(16, pixel, d[size * size]);
-  DECLARE_ALIGNED(16, pixel, ref_d[size * size]);
-  memset(ref_d, 0, size * size * sizeof(*ref_d));
-  memset(d, 0, size * size * sizeof(*d));
-
-  int error = 0, pos = 0, xpos = 8, ypos = 8;
-  unsigned int strength = 0, bits, level, count, damp = 0, boundary = 0;
-
-  assert(size >= w + 16 && size >= h + 16);
-  assert(depth >= 8);
-
-  // Test every combination of:
-  // * Input with up to <depth> bits of noise
-  // * Noise level around every value from 0 to (1<<depth)-1
-  // * All strengths
-  // * All dampings
-  // * Boundaries
-  // If clpf and ref_clpf are the same, we're just testing speed
-  for (boundary = 0; boundary < 16; boundary++) {
-    for (count = 0; count < iterations; count++) {
-      for (level = 0; level < (1U << depth) && !error;
-           level += (1 + 4 * !!boundary) << (depth - 8)) {
-        for (bits = 1; bits <= depth && !error; bits++) {
-          for (damp = 4 + depth - 8; damp < depth - 1 && !error; damp++) {
-            for (int i = 0; i < size * size; i++)
-              s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
-                           (1 << depth) - 1);
-            if (boundary) {
-              if (boundary & 1) {  // Left
-                for (int i = 0; i < size; i++)
-                  for (int j = 0; j < xpos; j++)
-                    s[i * size + j] = CDEF_VERY_LARGE;
-              }
-              if (boundary & 2) {  // Right
-                for (int i = 0; i < size; i++)
-                  for (int j = xpos + w; j < size; j++)
-                    s[i * size + j] = CDEF_VERY_LARGE;
-              }
-              if (boundary & 4) {  // Above
-                for (int i = 0; i < ypos; i++)
-                  for (int j = 0; j < size; j++)
-                    s[i * size + j] = CDEF_VERY_LARGE;
-              }
-              if (boundary & 8) {  // Below
-                for (int i = ypos + h; i < size; i++)
-                  for (int j = 0; j < size; j++)
-                    s[i * size + j] = CDEF_VERY_LARGE;
-              }
-            }
-            for (strength = depth - 8; strength < depth - 5 && !error;
-                 strength += !error) {
-              ref_clpf(ref_d + ypos * size + xpos, s + ypos * size + xpos, size,
-                       size, w, h, 1 << strength, damp);
-              if (clpf != ref_clpf)
-                ASM_REGISTER_STATE_CHECK(clpf(d + ypos * size + xpos,
-                                              s + ypos * size + xpos, size,
-                                              size, w, h, 1 << strength, damp));
-              if (ref_clpf != clpf) {
-                for (pos = 0; pos < size * size && !error; pos++) {
-                  error = ref_d[pos] != d[pos];
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  pos--;
-  EXPECT_EQ(0, error)
-      << "Error: CDEFClpfBlockTest, SIMD and C mismatch." << std::endl
-      << "First error at " << pos % size << "," << pos / size << " ("
-      << (int16_t)ref_d[pos] << " != " << (int16_t)d[pos] << ") " << std::endl
-      << "strength: " << (1 << strength) << std::endl
-      << "damping: " << damp << std::endl
-      << "depth: " << depth << std::endl
-      << "boundary: " << boundary << std::endl
-      << "w: " << w << std::endl
-      << "h: " << h << std::endl
-      << "A=" << (pos > 2 * size ? (int16_t)s[pos - 2 * size] : -1) << std::endl
-      << "B=" << (pos > size ? (int16_t)s[pos - size] : -1) << std::endl
-      << "C=" << (pos % size - 2 >= 0 ? (int16_t)s[pos - 2] : -1) << std::endl
-      << "D=" << (pos % size - 1 >= 0 ? (int16_t)s[pos - 1] : -1) << std::endl
-      << "X=" << (int16_t)s[pos] << std::endl
-      << "E=" << (pos % size + 1 < size ? (int16_t)s[pos + 1] : -1) << std::endl
-      << "F=" << (pos % size + 2 < size ? (int16_t)s[pos + 2] : -1) << std::endl
-      << "G=" << (pos + size < size * size ? (int16_t)s[pos + size] : -1)
-      << std::endl
-      << "H="
-      << (pos + 2 * size < size * size ? (int16_t)s[pos + 2 * size] : -1)
-      << std::endl;
-}
-
-template <typename pixel>
-void test_clpf_speed(int w, int h, unsigned int depth, unsigned int iterations,
-                     void (*clpf)(pixel *dst, const uint16_t *src, int dstride,
-                                  int sstride, int sizex, int sizey,
-                                  unsigned int strength, unsigned int bitdepth),
-                     void (*ref_clpf)(pixel *dst, const uint16_t *src,
-                                      int dstride, int sstride, int sizex,
-                                      int sizey, unsigned int strength,
-                                      unsigned int bitdepth)) {
-  aom_usec_timer ref_timer;
-  aom_usec_timer timer;
-
-  aom_usec_timer_start(&ref_timer);
-  test_clpf(w, h, depth, iterations, ref_clpf, ref_clpf);
-  aom_usec_timer_mark(&ref_timer);
-  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
-  aom_usec_timer_start(&timer);
-  test_clpf(w, h, depth, iterations, clpf, clpf);
-  aom_usec_timer_mark(&timer);
-  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
-
-#if 0
-  std::cout << "[          ] C time = " << ref_elapsed_time / 1000
-            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
-#endif
-
-  EXPECT_GT(ref_elapsed_time, elapsed_time)
-      << "Error: CDEFClpfSpeedTest, SIMD slower than C." << std::endl
-      << "C time: " << ref_elapsed_time << " us" << std::endl
-      << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-TEST_P(CDEFClpfBlockTest, TestSIMDNoMismatch) {
-  test_clpf(sizex, sizey, 8, 1, clpf, ref_clpf);
-}
-
-TEST_P(CDEFClpfSpeedTest, DISABLED_TestSpeed) {
-  test_clpf_speed(sizex, sizey, 8, 16, clpf, ref_clpf);
-}
-
-#if CONFIG_HIGHBITDEPTH
-TEST_P(CDEFClpfBlockHbdTest, TestSIMDNoMismatch) {
-  test_clpf(sizex, sizey, 12, 1, clpf, ref_clpf);
-}
-
-TEST_P(ClpfHbdSpeedTest, DISABLED_TestSpeed) {
-  test_clpf_speed(sizex, sizey, 12, 4, clpf, ref_clpf);
-}
-#endif
-
-using std::tr1::make_tuple;
-
-// VS compiling for 32 bit targets does not support vector types in
-// structs as arguments, which makes the v256 type of the intrinsics
-// hard to support, so optimizations for this target are disabled.
-#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
-// Test all supported architectures and block sizes
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, CDEFClpfBlockTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 8),
-        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 4),
-        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4, 8),
-        make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, CDEFClpfBlockTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 8),
-        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 8, 4),
-        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 8),
-        make_tuple(&aom_clpf_block_ssse3, &aom_clpf_block_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_ssse3, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
-    SSE4_1, CDEFClpfBlockTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 8),
-        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 8, 4),
-        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 8),
-        make_tuple(&aom_clpf_block_sse4_1, &aom_clpf_block_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_sse4_1, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, CDEFClpfBlockTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 8),
-        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 4),
-        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4, 8),
-        make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 4, 4)));
-#endif
-
-#if CONFIG_HIGHBITDEPTH
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, CDEFClpfBlockHbdTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, CDEFClpfBlockHbdTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
-    SSE4_1, CDEFClpfBlockHbdTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, CDEFClpfBlockHbdTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 4, 4),
-        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 4),
-        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 4, 8),
-        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 4, 4)));
-#endif
-#endif  // CONFIG_HIGHBITDEPTH
-
-// Test speed for all supported architectures
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, CDEFClpfSpeedTest,
-    ::testing::Values(make_tuple(&aom_clpf_block_sse2, &aom_clpf_block_c, 8, 8),
-                      make_tuple(&aom_clpf_hblock_sse2, &aom_clpf_hblock_c, 8,
-                                 8)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFClpfSpeedTest,
-                        ::testing::Values(make_tuple(&aom_clpf_block_ssse3,
-                                                     &aom_clpf_block_c, 8, 8),
-                                          make_tuple(&aom_clpf_hblock_ssse3,
-                                                     &aom_clpf_hblock_c, 8,
-                                                     8)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFClpfSpeedTest,
-                        ::testing::Values(make_tuple(&aom_clpf_block_sse4_1,
-                                                     &aom_clpf_block_c, 8, 8),
-                                          make_tuple(&aom_clpf_hblock_sse4_1,
-                                                     &aom_clpf_hblock_c, 8,
-                                                     8)));
-
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, CDEFClpfSpeedTest,
-    ::testing::Values(make_tuple(&aom_clpf_block_neon, &aom_clpf_block_c, 8, 8),
-                      make_tuple(&aom_clpf_hblock_neon, &aom_clpf_hblock_c, 8,
-                                 8)));
-#endif
-
-#if CONFIG_HIGHBITDEPTH
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, ClpfHbdSpeedTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_sse2, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse2, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, ClpfHbdSpeedTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_ssse3, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_ssse3, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
-    SSE4_1, ClpfHbdSpeedTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_sse4_1, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_sse4_1, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, ClpfHbdSpeedTest,
-    ::testing::Values(
-        make_tuple(&aom_clpf_block_hbd_neon, &aom_clpf_block_hbd_c, 8, 8),
-        make_tuple(&aom_clpf_hblock_hbd_neon, &aom_clpf_hblock_hbd_c, 8, 8)));
-#endif
-#endif  // CONFIG_HIGHBITDEPTH
-#endif  // defined(_WIN64) || !defined(_MSC_VER)
-
-}  // namespace
diff --git a/test/dering_test.cc b/test/dering_test.cc
deleted file mode 100644
index 6b76561..0000000
--- a/test/dering_test.cc
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-*/
-
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/common/cdef_block.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-
-typedef std::tr1::tuple<cdef_direction_func, cdef_direction_func, int>
-    dering_dir_param_t;
-
-class CDEFDeringDirTest : public ::testing::TestWithParam<dering_dir_param_t> {
- public:
-  virtual ~CDEFDeringDirTest() {}
-  virtual void SetUp() {
-    dering = GET_PARAM(0);
-    ref_dering = GET_PARAM(1);
-    bsize = GET_PARAM(2);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  int bsize;
-  cdef_direction_func dering;
-  cdef_direction_func ref_dering;
-};
-
-typedef CDEFDeringDirTest CDEFDeringSpeedTest;
-
-void test_dering(int bsize, int iterations, cdef_direction_func dering,
-                 cdef_direction_func ref_dering) {
-  const int size = 8;
-  const int ysize = size + 2 * CDEF_VBORDER;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
-  DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
-  DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
-  memset(ref_d, 0, sizeof(ref_d));
-  memset(d, 0, sizeof(d));
-
-  int error = 0, threshold = 0, dir;
-  int boundary, damping, depth, bits, level, count,
-      errdepth = 0, errthreshold = 0, errboundary = 0, errdamping = 0;
-  unsigned int pos = 0;
-
-  for (boundary = 0; boundary < 16; boundary++) {
-    for (depth = 8; depth <= 12; depth += 2) {
-      for (damping = 5 + depth - 8; damping < 7 + depth - 8; damping++) {
-        for (count = 0; count < iterations; count++) {
-          for (level = 0; level < (1 << depth) && !error;
-               level += (1 + 4 * !!boundary) << (depth - 8)) {
-            for (bits = 1; bits <= depth && !error; bits++) {
-              for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
-                s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
-                             (1 << depth) - 1);
-              if (boundary) {
-                if (boundary & 1) {  // Left
-                  for (int i = 0; i < ysize; i++)
-                    for (int j = 0; j < CDEF_HBORDER; j++)
-                      s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                }
-                if (boundary & 2) {  // Right
-                  for (int i = 0; i < ysize; i++)
-                    for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
-                      s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                }
-                if (boundary & 4) {  // Above
-                  for (int i = 0; i < CDEF_VBORDER; i++)
-                    for (int j = 0; j < CDEF_BSTRIDE; j++)
-                      s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                }
-                if (boundary & 8) {  // Below
-                  for (int i = CDEF_VBORDER + size; i < ysize; i++)
-                    for (int j = 0; j < CDEF_BSTRIDE; j++)
-                      s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
-                }
-              }
-              for (dir = 0; dir < 8; dir++) {
-                for (threshold = 0; threshold < 64 << (depth - 8) && !error;
-                     threshold += (1 + 4 * !!boundary) << (depth - 8)) {
-                  ref_dering(ref_d, size,
-                             s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
-                             threshold, dir, damping);
-                  // If dering and ref_dering are the same, we're just testing
-                  // speed
-                  if (dering != ref_dering)
-                    ASM_REGISTER_STATE_CHECK(dering(
-                        d, size, s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
-                        threshold, dir, damping));
-                  if (ref_dering != dering) {
-                    for (pos = 0; pos < sizeof(d) / sizeof(*d) && !error;
-                         pos++) {
-                      error = ref_d[pos] != d[pos];
-                      errdepth = depth;
-                      errthreshold = threshold;
-                      errboundary = boundary;
-                      errdamping = damping;
-                    }
-                  }
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  pos--;
-  EXPECT_EQ(0, error) << "Error: CDEFDeringDirTest, SIMD and C mismatch."
-                      << std::endl
-                      << "First error at " << pos % size << "," << pos / size
-                      << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
-                      << ") " << std::endl
-                      << "threshold: " << errthreshold << std::endl
-                      << "damping: " << errdamping << std::endl
-                      << "depth: " << errdepth << std::endl
-                      << "size: " << bsize << std::endl
-                      << "boundary: " << errboundary << std::endl
-                      << std::endl;
-}
-
-void test_dering_speed(int bsize, int iterations, cdef_direction_func dering,
-                       cdef_direction_func ref_dering) {
-  aom_usec_timer ref_timer;
-  aom_usec_timer timer;
-
-  aom_usec_timer_start(&ref_timer);
-  test_dering(bsize, iterations, ref_dering, ref_dering);
-  aom_usec_timer_mark(&ref_timer);
-  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
-  aom_usec_timer_start(&timer);
-  test_dering(bsize, iterations, dering, dering);
-  aom_usec_timer_mark(&timer);
-  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
-
-#if 0
-  std::cout << "[          ] C time = " << ref_elapsed_time / 1000
-            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
-#endif
-
-  EXPECT_GT(ref_elapsed_time, elapsed_time)
-      << "Error: CDEFDeringSpeedTest, SIMD slower than C." << std::endl
-      << "C time: " << ref_elapsed_time << " us" << std::endl
-      << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-typedef int (*find_dir_t)(const uint16_t *img, int stride, int32_t *var,
-                          int coeff_shift);
-
-typedef std::tr1::tuple<find_dir_t, find_dir_t> find_dir_param_t;
-
-class CDEFDeringFindDirTest
-    : public ::testing::TestWithParam<find_dir_param_t> {
- public:
-  virtual ~CDEFDeringFindDirTest() {}
-  virtual void SetUp() {
-    finddir = GET_PARAM(0);
-    ref_finddir = GET_PARAM(1);
-  }
-
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  find_dir_t finddir;
-  find_dir_t ref_finddir;
-};
-
-typedef CDEFDeringFindDirTest CDEFDeringFindDirSpeedTest;
-
-void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
-                                 int coeff_shift),
-                  int (*ref_finddir)(const uint16_t *img, int stride,
-                                     int32_t *var, int coeff_shift)) {
-  const int size = 8;
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED(16, uint16_t, s[size * size]);
-
-  int error = 0;
-  int depth, bits, level, count, errdepth = 0;
-  int ref_res = 0, res = 0;
-  int32_t ref_var = 0, var = 0;
-
-  for (depth = 8; depth <= 12 && !error; depth += 2) {
-    for (count = 0; count < 512 && !error; count++) {
-      for (level = 0; level < (1 << depth) && !error;
-           level += 1 << (depth - 8)) {
-        for (bits = 1; bits <= depth && !error; bits++) {
-          for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
-            s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
-                         (1 << depth) - 1);
-          for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
-            ref_res = ref_finddir(s, size, &ref_var, depth - 8);
-          if (finddir != ref_finddir)
-            ASM_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
-          if (ref_finddir != finddir) {
-            if (res != ref_res || var != ref_var) error = 1;
-            errdepth = depth;
-          }
-        }
-      }
-    }
-  }
-
-  EXPECT_EQ(0, error) << "Error: CDEFDeringFindDirTest, SIMD and C mismatch."
-                      << std::endl
-                      << "return: " << res << " : " << ref_res << std::endl
-                      << "var: " << var << " : " << ref_var << std::endl
-                      << "depth: " << errdepth << std::endl
-                      << std::endl;
-}
-
-void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
-                                       int32_t *var, int coeff_shift),
-                        int (*ref_finddir)(const uint16_t *img, int stride,
-                                           int32_t *var, int coeff_shift)) {
-  aom_usec_timer ref_timer;
-  aom_usec_timer timer;
-
-  aom_usec_timer_start(&ref_timer);
-  test_finddir(ref_finddir, ref_finddir);
-  aom_usec_timer_mark(&ref_timer);
-  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
-
-  aom_usec_timer_start(&timer);
-  test_finddir(finddir, finddir);
-  aom_usec_timer_mark(&timer);
-  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
-
-#if 0
-  std::cout << "[          ] C time = " << ref_elapsed_time / 1000
-            << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
-#endif
-
-  EXPECT_GT(ref_elapsed_time, elapsed_time)
-      << "Error: CDEFDeringFindDirSpeedTest, SIMD slower than C." << std::endl
-      << "C time: " << ref_elapsed_time << " us" << std::endl
-      << "SIMD time: " << elapsed_time << " us" << std::endl;
-}
-
-TEST_P(CDEFDeringDirTest, TestSIMDNoMismatch) {
-  test_dering(bsize, 1, dering, ref_dering);
-}
-
-TEST_P(CDEFDeringSpeedTest, DISABLED_TestSpeed) {
-  test_dering_speed(bsize, 4, dering, ref_dering);
-}
-
-TEST_P(CDEFDeringFindDirTest, TestSIMDNoMismatch) {
-  test_finddir(finddir, ref_finddir);
-}
-
-TEST_P(CDEFDeringFindDirSpeedTest, DISABLED_TestSpeed) {
-  test_finddir_speed(finddir, ref_finddir);
-}
-
-using std::tr1::make_tuple;
-
-// VS compiling for 32 bit targets does not support vector types in
-// structs as arguments, which makes the v256 type of the intrinsics
-// hard to support, so optimizations for this target are disabled.
-#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringDirTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_sse2,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_sse2,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_sse2,
-                                                     &cdef_find_dir_c)));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringDirTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_ssse3,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_ssse3,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringDirTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_sse4_1,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_sse4_1,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringDirTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_neon,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_neon,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_neon,
-                                                     &cdef_find_dir_c)));
-#endif
-
-// Test speed for all supported architectures
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_sse2,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_sse2,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_sse2,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_ssse3,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_ssse3,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_sse4_1,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_sse4_1,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_direction_4x4_neon,
-                                                     &cdef_direction_4x4_c, 4),
-                                          make_tuple(&cdef_direction_8x8_neon,
-                                                     &cdef_direction_8x8_c,
-                                                     8)));
-INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirSpeedTest,
-                        ::testing::Values(make_tuple(&cdef_find_dir_neon,
-                                                     &cdef_find_dir_c)));
-#endif
-
-#endif  // defined(_WIN64) || !defined(_MSC_VER)
-}  // namespace
diff --git a/test/test.cmake b/test/test.cmake
index 2101800..4a66c20 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -130,16 +130,9 @@
         ${AOM_UNIT_TEST_ENCODER_SOURCES}
         "${AOM_ROOT}/test/motion_vector_test.cc")
 
-    if (CONFIG_CDEF_SINGLEPASS)
-      set(AOM_UNIT_TEST_COMMON_SOURCES
-          ${AOM_UNIT_TEST_COMMON_SOURCES}
-          "${AOM_ROOT}/test/cdef_test.cc")
-    else ()
-      set(AOM_UNIT_TEST_COMMON_SOURCES
-          ${AOM_UNIT_TEST_COMMON_SOURCES}
-          "${AOM_ROOT}/test/clpf_test.cc"
-          "${AOM_ROOT}/test/dering_test.cc")
-    endif ()
+    set(AOM_UNIT_TEST_COMMON_SOURCES
+        ${AOM_UNIT_TEST_COMMON_SOURCES}
+        "${AOM_ROOT}/test/cdef_test.cc")
 
     if (CONFIG_INTRABC)
         set(AOM_UNIT_TEST_COMMON_SOURCES
diff --git a/test/test.mk b/test/test.mk
index 29722a7..4d0fad2 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -127,12 +127,7 @@
 
 LIBAOM_TEST_SRCS-yes                   += convolve_test.cc
 LIBAOM_TEST_SRCS-yes                   += lpf_test.cc
-ifeq ($(CONFIG_CDEF_SINGLEPASS),yes)
 LIBAOM_TEST_SRCS-yes                   += cdef_test.cc
-else
-LIBAOM_TEST_SRCS-yes                   += dering_test.cc
-LIBAOM_TEST_SRCS-yes                   += clpf_test.cc
-endif
 LIBAOM_TEST_SRCS-yes                   += simd_cmp_impl.h
 LIBAOM_TEST_SRCS-$(HAVE_SSE2)          += simd_cmp_sse2.cc
 LIBAOM_TEST_SRCS-$(HAVE_SSSE3)         += simd_cmp_ssse3.cc