Exclude highbd loopfilter when CONFIG_AV1_HIGHBITDEPTH is disabled

BUG=aomedia:2397

Change-Id: I200694aa0aa1f6ccbcf292137584358cd2f64ce1
diff --git a/aom_dsp/aom_dsp.cmake b/aom_dsp/aom_dsp.cmake
index 1f206b5..c6c2925 100644
--- a/aom_dsp/aom_dsp.cmake
+++ b/aom_dsp/aom_dsp.cmake
@@ -78,7 +78,8 @@
 
 if(NOT CONFIG_AV1_HIGHBITDEPTH)
   list(REMOVE_ITEM AOM_DSP_COMMON_INTRIN_SSE2
-                   "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_sse2.c")
+                   "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_sse2.c"
+                   "${AOM_ROOT}/aom_dsp/x86/highbd_loopfilter_sse2.c")
 endif()
 
 list(APPEND AOM_DSP_COMMON_ASM_SSSE3
@@ -116,7 +117,8 @@
 
 if(NOT CONFIG_AV1_HIGHBITDEPTH)
   list(REMOVE_ITEM AOM_DSP_COMMON_INTRIN_AVX2
-                   "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c")
+                   "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c"
+                   "${AOM_ROOT}/aom_dsp/x86/highbd_loopfilter_avx2.c")
 endif()
 
 list(APPEND AOM_DSP_COMMON_INTRIN_NEON "${AOM_ROOT}/aom_dsp/arm/fwd_txfm_neon.c"
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index b3fe0bf..a9f0a92 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -417,56 +417,58 @@
 add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 specialize qw/aom_lpf_horizontal_4_dual sse2/;
 
-add_proto qw/void aom_highbd_lpf_vertical_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_vertical_14 sse2/;
-
-add_proto qw/void aom_highbd_lpf_vertical_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_vertical_14_dual sse2 avx2/;
-
-add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_vertical_8 sse2/;
-
-add_proto qw/void aom_highbd_lpf_vertical_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_vertical_6 sse2/;
-
 add_proto qw/void aom_lpf_vertical_6_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
 specialize qw/aom_lpf_vertical_6_dual sse2/;
 
-add_proto qw/void aom_highbd_lpf_vertical_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_vertical_6_dual sse2/;
+if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
+  add_proto qw/void aom_highbd_lpf_vertical_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+  specialize qw/aom_highbd_lpf_vertical_14 sse2/;
 
-add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;
+  add_proto qw/void aom_highbd_lpf_vertical_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+  specialize qw/aom_highbd_lpf_vertical_14_dual sse2 avx2/;
 
-add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_vertical_4 sse2/;
+  add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+  specialize qw/aom_highbd_lpf_vertical_8 sse2/;
 
-add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;
+  add_proto qw/void aom_highbd_lpf_vertical_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+  specialize qw/aom_highbd_lpf_vertical_6 sse2/;
 
-add_proto qw/void aom_highbd_lpf_horizontal_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_horizontal_14 sse2/;
+  add_proto qw/void aom_highbd_lpf_vertical_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+  specialize qw/aom_highbd_lpf_vertical_6_dual sse2/;
 
-add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1,int bd";
-specialize qw/aom_highbd_lpf_horizontal_14_dual sse2 avx2/;
+  add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+  specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;
 
-add_proto qw/void aom_highbd_lpf_horizontal_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_horizontal_6 sse2/;
+  add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+  specialize qw/aom_highbd_lpf_vertical_4 sse2/;
 
-add_proto qw/void aom_highbd_lpf_horizontal_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_horizontal_6_dual sse2/;
+  add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+  specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;
 
-add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_horizontal_8 sse2/;
+  add_proto qw/void aom_highbd_lpf_horizontal_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+  specialize qw/aom_highbd_lpf_horizontal_14 sse2/;
 
-add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;
+  add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1,int bd";
+  specialize qw/aom_highbd_lpf_horizontal_14_dual sse2 avx2/;
 
-add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
-specialize qw/aom_highbd_lpf_horizontal_4 sse2/;
+  add_proto qw/void aom_highbd_lpf_horizontal_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+  specialize qw/aom_highbd_lpf_horizontal_6 sse2/;
 
-add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
-specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
+  add_proto qw/void aom_highbd_lpf_horizontal_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+  specialize qw/aom_highbd_lpf_horizontal_6_dual sse2/;
+
+  add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+  specialize qw/aom_highbd_lpf_horizontal_8 sse2/;
+
+  add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+  specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;
+
+  add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+  specialize qw/aom_highbd_lpf_horizontal_4 sse2/;
+
+  add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+  specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
+}
 
 #
 # Encoder functions.
diff --git a/aom_dsp/loopfilter.c b/aom_dsp/loopfilter.c
index 9647e18..2a17428 100644
--- a/aom_dsp/loopfilter.c
+++ b/aom_dsp/loopfilter.c
@@ -442,6 +442,7 @@
   mb_lpf_vertical_edge_w(s + 4 * pitch, pitch, blimit1, limit1, thresh1, 4);
 }
 
+#if CONFIG_AV1_HIGHBITDEPTH
 // Should we apply any filter at all: 11111111 yes, 00000000 no ?
 static INLINE int8_t highbd_filter_mask2(uint8_t limit, uint8_t blimit,
                                          uint16_t p1, uint16_t p0, uint16_t q0,
@@ -923,3 +924,4 @@
   highbd_mb_lpf_vertical_edge_w(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
                                 4, bd);
 }
+#endif  // CONFIG_AV1_HIGHBITDEPTH
diff --git a/av1/common/av1_loopfilter.c b/av1/common/av1_loopfilter.c
index 69ad113..6e38a9e 100644
--- a/av1/common/av1_loopfilter.c
+++ b/av1/common/av1_loopfilter.c
@@ -352,8 +352,6 @@
   const int dst_stride = plane_ptr->dst.stride;
   const int y_range = (MAX_MIB_SIZE >> scale_vert);
   const int x_range = (MAX_MIB_SIZE >> scale_horz);
-  const int use_highbitdepth = cm->seq_params.use_highbitdepth;
-  const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
   for (int y = 0; y < y_range; y++) {
     uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
     for (int x = 0; x < x_range;) {
@@ -376,6 +374,9 @@
         tx_size = TX_4X4;
       }
 
+#if CONFIG_AV1_HIGHBITDEPTH
+      const int use_highbitdepth = cm->seq_params.use_highbitdepth;
+      const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
       switch (params.filter_length) {
         // apply 4-tap filtering
         case 4:
@@ -420,6 +421,32 @@
         // no filtering
         default: break;
       }
+#else
+      switch (params.filter_length) {
+        // apply 4-tap filtering
+        case 4:
+          aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim,
+                             params.hev_thr);
+          break;
+        case 6:  // apply 6-tap filter for chroma plane only
+          assert(plane != 0);
+          aom_lpf_vertical_6(p, dst_stride, params.mblim, params.lim,
+                             params.hev_thr);
+          break;
+        // apply 8-tap filtering
+        case 8:
+          aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim,
+                             params.hev_thr);
+          break;
+        // apply 14-tap filtering
+        case 14:
+          aom_lpf_vertical_14(p, dst_stride, params.mblim, params.lim,
+                              params.hev_thr);
+          break;
+        // no filtering
+        default: break;
+      }
+#endif  // CONFIG_AV1_HIGHBITDEPTH
       // advance the destination pointer
       advance_units = tx_size_wide_unit[tx_size];
       x += advance_units;
@@ -438,8 +465,6 @@
   const int dst_stride = plane_ptr->dst.stride;
   const int y_range = (MAX_MIB_SIZE >> scale_vert);
   const int x_range = (MAX_MIB_SIZE >> scale_horz);
-  const int use_highbitdepth = cm->seq_params.use_highbitdepth;
-  const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
   for (int x = 0; x < x_range; x++) {
     uint8_t *p = dst_ptr + x * MI_SIZE;
     for (int y = 0; y < y_range;) {
@@ -462,6 +487,9 @@
         tx_size = TX_4X4;
       }
 
+#if CONFIG_AV1_HIGHBITDEPTH
+      const int use_highbitdepth = cm->seq_params.use_highbitdepth;
+      const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
       switch (params.filter_length) {
         // apply 4-tap filtering
         case 4:
@@ -507,6 +535,33 @@
         // no filtering
         default: break;
       }
+#else
+      switch (params.filter_length) {
+        // apply 4-tap filtering
+        case 4:
+          aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim,
+                               params.hev_thr);
+          break;
+        // apply 6-tap filtering
+        case 6:
+          assert(plane != 0);
+          aom_lpf_horizontal_6(p, dst_stride, params.mblim, params.lim,
+                               params.hev_thr);
+          break;
+        // apply 8-tap filtering
+        case 8:
+          aom_lpf_horizontal_8(p, dst_stride, params.mblim, params.lim,
+                               params.hev_thr);
+          break;
+        // apply 14-tap filtering
+        case 14:
+          aom_lpf_horizontal_14(p, dst_stride, params.mblim, params.lim,
+                                params.hev_thr);
+          break;
+        // no filtering
+        default: break;
+      }
+#endif  // CONFIG_AV1_HIGHBITDEPTH
 
       // advance the destination pointer
       advance_units = tx_size_high_unit[tx_size];
diff --git a/av1/common/loopfiltermask.c b/av1/common/loopfiltermask.c
index 0b68679..ce31285 100644
--- a/av1/common/loopfiltermask.c
+++ b/av1/common/loopfiltermask.c
@@ -518,6 +518,7 @@
   }
 }
 
+#if CONFIG_AV1_HIGHBITDEPTH
 static void highbd_filter_selectively_vert_row2(
     int subsampling_factor, uint16_t *s, int pitch, int plane,
     uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
@@ -607,6 +608,7 @@
     mask_4x4_1 >>= step;
   }
 }
+#endif  // CONFIG_AV1_HIGHBITDEPTH
 
 static void filter_selectively_horiz(uint8_t *s, int pitch, int plane,
                                      int subsampling, uint64_t mask_16x16,
@@ -691,6 +693,7 @@
   }
 }
 
+#if CONFIG_AV1_HIGHBITDEPTH
 static void highbd_filter_selectively_horiz(
     uint16_t *s, int pitch, int plane, int subsampling, uint64_t mask_16x16,
     uint64_t mask_8x8, uint64_t mask_4x4, const loop_filter_info_n *lfi_n,
@@ -771,6 +774,7 @@
     offset += step * count;
   }
 }
+#endif  // CONFIG_AV1_HIGHBITDEPTH
 
 void av1_build_bitmask_vert_info(
     AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr,
@@ -997,6 +1001,7 @@
       mask_4x4_1 = 0;
     }
 
+#if CONFIG_AV1_HIGHBITDEPTH
     if (cm->seq_params.use_highbitdepth)
       highbd_filter_selectively_vert_row2(
           ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
@@ -1006,6 +1011,11 @@
       filter_selectively_vert_row2(
           ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
           mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2);
+#else
+    filter_selectively_vert_row2(
+        ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
+        mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2);
+#endif
     dst->buf += two_row_stride;
   }
   // reset buf pointer for horizontal filtering
@@ -1064,6 +1074,7 @@
     mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
     mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;
 
+#if CONFIG_AV1_HIGHBITDEPTH
     if (cm->seq_params.use_highbitdepth)
       highbd_filter_selectively_horiz(
           CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, ssx, mask_16x16,
@@ -1071,6 +1082,10 @@
     else
       filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
                                mask_8x8, mask_4x4, &cm->lf_info, lfl);
+#else
+    filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
+                             mask_8x8, mask_4x4, &cm->lf_info, lfl);
+#endif
     dst->buf += row_stride;
   }
   // reset buf pointer for next block
@@ -1143,6 +1158,7 @@
       uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
       uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;
 
+#if CONFIG_AV1_HIGHBITDEPTH
       if (cm->seq_params.use_highbitdepth)
         highbd_filter_selectively_vert_row2(
             ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
@@ -1153,6 +1169,11 @@
                                      mask_16x16_0, mask_8x8_0, mask_4x4_0,
                                      mask_16x16_1, mask_8x8_1, mask_4x4_1,
                                      &cm->lf_info, lfl, lfl2);
+#else
+      filter_selectively_vert_row2(
+          ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
+          mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2);
+#endif
       dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
     }
     dst->buf += 2 * MI_SIZE * dst->stride;
@@ -1213,6 +1234,7 @@
       mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
       mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;
 
+#if CONFIG_AV1_HIGHBITDEPTH
       if (cm->seq_params.use_highbitdepth)
         highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
                                         dst->stride, pl, ssx, mask_16x16,
@@ -1221,6 +1243,10 @@
       else
         filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
                                  mask_8x8, mask_4x4, &cm->lf_info, lfl);
+#else
+      filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
+                               mask_8x8, mask_4x4, &cm->lf_info, lfl);
+#endif
       dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
     }
     dst->buf += MI_SIZE * dst->stride;
diff --git a/test/lpf_test.cc b/test/lpf_test.cc
index 57cbb7d..2586e9e 100644
--- a/test/lpf_test.cc
+++ b/test/lpf_test.cc
@@ -144,26 +144,30 @@
   func_type_t ref_loopfilter_op_;
 };
 
+#if CONFIG_AV1_HIGHBITDEPTH
 void call_filter(uint16_t *s, LOOP_PARAM, int bd, hbdloop_op_t op) {
   op(s, p, blimit, limit, thresh, bd);
 }
-void call_filter(uint8_t *s, LOOP_PARAM, int bd, loop_op_t op) {
-  (void)bd;
-  op(s, p, blimit, limit, thresh);
-}
 void call_dualfilter(uint16_t *s, DUAL_LOOP_PARAM, int bd,
                      hbddual_loop_op_t op) {
   op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bd);
 }
+#endif
+void call_filter(uint8_t *s, LOOP_PARAM, int bd, loop_op_t op) {
+  (void)bd;
+  op(s, p, blimit, limit, thresh);
+}
 void call_dualfilter(uint8_t *s, DUAL_LOOP_PARAM, int bd, dual_loop_op_t op) {
   (void)bd;
   op(s, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
 };
 
+#if CONFIG_AV1_HIGHBITDEPTH
 typedef LoopTestParam<hbdloop_op_t, hbdloop_param_t> Loop8Test6Param_hbd;
-typedef LoopTestParam<loop_op_t, loop_param_t> Loop8Test6Param_lbd;
 typedef LoopTestParam<hbddual_loop_op_t, hbddual_loop_param_t>
     Loop8Test9Param_hbd;
+#endif
+typedef LoopTestParam<loop_op_t, loop_param_t> Loop8Test6Param_lbd;
 typedef LoopTestParam<dual_loop_op_t, dual_loop_param_t> Loop8Test9Param_lbd;
 
 #define OPCHECK(a, b)                                                          \
@@ -206,7 +210,9 @@
          "loopfilter output. "                                                 \
       << "First failed at test case " << first_failure;
 
+#if CONFIG_AV1_HIGHBITDEPTH
 TEST_P(Loop8Test6Param_hbd, OperationCheck) { OPCHECK(uint16_t, 16); }
+#endif
 TEST_P(Loop8Test6Param_lbd, OperationCheck) { OPCHECK(uint8_t, 8); }
 
 #define VALCHECK(a, b)                                                         \
@@ -252,7 +258,9 @@
          "loopfilter output. "                                                 \
       << "First failed at test case " << first_failure;
 
+#if CONFIG_AV1_HIGHBITDEPTH
 TEST_P(Loop8Test6Param_hbd, ValueCheck) { VALCHECK(uint16_t, 16); }
+#endif
 TEST_P(Loop8Test6Param_lbd, ValueCheck) { VALCHECK(uint8_t, 8); }
 
 #define SPEEDCHECK(a, b)                                                      \
@@ -280,7 +288,9 @@
     call_filter(s + 8 + p * 8, p, blimit, limit, thresh, bd, loopfilter_op_); \
   }
 
+#if CONFIG_AV1_HIGHBITDEPTH
 TEST_P(Loop8Test6Param_hbd, DISABLED_Speed) { SPEEDCHECK(uint16_t, 16); }
+#endif
 TEST_P(Loop8Test6Param_lbd, DISABLED_Speed) { SPEEDCHECK(uint8_t, 8); }
 
 #define OPCHECKd(a, b)                                                         \
@@ -337,7 +347,9 @@
          "loopfilter output. "                                                 \
       << "First failed at test case " << first_failure;
 
+#if CONFIG_AV1_HIGHBITDEPTH
 TEST_P(Loop8Test9Param_hbd, OperationCheck) { OPCHECKd(uint16_t, 16); }
+#endif
 TEST_P(Loop8Test9Param_lbd, OperationCheck) { OPCHECKd(uint8_t, 8); }
 
 #define VALCHECKd(a, b)                                                        \
@@ -396,7 +408,9 @@
          "loopfilter output. "                                                 \
       << "First failed at test case " << first_failure;
 
+#if CONFIG_AV1_HIGHBITDEPTH
 TEST_P(Loop8Test9Param_hbd, ValueCheck) { VALCHECKd(uint16_t, 16); }
+#endif
 TEST_P(Loop8Test9Param_lbd, ValueCheck) { VALCHECKd(uint8_t, 8); }
 
 #define SPEEDCHECKd(a, b)                                                    \
@@ -436,13 +450,15 @@
                     limit1, thresh1, bit_depth_, loopfilter_op_);            \
   }
 
+#if CONFIG_AV1_HIGHBITDEPTH
 TEST_P(Loop8Test9Param_hbd, DISABLED_Speed) { SPEEDCHECKd(uint16_t, 16); }
+#endif
 TEST_P(Loop8Test9Param_lbd, DISABLED_Speed) { SPEEDCHECKd(uint8_t, 8); }
 
 using ::testing::make_tuple;
 
 #if HAVE_SSE2
-
+#if CONFIG_AV1_HIGHBITDEPTH
 const hbdloop_param_t kHbdLoop8Test6[] = {
   make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
              8),
@@ -488,6 +504,7 @@
 
 INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param_hbd,
                         ::testing::ValuesIn(kHbdLoop8Test6));
+#endif  // CONFIG_AV1_HIGHBITDEPTH
 
 const loop_param_t kLoop8Test6[] = {
   make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8),
@@ -520,7 +537,7 @@
 
 #endif  // HAVE_SSE2
 
-#if HAVE_SSE2
+#if HAVE_SSE2 && CONFIG_AV1_HIGHBITDEPTH
 const hbddual_loop_param_t kHbdLoop8Test9[] = {
   make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
              &aom_highbd_lpf_horizontal_4_dual_c, 8),
@@ -575,7 +592,7 @@
 INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param_hbd,
                         ::testing::ValuesIn(kHbdLoop8Test9));
 
-#endif  // HAVE_SSE2
+#endif  // HAVE_SSE2 && CONFIG_AV1_HIGHBITDEPTH
 
 #if HAVE_NEON
 const loop_param_t kLoop8Test6[] = {
@@ -593,7 +610,7 @@
                         ::testing::ValuesIn(kLoop8Test6));
 #endif  // HAVE_NEON
 
-#if HAVE_AVX2
+#if HAVE_AVX2 && CONFIG_AV1_HIGHBITDEPTH
 const hbddual_loop_param_t kHbdLoop8Test9Avx2[] = {
   make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
              &aom_highbd_lpf_horizontal_4_dual_c, 8),