Remove CONFIG_LOWPRECISION_BLEND

This tool is fully adopted.

Change-Id: Id349a3aac5bf125a23d5961c2baacbe3f585eb73

commit: 5af4891665a64008af1cc4c7a6f22293196d0503 [log] [tgz]
author: Yaowu Xu <yaowu@google.com> Wed Mar 28 15:04:34 2018 -0700
committer: Yaowu Xu <yaowu@google.com> Thu Mar 29 05:39:14 2018 +0000
tree: d6204ec086d9b6e904aedba2af1c57ca5aff77c6
parent: 66dde40b1fc2284a64176348253a6650349173f7 [diff]
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 9b1a99c..dc81bda 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl

@@ -473,12 +473,8 @@
   #
   # Alpha blending with mask
   #
-  if (aom_config("CONFIG_LOWPRECISION_BLEND") eq "yes") {
-    add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, ConvolveParams *conv_params";
-    add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, ConvolveParams *conv_params, const int bd";
-  } else {
-    add_proto qw/void aom_blend_a64_d32_mask/, "int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
-  }
+  add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, ConvolveParams *conv_params";
+  add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, ConvolveParams *conv_params, const int bd";
   add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
   add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
   add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";

diff --git a/aom_dsp/blend_a64_mask.c b/aom_dsp/blend_a64_mask.c
index 559f44d..99b0d20 100644
--- a/aom_dsp/blend_a64_mask.c
+++ b/aom_dsp/blend_a64_mask.c

@@ -33,7 +33,6 @@
 // In contrast, the output of the non-d32 functions will not be further rounded,
 // so we *should* use ROUND_POWER_OF_TWO there.
 
-#if CONFIG_LOWPRECISION_BLEND
 void aom_lowbd_blend_a64_d16_mask(uint8_t *dst, uint32_t dst_stride,
                                   const CONV_BUF_TYPE *src0,
                                   uint32_t src0_stride,
@@ -209,72 +208,6 @@
     }
   }
 }
-#else   // CONFIG_LOWPRECISION_BLEND
-void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride,
-                              const int32_t *src0, uint32_t src0_stride,
-                              const int32_t *src1, uint32_t src1_stride,
-                              const uint8_t *mask, uint32_t mask_stride, int h,
-                              int w, int subh, int subw) {
-  int i, j;
-
-  assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
-  assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
-
-  assert(h >= 1);
-  assert(w >= 1);
-  assert(IS_POWER_OF_TWO(h));
-  assert(IS_POWER_OF_TWO(w));
-
-  if (subw == 0 && subh == 0) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = mask[i * mask_stride + j];
-        dst[i * dst_stride + j] =
-            ((m * src0[i * src0_stride + j] +
-              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
-             AOM_BLEND_A64_ROUND_BITS);
-      }
-    }
-  } else if (subw == 1 && subh == 1) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = ROUND_POWER_OF_TWO(
-            mask[(2 * i) * mask_stride + (2 * j)] +
-                mask[(2 * i + 1) * mask_stride + (2 * j)] +
-                mask[(2 * i) * mask_stride + (2 * j + 1)] +
-                mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
-            2);
-        dst[i * dst_stride + j] =
-            ((m * src0[i * src0_stride + j] +
-              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
-             AOM_BLEND_A64_ROUND_BITS);
-      }
-    }
-  } else if (subw == 1 && subh == 0) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
-                                    mask[i * mask_stride + (2 * j + 1)]);
-        dst[i * dst_stride + j] =
-            ((m * src0[i * src0_stride + j] +
-              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
-             AOM_BLEND_A64_ROUND_BITS);
-      }
-    }
-  } else {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; ++j) {
-        const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
-                                    mask[(2 * i + 1) * mask_stride + j]);
-        dst[i * dst_stride + j] =
-            ((m * src0[i * src0_stride + j] +
-              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
-             AOM_BLEND_A64_ROUND_BITS);
-      }
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
 
 // Blending with alpha mask. Mask values come from the range [0, 64],
 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can

diff --git a/aom_dsp/x86/convolve_avx2.h b/aom_dsp/x86/convolve_avx2.h
index 5190c99..7790baf 100644
--- a/aom_dsp/x86/convolve_avx2.h
+++ b/aom_dsp/x86/convolve_avx2.h

@@ -134,7 +134,6 @@
   _mm256_store_si256((__m256i *)dst, d);
 }
 
-#if CONFIG_LOWPRECISION_BLEND
 static INLINE __m256i comp_avg(const __m256i *const data_ref_0,
                                const __m256i *const res_unsigned,
                                const __m256i *const wt,
@@ -195,23 +194,5 @@
 
   return res_round;
 }
-#else
-static INLINE void mult_add_store_aligned_256(CONV_BUF_TYPE *const dst,
-                                              const __m256i *const res,
-                                              const __m256i *const wt0,
-                                              const __m256i *const wt1,
-                                              const int do_average) {
-  __m256i d;
-  if (do_average) {
-    d = _mm256_load_si256((__m256i *)dst);
-    d = _mm256_add_epi32(_mm256_mullo_epi32(d, *wt0),
-                         _mm256_mullo_epi32(*res, *wt1));
-    d = _mm256_srai_epi32(d, DIST_PRECISION_BITS);
-  } else {
-    d = *res;
-  }
-  _mm256_store_si256((__m256i *)dst, d);
-}
-#endif
 
 #endif

diff --git a/aom_dsp/x86/convolve_sse2.h b/aom_dsp/x86/convolve_sse2.h
index 9790580..846fe7b 100644
--- a/aom_dsp/x86/convolve_sse2.h
+++ b/aom_dsp/x86/convolve_sse2.h

@@ -75,7 +75,6 @@
   return convolve(ss, coeffs);
 }
 
-#if CONFIG_LOWPRECISION_BLEND
 static INLINE __m128i comp_avg(const __m128i *const data_ref_0,
                                const __m128i *const res_unsigned,
                                const __m128i *const wt,
@@ -120,5 +119,3 @@
 }
 
 #endif
-
-#endif

diff --git a/aom_dsp/x86/convolve_sse4_1.h b/aom_dsp/x86/convolve_sse4_1.h
index 4b5a9b8..d48c256 100644
--- a/aom_dsp/x86/convolve_sse4_1.h
+++ b/aom_dsp/x86/convolve_sse4_1.h

@@ -31,7 +31,6 @@
   _mm_store_si128((__m128i *)dst, d);
 }
 
-#if CONFIG_LOWPRECISION_BLEND
 static INLINE __m128i highbd_comp_avg_sse4_1(const __m128i *const data_ref_0,
                                              const __m128i *const res_unsigned,
                                              const __m128i *const wt0,
@@ -50,6 +49,5 @@
   }
   return res;
 }
-#endif
 
 #endif  // _AOM_DSP_X86_TXFM_COMMON_INTRIN_H_

diff --git a/av1/av1.cmake b/av1/av1.cmake
index fdad64a..818c5fb 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake

@@ -286,20 +286,14 @@
     ${AOM_AV1_COMMON_INTRIN_SSE4_1}
     "${AOM_ROOT}/av1/common/x86/highbd_jnt_convolve_sse4.c")
 
-if (CONFIG_LOWPRECISION_BLEND)
-  set(AOM_AV1_COMMON_INTRIN_SSE2
-      ${AOM_AV1_COMMON_INTRIN_SSE2}
-      "${AOM_ROOT}/av1/common/x86/jnt_convolve_sse2.c")
+set(AOM_AV1_COMMON_INTRIN_SSE2
+    ${AOM_AV1_COMMON_INTRIN_SSE2}
+    "${AOM_ROOT}/av1/common/x86/jnt_convolve_sse2.c")
 
-  set(AOM_AV1_COMMON_INTRIN_SSSE3
-      ${AOM_AV1_COMMON_INTRIN_SSSE3}
-      "${AOM_ROOT}/av1/common/x86/jnt_convolve_ssse3.c")
-else()
+set(AOM_AV1_COMMON_INTRIN_SSSE3
+    ${AOM_AV1_COMMON_INTRIN_SSSE3}
+    "${AOM_ROOT}/av1/common/x86/jnt_convolve_ssse3.c")
 
-  set(AOM_AV1_COMMON_INTRIN_SSE4_1
-      ${AOM_AV1_COMMON_INTRIN_SSE4_1}
-      "${AOM_ROOT}/av1/common/x86/jnt_convolve_sse4.c")
-endif()
 set(AOM_AV1_COMMON_INTRIN_AVX2
     ${AOM_AV1_COMMON_INTRIN_AVX2}
     "${AOM_ROOT}/av1/common/x86/jnt_convolve_avx2.c")

diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 1960169..cd4ac0a 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl

@@ -307,7 +307,6 @@
 add_proto qw/void av1_highbd_jnt_convolve_y/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
 add_proto qw/void av1_highbd_jnt_convolve_2d_copy/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
 
-if (aom_config("CONFIG_LOWPRECISION_BLEND") eq "yes") {
   add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
   add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
   specialize qw/av1_convolve_2d_sr sse2 avx2/;
@@ -328,22 +327,6 @@
   specialize qw/av1_highbd_jnt_convolve_x sse4_1 avx2/;
   specialize qw/av1_highbd_jnt_convolve_y sse4_1 avx2/;
   specialize qw/av1_highbd_jnt_convolve_2d_copy sse4_1 avx2/;
-}
-else
-{
-  add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
-  add_proto qw/void av1_highbd_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd";
-  add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
-  add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
-  add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-  add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-  add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-  add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
-  add_proto qw/void av1_highbd_convolve_2d/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-  add_proto qw/void av1_highbd_convolve_2d_copy/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-  add_proto qw/void av1_highbd_convolve_x/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-  add_proto qw/void av1_highbd_convolve_y/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
-}
 
 # INTRA_EDGE functions
 add_proto qw/void av1_filter_intra_edge/, "uint8_t *p, int sz, int strength";

diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index 855779b..df6eb2c 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c

@@ -76,190 +76,6 @@
   }
 }
 
-#if !CONFIG_LOWPRECISION_BLEND
-void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst,
-                             int dst_stride, int w, int h, int bits) {
-  for (int r = 0; r < h; ++r) {
-    for (int c = 0; c < w; ++c) {
-      dst[r * dst_stride + c] =
-          clip_pixel(ROUND_POWER_OF_TWO(src[r * src_stride + c], bits));
-    }
-  }
-}
-
-/* Note: For notes on hardware implementations, including the required
-   bit widths for various intermediate values, see the comments above
-   av1_warp_affine_c.
-*/
-void av1_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst0,
-                       int dst_stride0, int w, int h,
-                       InterpFilterParams *filter_params_x,
-                       InterpFilterParams *filter_params_y,
-                       const int subpel_x_q4, const int subpel_y_q4,
-                       ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
-  int im_h = h + filter_params_y->taps - 1;
-  int im_stride = w;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const int bd = 8;
-  (void)dst0;
-  (void)dst_stride0;
-
-  // horizontal filter
-  const uint8_t *src_horiz = src - fo_vert * src_stride;
-  const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
-      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-  for (int y = 0; y < im_h; ++y) {
-    for (int x = 0; x < w; ++x) {
-      int32_t sum = (1 << (bd + FILTER_BITS - 1));
-      for (int k = 0; k < filter_params_x->taps; ++k) {
-        sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
-      }
-      assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
-      im_block[y * im_stride + x] =
-          (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
-    }
-  }
-
-  // vertical filter
-  int16_t *src_vert = im_block + fo_vert * im_stride;
-  const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
-      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
-  for (int y = 0; y < h; ++y) {
-    for (int x = 0; x < w; ++x) {
-      CONV_BUF_TYPE sum = 1 << offset_bits;
-      for (int k = 0; k < filter_params_y->taps; ++k) {
-        sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
-      }
-      assert(0 <= sum && sum < (1 << (offset_bits + 2)));
-      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
-                          ((1 << (offset_bits - conv_params->round_1)) +
-                           (1 << (offset_bits - conv_params->round_1 - 1)));
-      if (conv_params->do_average) {
-        int32_t tmp = dst[y * dst_stride + x];
-        tmp += res;
-        dst[y * dst_stride + x] = tmp >> 1;
-      } else {
-        dst[y * dst_stride + x] = res;
-      }
-    }
-  }
-}
-
-void av1_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst0,
-                      int dst_stride0, int w, int h,
-                      InterpFilterParams *filter_params_x,
-                      InterpFilterParams *filter_params_y,
-                      const int subpel_x_q4, const int subpel_y_q4,
-                      ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int bits = FILTER_BITS - conv_params->round_0;
-  (void)filter_params_x;
-  (void)subpel_x_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  assert(bits >= 0);
-
-  // vertical filter
-  const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
-      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-  for (int y = 0; y < h; ++y) {
-    for (int x = 0; x < w; ++x) {
-      CONV_BUF_TYPE res = 0;
-      for (int k = 0; k < filter_params_y->taps; ++k) {
-        res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
-      }
-      res *= (1 << bits);
-      res = ROUND_POWER_OF_TWO(res, conv_params->round_1);
-      if (conv_params->do_average) {
-        int32_t tmp = dst[y * dst_stride + x];
-        tmp += res;
-        dst[y * dst_stride + x] = tmp >> 1;
-      } else {
-        dst[y * dst_stride + x] = res;
-      }
-    }
-  }
-}
-
-void av1_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst0,
-                      int dst_stride0, int w, int h,
-                      InterpFilterParams *filter_params_x,
-                      InterpFilterParams *filter_params_y,
-                      const int subpel_x_q4, const int subpel_y_q4,
-                      ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const int bits = FILTER_BITS - conv_params->round_1;
-  (void)filter_params_y;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  assert(bits >= 0);
-
-  // horizontal filter
-  const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
-      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-  for (int y = 0; y < h; ++y) {
-    for (int x = 0; x < w; ++x) {
-      CONV_BUF_TYPE res = 0;
-      for (int k = 0; k < filter_params_x->taps; ++k) {
-        res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
-      }
-      res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
-      if (conv_params->do_average) {
-        int32_t tmp = dst[y * dst_stride + x];
-        tmp += res;
-        dst[y * dst_stride + x] = tmp >> 1;
-      } else {
-        dst[y * dst_stride + x] = res;
-      }
-    }
-  }
-}
-
-void av1_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst0,
-                            int dst_stride0, int w, int h,
-                            InterpFilterParams *filter_params_x,
-                            InterpFilterParams *filter_params_y,
-                            const int subpel_x_q4, const int subpel_y_q4,
-                            ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int bits =
-      FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
-
-  (void)filter_params_x;
-  (void)filter_params_y;
-  (void)subpel_x_q4;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  for (int y = 0; y < h; ++y) {
-    for (int x = 0; x < w; ++x) {
-      CONV_BUF_TYPE res = src[y * src_stride + x] << bits;
-      if (conv_params->do_average) {
-        int32_t tmp = dst[y * dst_stride + x];
-        tmp += res;
-        dst[y * dst_stride + x] = tmp >> 1;
-      } else {
-        dst[y * dst_stride + x] = res;
-      }
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
                           int dst_stride, int w, int h,
                           InterpFilterParams *filter_params_x,
@@ -298,7 +114,6 @@
   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t sum = 1 << offset_bits;
       for (int k = 0; k < filter_params_y->taps; ++k) {
         sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
@@ -307,16 +122,6 @@
       int16_t res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
                     ((1 << (offset_bits - conv_params->round_1)) +
                      (1 << (offset_bits - conv_params->round_1 - 1)));
-#else   // CONFIG_LOWPRECISION_BLEND
-      CONV_BUF_TYPE sum = 1 << offset_bits;
-      for (int k = 0; k < filter_params_y->taps; ++k) {
-        sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
-      }
-      assert(0 <= sum && sum < (1 << (offset_bits + 2)));
-      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
-                          ((1 << (offset_bits - conv_params->round_1)) +
-                           (1 << (offset_bits - conv_params->round_1 - 1)));
-#endif  // CONFIG_LOWPRECISION_BLEND
       dst[y * dst_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(res, bits));
     }
   }
@@ -342,11 +147,7 @@
       *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t res = 0;
-#else
-      CONV_BUF_TYPE res = 0;
-#endif
       for (int k = 0; k < filter_params_y->taps; ++k) {
         res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
       }
@@ -377,11 +178,7 @@
       *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t res = 0;
-#else
-      CONV_BUF_TYPE res = 0;
-#endif
       for (int k = 0; k < filter_params_x->taps; ++k) {
         res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
       }
@@ -424,13 +221,8 @@
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
   const int bd = 8;
-#if CONFIG_LOWPRECISION_BLEND
   const int round_bits =
       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-#else
-  (void)dst8;
-  (void)dst8_stride;
-#endif
 
   // horizontal filter
   const uint8_t *src_horiz = src - fo_vert * src_stride;
@@ -455,7 +247,6 @@
   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t sum = 1 << offset_bits;
       for (int k = 0; k < filter_params_y->taps; ++k) {
         sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
@@ -478,33 +269,6 @@
       } else {
         dst[y * dst_stride + x] = res;
       }
-#else   // CONFIG_LOWPRECISION_BLEND
-      CONV_BUF_TYPE sum = 1 << offset_bits;
-      for (int k = 0; k < filter_params_y->taps; ++k) {
-        sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
-      }
-      assert(0 <= sum && sum < (1 << (offset_bits + 2)));
-      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
-                          ((1 << (offset_bits - conv_params->round_1)) +
-                           (1 << (offset_bits - conv_params->round_1 - 1)));
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-#endif  // CONFIG_LOWPRECISION_BLEND
     }
   }
 }
@@ -519,27 +283,20 @@
   int dst_stride = conv_params->dst_stride;
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int bits = FILTER_BITS - conv_params->round_0;
-#if CONFIG_LOWPRECISION_BLEND
   const int bd = 8;
   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
   const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
                            (1 << (offset_bits - conv_params->round_1 - 1));
   const int round_bits =
       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-#endif
   (void)filter_params_x;
   (void)subpel_x_q4;
-#if !CONFIG_LOWPRECISION_BLEND
-  (void)dst8;
-  (void)dst8_stride;
-#endif
 
   // vertical filter
   const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
       *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t res = 0;
       for (int k = 0; k < filter_params_y->taps; ++k) {
         res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
@@ -562,31 +319,6 @@
       } else {
         dst[y * dst_stride + x] = res;
       }
-#else   // CONFIG_LOWPRECISION_BLEND
-      CONV_BUF_TYPE res = 0;
-      for (int k = 0; k < filter_params_y->taps; ++k) {
-        res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
-      }
-      res *= (1 << bits);
-      res = ROUND_POWER_OF_TWO(res, conv_params->round_1);
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-#endif  // CONFIG_LOWPRECISION_BLEND
     }
   }
 }
@@ -601,27 +333,20 @@
   int dst_stride = conv_params->dst_stride;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
   const int bits = FILTER_BITS - conv_params->round_1;
-#if CONFIG_LOWPRECISION_BLEND
   const int bd = 8;
   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
   const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
                            (1 << (offset_bits - conv_params->round_1 - 1));
   const int round_bits =
       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-#endif
   (void)filter_params_y;
   (void)subpel_y_q4;
-#if !CONFIG_LOWPRECISION_BLEND
-  (void)dst8;
-  (void)dst8_stride;
-#endif
 
   // horizontal filter
   const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
       *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t res = 0;
       for (int k = 0; k < filter_params_x->taps; ++k) {
         res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
@@ -644,30 +369,6 @@
       } else {
         dst[y * dst_stride + x] = res;
       }
-#else   // CONFIG_LOWPRECISION_BLEND
-      CONV_BUF_TYPE res = 0;
-      for (int k = 0; k < filter_params_x->taps; ++k) {
-        res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
-      }
-      res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-#endif  // CONFIG_LOWPRECISION_BLEND
     }
   }
 }
@@ -682,25 +383,18 @@
   int dst_stride = conv_params->dst_stride;
   const int bits =
       FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
-#if CONFIG_LOWPRECISION_BLEND
   const int bd = 8;
   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
   const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
                            (1 << (offset_bits - conv_params->round_1 - 1));
-#endif
   (void)filter_params_x;
   (void)filter_params_y;
   (void)subpel_x_q4;
   (void)subpel_y_q4;
-#if !CONFIG_LOWPRECISION_BLEND
-  (void)dst8;
-  (void)dst8_stride;
-#endif
 
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
       CONV_BUF_TYPE res = src[y * src_stride + x] << bits;
-#if CONFIG_LOWPRECISION_BLEND
       res += round_offset;
 
       if (conv_params->do_average) {
@@ -717,57 +411,25 @@
       } else {
         dst[y * dst_stride + x] = res;
       }
-#else   // CONFIG_LOWPRECISION_BLEND
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-#endif  // CONFIG_LOWPRECISION_BLEND
     }
   }
 }
 
-#if CONFIG_LOWPRECISION_BLEND
 void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8,
                              int dst8_stride, int w, int h,
                              InterpFilterParams *filter_params_x,
                              InterpFilterParams *filter_params_y,
                              const int subpel_x_qn, const int x_step_qn,
                              const int subpel_y_qn, const int y_step_qn,
-                             ConvolveParams *conv_params)
-#else
-void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
-                             CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
-                             InterpFilterParams *filter_params_x,
-                             InterpFilterParams *filter_params_y,
-                             const int subpel_x_qn, const int x_step_qn,
-                             const int subpel_y_qn, const int y_step_qn,
-                             ConvolveParams *conv_params)
-#endif
-{
+                             ConvolveParams *conv_params) {
   int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
   int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
              filter_params_y->taps;
-#if CONFIG_LOWPRECISION_BLEND
   CONV_BUF_TYPE *dst16 = conv_params->dst;
   const int dst16_stride = conv_params->dst_stride;
   const int bits =
       FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
   assert(bits >= 0);
-#endif
   int im_stride = w;
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
@@ -805,7 +467,6 @@
       assert(y_filter_idx < SUBPEL_SHIFTS);
       const int16_t *y_filter =
           av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
-#if CONFIG_LOWPRECISION_BLEND
       int32_t sum = 1 << offset_bits;
       for (int k = 0; k < filter_params_y->taps; ++k) {
         sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
@@ -835,33 +496,6 @@
                              (1 << (offset_bits - conv_params->round_1 - 1)));
         dst8[y * dst8_stride + x] = clip_pixel(ROUND_POWER_OF_TWO(tmp, bits));
       }
-#else   // CONFIG_LOWPRECISION_BLEND
-      CONV_BUF_TYPE sum = 1 << offset_bits;
-      for (int k = 0; k < filter_params_y->taps; ++k) {
-        sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
-      }
-      assert(0 <= sum && sum < (1 << (offset_bits + 2)));
-      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
-                          ((1 << (offset_bits - conv_params->round_1)) +
-                           (1 << (offset_bits - conv_params->round_1 - 1)));
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-#endif  // CONFIG_LOWPRECISION_BLEND
     }
     src_vert++;
   }
@@ -873,32 +507,12 @@
     InterpFilterParams *filter_params_y, const int subpel_x_qn,
     const int x_step_qn, const int subpel_y_qn, const int y_step_qn,
     ConvolveParams *conv_params) {
-#if CONFIG_LOWPRECISION_BLEND
   if (conv_params->is_compound) {
     assert(conv_params->dst != NULL);
   }
   av1_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h, filter_params_x,
                         filter_params_y, subpel_x_qn, x_step_qn, subpel_y_qn,
                         y_step_qn, conv_params);
-#else   // CONFIG_LOWPRECISION_BLEND
-  if (conv_params->is_compound) {
-    assert(conv_params->dst != NULL);
-    av1_convolve_2d_scale(src, src_stride, conv_params->dst,
-                          conv_params->dst_stride, w, h, filter_params_x,
-                          filter_params_y, subpel_x_qn, x_step_qn, subpel_y_qn,
-                          y_step_qn, conv_params);
-  } else {
-    CONV_BUF_TYPE tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE];
-    int tmp_dst_stride = MAX_SB_SIZE;
-    av1_convolve_2d_scale(src, src_stride, tmp_dst, tmp_dst_stride, w, h,
-                          filter_params_x, filter_params_y, subpel_x_qn,
-                          x_step_qn, subpel_y_qn, y_step_qn, conv_params);
-    const int rbits =
-        2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-    av1_convolve_rounding(tmp_dst, tmp_dst_stride, dst, dst_stride, w, h,
-                          rbits);
-  }
-#endif  // CONFIG_LOWPRECISION_BLEND
 }
 
 void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
@@ -926,191 +540,6 @@
         &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
 }
 
-#if !CONFIG_LOWPRECISION_BLEND
-void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride,
-                                    uint8_t *dst8, int dst_stride, int w, int h,
-                                    int bits, int bd) {
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
-  for (int r = 0; r < h; ++r) {
-    for (int c = 0; c < w; ++c) {
-      dst[r * dst_stride + c] = clip_pixel_highbd(
-          ROUND_POWER_OF_TWO(src[r * src_stride + c], bits), bd);
-    }
-  }
-}
-
-void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
-                              uint16_t *dst0, int dst_stride0, int w, int h,
-                              InterpFilterParams *filter_params_x,
-                              InterpFilterParams *filter_params_y,
-                              const int subpel_x_q4, const int subpel_y_q4,
-                              ConvolveParams *conv_params, int bd) {
-  int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  int im_h = h + filter_params_y->taps - 1;
-  int im_stride = w;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  (void)dst0;
-  (void)dst_stride0;
-
-  // horizontal filter
-  const uint16_t *src_horiz = src - fo_vert * src_stride;
-  const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
-      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-  for (int y = 0; y < im_h; ++y) {
-    for (int x = 0; x < w; ++x) {
-      int32_t sum = (1 << (bd + FILTER_BITS - 1));
-      for (int k = 0; k < filter_params_x->taps; ++k) {
-        sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
-      }
-      assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
-      (void)bd;
-      im_block[y * im_stride + x] =
-          (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
-    }
-  }
-
-  // vertical filter
-  int16_t *src_vert = im_block + fo_vert * im_stride;
-  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
-  const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
-      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-  for (int y = 0; y < h; ++y) {
-    for (int x = 0; x < w; ++x) {
-      CONV_BUF_TYPE sum = 1 << offset_bits;
-      for (int k = 0; k < filter_params_y->taps; ++k) {
-        sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
-      }
-      assert(0 <= sum && sum < (1 << (offset_bits + 2)));
-      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
-                          ((1 << (offset_bits - conv_params->round_1)) +
-                           (1 << (offset_bits - conv_params->round_1 - 1)));
-      if (conv_params->do_average) {
-        int32_t tmp = dst[y * dst_stride + x];
-        tmp += res;
-        dst[y * dst_stride + x] = tmp >> 1;
-      } else {
-        dst[y * dst_stride + x] = res;
-      }
-    }
-  }
-}
-
-void av1_highbd_convolve_2d_copy_c(const uint16_t *src, int src_stride,
-                                   uint16_t *dst0, int dst_stride0, int w,
-                                   int h, InterpFilterParams *filter_params_x,
-                                   InterpFilterParams *filter_params_y,
-                                   const int subpel_x_q4, const int subpel_y_q4,
-                                   ConvolveParams *conv_params, int bd) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int bits =
-      FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
-
-  (void)filter_params_x;
-  (void)filter_params_y;
-  (void)subpel_x_q4;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-  (void)bd;
-
-  for (int y = 0; y < h; ++y) {
-    for (int x = 0; x < w; ++x) {
-      CONV_BUF_TYPE res = src[y * src_stride + x] << bits;
-      if (conv_params->do_average) {
-        int32_t tmp = dst[y * dst_stride + x];
-        tmp += res;
-        dst[y * dst_stride + x] = tmp >> 1;
-      } else {
-        dst[y * dst_stride + x] = res;
-      }
-    }
-  }
-}
-
-void av1_highbd_convolve_x_c(const uint16_t *src, int src_stride,
-                             uint16_t *dst0, int dst_stride0, int w, int h,
-                             InterpFilterParams *filter_params_x,
-                             InterpFilterParams *filter_params_y,
-                             const int subpel_x_q4, const int subpel_y_q4,
-                             ConvolveParams *conv_params, int bd) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const int bits = FILTER_BITS - conv_params->round_1;
-  (void)filter_params_y;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-  (void)bd;
-
-  assert(bits >= 0);
-
-  // horizontal filter
-  const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
-      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-  for (int y = 0; y < h; ++y) {
-    for (int x = 0; x < w; ++x) {
-      CONV_BUF_TYPE res = 0;
-      for (int k = 0; k < filter_params_x->taps; ++k) {
-        res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
-      }
-      res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
-      if (conv_params->do_average) {
-        int32_t tmp = dst[y * dst_stride + x];
-        tmp += res;
-        dst[y * dst_stride + x] = tmp >> 1;
-      } else {
-        dst[y * dst_stride + x] = res;
-      }
-    }
-  }
-}
-
-void av1_highbd_convolve_y_c(const uint16_t *src, int src_stride,
-                             uint16_t *dst0, int dst_stride0, int w, int h,
-                             InterpFilterParams *filter_params_x,
-                             InterpFilterParams *filter_params_y,
-                             const int subpel_x_q4, const int subpel_y_q4,
-                             ConvolveParams *conv_params, int bd) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int bits = FILTER_BITS - conv_params->round_0;
-  (void)filter_params_x;
-  (void)subpel_x_q4;
-  (void)dst0;
-  (void)dst_stride0;
-  (void)bd;
-
-  assert(bits >= 0);
-
-  // vertical filter
-  const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
-      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-  for (int y = 0; y < h; ++y) {
-    for (int x = 0; x < w; ++x) {
-      CONV_BUF_TYPE res = 0;
-      for (int k = 0; k < filter_params_y->taps; ++k) {
-        res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
-      }
-      res *= (1 << bits);
-      res = ROUND_POWER_OF_TWO(res, conv_params->round_1);
-      if (conv_params->do_average) {
-        int32_t tmp = dst[y * dst_stride + x];
-        tmp += res;
-        dst[y * dst_stride + x] = tmp >> 1;
-      } else {
-        dst[y * dst_stride + x] = res;
-      }
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 void av1_highbd_convolve_2d_copy_sr_c(
     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
     int h, InterpFilterParams *filter_params_x,
@@ -1150,11 +579,7 @@
       *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t res = 0;
-#else
-      CONV_BUF_TYPE res = 0;
-#endif
       for (int k = 0; k < filter_params_x->taps; ++k) {
         res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
       }
@@ -1184,11 +609,7 @@
       *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t res = 0;
-#else
-      CONV_BUF_TYPE res = 0;
-#endif
       for (int k = 0; k < filter_params_y->taps; ++k) {
         res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
       }
@@ -1236,7 +657,6 @@
   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t sum = 1 << offset_bits;
       for (int k = 0; k < filter_params_y->taps; ++k) {
         sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
@@ -1245,38 +665,18 @@
       int32_t res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
                     ((1 << (offset_bits - conv_params->round_1)) +
                      (1 << (offset_bits - conv_params->round_1 - 1)));
-#else   // CONFIG_LOWPRECISION_BLEND
-      CONV_BUF_TYPE sum = 1 << offset_bits;
-      for (int k = 0; k < filter_params_y->taps; ++k) {
-        sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
-      }
-      assert(0 <= sum && sum < (1 << (offset_bits + 2)));
-      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
-                          ((1 << (offset_bits - conv_params->round_1)) +
-                           (1 << (offset_bits - conv_params->round_1 - 1)));
-#endif  // CONFIG_LOWPRECISION_BLEND
       dst[y * dst_stride + x] =
           clip_pixel_highbd(ROUND_POWER_OF_TWO(res, bits), bd);
     }
   }
 }
 
-#if CONFIG_LOWPRECISION_BLEND
 void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
                                   uint16_t *dst16, int dst16_stride, int w,
                                   int h, InterpFilterParams *filter_params_x,
                                   InterpFilterParams *filter_params_y,
                                   const int subpel_x_q4, const int subpel_y_q4,
-                                  ConvolveParams *conv_params, int bd)
-#else   // CONFIG_LOWPRECISION_BLEND
-void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
-                                  uint16_t *dst0, int dst_stride0, int w, int h,
-                                  InterpFilterParams *filter_params_x,
-                                  InterpFilterParams *filter_params_y,
-                                  const int subpel_x_q4, const int subpel_y_q4,
-                                  ConvolveParams *conv_params, int bd)
-#endif  // CONFIG_LOWPRECISION_BLEND
-{
+                                  ConvolveParams *conv_params, int bd) {
   int x, y, k;
   int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
   CONV_BUF_TYPE *dst = conv_params->dst;
@@ -1285,14 +685,9 @@
   int im_stride = w;
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
-#if CONFIG_LOWPRECISION_BLEND
   const int round_bits =
       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
   assert(round_bits >= 0);
-#else
-  (void)dst0;
-  (void)dst_stride0;
-#endif
 
   // horizontal filter
   const uint16_t *src_horiz = src - fo_vert * src_stride;
@@ -1318,7 +713,6 @@
       *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
   for (y = 0; y < h; ++y) {
     for (x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t sum = 1 << offset_bits;
       for (k = 0; k < filter_params_y->taps; ++k) {
         sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
@@ -1341,33 +735,6 @@
       } else {
         dst[y * dst_stride + x] = res;
       }
-#else
-      CONV_BUF_TYPE sum = 1 << offset_bits;
-      for (k = 0; k < filter_params_y->taps; ++k) {
-        sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
-      }
-      assert(0 <= sum && sum < (1 << (offset_bits + 2)));
-      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
-                          ((1 << (offset_bits - conv_params->round_1)) +
-                           (1 << (offset_bits - conv_params->round_1 - 1)));
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-#endif
     }
   }
 }
@@ -1382,28 +749,20 @@
   int dst_stride = conv_params->dst_stride;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
   const int bits = FILTER_BITS - conv_params->round_1;
-#if CONFIG_LOWPRECISION_BLEND
   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
   const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
                            (1 << (offset_bits - conv_params->round_1 - 1));
   const int round_bits =
       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
   assert(round_bits >= 0);
-#endif
   (void)filter_params_y;
   (void)subpel_y_q4;
-#if !CONFIG_LOWPRECISION_BLEND
-  (void)dst16;
-  (void)dst16_stride;
-  (void)bd;
-#endif
   assert(bits >= 0);
   // horizontal filter
   const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
       *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t res = 0;
       for (int k = 0; k < filter_params_x->taps; ++k) {
         res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
@@ -1426,30 +785,6 @@
       } else {
         dst[y * dst_stride + x] = res;
       }
-#else   // CONFIG_LOWPRECISION_BLEND
-      CONV_BUF_TYPE res = 0;
-      for (int k = 0; k < filter_params_x->taps; ++k) {
-        res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
-      }
-      res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-#endif  // CONFIG_LOWPRECISION_BLEND
     }
   }
 }
@@ -1464,28 +799,20 @@
   int dst_stride = conv_params->dst_stride;
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int bits = FILTER_BITS - conv_params->round_0;
-#if CONFIG_LOWPRECISION_BLEND
   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
   const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
                            (1 << (offset_bits - conv_params->round_1 - 1));
   const int round_bits =
       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
   assert(round_bits >= 0);
-#endif
   (void)filter_params_x;
   (void)subpel_x_q4;
-#if !CONFIG_LOWPRECISION_BLEND
-  (void)dst16;
-  (void)dst16_stride;
-  (void)bd;
-#endif
   assert(bits >= 0);
   // vertical filter
   const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
       *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
-#if CONFIG_LOWPRECISION_BLEND
       int32_t res = 0;
       for (int k = 0; k < filter_params_y->taps; ++k) {
         res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
@@ -1508,31 +835,6 @@
       } else {
         dst[y * dst_stride + x] = res;
       }
-#else   // CONFIG_LOWPRECISION_BLEND
-      CONV_BUF_TYPE res = 0;
-      for (int k = 0; k < filter_params_y->taps; ++k) {
-        res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
-      }
-      res *= (1 << bits);
-      res = ROUND_POWER_OF_TWO(res, conv_params->round_1);
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-#endif  // CONFIG_LOWPRECISION_BLEND
     }
   }
 }
@@ -1546,26 +848,18 @@
   int dst_stride = conv_params->dst_stride;
   const int bits =
       FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
-#if CONFIG_LOWPRECISION_BLEND
   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
   const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
                            (1 << (offset_bits - conv_params->round_1 - 1));
   assert(bits >= 0);
-#endif
   (void)filter_params_x;
   (void)filter_params_y;
   (void)subpel_x_q4;
   (void)subpel_y_q4;
-#if !CONFIG_LOWPRECISION_BLEND
-  (void)dst16;
-  (void)dst16_stride;
-  (void)bd;
-#endif
 
   for (int y = 0; y < h; ++y) {
     for (int x = 0; x < w; ++x) {
       CONV_BUF_TYPE res = src[y * src_stride + x] << bits;
-#if CONFIG_LOWPRECISION_BLEND
       res += round_offset;
       if (conv_params->do_average) {
         int32_t tmp = dst[y * dst_stride + x];
@@ -1582,60 +876,28 @@
       } else {
         dst[y * dst_stride + x] = res;
       }
-#else   // CONFIG_LOWPRECISION_BLEND
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-#endif  // CONFIG_LOWPRECISION_BLEND
     }
   }
 }
 
-#if CONFIG_LOWPRECISION_BLEND
 void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
                                     uint16_t *dst, int dst_stride, int w, int h,
                                     InterpFilterParams *filter_params_x,
                                     InterpFilterParams *filter_params_y,
                                     const int subpel_x_qn, const int x_step_qn,
                                     const int subpel_y_qn, const int y_step_qn,
-                                    ConvolveParams *conv_params, int bd)
-#else
-void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
-                                    CONV_BUF_TYPE *dst, int dst_stride, int w,
-                                    int h, InterpFilterParams *filter_params_x,
-                                    InterpFilterParams *filter_params_y,
-                                    const int subpel_x_qn, const int x_step_qn,
-                                    const int subpel_y_qn, const int y_step_qn,
-                                    ConvolveParams *conv_params, int bd)
-#endif  // CONFIG_LOWPRECISION_BLEND
-{
+                                    ConvolveParams *conv_params, int bd) {
   int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
   int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
              filter_params_y->taps;
   int im_stride = w;
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
-#if CONFIG_LOWPRECISION_BLEND
   CONV_BUF_TYPE *dst16 = conv_params->dst;
   const int dst16_stride = conv_params->dst_stride;
   const int bits =
       FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
   assert(bits >= 0);
-#endif
   // horizontal filter
   const uint16_t *src_horiz = src - fo_vert * src_stride;
   for (int y = 0; y < im_h; ++y) {
@@ -1668,7 +930,6 @@
       assert(y_filter_idx < SUBPEL_SHIFTS);
       const int16_t *y_filter =
           av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
-#if CONFIG_LOWPRECISION_BLEND
       int32_t sum = 1 << offset_bits;
       for (int k = 0; k < filter_params_y->taps; ++k) {
         sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
@@ -1700,33 +961,6 @@
         dst[y * dst_stride + x] =
             clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
       }
-#else   // CONFIG_LOWPRECISION_BLEND
-      CONV_BUF_TYPE sum = 1 << offset_bits;
-      for (int k = 0; k < filter_params_y->taps; ++k) {
-        sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
-      }
-      assert(0 <= sum && sum < (1 << (offset_bits + 2)));
-      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
-                          ((1 << (offset_bits - conv_params->round_1)) +
-                           (1 << (offset_bits - conv_params->round_1 - 1)));
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-#endif  // CONFIG_LOWPRECISION_BLEND
     }
     src_vert++;
   }
@@ -1749,7 +983,6 @@
                                  &filter_params_y, w, h);
 
   if (scaled) {
-#if CONFIG_LOWPRECISION_BLEND
     uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
     if (conv_params->is_compound) {
       assert(conv_params->dst != NULL);
@@ -1758,28 +991,6 @@
                                  &filter_params_x, &filter_params_y,
                                  subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
                                  conv_params, bd);
-#else   // CONFIG_LOWPRECISION_BLEND
-    if (conv_params->is_compound) {
-      av1_highbd_convolve_2d_scale(
-          src, src_stride, conv_params->dst, conv_params->dst_stride, w, h,
-          &filter_params_x, &filter_params_y, subpel_x_q4, x_step_q4,
-          subpel_y_q4, y_step_q4, conv_params, bd);
-    } else {
-      CONV_BUF_TYPE tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE];
-      int tmp_dst_stride = MAX_SB_SIZE;
-      av1_highbd_convolve_2d_scale(src, src_stride, tmp_dst, tmp_dst_stride, w,
-                                   h, &filter_params_x, &filter_params_y,
-                                   subpel_x_q4, x_step_q4, subpel_y_q4,
-                                   y_step_q4, conv_params, bd);
-
-      // 0-bit rounding just to convert from int32 to uint16
-      const int rbits =
-          2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-      assert(rbits >= 0);
-      av1_highbd_convolve_rounding(tmp_dst, tmp_dst_stride, dst8, dst_stride, w,
-                                   h, rbits, bd);
-    }
-#endif  // CONFIG_LOWPRECISION_BLEND
   } else {
     uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
 

diff --git a/av1/common/convolve.h b/av1/common/convolve.h
index b388af3..1b2c2d0 100644
--- a/av1/common/convolve.h
+++ b/av1/common/convolve.h

@@ -17,11 +17,7 @@
 extern "C" {
 #endif
 
-#if CONFIG_LOWPRECISION_BLEND
 typedef uint16_t CONV_BUF_TYPE;
-#else
-typedef int32_t CONV_BUF_TYPE;
-#endif
 typedef struct ConvolveParams {
   int ref;
   int do_average;
@@ -36,15 +32,9 @@
   int bck_offset;
 } ConvolveParams;
 
-#if CONFIG_LOWPRECISION_BLEND
 #define ROUND0_BITS 3
 #define COMPOUND_ROUND1_BITS 7
 #define WIENER_ROUND0_BITS 3
-#else
-#define ROUND0_BITS 5
-#define COMPOUND_ROUND1_BITS 0
-#define WIENER_ROUND0_BITS 5
-#endif  // CONFIG_LOWPRECISION_BLEND
 
 #define WIENER_CLAMP_LIMIT(r0, bd) (1 << ((bd) + 1 + FILTER_BITS - r0))
 
@@ -92,7 +82,6 @@
   assert(IMPLIES(do_average, is_compound));
   conv_params.is_compound = is_compound;
   conv_params.round_0 = ROUND0_BITS;
-#if CONFIG_LOWPRECISION_BLEND
   conv_params.round_1 = is_compound ? COMPOUND_ROUND1_BITS
                                     : 2 * FILTER_BITS - conv_params.round_0;
   const int intbufrange = bd + FILTER_BITS - conv_params.round_0 + 2;
@@ -101,10 +90,6 @@
     conv_params.round_0 += intbufrange - 16;
     if (!is_compound) conv_params.round_1 -= intbufrange - 16;
   }
-#else
-  (void)bd;
-  conv_params.round_1 = 0;
-#endif  // CONFIG_LOWPRECISION_BLEND
   // TODO(yunqing): The following dst should only be valid while
   // is_compound = 1;
   conv_params.dst = dst;

diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index b5c11e4..3caab50 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c

@@ -32,14 +32,6 @@
 #define USE_PRECOMPUTED_WEDGE_MASK 1
 #define USE_PRECOMPUTED_WEDGE_SIGN 1
 
-#if !CONFIG_LOWPRECISION_BLEND
-static INLINE int get_compound_post_rounding_bits(
-    const ConvolveParams *conv_params) {
-  assert(conv_params->is_compound);
-  return 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
-}
-#endif
-
 // This function will determine whether or not to create a warped
 // prediction.
 static INLINE int allow_warp(const MODE_INFO *const mi,
@@ -362,20 +354,11 @@
 #elif COMPOUND_SEGMENT_TYPE == 1
 #define DIFF_FACTOR 16
 
-#if CONFIG_LOWPRECISION_BLEND
 static void diffwtd_mask_d32(uint8_t *mask, int which_inverse, int mask_base,
                              const CONV_BUF_TYPE *src0, int src0_stride,
                              const CONV_BUF_TYPE *src1, int src1_stride,
                              BLOCK_SIZE sb_type, int h, int w,
-                             ConvolveParams *conv_params, int bd)
-#else
-static void diffwtd_mask_d32(uint8_t *mask, int which_inverse, int mask_base,
-                             const int32_t *src0, int src0_stride,
-                             const int32_t *src1, int src1_stride,
-                             BLOCK_SIZE sb_type, int h, int w,
-                             ConvolveParams *conv_params, int bd)
-#endif
-{
+                             ConvolveParams *conv_params, int bd) {
   int round =
       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
   int i, j, m, diff;
@@ -391,19 +374,10 @@
   }
 }
 
-#if CONFIG_LOWPRECISION_BLEND
 static void build_compound_seg_mask_d16(
     uint8_t *mask, SEG_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
     int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride,
-    BLOCK_SIZE sb_type, int h, int w, ConvolveParams *conv_params, int bd)
-#else
-static void build_compound_seg_mask_d32(uint8_t *mask, SEG_MASK_TYPE mask_type,
-                                        const int32_t *src0, int src0_stride,
-                                        const int32_t *src1, int src1_stride,
-                                        BLOCK_SIZE sb_type, int h, int w,
-                                        ConvolveParams *conv_params, int bd)
-#endif
-{
+    BLOCK_SIZE sb_type, int h, int w, ConvolveParams *conv_params, int bd) {
   switch (mask_type) {
     case DIFFWTD_38:
       diffwtd_mask_d32(mask, 0, 38, src0, src0_stride, src1, src1_stride,
@@ -627,7 +601,6 @@
   init_wedge_masks();
 }
 
-#if CONFIG_LOWPRECISION_BLEND
 static void build_masked_compound_no_round(
     uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
     const CONV_BUF_TYPE *src1, int src1_stride,
@@ -647,21 +620,6 @@
                                  src1_stride, mask, block_size_wide[sb_type], h,
                                  w, subh, subw, conv_params);
 }
-#else   // CONFIG_LOWPRECISION_BLEND
-static void build_masked_compound_no_round(
-    CONV_BUF_TYPE *dst, int dst_stride, const CONV_BUF_TYPE *src0,
-    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride,
-    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
-    int w) {
-  // Derive subsampling from h and w passed in. May be refactored to
-  // pass in subsampling factors directly.
-  const int subh = (2 << mi_size_high_log2[sb_type]) == h;
-  const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
-  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
-  aom_blend_a64_d32_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
-                         mask, block_size_wide[sb_type], h, w, subh, subw);
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
 
 static void build_masked_compound(
     uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
@@ -713,14 +671,7 @@
 // a temporary buffer, then will blend that temporary buffer with that from
 // the other reference.
 //
-#if CONFIG_LOWPRECISION_BLEND
 #define INTER_PRED_BYTES_PER_PIXEL 2
-#else
-// The predictions are at 32-bits, so we'll need 32 bits per
-// pixel. Otherwise, we'll need up to 16 bits per pixel if
-// CONFIG_HIGHBITDEPTH or just 8 otherwise.
-#define INTER_PRED_BYTES_PER_PIXEL 4
-#endif
 
   DECLARE_ALIGNED(32, uint8_t,
                   tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
@@ -733,13 +684,8 @@
   const int tmp_buf_stride = MAX_SB_SIZE;
   CONV_BUF_TYPE *org_dst = conv_params->dst;
   int org_dst_stride = conv_params->dst_stride;
-#if CONFIG_LOWPRECISION_BLEND
   CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
   conv_params->dst = tmp_buf16;
-#else
-  CONV_BUF_TYPE *tmp_buf32 = (CONV_BUF_TYPE *)tmp_buf;
-  conv_params->dst = tmp_buf32;
-#endif
   conv_params->dst_stride = tmp_buf_stride;
   assert(conv_params->do_average == 0);
 
@@ -750,25 +696,13 @@
                            xd, can_use_previous);
 
   if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
-#if CONFIG_LOWPRECISION_BLEND
     build_compound_seg_mask_d16(
         comp_data.seg_mask, comp_data.mask_type, org_dst, org_dst_stride,
         tmp_buf16, tmp_buf_stride, mi->mbmi.sb_type, h, w, conv_params, xd->bd);
-#else
-    build_compound_seg_mask_d32(
-        comp_data.seg_mask, comp_data.mask_type, org_dst, org_dst_stride,
-        tmp_buf32, tmp_buf_stride, mi->mbmi.sb_type, h, w, conv_params, xd->bd);
-#endif
   }
-#if CONFIG_LOWPRECISION_BLEND
   build_masked_compound_no_round(dst, dst_stride, org_dst, org_dst_stride,
                                  tmp_buf16, tmp_buf_stride, &comp_data,
                                  mi->mbmi.sb_type, h, w, conv_params, xd);
-#else
-  build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
-                                 org_dst_stride, tmp_buf32, tmp_buf_stride,
-                                 &comp_data, mi->mbmi.sb_type, h, w);
-#endif
 }
 
 // TODO(sarahparker) av1_highbd_build_inter_predictor and
@@ -956,11 +890,7 @@
       for (idx = 0; idx < b8_w; idx += b4_w) {
         MB_MODE_INFO *this_mbmi = &xd->mi[row * xd->mi_stride + col]->mbmi;
         is_compound = has_second_ref(this_mbmi);
-#if CONFIG_LOWPRECISION_BLEND
         DECLARE_ALIGNED(32, CONV_BUF_TYPE, tmp_dst[8 * 8]);
-#else
-        DECLARE_ALIGNED(32, int32_t, tmp_dst[8 * 8]);
-#endif
         int tmp_dst_stride = 8;
         assert(w < 8 || h < 8);
         ConvolveParams conv_params = get_conv_params_no_round(
@@ -1064,11 +994,7 @@
     uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
     uint8_t *pre[2];
     SubpelParams subpel_params[2];
-#if CONFIG_LOWPRECISION_BLEND
     DECLARE_ALIGNED(32, uint16_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]);
-#else
-    DECLARE_ALIGNED(32, int32_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]);
-#endif
     for (ref = 0; ref < 1 + is_compound; ++ref) {
       const struct scale_factors *const sf =
           is_intrabc ? &cm->sf_identity : &xd->block_refs[ref]->sf;
@@ -1164,20 +1090,6 @@
             plane, ref, mi, build_for_obmc, subpel_params[ref].xs,
             subpel_params[ref].ys, xd, cm->allow_warped_motion);
     }
-
-#if !CONFIG_LOWPRECISION_BLEND
-    // TODO(angiebird): This part needs optimization
-    if (conv_params.is_compound) {
-      assert(conv_params.dst != NULL);
-      int round_bits = get_compound_post_rounding_bits(&conv_params);
-      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-        av1_highbd_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride,
-                                     w, h, round_bits, xd->bd);
-      else
-        av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
-                              round_bits);
-    }
-#endif
   }
 }
 

diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 95ee5d5..0e0068f 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c

@@ -431,11 +431,9 @@
   const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
   const int offset_bits_horiz = bd + FILTER_BITS - 1;
   const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
-#if CONFIG_LOWPRECISION_BLEND
   const int round_bits =
       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
-#endif
   (void)max_bits_horiz;
   assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
 
@@ -506,7 +504,6 @@
                 &conv_params
                      ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
                            (j - p_col + l + 4)];
-#if CONFIG_LOWPRECISION_BLEND
             sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
             if (conv_params->do_average) {
               uint16_t *dst16 =
@@ -527,30 +524,6 @@
             } else {
               *p = sum;
             }
-#else   // CONFIG_LOWPRECISION_BLEND
-            sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert) -
-                  (1 << (offset_bits_horiz + FILTER_BITS - reduce_bits_horiz -
-                         reduce_bits_vert)) -
-                  (1 << (offset_bits_vert - reduce_bits_vert));
-            if (conv_params->use_jnt_comp_avg) {
-              if (conv_params->do_average) {
-                int32_t tmp32 = *p;
-                tmp32 = tmp32 * conv_params->fwd_offset +
-                        sum * conv_params->bck_offset;
-                *p = tmp32 >> DIST_PRECISION_BITS;
-              } else {
-                *p = sum;
-              }
-            } else {
-              if (conv_params->do_average) {
-                int32_t tmp32 = *p;
-                tmp32 += sum;
-                *p = tmp32 >> 1;
-              } else {
-                *p = sum;
-              }
-            }
-#endif  // CONFIG_LOWPRECISION_BLEND
           } else {
             uint16_t *p =
                 &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
@@ -744,11 +717,9 @@
   const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
   const int offset_bits_horiz = bd + FILTER_BITS - 1;
   const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
-#if CONFIG_LOWPRECISION_BLEND
   const int round_bits =
       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
-#endif
   (void)max_bits_horiz;
   assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
 
@@ -825,7 +796,6 @@
                 &conv_params
                      ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
                            (j - p_col + l + 4)];
-#if CONFIG_LOWPRECISION_BLEND
             sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
             if (conv_params->do_average) {
               uint8_t *dst8 =
@@ -845,30 +815,6 @@
             } else {
               *p = sum;
             }
-#else   // CONFIG_LOWPRECISION_BLEND
-            sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert) -
-                  (1 << (offset_bits_horiz + FILTER_BITS - reduce_bits_horiz -
-                         reduce_bits_vert)) -
-                  (1 << (offset_bits_vert - reduce_bits_vert));
-            if (conv_params->use_jnt_comp_avg) {
-              if (conv_params->do_average) {
-                int32_t tmp32 = *p;
-                tmp32 = tmp32 * conv_params->fwd_offset +
-                        sum * conv_params->bck_offset;
-                *p = tmp32 >> DIST_PRECISION_BITS;
-              } else {
-                *p = sum;
-              }
-            } else {
-              if (conv_params->do_average) {
-                int32_t tmp32 = *p;
-                tmp32 += sum;
-                *p = tmp32 >> 1;
-              } else {
-                *p = sum;
-              }
-            }
-#endif  // CONFIG_LOWPRECISION_BLEND
           } else {
             uint8_t *p =
                 &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];

diff --git a/av1/common/x86/av1_convolve_scale_sse4.c b/av1/common/x86/av1_convolve_scale_sse4.c
index 09acddd..366c664 100644
--- a/av1/common/x86/av1_convolve_scale_sse4.c
+++ b/av1/common/x86/av1_convolve_scale_sse4.c

@@ -42,107 +42,8 @@
   return _mm_castpd_si128(_mm_and_pd(ad, bd));
 }
 
-#if !CONFIG_LOWPRECISION_BLEND
-// The horizontal filter for av1_convolve_2d_scale_sse4_1. This is the more
-// general version, supporting 10 and 12 tap filters. For 8-tap filters, use
-// hfilter8.
-static void hfilter(const uint8_t *src, int src_stride, int32_t *dst, int w,
-                    int h, int subpel_x_qn, int x_step_qn,
-                    const InterpFilterParams *filter_params, unsigned round) {
-  const int bd = 8;
-  const int ntaps = filter_params->taps;
-  assert(ntaps == 10 || ntaps == 12);
-
-  src -= ntaps / 2 - 1;
-
-  // Construct a mask with which we'll AND filter coefficients 89ab89ab to zero
-  // out the unneeded entries.
-  const __m128i hicoeff_mask = make_1012_mask(ntaps);
-
-  int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
-  const __m128i round_add = _mm_set1_epi32(round_add32);
-  const __m128i round_shift = extend_32_to_128(round);
-
-  int x_qn = subpel_x_qn;
-  for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
-    const uint8_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
-    const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
-    assert(filter_idx < SUBPEL_SHIFTS);
-    const int16_t *filter =
-        av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
-
-    // The "lo" coefficients are coefficients 0..7. For a 12-tap filter, the
-    // "hi" coefficients are arranged as 89ab89ab. For a 10-tap filter, they
-    // are masked out with hicoeff_mask.
-    const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
-    const __m128i coeffhi = load_and_128i(filter + 8, hicoeff_mask);
-    const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
-
-    int y;
-    for (y = 0; y <= h - 4; y += 4) {
-      const uint8_t *const src0 = src_col + y * src_stride;
-      const uint8_t *const src1 = src0 + 1 * src_stride;
-      const uint8_t *const src2 = src0 + 2 * src_stride;
-      const uint8_t *const src3 = src0 + 3 * src_stride;
-
-      // Load up source data. This is 8-bit input data, so each load gets 16
-      // pixels (we need at most 12)
-      const __m128i data08 = _mm_loadu_si128((__m128i *)src0);
-      const __m128i data18 = _mm_loadu_si128((__m128i *)src1);
-      const __m128i data28 = _mm_loadu_si128((__m128i *)src2);
-      const __m128i data38 = _mm_loadu_si128((__m128i *)src3);
-
-      // Now zero-extend up to 16-bit precision by interleaving with zeros. For
-      // the "high" pixels (8 to 11), interleave first (so that the expansion
-      // to 16-bits operates on an entire register).
-      const __m128i data0lo = _mm_unpacklo_epi8(data08, zero);
-      const __m128i data1lo = _mm_unpacklo_epi8(data18, zero);
-      const __m128i data2lo = _mm_unpacklo_epi8(data28, zero);
-      const __m128i data3lo = _mm_unpacklo_epi8(data38, zero);
-      const __m128i data01hi8 = _mm_unpackhi_epi32(data08, data18);
-      const __m128i data23hi8 = _mm_unpackhi_epi32(data28, data38);
-      const __m128i data01hi = _mm_unpacklo_epi8(data01hi8, zero);
-      const __m128i data23hi = _mm_unpacklo_epi8(data23hi8, zero);
-
-      // Multiply by coefficients
-      const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
-      const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
-      const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
-      const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
-      const __m128i conv01hi = _mm_madd_epi16(data01hi, coeffhi);
-      const __m128i conv23hi = _mm_madd_epi16(data23hi, coeffhi);
-
-      // Reduce horizontally and add
-      const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
-      const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
-      const __m128i convlo = _mm_hadd_epi32(conv01lo, conv23lo);
-      const __m128i convhi = _mm_hadd_epi32(conv01hi, conv23hi);
-      const __m128i conv = _mm_add_epi32(convlo, convhi);
-
-      // Divide down by (1 << round), rounding to nearest.
-      const __m128i shifted =
-          _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
-
-      // Write transposed to the output
-      _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
-    }
-    for (; y < h; ++y) {
-      const uint8_t *const src_row = src_col + y * src_stride;
-
-      int32_t sum = (1 << (bd + FILTER_BITS - 1));
-      for (int k = 0; k < ntaps; ++k) {
-        sum += filter[k] * src_row[k];
-      }
-
-      dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 // A specialised version of hfilter, the horizontal filter for
 // av1_convolve_2d_scale_sse4_1. This version only supports 8 tap filters.
-#if CONFIG_LOWPRECISION_BLEND
 static void hfilter8(const uint8_t *src, int src_stride, int16_t *dst, int w,
                      int h, int subpel_x_qn, int x_step_qn,
                      const InterpFilterParams *filter_params, unsigned round) {
@@ -219,83 +120,6 @@
     }
   }
 }
-#else
-static void hfilter8(const uint8_t *src, int src_stride, int32_t *dst, int w,
-                     int h, int subpel_x_qn, int x_step_qn,
-                     const InterpFilterParams *filter_params, unsigned round) {
-  const int bd = 8;
-  const int ntaps = 8;
-
-  src -= ntaps / 2 - 1;
-
-  int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
-  const __m128i round_add = _mm_set1_epi32(round_add32);
-  const __m128i round_shift = extend_32_to_128(round);
-
-  int x_qn = subpel_x_qn;
-  for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
-    const uint8_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
-    const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
-    assert(filter_idx < SUBPEL_SHIFTS);
-    const int16_t *filter =
-        av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
-
-    // Load the filter coefficients
-    const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
-    const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
-
-    int y;
-    for (y = 0; y <= h - 4; y += 4) {
-      const uint8_t *const src0 = src_col + y * src_stride;
-      const uint8_t *const src1 = src0 + 1 * src_stride;
-      const uint8_t *const src2 = src0 + 2 * src_stride;
-      const uint8_t *const src3 = src0 + 3 * src_stride;
-
-      // Load up source data. This is 8-bit input data; each load is just
-      // loading the lower half of the register and gets 8 pixels
-      const __m128i data08 = _mm_loadl_epi64((__m128i *)src0);
-      const __m128i data18 = _mm_loadl_epi64((__m128i *)src1);
-      const __m128i data28 = _mm_loadl_epi64((__m128i *)src2);
-      const __m128i data38 = _mm_loadl_epi64((__m128i *)src3);
-
-      // Now zero-extend up to 16-bit precision by interleaving with
-      // zeros. Drop the upper half of each register (which just had zeros)
-      const __m128i data0lo = _mm_unpacklo_epi8(data08, zero);
-      const __m128i data1lo = _mm_unpacklo_epi8(data18, zero);
-      const __m128i data2lo = _mm_unpacklo_epi8(data28, zero);
-      const __m128i data3lo = _mm_unpacklo_epi8(data38, zero);
-
-      // Multiply by coefficients
-      const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
-      const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
-      const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
-      const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
-
-      // Reduce horizontally and add
-      const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
-      const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
-      const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo);
-
-      // Divide down by (1 << round), rounding to nearest.
-      const __m128i shifted =
-          _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
-
-      // Write transposed to the output
-      _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
-    }
-    for (; y < h; ++y) {
-      const uint8_t *const src_row = src_col + y * src_stride;
-
-      int32_t sum = (1 << (bd + FILTER_BITS - 1));
-      for (int k = 0; k < ntaps; ++k) {
-        sum += filter[k] * src_row[k];
-      }
-
-      dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
 
 // Do a 12-tap convolution with the given coefficients, loading data from src.
 static __m128i convolve_32(const int32_t *src, __m128i coeff03, __m128i coeff47,
@@ -319,134 +143,13 @@
   return _mm_add_epi32(conv03, conv47);
 }
 
-#if CONFIG_LOWPRECISION_BLEND
 static __m128i convolve_16_8(const int16_t *src, __m128i coeff) {
   __m128i data = _mm_loadu_si128((__m128i *)src);
   return _mm_madd_epi16(data, coeff);
 }
-#endif
-
-#if !CONFIG_LOWPRECISION_BLEND
-// The vertical filter for av1_convolve_2d_scale_sse4_1. This is the more
-// general version, supporting 10 and 12 tap filters. For 8-tap filters, use
-// vfilter8.
-static void vfilter(const int32_t *src, int src_stride, int32_t *dst,
-                    int dst_stride, int w, int h, int subpel_y_qn,
-                    int y_step_qn, const InterpFilterParams *filter_params,
-                    const ConvolveParams *conv_params, int bd) {
-  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
-  const int ntaps = filter_params->taps;
-
-  // Construct a mask with which we'll AND filter coefficients 89ab to zero out
-  // the unneeded entries. The upper bits of this mask are unused.
-  const __m128i hicoeff_mask = make_1012_mask(ntaps);
-
-  int32_t round_add32 = (1 << conv_params->round_1) / 2 + (1 << offset_bits);
-  const __m128i round_add = _mm_set1_epi32(round_add32);
-  const __m128i round_shift = extend_32_to_128(conv_params->round_1);
-
-  const int32_t sub32 = ((1 << (offset_bits - conv_params->round_1)) +
-                         (1 << (offset_bits - conv_params->round_1 - 1)));
-  const __m128i sub = _mm_set1_epi32(sub32);
-
-  const __m128i fwd_offset = _mm_set1_epi32(conv_params->fwd_offset);
-  const __m128i bck_offset = _mm_set1_epi32(conv_params->bck_offset);
-
-  int y_qn = subpel_y_qn;
-  for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
-    const int32_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS);
-    const int filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
-    assert(filter_idx < SUBPEL_SHIFTS);
-    const int16_t *filter =
-        av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
-
-    // Load up coefficients for the filter and sign-extend to 32-bit precision
-    // (to do so, calculate sign bits and then interleave)
-    const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
-    const __m128i coeff0716 = _mm_loadu_si128((__m128i *)filter);
-    const __m128i coeffhi16 = load_and_128i(filter + 8, hicoeff_mask);
-    const __m128i csign0716 = _mm_cmplt_epi16(coeff0716, zero);
-    const __m128i csignhi16 = _mm_cmplt_epi16(coeffhi16, zero);
-    const __m128i coeff03 = _mm_unpacklo_epi16(coeff0716, csign0716);
-    const __m128i coeff47 = _mm_unpackhi_epi16(coeff0716, csign0716);
-    const __m128i coeff8d = _mm_unpacklo_epi16(coeffhi16, csignhi16);
-
-    int x;
-    for (x = 0; x <= w - 4; x += 4) {
-      const int32_t *const src0 = src_y + x * src_stride;
-      const int32_t *const src1 = src0 + 1 * src_stride;
-      const int32_t *const src2 = src0 + 2 * src_stride;
-      const int32_t *const src3 = src0 + 3 * src_stride;
-
-      // Load the source data for the three rows, adding the three registers of
-      // convolved products to one as we go (conv0..conv3) to avoid the
-      // register pressure getting too high.
-      const __m128i conv0 = convolve_32(src0, coeff03, coeff47, coeff8d);
-      const __m128i conv1 = convolve_32(src1, coeff03, coeff47, coeff8d);
-      const __m128i conv2 = convolve_32(src2, coeff03, coeff47, coeff8d);
-      const __m128i conv3 = convolve_32(src3, coeff03, coeff47, coeff8d);
-
-      // Now reduce horizontally to get one lane for each result
-      const __m128i conv01 = _mm_hadd_epi32(conv0, conv1);
-      const __m128i conv23 = _mm_hadd_epi32(conv2, conv3);
-      const __m128i conv = _mm_hadd_epi32(conv01, conv23);
-
-      // Divide down by (1 << round_1), rounding to nearest and subtract sub32.
-      const __m128i shifted =
-          _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
-      const __m128i subbed = _mm_sub_epi32(shifted, sub);
-
-      int32_t *dst_x = dst + y * dst_stride + x;
-      __m128i result;
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          __m128i tmp = _mm_loadu_si128((__m128i *)dst_x);
-          tmp = _mm_add_epi32(_mm_mullo_epi32(tmp, fwd_offset),
-                              _mm_mullo_epi32(subbed, bck_offset));
-          result = _mm_srai_epi32(tmp, DIST_PRECISION_BITS);
-        } else {
-          result = subbed;
-        }
-      } else {
-        result =
-            (conv_params->do_average)
-                ? _mm_srai_epi32(
-                      _mm_add_epi32(subbed, _mm_loadu_si128((__m128i *)dst_x)),
-                      1)
-                : subbed;
-      }
-      _mm_storeu_si128((__m128i *)dst_x, result);
-    }
-    for (; x < w; ++x) {
-      const int32_t *src_x = src_y + x * src_stride;
-      CONV_BUF_TYPE sum = 1 << offset_bits;
-      for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_x[k];
-      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - sub32;
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
 
 // A specialised version of vfilter, the vertical filter for
 // av1_convolve_2d_scale_sse4_1. This version only supports 8 tap filters.
-#if CONFIG_LOWPRECISION_BLEND
 static void vfilter8(const int16_t *src, int src_stride, uint8_t *dst,
                      int dst_stride, int w, int h, int subpel_y_qn,
                      int y_step_qn, const InterpFilterParams *filter_params,
@@ -572,116 +275,6 @@
     }
   }
 }
-#else
-static void vfilter8(const int32_t *src, int src_stride, int32_t *dst,
-                     int dst_stride, int w, int h, int subpel_y_qn,
-                     int y_step_qn, const InterpFilterParams *filter_params,
-                     const ConvolveParams *conv_params, int bd) {
-  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
-  const int ntaps = 8;
-
-  int32_t round_add32 = (1 << conv_params->round_1) / 2 + (1 << offset_bits);
-  const __m128i round_add = _mm_set1_epi32(round_add32);
-  const __m128i round_shift = extend_32_to_128(conv_params->round_1);
-
-  const int32_t sub32 = ((1 << (offset_bits - conv_params->round_1)) +
-                         (1 << (offset_bits - conv_params->round_1 - 1)));
-  const __m128i sub = _mm_set1_epi32(sub32);
-
-  const __m128i fwd_offset = _mm_set1_epi32(conv_params->fwd_offset);
-  const __m128i bck_offset = _mm_set1_epi32(conv_params->bck_offset);
-
-  int y_qn = subpel_y_qn;
-  for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
-    const int32_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS);
-    const int filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
-    assert(filter_idx < SUBPEL_SHIFTS);
-    const int16_t *filter =
-        av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
-
-    // Load up coefficients for the filter and sign-extend to 32-bit precision
-    // (to do so, calculate sign bits and then interleave)
-    const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
-    const __m128i coeff0716 = _mm_loadu_si128((__m128i *)filter);
-    const __m128i csign0716 = _mm_cmplt_epi16(coeff0716, zero);
-    const __m128i coeff03 = _mm_unpacklo_epi16(coeff0716, csign0716);
-    const __m128i coeff47 = _mm_unpackhi_epi16(coeff0716, csign0716);
-
-    int x;
-    for (x = 0; x <= w - 4; x += 4) {
-      const int32_t *const src0 = src_y + x * src_stride;
-      const int32_t *const src1 = src0 + 1 * src_stride;
-      const int32_t *const src2 = src0 + 2 * src_stride;
-      const int32_t *const src3 = src0 + 3 * src_stride;
-
-      // Load the source data for the three rows, adding the three registers of
-      // convolved products to one as we go (conv0..conv3) to avoid the
-      // register pressure getting too high.
-      const __m128i conv0 = convolve_32_8(src0, coeff03, coeff47);
-      const __m128i conv1 = convolve_32_8(src1, coeff03, coeff47);
-      const __m128i conv2 = convolve_32_8(src2, coeff03, coeff47);
-      const __m128i conv3 = convolve_32_8(src3, coeff03, coeff47);
-
-      // Now reduce horizontally to get one lane for each result
-      const __m128i conv01 = _mm_hadd_epi32(conv0, conv1);
-      const __m128i conv23 = _mm_hadd_epi32(conv2, conv3);
-      const __m128i conv = _mm_hadd_epi32(conv01, conv23);
-
-      // Divide down by (1 << round_1), rounding to nearest and subtract sub32.
-      const __m128i shifted =
-          _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
-      const __m128i subbed = _mm_sub_epi32(shifted, sub);
-
-      int32_t *dst_x = dst + y * dst_stride + x;
-      __m128i result;
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          __m128i tmp = _mm_loadu_si128((__m128i *)dst_x);
-          tmp = _mm_add_epi32(_mm_mullo_epi32(tmp, fwd_offset),
-                              _mm_mullo_epi32(subbed, bck_offset));
-          result = _mm_srai_epi32(tmp, DIST_PRECISION_BITS);
-        } else {
-          result = subbed;
-        }
-      } else {
-        result =
-            (conv_params->do_average)
-                ? _mm_srai_epi32(
-                      _mm_add_epi32(subbed, _mm_loadu_si128((__m128i *)dst_x)),
-                      1)
-                : subbed;
-      }
-
-      _mm_storeu_si128((__m128i *)dst_x, result);
-    }
-    for (; x < w; ++x) {
-      const int32_t *src_x = src_y + x * src_stride;
-      CONV_BUF_TYPE sum = 1 << offset_bits;
-      for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_x[k];
-      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - sub32;
-      if (conv_params->use_jnt_comp_avg) {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
-          dst[y * dst_stride + x] = tmp >> DIST_PRECISION_BITS;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      } else {
-        if (conv_params->do_average) {
-          int32_t tmp = dst[y * dst_stride + x];
-          tmp += res;
-          dst[y * dst_stride + x] = tmp >> 1;
-        } else {
-          dst[y * dst_stride + x] = res;
-        }
-      }
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
-#if CONFIG_LOWPRECISION_BLEND
 void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride,
                                   uint8_t *dst8, int dst8_stride, int w, int h,
                                   InterpFilterParams *filter_params_x,
@@ -708,41 +301,6 @@
   vfilter8(tmp, im_h, dst8, dst8_stride, w, h, subpel_y_qn, y_step_qn,
            filter_params_y, conv_params, 8);
 }
-#else
-void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride,
-                                  CONV_BUF_TYPE *dst, int dst_stride, int w,
-                                  int h, InterpFilterParams *filter_params_x,
-                                  InterpFilterParams *filter_params_y,
-                                  const int subpel_x_qn, const int x_step_qn,
-                                  const int subpel_y_qn, const int y_step_qn,
-                                  ConvolveParams *conv_params) {
-  // TODO(yaowu): remove unnecessary initializations
-  int32_t tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE] = { 0 };
-  int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
-             filter_params_y->taps;
-
-  const int xtaps = filter_params_x->taps;
-  const int ytaps = filter_params_y->taps;
-
-  const int fo_vert = ytaps / 2 - 1;
-
-  // horizontal filter
-  if (xtaps == 8)
-    hfilter8(src - fo_vert * src_stride, src_stride, tmp, w, im_h, subpel_x_qn,
-             x_step_qn, filter_params_x, conv_params->round_0);
-  else
-    hfilter(src - fo_vert * src_stride, src_stride, tmp, w, im_h, subpel_x_qn,
-            x_step_qn, filter_params_x, conv_params->round_0);
-
-  // vertical filter (input is transposed)
-  if (ytaps == 8)
-    vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
-             filter_params_y, conv_params, 8);
-  else
-    vfilter(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
-            filter_params_y, conv_params, 8);
-}
-#endif
 
 // An wrapper to generate the SHUFPD instruction with __m128i types (just
 // writing _mm_shuffle_pd at the callsites gets a bit ugly because of the
@@ -753,104 +311,9 @@
   return _mm_castpd_si128(_mm_shuffle_pd(ad, bd, 0));
 }
 
-#if !CONFIG_LOWPRECISION_BLEND
-// The horizontal filter for av1_highbd_convolve_2d_scale_sse4_1. This
-// is the more general version, supporting 10 and 12 tap filters. For
-// 8-tap filters, use hfilter8.
-static void highbd_hfilter(const uint16_t *src, int src_stride, int32_t *dst,
-                           int w, int h, int subpel_x_qn, int x_step_qn,
-                           const InterpFilterParams *filter_params,
-                           unsigned round, int bd) {
-  const int ntaps = filter_params->taps;
-  assert(ntaps == 10 || ntaps == 12);
-
-  src -= ntaps / 2 - 1;
-
-  // Construct a mask with which we'll AND filter coefficients 89ab89ab to zero
-  // out the unneeded entries.
-  const __m128i hicoeff_mask = make_1012_mask(ntaps);
-
-  int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
-  const __m128i round_add = _mm_set1_epi32(round_add32);
-  const __m128i round_shift = extend_32_to_128(round);
-
-  int x_qn = subpel_x_qn;
-  for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
-    const uint16_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
-    const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
-    assert(filter_idx < SUBPEL_SHIFTS);
-    const int16_t *filter =
-        av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
-
-    // The "lo" coefficients are coefficients 0..7. For a 12-tap filter, the
-    // "hi" coefficients are arranged as 89ab89ab. For a 10-tap filter, they
-    // are masked out with hicoeff_mask.
-    const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
-    const __m128i coeffhi = load_and_128i(filter + 8, hicoeff_mask);
-
-    int y;
-    for (y = 0; y <= h - 4; y += 4) {
-      const uint16_t *const src0 = src_col + y * src_stride;
-      const uint16_t *const src1 = src0 + 1 * src_stride;
-      const uint16_t *const src2 = src0 + 2 * src_stride;
-      const uint16_t *const src3 = src0 + 3 * src_stride;
-
-      // Load up source data. This is 16-bit input data, so each load gets 8
-      // pixels (we need at most 12)
-      const __m128i data0lo = _mm_loadu_si128((__m128i *)src0);
-      const __m128i data1lo = _mm_loadu_si128((__m128i *)src1);
-      const __m128i data2lo = _mm_loadu_si128((__m128i *)src2);
-      const __m128i data3lo = _mm_loadu_si128((__m128i *)src3);
-      const __m128i data0hi = _mm_loadu_si128((__m128i *)(src0 + 8));
-      const __m128i data1hi = _mm_loadu_si128((__m128i *)(src1 + 8));
-      const __m128i data2hi = _mm_loadu_si128((__m128i *)(src2 + 8));
-      const __m128i data3hi = _mm_loadu_si128((__m128i *)(src3 + 8));
-
-      // The "hi" data has rubbish in the top half so interleave pairs together
-      // to minimise the calculation we need to do.
-      const __m128i data01hi = mm_shuffle0_si128(data0hi, data1hi);
-      const __m128i data23hi = mm_shuffle0_si128(data2hi, data3hi);
-
-      // Multiply by coefficients
-      const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
-      const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
-      const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
-      const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
-      const __m128i conv01hi = _mm_madd_epi16(data01hi, coeffhi);
-      const __m128i conv23hi = _mm_madd_epi16(data23hi, coeffhi);
-
-      // Reduce horizontally and add
-      const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
-      const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
-      const __m128i convlo = _mm_hadd_epi32(conv01lo, conv23lo);
-      const __m128i convhi = _mm_hadd_epi32(conv01hi, conv23hi);
-      const __m128i conv = _mm_add_epi32(convlo, convhi);
-
-      // Divide down by (1 << round), rounding to nearest.
-      const __m128i shifted =
-          _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
-
-      // Write transposed to the output
-      _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
-    }
-    for (; y < h; ++y) {
-      const uint16_t *const src_row = src_col + y * src_stride;
-
-      int32_t sum = (1 << (bd + FILTER_BITS - 1));
-      for (int k = 0; k < ntaps; ++k) {
-        sum += filter[k] * src_row[k];
-      }
-
-      dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 // A specialised version of hfilter, the horizontal filter for
 // av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap
 // filters.
-#if CONFIG_LOWPRECISION_BLEND
 static void highbd_hfilter8(const uint16_t *src, int src_stride, int16_t *dst,
                             int w, int h, int subpel_x_qn, int x_step_qn,
                             const InterpFilterParams *filter_params,
@@ -919,77 +382,6 @@
     }
   }
 }
-#else   // CONFIG_LOWPRECISION_BLEND
-static void highbd_hfilter8(const uint16_t *src, int src_stride, int32_t *dst,
-                            int w, int h, int subpel_x_qn, int x_step_qn,
-                            const InterpFilterParams *filter_params,
-                            unsigned round, int bd) {
-  const int ntaps = 8;
-
-  src -= ntaps / 2 - 1;
-
-  int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
-  const __m128i round_add = _mm_set1_epi32(round_add32);
-  const __m128i round_shift = extend_32_to_128(round);
-
-  int x_qn = subpel_x_qn;
-  for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
-    const uint16_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
-    const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
-    assert(filter_idx < SUBPEL_SHIFTS);
-    const int16_t *filter =
-        av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
-
-    // Load the filter coefficients
-    const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
-
-    int y;
-    for (y = 0; y <= h - 4; y += 4) {
-      const uint16_t *const src0 = src_col + y * src_stride;
-      const uint16_t *const src1 = src0 + 1 * src_stride;
-      const uint16_t *const src2 = src0 + 2 * src_stride;
-      const uint16_t *const src3 = src0 + 3 * src_stride;
-
-      // Load up source data. This is 16-bit input data, so each load gets the 8
-      // pixels we need.
-      const __m128i data0lo = _mm_loadu_si128((__m128i *)src0);
-      const __m128i data1lo = _mm_loadu_si128((__m128i *)src1);
-      const __m128i data2lo = _mm_loadu_si128((__m128i *)src2);
-      const __m128i data3lo = _mm_loadu_si128((__m128i *)src3);
-
-      // Multiply by coefficients
-      const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
-      const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
-      const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
-      const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
-
-      // Reduce horizontally and add
-      const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
-      const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
-      const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo);
-
-      // Divide down by (1 << round), rounding to nearest.
-      const __m128i shifted =
-          _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
-
-      // Write transposed to the output
-      _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
-    }
-    for (; y < h; ++y) {
-      const uint16_t *const src_row = src_col + y * src_stride;
-
-      int32_t sum = (1 << (bd + FILTER_BITS - 1));
-      for (int k = 0; k < ntaps; ++k) {
-        sum += filter[k] * src_row[k];
-      }
-
-      dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
-#if CONFIG_LOWPRECISION_BLEND
 // A specialised version of vfilter, the vertical filter for
 // av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap
 // filters.
@@ -1160,39 +552,3 @@
   highbd_vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
                   filter_params_y, conv_params, bd);
 }
-#else   // CONFIG_LOWPRECISION_BLEND
-void av1_highbd_convolve_2d_scale_sse4_1(
-    const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride,
-    int w, int h, InterpFilterParams *filter_params_x,
-    InterpFilterParams *filter_params_y, const int subpel_x_qn,
-    const int x_step_qn, const int subpel_y_qn, const int y_step_qn,
-    ConvolveParams *conv_params, int bd) {
-  // TODO(yaowu): Move this out of stack
-  DECLARE_ALIGNED(16, int32_t,
-                  tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]);
-  int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
-             filter_params_y->taps;
-  const int xtaps = filter_params_x->taps;
-  const int ytaps = filter_params_y->taps;
-  const int fo_vert = ytaps / 2 - 1;
-
-  memset(tmp, 0, sizeof(tmp));
-  // horizontal filter
-  if (xtaps == 8)
-    highbd_hfilter8(src - fo_vert * src_stride, src_stride, tmp, w, im_h,
-                    subpel_x_qn, x_step_qn, filter_params_x,
-                    conv_params->round_0, bd);
-  else
-    highbd_hfilter(src - fo_vert * src_stride, src_stride, tmp, w, im_h,
-                   subpel_x_qn, x_step_qn, filter_params_x,
-                   conv_params->round_0, bd);
-
-  // vertical filter (input is transposed)
-  if (ytaps == 8)
-    vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
-             filter_params_y, conv_params, bd);
-  else
-    vfilter(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
-            filter_params_y, conv_params, bd);
-}
-#endif  // CONFIG_LOWPRECISION_BLEND

diff --git a/av1/common/x86/convolve_2d_avx2.c b/av1/common/x86/convolve_2d_avx2.c
index 168e699..3e523ac 100644
--- a/av1/common/x86/convolve_2d_avx2.c
+++ b/av1/common/x86/convolve_2d_avx2.c

@@ -21,137 +21,6 @@
 #include "aom_dsp/x86/synonyms.h"
 #include "av1/common/convolve.h"
 
-#if !CONFIG_LOWPRECISION_BLEND
-void av1_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst0,
-                          int dst_stride0, int w, int h,
-                          InterpFilterParams *filter_params_x,
-                          InterpFilterParams *filter_params_y,
-                          const int subpel_x_q4, const int subpel_y_q4,
-                          ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int bd = 8;
-  (void)dst0;
-  (void)dst_stride0;
-
-  DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
-  int im_h = h + filter_params_y->taps - 1;
-  int im_stride = 8;
-  int i, j;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-  const int do_average = conv_params->do_average;
-
-  __m256i filt[4], s[8], coeffs_x[4], coeffs_y[4];
-
-  assert(conv_params->round_0 > 0);
-
-  filt[0] = _mm256_load_si256((__m256i const *)filt1_global_avx2);
-  filt[1] = _mm256_load_si256((__m256i const *)filt2_global_avx2);
-  filt[2] = _mm256_load_si256((__m256i const *)filt3_global_avx2);
-  filt[3] = _mm256_load_si256((__m256i const *)filt4_global_avx2);
-
-  prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs_x);
-  prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
-  for (j = 0; j < w; j += 8) {
-    /* Horizontal filter */
-    {
-      const __m256i round_const =
-          _mm256_set1_epi16(((1 << (conv_params->round_0 - 1)) >> 1) +
-                            (1 << (bd + FILTER_BITS - 2)));
-      const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0 - 1);
-
-      for (i = 0; i < im_h; i += 2) {
-        __m256i data = _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)&src_ptr[(i * src_stride) + j]));
-        if (i + 1 < im_h)
-          data = _mm256_inserti128_si256(
-              data,
-              _mm_loadu_si128(
-                  (__m128i *)&src_ptr[(i * src_stride) + j + src_stride]),
-              1);
-        __m256i res = convolve_lowbd_x(data, coeffs_x, filt);
-
-        res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const), round_shift);
-
-        // 0 1 2 3 8 9 10 11 4 5 6 7 12 13 14 15
-        _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
-      }
-    }
-
-    /* Vertical filter */
-    {
-      const __m256i round_const = _mm256_set1_epi32(
-          ((1 << conv_params->round_1) >> 1) -
-          (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
-      const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
-      __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride));
-      __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));
-      __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride));
-      __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride));
-      __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride));
-      __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride));
-
-      s[0] = _mm256_unpacklo_epi16(s0, s1);
-      s[1] = _mm256_unpacklo_epi16(s2, s3);
-      s[2] = _mm256_unpacklo_epi16(s4, s5);
-
-      s[4] = _mm256_unpackhi_epi16(s0, s1);
-      s[5] = _mm256_unpackhi_epi16(s2, s3);
-      s[6] = _mm256_unpackhi_epi16(s4, s5);
-
-      for (i = 0; i < h; i += 2) {
-        const int16_t *data = &im_block[i * im_stride];
-
-        const __m256i s6 =
-            _mm256_loadu_si256((__m256i *)(data + 6 * im_stride));
-        const __m256i s7 =
-            _mm256_loadu_si256((__m256i *)(data + 7 * im_stride));
-
-        s[3] = _mm256_unpacklo_epi16(s6, s7);
-        s[7] = _mm256_unpackhi_epi16(s6, s7);
-
-        const __m256i res_a = convolve(s, coeffs_y);
-        const __m256i res_b = convolve(s + 4, coeffs_y);
-
-        const __m256i res_a_round =
-            _mm256_sra_epi32(_mm256_add_epi32(res_a, round_const), round_shift);
-        const __m256i res_b_round =
-            _mm256_sra_epi32(_mm256_add_epi32(res_b, round_const), round_shift);
-
-        if (w - j > 4) {
-          const __m256i res_ax =
-              _mm256_permute2x128_si256(res_a_round, res_b_round, 0x20);
-          const __m256i res_bx =
-              _mm256_permute2x128_si256(res_a_round, res_b_round, 0x31);
-
-          add_store_aligned_256(&dst[i * dst_stride + j], &res_ax, do_average);
-          add_store_aligned_256(&dst[i * dst_stride + j + dst_stride], &res_bx,
-                                do_average);
-        } else {
-          const __m128i res_ax = _mm256_extracti128_si256(res_a_round, 0);
-          const __m128i res_bx = _mm256_extracti128_si256(res_a_round, 1);
-
-          add_store(&dst[i * dst_stride + j], &res_ax, do_average);
-          add_store(&dst[i * dst_stride + j + dst_stride], &res_bx, do_average);
-        }
-
-        s[0] = s[1];
-        s[1] = s[2];
-        s[2] = s[3];
-
-        s[4] = s[5];
-        s[5] = s[6];
-        s[6] = s[7];
-      }
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, int w, int h,
                              InterpFilterParams *filter_params_x,

diff --git a/av1/common/x86/convolve_2d_sse2.c b/av1/common/x86/convolve_2d_sse2.c
index e814675..174b6dd 100644
--- a/av1/common/x86/convolve_2d_sse2.c
+++ b/av1/common/x86/convolve_2d_sse2.c

@@ -18,201 +18,6 @@
 #include "aom_dsp/x86/convolve_sse2.h"
 #include "av1/common/convolve.h"
 
-#if !CONFIG_LOWPRECISION_BLEND
-void av1_convolve_2d_sse2(const uint8_t *src, int src_stride, uint8_t *dst0,
-                          int dst_stride0, int w, int h,
-                          InterpFilterParams *filter_params_x,
-                          InterpFilterParams *filter_params_y,
-                          const int subpel_x_q4, const int subpel_y_q4,
-                          ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int bd = 8;
-  (void)dst0;
-  (void)dst_stride0;
-
-  DECLARE_ALIGNED(16, int16_t,
-                  im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
-  int im_h = h + filter_params_y->taps - 1;
-  int im_stride = MAX_SB_SIZE;
-  int i, j;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const int do_average = conv_params->do_average;
-  const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
-  const __m128i zero = _mm_setzero_si128();
-
-  assert(conv_params->round_0 > 0);
-
-  /* Horizontal filter */
-  {
-    const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
-        *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-    const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
-    // coeffs 0 1 0 1 2 3 2 3
-    const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
-    // coeffs 4 5 4 5 6 7 6 7
-    const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
-    // coeffs 0 1 0 1 0 1 0 1
-    const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
-    // coeffs 2 3 2 3 2 3 2 3
-    const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
-    // coeffs 4 5 4 5 4 5 4 5
-    const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
-    // coeffs 6 7 6 7 6 7 6 7
-    const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
-    const __m128i round_const = _mm_set1_epi32(
-        ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
-    const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
-    for (i = 0; i < im_h; ++i) {
-      for (j = 0; j < w; j += 8) {
-        const __m128i data =
-            _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
-
-        // Filter even-index pixels
-        const __m128i src_0 = _mm_unpacklo_epi8(data, zero);
-        const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
-        const __m128i src_2 = _mm_unpacklo_epi8(_mm_srli_si128(data, 2), zero);
-        const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
-        const __m128i src_4 = _mm_unpacklo_epi8(_mm_srli_si128(data, 4), zero);
-        const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
-        const __m128i src_6 = _mm_unpacklo_epi8(_mm_srli_si128(data, 6), zero);
-        const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
-        __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
-                                         _mm_add_epi32(res_2, res_6));
-        res_even =
-            _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
-        // Filter odd-index pixels
-        const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero);
-        const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
-        const __m128i src_3 = _mm_unpacklo_epi8(_mm_srli_si128(data, 3), zero);
-        const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
-        const __m128i src_5 = _mm_unpacklo_epi8(_mm_srli_si128(data, 5), zero);
-        const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
-        const __m128i src_7 = _mm_unpacklo_epi8(_mm_srli_si128(data, 7), zero);
-        const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
-        __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
-                                        _mm_add_epi32(res_3, res_7));
-        res_odd =
-            _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
-
-        // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
-        __m128i res = _mm_packs_epi32(res_even, res_odd);
-        _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res);
-      }
-    }
-  }
-
-  /* Vertical filter */
-  {
-    const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
-        *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-    const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
-    // coeffs 0 1 0 1 2 3 2 3
-    const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
-    // coeffs 4 5 4 5 6 7 6 7
-    const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
-    // coeffs 0 1 0 1 0 1 0 1
-    const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
-    // coeffs 2 3 2 3 2 3 2 3
-    const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
-    // coeffs 4 5 4 5 4 5 4 5
-    const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
-    // coeffs 6 7 6 7 6 7 6 7
-    const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
-    const __m128i round_const = _mm_set1_epi32(
-        ((1 << conv_params->round_1) >> 1) -
-        (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
-    const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 8) {
-        // Filter even-index pixels
-        const int16_t *data = &im_block[i * im_stride + j];
-        const __m128i src_0 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
-                               *(__m128i *)(data + 1 * im_stride));
-        const __m128i src_2 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
-                               *(__m128i *)(data + 3 * im_stride));
-        const __m128i src_4 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
-                               *(__m128i *)(data + 5 * im_stride));
-        const __m128i src_6 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
-                               *(__m128i *)(data + 7 * im_stride));
-
-        const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
-        const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
-        const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
-        const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
-        const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
-                                               _mm_add_epi32(res_4, res_6));
-
-        // Filter odd-index pixels
-        const __m128i src_1 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
-                               *(__m128i *)(data + 1 * im_stride));
-        const __m128i src_3 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
-                               *(__m128i *)(data + 3 * im_stride));
-        const __m128i src_5 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
-                               *(__m128i *)(data + 5 * im_stride));
-        const __m128i src_7 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
-                               *(__m128i *)(data + 7 * im_stride));
-
-        const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
-        const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
-        const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
-        const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
-        const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
-                                              _mm_add_epi32(res_5, res_7));
-
-        // Rearrange pixels back into the order 0 ... 7
-        const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
-        const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
-        const __m128i res_lo_round =
-            _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
-        const __m128i res_hi_round =
-            _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
-        // Accumulate values into the destination buffer
-        __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
-        if (do_average) {
-          _mm_storeu_si128(
-              p + 0,
-              _mm_srai_epi32(
-                  _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round), 1));
-          _mm_storeu_si128(
-              p + 1,
-              _mm_srai_epi32(
-                  _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round), 1));
-        } else {
-          _mm_storeu_si128(p + 0, res_lo_round);
-          _mm_storeu_si128(p + 1, res_hi_round);
-        }
-      }
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, int w, int h,
                              InterpFilterParams *filter_params_x,
@@ -411,138 +216,6 @@
   }
 }
 
-#if !CONFIG_LOWPRECISION_BLEND
-void av1_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
-                               uint8_t *dst0, int dst_stride0, int w, int h,
-                               InterpFilterParams *filter_params_x,
-                               InterpFilterParams *filter_params_y,
-                               const int subpel_x_q4, const int subpel_y_q4,
-                               ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  (void)filter_params_x;
-  (void)filter_params_y;
-  (void)subpel_x_q4;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  const int bits =
-      FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
-  const int do_average = conv_params->do_average;
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i left_shift = _mm_cvtsi32_si128(bits);
-  int i, j;
-
-  if (!(w % 16)) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 16) {
-        const __m128i d8 = _mm_loadu_si128((__m128i *)&src[j]);
-        const __m128i d16_0 = _mm_unpacklo_epi8(d8, zero);
-        const __m128i d16_1 = _mm_unpackhi_epi8(d8, zero);
-        __m128i d32_0 = _mm_unpacklo_epi16(d16_0, zero);
-        __m128i d32_1 = _mm_unpackhi_epi16(d16_0, zero);
-        __m128i d32_2 = _mm_unpacklo_epi16(d16_1, zero);
-        __m128i d32_3 = _mm_unpackhi_epi16(d16_1, zero);
-
-        d32_0 = _mm_sll_epi32(d32_0, left_shift);
-        d32_1 = _mm_sll_epi32(d32_1, left_shift);
-        d32_2 = _mm_sll_epi32(d32_2, left_shift);
-        d32_3 = _mm_sll_epi32(d32_3, left_shift);
-
-        __m128i *const p = (__m128i *)&dst[j];
-        if (do_average) {
-          _mm_storeu_si128(
-              p + 0,
-              _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 0), d32_0), 1));
-          _mm_storeu_si128(
-              p + 1,
-              _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 1), d32_1), 1));
-          _mm_storeu_si128(
-              p + 2,
-              _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 2), d32_2), 1));
-          _mm_storeu_si128(
-              p + 3,
-              _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 3), d32_3), 1));
-        } else {
-          _mm_storeu_si128(p + 0, d32_0);
-          _mm_storeu_si128(p + 1, d32_1);
-          _mm_storeu_si128(p + 2, d32_2);
-          _mm_storeu_si128(p + 3, d32_3);
-        }
-      }
-      src += src_stride;
-      dst += dst_stride;
-    }
-  } else if (!(w % 8)) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 8) {
-        const __m128i d8 = _mm_loadl_epi64((__m128i *)&src[j]);
-        const __m128i d16_0 = _mm_unpacklo_epi8(d8, zero);
-        __m128i d32_0 = _mm_unpacklo_epi16(d16_0, zero);
-        __m128i d32_1 = _mm_unpackhi_epi16(d16_0, zero);
-
-        d32_0 = _mm_sll_epi32(d32_0, left_shift);
-        d32_1 = _mm_sll_epi32(d32_1, left_shift);
-
-        __m128i *const p = (__m128i *)&dst[j];
-        if (do_average) {
-          _mm_storeu_si128(
-              p + 0,
-              _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 0), d32_0), 1));
-          _mm_storeu_si128(
-              p + 1,
-              _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 1), d32_1), 1));
-        } else {
-          _mm_storeu_si128(p + 0, d32_0);
-          _mm_storeu_si128(p + 1, d32_1);
-        }
-      }
-      src += src_stride;
-      dst += dst_stride;
-    }
-  } else if (!(w % 4)) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 4) {
-        const __m128i d8 = _mm_cvtsi32_si128(*(const int *)&src[j]);
-        const __m128i d16_0 = _mm_unpacklo_epi8(d8, zero);
-        __m128i d32_0 = _mm_unpacklo_epi16(d16_0, zero);
-
-        d32_0 = _mm_sll_epi32(d32_0, left_shift);
-        __m128i *const p = (__m128i *)&dst[j];
-        if (do_average) {
-          _mm_storeu_si128(
-              p, _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p), d32_0), 1));
-        } else {
-          _mm_storeu_si128(p, d32_0);
-        }
-      }
-      src += src_stride;
-      dst += dst_stride;
-    }
-  } else {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 2) {
-        const __m128i d8 = _mm_cvtsi32_si128(*(const int *)&src[j]);
-        const __m128i d16_0 = _mm_unpacklo_epi8(d8, zero);
-        __m128i d32_0 = _mm_unpacklo_epi16(d16_0, zero);
-
-        d32_0 = _mm_sll_epi32(d32_0, left_shift);
-        __m128i *const p = (__m128i *)&dst[j];
-        if (do_average) {
-          _mm_storel_epi64(
-              p, _mm_srai_epi32(_mm_add_epi32(_mm_loadl_epi64(p), d32_0), 1));
-        } else {
-          _mm_storel_epi64(p, d32_0);
-        }
-      }
-      src += src_stride;
-      dst += dst_stride;
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 static INLINE void copy_128(const uint8_t *src, uint8_t *dst) {
   __m128i s[8];
   s[0] = _mm_loadu_si128((__m128i *)(src + 0 * 16));
@@ -685,7 +358,6 @@
   }
 }
 
-#if CONFIG_LOWPRECISION_BLEND
 void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
                                    uint8_t *dst0, int dst_stride0, int w, int h,
                                    InterpFilterParams *filter_params_x,
@@ -802,228 +474,3 @@
     }
   }
 }
-#else
-void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
-                                   uint8_t *dst0, int dst_stride0, int w, int h,
-                                   InterpFilterParams *filter_params_x,
-                                   InterpFilterParams *filter_params_y,
-                                   const int subpel_x_q4, const int subpel_y_q4,
-                                   ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  (void)filter_params_x;
-  (void)filter_params_y;
-  (void)subpel_x_q4;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  const int bits =
-      FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
-  const int do_average = conv_params->do_average;
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i left_shift = _mm_cvtsi32_si128(bits);
-  int i, j;
-
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m128i wt0 = _mm_set1_epi32(w0);
-  const __m128i wt1 = _mm_set1_epi32(w1);
-
-  if (!(w % 16)) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 16) {
-        const __m128i d8 = _mm_loadu_si128((__m128i *)&src[j]);
-        const __m128i d16_0 = _mm_unpacklo_epi8(d8, zero);
-        const __m128i d16_1 = _mm_unpackhi_epi8(d8, zero);
-        __m128i d32_0 = _mm_unpacklo_epi16(d16_0, zero);
-        __m128i d32_1 = _mm_unpackhi_epi16(d16_0, zero);
-        __m128i d32_2 = _mm_unpacklo_epi16(d16_1, zero);
-        __m128i d32_3 = _mm_unpackhi_epi16(d16_1, zero);
-
-        __m128i *const p = (__m128i *)&dst[j];
-
-        if (conv_params->use_jnt_comp_avg) {
-          if (do_average) {
-            __m128i mul = _mm_madd_epi16(d32_0, wt1);
-            __m128i weighted_res = _mm_sll_epi32(mul, left_shift);
-            __m128i tmp = _mm_loadu_si128(p + 0);
-            __m128i sum = _mm_add_epi32(_mm_madd_epi16(tmp, wt0), weighted_res);
-            d32_0 = _mm_srai_epi32(sum, DIST_PRECISION_BITS);
-
-            mul = _mm_madd_epi16(d32_1, wt1);
-            weighted_res = _mm_sll_epi32(mul, left_shift);
-            tmp = _mm_loadu_si128(p + 1);
-            sum = _mm_add_epi32(_mm_madd_epi16(tmp, wt0), weighted_res);
-            d32_1 = _mm_srai_epi32(sum, DIST_PRECISION_BITS);
-
-            mul = _mm_madd_epi16(d32_2, wt1);
-            weighted_res = _mm_sll_epi32(mul, left_shift);
-            tmp = _mm_loadu_si128(p + 2);
-            sum = _mm_add_epi32(_mm_madd_epi16(tmp, wt0), weighted_res);
-            d32_2 = _mm_srai_epi32(sum, DIST_PRECISION_BITS);
-
-            mul = _mm_madd_epi16(d32_3, wt1);
-            weighted_res = _mm_sll_epi32(mul, left_shift);
-            tmp = _mm_loadu_si128(p + 3);
-            sum = _mm_add_epi32(_mm_madd_epi16(tmp, wt0), weighted_res);
-            d32_3 = _mm_srai_epi32(sum, DIST_PRECISION_BITS);
-          } else {
-            d32_0 = _mm_sll_epi32(d32_0, left_shift);
-            d32_1 = _mm_sll_epi32(d32_1, left_shift);
-            d32_2 = _mm_sll_epi32(d32_2, left_shift);
-            d32_3 = _mm_sll_epi32(d32_3, left_shift);
-          }
-        } else {
-          if (do_average) {
-            d32_0 =
-                _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 0),
-                                             _mm_sll_epi32(d32_0, left_shift)),
-                               1);
-            d32_1 =
-                _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 1),
-                                             _mm_sll_epi32(d32_1, left_shift)),
-                               1);
-            d32_2 =
-                _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 2),
-                                             _mm_sll_epi32(d32_2, left_shift)),
-                               1);
-            d32_3 =
-                _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 3),
-                                             _mm_sll_epi32(d32_3, left_shift)),
-                               1);
-          } else {
-            d32_0 = _mm_sll_epi32(d32_0, left_shift);
-            d32_1 = _mm_sll_epi32(d32_1, left_shift);
-            d32_2 = _mm_sll_epi32(d32_2, left_shift);
-            d32_3 = _mm_sll_epi32(d32_3, left_shift);
-          }
-        }
-
-        _mm_storeu_si128(p + 0, d32_0);
-        _mm_storeu_si128(p + 1, d32_1);
-        _mm_storeu_si128(p + 2, d32_2);
-        _mm_storeu_si128(p + 3, d32_3);
-      }
-      src += src_stride;
-      dst += dst_stride;
-    }
-  } else if (!(w % 8)) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 8) {
-        const __m128i d8 = _mm_loadl_epi64((__m128i *)&src[j]);
-        const __m128i d16_0 = _mm_unpacklo_epi8(d8, zero);
-        __m128i d32_0 = _mm_unpacklo_epi16(d16_0, zero);
-        __m128i d32_1 = _mm_unpackhi_epi16(d16_0, zero);
-
-        __m128i *const p = (__m128i *)&dst[j];
-        if (conv_params->use_jnt_comp_avg) {
-          if (do_average) {
-            __m128i mul = _mm_madd_epi16(d32_0, wt1);
-            __m128i weighted_res = _mm_sll_epi32(mul, left_shift);
-            __m128i tmp = _mm_loadu_si128(p + 0);
-            __m128i sum = _mm_add_epi32(_mm_madd_epi16(tmp, wt0), weighted_res);
-            d32_0 = _mm_srai_epi32(sum, DIST_PRECISION_BITS);
-
-            mul = _mm_madd_epi16(d32_1, wt1);
-            weighted_res = _mm_sll_epi32(mul, left_shift);
-            tmp = _mm_loadu_si128(p + 1);
-            sum = _mm_add_epi32(_mm_madd_epi16(tmp, wt0), weighted_res);
-            d32_1 = _mm_srai_epi32(sum, DIST_PRECISION_BITS);
-          } else {
-            d32_0 = _mm_sll_epi32(d32_0, left_shift);
-            d32_1 = _mm_sll_epi32(d32_1, left_shift);
-          }
-        } else {
-          if (do_average) {
-            d32_0 =
-                _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 0),
-                                             _mm_sll_epi32(d32_0, left_shift)),
-                               1);
-            d32_1 =
-                _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 1),
-                                             _mm_sll_epi32(d32_1, left_shift)),
-                               1);
-          } else {
-            d32_0 = _mm_sll_epi32(d32_0, left_shift);
-            d32_1 = _mm_sll_epi32(d32_1, left_shift);
-          }
-        }
-
-        _mm_storeu_si128(p + 0, d32_0);
-        _mm_storeu_si128(p + 1, d32_1);
-      }
-      src += src_stride;
-      dst += dst_stride;
-    }
-  } else if (!(w % 4)) {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 4) {
-        const __m128i d8 = _mm_loadl_epi64((__m128i *)&src[j]);
-        const __m128i d16_0 = _mm_unpacklo_epi8(d8, zero);
-        __m128i d32_0 = _mm_unpacklo_epi16(d16_0, zero);
-
-        __m128i *const p = (__m128i *)&dst[j];
-        if (conv_params->use_jnt_comp_avg) {
-          if (do_average) {
-            __m128i mul = _mm_madd_epi16(d32_0, wt1);
-            __m128i weighted_res = _mm_sll_epi32(mul, left_shift);
-            __m128i tmp = _mm_loadu_si128(p + 0);
-            __m128i sum = _mm_add_epi32(_mm_madd_epi16(tmp, wt0), weighted_res);
-            d32_0 = _mm_srai_epi32(sum, DIST_PRECISION_BITS);
-          } else {
-            d32_0 = _mm_sll_epi32(d32_0, left_shift);
-          }
-        } else {
-          if (do_average) {
-            d32_0 =
-                _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 0),
-                                             _mm_sll_epi32(d32_0, left_shift)),
-                               1);
-          } else {
-            d32_0 = _mm_sll_epi32(d32_0, left_shift);
-          }
-        }
-
-        _mm_storeu_si128(p, d32_0);
-      }
-      src += src_stride;
-      dst += dst_stride;
-    }
-  } else {
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 2) {
-        const __m128i d8 = _mm_cvtsi32_si128(*(const int *)&src[j]);
-        const __m128i d16_0 = _mm_unpacklo_epi8(d8, zero);
-        __m128i d32_0 = _mm_unpacklo_epi16(d16_0, zero);
-
-        __m128i *const p = (__m128i *)&dst[j];
-        if (conv_params->use_jnt_comp_avg) {
-          if (do_average) {
-            __m128i mul = _mm_madd_epi16(d32_0, wt1);
-            __m128i weighted_res = _mm_sll_epi32(mul, left_shift);
-            __m128i tmp = _mm_loadl_epi64(p);
-            __m128i sum = _mm_add_epi32(_mm_madd_epi16(tmp, wt0), weighted_res);
-            d32_0 = _mm_srai_epi32(sum, DIST_PRECISION_BITS);
-          } else {
-            d32_0 = _mm_sll_epi32(d32_0, left_shift);
-          }
-        } else {
-          if (do_average) {
-            d32_0 =
-                _mm_srai_epi32(_mm_add_epi32(_mm_loadl_epi64(p),
-                                             _mm_sll_epi32(d32_0, left_shift)),
-                               1);
-          } else {
-            d32_0 = _mm_sll_epi32(d32_0, left_shift);
-          }
-        }
-
-        _mm_storel_epi64(p, d32_0);
-      }
-      src += src_stride;
-      dst += dst_stride;
-    }
-  }
-}
-#endif

diff --git a/av1/common/x86/convolve_avx2.c b/av1/common/x86/convolve_avx2.c
index 7d5b2c7..7e5320b 100644
--- a/av1/common/x86/convolve_avx2.c
+++ b/av1/common/x86/convolve_avx2.c

@@ -137,67 +137,6 @@
   *(uint32_t *)dst = _mm_cvtsi128_si32(x);
 }
 
-#if !CONFIG_LOWPRECISION_BLEND
-void av1_convolve_rounding_avx2(const int32_t *src, int src_stride,
-                                uint8_t *dst, int dst_stride, int w, int h,
-                                int bits) {
-  const __m256i rnd_num = _mm256_set1_epi32((int32_t)((1 << bits) >> 1));
-  const __m128i rnd_num_sse2 = _mm256_castsi256_si128(rnd_num);
-
-  if (w > 64) {  // width = 128
-    do {
-      cal_rounding_32xn_avx2(src, dst, &rnd_num, bits, 4);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else if (w > 32) {  // width = 64
-    do {
-      cal_rounding_32xn_avx2(src, dst, &rnd_num, bits, 2);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else if (w > 16) {  // width = 32
-    do {
-      cal_rounding_32xn_avx2(src, dst, &rnd_num, bits, 1);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else if (w > 8) {  // width = 16
-    do {
-      cal_rounding_16_avx2(src, dst, &rnd_num, bits);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else if (w > 4) {  // width = 8
-    do {
-      cal_rounding_8_avx2(src, dst, &rnd_num, bits);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else if (w > 2) {  // width = 4
-    do {
-      cal_rounding_4_sse2(src, dst, &rnd_num_sse2, bits);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else {  // width = 2
-    do {
-      dst[0] = clip_pixel(ROUND_POWER_OF_TWO(src[0], bits));
-      dst[1] = clip_pixel(ROUND_POWER_OF_TWO(src[1], bits));
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 static INLINE void cal_highbd_rounding_32xn_avx2(const int32_t *src,
                                                  uint16_t *dst,
                                                  const __m256i *rnd, int shift,
@@ -281,227 +220,6 @@
   _mm_storel_epi64((__m128i *)dst, x);
 }
 
-#if !CONFIG_LOWPRECISION_BLEND
-void av1_highbd_convolve_rounding_avx2(const int32_t *src, int src_stride,
-                                       uint8_t *dst8, int dst_stride, int w,
-                                       int h, int bits, int bd) {
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
-  const __m256i rnd_num = _mm256_set1_epi32((int32_t)((1 << bits) >> 1));
-  const __m128i rnd_num_sse2 = _mm256_castsi256_si128(rnd_num);
-
-  if (w > 64) {  // width = 128
-    do {
-      cal_highbd_rounding_32xn_avx2(src, dst, &rnd_num, bits, 4, bd);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else if (w > 32) {  // width = 64
-    do {
-      cal_highbd_rounding_32xn_avx2(src, dst, &rnd_num, bits, 2, bd);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else if (w > 16) {  // width = 32
-    do {
-      cal_highbd_rounding_32xn_avx2(src, dst, &rnd_num, bits, 1, bd);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else if (w > 8) {  // width = 16
-    do {
-      cal_highbd_rounding_16_avx2(src, dst, &rnd_num, bits, bd);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else if (w > 4) {  // width = 8
-    do {
-      cal_highbd_rounding_8_avx2(src, dst, &rnd_num, bits, bd);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else if (w > 2) {  // width = 4
-    do {
-      cal_highbd_rounding_4_sse2(src, dst, &rnd_num_sse2, bits, bd);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  } else {  // width = 2
-    do {
-      dst[0] = clip_pixel_highbd(ROUND_POWER_OF_TWO(src[0], bits), bd);
-      dst[1] = clip_pixel_highbd(ROUND_POWER_OF_TWO(src[1], bits), bd);
-      src += src_stride;
-      dst += dst_stride;
-      h--;
-    } while (h > 0);
-  }
-}
-
-void av1_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst0,
-                         int dst_stride0, int w, int h,
-                         InterpFilterParams *filter_params_x,
-                         InterpFilterParams *filter_params_y,
-                         const int subpel_x_q4, const int subpel_y_q4,
-                         ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  int i, j;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const uint8_t *const src_ptr = src - fo_vert * src_stride;
-  // +1 to compensate for dividing the filter coeffs by 2
-  const int left_shift = FILTER_BITS - conv_params->round_0 + 1;
-  const __m256i round_const =
-      _mm256_set1_epi32((1 << conv_params->round_1) >> 1);
-  const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-  const int do_average = conv_params->do_average;
-  __m256i coeffs[4], s[8];
-
-  assert((FILTER_BITS - conv_params->round_0) >= 0);
-
-  prepare_coeffs_lowbd(filter_params_y, subpel_y_q4, coeffs);
-
-  (void)conv_params;
-  (void)filter_params_x;
-  (void)subpel_x_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  for (j = 0; j < w; j += 16) {
-    const uint8_t *data = &src_ptr[j];
-    __m256i src6;
-
-    // Load lines a and b. Line a to lower 128, line b to upper 128
-    const __m256i src_01a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 0 * src_stride))),
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
-        0x20);
-
-    const __m256i src_12a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
-        0x20);
-
-    const __m256i src_23a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
-        0x20);
-
-    const __m256i src_34a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
-        0x20);
-
-    const __m256i src_45a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
-        0x20);
-
-    src6 = _mm256_castsi128_si256(
-        _mm_loadu_si128((__m128i *)(data + 6 * src_stride)));
-    const __m256i src_56a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
-        src6, 0x20);
-
-    s[0] = _mm256_unpacklo_epi8(src_01a, src_12a);
-    s[1] = _mm256_unpacklo_epi8(src_23a, src_34a);
-    s[2] = _mm256_unpacklo_epi8(src_45a, src_56a);
-
-    s[4] = _mm256_unpackhi_epi8(src_01a, src_12a);
-    s[5] = _mm256_unpackhi_epi8(src_23a, src_34a);
-    s[6] = _mm256_unpackhi_epi8(src_45a, src_56a);
-
-    for (i = 0; i < h; i += 2) {
-      data = &src_ptr[i * src_stride + j];
-      const __m256i src_67a = _mm256_permute2x128_si256(
-          src6,
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
-          0x20);
-
-      src6 = _mm256_castsi128_si256(
-          _mm_loadu_si128((__m128i *)(data + 8 * src_stride)));
-      const __m256i src_78a = _mm256_permute2x128_si256(
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
-          src6, 0x20);
-
-      s[3] = _mm256_unpacklo_epi8(src_67a, src_78a);
-      s[7] = _mm256_unpackhi_epi8(src_67a, src_78a);
-
-      const __m256i res_lo = convolve_lowbd(s, coeffs);
-
-      const __m256i res_lo_0_32b =
-          _mm256_cvtepi16_epi32(_mm256_castsi256_si128(res_lo));
-      const __m256i res_lo_0_shift =
-          _mm256_slli_epi32(res_lo_0_32b, left_shift);
-      const __m256i res_lo_0_round = _mm256_sra_epi32(
-          _mm256_add_epi32(res_lo_0_shift, round_const), round_shift);
-
-      // Accumulate values into the destination buffer
-      add_store_aligned_256(&dst[i * dst_stride + j], &res_lo_0_round,
-                            do_average);
-
-      const __m256i res_lo_1_32b =
-          _mm256_cvtepi16_epi32(_mm256_extracti128_si256(res_lo, 1));
-      const __m256i res_lo_1_shift =
-          _mm256_slli_epi32(res_lo_1_32b, left_shift);
-      const __m256i res_lo_1_round = _mm256_sra_epi32(
-          _mm256_add_epi32(res_lo_1_shift, round_const), round_shift);
-
-      add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                            &res_lo_1_round, do_average);
-
-      if (w - j > 8) {
-        const __m256i res_hi = convolve_lowbd(s + 4, coeffs);
-
-        const __m256i res_hi_0_32b =
-            _mm256_cvtepi16_epi32(_mm256_castsi256_si128(res_hi));
-        const __m256i res_hi_0_shift =
-            _mm256_slli_epi32(res_hi_0_32b, left_shift);
-        const __m256i res_hi_0_round = _mm256_sra_epi32(
-            _mm256_add_epi32(res_hi_0_shift, round_const), round_shift);
-
-        add_store_aligned_256(&dst[i * dst_stride + j + 8], &res_hi_0_round,
-                              do_average);
-
-        const __m256i res_hi_1_32b =
-            _mm256_cvtepi16_epi32(_mm256_extracti128_si256(res_hi, 1));
-        const __m256i res_hi_1_shift =
-            _mm256_slli_epi32(res_hi_1_32b, left_shift);
-        const __m256i res_hi_1_round = _mm256_sra_epi32(
-            _mm256_add_epi32(res_hi_1_shift, round_const), round_shift);
-
-        add_store_aligned_256(&dst[i * dst_stride + j + 8 + dst_stride],
-                              &res_hi_1_round, do_average);
-      }
-      s[0] = s[1];
-      s[1] = s[2];
-      s[2] = s[3];
-
-      s[4] = s[5];
-      s[5] = s[6];
-      s[6] = s[7];
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
                             int dst_stride, int w, int h,
                             InterpFilterParams *filter_params_x,
@@ -658,72 +376,6 @@
     }
   }
 }
-#if !CONFIG_LOWPRECISION_BLEND
-void av1_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst0,
-                         int dst_stride0, int w, int h,
-                         InterpFilterParams *filter_params_x,
-                         InterpFilterParams *filter_params_y,
-                         const int subpel_x_q4, const int subpel_y_q4,
-                         ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  int i, j;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint8_t *const src_ptr = src - fo_horiz;
-  const int bits = FILTER_BITS - conv_params->round_1;
-  const int do_average = conv_params->do_average;
-  __m256i filt[4], coeffs[4];
-
-  assert(bits >= 0);
-  assert(conv_params->round_0 > 0);
-
-  filt[0] = _mm256_load_si256((__m256i const *)filt1_global_avx2);
-  filt[1] = _mm256_load_si256((__m256i const *)filt2_global_avx2);
-  filt[2] = _mm256_load_si256((__m256i const *)filt3_global_avx2);
-  filt[3] = _mm256_load_si256((__m256i const *)filt4_global_avx2);
-
-  prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs);
-
-  const __m256i round_const =
-      _mm256_set1_epi16((1 << (conv_params->round_0 - 1)) >> 1);
-  const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0 - 1);
-
-  (void)filter_params_y;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  for (i = 0; i < h; ++i) {
-    for (j = 0; j < w; j += 16) {
-      // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15 16 17 18 19
-      // 20 21 22 23
-      const __m256i data = _mm256_permute4x64_epi64(
-          _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]),
-          _MM_SHUFFLE(2, 1, 1, 0));
-
-      __m256i res = convolve_lowbd_x(data, coeffs, filt);
-
-      res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const), round_shift);
-
-      const __m256i res_lo_round =
-          _mm256_cvtepi16_epi32(_mm256_castsi256_si128(res));
-      const __m256i res_hi_round =
-          _mm256_cvtepi16_epi32(_mm256_extracti128_si256(res, 1));
-
-      const __m256i res_lo_shift = _mm256_slli_epi32(res_lo_round, bits);
-      const __m256i res_hi_shift = _mm256_slli_epi32(res_hi_round, bits);
-
-      // Accumulate values into the destination buffer
-      add_store_aligned_256(&dst[i * dst_stride + j], &res_lo_shift,
-                            do_average);
-      if (w - j > 8) {
-        add_store_aligned_256(&dst[i * dst_stride + j + 8], &res_hi_shift,
-                              do_average);
-      }
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
 
 void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
                             int dst_stride, int w, int h,

diff --git a/av1/common/x86/convolve_sse2.c b/av1/common/x86/convolve_sse2.c
index 87a114a..d8b4425 100644
--- a/av1/common/x86/convolve_sse2.c
+++ b/av1/common/x86/convolve_sse2.c

@@ -75,252 +75,6 @@
   return convolve(ss, coeffs);
 }
 
-#if !CONFIG_LOWPRECISION_BLEND
-void av1_convolve_y_sse2(const uint8_t *src, int src_stride,
-                         const uint8_t *dst0, int dst_stride0, int w, int h,
-                         InterpFilterParams *filter_params_x,
-                         InterpFilterParams *filter_params_y,
-                         const int subpel_x_q4, const int subpel_y_q4,
-                         ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  const int dst_stride = conv_params->dst_stride;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const uint8_t *src_ptr = src - fo_vert * src_stride;
-  const int bits = FILTER_BITS - conv_params->round_0;
-  const __m128i left_shift = _mm_cvtsi32_si128(bits);
-  const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_1) >> 1);
-  const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-  __m128i coeffs[4];
-
-  (void)filter_params_x;
-  (void)subpel_x_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  assert(bits >= 0);
-
-  prepare_coeffs(filter_params_y, subpel_y_q4, coeffs);
-
-  if (w == 4) {
-    __m128i s[8], src6, res, res_shift;
-    src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride));
-    s[0] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)),
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)));
-    s[1] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)),
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)));
-    s[2] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)),
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)));
-    s[3] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)),
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)));
-    s[4] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)),
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)));
-    s[5] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6);
-
-    do {
-      s[6] = _mm_unpacklo_epi8(
-          src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)));
-      src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 8 * src_stride));
-      s[7] = _mm_unpacklo_epi8(
-          _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6);
-
-      res = convolve_lo_y(s + 0, coeffs);
-      res_shift = _mm_sll_epi32(res, left_shift);
-      res_shift =
-          _mm_sra_epi32(_mm_add_epi32(res_shift, round_const), round_shift);
-      add_store(dst, &res_shift, conv_params->do_average);
-      src_ptr += src_stride;
-      dst += dst_stride;
-
-      res = convolve_lo_y(s + 1, coeffs);
-      res_shift = _mm_sll_epi32(res, left_shift);
-      res_shift =
-          _mm_sra_epi32(_mm_add_epi32(res_shift, round_const), round_shift);
-      add_store(dst, &res_shift, conv_params->do_average);
-      src_ptr += src_stride;
-      dst += dst_stride;
-
-      s[0] = s[2];
-      s[1] = s[3];
-      s[2] = s[4];
-      s[3] = s[5];
-      s[4] = s[6];
-      s[5] = s[7];
-      h -= 2;
-    } while (h);
-  } else {
-    assert(!(w % 8));
-    int j = 0;
-    do {
-      __m128i s[8], src6, res_lo, res_hi, res_lo_shift, res_hi_shift;
-      const uint8_t *data = &src_ptr[j];
-
-      src6 = _mm_loadl_epi64((__m128i *)(data + 6 * src_stride));
-      s[0] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 0 * src_stride)),
-          _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)));
-      s[1] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)),
-          _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)));
-      s[2] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)),
-          _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)));
-      s[3] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)),
-          _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)));
-      s[4] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)),
-          _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)));
-      s[5] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)), src6);
-
-      int i = 0;
-      do {
-        data = &src_ptr[i * src_stride + j];
-        s[6] = _mm_unpacklo_epi8(
-            src6, _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)));
-        src6 = _mm_loadl_epi64((__m128i *)(data + 8 * src_stride));
-        s[7] = _mm_unpacklo_epi8(
-            _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)), src6);
-
-        res_lo = convolve_lo_y(s, coeffs);  // Filter low index pixels
-        res_hi = convolve_hi_y(s, coeffs);  // Filter high index pixels
-        res_lo_shift = _mm_sll_epi32(res_lo, left_shift);
-        res_hi_shift = _mm_sll_epi32(res_hi, left_shift);
-        res_lo_shift = _mm_sra_epi32(_mm_add_epi32(res_lo_shift, round_const),
-                                     round_shift);
-        res_hi_shift = _mm_sra_epi32(_mm_add_epi32(res_hi_shift, round_const),
-                                     round_shift);
-        add_store(dst + i * dst_stride + j + 0, &res_lo_shift,
-                  conv_params->do_average);
-        add_store(dst + i * dst_stride + j + 4, &res_hi_shift,
-                  conv_params->do_average);
-        i++;
-
-        res_lo = convolve_lo_y(s + 1, coeffs);  // Filter low index pixels
-        res_hi = convolve_hi_y(s + 1, coeffs);  // Filter high index pixels
-        res_lo_shift = _mm_sll_epi32(res_lo, left_shift);
-        res_hi_shift = _mm_sll_epi32(res_hi, left_shift);
-        res_lo_shift = _mm_sra_epi32(_mm_add_epi32(res_lo_shift, round_const),
-                                     round_shift);
-        res_hi_shift = _mm_sra_epi32(_mm_add_epi32(res_hi_shift, round_const),
-                                     round_shift);
-        add_store(dst + i * dst_stride + j + 0, &res_lo_shift,
-                  conv_params->do_average);
-        add_store(dst + i * dst_stride + j + 4, &res_hi_shift,
-                  conv_params->do_average);
-        i++;
-
-        s[0] = s[2];
-        s[1] = s[3];
-        s[2] = s[4];
-        s[3] = s[5];
-        s[4] = s[6];
-        s[5] = s[7];
-      } while (i < h);
-      j += 8;
-    } while (j < w);
-  }
-}
-
-void av1_convolve_x_sse2(const uint8_t *src, int src_stride,
-                         const uint8_t *dst0, int dst_stride0, int w, int h,
-                         InterpFilterParams *filter_params_x,
-                         InterpFilterParams *filter_params_y,
-                         const int subpel_x_q4, const int subpel_y_q4,
-                         ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  const int dst_stride = conv_params->dst_stride;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint8_t *src_ptr = src - fo_horiz;
-  const int bits = FILTER_BITS - conv_params->round_1;
-  const __m128i left_shift = _mm_cvtsi32_si128(bits);
-  const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_0) >> 1);
-  const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-  __m128i coeffs[4];
-
-  (void)filter_params_y;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  assert(bits >= 0);
-
-  prepare_coeffs(filter_params_x, subpel_x_q4, coeffs);
-
-  if (w == 4) {
-    do {
-      const __m128i data = _mm_loadu_si128((__m128i *)src_ptr);
-      __m128i s[4];
-
-      s[0] = _mm_unpacklo_epi8(data, _mm_srli_si128(data, 1));
-      s[1] =
-          _mm_unpacklo_epi8(_mm_srli_si128(data, 2), _mm_srli_si128(data, 3));
-      s[2] =
-          _mm_unpacklo_epi8(_mm_srli_si128(data, 4), _mm_srli_si128(data, 5));
-      s[3] =
-          _mm_unpacklo_epi8(_mm_srli_si128(data, 6), _mm_srli_si128(data, 7));
-      const __m128i res_lo = convolve_lo_x(s, coeffs);
-      const __m128i res_lo_round =
-          _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
-      const __m128i res_lo_shift = _mm_sll_epi32(res_lo_round, left_shift);
-
-      // Accumulate values into the destination buffer
-      add_store(dst, &res_lo_shift, conv_params->do_average);
-      src_ptr += src_stride;
-      dst += dst_stride;
-    } while (--h);
-  } else {
-    assert(!(w % 8));
-    int i = 0;
-    do {
-      int j = 0;
-      do {
-        const __m128i data =
-            _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
-        __m128i s[4];
-
-        // Filter even-index pixels
-        s[0] = data;
-        s[1] = _mm_srli_si128(data, 2);
-        s[2] = _mm_srli_si128(data, 4);
-        s[3] = _mm_srli_si128(data, 6);
-        const __m128i res_even = convolve_lo_x(s, coeffs);
-
-        // Filter odd-index pixels
-        s[0] = _mm_srli_si128(data, 1);
-        s[1] = _mm_srli_si128(data, 3);
-        s[2] = _mm_srli_si128(data, 5);
-        s[3] = _mm_srli_si128(data, 7);
-        const __m128i res_odd = convolve_lo_x(s, coeffs);
-
-        // Rearrange pixels back into the order 0 ... 7
-        const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
-        const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-        const __m128i res_lo_round =
-            _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
-        const __m128i res_hi_round =
-            _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-        const __m128i res_lo_shift = _mm_sll_epi32(res_lo_round, left_shift);
-        const __m128i res_hi_shift = _mm_sll_epi32(res_hi_round, left_shift);
-
-        // Accumulate values into the destination buffer
-        add_store(dst + i * dst_stride + j + 0, &res_lo_shift,
-                  conv_params->do_average);
-        add_store(dst + i * dst_stride + j + 4, &res_hi_shift,
-                  conv_params->do_average);
-        j += 8;
-      } while (j < w);
-    } while (++i < h);
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride,
                             const uint8_t *dst, int dst_stride, int w, int h,
                             InterpFilterParams *filter_params_x,

diff --git a/av1/common/x86/highbd_convolve_2d_avx2.c b/av1/common/x86/highbd_convolve_2d_avx2.c
index bb2ccdd..63c1cf1 100644
--- a/av1/common/x86/highbd_convolve_2d_avx2.c
+++ b/av1/common/x86/highbd_convolve_2d_avx2.c

@@ -186,239 +186,6 @@
   }
 }
 
-#if !CONFIG_LOWPRECISION_BLEND
-void av1_highbd_convolve_2d_avx2(const uint16_t *src, int src_stride,
-                                 uint16_t *dst0, int dst_stride0, int w, int h,
-                                 InterpFilterParams *filter_params_x,
-                                 InterpFilterParams *filter_params_y,
-                                 const int subpel_x_q4, const int subpel_y_q4,
-                                 ConvolveParams *conv_params, int bd) {
-  DECLARE_ALIGNED(32, int16_t,
-                  im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  int im_h = h + filter_params_y->taps - 1;
-  int im_stride = MAX_SB_SIZE;
-  int i, j;
-  const int do_average = conv_params->do_average;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-  (void)dst0;
-  (void)dst_stride0;
-  // Check that, even with 12-bit input, the intermediate values will fit
-  // into an unsigned 16-bit intermediate array.
-  assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
-
-  /* Horizontal filter */
-  {
-    const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
-        *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-
-    const __m128i coeffs_x8 = _mm_loadu_si128((__m128i *)x_filter);
-    // since not all compilers yet support _mm256_set_m128i()
-    const __m256i coeffs_x = _mm256_insertf128_si256(
-        _mm256_castsi128_si256(coeffs_x8), coeffs_x8, 1);
-
-    // coeffs 0 1 0 1 2 3 2 3
-    const __m256i tmp_0 = _mm256_unpacklo_epi32(coeffs_x, coeffs_x);
-    // coeffs 4 5 4 5 6 7 6 7
-    const __m256i tmp_1 = _mm256_unpackhi_epi32(coeffs_x, coeffs_x);
-
-    // coeffs 0 1 0 1 0 1 0 1
-    const __m256i coeff_01 = _mm256_unpacklo_epi64(tmp_0, tmp_0);
-    // coeffs 2 3 2 3 2 3 2 3
-    const __m256i coeff_23 = _mm256_unpackhi_epi64(tmp_0, tmp_0);
-    // coeffs 4 5 4 5 4 5 4 5
-    const __m256i coeff_45 = _mm256_unpacklo_epi64(tmp_1, tmp_1);
-    // coeffs 6 7 6 7 6 7 6 7
-    const __m256i coeff_67 = _mm256_unpackhi_epi64(tmp_1, tmp_1);
-
-    const __m256i round_const = _mm256_set1_epi32(
-        ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
-    const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
-    for (i = 0; i < im_h; ++i) {
-      for (j = 0; j < w; j += 16) {
-        const __m256i data =
-            _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
-        const __m128i data2_1 =
-            _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 16]);
-        const __m256i data2 = _mm256_insertf128_si256(
-            _mm256_castsi128_si256(data2_1), data2_1, 1);
-
-        // Filter even-index pixels
-        const __m256i res_0 = _mm256_madd_epi16(data, coeff_01);
-        const __m256i res_2 = _mm256_madd_epi16(
-            _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
-                               data, 4),
-            coeff_23);
-        const __m256i res_4 = _mm256_madd_epi16(
-            _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
-                               data, 8),
-            coeff_45);
-        const __m256i res_6 = _mm256_madd_epi16(
-            _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
-                               data, 12),
-            coeff_67);
-
-        __m256i res_even = _mm256_add_epi32(_mm256_add_epi32(res_0, res_4),
-                                            _mm256_add_epi32(res_2, res_6));
-        res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const),
-                                    round_shift);
-
-        // Filter odd-index pixels
-        const __m256i res_1 = _mm256_madd_epi16(
-            _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
-                               data, 2),
-            coeff_01);
-        const __m256i res_3 = _mm256_madd_epi16(
-            _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
-                               data, 6),
-            coeff_23);
-        const __m256i res_5 = _mm256_madd_epi16(
-            _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
-                               data, 10),
-            coeff_45);
-        const __m256i res_7 = _mm256_madd_epi16(
-            _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
-                               data, 14),
-            coeff_67);
-
-        __m256i res_odd = _mm256_add_epi32(_mm256_add_epi32(res_1, res_5),
-                                           _mm256_add_epi32(res_3, res_7));
-        res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const),
-                                   round_shift);
-
-        __m256i res = _mm256_packs_epi32(res_even, res_odd);
-        _mm256_storeu_si256((__m256i *)&im_block[i * im_stride + j], res);
-      }
-    }
-  }
-
-  /* Vertical filter */
-  {
-    const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
-        *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-
-    const __m128i coeffs_y8 = _mm_loadu_si128((__m128i *)y_filter);
-    const __m256i coeffs_y = _mm256_insertf128_si256(
-        _mm256_castsi128_si256(coeffs_y8), coeffs_y8, 1);
-
-    // coeffs 0 1 0 1 2 3 2 3
-    const __m256i tmp_0 = _mm256_unpacklo_epi32(coeffs_y, coeffs_y);
-    // coeffs 4 5 4 5 6 7 6 7
-    const __m256i tmp_1 = _mm256_unpackhi_epi32(coeffs_y, coeffs_y);
-
-    // coeffs 0 1 0 1 0 1 0 1
-    const __m256i coeff_01 = _mm256_unpacklo_epi64(tmp_0, tmp_0);
-    // coeffs 2 3 2 3 2 3 2 3
-    const __m256i coeff_23 = _mm256_unpackhi_epi64(tmp_0, tmp_0);
-    // coeffs 4 5 4 5 4 5 4 5
-    const __m256i coeff_45 = _mm256_unpacklo_epi64(tmp_1, tmp_1);
-    // coeffs 6 7 6 7 6 7 6 7
-    const __m256i coeff_67 = _mm256_unpackhi_epi64(tmp_1, tmp_1);
-
-    const __m256i round_const = _mm256_set1_epi32(
-        ((1 << conv_params->round_1) >> 1) -
-        (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
-    const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 16) {
-        // Filter even-index pixels
-        const int16_t *data = &im_block[i * im_stride + j];
-        const __m256i src_0 =
-            _mm256_unpacklo_epi16(*(__m256i *)(data + 0 * im_stride),
-                                  *(__m256i *)(data + 1 * im_stride));
-        const __m256i src_2 =
-            _mm256_unpacklo_epi16(*(__m256i *)(data + 2 * im_stride),
-                                  *(__m256i *)(data + 3 * im_stride));
-        const __m256i src_4 =
-            _mm256_unpacklo_epi16(*(__m256i *)(data + 4 * im_stride),
-                                  *(__m256i *)(data + 5 * im_stride));
-        const __m256i src_6 =
-            _mm256_unpacklo_epi16(*(__m256i *)(data + 6 * im_stride),
-                                  *(__m256i *)(data + 7 * im_stride));
-
-        const __m256i res_0 = _mm256_madd_epi16(src_0, coeff_01);
-        const __m256i res_2 = _mm256_madd_epi16(src_2, coeff_23);
-        const __m256i res_4 = _mm256_madd_epi16(src_4, coeff_45);
-        const __m256i res_6 = _mm256_madd_epi16(src_6, coeff_67);
-
-        const __m256i res_even = _mm256_add_epi32(
-            _mm256_add_epi32(res_0, res_2), _mm256_add_epi32(res_4, res_6));
-
-        // Filter odd-index pixels
-        const __m256i src_1 =
-            _mm256_unpackhi_epi16(*(__m256i *)(data + 0 * im_stride),
-                                  *(__m256i *)(data + 1 * im_stride));
-        const __m256i src_3 =
-            _mm256_unpackhi_epi16(*(__m256i *)(data + 2 * im_stride),
-                                  *(__m256i *)(data + 3 * im_stride));
-        const __m256i src_5 =
-            _mm256_unpackhi_epi16(*(__m256i *)(data + 4 * im_stride),
-                                  *(__m256i *)(data + 5 * im_stride));
-        const __m256i src_7 =
-            _mm256_unpackhi_epi16(*(__m256i *)(data + 6 * im_stride),
-                                  *(__m256i *)(data + 7 * im_stride));
-
-        const __m256i res_1 = _mm256_madd_epi16(src_1, coeff_01);
-        const __m256i res_3 = _mm256_madd_epi16(src_3, coeff_23);
-        const __m256i res_5 = _mm256_madd_epi16(src_5, coeff_45);
-        const __m256i res_7 = _mm256_madd_epi16(src_7, coeff_67);
-
-        const __m256i res_odd = _mm256_add_epi32(
-            _mm256_add_epi32(res_1, res_3), _mm256_add_epi32(res_5, res_7));
-
-        // Rearrange pixels back into the order 0 ... 7
-        const __m256i res_lo = _mm256_unpacklo_epi32(res_even, res_odd);
-        const __m256i res_hi = _mm256_unpackhi_epi32(res_even, res_odd);
-
-        const __m256i res_lo_round = _mm256_sra_epi32(
-            _mm256_add_epi32(res_lo, round_const), round_shift);
-        const __m256i res_hi_round = _mm256_sra_epi32(
-            _mm256_add_epi32(res_hi, round_const), round_shift);
-
-        // Accumulate values into the destination buffer
-        __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
-        if (do_average) {
-          _mm_storeu_si128(
-              p + 0, _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 0),
-                                                  _mm256_extractf128_si256(
-                                                      res_lo_round, 0)),
-                                    1));
-          _mm_storeu_si128(
-              p + 1, _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 1),
-                                                  _mm256_extractf128_si256(
-                                                      res_hi_round, 0)),
-                                    1));
-          if (w - j > 8) {
-            _mm_storeu_si128(
-                p + 2, _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 2),
-                                                    _mm256_extractf128_si256(
-                                                        res_lo_round, 1)),
-                                      1));
-            _mm_storeu_si128(
-                p + 3, _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p + 3),
-                                                    _mm256_extractf128_si256(
-                                                        res_hi_round, 1)),
-                                      1));
-          }
-        } else {
-          _mm_storeu_si128(p + 0, _mm256_extractf128_si256(res_lo_round, 0));
-          _mm_storeu_si128(p + 1, _mm256_extractf128_si256(res_hi_round, 0));
-          if (w - j > 8) {
-            _mm_storeu_si128(p + 2, _mm256_extractf128_si256(res_lo_round, 1));
-            _mm_storeu_si128(p + 3, _mm256_extractf128_si256(res_hi_round, 1));
-          }
-        }
-      }
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 static INLINE void copy_64(const uint16_t *src, uint16_t *dst) {
   __m256i s[4];
   s[0] = _mm256_loadu_si256((__m256i *)(src + 0 * 16));

diff --git a/av1/common/x86/highbd_convolve_2d_sse4.c b/av1/common/x86/highbd_convolve_2d_sse4.c
index 047f214..428c2c7 100644
--- a/av1/common/x86/highbd_convolve_2d_sse4.c
+++ b/av1/common/x86/highbd_convolve_2d_sse4.c

@@ -21,7 +21,6 @@
 #include "aom_dsp/x86/convolve_sse4_1.h"
 #include "av1/common/convolve.h"
 
-#if CONFIG_LOWPRECISION_BLEND
 void av1_highbd_jnt_convolve_2d_copy_sse4_1(
     const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
     int h, InterpFilterParams *filter_params_x,
@@ -419,224 +418,3 @@
     }
   }
 }
-#else
-void av1_highbd_jnt_convolve_2d_sse4_1(
-    const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
-    int h, InterpFilterParams *filter_params_x,
-    InterpFilterParams *filter_params_y, const int subpel_x_q4,
-    const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
-  DECLARE_ALIGNED(16, int16_t,
-                  im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  int im_h = h + filter_params_y->taps - 1;
-  int im_stride = MAX_SB_SIZE;
-  int i, j;
-  const int do_average = conv_params->do_average;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m128i wt0 = _mm_set1_epi32(w0);
-  const __m128i wt1 = _mm_set1_epi32(w1);
-
-  (void)dst0;
-  (void)dst_stride0;
-
-  // Check that, even with 12-bit input, the intermediate values will fit
-  // into an unsigned 16-bit intermediate array.
-  assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
-
-  /* Horizontal filter */
-  {
-    const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
-        *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-    const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
-    // coeffs 0 1 0 1 2 3 2 3
-    const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
-    // coeffs 4 5 4 5 6 7 6 7
-    const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
-    // coeffs 0 1 0 1 0 1 0 1
-    const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
-    // coeffs 2 3 2 3 2 3 2 3
-    const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
-    // coeffs 4 5 4 5 4 5 4 5
-    const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
-    // coeffs 6 7 6 7 6 7 6 7
-    const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
-    const __m128i round_const = _mm_set1_epi32(
-        ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
-    const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
-    for (i = 0; i < im_h; ++i) {
-      for (j = 0; j < w; j += 8) {
-        const __m128i data =
-            _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
-        const __m128i data2 =
-            _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 8]);
-
-        // Filter even-index pixels
-        const __m128i res_0 = _mm_madd_epi16(data, coeff_01);
-        const __m128i res_2 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);
-        const __m128i res_4 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);
-        const __m128i res_6 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);
-
-        __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
-                                         _mm_add_epi32(res_2, res_6));
-        res_even =
-            _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
-        // Filter odd-index pixels
-        const __m128i res_1 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);
-        const __m128i res_3 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);
-        const __m128i res_5 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);
-        const __m128i res_7 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);
-
-        __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
-                                        _mm_add_epi32(res_3, res_7));
-        res_odd =
-            _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
-
-        // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
-        __m128i res = _mm_packs_epi32(res_even, res_odd);
-        _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res);
-      }
-    }
-  }
-
-  /* Vertical filter */
-  {
-    const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
-        *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-    const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
-    // coeffs 0 1 0 1 2 3 2 3
-    const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
-    // coeffs 4 5 4 5 6 7 6 7
-    const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
-    // coeffs 0 1 0 1 0 1 0 1
-    const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
-    // coeffs 2 3 2 3 2 3 2 3
-    const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
-    // coeffs 4 5 4 5 4 5 4 5
-    const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
-    // coeffs 6 7 6 7 6 7 6 7
-    const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
-    const __m128i round_const = _mm_set1_epi32(
-        ((1 << conv_params->round_1) >> 1) -
-        (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
-    const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 8) {
-        // Filter even-index pixels
-        const int16_t *data = &im_block[i * im_stride + j];
-        const __m128i src_0 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
-                               *(__m128i *)(data + 1 * im_stride));
-        const __m128i src_2 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
-                               *(__m128i *)(data + 3 * im_stride));
-        const __m128i src_4 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
-                               *(__m128i *)(data + 5 * im_stride));
-        const __m128i src_6 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
-                               *(__m128i *)(data + 7 * im_stride));
-
-        const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
-        const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
-        const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
-        const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
-        const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
-                                               _mm_add_epi32(res_4, res_6));
-
-        // Filter odd-index pixels
-        const __m128i src_1 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
-                               *(__m128i *)(data + 1 * im_stride));
-        const __m128i src_3 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
-                               *(__m128i *)(data + 3 * im_stride));
-        const __m128i src_5 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
-                               *(__m128i *)(data + 5 * im_stride));
-        const __m128i src_7 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
-                               *(__m128i *)(data + 7 * im_stride));
-
-        const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
-        const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
-        const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
-        const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
-        const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
-                                              _mm_add_epi32(res_5, res_7));
-
-        // Rearrange pixels back into the order 0 ... 7
-        const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
-        const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
-        const __m128i res_lo_round =
-            _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
-        const __m128i res_hi_round =
-            _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
-        // Accumulate values into the destination buffer
-        __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
-        if (conv_params->use_jnt_comp_avg) {
-          if (do_average) {
-            const __m128i tmp_lo = _mm_loadu_si128(p + 0);
-            const __m128i tmp_hi = _mm_loadu_si128(p + 1);
-            const __m128i jnt_sum_lo =
-                _mm_add_epi32(_mm_mullo_epi32(tmp_lo, wt0),
-                              _mm_mullo_epi32(res_lo_round, wt1));
-            const __m128i jnt_sum_hi =
-                _mm_add_epi32(_mm_mullo_epi32(tmp_hi, wt0),
-                              _mm_mullo_epi32(res_hi_round, wt1));
-            const __m128i final_lo =
-                _mm_srai_epi32(jnt_sum_lo, DIST_PRECISION_BITS);
-            const __m128i final_hi =
-                _mm_srai_epi32(jnt_sum_hi, DIST_PRECISION_BITS);
-
-            _mm_storeu_si128(p + 0, final_lo);
-            _mm_storeu_si128(p + 1, final_hi);
-          } else {
-            _mm_storeu_si128(p + 0, res_lo_round);
-            _mm_storeu_si128(p + 1, res_hi_round);
-          }
-        } else {
-          if (do_average) {
-            _mm_storeu_si128(
-                p + 0,
-                _mm_srai_epi32(
-                    _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round), 1));
-            _mm_storeu_si128(
-                p + 1,
-                _mm_srai_epi32(
-                    _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round), 1));
-          } else {
-            _mm_storeu_si128(p + 0, res_lo_round);
-            _mm_storeu_si128(p + 1, res_hi_round);
-          }
-        }
-      }
-    }
-  }
-}
-#endif

diff --git a/av1/common/x86/highbd_convolve_2d_ssse3.c b/av1/common/x86/highbd_convolve_2d_ssse3.c
index 125e464..d04fe84 100644
--- a/av1/common/x86/highbd_convolve_2d_ssse3.c
+++ b/av1/common/x86/highbd_convolve_2d_ssse3.c

@@ -19,199 +19,6 @@
 #include "aom_dsp/x86/convolve_sse2.h"
 #include "av1/common/convolve.h"
 
-#if !CONFIG_LOWPRECISION_BLEND
-void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
-                                  uint16_t *dst0, int dst_stride0, int w, int h,
-                                  InterpFilterParams *filter_params_x,
-                                  InterpFilterParams *filter_params_y,
-                                  const int subpel_x_q4, const int subpel_y_q4,
-                                  ConvolveParams *conv_params, int bd) {
-  DECLARE_ALIGNED(16, int16_t,
-                  im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  int im_h = h + filter_params_y->taps - 1;
-  int im_stride = MAX_SB_SIZE;
-  int i, j;
-  const int do_average = conv_params->do_average;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-  (void)dst0;
-  (void)dst_stride0;
-  // Check that, even with 12-bit input, the intermediate values will fit
-  // into an unsigned 16-bit intermediate array.
-  assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
-
-  /* Horizontal filter */
-  {
-    const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
-        *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-    const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
-    // coeffs 0 1 0 1 2 3 2 3
-    const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
-    // coeffs 4 5 4 5 6 7 6 7
-    const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
-    // coeffs 0 1 0 1 0 1 0 1
-    const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
-    // coeffs 2 3 2 3 2 3 2 3
-    const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
-    // coeffs 4 5 4 5 4 5 4 5
-    const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
-    // coeffs 6 7 6 7 6 7 6 7
-    const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
-    const __m128i round_const = _mm_set1_epi32(
-        ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
-    const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
-    for (i = 0; i < im_h; ++i) {
-      for (j = 0; j < w; j += 8) {
-        const __m128i data =
-            _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
-        const __m128i data2 =
-            _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 8]);
-
-        // Filter even-index pixels
-        const __m128i res_0 = _mm_madd_epi16(data, coeff_01);
-        const __m128i res_2 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);
-        const __m128i res_4 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);
-        const __m128i res_6 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);
-
-        __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
-                                         _mm_add_epi32(res_2, res_6));
-        res_even =
-            _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
-        // Filter odd-index pixels
-        const __m128i res_1 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);
-        const __m128i res_3 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);
-        const __m128i res_5 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);
-        const __m128i res_7 =
-            _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);
-
-        __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
-                                        _mm_add_epi32(res_3, res_7));
-        res_odd =
-            _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
-
-        // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
-        __m128i res = _mm_packs_epi32(res_even, res_odd);
-        _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res);
-      }
-    }
-  }
-
-  /* Vertical filter */
-  {
-    const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
-        *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-    const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
-    // coeffs 0 1 0 1 2 3 2 3
-    const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
-    // coeffs 4 5 4 5 6 7 6 7
-    const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
-    // coeffs 0 1 0 1 0 1 0 1
-    const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
-    // coeffs 2 3 2 3 2 3 2 3
-    const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
-    // coeffs 4 5 4 5 4 5 4 5
-    const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
-    // coeffs 6 7 6 7 6 7 6 7
-    const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
-    const __m128i round_const = _mm_set1_epi32(
-        ((1 << conv_params->round_1) >> 1) -
-        (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
-    const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 8) {
-        // Filter even-index pixels
-        const int16_t *data = &im_block[i * im_stride + j];
-        const __m128i src_0 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
-                               *(__m128i *)(data + 1 * im_stride));
-        const __m128i src_2 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
-                               *(__m128i *)(data + 3 * im_stride));
-        const __m128i src_4 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
-                               *(__m128i *)(data + 5 * im_stride));
-        const __m128i src_6 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
-                               *(__m128i *)(data + 7 * im_stride));
-
-        const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
-        const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
-        const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
-        const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
-        const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
-                                               _mm_add_epi32(res_4, res_6));
-
-        // Filter odd-index pixels
-        const __m128i src_1 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
-                               *(__m128i *)(data + 1 * im_stride));
-        const __m128i src_3 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
-                               *(__m128i *)(data + 3 * im_stride));
-        const __m128i src_5 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
-                               *(__m128i *)(data + 5 * im_stride));
-        const __m128i src_7 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
-                               *(__m128i *)(data + 7 * im_stride));
-
-        const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
-        const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
-        const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
-        const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
-        const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
-                                              _mm_add_epi32(res_5, res_7));
-
-        // Rearrange pixels back into the order 0 ... 7
-        const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
-        const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
-        const __m128i res_lo_round =
-            _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
-        const __m128i res_hi_round =
-            _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
-        // Accumulate values into the destination buffer
-        __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
-        if (do_average) {
-          _mm_storeu_si128(
-              p + 0,
-              _mm_srai_epi32(
-                  _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round), 1));
-          _mm_storeu_si128(
-              p + 1,
-              _mm_srai_epi32(
-                  _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round), 1));
-        } else {
-          _mm_storeu_si128(p + 0, res_lo_round);
-          _mm_storeu_si128(p + 1, res_hi_round);
-        }
-      }
-    }
-  }
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride,
                                      uint16_t *dst, int dst_stride, int w,
                                      int h, InterpFilterParams *filter_params_x,

diff --git a/av1/common/x86/highbd_jnt_convolve_avx2.c b/av1/common/x86/highbd_jnt_convolve_avx2.c
index ff1f2f8..581060d 100644
--- a/av1/common/x86/highbd_jnt_convolve_avx2.c
+++ b/av1/common/x86/highbd_jnt_convolve_avx2.c

@@ -22,7 +22,6 @@
 #include "aom_dsp/aom_filter.h"
 #include "av1/common/convolve.h"
 
-#if CONFIG_LOWPRECISION_BLEND
 void av1_highbd_jnt_convolve_2d_copy_avx2(
     const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
     int h, InterpFilterParams *filter_params_x,
@@ -851,521 +850,3 @@
     }
   }
 }
-#else
-void av1_highbd_jnt_convolve_2d_copy_avx2(
-    const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
-    int h, InterpFilterParams *filter_params_x,
-    InterpFilterParams *filter_params_y, const int subpel_x_q4,
-    const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  (void)filter_params_x;
-  (void)filter_params_y;
-  (void)subpel_x_q4;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-  (void)bd;
-
-  const int bits =
-      FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
-  const __m128i left_shift = _mm_cvtsi32_si128(bits);
-  const int do_average = conv_params->do_average;
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m256i wt0 = _mm256_set1_epi32(w0);
-  const __m256i wt1 = _mm256_set1_epi32(w1);
-  int i, j;
-
-  assert(bits <= 4);
-
-  if (!(w % 16)) {
-    for (i = 0; i < h; i += 1) {
-      for (j = 0; j < w; j += 16) {
-        const __m256i src_16bit =
-            _mm256_loadu_si256((__m256i *)(&src[i * src_stride + j]));
-
-        const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);
-        const __m256i res_lo =
-            _mm256_cvtepu16_epi32(_mm256_castsi256_si128(res));
-        const __m256i res_hi =
-            _mm256_cvtepu16_epi32(_mm256_extracti128_si256(res, 1));
-
-        if (conv_params->use_jnt_comp_avg) {
-          mult_add_store_aligned_256(&dst[i * dst_stride + j], &res_lo, &wt0,
-                                     &wt1, do_average);
-          mult_add_store_aligned_256(&dst[i * dst_stride + j + 8], &res_hi,
-                                     &wt0, &wt1, do_average);
-        } else {
-          add_store_aligned_256(&dst[i * dst_stride + j], &res_lo, do_average);
-          add_store_aligned_256(&dst[i * dst_stride + j + 8], &res_hi,
-                                do_average);
-        }
-      }
-    }
-  } else if (!(w % 4)) {
-    for (i = 0; i < h; i += 2) {
-      for (j = 0; j < w; j += 8) {
-        const __m128i src_row_0 =
-            _mm_loadu_si128((__m128i *)(&src[i * src_stride + j]));
-        const __m128i src_row_1 =
-            _mm_loadu_si128((__m128i *)(&src[i * src_stride + j + src_stride]));
-        // since not all compilers yet support _mm256_set_m128i()
-        const __m256i src_10 = _mm256_insertf128_si256(
-            _mm256_castsi128_si256(src_row_0), src_row_1, 1);
-
-        const __m256i res = _mm256_sll_epi16(src_10, left_shift);
-
-        const __m256i res_lo =
-            _mm256_cvtepu16_epi32(_mm256_castsi256_si128(res));
-        const __m256i res_hi =
-            _mm256_cvtepu16_epi32(_mm256_extracti128_si256(res, 1));
-
-        if (conv_params->use_jnt_comp_avg) {
-          mult_add_store_aligned_256(&dst[i * dst_stride + j], &res_lo, &wt0,
-                                     &wt1, do_average);
-          mult_add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                                     &res_hi, &wt0, &wt1, do_average);
-        } else {
-          add_store_aligned_256(&dst[i * dst_stride + j], &res_lo, do_average);
-          add_store_aligned_256(&dst[i * dst_stride + j + dst_stride], &res_hi,
-                                do_average);
-        }
-      }
-    }
-  }
-}
-
-void av1_highbd_jnt_convolve_2d_avx2(
-    const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst0, int dst_stride0,
-    int w, int h, InterpFilterParams *filter_params_x,
-    InterpFilterParams *filter_params_y, const int subpel_x_q4,
-    const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
-  DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  int im_h = h + filter_params_y->taps - 1;
-  int im_stride = 8;
-  int i, j;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-  (void)dst0;
-  (void)dst_stride0;
-
-  // Check that, even with 12-bit input, the intermediate values will fit
-  // into an unsigned 16-bit intermediate array.
-  assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
-
-  __m256i s[8], coeffs_y[4], coeffs_x[4];
-  const int do_average = conv_params->do_average;
-
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m256i wt0 = _mm256_set1_epi32(w0);
-  const __m256i wt1 = _mm256_set1_epi32(w1);
-  const __m128i wt0_128 = _mm256_castsi256_si128(wt0);
-  const __m128i wt1_128 = _mm256_castsi256_si128(wt1);
-
-  const __m256i round_const_x = _mm256_set1_epi32(
-      ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
-  const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
-
-  const __m256i round_const_y = _mm256_set1_epi32(
-      ((1 << conv_params->round_1) >> 1) -
-      (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
-  const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
-
-  prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
-  prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
-  for (j = 0; j < w; j += 8) {
-    /* Horizontal filter */
-    {
-      for (i = 0; i < im_h; i += 2) {
-        const __m256i row0 =
-            _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
-        __m256i row1 = _mm256_set1_epi16(0);
-        if (i + 1 < im_h)
-          row1 =
-              _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);
-
-        const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20);
-        const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31);
-
-        // even pixels
-        s[0] = _mm256_alignr_epi8(r1, r0, 0);
-        s[1] = _mm256_alignr_epi8(r1, r0, 4);
-        s[2] = _mm256_alignr_epi8(r1, r0, 8);
-        s[3] = _mm256_alignr_epi8(r1, r0, 12);
-
-        __m256i res_even = convolve(s, coeffs_x);
-        res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x),
-                                    round_shift_x);
-
-        // odd pixels
-        s[0] = _mm256_alignr_epi8(r1, r0, 2);
-        s[1] = _mm256_alignr_epi8(r1, r0, 6);
-        s[2] = _mm256_alignr_epi8(r1, r0, 10);
-        s[3] = _mm256_alignr_epi8(r1, r0, 14);
-
-        __m256i res_odd = convolve(s, coeffs_x);
-        res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x),
-                                   round_shift_x);
-
-        __m256i res_even1 = _mm256_packs_epi32(res_even, res_even);
-        __m256i res_odd1 = _mm256_packs_epi32(res_odd, res_odd);
-        __m256i res = _mm256_unpacklo_epi16(res_even1, res_odd1);
-
-        _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
-      }
-    }
-
-    /* Vertical filter */
-    {
-      __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride));
-      __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));
-      __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride));
-      __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride));
-      __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride));
-      __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride));
-
-      s[0] = _mm256_unpacklo_epi16(s0, s1);
-      s[1] = _mm256_unpacklo_epi16(s2, s3);
-      s[2] = _mm256_unpacklo_epi16(s4, s5);
-
-      s[4] = _mm256_unpackhi_epi16(s0, s1);
-      s[5] = _mm256_unpackhi_epi16(s2, s3);
-      s[6] = _mm256_unpackhi_epi16(s4, s5);
-
-      for (i = 0; i < h; i += 2) {
-        const int16_t *data = &im_block[i * im_stride];
-
-        const __m256i s6 =
-            _mm256_loadu_si256((__m256i *)(data + 6 * im_stride));
-        const __m256i s7 =
-            _mm256_loadu_si256((__m256i *)(data + 7 * im_stride));
-
-        s[3] = _mm256_unpacklo_epi16(s6, s7);
-        s[7] = _mm256_unpackhi_epi16(s6, s7);
-
-        const __m256i res_a = convolve(s, coeffs_y);
-
-        const __m256i res_a_round = _mm256_sra_epi32(
-            _mm256_add_epi32(res_a, round_const_y), round_shift_y);
-
-        if (w - j > 4) {
-          const __m256i res_b = convolve(s + 4, coeffs_y);
-          const __m256i res_b_round = _mm256_sra_epi32(
-              _mm256_add_epi32(res_b, round_const_y), round_shift_y);
-          const __m256i res_ax =
-              _mm256_permute2x128_si256(res_a_round, res_b_round, 0x20);
-          const __m256i res_bx =
-              _mm256_permute2x128_si256(res_a_round, res_b_round, 0x31);
-
-          if (conv_params->use_jnt_comp_avg) {
-            mult_add_store_aligned_256(&dst[i * dst_stride + j], &res_ax, &wt0,
-                                       &wt1, do_average);
-            mult_add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                                       &res_bx, &wt0, &wt1, do_average);
-          } else {
-            add_store_aligned_256(&dst[i * dst_stride + j], &res_ax,
-                                  do_average);
-            add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                                  &res_bx, do_average);
-          }
-        } else {
-          const __m128i res_ax = _mm256_castsi256_si128(res_a_round);
-          const __m128i res_bx = _mm256_extracti128_si256(res_a_round, 1);
-
-          if (conv_params->use_jnt_comp_avg) {
-            mult_add_store(&dst[i * dst_stride + j], &res_ax, &wt0_128,
-                           &wt1_128, do_average);
-            mult_add_store(&dst[i * dst_stride + j + dst_stride], &res_bx,
-                           &wt0_128, &wt1_128, do_average);
-          } else {
-            add_store(&dst[i * dst_stride + j], &res_ax, do_average);
-            add_store(&dst[i * dst_stride + j + dst_stride], &res_bx,
-                      do_average);
-          }
-        }
-
-        s[0] = s[1];
-        s[1] = s[2];
-        s[2] = s[3];
-
-        s[4] = s[5];
-        s[5] = s[6];
-        s[6] = s[7];
-      }
-    }
-  }
-}
-
-void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride,
-                                    uint16_t *dst0, int dst_stride0, int w,
-                                    int h, InterpFilterParams *filter_params_x,
-                                    InterpFilterParams *filter_params_y,
-                                    const int subpel_x_q4,
-                                    const int subpel_y_q4,
-                                    ConvolveParams *conv_params, int bd) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint16_t *const src_ptr = src - fo_horiz;
-  const int bits = FILTER_BITS - conv_params->round_1;
-  (void)filter_params_y;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-  (void)bd;
-
-  int i, j;
-  __m256i s[4], coeffs_x[4];
-
-  const int do_average = conv_params->do_average;
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m256i wt0 = _mm256_set1_epi32(w0);
-  const __m256i wt1 = _mm256_set1_epi32(w1);
-  const __m128i wt0_128 = _mm256_castsi256_si128(wt0);
-  const __m128i wt1_128 = _mm256_castsi256_si128(wt1);
-
-  const __m256i round_const_x =
-      _mm256_set1_epi32(((1 << conv_params->round_0) >> 1));
-  const __m128i round_shift_x = _mm_cvtsi32_si128(conv_params->round_0);
-  const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
-
-  assert(bits >= 0);
-  prepare_coeffs(filter_params_x, subpel_x_q4, coeffs_x);
-
-  for (j = 0; j < w; j += 8) {
-    /* Horizontal filter */
-    for (i = 0; i < h; i += 2) {
-      const __m256i row0 =
-          _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
-      __m256i row1 =
-          _mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);
-
-      const __m256i r0 = _mm256_permute2x128_si256(row0, row1, 0x20);
-      const __m256i r1 = _mm256_permute2x128_si256(row0, row1, 0x31);
-
-      // even pixels
-      s[0] = _mm256_alignr_epi8(r1, r0, 0);
-      s[1] = _mm256_alignr_epi8(r1, r0, 4);
-      s[2] = _mm256_alignr_epi8(r1, r0, 8);
-      s[3] = _mm256_alignr_epi8(r1, r0, 12);
-
-      __m256i res_even = convolve(s, coeffs_x);
-      res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const_x),
-                                  round_shift_x);
-
-      // odd pixels
-      s[0] = _mm256_alignr_epi8(r1, r0, 2);
-      s[1] = _mm256_alignr_epi8(r1, r0, 6);
-      s[2] = _mm256_alignr_epi8(r1, r0, 10);
-      s[3] = _mm256_alignr_epi8(r1, r0, 14);
-
-      __m256i res_odd = convolve(s, coeffs_x);
-      res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const_x),
-                                 round_shift_x);
-
-      res_even = _mm256_sll_epi32(res_even, round_shift_bits);
-      res_odd = _mm256_sll_epi32(res_odd, round_shift_bits);
-
-      __m256i res1 = _mm256_unpacklo_epi32(res_even, res_odd);
-
-      if (w - j > 4) {
-        __m256i res2 = _mm256_unpackhi_epi32(res_even, res_odd);
-
-        const __m256i res_ax = _mm256_permute2x128_si256(res1, res2, 0x20);
-        const __m256i res_bx = _mm256_permute2x128_si256(res1, res2, 0x31);
-
-        if (conv_params->use_jnt_comp_avg) {
-          mult_add_store_aligned_256(&dst[i * dst_stride + j], &res_ax, &wt0,
-                                     &wt1, do_average);
-          mult_add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                                     &res_bx, &wt0, &wt1, do_average);
-        } else {
-          add_store_aligned_256(&dst[i * dst_stride + j], &res_ax, do_average);
-          add_store_aligned_256(&dst[i * dst_stride + j + dst_stride], &res_bx,
-                                do_average);
-        }
-      } else {
-        const __m128i res_ax = _mm256_castsi256_si128(res1);
-        const __m128i res_bx = _mm256_extracti128_si256(res1, 1);
-
-        if (conv_params->use_jnt_comp_avg) {
-          mult_add_store(&dst[i * dst_stride + j], &res_ax, &wt0_128, &wt1_128,
-                         do_average);
-          mult_add_store(&dst[i * dst_stride + j + dst_stride], &res_bx,
-                         &wt0_128, &wt1_128, do_average);
-        } else {
-          add_store(&dst[i * dst_stride + j], &res_ax, do_average);
-          add_store(&dst[i * dst_stride + j + dst_stride], &res_bx, do_average);
-        }
-      }
-    }
-  }
-}
-
-void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride,
-                                    uint16_t *dst0, int dst_stride0, int w,
-                                    int h, InterpFilterParams *filter_params_x,
-                                    InterpFilterParams *filter_params_y,
-                                    const int subpel_x_q4,
-                                    const int subpel_y_q4,
-                                    ConvolveParams *conv_params, int bd) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const uint16_t *const src_ptr = src - fo_vert * src_stride;
-  const int bits = FILTER_BITS - conv_params->round_0;
-  (void)filter_params_x;
-  (void)subpel_x_q4;
-  (void)dst0;
-  (void)dst_stride0;
-  (void)bd;
-
-  assert(bits >= 0);
-  int i, j;
-  __m256i s[8], coeffs_y[4];
-  const int do_average = conv_params->do_average;
-
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m256i wt0 = _mm256_set1_epi32(w0);
-  const __m256i wt1 = _mm256_set1_epi32(w1);
-  const __m128i wt0_128 = _mm256_castsi256_si128(wt0);
-  const __m128i wt1_128 = _mm256_castsi256_si128(wt1);
-  const __m256i round_const_y =
-      _mm256_set1_epi32(((1 << conv_params->round_1) >> 1));
-  const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
-  const __m128i round_shift_bits = _mm_cvtsi32_si128(bits);
-
-  prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
-  for (j = 0; j < w; j += 8) {
-    const uint16_t *data = &src_ptr[j];
-    /* Vertical filter */
-    {
-      __m256i src6;
-      __m256i s01 = _mm256_permute2x128_si256(
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 0 * src_stride))),
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
-          0x20);
-      __m256i s12 = _mm256_permute2x128_si256(
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
-          0x20);
-      __m256i s23 = _mm256_permute2x128_si256(
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
-          0x20);
-      __m256i s34 = _mm256_permute2x128_si256(
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
-          0x20);
-      __m256i s45 = _mm256_permute2x128_si256(
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
-          0x20);
-      src6 = _mm256_castsi128_si256(
-          _mm_loadu_si128((__m128i *)(data + 6 * src_stride)));
-      __m256i s56 = _mm256_permute2x128_si256(
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
-          src6, 0x20);
-
-      s[0] = _mm256_unpacklo_epi16(s01, s12);
-      s[1] = _mm256_unpacklo_epi16(s23, s34);
-      s[2] = _mm256_unpacklo_epi16(s45, s56);
-
-      s[4] = _mm256_unpackhi_epi16(s01, s12);
-      s[5] = _mm256_unpackhi_epi16(s23, s34);
-      s[6] = _mm256_unpackhi_epi16(s45, s56);
-
-      for (i = 0; i < h; i += 2) {
-        data = &src_ptr[i * src_stride + j];
-
-        const __m256i s67 = _mm256_permute2x128_si256(
-            src6,
-            _mm256_castsi128_si256(
-                _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
-            0x20);
-
-        src6 = _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 8 * src_stride)));
-
-        const __m256i s78 = _mm256_permute2x128_si256(
-            _mm256_castsi128_si256(
-                _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
-            src6, 0x20);
-
-        s[3] = _mm256_unpacklo_epi16(s67, s78);
-        s[7] = _mm256_unpackhi_epi16(s67, s78);
-
-        const __m256i res_a = convolve(s, coeffs_y);
-
-        __m256i res_a_round = _mm256_sll_epi32(res_a, round_shift_bits);
-        res_a_round = _mm256_sra_epi32(
-            _mm256_add_epi32(res_a_round, round_const_y), round_shift_y);
-
-        if (w - j > 4) {
-          const __m256i res_b = convolve(s + 4, coeffs_y);
-          __m256i res_b_round = _mm256_sll_epi32(res_b, round_shift_bits);
-          res_b_round = _mm256_sra_epi32(
-              _mm256_add_epi32(res_b_round, round_const_y), round_shift_y);
-
-          const __m256i res_ax =
-              _mm256_permute2x128_si256(res_a_round, res_b_round, 0x20);
-          const __m256i res_bx =
-              _mm256_permute2x128_si256(res_a_round, res_b_round, 0x31);
-          if (conv_params->use_jnt_comp_avg) {
-            mult_add_store_aligned_256(&dst[i * dst_stride + j], &res_ax, &wt0,
-                                       &wt1, do_average);
-            mult_add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                                       &res_bx, &wt0, &wt1, do_average);
-          } else {
-            add_store_aligned_256(&dst[i * dst_stride + j], &res_ax,
-                                  do_average);
-            add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                                  &res_bx, do_average);
-          }
-        } else {
-          const __m128i res_ax = _mm256_castsi256_si128(res_a_round);
-          const __m128i res_bx = _mm256_extracti128_si256(res_a_round, 1);
-
-          if (conv_params->use_jnt_comp_avg) {
-            mult_add_store(&dst[i * dst_stride + j], &res_ax, &wt0_128,
-                           &wt1_128, do_average);
-            mult_add_store(&dst[i * dst_stride + j + dst_stride], &res_bx,
-                           &wt0_128, &wt1_128, do_average);
-          } else {
-            add_store(&dst[i * dst_stride + j], &res_ax, do_average);
-            add_store(&dst[i * dst_stride + j + dst_stride], &res_bx,
-                      do_average);
-          }
-        }
-        s[0] = s[1];
-        s[1] = s[2];
-        s[2] = s[3];
-
-        s[4] = s[5];
-        s[5] = s[6];
-        s[6] = s[7];
-      }
-    }
-  }
-}
-#endif

diff --git a/av1/common/x86/highbd_jnt_convolve_sse4.c b/av1/common/x86/highbd_jnt_convolve_sse4.c
index 680bebd..ba092a2 100644
--- a/av1/common/x86/highbd_jnt_convolve_sse4.c
+++ b/av1/common/x86/highbd_jnt_convolve_sse4.c

@@ -16,7 +16,6 @@
 #include "aom_dsp/x86/convolve_sse2.h"
 #include "aom_dsp/x86/convolve_sse4_1.h"
 
-#if CONFIG_LOWPRECISION_BLEND
 void av1_highbd_jnt_convolve_y_sse4_1(
     const uint16_t *src, int src_stride, uint16_t *dst0, int dst_stride0, int w,
     int h, InterpFilterParams *filter_params_x,
@@ -381,4 +380,3 @@
     }
   }
 }
-#endif

diff --git a/av1/common/x86/highbd_warp_plane_sse4.c b/av1/common/x86/highbd_warp_plane_sse4.c
index 5df6f7d..9599433 100644
--- a/av1/common/x86/highbd_warp_plane_sse4.c
+++ b/av1/common/x86/highbd_warp_plane_sse4.c

@@ -21,7 +21,6 @@
                                    int subsampling_x, int subsampling_y, int bd,
                                    ConvolveParams *conv_params, int16_t alpha,
                                    int16_t beta, int16_t gamma, int16_t delta) {
-#if CONFIG_LOWPRECISION_BLEND
   int comp_avg = conv_params->do_average;
   __m128i tmp[15];
   int i, j, k;
@@ -412,359 +411,4 @@
       }
     }
   }
-#else   // CONFIG_LOWPRECISION_BLEND
-  int comp_avg = conv_params->do_average;
-  __m128i tmp[15];
-  int i, j, k;
-  const int reduce_bits_horiz =
-      conv_params->round_0 +
-      AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
-  const int reduce_bits_vert = conv_params->is_compound
-                                   ? conv_params->round_1
-                                   : 2 * FILTER_BITS - reduce_bits_horiz;
-  const int offset_bits_horiz = bd + FILTER_BITS - 1;
-  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
-  assert(!(bd == 12 && reduce_bits_horiz < 5));
-
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m128i wt0 = _mm_set1_epi32(w0);
-  const __m128i wt1 = _mm_set1_epi32(w1);
-
-  /* Note: For this code to work, the left/right frame borders need to be
-     extended by at least 13 pixels each. By the time we get here, other
-     code will have set up this border, but we allow an explicit check
-     for debugging purposes.
-  */
-  /*for (i = 0; i < height; ++i) {
-    for (j = 0; j < 13; ++j) {
-      assert(ref[i * stride - 13 + j] == ref[i * stride]);
-      assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
-    }
-  }*/
-
-  for (i = 0; i < p_height; i += 8) {
-    for (j = 0; j < p_width; j += 8) {
-      const int32_t src_x = (p_col + j + 4) << subsampling_x;
-      const int32_t src_y = (p_row + i + 4) << subsampling_y;
-      const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
-      const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
-      const int32_t x4 = dst_x >> subsampling_x;
-      const int32_t y4 = dst_y >> subsampling_y;
-
-      int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
-      int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-      int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
-      int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
-      // Add in all the constant terms, including rounding and offset
-      sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
-             (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
-      sy4 += gamma * (-4) + delta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
-             (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
-
-      sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-      sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-
-      // Horizontal filter
-      // If the block is aligned such that, after clamping, every sample
-      // would be taken from the leftmost/rightmost column, then we can
-      // skip the expensive horizontal filter.
-      if (ix4 <= -7) {
-        for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
-          int iy = iy4 + k;
-          if (iy < 0)
-            iy = 0;
-          else if (iy > height - 1)
-            iy = height - 1;
-          tmp[k + 7] = _mm_set1_epi16(
-              (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
-              ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
-        }
-      } else if (ix4 >= width + 6) {
-        for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
-          int iy = iy4 + k;
-          if (iy < 0)
-            iy = 0;
-          else if (iy > height - 1)
-            iy = height - 1;
-          tmp[k + 7] =
-              _mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
-                             ref[iy * stride + (width - 1)] *
-                                 (1 << (FILTER_BITS - reduce_bits_horiz)));
-        }
-      } else {
-        for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
-          int iy = iy4 + k;
-          if (iy < 0)
-            iy = 0;
-          else if (iy > height - 1)
-            iy = height - 1;
-          int sx = sx4 + beta * (k + 4);
-
-          // Load source pixels
-          const __m128i src =
-              _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
-          const __m128i src2 =
-              _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1));
-
-          // Filter even-index pixels
-          const __m128i tmp_0 = _mm_loadu_si128(
-              (__m128i *)(warped_filter +
-                          ((sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS)));
-          const __m128i tmp_2 = _mm_loadu_si128(
-              (__m128i *)(warped_filter +
-                          ((sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS)));
-          const __m128i tmp_4 = _mm_loadu_si128(
-              (__m128i *)(warped_filter +
-                          ((sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS)));
-          const __m128i tmp_6 = _mm_loadu_si128(
-              (__m128i *)(warped_filter +
-                          ((sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS)));
-
-          // coeffs 0 1 0 1 2 3 2 3 for pixels 0, 2
-          const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
-          // coeffs 0 1 0 1 2 3 2 3 for pixels 4, 6
-          const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
-          // coeffs 4 5 4 5 6 7 6 7 for pixels 0, 2
-          const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
-          // coeffs 4 5 4 5 6 7 6 7 for pixels 4, 6
-          const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
-
-          // coeffs 0 1 0 1 0 1 0 1 for pixels 0, 2, 4, 6
-          const __m128i coeff_0 = _mm_unpacklo_epi64(tmp_8, tmp_10);
-          // coeffs 2 3 2 3 2 3 2 3 for pixels 0, 2, 4, 6
-          const __m128i coeff_2 = _mm_unpackhi_epi64(tmp_8, tmp_10);
-          // coeffs 4 5 4 5 4 5 4 5 for pixels 0, 2, 4, 6
-          const __m128i coeff_4 = _mm_unpacklo_epi64(tmp_12, tmp_14);
-          // coeffs 6 7 6 7 6 7 6 7 for pixels 0, 2, 4, 6
-          const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
-
-          const __m128i round_const = _mm_set1_epi32(
-              (1 << offset_bits_horiz) + ((1 << reduce_bits_horiz) >> 1));
-
-          // Calculate filtered results
-          const __m128i res_0 = _mm_madd_epi16(src, coeff_0);
-          const __m128i res_2 =
-              _mm_madd_epi16(_mm_alignr_epi8(src2, src, 4), coeff_2);
-          const __m128i res_4 =
-              _mm_madd_epi16(_mm_alignr_epi8(src2, src, 8), coeff_4);
-          const __m128i res_6 =
-              _mm_madd_epi16(_mm_alignr_epi8(src2, src, 12), coeff_6);
-
-          __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
-                                           _mm_add_epi32(res_2, res_6));
-          res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const),
-                                   _mm_cvtsi32_si128(reduce_bits_horiz));
-
-          // Filter odd-index pixels
-          const __m128i tmp_1 = _mm_loadu_si128(
-              (__m128i *)(warped_filter +
-                          ((sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS)));
-          const __m128i tmp_3 = _mm_loadu_si128(
-              (__m128i *)(warped_filter +
-                          ((sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS)));
-          const __m128i tmp_5 = _mm_loadu_si128(
-              (__m128i *)(warped_filter +
-                          ((sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS)));
-          const __m128i tmp_7 = _mm_loadu_si128(
-              (__m128i *)(warped_filter +
-                          ((sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS)));
-
-          const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
-          const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
-          const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
-          const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
-
-          const __m128i coeff_1 = _mm_unpacklo_epi64(tmp_9, tmp_11);
-          const __m128i coeff_3 = _mm_unpackhi_epi64(tmp_9, tmp_11);
-          const __m128i coeff_5 = _mm_unpacklo_epi64(tmp_13, tmp_15);
-          const __m128i coeff_7 = _mm_unpackhi_epi64(tmp_13, tmp_15);
-
-          const __m128i res_1 =
-              _mm_madd_epi16(_mm_alignr_epi8(src2, src, 2), coeff_1);
-          const __m128i res_3 =
-              _mm_madd_epi16(_mm_alignr_epi8(src2, src, 6), coeff_3);
-          const __m128i res_5 =
-              _mm_madd_epi16(_mm_alignr_epi8(src2, src, 10), coeff_5);
-          const __m128i res_7 =
-              _mm_madd_epi16(_mm_alignr_epi8(src2, src, 14), coeff_7);
-
-          __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
-                                          _mm_add_epi32(res_3, res_7));
-          res_odd = _mm_sra_epi32(_mm_add_epi32(res_odd, round_const),
-                                  _mm_cvtsi32_si128(reduce_bits_horiz));
-
-          // Combine results into one register.
-          // We store the columns in the order 0, 2, 4, 6, 1, 3, 5, 7
-          // as this order helps with the vertical filter.
-          tmp[k + 7] = _mm_packs_epi32(res_even, res_odd);
-        }
-      }
-
-      // Vertical filter
-      for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
-        int sy = sy4 + delta * (k + 4);
-
-        // Load from tmp and rearrange pairs of consecutive rows into the
-        // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 7 7
-        const __m128i *src = tmp + (k + 4);
-        const __m128i src_0 = _mm_unpacklo_epi16(src[0], src[1]);
-        const __m128i src_2 = _mm_unpacklo_epi16(src[2], src[3]);
-        const __m128i src_4 = _mm_unpacklo_epi16(src[4], src[5]);
-        const __m128i src_6 = _mm_unpacklo_epi16(src[6], src[7]);
-
-        // Filter even-index pixels
-        const __m128i tmp_0 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_2 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_4 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_6 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
-        const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
-        const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
-        const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
-        const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
-
-        const __m128i coeff_0 = _mm_unpacklo_epi64(tmp_8, tmp_10);
-        const __m128i coeff_2 = _mm_unpackhi_epi64(tmp_8, tmp_10);
-        const __m128i coeff_4 = _mm_unpacklo_epi64(tmp_12, tmp_14);
-        const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
-
-        const __m128i res_0 = _mm_madd_epi16(src_0, coeff_0);
-        const __m128i res_2 = _mm_madd_epi16(src_2, coeff_2);
-        const __m128i res_4 = _mm_madd_epi16(src_4, coeff_4);
-        const __m128i res_6 = _mm_madd_epi16(src_6, coeff_6);
-
-        const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
-                                               _mm_add_epi32(res_4, res_6));
-
-        // Filter odd-index pixels
-        const __m128i src_1 = _mm_unpackhi_epi16(src[0], src[1]);
-        const __m128i src_3 = _mm_unpackhi_epi16(src[2], src[3]);
-        const __m128i src_5 = _mm_unpackhi_epi16(src[4], src[5]);
-        const __m128i src_7 = _mm_unpackhi_epi16(src[6], src[7]);
-
-        const __m128i tmp_1 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_3 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_5 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_7 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
-        const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
-        const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
-        const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
-        const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
-
-        const __m128i coeff_1 = _mm_unpacklo_epi64(tmp_9, tmp_11);
-        const __m128i coeff_3 = _mm_unpackhi_epi64(tmp_9, tmp_11);
-        const __m128i coeff_5 = _mm_unpacklo_epi64(tmp_13, tmp_15);
-        const __m128i coeff_7 = _mm_unpackhi_epi64(tmp_13, tmp_15);
-
-        const __m128i res_1 = _mm_madd_epi16(src_1, coeff_1);
-        const __m128i res_3 = _mm_madd_epi16(src_3, coeff_3);
-        const __m128i res_5 = _mm_madd_epi16(src_5, coeff_5);
-        const __m128i res_7 = _mm_madd_epi16(src_7, coeff_7);
-
-        const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
-                                              _mm_add_epi32(res_5, res_7));
-
-        // Rearrange pixels back into the order 0 ... 7
-        __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
-        __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
-        if (conv_params->is_compound) {
-          __m128i *const p =
-              (__m128i *)&conv_params
-                  ->dst[(i + k + 4) * conv_params->dst_stride + j];
-          const __m128i round_const = _mm_set1_epi32(
-              -(1 << (bd + 2 * FILTER_BITS - reduce_bits_horiz - 1)) +
-              ((1 << (reduce_bits_vert)) >> 1));
-          res_lo = _mm_add_epi32(res_lo, round_const);
-          res_lo = _mm_sra_epi32(res_lo, _mm_cvtsi32_si128(reduce_bits_vert));
-          if (conv_params->use_jnt_comp_avg) {
-            if (comp_avg) {
-              const __m128i sum =
-                  _mm_add_epi32(_mm_mullo_epi32(_mm_loadu_si128(p), wt0),
-                                _mm_mullo_epi32(res_lo, wt1));
-              res_lo = _mm_srai_epi32(sum, DIST_PRECISION_BITS);
-            }
-          } else {
-            if (comp_avg)
-              res_lo =
-                  _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p), res_lo), 1);
-          }
-
-          _mm_storeu_si128(p, res_lo);
-
-          if (p_width > 4) {
-            res_hi = _mm_add_epi32(res_hi, round_const);
-            res_hi = _mm_sra_epi32(res_hi, _mm_cvtsi32_si128(reduce_bits_vert));
-
-            if (conv_params->use_jnt_comp_avg) {
-              if (comp_avg) {
-                const __m128i sum =
-                    _mm_add_epi32(_mm_mullo_epi32(_mm_loadu_si128(p + 1), wt0),
-                                  _mm_mullo_epi32(res_hi, wt1));
-                res_hi = _mm_srai_epi32(sum, DIST_PRECISION_BITS);
-              }
-            } else {
-              if (comp_avg)
-                res_hi = _mm_srai_epi32(
-                    _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi), 1);
-            }
-
-            _mm_storeu_si128(p + 1, res_hi);
-          }
-        } else {
-          // Round and pack into 8 bits
-          const __m128i round_const =
-              _mm_set1_epi32(-(1 << (bd + reduce_bits_vert - 1)) +
-                             ((1 << reduce_bits_vert) >> 1));
-
-          const __m128i res_lo_round = _mm_srai_epi32(
-              _mm_add_epi32(res_lo, round_const), reduce_bits_vert);
-          const __m128i res_hi_round = _mm_srai_epi32(
-              _mm_add_epi32(res_hi, round_const), reduce_bits_vert);
-
-          __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
-          // Clamp res_16bit to the range [0, 2^bd - 1]
-          const __m128i max_val = _mm_set1_epi16((1 << bd) - 1);
-          const __m128i zero = _mm_setzero_si128();
-          res_16bit = _mm_max_epi16(_mm_min_epi16(res_16bit, max_val), zero);
-
-          // Store, blending with 'pred' if needed
-          __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
-
-          // Note: If we're outputting a 4x4 block, we need to be very careful
-          // to only output 4 pixels at this point, to avoid encode/decode
-          // mismatches when encoding with multiple threads.
-          if (p_width == 4) {
-            if (comp_avg)
-              res_16bit = _mm_avg_epu16(res_16bit, _mm_loadl_epi64(p));
-            _mm_storel_epi64(p, res_16bit);
-          } else {
-            if (comp_avg)
-              res_16bit = _mm_avg_epu16(res_16bit, _mm_loadu_si128(p));
-            _mm_storeu_si128(p, res_16bit);
-          }
-        }
-      }
-    }
-  }
-#endif  // CONFIG_LOWPRECISION_BLEND
 }

diff --git a/av1/common/x86/jnt_convolve_avx2.c b/av1/common/x86/jnt_convolve_avx2.c
index aaf8c7e..610b057 100644
--- a/av1/common/x86/jnt_convolve_avx2.c
+++ b/av1/common/x86/jnt_convolve_avx2.c

@@ -20,7 +20,6 @@
 #include "aom_dsp/aom_filter.h"
 #include "av1/common/convolve.h"
 
-#if CONFIG_LOWPRECISION_BLEND
 void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst0,
                              int dst_stride0, int w, int h,
                              InterpFilterParams *filter_params_x,
@@ -702,487 +701,3 @@
     }
   }
 }
-#else
-void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride,
-                             const uint8_t *dst0, int dst_stride0, int w, int h,
-                             InterpFilterParams *filter_params_x,
-                             InterpFilterParams *filter_params_y,
-                             const int subpel_x_q4, const int subpel_y_q4,
-                             ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  int i, j;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint8_t *const src_ptr = src - fo_horiz;
-  const int bits = FILTER_BITS - conv_params->round_1;
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m256i wt0 = _mm256_set1_epi32(w0);
-  const __m256i wt1 = _mm256_set1_epi32(w1);
-  const int do_average = conv_params->do_average;
-  __m256i filt[4], coeffs[4];
-
-  assert(bits >= 0);
-  assert(conv_params->round_0 > 0);
-
-  filt[0] = _mm256_load_si256((__m256i const *)filt1_global_avx2);
-  filt[1] = _mm256_load_si256((__m256i const *)filt2_global_avx2);
-  filt[2] = _mm256_load_si256((__m256i const *)filt3_global_avx2);
-  filt[3] = _mm256_load_si256((__m256i const *)filt4_global_avx2);
-
-  prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs);
-
-  const __m256i round_const =
-      _mm256_set1_epi16((1 << (conv_params->round_0 - 1)) >> 1);
-  const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0 - 1);
-
-  (void)filter_params_y;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  for (i = 0; i < h; i += 2) {
-    for (j = 0; j < w; j += 8) {
-      const __m256i data = _mm256_permute2x128_si256(
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(&src_ptr[i * src_stride + j]))),
-          _mm256_castsi128_si256(_mm_loadu_si128(
-              (__m128i *)(&src_ptr[i * src_stride + j + src_stride]))),
-          0x20);
-
-      __m256i res = convolve_lowbd_x(data, coeffs, filt);
-
-      res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const), round_shift);
-
-      const __m256i res_lo_round =
-          _mm256_cvtepi16_epi32(_mm256_castsi256_si128(res));
-      const __m256i res_hi_round =
-          _mm256_cvtepi16_epi32(_mm256_extracti128_si256(res, 1));
-
-      const __m256i res_lo_shift = _mm256_slli_epi32(res_lo_round, bits);
-      const __m256i res_hi_shift = _mm256_slli_epi32(res_hi_round, bits);
-
-      // Accumulate values into the destination buffer
-      if (conv_params->use_jnt_comp_avg) {
-        mult_add_store_aligned_256(&dst[i * dst_stride + j], &res_lo_shift,
-                                   &wt0, &wt1, do_average);
-        mult_add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                                   &res_hi_shift, &wt0, &wt1, do_average);
-      } else {
-        add_store_aligned_256(&dst[i * dst_stride + j], &res_lo_shift,
-                              do_average);
-        add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                              &res_hi_shift, do_average);
-      }
-    }
-  }
-}
-
-void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride,
-                             const uint8_t *dst0, int dst_stride0, int w, int h,
-                             InterpFilterParams *filter_params_x,
-                             InterpFilterParams *filter_params_y,
-                             const int subpel_x_q4, const int subpel_y_q4,
-                             ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  int i, j;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const uint8_t *const src_ptr = src - fo_vert * src_stride;
-  // +1 to compensate for dividing the filter coeffs by 2
-  const int left_shift = FILTER_BITS - conv_params->round_0 + 1;
-  const __m256i round_const =
-      _mm256_set1_epi32((1 << conv_params->round_1) >> 1);
-  const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m256i wt0 = _mm256_set1_epi32(w0);
-  const __m256i wt1 = _mm256_set1_epi32(w1);
-  const int do_average = conv_params->do_average;
-  __m256i coeffs[4], s[8];
-
-  assert((FILTER_BITS - conv_params->round_0) >= 0);
-
-  prepare_coeffs_lowbd(filter_params_y, subpel_y_q4, coeffs);
-
-  (void)conv_params;
-  (void)filter_params_x;
-  (void)subpel_x_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  for (j = 0; j < w; j += 16) {
-    const uint8_t *data = &src_ptr[j];
-    __m256i src6;
-
-    // Load lines a and b. Line a to lower 128, line b to upper 128
-    const __m256i src_01a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 0 * src_stride))),
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
-        0x20);
-
-    const __m256i src_12a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 1 * src_stride))),
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
-        0x20);
-
-    const __m256i src_23a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 2 * src_stride))),
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
-        0x20);
-
-    const __m256i src_34a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 3 * src_stride))),
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
-        0x20);
-
-    const __m256i src_45a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 4 * src_stride))),
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
-        0x20);
-
-    src6 = _mm256_castsi128_si256(
-        _mm_loadu_si128((__m128i *)(data + 6 * src_stride)));
-    const __m256i src_56a = _mm256_permute2x128_si256(
-        _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)(data + 5 * src_stride))),
-        src6, 0x20);
-
-    s[0] = _mm256_unpacklo_epi8(src_01a, src_12a);
-    s[1] = _mm256_unpacklo_epi8(src_23a, src_34a);
-    s[2] = _mm256_unpacklo_epi8(src_45a, src_56a);
-
-    s[4] = _mm256_unpackhi_epi8(src_01a, src_12a);
-    s[5] = _mm256_unpackhi_epi8(src_23a, src_34a);
-    s[6] = _mm256_unpackhi_epi8(src_45a, src_56a);
-
-    for (i = 0; i < h; i += 2) {
-      data = &src_ptr[i * src_stride + j];
-      const __m256i src_67a = _mm256_permute2x128_si256(
-          src6,
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
-          0x20);
-
-      src6 = _mm256_castsi128_si256(
-          _mm_loadu_si128((__m128i *)(data + 8 * src_stride)));
-      const __m256i src_78a = _mm256_permute2x128_si256(
-          _mm256_castsi128_si256(
-              _mm_loadu_si128((__m128i *)(data + 7 * src_stride))),
-          src6, 0x20);
-
-      s[3] = _mm256_unpacklo_epi8(src_67a, src_78a);
-      s[7] = _mm256_unpackhi_epi8(src_67a, src_78a);
-
-      const __m256i res_lo = convolve_lowbd(s, coeffs);
-
-      const __m256i res_lo_0_32b =
-          _mm256_cvtepi16_epi32(_mm256_castsi256_si128(res_lo));
-      const __m256i res_lo_0_shift =
-          _mm256_slli_epi32(res_lo_0_32b, left_shift);
-      const __m256i res_lo_0_round = _mm256_sra_epi32(
-          _mm256_add_epi32(res_lo_0_shift, round_const), round_shift);
-
-      const __m256i res_lo_1_32b =
-          _mm256_cvtepi16_epi32(_mm256_extracti128_si256(res_lo, 1));
-      const __m256i res_lo_1_shift =
-          _mm256_slli_epi32(res_lo_1_32b, left_shift);
-      const __m256i res_lo_1_round = _mm256_sra_epi32(
-          _mm256_add_epi32(res_lo_1_shift, round_const), round_shift);
-
-      // Accumulate values into the destination buffer
-      if (conv_params->use_jnt_comp_avg) {
-        mult_add_store_aligned_256(&dst[i * dst_stride + j], &res_lo_0_round,
-                                   &wt0, &wt1, do_average);
-        mult_add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                                   &res_lo_1_round, &wt0, &wt1, do_average);
-      } else {
-        add_store_aligned_256(&dst[i * dst_stride + j], &res_lo_0_round,
-                              do_average);
-        add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                              &res_lo_1_round, do_average);
-      }
-
-      if (w - j > 8) {
-        const __m256i res_hi = convolve_lowbd(s + 4, coeffs);
-
-        const __m256i res_hi_0_32b =
-            _mm256_cvtepi16_epi32(_mm256_castsi256_si128(res_hi));
-        const __m256i res_hi_0_shift =
-            _mm256_slli_epi32(res_hi_0_32b, left_shift);
-        const __m256i res_hi_0_round = _mm256_sra_epi32(
-            _mm256_add_epi32(res_hi_0_shift, round_const), round_shift);
-
-        const __m256i res_hi_1_32b =
-            _mm256_cvtepi16_epi32(_mm256_extracti128_si256(res_hi, 1));
-        const __m256i res_hi_1_shift =
-            _mm256_slli_epi32(res_hi_1_32b, left_shift);
-        const __m256i res_hi_1_round = _mm256_sra_epi32(
-            _mm256_add_epi32(res_hi_1_shift, round_const), round_shift);
-
-        if (conv_params->use_jnt_comp_avg) {
-          mult_add_store_aligned_256(&dst[i * dst_stride + j + 8],
-                                     &res_hi_0_round, &wt0, &wt1, do_average);
-          mult_add_store_aligned_256(&dst[i * dst_stride + j + 8 + dst_stride],
-                                     &res_hi_1_round, &wt0, &wt1, do_average);
-        } else {
-          add_store_aligned_256(&dst[i * dst_stride + j + 8], &res_hi_0_round,
-                                do_average);
-          add_store_aligned_256(&dst[i * dst_stride + j + 8 + dst_stride],
-                                &res_hi_1_round, do_average);
-        }
-      }
-      s[0] = s[1];
-      s[1] = s[2];
-      s[2] = s[3];
-
-      s[4] = s[5];
-      s[5] = s[6];
-      s[6] = s[7];
-    }
-  }
-}
-
-void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst0,
-                              int dst_stride0, int w, int h,
-                              InterpFilterParams *filter_params_x,
-                              InterpFilterParams *filter_params_y,
-                              const int subpel_x_q4, const int subpel_y_q4,
-                              ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int bd = 8;
-  (void)dst0;
-  (void)dst_stride0;
-
-  DECLARE_ALIGNED(32, int16_t, im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * 8]);
-  int im_h = h + filter_params_y->taps - 1;
-  int im_stride = 8;
-  int i, j;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m256i wt0 = _mm256_set1_epi32(w0);
-  const __m256i wt1 = _mm256_set1_epi32(w1);
-  const int do_average = conv_params->do_average;
-  const __m128i wt0_128 = _mm256_castsi256_si128(wt0);
-  const __m128i wt1_128 = _mm256_castsi256_si128(wt1);
-  __m256i filt[4], s[8], coeffs_x[4], coeffs_y[4];
-
-  assert(conv_params->round_0 > 0);
-
-  filt[0] = _mm256_load_si256((__m256i const *)filt1_global_avx2);
-  filt[1] = _mm256_load_si256((__m256i const *)filt2_global_avx2);
-  filt[2] = _mm256_load_si256((__m256i const *)filt3_global_avx2);
-  filt[3] = _mm256_load_si256((__m256i const *)filt4_global_avx2);
-
-  prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs_x);
-  prepare_coeffs(filter_params_y, subpel_y_q4, coeffs_y);
-
-  const __m256i round_const_h = _mm256_set1_epi16(
-      ((1 << (conv_params->round_0 - 1)) >> 1) + (1 << (bd + FILTER_BITS - 2)));
-  const __m128i round_shift_h = _mm_cvtsi32_si128(conv_params->round_0 - 1);
-
-  const __m256i round_const_v = _mm256_set1_epi32(
-      ((1 << conv_params->round_1) >> 1) -
-      (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
-  const __m128i round_shift_v = _mm_cvtsi32_si128(conv_params->round_1);
-
-  for (j = 0; j < w; j += 8) {
-    /* Horizontal filter */
-    {
-      for (i = 0; i < im_h; i += 2) {
-        __m256i data = _mm256_castsi128_si256(
-            _mm_loadu_si128((__m128i *)&src_ptr[(i * src_stride) + j]));
-        if (i + 1 < im_h)
-          data = _mm256_inserti128_si256(
-              data,
-              _mm_loadu_si128(
-                  (__m128i *)&src_ptr[(i * src_stride) + j + src_stride]),
-              1);
-        __m256i res = convolve_lowbd_x(data, coeffs_x, filt);
-
-        res = _mm256_sra_epi16(_mm256_add_epi16(res, round_const_h),
-                               round_shift_h);
-
-        _mm256_store_si256((__m256i *)&im_block[i * im_stride], res);
-      }
-    }
-
-    /* Vertical filter */
-    {
-      __m256i s0 = _mm256_loadu_si256((__m256i *)(im_block + 0 * im_stride));
-      __m256i s1 = _mm256_loadu_si256((__m256i *)(im_block + 1 * im_stride));
-      __m256i s2 = _mm256_loadu_si256((__m256i *)(im_block + 2 * im_stride));
-      __m256i s3 = _mm256_loadu_si256((__m256i *)(im_block + 3 * im_stride));
-      __m256i s4 = _mm256_loadu_si256((__m256i *)(im_block + 4 * im_stride));
-      __m256i s5 = _mm256_loadu_si256((__m256i *)(im_block + 5 * im_stride));
-
-      s[0] = _mm256_unpacklo_epi16(s0, s1);
-      s[1] = _mm256_unpacklo_epi16(s2, s3);
-      s[2] = _mm256_unpacklo_epi16(s4, s5);
-
-      s[4] = _mm256_unpackhi_epi16(s0, s1);
-      s[5] = _mm256_unpackhi_epi16(s2, s3);
-      s[6] = _mm256_unpackhi_epi16(s4, s5);
-
-      for (i = 0; i < h; i += 2) {
-        const int16_t *data = &im_block[i * im_stride];
-
-        const __m256i s6 =
-            _mm256_loadu_si256((__m256i *)(data + 6 * im_stride));
-        const __m256i s7 =
-            _mm256_loadu_si256((__m256i *)(data + 7 * im_stride));
-
-        s[3] = _mm256_unpacklo_epi16(s6, s7);
-        s[7] = _mm256_unpackhi_epi16(s6, s7);
-
-        const __m256i res_a = convolve(s, coeffs_y);
-        const __m256i res_a_round = _mm256_sra_epi32(
-            _mm256_add_epi32(res_a, round_const_v), round_shift_v);
-
-        if (w - j > 4) {
-          const __m256i res_b = convolve(s + 4, coeffs_y);
-          const __m256i res_b_round = _mm256_sra_epi32(
-              _mm256_add_epi32(res_b, round_const_v), round_shift_v);
-          const __m256i res_ax =
-              _mm256_permute2x128_si256(res_a_round, res_b_round, 0x20);
-          const __m256i res_bx =
-              _mm256_permute2x128_si256(res_a_round, res_b_round, 0x31);
-
-          if (conv_params->use_jnt_comp_avg) {
-            mult_add_store_aligned_256(&dst[i * dst_stride + j], &res_ax, &wt0,
-                                       &wt1, do_average);
-            mult_add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                                       &res_bx, &wt0, &wt1, do_average);
-          } else {
-            add_store_aligned_256(&dst[i * dst_stride + j], &res_ax,
-                                  do_average);
-            add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                                  &res_bx, do_average);
-          }
-        } else {
-          const __m128i res_ax = _mm256_castsi256_si128(res_a_round);
-          const __m128i res_bx = _mm256_extracti128_si256(res_a_round, 1);
-
-          if (conv_params->use_jnt_comp_avg) {
-            mult_add_store(&dst[i * dst_stride + j], &res_ax, &wt0_128,
-                           &wt1_128, do_average);
-            mult_add_store(&dst[i * dst_stride + j + dst_stride], &res_bx,
-                           &wt0_128, &wt1_128, do_average);
-          } else {
-            add_store(&dst[i * dst_stride + j], &res_ax, do_average);
-            add_store(&dst[i * dst_stride + j + dst_stride], &res_bx,
-                      do_average);
-          }
-        }
-
-        s[0] = s[1];
-        s[1] = s[2];
-        s[2] = s[3];
-
-        s[4] = s[5];
-        s[5] = s[6];
-        s[6] = s[7];
-      }
-    }
-  }
-}
-
-void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride,
-                                   uint8_t *dst0, int dst_stride0, int w, int h,
-                                   InterpFilterParams *filter_params_x,
-                                   InterpFilterParams *filter_params_y,
-                                   const int subpel_x_q4, const int subpel_y_q4,
-                                   ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  (void)filter_params_x;
-  (void)filter_params_y;
-  (void)subpel_x_q4;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  const int bits =
-      FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
-  const __m128i left_shift = _mm_cvtsi32_si128(bits);
-  const int do_average = conv_params->do_average;
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m256i wt0 = _mm256_set1_epi32(w0);
-  const __m256i wt1 = _mm256_set1_epi32(w1);
-  const __m256i zero = _mm256_setzero_si256();
-  int i, j;
-
-  if (!(w % 16)) {
-    for (i = 0; i < h; i += 1) {
-      for (j = 0; j < w; j += 16) {
-        const __m256i src_16bit = _mm256_cvtepu8_epi16(
-            _mm_loadu_si128((__m128i *)(&src[i * src_stride + j])));
-
-        const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);
-        const __m256i res_lo =
-            _mm256_cvtepu16_epi32(_mm256_castsi256_si128(res));
-        const __m256i res_hi =
-            _mm256_cvtepu16_epi32(_mm256_extracti128_si256(res, 1));
-
-        if (conv_params->use_jnt_comp_avg) {
-          mult_add_store_aligned_256(&dst[i * dst_stride + j], &res_lo, &wt0,
-                                     &wt1, do_average);
-          mult_add_store_aligned_256(&dst[i * dst_stride + j + 8], &res_hi,
-                                     &wt0, &wt1, do_average);
-        } else {
-          add_store_aligned_256(&dst[i * dst_stride + j], &res_lo, do_average);
-          add_store_aligned_256(&dst[i * dst_stride + j + 8], &res_hi,
-                                do_average);
-        }
-      }
-    }
-  } else if (!(w % 4)) {
-    for (i = 0; i < h; i += 2) {
-      for (j = 0; j < w; j += 8) {
-        const __m128i src_row_0 =
-            _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j]));
-        const __m128i src_row_1 =
-            _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j + src_stride]));
-        // since not all compilers yet support _mm256_set_m128i()
-        const __m256i src_10 = _mm256_insertf128_si256(
-            _mm256_castsi128_si256(src_row_0), src_row_1, 1);
-
-        const __m256i src_16bit = _mm256_unpacklo_epi8(src_10, zero);
-
-        const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);
-
-        const __m256i res_lo =
-            _mm256_cvtepu16_epi32(_mm256_castsi256_si128(res));
-        const __m256i res_hi =
-            _mm256_cvtepu16_epi32(_mm256_extracti128_si256(res, 1));
-
-        if (conv_params->use_jnt_comp_avg) {
-          mult_add_store_aligned_256(&dst[i * dst_stride + j], &res_lo, &wt0,
-                                     &wt1, do_average);
-          mult_add_store_aligned_256(&dst[i * dst_stride + j + dst_stride],
-                                     &res_hi, &wt0, &wt1, do_average);
-        } else {
-          add_store_aligned_256(&dst[i * dst_stride + j], &res_lo, do_average);
-          add_store_aligned_256(&dst[i * dst_stride + j + dst_stride], &res_hi,
-                                do_average);
-        }
-      }
-    }
-  }
-}
-#endif

diff --git a/av1/common/x86/jnt_convolve_sse2.c b/av1/common/x86/jnt_convolve_sse2.c
index 166ede1..5c9b2ae 100644
--- a/av1/common/x86/jnt_convolve_sse2.c
+++ b/av1/common/x86/jnt_convolve_sse2.c

@@ -15,7 +15,6 @@
 #include "aom_dsp/aom_filter.h"
 #include "aom_dsp/x86/convolve_sse2.h"
 
-#if CONFIG_LOWPRECISION_BLEND
 void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst0,
                              int dst_stride0, int w, int h,
                              InterpFilterParams *filter_params_x,
@@ -383,4 +382,3 @@
     } while (j < w);
   }
 }
-#endif

diff --git a/av1/common/x86/jnt_convolve_sse4.c b/av1/common/x86/jnt_convolve_sse4.c
deleted file mode 100644
index fde9677..0000000
--- a/av1/common/x86/jnt_convolve_sse4.c
+++ /dev/null

@@ -1,576 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-#include <smmintrin.h>
-
-#include "./aom_dsp_rtcd.h"
-#include "aom_dsp/aom_convolve.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/x86/convolve_common_intrin.h"
-#include "aom_dsp/x86/convolve_sse4_1.h"
-#include "av1/common/convolve.h"
-
-#if !CONFIG_LOWPRECISION_BLEND
-static INLINE void prepare_coeffs(const InterpFilterParams *const filter_params,
-                                  const int subpel_q4,
-                                  __m128i *const coeffs /* [4] */) {
-  const int16_t *const y_filter = av1_get_interp_filter_subpel_kernel(
-      *filter_params, subpel_q4 & SUBPEL_MASK);
-  const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-  // coeffs 0 1 0 1 2 3 2 3
-  const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
-  // coeffs 4 5 4 5 6 7 6 7
-  const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
-  coeffs[0] = _mm_unpacklo_epi64(tmp_0, tmp_0);  // coeffs 0 1 0 1 0 1 0 1
-  coeffs[1] = _mm_unpackhi_epi64(tmp_0, tmp_0);  // coeffs 2 3 2 3 2 3 2 3
-  coeffs[2] = _mm_unpacklo_epi64(tmp_1, tmp_1);  // coeffs 4 5 4 5 4 5 4 5
-  coeffs[3] = _mm_unpackhi_epi64(tmp_1, tmp_1);  // coeffs 6 7 6 7 6 7 6 7
-}
-
-static INLINE __m128i convolve(const __m128i *const s,
-                               const __m128i *const coeffs) {
-  const __m128i d0 = _mm_madd_epi16(s[0], coeffs[0]);
-  const __m128i d1 = _mm_madd_epi16(s[1], coeffs[1]);
-  const __m128i d2 = _mm_madd_epi16(s[2], coeffs[2]);
-  const __m128i d3 = _mm_madd_epi16(s[3], coeffs[3]);
-  const __m128i d = _mm_add_epi32(_mm_add_epi32(d0, d1), _mm_add_epi32(d2, d3));
-  return d;
-}
-
-static INLINE __m128i convolve_lo_x(const __m128i *const s,
-                                    const __m128i *const coeffs) {
-  __m128i ss[4];
-  ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128());
-  ss[1] = _mm_unpacklo_epi8(s[1], _mm_setzero_si128());
-  ss[2] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128());
-  ss[3] = _mm_unpacklo_epi8(s[3], _mm_setzero_si128());
-  return convolve(ss, coeffs);
-}
-
-static INLINE __m128i convolve_lo_y(const __m128i *const s,
-                                    const __m128i *const coeffs) {
-  __m128i ss[4];
-  ss[0] = _mm_unpacklo_epi8(s[0], _mm_setzero_si128());
-  ss[1] = _mm_unpacklo_epi8(s[2], _mm_setzero_si128());
-  ss[2] = _mm_unpacklo_epi8(s[4], _mm_setzero_si128());
-  ss[3] = _mm_unpacklo_epi8(s[6], _mm_setzero_si128());
-  return convolve(ss, coeffs);
-}
-
-static INLINE __m128i convolve_hi_y(const __m128i *const s,
-                                    const __m128i *const coeffs) {
-  __m128i ss[4];
-  ss[0] = _mm_unpackhi_epi8(s[0], _mm_setzero_si128());
-  ss[1] = _mm_unpackhi_epi8(s[2], _mm_setzero_si128());
-  ss[2] = _mm_unpackhi_epi8(s[4], _mm_setzero_si128());
-  ss[3] = _mm_unpackhi_epi8(s[6], _mm_setzero_si128());
-  return convolve(ss, coeffs);
-}
-void av1_jnt_convolve_y_sse4_1(const uint8_t *src, int src_stride,
-                               const uint8_t *dst0, int dst_stride0, int w,
-                               int h, InterpFilterParams *filter_params_x,
-                               InterpFilterParams *filter_params_y,
-                               const int subpel_x_q4, const int subpel_y_q4,
-                               ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  const int dst_stride = conv_params->dst_stride;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const uint8_t *src_ptr = src - fo_vert * src_stride;
-  const int bits = FILTER_BITS - conv_params->round_0;
-  const __m128i left_shift = _mm_cvtsi32_si128(bits);
-  const __m128i wt0 = _mm_set1_epi32(conv_params->fwd_offset);
-  const __m128i wt1 = _mm_set1_epi32(conv_params->bck_offset);
-  const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_1) >> 1);
-  const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-  __m128i coeffs[4];
-
-  (void)filter_params_x;
-  (void)subpel_x_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  prepare_coeffs(filter_params_y, subpel_y_q4, coeffs);
-
-  if (w == 4) {
-    __m128i s[8], src6, res, res_shift;
-    src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride));
-    s[0] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)),
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)));
-    s[1] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)),
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)));
-    s[2] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)),
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)));
-    s[3] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)),
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)));
-    s[4] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)),
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)));
-    s[5] = _mm_unpacklo_epi8(
-        _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6);
-
-    do {
-      s[6] = _mm_unpacklo_epi8(
-          src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)));
-      src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 8 * src_stride));
-      s[7] = _mm_unpacklo_epi8(
-          _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6);
-
-      res = convolve_lo_y(s + 0, coeffs);
-      res_shift = _mm_sll_epi32(res, left_shift);
-      res_shift =
-          _mm_sra_epi32(_mm_add_epi32(res_shift, round_const), round_shift);
-      if (conv_params->use_jnt_comp_avg)
-        mult_add_store(dst, &res_shift, &wt0, &wt1, conv_params->do_average);
-      else
-        add_store(dst, &res_shift, conv_params->do_average);
-      src_ptr += src_stride;
-      dst += dst_stride;
-
-      res = convolve_lo_y(s + 1, coeffs);
-      res_shift = _mm_sll_epi32(res, left_shift);
-      res_shift =
-          _mm_sra_epi32(_mm_add_epi32(res_shift, round_const), round_shift);
-      if (conv_params->use_jnt_comp_avg)
-        mult_add_store(dst, &res_shift, &wt0, &wt1, conv_params->do_average);
-      else
-        add_store(dst, &res_shift, conv_params->do_average);
-      src_ptr += src_stride;
-      dst += dst_stride;
-
-      s[0] = s[2];
-      s[1] = s[3];
-      s[2] = s[4];
-      s[3] = s[5];
-      s[4] = s[6];
-      s[5] = s[7];
-      h -= 2;
-    } while (h);
-  } else {
-    assert(!(w % 8));
-    int j = 0;
-    do {
-      __m128i s[8], src6, res_lo, res_hi, res_lo_shift, res_hi_shift;
-      const uint8_t *data = &src_ptr[j];
-
-      src6 = _mm_loadl_epi64((__m128i *)(data + 6 * src_stride));
-      s[0] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 0 * src_stride)),
-          _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)));
-      s[1] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 1 * src_stride)),
-          _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)));
-      s[2] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 2 * src_stride)),
-          _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)));
-      s[3] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 3 * src_stride)),
-          _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)));
-      s[4] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 4 * src_stride)),
-          _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)));
-      s[5] = _mm_unpacklo_epi8(
-          _mm_loadl_epi64((__m128i *)(data + 5 * src_stride)), src6);
-
-      int i = 0;
-      do {
-        data = &src_ptr[i * src_stride + j];
-        s[6] = _mm_unpacklo_epi8(
-            src6, _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)));
-        src6 = _mm_loadl_epi64((__m128i *)(data + 8 * src_stride));
-        s[7] = _mm_unpacklo_epi8(
-            _mm_loadl_epi64((__m128i *)(data + 7 * src_stride)), src6);
-
-        res_lo = convolve_lo_y(s, coeffs);  // Filter low index pixels
-        res_hi = convolve_hi_y(s, coeffs);  // Filter high index pixels
-        res_lo_shift = _mm_sll_epi32(res_lo, left_shift);
-        res_hi_shift = _mm_sll_epi32(res_hi, left_shift);
-        res_lo_shift = _mm_sra_epi32(_mm_add_epi32(res_lo_shift, round_const),
-                                     round_shift);
-        res_hi_shift = _mm_sra_epi32(_mm_add_epi32(res_hi_shift, round_const),
-                                     round_shift);
-        if (conv_params->use_jnt_comp_avg) {
-          mult_add_store(dst + i * dst_stride + j + 0, &res_lo_shift, &wt0,
-                         &wt1, conv_params->do_average);
-          mult_add_store(dst + i * dst_stride + j + 4, &res_hi_shift, &wt0,
-                         &wt1, conv_params->do_average);
-        } else {
-          add_store(dst + i * dst_stride + j + 0, &res_lo_shift,
-                    conv_params->do_average);
-          add_store(dst + i * dst_stride + j + 4, &res_hi_shift,
-                    conv_params->do_average);
-        }
-        i++;
-
-        res_lo = convolve_lo_y(s + 1, coeffs);  // Filter low index pixels
-        res_hi = convolve_hi_y(s + 1, coeffs);  // Filter high index pixels
-        res_lo_shift = _mm_sll_epi32(res_lo, left_shift);
-        res_hi_shift = _mm_sll_epi32(res_hi, left_shift);
-        res_lo_shift = _mm_sra_epi32(_mm_add_epi32(res_lo_shift, round_const),
-                                     round_shift);
-        res_hi_shift = _mm_sra_epi32(_mm_add_epi32(res_hi_shift, round_const),
-                                     round_shift);
-        if (conv_params->use_jnt_comp_avg) {
-          mult_add_store(dst + i * dst_stride + j + 0, &res_lo_shift, &wt0,
-                         &wt1, conv_params->do_average);
-          mult_add_store(dst + i * dst_stride + j + 4, &res_hi_shift, &wt0,
-                         &wt1, conv_params->do_average);
-        } else {
-          add_store(dst + i * dst_stride + j + 0, &res_lo_shift,
-                    conv_params->do_average);
-          add_store(dst + i * dst_stride + j + 4, &res_hi_shift,
-                    conv_params->do_average);
-        }
-        i++;
-
-        s[0] = s[2];
-        s[1] = s[3];
-        s[2] = s[4];
-        s[3] = s[5];
-        s[4] = s[6];
-        s[5] = s[7];
-      } while (i < h);
-      j += 8;
-    } while (j < w);
-  }
-}
-
-void av1_jnt_convolve_x_sse4_1(const uint8_t *src, int src_stride,
-                               const uint8_t *dst0, int dst_stride0, int w,
-                               int h, InterpFilterParams *filter_params_x,
-                               InterpFilterParams *filter_params_y,
-                               const int subpel_x_q4, const int subpel_y_q4,
-                               ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  const int dst_stride = conv_params->dst_stride;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint8_t *src_ptr = src - fo_horiz;
-  const int bits = FILTER_BITS - conv_params->round_1;
-  const __m128i left_shift = _mm_cvtsi32_si128(bits);
-  const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_0) >> 1);
-  const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m128i wt0 = _mm_set1_epi32(w0);
-  const __m128i wt1 = _mm_set1_epi32(w1);
-  __m128i coeffs[4];
-
-  (void)filter_params_y;
-  (void)subpel_y_q4;
-  (void)dst0;
-  (void)dst_stride0;
-
-  prepare_coeffs(filter_params_x, subpel_x_q4, coeffs);
-
-  if (w == 4) {
-    do {
-      const __m128i data = _mm_loadu_si128((__m128i *)src_ptr);
-      __m128i s[4];
-
-      s[0] = _mm_unpacklo_epi8(data, _mm_srli_si128(data, 1));
-      s[1] =
-          _mm_unpacklo_epi8(_mm_srli_si128(data, 2), _mm_srli_si128(data, 3));
-      s[2] =
-          _mm_unpacklo_epi8(_mm_srli_si128(data, 4), _mm_srli_si128(data, 5));
-      s[3] =
-          _mm_unpacklo_epi8(_mm_srli_si128(data, 6), _mm_srli_si128(data, 7));
-      const __m128i res_lo = convolve_lo_x(s, coeffs);
-      const __m128i res_lo_round =
-          _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
-      const __m128i res_lo_shift = _mm_sll_epi32(res_lo_round, left_shift);
-
-      // Accumulate values into the destination buffer
-      if (conv_params->use_jnt_comp_avg)
-        mult_add_store(dst, &res_lo_shift, &wt0, &wt1, conv_params->do_average);
-      else
-        add_store(dst, &res_lo_shift, conv_params->do_average);
-      src_ptr += src_stride;
-      dst += dst_stride;
-    } while (--h);
-  } else {
-    assert(!(w % 8));
-    int i = 0;
-    do {
-      int j = 0;
-      do {
-        const __m128i data =
-            _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
-        __m128i s[4];
-
-        // Filter even-index pixels
-        s[0] = data;
-        s[1] = _mm_srli_si128(data, 2);
-        s[2] = _mm_srli_si128(data, 4);
-        s[3] = _mm_srli_si128(data, 6);
-        const __m128i res_even = convolve_lo_x(s, coeffs);
-
-        // Filter odd-index pixels
-        s[0] = _mm_srli_si128(data, 1);
-        s[1] = _mm_srli_si128(data, 3);
-        s[2] = _mm_srli_si128(data, 5);
-        s[3] = _mm_srli_si128(data, 7);
-        const __m128i res_odd = convolve_lo_x(s, coeffs);
-
-        // Rearrange pixels back into the order 0 ... 7
-        const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
-        const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-        const __m128i res_lo_round =
-            _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
-        const __m128i res_hi_round =
-            _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-        const __m128i res_lo_shift = _mm_sll_epi32(res_lo_round, left_shift);
-        const __m128i res_hi_shift = _mm_sll_epi32(res_hi_round, left_shift);
-
-        // Accumulate values into the destination buffer
-        if (conv_params->use_jnt_comp_avg) {
-          mult_add_store(dst + i * dst_stride + j + 0, &res_lo_shift, &wt0,
-                         &wt1, conv_params->do_average);
-          mult_add_store(dst + i * dst_stride + j + 4, &res_hi_shift, &wt0,
-                         &wt1, conv_params->do_average);
-        } else {
-          add_store(dst + i * dst_stride + j + 0, &res_lo_shift,
-                    conv_params->do_average);
-          add_store(dst + i * dst_stride + j + 4, &res_hi_shift,
-                    conv_params->do_average);
-        }
-        j += 8;
-      } while (j < w);
-    } while (++i < h);
-  }
-}
-
-void av1_jnt_convolve_2d_sse4_1(const uint8_t *src, int src_stride,
-                                uint8_t *dst0, int dst_stride0, int w, int h,
-                                InterpFilterParams *filter_params_x,
-                                InterpFilterParams *filter_params_y,
-                                const int subpel_x_q4, const int subpel_y_q4,
-                                ConvolveParams *conv_params) {
-  CONV_BUF_TYPE *dst = conv_params->dst;
-  int dst_stride = conv_params->dst_stride;
-  const int bd = 8;
-  (void)dst0;
-  (void)dst_stride0;
-
-  DECLARE_ALIGNED(16, int16_t,
-                  im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
-  int im_h = h + filter_params_y->taps - 1;
-  int im_stride = MAX_SB_SIZE;
-  int i, j;
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const int do_average = conv_params->do_average;
-  const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
-  const __m128i zero = _mm_setzero_si128();
-
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m128i wt0 = _mm_set_epi32(w0, w0, w0, w0);
-  const __m128i wt1 = _mm_set_epi32(w1, w1, w1, w1);
-
-  /* Horizontal filter */
-  {
-    const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
-        *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
-    const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
-    // coeffs 0 1 0 1 2 3 2 3
-    const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
-    // coeffs 4 5 4 5 6 7 6 7
-    const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
-    // coeffs 0 1 0 1 0 1 0 1
-    const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
-    // coeffs 2 3 2 3 2 3 2 3
-    const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
-    // coeffs 4 5 4 5 4 5 4 5
-    const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
-    // coeffs 6 7 6 7 6 7 6 7
-    const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
-    const __m128i round_const = _mm_set1_epi32(
-        ((1 << conv_params->round_0) >> 1) + (1 << (bd + FILTER_BITS - 1)));
-    const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
-    for (i = 0; i < im_h; ++i) {
-      for (j = 0; j < w; j += 8) {
-        const __m128i data =
-            _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
-
-        const __m128i src_lo = _mm_unpacklo_epi8(data, zero);
-        const __m128i src_hi = _mm_unpackhi_epi8(data, zero);
-
-        // Filter even-index pixels
-        const __m128i res_0 = _mm_madd_epi16(src_lo, coeff_01);
-        const __m128i src_2 = _mm_alignr_epi8(src_hi, src_lo, 4);
-        const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
-        const __m128i src_4 = _mm_alignr_epi8(src_hi, src_lo, 8);
-        const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
-        const __m128i src_6 = _mm_alignr_epi8(src_hi, src_lo, 12);
-        const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
-        __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
-                                         _mm_add_epi32(res_2, res_6));
-        res_even =
-            _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
-        // Filter odd-index pixels
-        const __m128i src_1 = _mm_alignr_epi8(src_hi, src_lo, 2);
-        const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
-        const __m128i src_3 = _mm_alignr_epi8(src_hi, src_lo, 6);
-        const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
-        const __m128i src_5 = _mm_alignr_epi8(src_hi, src_lo, 10);
-        const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
-        const __m128i src_7 = _mm_alignr_epi8(src_hi, src_lo, 14);
-        const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
-        __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
-                                        _mm_add_epi32(res_3, res_7));
-        res_odd =
-            _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
-
-        // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
-        __m128i res = _mm_packs_epi32(res_even, res_odd);
-        _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res);
-      }
-    }
-  }
-
-  /* Vertical filter */
-  {
-    const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
-        *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
-    const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
-    // coeffs 0 1 0 1 2 3 2 3
-    const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
-    // coeffs 4 5 4 5 6 7 6 7
-    const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
-    // coeffs 0 1 0 1 0 1 0 1
-    const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
-    // coeffs 2 3 2 3 2 3 2 3
-    const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
-    // coeffs 4 5 4 5 4 5 4 5
-    const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
-    // coeffs 6 7 6 7 6 7 6 7
-    const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
-    const __m128i round_const = _mm_set1_epi32(
-        ((1 << conv_params->round_1) >> 1) -
-        (1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)));
-    const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
-    for (i = 0; i < h; ++i) {
-      for (j = 0; j < w; j += 8) {
-        // Filter even-index pixels
-        const int16_t *data = &im_block[i * im_stride + j];
-        const __m128i src_0 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
-                               *(__m128i *)(data + 1 * im_stride));
-        const __m128i src_2 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
-                               *(__m128i *)(data + 3 * im_stride));
-        const __m128i src_4 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
-                               *(__m128i *)(data + 5 * im_stride));
-        const __m128i src_6 =
-            _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
-                               *(__m128i *)(data + 7 * im_stride));
-
-        const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
-        const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
-        const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
-        const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
-        const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
-                                               _mm_add_epi32(res_4, res_6));
-
-        // Filter odd-index pixels
-        const __m128i src_1 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
-                               *(__m128i *)(data + 1 * im_stride));
-        const __m128i src_3 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
-                               *(__m128i *)(data + 3 * im_stride));
-        const __m128i src_5 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
-                               *(__m128i *)(data + 5 * im_stride));
-        const __m128i src_7 =
-            _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
-                               *(__m128i *)(data + 7 * im_stride));
-
-        const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
-        const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
-        const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
-        const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
-        const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
-                                              _mm_add_epi32(res_5, res_7));
-
-        // Rearrange pixels back into the order 0 ... 7
-        const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
-        const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
-        const __m128i res_lo_round =
-            _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
-        const __m128i res_hi_round =
-            _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
-        if (conv_params->use_jnt_comp_avg) {
-          // FIXME(chengchen): validate this implementation
-          // original c function at: av1/common/convolve.c: av1_convolve_2d_c
-          __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
-          if (do_average) {
-            _mm_storeu_si128(
-                p + 0,
-                _mm_srai_epi32(
-                    _mm_add_epi32(_mm_mullo_epi32(_mm_loadu_si128(p + 0), wt0),
-                                  _mm_mullo_epi32(res_lo_round, wt1)),
-                    DIST_PRECISION_BITS));
-            _mm_storeu_si128(
-                p + 1,
-                _mm_srai_epi32(
-                    _mm_add_epi32(_mm_mullo_epi32(_mm_loadu_si128(p + 1), wt0),
-                                  _mm_mullo_epi32(res_hi_round, wt1)),
-                    DIST_PRECISION_BITS));
-          } else {
-            _mm_storeu_si128(p + 0, res_lo_round);
-            _mm_storeu_si128(p + 1, res_hi_round);
-          }
-        } else {
-          // Accumulate values into the destination buffer
-          __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
-          if (do_average) {
-            _mm_storeu_si128(
-                p + 0,
-                _mm_srai_epi32(
-                    _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round), 1));
-            _mm_storeu_si128(
-                p + 1,
-                _mm_srai_epi32(
-                    _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round), 1));
-          } else {
-            _mm_storeu_si128(p + 0, res_lo_round);
-            _mm_storeu_si128(p + 1, res_hi_round);
-          }
-        }
-      }
-    }
-  }
-}
-#endif

diff --git a/av1/common/x86/jnt_convolve_ssse3.c b/av1/common/x86/jnt_convolve_ssse3.c
index 7b2b4fb..ae84d91 100644
--- a/av1/common/x86/jnt_convolve_ssse3.c
+++ b/av1/common/x86/jnt_convolve_ssse3.c

@@ -15,7 +15,6 @@
 #include "aom_dsp/aom_filter.h"
 #include "aom_dsp/x86/convolve_sse2.h"
 
-#if CONFIG_LOWPRECISION_BLEND
 void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride,
                                uint8_t *dst0, int dst_stride0, int w, int h,
                                InterpFilterParams *filter_params_x,
@@ -230,4 +229,3 @@
     }
   }
 }
-#endif

diff --git a/av1/common/x86/warp_plane_sse4.c b/av1/common/x86/warp_plane_sse4.c
index 7c279a9..f586b55 100644
--- a/av1/common/x86/warp_plane_sse4.c
+++ b/av1/common/x86/warp_plane_sse4.c

@@ -208,7 +208,6 @@
                             int subsampling_x, int subsampling_y,
                             ConvolveParams *conv_params, int16_t alpha,
                             int16_t beta, int16_t gamma, int16_t delta) {
-#if CONFIG_LOWPRECISION_BLEND
   int comp_avg = conv_params->do_average;
   __m128i tmp[15];
   int i, j, k;
@@ -589,341 +588,4 @@
       }
     }
   }
-#else   // CONFIG_LOWPRECISION_BLEND
-  int comp_avg = conv_params->do_average;
-  __m128i tmp[15];
-  int i, j, k;
-  const int bd = 8;
-  const int reduce_bits_horiz = conv_params->round_0;
-  const int reduce_bits_vert = conv_params->is_compound
-                                   ? conv_params->round_1
-                                   : 2 * FILTER_BITS - reduce_bits_horiz;
-  const int offset_bits_horiz = bd + FILTER_BITS - 1;
-  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m128i wt0 = _mm_set1_epi32(w0);
-  const __m128i wt1 = _mm_set1_epi32(w1);
-  assert(FILTER_BITS == FILTER_BITS);
-
-  /* Note: For this code to work, the left/right frame borders need to be
-     extended by at least 13 pixels each. By the time we get here, other
-     code will have set up this border, but we allow an explicit check
-     for debugging purposes.
-  */
-  /*for (i = 0; i < height; ++i) {
-    for (j = 0; j < 13; ++j) {
-      assert(ref[i * stride - 13 + j] == ref[i * stride]);
-      assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
-    }
-  }*/
-
-  for (i = 0; i < p_height; i += 8) {
-    for (j = 0; j < p_width; j += 8) {
-      const int32_t src_x = (p_col + j + 4) << subsampling_x;
-      const int32_t src_y = (p_row + i + 4) << subsampling_y;
-      const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
-      const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
-      const int32_t x4 = dst_x >> subsampling_x;
-      const int32_t y4 = dst_y >> subsampling_y;
-
-      int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
-      int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-      int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
-      int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
-      // Add in all the constant terms, including rounding and offset
-      sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
-             (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
-      sy4 += gamma * (-4) + delta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
-             (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
-
-      sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-      sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-
-      // Horizontal filter
-      // If the block is aligned such that, after clamping, every sample
-      // would be taken from the leftmost/rightmost column, then we can
-      // skip the expensive horizontal filter.
-      if (ix4 <= -7) {
-        for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
-          int iy = iy4 + k;
-          if (iy < 0)
-            iy = 0;
-          else if (iy > height - 1)
-            iy = height - 1;
-          tmp[k + 7] = _mm_set1_epi16(
-              (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
-              ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
-        }
-      } else if (ix4 >= width + 6) {
-        for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
-          int iy = iy4 + k;
-          if (iy < 0)
-            iy = 0;
-          else if (iy > height - 1)
-            iy = height - 1;
-          tmp[k + 7] =
-              _mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
-                             ref[iy * stride + (width - 1)] *
-                                 (1 << (FILTER_BITS - reduce_bits_horiz)));
-        }
-      } else {
-        for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
-          int iy = iy4 + k;
-          if (iy < 0)
-            iy = 0;
-          else if (iy > height - 1)
-            iy = height - 1;
-          int sx = sx4 + beta * (k + 4);
-
-          // Load source pixels
-          const __m128i src =
-              _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
-          const __m128i src_even =
-              _mm_shuffle_epi8(src, _mm_loadu_si128((__m128i *)even_mask));
-          const __m128i src_odd =
-              _mm_shuffle_epi8(src, _mm_loadu_si128((__m128i *)odd_mask));
-
-          // Filter even-index pixels
-          const __m128i tmp_0 = _mm_loadl_epi64((
-              __m128i *)&filter_8bit[(sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS]);
-          const __m128i tmp_1 = _mm_loadl_epi64((
-              __m128i *)&filter_8bit[(sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS]);
-          const __m128i tmp_2 = _mm_loadl_epi64((
-              __m128i *)&filter_8bit[(sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS]);
-          const __m128i tmp_3 = _mm_loadl_epi64((
-              __m128i *)&filter_8bit[(sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS]);
-          const __m128i tmp_4 = _mm_loadl_epi64((
-              __m128i *)&filter_8bit[(sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS]);
-          const __m128i tmp_5 = _mm_loadl_epi64((
-              __m128i *)&filter_8bit[(sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS]);
-          const __m128i tmp_6 = _mm_loadl_epi64((
-              __m128i *)&filter_8bit[(sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS]);
-          const __m128i tmp_7 = _mm_loadl_epi64((
-              __m128i *)&filter_8bit[(sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS]);
-
-          // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 0 2
-          const __m128i tmp_8 = _mm_unpacklo_epi16(tmp_0, tmp_2);
-          // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 1 3
-          const __m128i tmp_9 = _mm_unpacklo_epi16(tmp_1, tmp_3);
-          // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 4 6
-          const __m128i tmp_10 = _mm_unpacklo_epi16(tmp_4, tmp_6);
-          // Coeffs 0 2 0 2 4 6 4 6 1 3 1 3 5 7 5 7 for pixels 5 7
-          const __m128i tmp_11 = _mm_unpacklo_epi16(tmp_5, tmp_7);
-
-          // Coeffs 0 2 0 2 0 2 0 2 4 6 4 6 4 6 4 6 for pixels 0 2 4 6
-          const __m128i tmp_12 = _mm_unpacklo_epi32(tmp_8, tmp_10);
-          // Coeffs 1 3 1 3 1 3 1 3 5 7 5 7 5 7 5 7 for pixels 0 2 4 6
-          const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_8, tmp_10);
-          // Coeffs 0 2 0 2 0 2 0 2 4 6 4 6 4 6 4 6 for pixels 1 3 5 7
-          const __m128i tmp_14 = _mm_unpacklo_epi32(tmp_9, tmp_11);
-          // Coeffs 1 3 1 3 1 3 1 3 5 7 5 7 5 7 5 7 for pixels 1 3 5 7
-          const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_9, tmp_11);
-
-          // Coeffs 0 2 for pixels 0 2 4 6 1 3 5 7
-          const __m128i coeff_02 = _mm_unpacklo_epi64(tmp_12, tmp_14);
-          // Coeffs 4 6 for pixels 0 2 4 6 1 3 5 7
-          const __m128i coeff_46 = _mm_unpackhi_epi64(tmp_12, tmp_14);
-          // Coeffs 1 3 for pixels 0 2 4 6 1 3 5 7
-          const __m128i coeff_13 = _mm_unpacklo_epi64(tmp_13, tmp_15);
-          // Coeffs 5 7 for pixels 0 2 4 6 1 3 5 7
-          const __m128i coeff_57 = _mm_unpackhi_epi64(tmp_13, tmp_15);
-
-          // The pixel order we need for 'src' is:
-          // 0 2 2 4 4 6 6 8 1 3 3 5 5 7 7 9
-          const __m128i src_02 = _mm_unpacklo_epi64(src_even, src_odd);
-          const __m128i res_02 = _mm_maddubs_epi16(src_02, coeff_02);
-          // 4 6 6 8 8 10 10 12 5 7 7 9 9 11 11 13
-          const __m128i src_46 = _mm_unpacklo_epi64(_mm_srli_si128(src_even, 4),
-                                                    _mm_srli_si128(src_odd, 4));
-          const __m128i res_46 = _mm_maddubs_epi16(src_46, coeff_46);
-          // 1 3 3 5 5 7 7 9 2 4 4 6 6 8 8 10
-          const __m128i src_13 =
-              _mm_unpacklo_epi64(src_odd, _mm_srli_si128(src_even, 2));
-          const __m128i res_13 = _mm_maddubs_epi16(src_13, coeff_13);
-          // 5 7 7 9 9 11 11 13 6 8 8 10 10 12 12 14
-          const __m128i src_57 = _mm_unpacklo_epi64(
-              _mm_srli_si128(src_odd, 4), _mm_srli_si128(src_even, 6));
-          const __m128i res_57 = _mm_maddubs_epi16(src_57, coeff_57);
-
-          const __m128i round_const = _mm_set1_epi16(
-              (1 << offset_bits_horiz) + ((1 << reduce_bits_horiz) >> 1));
-
-          // Note: The values res_02 + res_46 and res_13 + res_57 both
-          // fit into int16s at this point, but their sum may be too wide to fit
-          // into an int16. However, once we also add round_const, the sum of
-          // all of these fits into a uint16.
-          //
-          // The wrapping behaviour of _mm_add_* is used here to make sure we
-          // get the correct result despite converting between different
-          // (implicit) types.
-          const __m128i res_even = _mm_add_epi16(res_02, res_46);
-          const __m128i res_odd = _mm_add_epi16(res_13, res_57);
-          const __m128i res =
-              _mm_add_epi16(_mm_add_epi16(res_even, res_odd), round_const);
-          tmp[k + 7] = _mm_srl_epi16(res, _mm_cvtsi32_si128(reduce_bits_horiz));
-        }
-      }
-
-      // Vertical filter
-      for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
-        int sy = sy4 + delta * (k + 4);
-
-        // Load from tmp and rearrange pairs of consecutive rows into the
-        // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 7 7
-        const __m128i *src = tmp + (k + 4);
-        const __m128i src_0 = _mm_unpacklo_epi16(src[0], src[1]);
-        const __m128i src_2 = _mm_unpacklo_epi16(src[2], src[3]);
-        const __m128i src_4 = _mm_unpacklo_epi16(src[4], src[5]);
-        const __m128i src_6 = _mm_unpacklo_epi16(src[6], src[7]);
-
-        // Filter even-index pixels
-        const __m128i tmp_0 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_2 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_4 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_6 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
-        const __m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
-        const __m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
-        const __m128i tmp_12 = _mm_unpackhi_epi32(tmp_0, tmp_2);
-        const __m128i tmp_14 = _mm_unpackhi_epi32(tmp_4, tmp_6);
-
-        const __m128i coeff_0 = _mm_unpacklo_epi64(tmp_8, tmp_10);
-        const __m128i coeff_2 = _mm_unpackhi_epi64(tmp_8, tmp_10);
-        const __m128i coeff_4 = _mm_unpacklo_epi64(tmp_12, tmp_14);
-        const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
-
-        const __m128i res_0 = _mm_madd_epi16(src_0, coeff_0);
-        const __m128i res_2 = _mm_madd_epi16(src_2, coeff_2);
-        const __m128i res_4 = _mm_madd_epi16(src_4, coeff_4);
-        const __m128i res_6 = _mm_madd_epi16(src_6, coeff_6);
-
-        const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
-                                               _mm_add_epi32(res_4, res_6));
-
-        // Filter odd-index pixels
-        const __m128i src_1 = _mm_unpackhi_epi16(src[0], src[1]);
-        const __m128i src_3 = _mm_unpackhi_epi16(src[2], src[3]);
-        const __m128i src_5 = _mm_unpackhi_epi16(src[4], src[5]);
-        const __m128i src_7 = _mm_unpackhi_epi16(src[6], src[7]);
-
-        const __m128i tmp_1 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_3 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_5 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS)));
-        const __m128i tmp_7 = _mm_loadu_si128(
-            (__m128i *)(warped_filter +
-                        ((sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS)));
-
-        const __m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
-        const __m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
-        const __m128i tmp_13 = _mm_unpackhi_epi32(tmp_1, tmp_3);
-        const __m128i tmp_15 = _mm_unpackhi_epi32(tmp_5, tmp_7);
-
-        const __m128i coeff_1 = _mm_unpacklo_epi64(tmp_9, tmp_11);
-        const __m128i coeff_3 = _mm_unpackhi_epi64(tmp_9, tmp_11);
-        const __m128i coeff_5 = _mm_unpacklo_epi64(tmp_13, tmp_15);
-        const __m128i coeff_7 = _mm_unpackhi_epi64(tmp_13, tmp_15);
-
-        const __m128i res_1 = _mm_madd_epi16(src_1, coeff_1);
-        const __m128i res_3 = _mm_madd_epi16(src_3, coeff_3);
-        const __m128i res_5 = _mm_madd_epi16(src_5, coeff_5);
-        const __m128i res_7 = _mm_madd_epi16(src_7, coeff_7);
-
-        const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
-                                              _mm_add_epi32(res_5, res_7));
-
-        // Rearrange pixels back into the order 0 ... 7
-        __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
-        __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
-        if (conv_params->is_compound) {
-          __m128i *const p =
-              (__m128i *)&conv_params
-                  ->dst[(i + k + 4) * conv_params->dst_stride + j];
-          const __m128i round_const = _mm_set1_epi32(
-              -(1 << (bd + 2 * FILTER_BITS - reduce_bits_horiz - 1)) +
-              ((1 << (reduce_bits_vert)) >> 1));
-          res_lo = _mm_add_epi32(res_lo, round_const);
-          res_lo = _mm_sra_epi32(res_lo, _mm_cvtsi32_si128(reduce_bits_vert));
-          if (conv_params->use_jnt_comp_avg) {
-            if (comp_avg) {
-              res_lo = _mm_add_epi32(_mm_mullo_epi32(_mm_loadu_si128(p), wt0),
-                                     _mm_mullo_epi32(res_lo, wt1));
-              res_lo = _mm_srai_epi32(res_lo, DIST_PRECISION_BITS);
-            }
-          } else {
-            if (comp_avg)
-              res_lo =
-                  _mm_srai_epi32(_mm_add_epi32(_mm_loadu_si128(p), res_lo), 1);
-          }
-
-          _mm_storeu_si128(p, res_lo);
-          if (p_width > 4) {
-            res_hi = _mm_add_epi32(res_hi, round_const);
-            res_hi = _mm_sra_epi32(res_hi, _mm_cvtsi32_si128(reduce_bits_vert));
-            if (conv_params->use_jnt_comp_avg) {
-              if (comp_avg) {
-                res_hi =
-                    _mm_add_epi32(_mm_mullo_epi32(_mm_loadu_si128(p + 1), wt0),
-                                  _mm_mullo_epi32(res_hi, wt1));
-                res_hi = _mm_srai_epi32(res_hi, DIST_PRECISION_BITS);
-              }
-            } else {
-              if (comp_avg)
-                res_hi = _mm_srai_epi32(
-                    _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi), 1);
-            }
-
-            _mm_storeu_si128(p + 1, res_hi);
-          }
-        } else {
-          // Round and pack into 8 bits
-          const __m128i round_const =
-              _mm_set1_epi32(-(1 << (bd + reduce_bits_vert - 1)) +
-                             ((1 << reduce_bits_vert) >> 1));
-
-          const __m128i res_lo_round = _mm_srai_epi32(
-              _mm_add_epi32(res_lo, round_const), reduce_bits_vert);
-          const __m128i res_hi_round = _mm_srai_epi32(
-              _mm_add_epi32(res_hi, round_const), reduce_bits_vert);
-
-          const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
-          __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit);
-
-          // Store, blending with 'pred' if needed
-          __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
-
-          // Note: If we're outputting a 4x4 block, we need to be very careful
-          // to only output 4 pixels at this point, to avoid encode/decode
-          // mismatches when encoding with multiple threads.
-          if (p_width == 4) {
-            if (comp_avg) {
-              const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p);
-              res_8bit = _mm_avg_epu8(res_8bit, orig);
-            }
-            *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
-          } else {
-            if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
-            _mm_storel_epi64(p, res_8bit);
-          }
-        }
-      }
-    }
-  }
-#endif  // CONFIG_LOWPRECISION_BLEND
 }

diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 40d891b..6fd9e2c 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake

@@ -95,7 +95,6 @@
 set(CONFIG_FILM_GRAIN_SHOWEX 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_FP_MB_STATS 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_INTER_STATS_ONLY 0 CACHE NUMBER "AV1 experiment flag.")
-set(CONFIG_LOWPRECISION_BLEND 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_OPERATING_POINTS 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_RD_DEBUG 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_TRAILING_BITS 0 CACHE NUMBER "AV1 experiment flag.")

diff --git a/test/av1_convolve_2d_test.cc b/test/av1_convolve_2d_test.cc
index 2e38b07..1d62dea 100644
--- a/test/av1_convolve_2d_test.cc
+++ b/test/av1_convolve_2d_test.cc

@@ -19,26 +19,7 @@
 using libaom_test::AV1HighbdConvolve2D::AV1HighbdJntConvolve2DTest;
 using std::tr1::make_tuple;
 using std::tr1::tuple;
-#if !CONFIG_LOWPRECISION_BLEND
-using libaom_test::AV1Convolve2D::AV1Convolve2DTest;
-using libaom_test::AV1HighbdConvolve2D::AV1HighbdConvolve2DTest;
-#endif  // CONFIG_LOWPRECISION_BLEND
 namespace {
-#if !CONFIG_LOWPRECISION_BLEND
-TEST_P(AV1Convolve2DTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
-
-TEST_P(AV1Convolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
-
-INSTANTIATE_TEST_CASE_P(
-    C_COPY, AV1Convolve2DTest,
-    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_copy_c, 0, 0));
-INSTANTIATE_TEST_CASE_P(
-    C_X, AV1Convolve2DTest,
-    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_x_c, 1, 0));
-INSTANTIATE_TEST_CASE_P(
-    C_Y, AV1Convolve2DTest,
-    libaom_test::AV1Convolve2D::BuildParams(av1_convolve_y_c, 0, 1));
-#endif  // !CONFIG_LOWPRECISION_BLEND
 
 TEST_P(AV1Convolve2DSrTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }
 
@@ -53,7 +34,6 @@
 INSTANTIATE_TEST_CASE_P(
     C_Y, AV1Convolve2DSrTest,
     libaom_test::AV1Convolve2D::BuildParams(av1_convolve_y_sr_c, 0, 1));
-#if CONFIG_LOWPRECISION_BLEND
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(SSE2_COPY, AV1Convolve2DSrTest,
                         libaom_test::AV1Convolve2D::BuildParams(
@@ -84,7 +64,6 @@
     libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sr_avx2, 1, 1));
 #endif  // HAVE_AVX2
 #endif  // HAVE_SSE2
-#endif  // CONFIG_LOWPRECISION_BLEND
 
 TEST_P(AV1JntConvolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
 
@@ -100,7 +79,6 @@
     C_Y, AV1JntConvolve2DTest,
     libaom_test::AV1Convolve2D::BuildParams(av1_jnt_convolve_y_c, 0, 1));
 
-#if CONFIG_LOWPRECISION_BLEND
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(SSE2_COPY, AV1JntConvolve2DTest,
                         libaom_test::AV1Convolve2D::BuildParams(
@@ -136,26 +114,6 @@
 #endif  // HAVE_AVX2
 #endif  // HAVE_SSE4_1
 #endif  // HAVE_SSE2
-#endif  // CONFIG_LOWPRECISION_BLEND
-
-#if !CONFIG_LOWPRECISION_BLEND
-#if HAVE_SSSE3
-TEST_P(AV1HighbdConvolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(1)); }
-
-INSTANTIATE_TEST_CASE_P(C_X, AV1HighbdConvolve2DTest,
-                        libaom_test::AV1HighbdConvolve2D::BuildParams(
-                            av1_highbd_convolve_x_c, 1, 0));
-
-INSTANTIATE_TEST_CASE_P(C_Y, AV1HighbdConvolve2DTest,
-                        libaom_test::AV1HighbdConvolve2D::BuildParams(
-                            av1_highbd_convolve_y_c, 0, 1));
-
-INSTANTIATE_TEST_CASE_P(C_COPY, AV1HighbdConvolve2DTest,
-                        libaom_test::AV1HighbdConvolve2D::BuildParams(
-                            av1_highbd_convolve_2d_copy_c, 0, 0));
-
-#endif  // HAVE_SSSE3
-#endif  // !CONFIG_LOWPRECISION_BLEND
 
 TEST_P(AV1HighbdConvolve2DSrTest, CheckOutput) { RunCheckOutput(GET_PARAM(1)); }
 TEST_P(AV1HighbdConvolve2DSrTest, DISABLED_Speed) {
@@ -173,7 +131,6 @@
 INSTANTIATE_TEST_CASE_P(C_COPY, AV1HighbdConvolve2DSrTest,
                         libaom_test::AV1HighbdConvolve2D::BuildParams(
                             av1_highbd_convolve_2d_copy_sr_c, 0, 0));
-#if CONFIG_LOWPRECISION_BLEND
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(SSE2_COPY, AV1HighbdConvolve2DSrTest,
                         libaom_test::AV1HighbdConvolve2D::BuildParams(
@@ -204,7 +161,6 @@
 #endif  // HAVE_AVX2
 #endif  // HAVE_SSSE3
 #endif  // HAVE_SSE2
-#endif  // CONFIG_LOWPRECISION_BLEND
 TEST_P(AV1HighbdJntConvolve2DTest, CheckOutput) {
   RunCheckOutput(GET_PARAM(1));
 }
@@ -224,7 +180,6 @@
 INSTANTIATE_TEST_CASE_P(C_COPY, AV1HighbdJntConvolve2DTest,
                         libaom_test::AV1HighbdConvolve2D::BuildParams(
                             av1_highbd_jnt_convolve_2d_copy_c, 0, 0));
-#if CONFIG_LOWPRECISION_BLEND
 #if HAVE_SSE4_1
 INSTANTIATE_TEST_CASE_P(SSE4_1_COPY, AV1HighbdJntConvolve2DTest,
                         libaom_test::AV1HighbdConvolve2D::BuildParams(
@@ -253,5 +208,4 @@
                             av1_highbd_jnt_convolve_y_avx2, 0, 1));
 #endif  // HAVE_AVX2
 #endif  // HAVE_SSE4_1
-#endif  // CONFIG_LOWPRECISION_BLEND
 }  // namespace

diff --git a/test/av1_convolve_2d_test_util.cc b/test/av1_convolve_2d_test_util.cc
index 62f4038..893fb10 100644
--- a/test/av1_convolve_2d_test_util.cc
+++ b/test/av1_convolve_2d_test_util.cc

@@ -31,110 +31,6 @@
                             ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
 }
 
-#if !CONFIG_LOWPRECISION_BLEND
-AV1Convolve2DTest::~AV1Convolve2DTest() {}
-void AV1Convolve2DTest::SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
-
-void AV1Convolve2DTest::TearDown() { libaom_test::ClearSystemState(); }
-void AV1Convolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
-  const int w = kMaxSize, h = kMaxSize;
-  const int has_subx = GET_PARAM(1);
-  const int has_suby = GET_PARAM(2);
-  const int block_idx = GET_PARAM(3);
-  int hfilter, vfilter, subx, suby;
-  uint8_t input[kMaxSize * kMaxSize];
-  DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
-  DECLARE_ALIGNED(32, CONV_BUF_TYPE, output2[MAX_SB_SQUARE]);
-
-  for (int i = 0; i < h; ++i)
-    for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
-  for (int i = 0; i < MAX_SB_SQUARE; ++i)
-    output[i] = output2[i] = rnd_.Rand31();
-
-  const int out_w = block_size_wide[block_idx];
-  const int out_h = block_size_high[block_idx];
-  for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
-    for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL; ++vfilter) {
-      InterpFilterParams filter_params_x =
-          av1_get_interp_filter_params((InterpFilter)hfilter);
-      InterpFilterParams filter_params_y =
-          av1_get_interp_filter_params((InterpFilter)vfilter);
-      for (int do_average = 0; do_average <= 1; ++do_average) {
-        ConvolveParams conv_params1 = get_conv_params_no_round(
-            0, do_average, 0, output, MAX_SB_SIZE, 1, 8);
-        ConvolveParams conv_params2 = get_conv_params_no_round(
-            0, do_average, 0, output2, MAX_SB_SIZE, 1, 8);
-
-        const int subx_range = has_subx ? 16 : 1;
-        const int suby_range = has_suby ? 16 : 1;
-        for (subx = 0; subx < subx_range; ++subx) {
-          for (suby = 0; suby < suby_range; ++suby) {
-            // Choose random locations within the source block
-            const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
-            const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-            av1_convolve_2d_c(input + offset_r * w + offset_c, w, NULL, 0,
-                              out_w, out_h, &filter_params_x, &filter_params_y,
-                              subx, suby, &conv_params1);
-            test_impl(input + offset_r * w + offset_c, w, NULL, 0, out_w, out_h,
-                      &filter_params_x, &filter_params_y, subx, suby,
-                      &conv_params2);
-
-            for (int i = 0; i < out_h; ++i) {
-              for (int j = 0; j < out_w; ++j) {
-                int idx = i * MAX_SB_SIZE + j;
-                ASSERT_EQ(output[idx], output2[idx])
-                    << out_w << "x" << out_h << " Pixel mismatch at index "
-                    << idx << " = (" << i << ", " << j
-                    << "), sub pixel offset = (" << suby << ", " << subx << ")";
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-void AV1Convolve2DTest::RunSpeedTest(convolve_2d_func test_impl) {
-  const int w = kMaxSize, h = kMaxSize;
-  const int has_subx = GET_PARAM(1);
-  const int has_suby = GET_PARAM(2);
-  const int block_idx = GET_PARAM(3);
-
-  uint8_t input[kMaxSize * kMaxSize];
-  DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
-
-  for (int i = 0; i < h; ++i)
-    for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
-
-  int hfilter = EIGHTTAP_REGULAR, vfilter = EIGHTTAP_REGULAR;
-  int subx = 0, suby = 0;
-
-  InterpFilterParams filter_params_x =
-      av1_get_interp_filter_params((InterpFilter)hfilter);
-  InterpFilterParams filter_params_y =
-      av1_get_interp_filter_params((InterpFilter)vfilter);
-  const int do_average = 0;
-  ConvolveParams conv_params2 =
-      get_conv_params_no_round(0, do_average, 0, output, MAX_SB_SIZE, 1, 8);
-
-  const int out_w = block_size_wide[block_idx];
-  const int out_h = block_size_high[block_idx];
-  const int num_loops = 1000000000 / (out_w + out_h);
-  aom_usec_timer timer;
-  aom_usec_timer_start(&timer);
-
-  for (int i = 0; i < num_loops; ++i)
-    test_impl(input, w, NULL, 0, out_w, out_h, &filter_params_x,
-              &filter_params_y, subx, suby, &conv_params2);
-
-  aom_usec_timer_mark(&timer);
-  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
-  printf("%d,%d convolve %3dx%-3d: %7.2f us\n", has_subx, has_suby, out_w,
-         out_h, 1000.0 * elapsed_time / num_loops);
-}
-#endif  // CONFIG_LOWPRECISION_BLEND
-
 AV1Convolve2DSrTest::~AV1Convolve2DSrTest() {}
 void AV1Convolve2DSrTest::SetUp() {
   rnd_.Reset(ACMRandom::DeterministicSeed());
@@ -406,78 +302,6 @@
       ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
 }
 
-#if !CONFIG_LOWPRECISION_BLEND
-AV1HighbdConvolve2DTest::~AV1HighbdConvolve2DTest() {}
-void AV1HighbdConvolve2DTest::SetUp() {
-  rnd_.Reset(ACMRandom::DeterministicSeed());
-}
-
-void AV1HighbdConvolve2DTest::TearDown() { libaom_test::ClearSystemState(); }
-
-void AV1HighbdConvolve2DTest::RunCheckOutput(
-    highbd_convolve_2d_func test_impl) {
-  const int w = kMaxSize, h = kMaxSize;
-  const int bd = GET_PARAM(0);
-  const int has_subx = GET_PARAM(2);
-  const int has_suby = GET_PARAM(3);
-  const int block_idx = GET_PARAM(4);
-  int hfilter, vfilter, subx, suby;
-  uint16_t input[kMaxSize * kMaxSize];
-  DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
-  DECLARE_ALIGNED(32, CONV_BUF_TYPE, output2[MAX_SB_SQUARE]);
-
-  for (int i = 0; i < h; ++i)
-    for (int j = 0; j < w; ++j)
-      input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
-  for (int i = 0; i < MAX_SB_SQUARE; ++i)
-    output[i] = output2[i] = rnd_.Rand31();
-
-  const int out_w = block_size_wide[block_idx];
-  const int out_h = block_size_high[block_idx];
-  for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
-    for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL; ++vfilter) {
-      InterpFilterParams filter_params_x =
-          av1_get_interp_filter_params((InterpFilter)hfilter);
-      InterpFilterParams filter_params_y =
-          av1_get_interp_filter_params((InterpFilter)vfilter);
-      for (int do_average = 0; do_average <= 1; ++do_average) {
-        ConvolveParams conv_params1 = get_conv_params_no_round(
-            0, do_average, 0, output, MAX_SB_SIZE, 1, bd);
-        ConvolveParams conv_params2 = get_conv_params_no_round(
-            0, do_average, 0, output2, MAX_SB_SIZE, 1, bd);
-
-        const int subx_range = has_subx ? 16 : 1;
-        const int suby_range = has_suby ? 16 : 1;
-        for (subx = 0; subx < subx_range; ++subx) {
-          for (suby = 0; suby < suby_range; ++suby) {
-            // Choose random locations within the source block
-            const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
-            const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
-            av1_highbd_convolve_2d_c(input + offset_r * w + offset_c, w, NULL,
-                                     0, out_w, out_h, &filter_params_x,
-                                     &filter_params_y, subx, suby,
-                                     &conv_params1, bd);
-            test_impl(input + offset_r * w + offset_c, w, NULL, 0, out_w, out_h,
-                      &filter_params_x, &filter_params_y, subx, suby,
-                      &conv_params2, bd);
-
-            for (int i = 0; i < out_h; ++i) {
-              for (int j = 0; j < out_w; ++j) {
-                int idx = i * MAX_SB_SIZE + j;
-                ASSERT_EQ(output[idx], output2[idx])
-                    << out_w << "x" << out_h << " Pixel mismatch at index "
-                    << idx << " = (" << i << ", " << j
-                    << "), sub pixel offset = (" << suby << ", " << subx << ")";
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-}
-#endif  // !CONFIG_LOWPRECISION_BLEND
-
 AV1HighbdConvolve2DSrTest::~AV1HighbdConvolve2DSrTest() {}
 void AV1HighbdConvolve2DSrTest::SetUp() {
   rnd_.Reset(ACMRandom::DeterministicSeed());

diff --git a/test/av1_convolve_2d_test_util.h b/test/av1_convolve_2d_test_util.h
index 328d18c..9389d8a 100644
--- a/test/av1_convolve_2d_test_util.h
+++ b/test/av1_convolve_2d_test_util.h

@@ -36,22 +36,6 @@
 ::testing::internal::ParamGenerator<Convolve2DParam> BuildParams(
     convolve_2d_func filter, int subx_exist, int suby_exist);
 
-#if !CONFIG_LOWPRECISION_BLEND
-class AV1Convolve2DTest : public ::testing::TestWithParam<Convolve2DParam> {
- public:
-  virtual ~AV1Convolve2DTest();
-  virtual void SetUp();
-
-  virtual void TearDown();
-
- protected:
-  void RunCheckOutput(convolve_2d_func test_impl);
-  void RunSpeedTest(convolve_2d_func test_impl);
-
-  libaom_test::ACMRandom rnd_;
-};
-#endif  // !CONFIG_LOWPRECISION_BLEND
-
 class AV1Convolve2DSrTest : public ::testing::TestWithParam<Convolve2DParam> {
  public:
   virtual ~AV1Convolve2DSrTest();
@@ -93,22 +77,6 @@
 ::testing::internal::ParamGenerator<HighbdConvolve2DParam> BuildParams(
     highbd_convolve_2d_func filter, int subx_exist, int suby_exist);
 
-#if !CONFIG_LOWPRECISION_BLEND
-class AV1HighbdConvolve2DTest
-    : public ::testing::TestWithParam<HighbdConvolve2DParam> {
- public:
-  virtual ~AV1HighbdConvolve2DTest();
-  virtual void SetUp();
-
-  virtual void TearDown();
-
- protected:
-  void RunCheckOutput(highbd_convolve_2d_func test_impl);
-
-  libaom_test::ACMRandom rnd_;
-};
-#endif  // !CONFIG_LOWPRECISION_BLEND
-
 class AV1HighbdConvolve2DSrTest
     : public ::testing::TestWithParam<HighbdConvolve2DParam> {
  public:

diff --git a/test/av1_convolve_scale_test.cc b/test/av1_convolve_scale_test.cc
index acb50df..fd5a3f5 100644
--- a/test/av1_convolve_scale_test.cc
+++ b/test/av1_convolve_scale_test.cc

@@ -158,7 +158,6 @@
   std::vector<CONV_BUF_TYPE> dst_16_data_;
 };
 
-#if CONFIG_LOWPRECISION_BLEND
 template <typename Pixel>
 void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) {
   if (!trash) {
@@ -526,5 +525,4 @@
                        ::testing::ValuesIn(kBlockDim),
                        ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
                        ::testing::Bool(), ::testing::ValuesIn(kBDs)));
-#endif
 }  // namespace

diff --git a/test/test.cmake b/test/test.cmake
index f5adbf0..23f69f8 100644
--- a/test/test.cmake
+++ b/test/test.cmake

@@ -218,14 +218,12 @@
     endif ()
 
     if (HAVE_SSE4_1)
-      if (CONFIG_LOWPRECISION_BLEND)
         set(AOM_UNIT_TEST_ENCODER_SOURCES
             ${AOM_UNIT_TEST_ENCODER_SOURCES}
             "${AOM_ROOT}/test/av1_convolve_scale_test.cc"
             "${AOM_ROOT}/test/warp_filter_test_util.cc"
             "${AOM_ROOT}/test/warp_filter_test_util.h"
             "${AOM_ROOT}/test/warp_filter_test.cc")
-      endif ()
     endif ()
 
     if (HAVE_SSE4_1)

diff --git a/test/warp_filter_test.cc b/test/warp_filter_test.cc
index 920422f..99e3d31 100644
--- a/test/warp_filter_test.cc
+++ b/test/warp_filter_test.cc

@@ -10,7 +10,6 @@
  */
 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
 #include "test/warp_filter_test_util.h"
-#if CONFIG_LOWPRECISION_BLEND
 using libaom_test::ACMRandom;
 using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
 using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
@@ -35,4 +34,3 @@
 
 #endif  // HAVE_SSE4_1
 }  // namespace
-#endif  // CONFIG_LOWPRECISION_BLEND

diff --git a/test/warp_filter_test_util.cc b/test/warp_filter_test_util.cc
index 27299d9..574cdf1 100644
--- a/test/warp_filter_test_util.cc
+++ b/test/warp_filter_test_util.cc

@@ -78,7 +78,6 @@
 }
 
 namespace AV1WarpFilter {
-#if CONFIG_LOWPRECISION_BLEND
 ::testing::internal::ParamGenerator<WarpTestParam> BuildParams(
     warp_affine_func filter) {
   const WarpTestParam params[] = {
@@ -244,11 +243,9 @@
   delete[] dsta;
   delete[] dstb;
 }
-#endif
 }  // namespace AV1WarpFilter
 
 namespace AV1HighbdWarpFilter {
-#if CONFIG_LOWPRECISION_BLEND
 ::testing::internal::ParamGenerator<HighbdWarpTestParam> BuildParams(
     highbd_warp_affine_func filter) {
   const HighbdWarpTestParam params[] = {
@@ -432,6 +429,5 @@
   delete[] dsta;
   delete[] dstb;
 }
-#endif
 }  // namespace AV1HighbdWarpFilter
 }  // namespace libaom_test

diff --git a/test/warp_filter_test_util.h b/test/warp_filter_test_util.h
index fd67b5f..e6b1944 100644
--- a/test/warp_filter_test_util.h
+++ b/test/warp_filter_test_util.h

@@ -23,7 +23,6 @@
 #include "av1/common/mv.h"
 #include "av1/common/common_data.h"
 
-#if CONFIG_LOWPRECISION_BLEND
 namespace libaom_test {
 
 void generate_warped_model(libaom_test::ACMRandom *rnd, int32_t *mat,
@@ -96,5 +95,4 @@
 
 }  // namespace libaom_test
 
-#endif
 #endif  // TEST_WARP_FILTER_TEST_UTIL_H_
commit	5af4891665a64008af1cc4c7a6f22293196d0503	[log] [tgz]
author	Yaowu Xu <yaowu@google.com>	Wed Mar 28 15:04:34 2018 -0700
committer	Yaowu Xu <yaowu@google.com>	Thu Mar 29 05:39:14 2018 +0000
tree	d6204ec086d9b6e904aedba2af1c57ca5aff77c6
parent	66dde40b1fc2284a64176348253a6650349173f7 [diff]