Remove convolve_round/compound_round config flags
Merged the convolve_round experiment and removed its config flag from the code.
Removed the compound_round code path.
Change-Id: Ic01856732d75cca65d3866383d3cc1dd572f8863
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 1703ba0..249e1dc 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -735,9 +735,7 @@
#
# Alpha blending with mask
#
- if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
- add_proto qw/void aom_blend_a64_d32_mask/, "int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
- }
+ add_proto qw/void aom_blend_a64_d32_mask/, "int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
diff --git a/aom_dsp/blend_a64_mask.c b/aom_dsp/blend_a64_mask.c
index 5cd3dac..384e81b 100644
--- a/aom_dsp/blend_a64_mask.c
+++ b/aom_dsp/blend_a64_mask.c
@@ -18,7 +18,6 @@
#include "./aom_dsp_rtcd.h"
-#if CONFIG_CONVOLVE_ROUND
// Blending with alpha mask. Mask values come from the range [0, 64],
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
// be the same as dst, or dst can be different from both sources.
@@ -79,7 +78,6 @@
}
}
}
-#endif // CONFIG_CONVOLVE_ROUND
// Blending with alpha mask. Mask values come from the range [0, 64],
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 74990ab..d3aadb4 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -300,53 +300,49 @@
"${AOM_ROOT}/av1/common/clpf_neon.c")
endif ()
-if (CONFIG_CONVOLVE_ROUND)
- set(AOM_AV1_COMMON_INTRIN_SSE2
- ${AOM_AV1_COMMON_INTRIN_SSE2}
- "${AOM_ROOT}/av1/common/x86/convolve_2d_sse2.c")
+set(AOM_AV1_COMMON_INTRIN_SSE2
+ ${AOM_AV1_COMMON_INTRIN_SSE2}
+ "${AOM_ROOT}/av1/common/x86/convolve_2d_sse2.c")
+set(AOM_AV1_COMMON_INTRIN_AVX2
+ ${AOM_AV1_COMMON_INTRIN_AVX2}
+ "${AOM_ROOT}/av1/common/x86/convolve_2d_avx2.c")
+
+if (CONFIG_HIGHBITDEPTH)
set(AOM_AV1_COMMON_INTRIN_AVX2
${AOM_AV1_COMMON_INTRIN_AVX2}
- "${AOM_ROOT}/av1/common/x86/convolve_2d_avx2.c")
+ "${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_avx2.c")
- if (CONFIG_HIGHBITDEPTH)
- set(AOM_AV1_COMMON_INTRIN_AVX2
- ${AOM_AV1_COMMON_INTRIN_AVX2}
- "${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_avx2.c")
+ set(AOM_AV1_COMMON_INTRIN_SSSE3
+ ${AOM_AV1_COMMON_INTRIN_SSSE3}
+ "${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_ssse3.c")
+endif ()
- set(AOM_AV1_COMMON_INTRIN_SSSE3
- ${AOM_AV1_COMMON_INTRIN_SSSE3}
- "${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_ssse3.c")
- endif ()
+if (CONFIG_JNT_COMP)
+ set(AOM_AV1_COMMON_INTRIN_SSE4_1
+ ${AOM_AV1_COMMON_INTRIN_SSE4_1}
+ "${AOM_ROOT}/av1/common/x86/convolve_2d_sse4.c")
+endif ()
- if (CONFIG_JNT_COMP)
- set(AOM_AV1_COMMON_INTRIN_SSE4_1
- ${AOM_AV1_COMMON_INTRIN_SSE4_1}
- "${AOM_ROOT}/av1/common/x86/convolve_2d_sse4.c")
- endif ()
-
- if(NOT CONFIG_COMPOUND_ROUND)
- set(AOM_AV1_COMMON_INTRIN_SSE4_1
- ${AOM_AV1_COMMON_INTRIN_SSE4_1}
- "${AOM_ROOT}/av1/common/x86/av1_convolve_scale_sse4.c")
- endif()
+set(AOM_AV1_COMMON_INTRIN_SSE4_1
+ ${AOM_AV1_COMMON_INTRIN_SSE4_1}
+ "${AOM_ROOT}/av1/common/x86/av1_convolve_scale_sse4.c")
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/x86/convolve_sse2.c")
- set(AOM_AV1_COMMON_INTRIN_AVX2
- ${AOM_AV1_COMMON_INTRIN_AVX2}
- "${AOM_ROOT}/av1/common/x86/convolve_avx2.c")
-endif ()
+set(AOM_AV1_COMMON_INTRIN_AVX2
+ ${AOM_AV1_COMMON_INTRIN_AVX2}
+ "${AOM_ROOT}/av1/common/x86/convolve_avx2.c")
- set(AOM_AV1_ENCODER_SOURCES
- ${AOM_AV1_ENCODER_SOURCES}
- "${AOM_ROOT}/av1/encoder/wedge_utils.c")
+set(AOM_AV1_ENCODER_SOURCES
+ ${AOM_AV1_ENCODER_SOURCES}
+ "${AOM_ROOT}/av1/encoder/wedge_utils.c")
- set(AOM_AV1_ENCODER_INTRIN_SSE2
- ${AOM_AV1_ENCODER_INTRIN_SSE2}
- "${AOM_ROOT}/av1/encoder/x86/wedge_utils_sse2.c")
+set(AOM_AV1_ENCODER_INTRIN_SSE2
+ ${AOM_AV1_ENCODER_INTRIN_SSE2}
+ "${AOM_ROOT}/av1/encoder/x86/wedge_utils_sse2.c")
if (CONFIG_ACCOUNTING)
set(AOM_AV1_DECODER_SOURCES
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index cbff82d..bc3afa1 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -79,9 +79,7 @@
AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d_cfg.h
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/x86/convolve_avx2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_ssse3.c
-ifeq ($(CONFIG_CONVOLVE_ROUND)x$(CONFIG_COMPOUND_ROUND),yesx)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_convolve_scale_sse4.c
-endif
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_sse4.c
endif
@@ -161,7 +159,6 @@
endif
endif
-ifeq ($(CONFIG_CONVOLVE_ROUND),yes)
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/convolve_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/convolve_2d_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/convolve_2d_sse4.c
@@ -170,7 +167,6 @@
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/highbd_convolve_2d_ssse3.c
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/x86/highbd_convolve_2d_avx2.c
endif
-endif
ifeq ($(CONFIG_LV_MAP),yes)
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/txb_sse2.c
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 3676d8f..f76f79a 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -575,48 +575,37 @@
}
# CONVOLVE_ROUND/COMPOUND_ROUND functions
+add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+specialize qw/av1_convolve_2d sse2 avx2/;
+add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
+specialize qw/av1_convolve_rounding avx2/;
-if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
- add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
- specialize qw/av1_convolve_2d sse2 avx2/;
- add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
- specialize qw/av1_convolve_rounding avx2/;
+add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+specialize qw/av1_convolve_2d_copy sse2/;
+add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+specialize qw/av1_convolve_x sse2/;
+add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+specialize qw/av1_convolve_y sse2/;
- if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
- add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
- specialize qw/av1_convolve_2d_copy sse2/;
- add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
- specialize qw/av1_convolve_x sse2/;
- add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
- specialize qw/av1_convolve_y sse2/;
- }
+add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
+specialize qw/av1_convolve_2d_scale sse4_1/;
- add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
- if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
- specialize qw/av1_convolve_2d_scale sse4_1/;
- }
+if (aom_config("CONFIG_JNT_COMP") eq "yes") {
+ add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+ specialize qw/av1_jnt_convolve_2d sse4_1/;
- if (aom_config("CONFIG_JNT_COMP") eq "yes") {
- add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
- specialize qw/av1_jnt_convolve_2d sse4_1/;
+ add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+ specialize qw/av1_jnt_convolve_2d_copy sse2/;
+}
- if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
- add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
- specialize qw/av1_jnt_convolve_2d_copy sse2/;
- }
- }
+if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void av1_highbd_convolve_2d/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
+ specialize qw/av1_highbd_convolve_2d ssse3 avx2/;
+ add_proto qw/void av1_highbd_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd";
+ specialize qw/av1_highbd_convolve_rounding avx2/;
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void av1_highbd_convolve_2d/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
- specialize qw/av1_highbd_convolve_2d ssse3 avx2/;
- add_proto qw/void av1_highbd_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd";
- specialize qw/av1_highbd_convolve_rounding avx2/;
-
- add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
- if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
- specialize qw/av1_highbd_convolve_2d_scale sse4_1/;
- }
- }
+ add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
+ specialize qw/av1_highbd_convolve_2d_scale sse4_1/;
}
# INTRA_EDGE functions
diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index 74b7085..d0b747e 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c
@@ -357,7 +357,6 @@
}
}
-#if CONFIG_CONVOLVE_ROUND
void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int bits) {
for (int r = 0; r < h; ++r) {
@@ -368,190 +367,6 @@
}
}
-#if CONFIG_COMPOUND_ROUND
-void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
- int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- uint8_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
-
- // horizontal filter
- const uint8_t *src_horiz = src - fo_vert * src_stride;
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (int y = 0; y < im_h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t sum = 0;
- for (int k = 0; k < filter_params_x->taps; ++k) {
- sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
- }
- im_block[y * im_stride + x] =
- clip_pixel(ROUND_POWER_OF_TWO(sum, conv_params->round_0));
- }
- }
-
- // vertical filter
- uint8_t *src_vert = im_block + fo_vert * im_stride;
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- CONV_BUF_TYPE sum = 0;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
- }
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
- }
- }
-}
-
-#if CONFIG_JNT_COMP
-void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- uint8_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
-
- // horizontal filter
- const uint8_t *src_horiz = src - fo_vert * src_stride;
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (int y = 0; y < im_h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t sum = 0;
- for (int k = 0; k < filter_params_x->taps; ++k) {
- sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
- }
- im_block[y * im_stride + x] =
- clip_pixel(ROUND_POWER_OF_TWO(sum, conv_params->round_0));
- }
- }
-
- // vertical filter
- uint8_t *src_vert = im_block + fo_vert * im_stride;
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- CONV_BUF_TYPE sum = 0;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
- }
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->use_jnt_comp_avg) {
- if (conv_params->do_average == 0) {
- dst[y * dst_stride + x] = res * conv_params->fwd_offset;
- } else {
- dst[y * dst_stride + x] += res * conv_params->bck_offset;
-
- dst[y * dst_stride + x] = ROUND_POWER_OF_TWO(dst[y * dst_stride + x],
- DIST_PRECISION_BITS - 1);
- }
- } else {
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
- }
- }
- }
-}
-#endif // CONFIG_JNT_COMP
-
-void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_qn, const int x_step_qn,
- const int subpel_y_qn, const int y_step_qn,
- ConvolveParams *conv_params) {
- uint8_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
- int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
- filter_params_y->taps;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
-
- // horizontal filter
- const uint8_t *src_horiz = src - fo_vert * src_stride;
- for (int y = 0; y < im_h; ++y) {
- int x_qn = subpel_x_qn;
- for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
- const uint8_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
- const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(x_filter_idx < SUBPEL_SHIFTS);
- const int16_t *x_filter =
- av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
- int sum = 0;
- for (int k = 0; k < filter_params_x->taps; ++k)
- sum += x_filter[k] * src_x[k - fo_horiz];
- im_block[y * im_stride + x] =
- clip_pixel(ROUND_POWER_OF_TWO(sum, conv_params->round_0));
- }
- src_horiz += src_stride;
- }
-
- // vertical filter
- const uint8_t *src_vert = im_block + fo_vert * im_stride;
- for (int x = 0; x < w; ++x) {
- int y_qn = subpel_y_qn;
- for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
- const uint8_t *const src_y =
- &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
- const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(y_filter_idx < SUBPEL_SHIFTS);
- const int16_t *y_filter =
- av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
- CONV_BUF_TYPE sum = 0;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
- }
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
-#if CONFIG_JNT_COMP
- if (conv_params->use_jnt_comp_avg) {
- if (conv_params->do_average == 0) {
- dst[y * dst_stride + x] = res * conv_params->fwd_offset;
- } else {
- dst[y * dst_stride + x] += res * conv_params->bck_offset;
-
- dst[y * dst_stride + x] = ROUND_POWER_OF_TWO(dst[y * dst_stride + x],
- DIST_PRECISION_BITS - 1);
- }
- } else {
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
- }
-#else
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
-#endif // CONFIG_JNT_COMP
- }
- src_vert++;
- }
-}
-
-#else
-
/* When convolve-round is enabled and compound-round is disabled, we use a
high-precision convolve filter.
Note: For notes on hardware implementations, including the required
@@ -877,7 +692,6 @@
src_vert++;
}
}
-#endif // CONFIG_COMPOUND_ROUND
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
@@ -947,12 +761,6 @@
&filter_params_y, subpel_x_q4, x_step_q4,
subpel_y_q4, y_step_q4, conv_params);
} else {
-#if CONFIG_COMPOUND_ROUND
- av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4,
- conv_params);
-#else
if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
av1_jnt_convolve_2d_copy(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h,
@@ -976,7 +784,6 @@
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
}
-#endif // CONFIG_COMPOUND_ROUND
}
#else
if (scaled) {
@@ -985,11 +792,6 @@
&filter_params_y, subpel_x_q4, x_step_q4,
subpel_y_q4, y_step_q4, conv_params);
} else {
-#if CONFIG_COMPOUND_ROUND
- av1_convolve_2d(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
-#else
// Special case convolve functions should produce the same result as
// av1_convolve_2d.
if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
@@ -1012,7 +814,6 @@
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
}
-#endif // CONFIG_COMPOUND_ROUND
}
#endif // CONFIG_JNT_COMP
}
@@ -1031,114 +832,6 @@
}
}
-#if CONFIG_COMPOUND_ROUND
-void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- uint16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
-
- // horizontal filter
- const uint16_t *src_horiz = src - fo_vert * src_stride;
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- for (int y = 0; y < im_h; ++y) {
- for (int x = 0; x < w; ++x) {
- int32_t sum = 0;
- for (int k = 0; k < filter_params_x->taps; ++k) {
- sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
- }
- im_block[y * im_stride + x] =
- clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, conv_params->round_0), bd);
- }
- }
-
- // vertical filter
- uint16_t *src_vert = im_block + fo_vert * im_stride;
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- for (int y = 0; y < h; ++y) {
- for (int x = 0; x < w; ++x) {
- CONV_BUF_TYPE sum = 0;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
- }
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
- }
- }
-}
-
-void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
- int h, InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_qn, const int x_step_qn,
- const int subpel_y_qn, const int y_step_qn,
- ConvolveParams *conv_params, int bd) {
- uint16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
- int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
- filter_params_y->taps;
- int im_stride = w;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- (void)bd;
-
- // horizontal filter
- const uint16_t *src_horiz = src - fo_vert * src_stride;
- for (int y = 0; y < im_h; ++y) {
- int x_qn = subpel_x_qn;
- for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
- const uint16_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
- const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(x_filter_idx < SUBPEL_SHIFTS);
- const int16_t *x_filter =
- av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
- int sum = 0;
- for (int k = 0; k < filter_params_x->taps; ++k)
- sum += x_filter[k] * src_x[k - fo_horiz];
- im_block[y * im_stride + x] =
- clip_pixel(ROUND_POWER_OF_TWO(sum, conv_params->round_0));
- }
- src_horiz += src_stride;
- }
-
- // vertical filter
- uint16_t *src_vert = im_block + fo_vert * im_stride;
- for (int x = 0; x < w; ++x) {
- int y_qn = subpel_y_qn;
- for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
- const uint16_t *const src_y =
- &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
- const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
- assert(y_filter_idx < SUBPEL_SHIFTS);
- const int16_t *y_filter =
- av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
- CONV_BUF_TYPE sum = 0;
- for (int k = 0; k < filter_params_y->taps; ++k) {
- sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
- }
- CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- if (conv_params->do_average)
- dst[y * dst_stride + x] += res;
- else
- dst[y * dst_stride + x] = res;
- }
- src_vert++;
- }
-}
-
-#else
-
void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
@@ -1253,7 +946,6 @@
src_vert++;
}
}
-#endif // CONFIG_COMPOUND_ROUND
void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
uint8_t *dst, int dst_stride, int w, int h,
@@ -1318,8 +1010,6 @@
}
#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_CONVOLVE_ROUND
-
typedef void (*ConvolveFunc)(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilterParams filter_params,
diff --git a/av1/common/convolve.h b/av1/common/convolve.h
index 607532b..8803ffa 100644
--- a/av1/common/convolve.h
+++ b/av1/common/convolve.h
@@ -99,7 +99,6 @@
struct AV1Common;
void av1_convolve_init(struct AV1Common *cm);
-#if CONFIG_CONVOLVE_ROUND
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
InterpFilters interp_filters, const int subpel_x_q4,
@@ -113,11 +112,7 @@
conv_params.ref = ref;
conv_params.do_average = do_average;
conv_params.round = CONVOLVE_OPT_NO_ROUND;
-#if CONFIG_COMPOUND_ROUND
- conv_params.round_0 = FILTER_BITS;
-#else
conv_params.round_0 = 5;
-#endif
conv_params.round_1 = 0;
conv_params.dst = dst;
conv_params.dst_stride = dst_stride;
@@ -135,7 +130,6 @@
int scaled, ConvolveParams *conv_params,
int bd);
#endif
-#endif // CONFIG_CONVOLVE_ROUND
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, InterpFilters interp_filters,
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index 1e3ccaa..c3737ef 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -403,7 +403,6 @@
#elif COMPOUND_SEGMENT_TYPE == 1
#define DIFF_FACTOR 16
-#if CONFIG_CONVOLVE_ROUND
static void diffwtd_mask_d32(uint8_t *mask, int which_inverse, int mask_base,
const int32_t *src0, int src0_stride,
const int32_t *src1, int src1_stride,
@@ -441,7 +440,6 @@
default: assert(0);
}
}
-#endif // CONFIG_CONVOLVE_ROUND
static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
const uint8_t *src0, int src0_stride,
@@ -691,7 +689,6 @@
init_wedge_masks();
}
-#if CONFIG_CONVOLVE_ROUND
static void build_masked_compound_no_round(
CONV_BUF_TYPE *dst, int dst_stride, const CONV_BUF_TYPE *src0,
int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride,
@@ -705,7 +702,7 @@
aom_blend_a64_d32_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
mask, block_size_wide[sb_type], h, w, subh, subw);
}
-#endif // CONFIG_CONVOLVE_ROUND
+
static void build_masked_compound(
uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
const uint8_t *src1, int src1_stride,
@@ -756,17 +753,12 @@
// a temporary buffer, then will blend that temporary buffer with that from
// the other reference.
//
-// With CONFIG_CONVOLVE_ROUND, if the rounding mode is CONVOLVE_OPT_NO_ROUND
+// If the rounding mode is CONVOLVE_OPT_NO_ROUND
// then the predictions are at 32-bits, so we'll need 32 bits per
// pixel. Otherwise, we'll need up to 16 bits per pixel if
// CONFIG_HIGHBITDEPTH or just 8 otherwise.
-#if CONFIG_CONVOLVE_ROUND
#define INTER_PRED_BYTES_PER_PIXEL 4
-#elif CONFIG_HIGHBITDEPTH
-#define INTER_PRED_BYTES_PER_PIXEL 2
-#else
-#define INTER_PRED_BYTES_PER_PIXEL 1
-#endif
+
DECLARE_ALIGNED(16, uint8_t,
tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
#undef INTER_PRED_BYTES_PER_PIXEL
@@ -779,7 +771,6 @@
uint8_t *tmp_dst = tmp_buf;
#endif
-#if CONFIG_CONVOLVE_ROUND
const int tmp_buf_stride = MAX_SB_SIZE;
const int is_conv_no_round = conv_params->round == CONVOLVE_OPT_NO_ROUND;
CONV_BUF_TYPE *org_dst = conv_params->dst;
@@ -790,7 +781,6 @@
conv_params->dst_stride = tmp_buf_stride;
assert(conv_params->do_average == 0);
}
-#endif // CONFIG_CONVOLVE_ROUND
// This will generate a prediction in tmp_buf for the second reference
av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
@@ -799,14 +789,12 @@
xd);
if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
-#if CONFIG_CONVOLVE_ROUND
if (is_conv_no_round) {
build_compound_seg_mask_d32(comp_data.seg_mask, comp_data.mask_type,
org_dst, org_dst_stride, tmp_buf32,
tmp_buf_stride, mi->mbmi.sb_type, h, w,
conv_params, xd->bd);
} else {
-#endif // CONFIG_CONVOLVE_ROUND
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
build_compound_seg_mask_highbd(comp_data.seg_mask, comp_data.mask_type,
@@ -820,12 +808,9 @@
#if CONFIG_HIGHBITDEPTH
}
#endif
-#if CONFIG_CONVOLVE_ROUND
}
-#endif
}
-#if CONFIG_CONVOLVE_ROUND
if (is_conv_no_round) {
build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
org_dst_stride, tmp_buf32, tmp_buf_stride,
@@ -844,8 +829,6 @@
conv_params->do_post_rounding = 0;
} else {
-#endif // CONFIG_CONVOLVE_ROUND
-
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst,
@@ -855,9 +838,7 @@
#endif // CONFIG_HIGHBITDEPTH
build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
-#if CONFIG_CONVOLVE_ROUND
}
-#endif // CONFIG_CONVOLVE_ROUND
}
// TODO(sarahparker) av1_highbd_build_inter_predictor and
@@ -1038,17 +1019,11 @@
for (idx = 0; idx < b8_w; idx += b4_w) {
MB_MODE_INFO *this_mbmi = &xd->mi[row * xd->mi_stride + col]->mbmi;
is_compound = has_second_ref(this_mbmi);
-#if CONFIG_CONVOLVE_ROUND
DECLARE_ALIGNED(16, int32_t, tmp_dst[8 * 8]);
int tmp_dst_stride = 8;
assert(w <= 8 && h <= 8);
-#endif // CONFIG_CONVOLVE_ROUND
-#if CONFIG_CONVOLVE_ROUND
ConvolveParams conv_params =
get_conv_params_no_round(0, 0, plane, tmp_dst, tmp_dst_stride);
-#else
- ConvolveParams conv_params = get_conv_params(0, 0, plane);
-#endif
#if CONFIG_JNT_COMP
conv_params.use_jnt_comp_avg = 0;
#endif // CONFIG_JNT_COMP
@@ -1153,7 +1128,6 @@
(mi_y >> pd->subsampling_y) + y, plane, ref, mi, build_for_obmc,
xs, ys, xd);
} // for (ref = 0; ref < 1 + is_compound; ++ref)
-#if CONFIG_CONVOLVE_ROUND
if (conv_params.do_post_rounding) {
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
@@ -1169,7 +1143,6 @@
FILTER_BITS * 2 + is_compound - conv_params.round_0 -
conv_params.round_1);
}
-#endif // CONFIG_CONVOLVE_ROUND
++col;
}
++row;
@@ -1184,9 +1157,7 @@
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
uint8_t *pre[2];
SubpelParams subpel_params[2];
-#if CONFIG_CONVOLVE_ROUND
DECLARE_ALIGNED(16, int32_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]);
-#endif // CONFIG_CONVOLVE_ROUND
for (ref = 0; ref < 1 + is_compound; ++ref) {
#if CONFIG_INTRABC
@@ -1251,7 +1222,6 @@
}
}
-#if CONFIG_CONVOLVE_ROUND
ConvolveParams conv_params =
get_conv_params_no_round(ref, ref, plane, tmp_dst, MAX_SB_SIZE);
#if CONFIG_JNT_COMP
@@ -1260,10 +1230,6 @@
&conv_params.use_jnt_comp_avg, is_compound);
#endif // CONFIG_JNT_COMP
-#else
- ConvolveParams conv_params = get_conv_params(ref, ref, plane);
-#endif // CONFIG_CONVOLVE_ROUND
-
for (ref = 0; ref < 1 + is_compound; ++ref) {
#if CONFIG_INTRABC
const struct scale_factors *const sf =
@@ -1301,7 +1267,6 @@
subpel_params[ref].ys, xd);
}
-#if CONFIG_CONVOLVE_ROUND
// TODO(angiebird): This part needs optimization
if (conv_params.do_post_rounding) {
#if CONFIG_HIGHBITDEPTH
@@ -1317,7 +1282,6 @@
FILTER_BITS * 2 + is_compound -
conv_params.round_0 - conv_params.round_1);
}
-#endif // CONFIG_CONVOLVE_ROUND
}
}
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 7c2883d..1790086 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -55,14 +55,10 @@
// TODO(afergs, debargha): Use a different scale convolve function
// that uses higher precision for subpel_x, subpel_y, xs, ys
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
-#if CONFIG_CONVOLVE_ROUND
av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys, 1,
conv_params);
conv_params->do_post_rounding = 1;
-#else
- assert(0);
-#endif // CONFIG_CONVOLVE_ROUND
} else {
assert(conv_params->round == CONVOLVE_OPT_ROUND);
av1_convolve_scale(src, src_stride, dst, dst_stride, w, h, interp_filters,
@@ -78,14 +74,10 @@
assert(xs <= SUBPEL_SHIFTS);
assert(ys <= SUBPEL_SHIFTS);
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
-#if CONFIG_CONVOLVE_ROUND
av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys, 0,
conv_params);
conv_params->do_post_rounding = 1;
-#else
- assert(0);
-#endif // CONFIG_CONVOLVE_ROUND
} else {
assert(conv_params->round == CONVOLVE_OPT_ROUND);
@@ -131,14 +123,10 @@
if (has_scale(xs, ys)) {
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
-#if CONFIG_CONVOLVE_ROUND
av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys,
1, conv_params, bd);
conv_params->do_post_rounding = 1;
-#else
- assert(0);
-#endif // CONFIG_CONVOLVE_ROUND
} else {
av1_highbd_convolve_scale(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys, avg,
@@ -154,14 +142,10 @@
assert(xs <= SUBPEL_SHIFTS);
assert(ys <= SUBPEL_SHIFTS);
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
-#if CONFIG_CONVOLVE_ROUND
av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys,
0, conv_params, bd);
conv_params->do_post_rounding = 1;
-#else
- assert(0);
-#endif // CONFIG_CONVOLVE_ROUND
} else {
InterpFilterParams filter_params_x, filter_params_y;
#if CONFIG_SHORT_FILTER
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 1a9c6dc..962f140 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -427,7 +427,6 @@
int16_t beta, int16_t gamma, int16_t delta) {
int32_t tmp[15 * 8];
int i, j, k, l, m;
-#if CONFIG_CONVOLVE_ROUND
const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
const int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
@@ -445,14 +444,6 @@
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int max_bits_horiz =
- bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
- const int offset_bits_vert =
- bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
-#endif
(void)max_bits_horiz;
for (i = p_row; i < p_row + p_height; i += 8) {
@@ -524,7 +515,7 @@
for (m = 0; m < 8; ++m) {
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
-#if CONFIG_CONVOLVE_ROUND
+
if (use_conv_params) {
CONV_BUF_TYPE *p =
&conv_params
@@ -555,9 +546,6 @@
*p = sum;
#endif // CONFIG_JNT_COMP
} else {
-#else
- {
-#endif
uint16_t *p =
&pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS);
@@ -748,7 +736,6 @@
int32_t tmp[15 * 8];
int i, j, k, l, m;
const int bd = 8;
-#if CONFIG_CONVOLVE_ROUND
const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
const int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
@@ -766,14 +753,6 @@
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int max_bits_horiz =
- bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
- const int offset_bits_vert =
- bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
-#endif
(void)max_bits_horiz;
for (i = p_row; i < p_row + p_height; i += 8) {
@@ -851,7 +830,7 @@
for (m = 0; m < 8; ++m) {
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
-#if CONFIG_CONVOLVE_ROUND
+
if (use_conv_params) {
CONV_BUF_TYPE *p =
&conv_params
@@ -882,9 +861,6 @@
*p = sum;
#endif // CONFIG_JNT_COMP
} else {
-#else
- {
-#endif
uint8_t *p =
&pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS);
diff --git a/av1/common/x86/convolve_2d_avx2.c b/av1/common/x86/convolve_2d_avx2.c
index ff8ade8..3c1a24d 100644
--- a/av1/common/x86/convolve_2d_avx2.c
+++ b/av1/common/x86/convolve_2d_avx2.c
@@ -17,239 +17,6 @@
#include "aom_dsp/aom_filter.h"
#include "av1/common/convolve.h"
-#if CONFIG_COMPOUND_ROUND
-void av1_convolve_2d_avx2(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- DECLARE_ALIGNED(32, uint8_t,
- im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = MAX_SB_SIZE;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int do_average = conv_params->do_average;
- const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- const __m256i zero = _mm256_setzero_si256();
-
- /* Horizontal filter */
- {
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const __m128i coeffs_x8 = _mm_loadu_si128((__m128i *)x_filter);
- // since not all compilers yet support _mm256_set_m128i()
- const __m256i coeffs_x = _mm256_insertf128_si256(
- _mm256_castsi128_si256(coeffs_x8), coeffs_x8, 1);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m256i tmp_0 = _mm256_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m256i tmp_1 = _mm256_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m256i coeff_01 = _mm256_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m256i coeff_23 = _mm256_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m256i coeff_45 = _mm256_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m256i coeff_67 = _mm256_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m256i round_const =
- _mm256_set1_epi32((1 << conv_params->round_0) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
- for (i = 0; i < im_h; ++i) {
- for (j = 0; j < w; j += 16) {
- const __m256i data = _mm256_permute4x64_epi64(
- _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]),
- _MM_SHUFFLE(2, 1, 1, 0));
-
- // Filter even-index pixels
- const __m256i src_0 = _mm256_unpacklo_epi8(data, zero);
- const __m256i res_0 = _mm256_madd_epi16(src_0, coeff_01);
- const __m256i src_2 =
- _mm256_unpacklo_epi8(_mm256_srli_si256(data, 2), zero);
- const __m256i res_2 = _mm256_madd_epi16(src_2, coeff_23);
- const __m256i src_4 =
- _mm256_unpacklo_epi8(_mm256_srli_si256(data, 4), zero);
- const __m256i res_4 = _mm256_madd_epi16(src_4, coeff_45);
- const __m256i src_6 =
- _mm256_unpacklo_epi8(_mm256_srli_si256(data, 6), zero);
- const __m256i res_6 = _mm256_madd_epi16(src_6, coeff_67);
-
- __m256i res_even = _mm256_add_epi32(_mm256_add_epi32(res_0, res_4),
- _mm256_add_epi32(res_2, res_6));
- res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const),
- round_shift);
-
- // Filter odd-index pixels
- const __m256i src_1 =
- _mm256_unpacklo_epi8(_mm256_srli_si256(data, 1), zero);
- const __m256i res_1 = _mm256_madd_epi16(src_1, coeff_01);
- const __m256i src_3 =
- _mm256_unpacklo_epi8(_mm256_srli_si256(data, 3), zero);
- const __m256i res_3 = _mm256_madd_epi16(src_3, coeff_23);
- const __m256i src_5 =
- _mm256_unpacklo_epi8(_mm256_srli_si256(data, 5), zero);
- const __m256i res_5 = _mm256_madd_epi16(src_5, coeff_45);
- const __m256i src_7 =
- _mm256_unpacklo_epi8(_mm256_srli_si256(data, 7), zero);
- const __m256i res_7 = _mm256_madd_epi16(src_7, coeff_67);
-
- __m256i res_odd = _mm256_add_epi32(_mm256_add_epi32(res_1, res_5),
- _mm256_add_epi32(res_3, res_7));
- res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const),
- round_shift);
-
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- __m256i res = _mm256_packs_epi32(res_even, res_odd);
- res = _mm256_packus_epi16(res, res);
- _mm_storel_epi64((__m128i *)&im_block[i * im_stride + j],
- _mm256_extractf128_si256(res, 0));
- _mm_storel_epi64((__m128i *)&im_block[i * im_stride + j + 8],
- _mm256_extractf128_si256(res, 1));
- }
- }
- }
-
- /* Vertical filter */
- {
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const __m128i coeffs_y8 = _mm_loadu_si128((__m128i *)y_filter);
- const __m256i coeffs_y = _mm256_insertf128_si256(
- _mm256_castsi128_si256(coeffs_y8), coeffs_y8, 1);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m256i tmp_0 = _mm256_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m256i tmp_1 = _mm256_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m256i coeff_01 = _mm256_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m256i coeff_23 = _mm256_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m256i coeff_45 = _mm256_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m256i coeff_67 = _mm256_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m256i round_const =
- _mm256_set1_epi32((1 << conv_params->round_1) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 16) {
- // Filter even-index pixels
- const uint8_t *data = &im_block[i * im_stride + j];
- const __m256i src_01 = _mm256_unpacklo_epi8(
- _mm256_inserti128_si256(
- _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(data + 0 * im_stride))),
- _mm_loadl_epi64((__m128i *)(data + 0 * im_stride + 8)), 1),
- _mm256_inserti128_si256(
- _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(data + 1 * im_stride))),
- _mm_loadl_epi64((__m128i *)(data + 1 * im_stride + 8)), 1));
- const __m256i src_23 = _mm256_unpacklo_epi8(
- _mm256_inserti128_si256(
- _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(data + 2 * im_stride))),
- _mm_loadl_epi64((__m128i *)(data + 2 * im_stride + 8)), 1),
- _mm256_inserti128_si256(
- _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(data + 3 * im_stride))),
- _mm_loadl_epi64((__m128i *)(data + 3 * im_stride + 8)), 1));
- const __m256i src_45 = _mm256_unpacklo_epi8(
- _mm256_inserti128_si256(
- _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(data + 4 * im_stride))),
- _mm_loadl_epi64((__m128i *)(data + 4 * im_stride + 8)), 1),
- _mm256_inserti128_si256(
- _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(data + 5 * im_stride))),
- _mm_loadl_epi64((__m128i *)(data + 5 * im_stride + 8)), 1));
- const __m256i src_67 = _mm256_unpacklo_epi8(
- _mm256_inserti128_si256(
- _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(data + 6 * im_stride))),
- _mm_loadl_epi64((__m128i *)(data + 6 * im_stride + 8)), 1),
- _mm256_inserti128_si256(
- _mm256_castsi128_si256(
- _mm_loadl_epi64((__m128i *)(data + 7 * im_stride))),
- _mm_loadl_epi64((__m128i *)(data + 7 * im_stride + 8)), 1));
-
- const __m256i src_0 = _mm256_unpacklo_epi8(src_01, zero);
- const __m256i src_2 = _mm256_unpacklo_epi8(src_23, zero);
- const __m256i src_4 = _mm256_unpacklo_epi8(src_45, zero);
- const __m256i src_6 = _mm256_unpacklo_epi8(src_67, zero);
-
- const __m256i res_0 = _mm256_madd_epi16(src_0, coeff_01);
- const __m256i res_2 = _mm256_madd_epi16(src_2, coeff_23);
- const __m256i res_4 = _mm256_madd_epi16(src_4, coeff_45);
- const __m256i res_6 = _mm256_madd_epi16(src_6, coeff_67);
-
- const __m256i res_even = _mm256_add_epi32(
- _mm256_add_epi32(res_0, res_2), _mm256_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m256i src_1 = _mm256_unpackhi_epi8(src_01, zero);
- const __m256i src_3 = _mm256_unpackhi_epi8(src_23, zero);
- const __m256i src_5 = _mm256_unpackhi_epi8(src_45, zero);
- const __m256i src_7 = _mm256_unpackhi_epi8(src_67, zero);
-
- const __m256i res_1 = _mm256_madd_epi16(src_1, coeff_01);
- const __m256i res_3 = _mm256_madd_epi16(src_3, coeff_23);
- const __m256i res_5 = _mm256_madd_epi16(src_5, coeff_45);
- const __m256i res_7 = _mm256_madd_epi16(src_7, coeff_67);
-
- const __m256i res_odd = _mm256_add_epi32(
- _mm256_add_epi32(res_1, res_3), _mm256_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m256i res_lo = _mm256_unpacklo_epi32(res_even, res_odd);
- const __m256i res_hi = _mm256_unpackhi_epi32(res_even, res_odd);
-
- const __m256i res_lo_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_lo, round_const), round_shift);
- const __m256i res_hi_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_hi, round_const), round_shift);
-
- // Accumulate values into the destination buffer
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- if (do_average) {
- _mm_storeu_si128(
- p + 0, _mm_add_epi32(_mm_loadu_si128(p + 0),
- _mm256_extractf128_si256(res_lo_round, 0)));
- _mm_storeu_si128(
- p + 1, _mm_add_epi32(_mm_loadu_si128(p + 1),
- _mm256_extractf128_si256(res_hi_round, 0)));
- if (w - j > 8) {
- _mm_storeu_si128(p + 2, _mm_add_epi32(_mm_loadu_si128(p + 2),
- _mm256_extractf128_si256(
- res_lo_round, 1)));
- _mm_storeu_si128(p + 3, _mm_add_epi32(_mm_loadu_si128(p + 3),
- _mm256_extractf128_si256(
- res_hi_round, 1)));
- }
- } else {
- _mm_storeu_si128(p + 0, _mm256_extractf128_si256(res_lo_round, 0));
- _mm_storeu_si128(p + 1, _mm256_extractf128_si256(res_hi_round, 0));
- if (w - j > 8) {
- _mm_storeu_si128(p + 2, _mm256_extractf128_si256(res_lo_round, 1));
- _mm_storeu_si128(p + 3, _mm256_extractf128_si256(res_hi_round, 1));
- }
- }
- }
- }
- }
-}
-#else
void av1_convolve_2d_avx2(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
@@ -459,4 +226,3 @@
}
}
}
-#endif
diff --git a/av1/common/x86/convolve_2d_sse2.c b/av1/common/x86/convolve_2d_sse2.c
index 13275b6..34b7dc7 100644
--- a/av1/common/x86/convolve_2d_sse2.c
+++ b/av1/common/x86/convolve_2d_sse2.c
@@ -17,185 +17,6 @@
#include "aom_dsp/aom_filter.h"
#include "av1/common/convolve.h"
-#if CONFIG_COMPOUND_ROUND
-void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
- InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- DECLARE_ALIGNED(16, uint8_t,
- im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = MAX_SB_SIZE;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int do_average = conv_params->do_average;
- const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- const __m128i zero = _mm_setzero_si128();
-
- /* Horizontal filter */
- {
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << conv_params->round_0) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
- for (i = 0; i < im_h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
-
- // Filter even-index pixels
- const __m128i src_0 = _mm_unpacklo_epi8(data, zero);
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i src_2 = _mm_unpacklo_epi8(_mm_srli_si128(data, 2), zero);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i src_4 = _mm_unpacklo_epi8(_mm_srli_si128(data, 4), zero);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i src_6 = _mm_unpacklo_epi8(_mm_srli_si128(data, 6), zero);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even =
- _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero);
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i src_3 = _mm_unpacklo_epi8(_mm_srli_si128(data, 3), zero);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i src_5 = _mm_unpacklo_epi8(_mm_srli_si128(data, 5), zero);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i src_7 = _mm_unpacklo_epi8(_mm_srli_si128(data, 7), zero);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
-
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- __m128i res = _mm_packs_epi32(res_even, res_odd);
- res = _mm_packus_epi16(res, res);
- _mm_storel_epi64((__m128i *)&im_block[i * im_stride + j], res);
- }
- }
- }
-
- /* Vertical filter */
- {
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << conv_params->round_1) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- // Filter even-index pixels
- const uint8_t *data = &im_block[i * im_stride + j];
- const __m128i src_01 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 0 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 1 * im_stride)));
- const __m128i src_23 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 2 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 3 * im_stride)));
- const __m128i src_45 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 4 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 5 * im_stride)));
- const __m128i src_67 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 6 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 7 * im_stride)));
-
- const __m128i src_0 = _mm_unpacklo_epi8(src_01, zero);
- const __m128i src_2 = _mm_unpacklo_epi8(src_23, zero);
- const __m128i src_4 = _mm_unpacklo_epi8(src_45, zero);
- const __m128i src_6 = _mm_unpacklo_epi8(src_67, zero);
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpackhi_epi8(src_01, zero);
- const __m128i src_3 = _mm_unpackhi_epi8(src_23, zero);
- const __m128i src_5 = _mm_unpackhi_epi8(src_45, zero);
- const __m128i src_7 = _mm_unpackhi_epi8(src_67, zero);
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- const __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- const __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
- // Accumulate values into the destination buffer
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- if (do_average) {
- _mm_storeu_si128(p + 0,
- _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
- } else {
- _mm_storeu_si128(p + 0, res_lo_round);
- _mm_storeu_si128(p + 1, res_hi_round);
- }
- }
- }
- }
-}
-#else
void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
@@ -698,4 +519,3 @@
}
}
#endif // CONFIG_JNT_COMP
-#endif // CONFIG_COMPOUND_ROUND
diff --git a/av1/common/x86/convolve_2d_sse4.c b/av1/common/x86/convolve_2d_sse4.c
index 71c32e7..ea0811a 100644
--- a/av1/common/x86/convolve_2d_sse4.c
+++ b/av1/common/x86/convolve_2d_sse4.c
@@ -19,221 +19,6 @@
#include "av1/common/convolve.h"
#if CONFIG_JNT_COMP
-#if CONFIG_COMPOUND_ROUND
-void av1_jnt_convolve_2d_sse4_1(const uint8_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
- int h, InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params) {
- DECLARE_ALIGNED(16, uint8_t,
- im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = MAX_SB_SIZE;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int do_average = conv_params->do_average;
- const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- const __m128i zero = _mm_setzero_si128();
-
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set_epi32(w0, w0, w0, w0);
- const __m128i wt1 = _mm_set_epi32(w1, w1, w1, w1);
- const int jnt_round_const = 1 << (DIST_PRECISION_BITS - 2);
- const __m128i jnt_r = _mm_set_epi32(jnt_round_const, jnt_round_const,
- jnt_round_const, jnt_round_const);
-
- /* Horizontal filter */
- {
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << conv_params->round_0) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
- for (i = 0; i < im_h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
-
- // Filter even-index pixels
- const __m128i src_0 = _mm_unpacklo_epi8(data, zero);
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i src_2 = _mm_unpacklo_epi8(_mm_srli_si128(data, 2), zero);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i src_4 = _mm_unpacklo_epi8(_mm_srli_si128(data, 4), zero);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i src_6 = _mm_unpacklo_epi8(_mm_srli_si128(data, 6), zero);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even =
- _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpacklo_epi8(_mm_srli_si128(data, 1), zero);
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i src_3 = _mm_unpacklo_epi8(_mm_srli_si128(data, 3), zero);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i src_5 = _mm_unpacklo_epi8(_mm_srli_si128(data, 5), zero);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i src_7 = _mm_unpacklo_epi8(_mm_srli_si128(data, 7), zero);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
-
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- __m128i res = _mm_packs_epi32(res_even, res_odd);
- res = _mm_packus_epi16(res, res);
- _mm_storel_epi64((__m128i *)&im_block[i * im_stride + j], res);
- }
- }
- }
-
- /* Vertical filter */
- {
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << conv_params->round_1) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- // Filter even-index pixels
- const uint8_t *data = &im_block[i * im_stride + j];
- const __m128i src_01 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 0 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 1 * im_stride)));
- const __m128i src_23 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 2 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 3 * im_stride)));
- const __m128i src_45 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 4 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 5 * im_stride)));
- const __m128i src_67 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((__m128i *)(data + 6 * im_stride)),
- _mm_loadl_epi64((__m128i *)(data + 7 * im_stride)));
-
- const __m128i src_0 = _mm_unpacklo_epi8(src_01, zero);
- const __m128i src_2 = _mm_unpacklo_epi8(src_23, zero);
- const __m128i src_4 = _mm_unpacklo_epi8(src_45, zero);
- const __m128i src_6 = _mm_unpacklo_epi8(src_67, zero);
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 = _mm_unpackhi_epi8(src_01, zero);
- const __m128i src_3 = _mm_unpackhi_epi8(src_23, zero);
- const __m128i src_5 = _mm_unpackhi_epi8(src_45, zero);
- const __m128i src_7 = _mm_unpackhi_epi8(src_67, zero);
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- const __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- const __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
- if (conv_params->use_jnt_comp_avg) {
- // NOTE(chengchen):
- // only this part is different from av1_convolve_2d_sse2
- // original c function at: av1/common/convolve.c:
- // av1_convolve_2d_c() and av1_jnt_convolve_2d_c()
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- if (do_average) {
- _mm_storeu_si128(
- p + 0, _mm_srai_epi32(
- _mm_add_epi32(_mm_add_epi32(_mm_loadu_si128(p + 0),
- _mm_mullo_epi32(
- res_lo_round, wt1)),
- jnt_r),
- DIST_PRECISION_BITS - 1));
-
- _mm_storeu_si128(
- p + 1, _mm_srai_epi32(
- _mm_add_epi32(_mm_add_epi32(_mm_loadu_si128(p + 1),
- _mm_mullo_epi32(
- res_hi_round, wt1)),
- jnt_r),
- DIST_PRECISION_BITS - 1));
- } else {
- _mm_storeu_si128(p + 0, _mm_mullo_epi32(res_lo_round, wt0));
- _mm_storeu_si128(p + 1, _mm_mullo_epi32(res_hi_round, wt0));
- }
- } else {
- // Accumulate values into the destination buffer
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- if (do_average) {
- _mm_storeu_si128(
- p + 0, _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
- _mm_storeu_si128(
- p + 1, _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
- } else {
- _mm_storeu_si128(p + 0, res_lo_round);
- _mm_storeu_si128(p + 1, res_hi_round);
- }
- }
- }
- }
- }
-}
-#else // CONFIG_COMPOUND_ROUND
void av1_jnt_convolve_2d_sse4_1(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w,
int h, InterpFilterParams *filter_params_x,
@@ -450,5 +235,4 @@
}
}
}
-#endif // CONFIG_COMPOUND_ROUND
#endif // CONFIG_JNT_COMP
diff --git a/av1/common/x86/convolve_avx2.c b/av1/common/x86/convolve_avx2.c
index a0e5871..93e7295 100644
--- a/av1/common/x86/convolve_avx2.c
+++ b/av1/common/x86/convolve_avx2.c
@@ -14,7 +14,6 @@
#include "aom_dsp/aom_dsp_common.h"
#include "./av1_rtcd.h"
-#if CONFIG_CONVOLVE_ROUND
static const uint32_t sindex[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
// 16 epi16 pixels
@@ -339,4 +338,3 @@
}
}
#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_CONVOLVE_ROUND
diff --git a/av1/common/x86/convolve_sse2.c b/av1/common/x86/convolve_sse2.c
index f137ef0..08ee8c3 100644
--- a/av1/common/x86/convolve_sse2.c
+++ b/av1/common/x86/convolve_sse2.c
@@ -17,7 +17,6 @@
#include "aom_dsp/aom_filter.h"
#include "av1/common/convolve.h"
-#if !CONFIG_COMPOUND_ROUND
void av1_convolve_y_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
@@ -207,4 +206,3 @@
}
}
}
-#endif
diff --git a/av1/common/x86/highbd_convolve_2d_avx2.c b/av1/common/x86/highbd_convolve_2d_avx2.c
index 7020763..c28c63d 100644
--- a/av1/common/x86/highbd_convolve_2d_avx2.c
+++ b/av1/common/x86/highbd_convolve_2d_avx2.c
@@ -18,227 +18,6 @@
#include "aom_dsp/aom_filter.h"
#include "av1/common/convolve.h"
-#if CONFIG_COMPOUND_ROUND
-void av1_highbd_convolve_2d_avx2(const uint16_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
- int h, InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- DECLARE_ALIGNED(32, int16_t,
- im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = MAX_SB_SIZE;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int do_average = conv_params->do_average;
- const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- /* Horizontal filter */
- {
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const __m128i coeffs_x8 = _mm_loadu_si128((__m128i *)x_filter);
- // since not all compilers yet support _mm256_set_m128i()
- const __m256i coeffs_x = _mm256_insertf128_si256(
- _mm256_castsi128_si256(coeffs_x8), coeffs_x8, 1);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m256i tmp_0 = _mm256_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m256i tmp_1 = _mm256_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m256i coeff_01 = _mm256_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m256i coeff_23 = _mm256_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m256i coeff_45 = _mm256_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m256i coeff_67 = _mm256_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m256i round_const =
- _mm256_set1_epi32((1 << conv_params->round_0) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
- for (i = 0; i < im_h; ++i) {
- for (j = 0; j < w; j += 16) {
- const __m256i data =
- _mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
- const __m128i data2_1 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 16]);
- const __m256i data2 = _mm256_insertf128_si256(
- _mm256_castsi128_si256(data2_1), data2_1, 1);
-
- // Filter even-index pixels
- const __m256i res_0 = _mm256_madd_epi16(data, coeff_01);
- const __m256i res_2 = _mm256_madd_epi16(
- _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
- data, 4),
- coeff_23);
- const __m256i res_4 = _mm256_madd_epi16(
- _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
- data, 8),
- coeff_45);
- const __m256i res_6 = _mm256_madd_epi16(
- _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
- data, 12),
- coeff_67);
-
- __m256i res_even = _mm256_add_epi32(_mm256_add_epi32(res_0, res_4),
- _mm256_add_epi32(res_2, res_6));
- res_even = _mm256_sra_epi32(_mm256_add_epi32(res_even, round_const),
- round_shift);
-
- // Filter odd-index pixels
- const __m256i res_1 = _mm256_madd_epi16(
- _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
- data, 2),
- coeff_01);
- const __m256i res_3 = _mm256_madd_epi16(
- _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
- data, 6),
- coeff_23);
- const __m256i res_5 = _mm256_madd_epi16(
- _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
- data, 10),
- coeff_45);
- const __m256i res_7 = _mm256_madd_epi16(
- _mm256_alignr_epi8(_mm256_permute2x128_si256(data2, data, 0x13),
- data, 14),
- coeff_67);
-
- __m256i res_odd = _mm256_add_epi32(_mm256_add_epi32(res_1, res_5),
- _mm256_add_epi32(res_3, res_7));
- res_odd = _mm256_sra_epi32(_mm256_add_epi32(res_odd, round_const),
- round_shift);
-
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- const __m256i maxval = _mm256_set1_epi16((1 << bd) - 1);
- __m256i res = _mm256_packs_epi32(res_even, res_odd);
- res = _mm256_max_epi16(_mm256_min_epi16(res, maxval),
- _mm256_setzero_si256());
- _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j],
- _mm256_extractf128_si256(res, 0));
- _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j + 8],
- _mm256_extractf128_si256(res, 1));
- }
- }
- }
-
- /* Vertical filter */
- {
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const __m128i coeffs_y8 = _mm_loadu_si128((__m128i *)y_filter);
- const __m256i coeffs_y = _mm256_insertf128_si256(
- _mm256_castsi128_si256(coeffs_y8), coeffs_y8, 1);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m256i tmp_0 = _mm256_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m256i tmp_1 = _mm256_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m256i coeff_01 = _mm256_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m256i coeff_23 = _mm256_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m256i coeff_45 = _mm256_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m256i coeff_67 = _mm256_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m256i round_const =
- _mm256_set1_epi32((1 << conv_params->round_1) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 16) {
- // Filter even-index pixels
- const int16_t *data = &im_block[i * im_stride + j];
- const __m256i src_0 =
- _mm256_unpacklo_epi16(*(__m256i *)(data + 0 * im_stride),
- *(__m256i *)(data + 1 * im_stride));
- const __m256i src_2 =
- _mm256_unpacklo_epi16(*(__m256i *)(data + 2 * im_stride),
- *(__m256i *)(data + 3 * im_stride));
- const __m256i src_4 =
- _mm256_unpacklo_epi16(*(__m256i *)(data + 4 * im_stride),
- *(__m256i *)(data + 5 * im_stride));
- const __m256i src_6 =
- _mm256_unpacklo_epi16(*(__m256i *)(data + 6 * im_stride),
- *(__m256i *)(data + 7 * im_stride));
-
- const __m256i res_0 = _mm256_madd_epi16(src_0, coeff_01);
- const __m256i res_2 = _mm256_madd_epi16(src_2, coeff_23);
- const __m256i res_4 = _mm256_madd_epi16(src_4, coeff_45);
- const __m256i res_6 = _mm256_madd_epi16(src_6, coeff_67);
-
- const __m256i res_even = _mm256_add_epi32(
- _mm256_add_epi32(res_0, res_2), _mm256_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m256i src_1 =
- _mm256_unpackhi_epi16(*(__m256i *)(data + 0 * im_stride),
- *(__m256i *)(data + 1 * im_stride));
- const __m256i src_3 =
- _mm256_unpackhi_epi16(*(__m256i *)(data + 2 * im_stride),
- *(__m256i *)(data + 3 * im_stride));
- const __m256i src_5 =
- _mm256_unpackhi_epi16(*(__m256i *)(data + 4 * im_stride),
- *(__m256i *)(data + 5 * im_stride));
- const __m256i src_7 =
- _mm256_unpackhi_epi16(*(__m256i *)(data + 6 * im_stride),
- *(__m256i *)(data + 7 * im_stride));
-
- const __m256i res_1 = _mm256_madd_epi16(src_1, coeff_01);
- const __m256i res_3 = _mm256_madd_epi16(src_3, coeff_23);
- const __m256i res_5 = _mm256_madd_epi16(src_5, coeff_45);
- const __m256i res_7 = _mm256_madd_epi16(src_7, coeff_67);
-
- const __m256i res_odd = _mm256_add_epi32(
- _mm256_add_epi32(res_1, res_3), _mm256_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m256i res_lo = _mm256_unpacklo_epi32(res_even, res_odd);
- const __m256i res_hi = _mm256_unpackhi_epi32(res_even, res_odd);
-
- const __m256i res_lo_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_lo, round_const), round_shift);
- const __m256i res_hi_round = _mm256_sra_epi32(
- _mm256_add_epi32(res_hi, round_const), round_shift);
-
- // Accumulate values into the destination buffer
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- if (do_average) {
- _mm_storeu_si128(
- p + 0, _mm_add_epi32(_mm_loadu_si128(p + 0),
- _mm256_extractf128_si256(res_lo_round, 0)));
- _mm_storeu_si128(
- p + 1, _mm_add_epi32(_mm_loadu_si128(p + 1),
- _mm256_extractf128_si256(res_hi_round, 0)));
- if (w - j > 8) {
- _mm_storeu_si128(p + 2, _mm_add_epi32(_mm_loadu_si128(p + 2),
- _mm256_extractf128_si256(
- res_lo_round, 1)));
- _mm_storeu_si128(p + 3, _mm_add_epi32(_mm_loadu_si128(p + 3),
- _mm256_extractf128_si256(
- res_hi_round, 1)));
- }
- } else {
- _mm_storeu_si128(p + 0, _mm256_extractf128_si256(res_lo_round, 0));
- _mm_storeu_si128(p + 1, _mm256_extractf128_si256(res_hi_round, 0));
- if (w - j > 8) {
- _mm_storeu_si128(p + 2, _mm256_extractf128_si256(res_lo_round, 1));
- _mm_storeu_si128(p + 3, _mm256_extractf128_si256(res_hi_round, 1));
- }
- }
- }
- }
- }
-}
-#else
void av1_highbd_convolve_2d_avx2(const uint16_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w,
int h, InterpFilterParams *filter_params_x,
@@ -458,4 +237,3 @@
}
}
}
-#endif
diff --git a/av1/common/x86/highbd_convolve_2d_ssse3.c b/av1/common/x86/highbd_convolve_2d_ssse3.c
index 195f0f5..95055b0 100644
--- a/av1/common/x86/highbd_convolve_2d_ssse3.c
+++ b/av1/common/x86/highbd_convolve_2d_ssse3.c
@@ -18,188 +18,6 @@
#include "aom_dsp/aom_filter.h"
#include "av1/common/convolve.h"
-#if CONFIG_COMPOUND_ROUND
-void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
- CONV_BUF_TYPE *dst, int dst_stride, int w,
- int h, InterpFilterParams *filter_params_x,
- InterpFilterParams *filter_params_y,
- const int subpel_x_q4, const int subpel_y_q4,
- ConvolveParams *conv_params, int bd) {
- DECLARE_ALIGNED(16, int16_t,
- im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
- int im_h = h + filter_params_y->taps - 1;
- int im_stride = MAX_SB_SIZE;
- int i, j;
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const int do_average = conv_params->do_average;
- const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- /* Horizontal filter */
- {
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
- const __m128i coeffs_x = _mm_loadu_si128((__m128i *)x_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_x, coeffs_x);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_x, coeffs_x);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << conv_params->round_0) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
-
- for (i = 0; i < im_h; ++i) {
- for (j = 0; j < w; j += 8) {
- const __m128i data =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j]);
- const __m128i data2 =
- _mm_loadu_si128((__m128i *)&src_ptr[i * src_stride + j + 8]);
-
- // Filter even-index pixels
- const __m128i res_0 = _mm_madd_epi16(data, coeff_01);
- const __m128i res_2 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 4), coeff_23);
- const __m128i res_4 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 8), coeff_45);
- const __m128i res_6 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 12), coeff_67);
-
- __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
- _mm_add_epi32(res_2, res_6));
- res_even =
- _mm_sra_epi32(_mm_add_epi32(res_even, round_const), round_shift);
-
- // Filter odd-index pixels
- const __m128i res_1 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 2), coeff_01);
- const __m128i res_3 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 6), coeff_23);
- const __m128i res_5 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 10), coeff_45);
- const __m128i res_7 =
- _mm_madd_epi16(_mm_alignr_epi8(data2, data, 14), coeff_67);
-
- __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
- _mm_add_epi32(res_3, res_7));
- res_odd =
- _mm_sra_epi32(_mm_add_epi32(res_odd, round_const), round_shift);
-
- // Pack in the column order 0, 2, 4, 6, 1, 3, 5, 7
- const __m128i maxval = _mm_set1_epi16((1 << bd) - 1);
- __m128i res = _mm_packs_epi32(res_even, res_odd);
- res = _mm_max_epi16(_mm_min_epi16(res, maxval), _mm_setzero_si128());
- _mm_storeu_si128((__m128i *)&im_block[i * im_stride + j], res);
- }
- }
- }
-
- /* Vertical filter */
- {
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
- const __m128i coeffs_y = _mm_loadu_si128((__m128i *)y_filter);
-
- // coeffs 0 1 0 1 2 3 2 3
- const __m128i tmp_0 = _mm_unpacklo_epi32(coeffs_y, coeffs_y);
- // coeffs 4 5 4 5 6 7 6 7
- const __m128i tmp_1 = _mm_unpackhi_epi32(coeffs_y, coeffs_y);
-
- // coeffs 0 1 0 1 0 1 0 1
- const __m128i coeff_01 = _mm_unpacklo_epi64(tmp_0, tmp_0);
- // coeffs 2 3 2 3 2 3 2 3
- const __m128i coeff_23 = _mm_unpackhi_epi64(tmp_0, tmp_0);
- // coeffs 4 5 4 5 4 5 4 5
- const __m128i coeff_45 = _mm_unpacklo_epi64(tmp_1, tmp_1);
- // coeffs 6 7 6 7 6 7 6 7
- const __m128i coeff_67 = _mm_unpackhi_epi64(tmp_1, tmp_1);
-
- const __m128i round_const =
- _mm_set1_epi32((1 << conv_params->round_1) >> 1);
- const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_1);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- // Filter even-index pixels
- const int16_t *data = &im_block[i * im_stride + j];
- const __m128i src_0 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_2 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_4 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_6 =
- _mm_unpacklo_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_0 = _mm_madd_epi16(src_0, coeff_01);
- const __m128i res_2 = _mm_madd_epi16(src_2, coeff_23);
- const __m128i res_4 = _mm_madd_epi16(src_4, coeff_45);
- const __m128i res_6 = _mm_madd_epi16(src_6, coeff_67);
-
- const __m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_2),
- _mm_add_epi32(res_4, res_6));
-
- // Filter odd-index pixels
- const __m128i src_1 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 0 * im_stride),
- *(__m128i *)(data + 1 * im_stride));
- const __m128i src_3 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 2 * im_stride),
- *(__m128i *)(data + 3 * im_stride));
- const __m128i src_5 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 4 * im_stride),
- *(__m128i *)(data + 5 * im_stride));
- const __m128i src_7 =
- _mm_unpackhi_epi16(*(__m128i *)(data + 6 * im_stride),
- *(__m128i *)(data + 7 * im_stride));
-
- const __m128i res_1 = _mm_madd_epi16(src_1, coeff_01);
- const __m128i res_3 = _mm_madd_epi16(src_3, coeff_23);
- const __m128i res_5 = _mm_madd_epi16(src_5, coeff_45);
- const __m128i res_7 = _mm_madd_epi16(src_7, coeff_67);
-
- const __m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_3),
- _mm_add_epi32(res_5, res_7));
-
- // Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- const __m128i res_lo_round =
- _mm_sra_epi32(_mm_add_epi32(res_lo, round_const), round_shift);
- const __m128i res_hi_round =
- _mm_sra_epi32(_mm_add_epi32(res_hi, round_const), round_shift);
-
- // Accumulate values into the destination buffer
- __m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- if (do_average) {
- _mm_storeu_si128(p + 0,
- _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
- } else {
- _mm_storeu_si128(p + 0, res_lo_round);
- _mm_storeu_si128(p + 1, res_hi_round);
- }
- }
- }
- }
-}
-#else
void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w,
int h, InterpFilterParams *filter_params_x,
@@ -383,4 +201,3 @@
}
}
}
-#endif
diff --git a/av1/common/x86/highbd_warp_plane_sse4.c b/av1/common/x86/highbd_warp_plane_sse4.c
index 7c358ec..d40a9696 100644
--- a/av1/common/x86/highbd_warp_plane_sse4.c
+++ b/av1/common/x86/highbd_warp_plane_sse4.c
@@ -28,7 +28,6 @@
#error "HORSHEAR_REDUCE_PREC_BITS < 5 not currently supported by SSSE3 filter"
#endif
int i, j, k;
-#if CONFIG_CONVOLVE_ROUND
const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
const int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
@@ -46,10 +45,6 @@
const int jnt_round_const = 1 << (DIST_PRECISION_BITS - 2);
const __m128i jnt_r = _mm_set1_epi32(jnt_round_const);
#endif // CONFIG_JNT_COMP
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
-#endif
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
@@ -310,7 +305,6 @@
__m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
__m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-#if CONFIG_CONVOLVE_ROUND
if (use_conv_params) {
__m128i *const p =
(__m128i *)&conv_params
@@ -369,9 +363,6 @@
#endif
}
} else {
-#else
- {
-#endif
// Round and pack into 8 bits
const __m128i round_const =
_mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
diff --git a/av1/common/x86/highbd_warp_plane_ssse3.c b/av1/common/x86/highbd_warp_plane_ssse3.c
index 71b0ec7..5eedf9a 100644
--- a/av1/common/x86/highbd_warp_plane_ssse3.c
+++ b/av1/common/x86/highbd_warp_plane_ssse3.c
@@ -28,7 +28,6 @@
#error "HORSHEAR_REDUCE_PREC_BITS < 5 not currently supported by SSSE3 filter"
#endif
int i, j, k;
-#if CONFIG_CONVOLVE_ROUND
const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
const int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
@@ -38,10 +37,6 @@
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
-#endif
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
@@ -302,7 +297,6 @@
__m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
__m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-#if CONFIG_CONVOLVE_ROUND
if (use_conv_params) {
__m128i *const p =
(__m128i *)&conv_params
@@ -324,9 +318,6 @@
_mm_storeu_si128(p + 1, res_hi);
}
} else {
-#else
- {
-#endif
// Round and pack into 8 bits
const __m128i round_const =
_mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
diff --git a/av1/common/x86/warp_plane_sse2.c b/av1/common/x86/warp_plane_sse2.c
index d30466a..6505d9a 100644
--- a/av1/common/x86/warp_plane_sse2.c
+++ b/av1/common/x86/warp_plane_sse2.c
@@ -24,7 +24,6 @@
__m128i tmp[15];
int i, j, k;
const int bd = 8;
-#if CONFIG_CONVOLVE_ROUND
const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
const int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
@@ -34,10 +33,6 @@
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
-#endif
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
@@ -298,7 +293,6 @@
__m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
__m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-#if CONFIG_CONVOLVE_ROUND
if (use_conv_params) {
__m128i *const p =
(__m128i *)&conv_params
@@ -320,9 +314,6 @@
_mm_storeu_si128(p + 1, res_hi);
}
} else {
-#else
- {
-#endif
// Round and pack into 8 bits
const __m128i round_const =
_mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
diff --git a/av1/common/x86/warp_plane_sse4.c b/av1/common/x86/warp_plane_sse4.c
index 6385c17..e0d6206 100644
--- a/av1/common/x86/warp_plane_sse4.c
+++ b/av1/common/x86/warp_plane_sse4.c
@@ -25,7 +25,6 @@
__m128i tmp[15];
int i, j, k;
const int bd = 8;
-#if CONFIG_CONVOLVE_ROUND
const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
const int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
@@ -43,10 +42,6 @@
const int jnt_round_const = 1 << (DIST_PRECISION_BITS - 2);
const __m128i jnt_r = _mm_set1_epi32(jnt_round_const);
#endif // CONFIG_JNT_COMP
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
-#endif
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
@@ -307,7 +302,6 @@
__m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
__m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-#if CONFIG_CONVOLVE_ROUND
if (use_conv_params) {
__m128i *const p =
(__m128i *)&conv_params
@@ -364,9 +358,6 @@
#endif // CONFIG_JNT_COMP
}
} else {
-#else
- {
-#endif
// Round and pack into 8 bits
const __m128i round_const =
_mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
diff --git a/av1/common/x86/warp_plane_ssse3.c b/av1/common/x86/warp_plane_ssse3.c
index 3986ad3..7bf3253 100644
--- a/av1/common/x86/warp_plane_ssse3.c
+++ b/av1/common/x86/warp_plane_ssse3.c
@@ -211,7 +211,6 @@
__m128i tmp[15];
int i, j, k;
const int bd = 8;
-#if CONFIG_CONVOLVE_ROUND
const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
const int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
@@ -221,10 +220,6 @@
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
-#else
- const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
- const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
-#endif
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
@@ -474,7 +469,6 @@
__m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
__m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-#if CONFIG_CONVOLVE_ROUND
if (use_conv_params) {
__m128i *const p =
(__m128i *)&conv_params
@@ -496,9 +490,6 @@
_mm_storeu_si128(p + 1, res_hi);
}
} else {
-#else
- {
-#endif
// Round and pack into 8 bits
const __m128i round_const =
_mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 43d9e71..167c203 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -114,8 +114,6 @@
set(CONFIG_CDEF_SINGLEPASS 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CFL 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_COLORSPACE_HEADERS 0 CACHE NUMBER "AV1 experiment flag.")
-set(CONFIG_COMPOUND_ROUND 0 CACHE NUMBER "AV1 experiment flag.")
-set(CONFIG_CONVOLVE_ROUND 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DAALA_TX 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DAALA_TX16 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DAALA_TX32 0 CACHE NUMBER "AV1 experiment flag.")
diff --git a/build/cmake/aom_experiment_deps.cmake b/build/cmake/aom_experiment_deps.cmake
index 1a7e563..0555c63 100644
--- a/build/cmake/aom_experiment_deps.cmake
+++ b/build/cmake/aom_experiment_deps.cmake
@@ -32,12 +32,6 @@
endif ()
endif ()
- if (CONFIG_COMPOUND_ROUND)
- if (NOT CONFIG_CONVOLVE_ROUND)
- change_config_and_warn(CONVOLVE_ROUND 1 CONFIG_COMPOUND_ROUND)
- endif ()
- endif ()
-
if (CONFIG_EOB_FIRST)
if (NOT CONFIG_LV_MAP)
change_config_and_warn(CONFIG_LV_MAP 1 CONFIG_EOB_FIRST)
diff --git a/configure b/configure
index a13efcf..2c119c4 100755
--- a/configure
+++ b/configure
@@ -252,8 +252,6 @@
rect_tx_ext_intra
short_filter
dual_filter
- convolve_round
- compound_round
tx64x64
ext_intra
filter_intra
@@ -496,7 +494,6 @@
soft_enable intra_edge
soft_enable mv_compress
soft_enable dual_filter
- soft_enable convolve_round
soft_enable aom_qm
soft_enable dist_8x8
soft_enable loop_restoration
@@ -540,7 +537,6 @@
enabled lv_map_multi && soft_enable lv_map
enabled eob_first && enable_feature lv_map
enabled txk_sel && soft_enable lv_map
- enabled compound_round && soft_enable convolve_round
enabled ext_intra_mod && enable_feature intra_edge
enabled intra_edge && enable_feature ext_intra
enabled mfmv && enable_feature frame_marker
diff --git a/test/test.cmake b/test/test.cmake
index a0cce48..e24f7e8 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -237,24 +237,21 @@
"${AOM_ROOT}/test/quantize_func_test.cc")
endif ()
- if (CONFIG_CONVOLVE_ROUND)
+ set(AOM_UNIT_TEST_ENCODER_SOURCES
+ ${AOM_UNIT_TEST_ENCODER_SOURCES}
+ "${AOM_ROOT}/test/convolve_round_test.cc")
+ if (HAVE_SSE2)
set(AOM_UNIT_TEST_ENCODER_SOURCES
${AOM_UNIT_TEST_ENCODER_SOURCES}
- "${AOM_ROOT}/test/convolve_round_test.cc")
- if (HAVE_SSE2)
- set(AOM_UNIT_TEST_ENCODER_SOURCES
- ${AOM_UNIT_TEST_ENCODER_SOURCES}
- "${AOM_ROOT}/test/av1_convolve_2d_test.cc"
- "${AOM_ROOT}/test/av1_convolve_2d_test_util.cc"
- "${AOM_ROOT}/test/av1_convolve_2d_test_util.h")
- endif ()
- if (NOT CONFIG_COMPOUND_ROUND)
- if (HAVE_SSE4_1)
- set(AOM_UNIT_TEST_ENCODER_SOURCES
- ${AOM_UNIT_TEST_ENCODER_SOURCES}
- "${AOM_ROOT}/test/av1_convolve_scale_test.cc")
- endif ()
- endif ()
+ "${AOM_ROOT}/test/av1_convolve_2d_test.cc"
+ "${AOM_ROOT}/test/av1_convolve_2d_test_util.cc"
+ "${AOM_ROOT}/test/av1_convolve_2d_test_util.h")
+ endif ()
+
+ if (HAVE_SSE4_1)
+ set(AOM_UNIT_TEST_ENCODER_SOURCES
+ ${AOM_UNIT_TEST_ENCODER_SOURCES}
+ "${AOM_ROOT}/test/av1_convolve_scale_test.cc")
endif ()
set(AOM_UNIT_TEST_ENCODER_SOURCES
diff --git a/test/test.mk b/test/test.mk
index 2389a2f..2a7b9ae 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -227,16 +227,11 @@
LIBAOM_TEST_SRCS-$(HAVE_SSE2) += hiprec_convolve_test_util.cc
LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += selfguided_filter_test.cc
endif
-ifeq ($(CONFIG_CONVOLVE_ROUND),yes)
LIBAOM_TEST_SRCS-$(HAVE_SSE2) += av1_convolve_2d_test_util.h
LIBAOM_TEST_SRCS-$(HAVE_SSE2) += av1_convolve_2d_test.cc
LIBAOM_TEST_SRCS-$(HAVE_SSE2) += av1_convolve_2d_test_util.cc
LIBAOM_TEST_SRCS-yes += convolve_round_test.cc
-endif
-
-ifeq (yesx,$(CONFIG_CONVOLVE_ROUND)x$(CONFIG_COMPOUND_ROUND))
LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += av1_convolve_scale_test.cc
-endif
ifeq ($(CONFIG_AV1_ENCODER),yes)
LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += corner_match_test.cc
diff --git a/test/warp_filter_test.cc b/test/warp_filter_test.cc
index ea052f8..c6fc23b 100644
--- a/test/warp_filter_test.cc
+++ b/test/warp_filter_test.cc
@@ -22,7 +22,7 @@
namespace {
-#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND && HAVE_SSE4_1
+#if CONFIG_JNT_COMP && HAVE_SSE4_1
TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
INSTANTIATE_TEST_CASE_P(
@@ -38,7 +38,7 @@
libaom_test::AV1HighbdWarpFilter::GetDefaultParams());
#endif
-#else // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND && HAVE_SSE4_1
+#else // CONFIG_JNT_COMP && HAVE_SSE4_1
TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
INSTANTIATE_TEST_CASE_P(
diff --git a/test/warp_filter_test_util.cc b/test/warp_filter_test_util.cc
index c815bf6..72b35ba 100644
--- a/test/warp_filter_test_util.cc
+++ b/test/warp_filter_test_util.cc
@@ -113,10 +113,8 @@
int32_t mat[8];
int16_t alpha, beta, gamma, delta;
ConvolveParams conv_params = get_conv_params(0, 0, 0);
-#if CONFIG_CONVOLVE_ROUND
int32_t *dsta = new int32_t[output_n];
int32_t *dstb = new int32_t[output_n];
-#endif
for (i = 0; i < num_iters; ++i) {
// Generate an input block and extend its borders horizontally
@@ -126,17 +124,15 @@
memset(input + r * stride - border, input[r * stride], border);
memset(input + r * stride + w, input[r * stride + (w - 1)], border);
}
-#if CONFIG_CONVOLVE_ROUND
+
const int use_no_round = rnd_.Rand8() & 1;
-#endif
for (sub_x = 0; sub_x < 2; ++sub_x)
for (sub_y = 0; sub_y < 2; ++sub_y) {
generate_model(mat, &alpha, &beta, &gamma, &delta);
-#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#if CONFIG_JNT_COMP
for (int ii = 0; ii < 2; ++ii) {
for (int jj = 0; jj < 5; ++jj) {
-#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
-#if CONFIG_CONVOLVE_ROUND
+#endif // CONFIG_JNT_COMP
if (use_no_round) {
// Prepare two copies of the destination
for (j = 0; j < out_w * out_h; ++j) {
@@ -148,8 +144,7 @@
} else {
conv_params = get_conv_params(0, 0, 0);
}
-#endif
-#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#if CONFIG_JNT_COMP
if (jj >= 4) {
conv_params.use_jnt_comp_avg = 0;
} else {
@@ -157,17 +152,15 @@
conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
}
-#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#endif // CONFIG_JNT_COMP
av1_warp_affine_c(mat, input, w, h, stride, output, 32, 32, out_w,
out_h, out_w, sub_x, sub_y, &conv_params, alpha,
beta, gamma, delta);
-#if CONFIG_CONVOLVE_ROUND
if (use_no_round) {
conv_params = get_conv_params_no_round(0, 0, 0, dstb, out_w);
}
-#endif
-#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#if CONFIG_JNT_COMP
if (jj >= 4) {
conv_params.use_jnt_comp_avg = 0;
} else {
@@ -175,12 +168,11 @@
conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
}
-#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#endif // CONFIG_JNT_COMP
test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma,
delta);
-#if CONFIG_CONVOLVE_ROUND
if (use_no_round) {
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(dsta[j], dstb[j])
@@ -192,25 +184,17 @@
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
}
-#else
- for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(output[j], output2[j])
- << "Pixel mismatch at index " << j << " = (" << (j % out_w)
- << ", " << (j / out_w) << ") on iteration " << i;
-#endif
-#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#if CONFIG_JNT_COMP
}
}
-#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#endif // CONFIG_JNT_COMP
}
}
delete[] input_;
delete[] output;
delete[] output2;
-#if CONFIG_CONVOLVE_ROUND
delete[] dsta;
delete[] dstb;
-#endif
}
} // namespace AV1WarpFilter
@@ -320,10 +304,8 @@
int32_t mat[8];
int16_t alpha, beta, gamma, delta;
ConvolveParams conv_params = get_conv_params(0, 0, 0);
-#if CONFIG_CONVOLVE_ROUND
int32_t *dsta = new int32_t[output_n];
int32_t *dstb = new int32_t[output_n];
-#endif
for (i = 0; i < num_iters; ++i) {
// Generate an input block and extend its borders horizontally
@@ -335,17 +317,14 @@
input[r * stride + w + c] = input[r * stride + (w - 1)];
}
}
-#if CONFIG_CONVOLVE_ROUND
const int use_no_round = rnd_.Rand8() & 1;
-#endif
for (sub_x = 0; sub_x < 2; ++sub_x)
for (sub_y = 0; sub_y < 2; ++sub_y) {
generate_model(mat, &alpha, &beta, &gamma, &delta);
-#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#if CONFIG_JNT_COMP
for (int ii = 0; ii < 2; ++ii) {
for (int jj = 0; jj < 5; ++jj) {
-#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
-#if CONFIG_CONVOLVE_ROUND
+#endif // CONFIG_JNT_COMP
if (use_no_round) {
// Prepare two copies of the destination
for (j = 0; j < out_w * out_h; ++j) {
@@ -357,8 +336,7 @@
} else {
conv_params = get_conv_params(0, 0, 0);
}
-#endif
-#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#if CONFIG_JNT_COMP
if (jj >= 4) {
conv_params.use_jnt_comp_avg = 0;
} else {
@@ -366,18 +344,16 @@
conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
}
-#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#endif // CONFIG_JNT_COMP
av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
out_w, out_h, out_w, sub_x, sub_y, bd,
&conv_params, alpha, beta, gamma, delta);
-#if CONFIG_CONVOLVE_ROUND
if (use_no_round) {
// TODO(angiebird): Change this to test_impl once we have SIMD
// implementation
conv_params = get_conv_params_no_round(0, 0, 0, dstb, out_w);
}
-#endif
-#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#if CONFIG_JNT_COMP
if (jj >= 4) {
conv_params.use_jnt_comp_avg = 0;
} else {
@@ -385,12 +361,11 @@
conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
}
-#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#endif // CONFIG_JNT_COMP
test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
out_w, sub_x, sub_y, bd, &conv_params, alpha, beta, gamma,
delta);
-#if CONFIG_CONVOLVE_ROUND
if (use_no_round) {
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(dsta[j], dstb[j])
@@ -402,26 +377,18 @@
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
}
-#else
- for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(output[j], output2[j])
- << "Pixel mismatch at index " << j << " = (" << (j % out_w)
- << ", " << (j / out_w) << ") on iteration " << i;
-#endif
-#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#if CONFIG_JNT_COMP
}
}
-#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
+#endif // CONFIG_JNT_COMP
}
}
delete[] input_;
delete[] output;
delete[] output2;
-#if CONFIG_CONVOLVE_ROUND
delete[] dsta;
delete[] dstb;
-#endif
}
} // namespace AV1HighbdWarpFilter
#endif // CONFIG_HIGHBITDEPTH