Remove offsets for dist-wtd-comp

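Guard the fwd_offset and bck_offset fields of ConvolveParams, and every
read or write of them, under the CONFIG_REMOVE_DIST_WTD_COMP config flag.
When the flag is enabled the fields are compiled out, and the x86 SIMD
convolve and warp code uses the fixed weight
1 << (DIST_PRECISION_BITS - 1) for both wt0 and wt1 instead.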

Change-Id: I5ae02e3981ff8188fbcaa83a2f28ba1f01f8bb66
diff --git a/av1/common/convolve.h b/av1/common/convolve.h
index 6a51e2e..f181a63 100644
--- a/av1/common/convolve.h
+++ b/av1/common/convolve.h
@@ -28,9 +28,9 @@
   int is_compound;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   int use_dist_wtd_comp_avg;
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   int fwd_offset;
   int bck_offset;
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
 } ConvolveParams;
 
 #define ROUND0_BITS 3
diff --git a/av1/common/x86/convolve_2d_sse2.c b/av1/common/x86/convolve_2d_sse2.c
index cdfd452..dda27b9 100644
--- a/av1/common/x86/convolve_2d_sse2.c
+++ b/av1/common/x86/convolve_2d_sse2.c
@@ -228,18 +228,20 @@
   const int do_average = conv_params->do_average;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
-  const int use_dist_wtd_comp_avg = 0;
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i left_shift = _mm_cvtsi32_si128(bits);
-  int i, j;
-
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m128i wt0 = _mm_set1_epi16(w0);
   const __m128i wt1 = _mm_set1_epi16(w1);
   const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+#else
+  const __m128i wt0 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt1 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+  const int use_dist_wtd_comp_avg = 0;
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i left_shift = _mm_cvtsi32_si128(bits);
+  int i, j;
 
   const int offset_0 =
       bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
diff --git a/av1/common/x86/highbd_convolve_2d_sse4.c b/av1/common/x86/highbd_convolve_2d_sse4.c
index 9e8b13b..773549b 100644
--- a/av1/common/x86/highbd_convolve_2d_sse4.c
+++ b/av1/common/x86/highbd_convolve_2d_sse4.c
@@ -35,13 +35,15 @@
   const int do_average = conv_params->do_average;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
-  const int use_dist_wtd_comp_avg = 0;
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m128i wt0 = _mm_set1_epi32(w0);
   const __m128i wt1 = _mm_set1_epi32(w1);
+#else
+  const __m128i wt0 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt1 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+  const int use_dist_wtd_comp_avg = 0;
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const __m128i zero = _mm_setzero_si128();
   int i, j;
 
@@ -183,17 +185,18 @@
   const int do_average = conv_params->do_average;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
-  const int use_dist_wtd_comp_avg = 0;
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
-  const int fo_vert = filter_params_y->taps / 2 - 1;
-  const int fo_horiz = filter_params_x->taps / 2 - 1;
-  const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m128i wt0 = _mm_set1_epi32(w0);
   const __m128i wt1 = _mm_set1_epi32(w1);
+#else
+  const int use_dist_wtd_comp_avg = 0;
+  const __m128i wt0 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt1 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
+  const int fo_vert = filter_params_y->taps / 2 - 1;
+  const int fo_horiz = filter_params_x->taps / 2 - 1;
+  const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
 
   const int offset_0 =
       bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
diff --git a/av1/common/x86/highbd_jnt_convolve_avx2.c b/av1/common/x86/highbd_jnt_convolve_avx2.c
index f3f5d7b..ae694c7 100644
--- a/av1/common/x86/highbd_jnt_convolve_avx2.c
+++ b/av1/common/x86/highbd_jnt_convolve_avx2.c
@@ -36,13 +36,15 @@
   const int do_average = conv_params->do_average;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
-  const int use_dist_wtd_comp_avg = 0;
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m256i wt0 = _mm256_set1_epi32(w0);
   const __m256i wt1 = _mm256_set1_epi32(w1);
+#else
+  const int use_dist_wtd_comp_avg = 0;
+  const __m256i wt0 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+  const __m256i wt1 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const __m256i zero = _mm256_setzero_si256();
   int i, j;
 
@@ -251,14 +253,16 @@
   const int do_average = conv_params->do_average;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
-  const int use_dist_wtd_comp_avg = 0;
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
-
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m256i wt0 = _mm256_set1_epi32(w0);
   const __m256i wt1 = _mm256_set1_epi32(w1);
+#else
+  const int use_dist_wtd_comp_avg = 0;
+  const __m256i wt0 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+  const __m256i wt1 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
+
   const __m256i zero = _mm256_setzero_si256();
 
   const __m256i round_const_x = _mm256_set1_epi32(
@@ -484,13 +488,15 @@
   const int do_average = conv_params->do_average;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
-  const int use_dist_wtd_comp_avg = 0;
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m256i wt0 = _mm256_set1_epi32(w0);
   const __m256i wt1 = _mm256_set1_epi32(w1);
+#else
+  const int use_dist_wtd_comp_avg = 0;
+  const __m256i wt0 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+  const __m256i wt1 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const __m256i zero = _mm256_setzero_si256();
 
   const __m256i round_const_x =
@@ -654,14 +660,16 @@
   const int do_average = conv_params->do_average;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
-  const int use_dist_wtd_comp_avg = 0;
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
-
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m256i wt0 = _mm256_set1_epi32(w0);
   const __m256i wt1 = _mm256_set1_epi32(w1);
+#else
+  const int use_dist_wtd_comp_avg = 0;
+  const __m256i wt0 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+  const __m256i wt1 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
+
   const __m256i round_const_y =
       _mm256_set1_epi32(((1 << conv_params->round_1) >> 1));
   const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
diff --git a/av1/common/x86/highbd_jnt_convolve_sse4.c b/av1/common/x86/highbd_jnt_convolve_sse4.c
index 52ed7e4..f119ca6 100644
--- a/av1/common/x86/highbd_jnt_convolve_sse4.c
+++ b/av1/common/x86/highbd_jnt_convolve_sse4.c
@@ -32,14 +32,16 @@
   const int do_average = conv_params->do_average;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
-  const int use_dist_wtd_comp_avg = 0;
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
-
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m128i wt0 = _mm_set1_epi32(w0);
   const __m128i wt1 = _mm_set1_epi32(w1);
+#else
+  const int use_dist_wtd_comp_avg = 0;
+  const __m128i wt0 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt1 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
+
   const __m128i round_const_y =
       _mm_set1_epi32(((1 << conv_params->round_1) >> 1));
   const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
@@ -276,13 +278,15 @@
   const int do_average = conv_params->do_average;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
-  const int use_dist_wtd_comp_avg = 0;
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m128i wt0 = _mm_set1_epi32(w0);
   const __m128i wt1 = _mm_set1_epi32(w1);
+#else
+  const int use_dist_wtd_comp_avg = 0;
+  const __m128i wt0 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt1 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const __m128i zero = _mm_setzero_si128();
 
   const __m128i round_const_x =
diff --git a/av1/common/x86/jnt_convolve_avx2.c b/av1/common/x86/jnt_convolve_avx2.c
index c39f741..b92f69c 100644
--- a/av1/common/x86/jnt_convolve_avx2.c
+++ b/av1/common/x86/jnt_convolve_avx2.c
@@ -21,10 +21,18 @@
 #include "av1/common/convolve.h"
 
 static INLINE __m256i unpack_weights_avx2(ConvolveParams *conv_params) {
+  (void)conv_params;
+#if !CONFIG_REMOVE_DIST_WTD_COMP
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m256i wt0 = _mm256_set1_epi16((int16_t)w0);
   const __m256i wt1 = _mm256_set1_epi16((int16_t)w1);
+#else
+  const __m256i wt0 =
+      _mm256_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+  const __m256i wt1 =
+      _mm256_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const __m256i wt = _mm256_unpacklo_epi16(wt0, wt1);
   return wt;
 }
diff --git a/av1/common/x86/jnt_convolve_sse2.c b/av1/common/x86/jnt_convolve_sse2.c
index 1573f17..c1df9c4 100644
--- a/av1/common/x86/jnt_convolve_sse2.c
+++ b/av1/common/x86/jnt_convolve_sse2.c
@@ -30,16 +30,19 @@
   const __m128i left_shift = _mm_cvtsi32_si128(bits);
   const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_0) >> 1);
   const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
+  const int do_average = conv_params->do_average;
+#if !CONFIG_REMOVE_DIST_WTD_COMP
+  const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m128i wt0 = _mm_set1_epi16(w0);
   const __m128i wt1 = _mm_set1_epi16(w1);
   const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
-  const int do_average = conv_params->do_average;
-#if !CONFIG_REMOVE_DIST_WTD_COMP
-  const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
 #else
   const int use_dist_wtd_comp_avg = 0;
+  const __m128i wt0 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt1 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
 #endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const int offset_0 =
       bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
@@ -162,13 +165,16 @@
   const uint8_t *src_ptr = src - fo_vert * src_stride;
   const int bits = FILTER_BITS - conv_params->round_0;
   const __m128i left_shift = _mm_cvtsi32_si128(bits);
+  const int do_average = conv_params->do_average;
+#if !CONFIG_REMOVE_DIST_WTD_COMP
   const __m128i wt0 = _mm_set1_epi16(conv_params->fwd_offset);
   const __m128i wt1 = _mm_set1_epi16(conv_params->bck_offset);
   const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
-  const int do_average = conv_params->do_average;
-#if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
 #else
+  const __m128i wt0 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt1 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
   const int use_dist_wtd_comp_avg = 0;
 #endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const int offset_0 =
@@ -404,18 +410,20 @@
   const int do_average = conv_params->do_average;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
-  const int use_dist_wtd_comp_avg = 0;
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
-  const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
-  const __m128i zero = _mm_setzero_si128();
-
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m128i wt0 = _mm_set1_epi16(w0);
   const __m128i wt1 = _mm_set1_epi16(w1);
   const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+#else
+  const int use_dist_wtd_comp_avg = 0;
+  const __m128i wt0 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt1 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
+  const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
+
+  const __m128i zero = _mm_setzero_si128();
 
   const int offset_0 =
       bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
diff --git a/av1/common/x86/jnt_convolve_ssse3.c b/av1/common/x86/jnt_convolve_ssse3.c
index 7896396..b44d9a9 100644
--- a/av1/common/x86/jnt_convolve_ssse3.c
+++ b/av1/common/x86/jnt_convolve_ssse3.c
@@ -35,17 +35,19 @@
   const int do_average = conv_params->do_average;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
+  const int w0 = conv_params->fwd_offset;
+  const int w1 = conv_params->bck_offset;
+  const __m128i wt0 = _mm_set1_epi16(w0);
+  const __m128i wt1 = _mm_set1_epi16(w1);
 #else
   const int use_dist_wtd_comp_avg = 0;
+  const __m128i wt0 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+  const __m128i wt1 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
 #endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
 
   const __m128i zero = _mm_setzero_si128();
 
-  const int w0 = conv_params->fwd_offset;
-  const int w1 = conv_params->bck_offset;
-  const __m128i wt0 = _mm_set1_epi16(w0);
-  const __m128i wt1 = _mm_set1_epi16(w1);
   const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
 
   const int offset_0 =
diff --git a/av1/common/x86/warp_plane_avx2.c b/av1/common/x86/warp_plane_avx2.c
index 51fe701..52e7bac 100644
--- a/av1/common/x86/warp_plane_avx2.c
+++ b/av1/common/x86/warp_plane_avx2.c
@@ -431,15 +431,23 @@
 static INLINE void unpack_weights_and_set_round_const_avx2(
     ConvolveParams *conv_params, const int round_bits, const int offset_bits,
     __m256i *res_sub_const, __m256i *round_bits_const, __m256i *wt) {
+  (void)wt;
   *res_sub_const =
       _mm256_set1_epi16(-(1 << (offset_bits - conv_params->round_1)) -
                         (1 << (offset_bits - conv_params->round_1 - 1)));
   *round_bits_const = _mm256_set1_epi16(((1 << round_bits) >> 1));
 
+#if !CONFIG_REMOVE_DIST_WTD_COMP
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m256i wt0 = _mm256_set1_epi16((short)w0);
   const __m256i wt1 = _mm256_set1_epi16((short)w1);
+#else
+  const __m256i wt0 =
+      _mm256_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+  const __m256i wt1 =
+      _mm256_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   *wt = _mm256_unpacklo_epi16(wt0, wt1);
 }
 
diff --git a/av1/common/x86/warp_plane_sse4.c b/av1/common/x86/warp_plane_sse4.c
index bd83479..2f8d479 100644
--- a/av1/common/x86/warp_plane_sse4.c
+++ b/av1/common/x86/warp_plane_sse4.c
@@ -455,15 +455,21 @@
 static INLINE void unpack_weights_and_set_round_const(
     ConvolveParams *conv_params, const int round_bits, const int offset_bits,
     __m128i *res_sub_const, __m128i *round_bits_const, __m128i *wt) {
+  (void)wt;
   *res_sub_const =
       _mm_set1_epi16(-(1 << (offset_bits - conv_params->round_1)) -
                      (1 << (offset_bits - conv_params->round_1 - 1)));
   *round_bits_const = _mm_set1_epi16(((1 << round_bits) >> 1));
 
+#if !CONFIG_REMOVE_DIST_WTD_COMP
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
   const __m128i wt0 = _mm_set1_epi16((int16_t)w0);
   const __m128i wt1 = _mm_set1_epi16((int16_t)w1);
+#else
+  const __m128i wt0 = _mm_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+  const __m128i wt1 = _mm_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   *wt = _mm_unpacklo_epi16(wt0, wt1);
 }
 
diff --git a/test/av1_convolve_test.cc b/test/av1_convolve_test.cc
index ba308e4..313083d 100644
--- a/test/av1_convolve_test.cc
+++ b/test/av1_convolve_test.cc
@@ -906,11 +906,12 @@
                                  const CompoundParam &compound) {
   ConvolveParams conv_params =
       get_conv_params_no_round(do_average, 0, conv_buf, width, 1, bit_depth);
+  (void)compound;
 #if !CONFIG_REMOVE_DIST_WTD_COMP
   conv_params.use_dist_wtd_comp_avg = compound.UseDistWtdCompAvg();
-#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   conv_params.fwd_offset = compound.FwdOffset();
   conv_params.bck_offset = compound.BckOffset();
+#endif  // !CONFIG_REMOVE_DIST_WTD_COMP
   return conv_params;
 }