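Remove the fwd_offset/bck_offset weights for dist-wtd-comp
under the CONFIG_REMOVE_DIST_WTD_COMP config flag. When the flag is
enabled, the two fields are compiled out of ConvolveParams and the x86
SIMD paths use fixed equal weights of 1 << (DIST_PRECISION_BITS - 1).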
Change-Id: I5ae02e3981ff8188fbcaa83a2f28ba1f01f8bb66
diff --git a/av1/common/convolve.h b/av1/common/convolve.h
index 6a51e2e..f181a63 100644
--- a/av1/common/convolve.h
+++ b/av1/common/convolve.h
@@ -28,9 +28,9 @@
int is_compound;
#if !CONFIG_REMOVE_DIST_WTD_COMP
int use_dist_wtd_comp_avg;
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
int fwd_offset;
int bck_offset;
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
} ConvolveParams;
#define ROUND0_BITS 3
diff --git a/av1/common/x86/convolve_2d_sse2.c b/av1/common/x86/convolve_2d_sse2.c
index cdfd452..dda27b9 100644
--- a/av1/common/x86/convolve_2d_sse2.c
+++ b/av1/common/x86/convolve_2d_sse2.c
@@ -228,18 +228,20 @@
const int do_average = conv_params->do_average;
#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
- const int use_dist_wtd_comp_avg = 0;
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
- const __m128i zero = _mm_setzero_si128();
- const __m128i left_shift = _mm_cvtsi32_si128(bits);
- int i, j;
-
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m128i wt0 = _mm_set1_epi16(w0);
const __m128i wt1 = _mm_set1_epi16(w1);
const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+#else
+ const __m128i wt0 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt1 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+ const int use_dist_wtd_comp_avg = 0;
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i left_shift = _mm_cvtsi32_si128(bits);
+ int i, j;
const int offset_0 =
bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
diff --git a/av1/common/x86/highbd_convolve_2d_sse4.c b/av1/common/x86/highbd_convolve_2d_sse4.c
index 9e8b13b..773549b 100644
--- a/av1/common/x86/highbd_convolve_2d_sse4.c
+++ b/av1/common/x86/highbd_convolve_2d_sse4.c
@@ -35,13 +35,15 @@
const int do_average = conv_params->do_average;
#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
- const int use_dist_wtd_comp_avg = 0;
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m128i wt0 = _mm_set1_epi32(w0);
const __m128i wt1 = _mm_set1_epi32(w1);
+#else
+ const __m128i wt0 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt1 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+ const int use_dist_wtd_comp_avg = 0;
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const __m128i zero = _mm_setzero_si128();
int i, j;
@@ -183,17 +185,18 @@
const int do_average = conv_params->do_average;
#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
- const int use_dist_wtd_comp_avg = 0;
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
- const int fo_vert = filter_params_y->taps / 2 - 1;
- const int fo_horiz = filter_params_x->taps / 2 - 1;
- const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m128i wt0 = _mm_set1_epi32(w0);
const __m128i wt1 = _mm_set1_epi32(w1);
+#else
+ const int use_dist_wtd_comp_avg = 0;
+ const __m128i wt0 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt1 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
const int offset_0 =
bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
diff --git a/av1/common/x86/highbd_jnt_convolve_avx2.c b/av1/common/x86/highbd_jnt_convolve_avx2.c
index f3f5d7b..ae694c7 100644
--- a/av1/common/x86/highbd_jnt_convolve_avx2.c
+++ b/av1/common/x86/highbd_jnt_convolve_avx2.c
@@ -36,13 +36,15 @@
const int do_average = conv_params->do_average;
#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
- const int use_dist_wtd_comp_avg = 0;
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m256i wt0 = _mm256_set1_epi32(w0);
const __m256i wt1 = _mm256_set1_epi32(w1);
+#else
+ const int use_dist_wtd_comp_avg = 0;
+ const __m256i wt0 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+ const __m256i wt1 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const __m256i zero = _mm256_setzero_si256();
int i, j;
@@ -251,14 +253,16 @@
const int do_average = conv_params->do_average;
#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
- const int use_dist_wtd_comp_avg = 0;
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
-
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m256i wt0 = _mm256_set1_epi32(w0);
const __m256i wt1 = _mm256_set1_epi32(w1);
+#else
+ const int use_dist_wtd_comp_avg = 0;
+ const __m256i wt0 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+ const __m256i wt1 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
+
const __m256i zero = _mm256_setzero_si256();
const __m256i round_const_x = _mm256_set1_epi32(
@@ -484,13 +488,15 @@
const int do_average = conv_params->do_average;
#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
- const int use_dist_wtd_comp_avg = 0;
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m256i wt0 = _mm256_set1_epi32(w0);
const __m256i wt1 = _mm256_set1_epi32(w1);
+#else
+ const int use_dist_wtd_comp_avg = 0;
+ const __m256i wt0 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+ const __m256i wt1 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const __m256i zero = _mm256_setzero_si256();
const __m256i round_const_x =
@@ -654,14 +660,16 @@
const int do_average = conv_params->do_average;
#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
- const int use_dist_wtd_comp_avg = 0;
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
-
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m256i wt0 = _mm256_set1_epi32(w0);
const __m256i wt1 = _mm256_set1_epi32(w1);
+#else
+ const int use_dist_wtd_comp_avg = 0;
+ const __m256i wt0 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+ const __m256i wt1 = _mm256_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
+
const __m256i round_const_y =
_mm256_set1_epi32(((1 << conv_params->round_1) >> 1));
const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
diff --git a/av1/common/x86/highbd_jnt_convolve_sse4.c b/av1/common/x86/highbd_jnt_convolve_sse4.c
index 52ed7e4..f119ca6 100644
--- a/av1/common/x86/highbd_jnt_convolve_sse4.c
+++ b/av1/common/x86/highbd_jnt_convolve_sse4.c
@@ -32,14 +32,16 @@
const int do_average = conv_params->do_average;
#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
- const int use_dist_wtd_comp_avg = 0;
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
-
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m128i wt0 = _mm_set1_epi32(w0);
const __m128i wt1 = _mm_set1_epi32(w1);
+#else
+ const int use_dist_wtd_comp_avg = 0;
+ const __m128i wt0 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt1 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
+
const __m128i round_const_y =
_mm_set1_epi32(((1 << conv_params->round_1) >> 1));
const __m128i round_shift_y = _mm_cvtsi32_si128(conv_params->round_1);
@@ -276,13 +278,15 @@
const int do_average = conv_params->do_average;
#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
- const int use_dist_wtd_comp_avg = 0;
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m128i wt0 = _mm_set1_epi32(w0);
const __m128i wt1 = _mm_set1_epi32(w1);
+#else
+ const int use_dist_wtd_comp_avg = 0;
+ const __m128i wt0 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt1 = _mm_set1_epi32(1 << (DIST_PRECISION_BITS - 1));
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const __m128i zero = _mm_setzero_si128();
const __m128i round_const_x =
diff --git a/av1/common/x86/jnt_convolve_avx2.c b/av1/common/x86/jnt_convolve_avx2.c
index c39f741..b92f69c 100644
--- a/av1/common/x86/jnt_convolve_avx2.c
+++ b/av1/common/x86/jnt_convolve_avx2.c
@@ -21,10 +21,18 @@
#include "av1/common/convolve.h"
static INLINE __m256i unpack_weights_avx2(ConvolveParams *conv_params) {
+ (void)conv_params;
+#if !CONFIG_REMOVE_DIST_WTD_COMP
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m256i wt0 = _mm256_set1_epi16((int16_t)w0);
const __m256i wt1 = _mm256_set1_epi16((int16_t)w1);
+#else
+ const __m256i wt0 =
+ _mm256_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+ const __m256i wt1 =
+ _mm256_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const __m256i wt = _mm256_unpacklo_epi16(wt0, wt1);
return wt;
}
diff --git a/av1/common/x86/jnt_convolve_sse2.c b/av1/common/x86/jnt_convolve_sse2.c
index 1573f17..c1df9c4 100644
--- a/av1/common/x86/jnt_convolve_sse2.c
+++ b/av1/common/x86/jnt_convolve_sse2.c
@@ -30,16 +30,19 @@
const __m128i left_shift = _mm_cvtsi32_si128(bits);
const __m128i round_const = _mm_set1_epi32((1 << conv_params->round_0) >> 1);
const __m128i round_shift = _mm_cvtsi32_si128(conv_params->round_0);
+ const int do_average = conv_params->do_average;
+#if !CONFIG_REMOVE_DIST_WTD_COMP
+ const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m128i wt0 = _mm_set1_epi16(w0);
const __m128i wt1 = _mm_set1_epi16(w1);
const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
- const int do_average = conv_params->do_average;
-#if !CONFIG_REMOVE_DIST_WTD_COMP
- const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
#else
const int use_dist_wtd_comp_avg = 0;
+ const __m128i wt0 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt1 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const int offset_0 =
bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
@@ -162,13 +165,16 @@
const uint8_t *src_ptr = src - fo_vert * src_stride;
const int bits = FILTER_BITS - conv_params->round_0;
const __m128i left_shift = _mm_cvtsi32_si128(bits);
+ const int do_average = conv_params->do_average;
+#if !CONFIG_REMOVE_DIST_WTD_COMP
const __m128i wt0 = _mm_set1_epi16(conv_params->fwd_offset);
const __m128i wt1 = _mm_set1_epi16(conv_params->bck_offset);
const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
- const int do_average = conv_params->do_average;
-#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
#else
+ const __m128i wt0 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt1 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
const int use_dist_wtd_comp_avg = 0;
#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const int offset_0 =
@@ -404,18 +410,20 @@
const int do_average = conv_params->do_average;
#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
-#else
- const int use_dist_wtd_comp_avg = 0;
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
- const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
-
- const __m128i zero = _mm_setzero_si128();
-
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m128i wt0 = _mm_set1_epi16(w0);
const __m128i wt1 = _mm_set1_epi16(w1);
const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+#else
+ const int use_dist_wtd_comp_avg = 0;
+ const __m128i wt0 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt1 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
+ const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
+
+ const __m128i zero = _mm_setzero_si128();
const int offset_0 =
bd + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
diff --git a/av1/common/x86/jnt_convolve_ssse3.c b/av1/common/x86/jnt_convolve_ssse3.c
index 7896396..b44d9a9 100644
--- a/av1/common/x86/jnt_convolve_ssse3.c
+++ b/av1/common/x86/jnt_convolve_ssse3.c
@@ -35,17 +35,19 @@
const int do_average = conv_params->do_average;
#if !CONFIG_REMOVE_DIST_WTD_COMP
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
+ const int w0 = conv_params->fwd_offset;
+ const int w1 = conv_params->bck_offset;
+ const __m128i wt0 = _mm_set1_epi16(w0);
+ const __m128i wt1 = _mm_set1_epi16(w1);
#else
const int use_dist_wtd_comp_avg = 0;
+ const __m128i wt0 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
+ const __m128i wt1 = _mm_set1_epi16(1 << (DIST_PRECISION_BITS - 1));
#endif // !CONFIG_REMOVE_DIST_WTD_COMP
const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
const __m128i zero = _mm_setzero_si128();
- const int w0 = conv_params->fwd_offset;
- const int w1 = conv_params->bck_offset;
- const __m128i wt0 = _mm_set1_epi16(w0);
- const __m128i wt1 = _mm_set1_epi16(w1);
const __m128i wt = _mm_unpacklo_epi16(wt0, wt1);
const int offset_0 =
diff --git a/av1/common/x86/warp_plane_avx2.c b/av1/common/x86/warp_plane_avx2.c
index 51fe701..52e7bac 100644
--- a/av1/common/x86/warp_plane_avx2.c
+++ b/av1/common/x86/warp_plane_avx2.c
@@ -431,15 +431,23 @@
static INLINE void unpack_weights_and_set_round_const_avx2(
ConvolveParams *conv_params, const int round_bits, const int offset_bits,
__m256i *res_sub_const, __m256i *round_bits_const, __m256i *wt) {
+ (void)wt;
*res_sub_const =
_mm256_set1_epi16(-(1 << (offset_bits - conv_params->round_1)) -
(1 << (offset_bits - conv_params->round_1 - 1)));
*round_bits_const = _mm256_set1_epi16(((1 << round_bits) >> 1));
+#if !CONFIG_REMOVE_DIST_WTD_COMP
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m256i wt0 = _mm256_set1_epi16((short)w0);
const __m256i wt1 = _mm256_set1_epi16((short)w1);
+#else
+ const __m256i wt0 =
+ _mm256_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+ const __m256i wt1 =
+ _mm256_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
*wt = _mm256_unpacklo_epi16(wt0, wt1);
}
diff --git a/av1/common/x86/warp_plane_sse4.c b/av1/common/x86/warp_plane_sse4.c
index bd83479..2f8d479 100644
--- a/av1/common/x86/warp_plane_sse4.c
+++ b/av1/common/x86/warp_plane_sse4.c
@@ -455,15 +455,21 @@
static INLINE void unpack_weights_and_set_round_const(
ConvolveParams *conv_params, const int round_bits, const int offset_bits,
__m128i *res_sub_const, __m128i *round_bits_const, __m128i *wt) {
+ (void)wt;
*res_sub_const =
_mm_set1_epi16(-(1 << (offset_bits - conv_params->round_1)) -
(1 << (offset_bits - conv_params->round_1 - 1)));
*round_bits_const = _mm_set1_epi16(((1 << round_bits) >> 1));
+#if !CONFIG_REMOVE_DIST_WTD_COMP
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
const __m128i wt0 = _mm_set1_epi16((int16_t)w0);
const __m128i wt1 = _mm_set1_epi16((int16_t)w1);
+#else
+ const __m128i wt0 = _mm_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+ const __m128i wt1 = _mm_set1_epi16((int16_t)(1 << (DIST_PRECISION_BITS - 1)));
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
*wt = _mm_unpacklo_epi16(wt0, wt1);
}
diff --git a/test/av1_convolve_test.cc b/test/av1_convolve_test.cc
index ba308e4..313083d 100644
--- a/test/av1_convolve_test.cc
+++ b/test/av1_convolve_test.cc
@@ -906,11 +906,12 @@
const CompoundParam &compound) {
ConvolveParams conv_params =
get_conv_params_no_round(do_average, 0, conv_buf, width, 1, bit_depth);
+ (void)compound;
#if !CONFIG_REMOVE_DIST_WTD_COMP
conv_params.use_dist_wtd_comp_avg = compound.UseDistWtdCompAvg();
-#endif // !CONFIG_REMOVE_DIST_WTD_COMP
conv_params.fwd_offset = compound.FwdOffset();
conv_params.bck_offset = compound.BckOffset();
+#endif // !CONFIG_REMOVE_DIST_WTD_COMP
return conv_params;
}