x86: normalize types used with _mm{,256}_set?_epi32
With clang -fsanitize=integer, this fixes warnings of the form:
implicit conversion from type 'uint32_t' (aka 'unsigned int') of value
4273652926 (32-bit, unsigned) to type 'int' changed the value to
-21314370 (32-bit, signed)
Bug: aomedia:3136
Bug: b/229626362
Change-Id: I465237602f6ae426fe72b28d697abf76a1462000
diff --git a/aom_dsp/simd/v128_intrinsics_x86.h b/aom_dsp/simd/v128_intrinsics_x86.h
index 1b87fb7..baf0528 100644
--- a/aom_dsp/simd/v128_intrinsics_x86.h
+++ b/aom_dsp/simd/v128_intrinsics_x86.h
@@ -36,7 +36,7 @@
}
SIMD_INLINE v128 v128_from_32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
- return _mm_set_epi32(a, b, c, d);
+ return _mm_set_epi32((int)a, (int)b, (int)c, (int)d);
}
SIMD_INLINE v128 v128_load_aligned(const void *p) {
@@ -85,12 +85,12 @@
SIMD_INLINE v128 v128_dup_16(uint16_t x) { return _mm_set1_epi16((short)x); }
-SIMD_INLINE v128 v128_dup_32(uint32_t x) { return _mm_set1_epi32(x); }
+SIMD_INLINE v128 v128_dup_32(uint32_t x) { return _mm_set1_epi32((int)x); }
SIMD_INLINE v128 v128_dup_64(uint64_t x) {
// _mm_set_pi64x and _mm_cvtsi64x_si64 missing in some compilers
- return _mm_set_epi32((uint32_t)(x >> 32), (uint32_t)x, (uint32_t)(x >> 32),
- (uint32_t)x);
+ return _mm_set_epi32((int32_t)(x >> 32), (int32_t)x, (int32_t)(x >> 32),
+ (int32_t)x);
}
SIMD_INLINE v128 v128_add_8(v128 a, v128 b) { return _mm_add_epi8(a, b); }
diff --git a/aom_dsp/simd/v256_intrinsics_x86.h b/aom_dsp/simd/v256_intrinsics_x86.h
index b43a53d..0b77eae 100644
--- a/aom_dsp/simd/v256_intrinsics_x86.h
+++ b/aom_dsp/simd/v256_intrinsics_x86.h
@@ -82,7 +82,7 @@
SIMD_INLINE v256 v256_dup_16(uint16_t x) { return _mm256_set1_epi16((short)x); }
-SIMD_INLINE v256 v256_dup_32(uint32_t x) { return _mm256_set1_epi32(x); }
+SIMD_INLINE v256 v256_dup_32(uint32_t x) { return _mm256_set1_epi32((int)x); }
SIMD_INLINE v256 v256_dup_64(uint64_t x) { return _mm256_set1_epi64x(x); }
diff --git a/aom_dsp/simd/v64_intrinsics_x86.h b/aom_dsp/simd/v64_intrinsics_x86.h
index 88766be..308e917 100644
--- a/aom_dsp/simd/v64_intrinsics_x86.h
+++ b/aom_dsp/simd/v64_intrinsics_x86.h
@@ -43,14 +43,14 @@
}
SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
- return _mm_set_epi32(0, 0, x, y);
+ return _mm_set_epi32(0, 0, (int32_t)x, (int32_t)y);
}
SIMD_INLINE v64 v64_from_64(uint64_t x) {
#ifdef __x86_64__
- return _mm_cvtsi64_si128(x);
+ return _mm_cvtsi64_si128((int64_t)x);
#else
- return _mm_set_epi32(0, 0, x >> 32, (uint32_t)x);
+ return _mm_set_epi32(0, 0, (int32_t)(x >> 32), (int32_t)x);
#endif
}
@@ -105,7 +105,7 @@
SIMD_INLINE v64 v64_dup_16(uint16_t x) { return _mm_set1_epi16((short)x); }
-SIMD_INLINE v64 v64_dup_32(uint32_t x) { return _mm_set1_epi32(x); }
+SIMD_INLINE v64 v64_dup_32(uint32_t x) { return _mm_set1_epi32((int)x); }
SIMD_INLINE v64 v64_add_8(v64 a, v64 b) { return _mm_add_epi8(a, b); }
diff --git a/aom_dsp/x86/blend_a64_mask_avx2.c b/aom_dsp/x86/blend_a64_mask_avx2.c
index 95383d2..ad27152 100644
--- a/aom_dsp/x86/blend_a64_mask_avx2.c
+++ b/aom_dsp/x86/blend_a64_mask_avx2.c
@@ -964,10 +964,10 @@
const __m256i *clip_high, const __m256i *mask_max) {
do {
// Load 8x u8 pixels from each of 4 rows of the mask, pad each to u16
- const __m128i mask08 = _mm_set_epi32(*(uint32_t *)(mask + 3 * mask_stride),
- *(uint32_t *)(mask + 2 * mask_stride),
- *(uint32_t *)(mask + 1 * mask_stride),
- *(uint32_t *)(mask + 0 * mask_stride));
+ const __m128i mask08 = _mm_set_epi32(*(int32_t *)(mask + 3 * mask_stride),
+ *(int32_t *)(mask + 2 * mask_stride),
+ *(int32_t *)(mask + 1 * mask_stride),
+ *(int32_t *)(mask + 0 * mask_stride));
const __m256i mask0 = _mm256_cvtepu8_epi16(mask08);
highbd_blend_a64_d16_mask_w4_avx2(dst, dst_stride, src0, src0_stride, src1,
diff --git a/aom_dsp/x86/blend_a64_mask_sse4.c b/aom_dsp/x86/blend_a64_mask_sse4.c
index 4a368ef..5c68f41a 100644
--- a/aom_dsp/x86/blend_a64_mask_sse4.c
+++ b/aom_dsp/x86/blend_a64_mask_sse4.c
@@ -1187,11 +1187,11 @@
const __m128i *round_offset, int shift, const __m128i *clip_low,
const __m128i *clip_high, const __m128i *mask_max) {
do {
- const __m128i mask0a8 = _mm_set_epi32(0, 0, *(uint32_t *)mask,
- *(uint32_t *)(mask + mask_stride));
+ const __m128i mask0a8 =
+ _mm_set_epi32(0, 0, *(int32_t *)mask, *(int32_t *)(mask + mask_stride));
const __m128i mask0b8 =
- _mm_set_epi32(0, 0, *(uint32_t *)(mask + 2 * mask_stride),
- *(uint32_t *)(mask + 3 * mask_stride));
+ _mm_set_epi32(0, 0, *(int32_t *)(mask + 2 * mask_stride),
+ *(int32_t *)(mask + 3 * mask_stride));
const __m128i mask0a = _mm_cvtepu8_epi16(mask0a8);
const __m128i mask0b = _mm_cvtepu8_epi16(mask0b8);
diff --git a/aom_dsp/x86/highbd_variance_avx2.c b/aom_dsp/x86/highbd_variance_avx2.c
index 49912ac..36e6473 100644
--- a/aom_dsp/x86/highbd_variance_avx2.c
+++ b/aom_dsp/x86/highbd_variance_avx2.c
@@ -26,13 +26,13 @@
const uint32_t xoffset, const uint32_t yoffset, const uint8_t *dst_ptr8,
int dst_stride, uint32_t *sse) {
const __m256i filter1 =
- _mm256_set1_epi32((uint32_t)(bilinear_filters_2t[xoffset][1] << 16) |
+ _mm256_set1_epi32((int)(bilinear_filters_2t[xoffset][1] << 16) |
bilinear_filters_2t[xoffset][0]);
const __m256i filter2 =
- _mm256_set1_epi32((uint32_t)(bilinear_filters_2t[yoffset][1] << 16) |
+ _mm256_set1_epi32((int)(bilinear_filters_2t[yoffset][1] << 16) |
bilinear_filters_2t[yoffset][0]);
const __m256i one = _mm256_set1_epi16(1);
- const uint32_t bitshift = (uint32_t)0x40;
+ const int bitshift = 0x40;
(void)pixel_step;
unsigned int i, j, prev = 0, curr = 2;
uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
diff --git a/aom_dsp/x86/masked_variance_intrin_ssse3.c b/aom_dsp/x86/masked_variance_intrin_ssse3.c
index fb39a2b..6939aa4 100644
--- a/aom_dsp/x86/masked_variance_intrin_ssse3.c
+++ b/aom_dsp/x86/masked_variance_intrin_ssse3.c
@@ -494,15 +494,14 @@
for (y = 0; y < height; y += 4) {
// Load four rows at a time
- __m128i src =
- _mm_setr_epi32(*(uint32_t *)src_ptr, *(uint32_t *)&src_ptr[src_stride],
- *(uint32_t *)&src_ptr[src_stride * 2],
- *(uint32_t *)&src_ptr[src_stride * 3]);
+ __m128i src = _mm_setr_epi32(*(int *)src_ptr, *(int *)&src_ptr[src_stride],
+ *(int *)&src_ptr[src_stride * 2],
+ *(int *)&src_ptr[src_stride * 3]);
const __m128i a = _mm_loadu_si128((const __m128i *)a_ptr);
const __m128i b = _mm_loadu_si128((const __m128i *)b_ptr);
- const __m128i m = _mm_setr_epi32(
- *(uint32_t *)m_ptr, *(uint32_t *)&m_ptr[m_stride],
- *(uint32_t *)&m_ptr[m_stride * 2], *(uint32_t *)&m_ptr[m_stride * 3]);
+ const __m128i m = _mm_setr_epi32(*(int *)m_ptr, *(int *)&m_ptr[m_stride],
+ *(int *)&m_ptr[m_stride * 2],
+ *(int *)&m_ptr[m_stride * 3]);
accumulate_block(&src, &a, &b, &m, &sum, &sum_sq);
src_ptr += src_stride * 4;
diff --git a/av1/common/x86/selfguided_avx2.c b/av1/common/x86/selfguided_avx2.c
index 3c5558d..4ab35e8 100644
--- a/av1/common/x86/selfguided_avx2.c
+++ b/av1/common/x86/selfguided_avx2.c
@@ -230,7 +230,7 @@
const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
// Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
__m256i mask[8];
for (int idx = 0; idx < 8; idx++) {
const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx));
@@ -367,7 +367,7 @@
const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
// Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
__m256i mask[8];
for (int idx = 0; idx < 8; idx++) {
const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx));
diff --git a/av1/common/x86/selfguided_sse4.c b/av1/common/x86/selfguided_sse4.c
index 72c7708..948bbfb 100644
--- a/av1/common/x86/selfguided_sse4.c
+++ b/av1/common/x86/selfguided_sse4.c
@@ -181,7 +181,7 @@
const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
// Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
__m128i mask[4];
for (int idx = 0; idx < 4; idx++) {
const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx));
@@ -322,7 +322,7 @@
const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
// Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
__m128i mask[4];
for (int idx = 0; idx < 4; idx++) {
const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx));
diff --git a/av1/encoder/x86/av1_k_means_avx2.c b/av1/encoder/x86/av1_k_means_avx2.c
index 23a7369..759f515 100644
--- a/av1/encoder/x86/av1_k_means_avx2.c
+++ b/av1/encoder/x86/av1_k_means_avx2.c
@@ -21,7 +21,7 @@
for (int i = 0; i < n; i += 8) {
__m256i ind = _mm256_loadu_si256((__m256i *)data);
for (int j = 0; j < k; j++) {
- __m256i cent = _mm256_set1_epi32((uint32_t)centroids[j]);
+ __m256i cent = _mm256_set1_epi32(centroids[j]);
__m256i d1 = _mm256_sub_epi32(ind, cent);
dist[j] = _mm256_mullo_epi32(d1, d1);
}
diff --git a/av1/encoder/x86/av1_k_means_sse2.c b/av1/encoder/x86/av1_k_means_sse2.c
index 43f661f..f03c459 100644
--- a/av1/encoder/x86/av1_k_means_sse2.c
+++ b/av1/encoder/x86/av1_k_means_sse2.c
@@ -25,7 +25,7 @@
l = (l == 0) ? 1 : 0;
ind[l] = _mm_loadu_si128((__m128i *)data);
for (int j = 0; j < k; j++) {
- __m128i cent = _mm_set1_epi32((uint32_t)centroids[j]);
+ __m128i cent = _mm_set1_epi32(centroids[j]);
__m128i d1 = _mm_sub_epi32(ind[l], cent);
__m128i d2 = _mm_packs_epi32(d1, d1);
__m128i d3 = _mm_mullo_epi16(d2, d2);
diff --git a/av1/encoder/x86/highbd_block_error_intrin_sse2.c b/av1/encoder/x86/highbd_block_error_intrin_sse2.c
index 4579e4e..0287f01 100644
--- a/av1/encoder/x86/highbd_block_error_intrin_sse2.c
+++ b/av1/encoder/x86/highbd_block_error_intrin_sse2.c
@@ -33,7 +33,7 @@
__m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4));
// Check if any values require more than 15 bit
max = _mm_set1_epi32(0x3fff);
- min = _mm_set1_epi32(0xffffc000);
+ min = _mm_set1_epi32((int)0xffffc000);
cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max),
_mm_cmplt_epi32(mm_coeff, min));
cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max),
diff --git a/av1/encoder/x86/pickrst_avx2.c b/av1/encoder/x86/pickrst_avx2.c
index 0347bca..e244d5e 100644
--- a/av1/encoder/x86/pickrst_avx2.c
+++ b/av1/encoder/x86/pickrst_avx2.c
@@ -297,7 +297,7 @@
// interleaved copies of two pixels, but we only have one. However, the
// pixels are (effectively) used as inputs to a multiply-accumulate. So
// if we set the extra pixel slot to 0, then it is effectively ignored.
- const __m256i dgd_ijkl = _mm256_set1_epi32((uint32_t)D1);
+ const __m256i dgd_ijkl = _mm256_set1_epi32((int)D1);
acc_stat_highbd_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
&dgd_ijkl);
@@ -441,7 +441,7 @@
// interleaved copies of two pixels, but we only have one. However, the
// pixels are (effectively) used as inputs to a multiply-accumulate. So
// if we set the extra pixel slot to 0, then it is effectively ignored.
- const __m256i dgd_ijkl = _mm256_set1_epi32((uint32_t)D1);
+ const __m256i dgd_ijkl = _mm256_set1_epi32((int)D1);
acc_stat_highbd_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
&dgd_ijkl);
diff --git a/av1/encoder/x86/pickrst_sse4.c b/av1/encoder/x86/pickrst_sse4.c
index d4921bd..8208cca 100644
--- a/av1/encoder/x86/pickrst_sse4.c
+++ b/av1/encoder/x86/pickrst_sse4.c
@@ -265,7 +265,7 @@
// Load two u16 values from dgd as a single u32
// Then broadcast to 4x u32 slots of a 128
- const __m128i dgd_ijkl = _mm_set1_epi32(*((uint32_t *)(dgd_ijk + l)));
+ const __m128i dgd_ijkl = _mm_set1_epi32(*((int *)(dgd_ijk + l)));
// dgd_ijkl = [y x y x y x y x] as u16
acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
@@ -302,7 +302,7 @@
// interleaved copies of two pixels, but we only have one. However, the
// pixels are (effectively) used as inputs to a multiply-accumulate. So
// if we set the extra pixel slot to 0, then it is effectively ignored.
- const __m128i dgd_ijkl = _mm_set1_epi32((uint32_t)D1);
+ const __m128i dgd_ijkl = _mm_set1_epi32((int)D1);
acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
&dgd_ijkl);
@@ -414,7 +414,7 @@
// Load two u16 values from dgd as a single u32
// then broadcast to 4x u32 slots of a 128
- const __m128i dgd_ijkl = _mm_set1_epi32(*((uint32_t *)(dgd_ijk + l)));
+ const __m128i dgd_ijkl = _mm_set1_epi32(*((int *)(dgd_ijk + l)));
// dgd_ijkl = [y x y x y x y x] as u16
acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
@@ -447,7 +447,7 @@
// interleaved copies of two pixels, but we only have one. However, the
// pixels are (effectively) used as inputs to a multiply-accumulate. So
// if we set the extra pixel slot to 0, then it is effectively ignored.
- const __m128i dgd_ijkl = _mm_set1_epi32((uint32_t)D1);
+ const __m128i dgd_ijkl = _mm_set1_epi32((int)D1);
acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
&dgd_ijkl);
diff --git a/av1/encoder/x86/wedge_utils_avx2.c b/av1/encoder/x86/wedge_utils_avx2.c
index c06bad8..4215f40 100644
--- a/av1/encoder/x86/wedge_utils_avx2.c
+++ b/av1/encoder/x86/wedge_utils_avx2.c
@@ -155,7 +155,7 @@
*/
void av1_wedge_compute_delta_squares_avx2(int16_t *d, const int16_t *a,
const int16_t *b, int N) {
- const __m256i v_neg_w = _mm256_set1_epi32(0xffff0001);
+ const __m256i v_neg_w = _mm256_set1_epi32((int)0xffff0001);
assert(N % 64 == 0);