x86: normalize types used with _mm{,256}_set?_epi32

With clang -fsanitize=integer, this fixes warnings of the form:
implicit conversion from type 'uint32_t' (aka 'unsigned int') of value
4273652926 (32-bit, unsigned) to type 'int' changed the value to
-21314370 (32-bit, signed)

Bug: aomedia:3136
Bug: b/229626362
Change-Id: I465237602f6ae426fe72b28d697abf76a1462000
diff --git a/aom_dsp/simd/v128_intrinsics_x86.h b/aom_dsp/simd/v128_intrinsics_x86.h
index 1b87fb7..baf0528 100644
--- a/aom_dsp/simd/v128_intrinsics_x86.h
+++ b/aom_dsp/simd/v128_intrinsics_x86.h
@@ -36,7 +36,7 @@
 }
 
 SIMD_INLINE v128 v128_from_32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
-  return _mm_set_epi32(a, b, c, d);
+  return _mm_set_epi32((int)a, (int)b, (int)c, (int)d);
 }
 
 SIMD_INLINE v128 v128_load_aligned(const void *p) {
@@ -85,12 +85,12 @@
 
 SIMD_INLINE v128 v128_dup_16(uint16_t x) { return _mm_set1_epi16((short)x); }
 
-SIMD_INLINE v128 v128_dup_32(uint32_t x) { return _mm_set1_epi32(x); }
+SIMD_INLINE v128 v128_dup_32(uint32_t x) { return _mm_set1_epi32((int)x); }
 
 SIMD_INLINE v128 v128_dup_64(uint64_t x) {
   // _mm_set_pi64x and _mm_cvtsi64x_si64 missing in some compilers
-  return _mm_set_epi32((uint32_t)(x >> 32), (uint32_t)x, (uint32_t)(x >> 32),
-                       (uint32_t)x);
+  return _mm_set_epi32((int32_t)(x >> 32), (int32_t)x, (int32_t)(x >> 32),
+                       (int32_t)x);
 }
 
 SIMD_INLINE v128 v128_add_8(v128 a, v128 b) { return _mm_add_epi8(a, b); }
diff --git a/aom_dsp/simd/v256_intrinsics_x86.h b/aom_dsp/simd/v256_intrinsics_x86.h
index b43a53d..0b77eae 100644
--- a/aom_dsp/simd/v256_intrinsics_x86.h
+++ b/aom_dsp/simd/v256_intrinsics_x86.h
@@ -82,7 +82,7 @@
 
 SIMD_INLINE v256 v256_dup_16(uint16_t x) { return _mm256_set1_epi16((short)x); }
 
-SIMD_INLINE v256 v256_dup_32(uint32_t x) { return _mm256_set1_epi32(x); }
+SIMD_INLINE v256 v256_dup_32(uint32_t x) { return _mm256_set1_epi32((int)x); }
 
 SIMD_INLINE v256 v256_dup_64(uint64_t x) { return _mm256_set1_epi64x(x); }
 
diff --git a/aom_dsp/simd/v64_intrinsics_x86.h b/aom_dsp/simd/v64_intrinsics_x86.h
index 88766be..308e917 100644
--- a/aom_dsp/simd/v64_intrinsics_x86.h
+++ b/aom_dsp/simd/v64_intrinsics_x86.h
@@ -43,14 +43,14 @@
 }
 
 SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
-  return _mm_set_epi32(0, 0, x, y);
+  return _mm_set_epi32(0, 0, (int32_t)x, (int32_t)y);
 }
 
 SIMD_INLINE v64 v64_from_64(uint64_t x) {
 #ifdef __x86_64__
-  return _mm_cvtsi64_si128(x);
+  return _mm_cvtsi64_si128((int64_t)x);
 #else
-  return _mm_set_epi32(0, 0, x >> 32, (uint32_t)x);
+  return _mm_set_epi32(0, 0, (int32_t)(x >> 32), (int32_t)x);
 #endif
 }
 
@@ -105,7 +105,7 @@
 
 SIMD_INLINE v64 v64_dup_16(uint16_t x) { return _mm_set1_epi16((short)x); }
 
-SIMD_INLINE v64 v64_dup_32(uint32_t x) { return _mm_set1_epi32(x); }
+SIMD_INLINE v64 v64_dup_32(uint32_t x) { return _mm_set1_epi32((int)x); }
 
 SIMD_INLINE v64 v64_add_8(v64 a, v64 b) { return _mm_add_epi8(a, b); }
 
diff --git a/aom_dsp/x86/blend_a64_mask_avx2.c b/aom_dsp/x86/blend_a64_mask_avx2.c
index 95383d2..ad27152 100644
--- a/aom_dsp/x86/blend_a64_mask_avx2.c
+++ b/aom_dsp/x86/blend_a64_mask_avx2.c
@@ -964,10 +964,10 @@
     const __m256i *clip_high, const __m256i *mask_max) {
   do {
     // Load 8x u8 pixels from each of 4 rows of the mask, pad each to u16
-    const __m128i mask08 = _mm_set_epi32(*(uint32_t *)(mask + 3 * mask_stride),
-                                         *(uint32_t *)(mask + 2 * mask_stride),
-                                         *(uint32_t *)(mask + 1 * mask_stride),
-                                         *(uint32_t *)(mask + 0 * mask_stride));
+    const __m128i mask08 = _mm_set_epi32(*(int32_t *)(mask + 3 * mask_stride),
+                                         *(int32_t *)(mask + 2 * mask_stride),
+                                         *(int32_t *)(mask + 1 * mask_stride),
+                                         *(int32_t *)(mask + 0 * mask_stride));
     const __m256i mask0 = _mm256_cvtepu8_epi16(mask08);
 
     highbd_blend_a64_d16_mask_w4_avx2(dst, dst_stride, src0, src0_stride, src1,
diff --git a/aom_dsp/x86/blend_a64_mask_sse4.c b/aom_dsp/x86/blend_a64_mask_sse4.c
index 4a368ef..5c68f41a 100644
--- a/aom_dsp/x86/blend_a64_mask_sse4.c
+++ b/aom_dsp/x86/blend_a64_mask_sse4.c
@@ -1187,11 +1187,11 @@
     const __m128i *round_offset, int shift, const __m128i *clip_low,
     const __m128i *clip_high, const __m128i *mask_max) {
   do {
-    const __m128i mask0a8 = _mm_set_epi32(0, 0, *(uint32_t *)mask,
-                                          *(uint32_t *)(mask + mask_stride));
+    const __m128i mask0a8 =
+        _mm_set_epi32(0, 0, *(int32_t *)mask, *(int32_t *)(mask + mask_stride));
     const __m128i mask0b8 =
-        _mm_set_epi32(0, 0, *(uint32_t *)(mask + 2 * mask_stride),
-                      *(uint32_t *)(mask + 3 * mask_stride));
+        _mm_set_epi32(0, 0, *(int32_t *)(mask + 2 * mask_stride),
+                      *(int32_t *)(mask + 3 * mask_stride));
     const __m128i mask0a = _mm_cvtepu8_epi16(mask0a8);
     const __m128i mask0b = _mm_cvtepu8_epi16(mask0b8);
 
diff --git a/aom_dsp/x86/highbd_variance_avx2.c b/aom_dsp/x86/highbd_variance_avx2.c
index 49912ac..36e6473 100644
--- a/aom_dsp/x86/highbd_variance_avx2.c
+++ b/aom_dsp/x86/highbd_variance_avx2.c
@@ -26,13 +26,13 @@
     const uint32_t xoffset, const uint32_t yoffset, const uint8_t *dst_ptr8,
     int dst_stride, uint32_t *sse) {
   const __m256i filter1 =
-      _mm256_set1_epi32((uint32_t)(bilinear_filters_2t[xoffset][1] << 16) |
+      _mm256_set1_epi32((int)(bilinear_filters_2t[xoffset][1] << 16) |
                         bilinear_filters_2t[xoffset][0]);
   const __m256i filter2 =
-      _mm256_set1_epi32((uint32_t)(bilinear_filters_2t[yoffset][1] << 16) |
+      _mm256_set1_epi32((int)(bilinear_filters_2t[yoffset][1] << 16) |
                         bilinear_filters_2t[yoffset][0]);
   const __m256i one = _mm256_set1_epi16(1);
-  const uint32_t bitshift = (uint32_t)0x40;
+  const int bitshift = 0x40;
   (void)pixel_step;
   unsigned int i, j, prev = 0, curr = 2;
   uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
diff --git a/aom_dsp/x86/masked_variance_intrin_ssse3.c b/aom_dsp/x86/masked_variance_intrin_ssse3.c
index fb39a2b..6939aa4 100644
--- a/aom_dsp/x86/masked_variance_intrin_ssse3.c
+++ b/aom_dsp/x86/masked_variance_intrin_ssse3.c
@@ -494,15 +494,14 @@
 
   for (y = 0; y < height; y += 4) {
     // Load four rows at a time
-    __m128i src =
-        _mm_setr_epi32(*(uint32_t *)src_ptr, *(uint32_t *)&src_ptr[src_stride],
-                       *(uint32_t *)&src_ptr[src_stride * 2],
-                       *(uint32_t *)&src_ptr[src_stride * 3]);
+    __m128i src = _mm_setr_epi32(*(int *)src_ptr, *(int *)&src_ptr[src_stride],
+                                 *(int *)&src_ptr[src_stride * 2],
+                                 *(int *)&src_ptr[src_stride * 3]);
     const __m128i a = _mm_loadu_si128((const __m128i *)a_ptr);
     const __m128i b = _mm_loadu_si128((const __m128i *)b_ptr);
-    const __m128i m = _mm_setr_epi32(
-        *(uint32_t *)m_ptr, *(uint32_t *)&m_ptr[m_stride],
-        *(uint32_t *)&m_ptr[m_stride * 2], *(uint32_t *)&m_ptr[m_stride * 3]);
+    const __m128i m = _mm_setr_epi32(*(int *)m_ptr, *(int *)&m_ptr[m_stride],
+                                     *(int *)&m_ptr[m_stride * 2],
+                                     *(int *)&m_ptr[m_stride * 3]);
     accumulate_block(&src, &a, &b, &m, &sum, &sum_sq);
 
     src_ptr += src_stride * 4;
diff --git a/av1/common/x86/selfguided_avx2.c b/av1/common/x86/selfguided_avx2.c
index 3c5558d..4ab35e8 100644
--- a/av1/common/x86/selfguided_avx2.c
+++ b/av1/common/x86/selfguided_avx2.c
@@ -230,7 +230,7 @@
   const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
 
   // Set up masks
-  const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+  const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
   __m256i mask[8];
   for (int idx = 0; idx < 8; idx++) {
     const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx));
@@ -367,7 +367,7 @@
   const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
 
   // Set up masks
-  const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+  const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
   __m256i mask[8];
   for (int idx = 0; idx < 8; idx++) {
     const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx));
diff --git a/av1/common/x86/selfguided_sse4.c b/av1/common/x86/selfguided_sse4.c
index 72c7708..948bbfb 100644
--- a/av1/common/x86/selfguided_sse4.c
+++ b/av1/common/x86/selfguided_sse4.c
@@ -181,7 +181,7 @@
   const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
 
   // Set up masks
-  const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+  const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
   __m128i mask[4];
   for (int idx = 0; idx < 4; idx++) {
     const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx));
@@ -322,7 +322,7 @@
   const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
 
   // Set up masks
-  const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+  const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
   __m128i mask[4];
   for (int idx = 0; idx < 4; idx++) {
     const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx));
diff --git a/av1/encoder/x86/av1_k_means_avx2.c b/av1/encoder/x86/av1_k_means_avx2.c
index 23a7369..759f515 100644
--- a/av1/encoder/x86/av1_k_means_avx2.c
+++ b/av1/encoder/x86/av1_k_means_avx2.c
@@ -21,7 +21,7 @@
   for (int i = 0; i < n; i += 8) {
     __m256i ind = _mm256_loadu_si256((__m256i *)data);
     for (int j = 0; j < k; j++) {
-      __m256i cent = _mm256_set1_epi32((uint32_t)centroids[j]);
+      __m256i cent = _mm256_set1_epi32(centroids[j]);
       __m256i d1 = _mm256_sub_epi32(ind, cent);
       dist[j] = _mm256_mullo_epi32(d1, d1);
     }
diff --git a/av1/encoder/x86/av1_k_means_sse2.c b/av1/encoder/x86/av1_k_means_sse2.c
index 43f661f..f03c459 100644
--- a/av1/encoder/x86/av1_k_means_sse2.c
+++ b/av1/encoder/x86/av1_k_means_sse2.c
@@ -25,7 +25,7 @@
     l = (l == 0) ? 1 : 0;
     ind[l] = _mm_loadu_si128((__m128i *)data);
     for (int j = 0; j < k; j++) {
-      __m128i cent = _mm_set1_epi32((uint32_t)centroids[j]);
+      __m128i cent = _mm_set1_epi32(centroids[j]);
       __m128i d1 = _mm_sub_epi32(ind[l], cent);
       __m128i d2 = _mm_packs_epi32(d1, d1);
       __m128i d3 = _mm_mullo_epi16(d2, d2);
diff --git a/av1/encoder/x86/highbd_block_error_intrin_sse2.c b/av1/encoder/x86/highbd_block_error_intrin_sse2.c
index 4579e4e..0287f01 100644
--- a/av1/encoder/x86/highbd_block_error_intrin_sse2.c
+++ b/av1/encoder/x86/highbd_block_error_intrin_sse2.c
@@ -33,7 +33,7 @@
     __m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4));
     // Check if any values require more than 15 bit
     max = _mm_set1_epi32(0x3fff);
-    min = _mm_set1_epi32(0xffffc000);
+    min = _mm_set1_epi32((int)0xffffc000);
     cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max),
                          _mm_cmplt_epi32(mm_coeff, min));
     cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max),
diff --git a/av1/encoder/x86/pickrst_avx2.c b/av1/encoder/x86/pickrst_avx2.c
index 0347bca..e244d5e 100644
--- a/av1/encoder/x86/pickrst_avx2.c
+++ b/av1/encoder/x86/pickrst_avx2.c
@@ -297,7 +297,7 @@
         // interleaved copies of two pixels, but we only have one. However, the
         // pixels are (effectively) used as inputs to a multiply-accumulate. So
         // if we set the extra pixel slot to 0, then it is effectively ignored.
-        const __m256i dgd_ijkl = _mm256_set1_epi32((uint32_t)D1);
+        const __m256i dgd_ijkl = _mm256_set1_epi32((int)D1);
 
         acc_stat_highbd_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
                              &dgd_ijkl);
@@ -441,7 +441,7 @@
         // interleaved copies of two pixels, but we only have one. However, the
         // pixels are (effectively) used as inputs to a multiply-accumulate. So
         // if we set the extra pixel slot to 0, then it is effectively ignored.
-        const __m256i dgd_ijkl = _mm256_set1_epi32((uint32_t)D1);
+        const __m256i dgd_ijkl = _mm256_set1_epi32((int)D1);
 
         acc_stat_highbd_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
                              &dgd_ijkl);
diff --git a/av1/encoder/x86/pickrst_sse4.c b/av1/encoder/x86/pickrst_sse4.c
index d4921bd..8208cca 100644
--- a/av1/encoder/x86/pickrst_sse4.c
+++ b/av1/encoder/x86/pickrst_sse4.c
@@ -265,7 +265,7 @@
 
         // Load two u16 values from dgd as a single u32
         // Then broadcast to 4x u32 slots of a 128
-        const __m128i dgd_ijkl = _mm_set1_epi32(*((uint32_t *)(dgd_ijk + l)));
+        const __m128i dgd_ijkl = _mm_set1_epi32(*((int *)(dgd_ijk + l)));
         // dgd_ijkl = [y x y x y x y x] as u16
 
         acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
@@ -302,7 +302,7 @@
         // interleaved copies of two pixels, but we only have one. However, the
         // pixels are (effectively) used as inputs to a multiply-accumulate. So
         // if we set the extra pixel slot to 0, then it is effectively ignored.
-        const __m128i dgd_ijkl = _mm_set1_epi32((uint32_t)D1);
+        const __m128i dgd_ijkl = _mm_set1_epi32((int)D1);
 
         acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
                               &dgd_ijkl);
@@ -414,7 +414,7 @@
 
         // Load two u16 values from dgd as a single u32
         // then broadcast to 4x u32 slots of a 128
-        const __m128i dgd_ijkl = _mm_set1_epi32(*((uint32_t *)(dgd_ijk + l)));
+        const __m128i dgd_ijkl = _mm_set1_epi32(*((int *)(dgd_ijk + l)));
         // dgd_ijkl = [y x y x y x y x] as u16
 
         acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
@@ -447,7 +447,7 @@
         // interleaved copies of two pixels, but we only have one. However, the
         // pixels are (effectively) used as inputs to a multiply-accumulate. So
         // if we set the extra pixel slot to 0, then it is effectively ignored.
-        const __m128i dgd_ijkl = _mm_set1_epi32((uint32_t)D1);
+        const __m128i dgd_ijkl = _mm_set1_epi32((int)D1);
 
         acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
                               &dgd_ijkl);
diff --git a/av1/encoder/x86/wedge_utils_avx2.c b/av1/encoder/x86/wedge_utils_avx2.c
index c06bad8..4215f40 100644
--- a/av1/encoder/x86/wedge_utils_avx2.c
+++ b/av1/encoder/x86/wedge_utils_avx2.c
@@ -155,7 +155,7 @@
  */
 void av1_wedge_compute_delta_squares_avx2(int16_t *d, const int16_t *a,
                                           const int16_t *b, int N) {
-  const __m256i v_neg_w = _mm256_set1_epi32(0xffff0001);
+  const __m256i v_neg_w = _mm256_set1_epi32((int)0xffff0001);
 
   assert(N % 64 == 0);