x86: normalize types used with _mm_cvtsi128_si32

prefer int in most cases to match the return type of _mm_cvtsi128_si32;
for consistency, the intra predictors are kept unsigned as some
implementations rely on this in subsequent operations. this reliance was
the cause of previous failures.

with clang -fsanitize=integer this fixes warnings of the form:
implicit conversion from type 'int' of value -809931979 (32-bit, signed)
to type 'uint32_t' (aka 'unsigned int') changed the value to 3485035317
(32-bit, unsigned)

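as a minimal sketch (illustration only, not part of this change; the
helper name is hypothetical), the normalized 4-byte store pattern is:

  #include <emmintrin.h> /* SSE2 */
  #include <stdint.h>

  /* _mm_cvtsi128_si32() returns int, so storing through an int pointer
   * avoids the implicit int -> uint32_t conversion that
   * clang -fsanitize=integer reports when the low 32 bits are negative. */
  static void store_4_bytes(uint8_t *dst, __m128i v) {
    *(int *)dst = _mm_cvtsi128_si32(v);
  }
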
Bug: b/229626362
Change-Id: I3c4442d9ab7263e8425de2be188b7c90ea3591e9
diff --git a/aom_dsp/x86/aom_convolve_copy_sse2.c b/aom_dsp/x86/aom_convolve_copy_sse2.c
index f7b468a..e78845e 100644
--- a/aom_dsp/x86/aom_convolve_copy_sse2.c
+++ b/aom_dsp/x86/aom_convolve_copy_sse2.c
@@ -207,11 +207,11 @@
   if (w == 2) {
     do {
       __m128i s = _mm_loadl_epi64((__m128i *)src);
-      *(uint32_t *)dst = _mm_cvtsi128_si32(s);
+      *(int *)dst = _mm_cvtsi128_si32(s);
       src += src_stride;
       dst += dst_stride;
       s = _mm_loadl_epi64((__m128i *)src);
-      *(uint32_t *)dst = _mm_cvtsi128_si32(s);
+      *(int *)dst = _mm_cvtsi128_si32(s);
       src += src_stride;
       dst += dst_stride;
       h -= 2;
diff --git a/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c b/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c
index d8d353c..22f2e69 100644
--- a/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c
+++ b/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c
@@ -43,8 +43,8 @@
 
 static INLINE void xx_storeu2_epi32(const uint8_t *output_ptr,
                                     const ptrdiff_t stride, const __m256i *a) {
-  *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*a));
-  *((uint32_t *)(output_ptr + stride)) =
+  *((int *)(output_ptr)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*a));
+  *((int *)(output_ptr + stride)) =
       _mm_cvtsi128_si32(_mm256_extracti128_si256(*a, 1));
 }
 
@@ -151,7 +151,7 @@
     srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
 
     // save 4 bytes
-    *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
+    *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
   }
 }
 
@@ -256,7 +256,7 @@
     srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
 
     // save 4 bytes
-    *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
+    *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
   }
 }
 
diff --git a/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c b/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
index cff7f43..5c36b68 100644
--- a/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
+++ b/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
@@ -477,7 +477,7 @@
 
     src_ptr += src_pixels_per_line;
 
-    *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
+    *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
 
     output_ptr += output_pitch;
   }
@@ -555,8 +555,8 @@
 
     src_ptr += src_stride;
 
-    *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(resReg23_34_45_56);
-    *((uint32_t *)(output_ptr + out_pitch)) =
+    *((int *)(output_ptr)) = _mm_cvtsi128_si32(resReg23_34_45_56);
+    *((int *)(output_ptr + out_pitch)) =
         _mm_cvtsi128_si32(_mm_srli_si128(resReg23_34_45_56, 4));
 
     output_ptr += dst_stride;
diff --git a/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c b/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c
index 8a18279..5823059 100644
--- a/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c
+++ b/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c
@@ -108,7 +108,7 @@
 
     src_ptr += src_pixels_per_line;
 
-    *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
+    *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
     output_ptr += output_pitch;
   }
 }
@@ -185,8 +185,8 @@
 
     src_ptr += src_stride;
 
-    *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(resReglo);
-    *((uint32_t *)(output_ptr + out_pitch)) = _mm_cvtsi128_si32(resReghi);
+    *((int *)(output_ptr)) = _mm_cvtsi128_si32(resReglo);
+    *((int *)(output_ptr + out_pitch)) = _mm_cvtsi128_si32(resReghi);
 
     output_ptr += dst_stride;
 
diff --git a/aom_dsp/x86/convolve_avx2.h b/aom_dsp/x86/convolve_avx2.h
index 785ba39..a00ede2 100644
--- a/aom_dsp/x86/convolve_avx2.h
+++ b/aom_dsp/x86/convolve_avx2.h
@@ -576,9 +576,8 @@
           const __m128i res_0 = _mm256_castsi256_si128(res_8);                 \
           const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);            \
                                                                                \
-          *(uint32_t *)(&dst0[i * dst_stride0 + j]) =                          \
-              _mm_cvtsi128_si32(res_0);                                        \
-          *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =            \
+          *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);     \
+          *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =                 \
               _mm_cvtsi128_si32(res_1);                                        \
                                                                                \
         } else {                                                               \
diff --git a/aom_dsp/x86/highbd_convolve_ssse3.c b/aom_dsp/x86/highbd_convolve_ssse3.c
index 5293e27..21389db 100644
--- a/aom_dsp/x86/highbd_convolve_ssse3.c
+++ b/aom_dsp/x86/highbd_convolve_ssse3.c
@@ -136,10 +136,10 @@
           res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
           res_a_round1 = _mm_max_epi16(res_a_round1, zero);
 
-          *((uint32_t *)(&dst[i * dst_stride + j])) =
+          *((int *)(&dst[i * dst_stride + j])) =
               _mm_cvtsi128_si32(res_a_round0);
 
-          *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
+          *((int *)(&dst[i * dst_stride + j + dst_stride])) =
               _mm_cvtsi128_si32(res_a_round1);
         }
 
@@ -264,10 +264,10 @@
             res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
             res_a_round1 = _mm_max_epi16(res_a_round1, zero);
 
-            *((uint32_t *)(&dst[i * dst_stride + j])) =
+            *((int *)(&dst[i * dst_stride + j])) =
                 _mm_cvtsi128_si32(res_a_round0);
 
-            *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
+            *((int *)(&dst[i * dst_stride + j + dst_stride])) =
                 _mm_cvtsi128_si32(res_a_round1);
           }
 
@@ -375,7 +375,7 @@
           } else if (w == 4) {
             _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res);
           } else {
-            *((uint32_t *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
+            *((int *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
           }
         }
       }
@@ -430,7 +430,7 @@
           } else if (w == 4) {
             _mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res);
           } else {
-            *((uint32_t *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
+            *((int *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
           }
         }
       }
diff --git a/aom_dsp/x86/highbd_intrapred_sse2.c b/aom_dsp/x86/highbd_intrapred_sse2.c
index 5a55736..6a2e915 100644
--- a/aom_dsp/x86/highbd_intrapred_sse2.c
+++ b/aom_dsp/x86/highbd_intrapred_sse2.c
@@ -821,11 +821,11 @@
   const __m128i sum_above = dc_sum_4(above);
   const __m128i sum_left = dc_sum_8(left);
   const __m128i sum = _mm_add_epi16(sum_above, sum_left);
-  uint32_t sum32 = _mm_cvtsi128_si32(sum);
+  uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
   sum32 >>= 16;
   sum32 += 6;
   sum32 /= 12;
-  const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+  const __m128i row = _mm_set1_epi16((int16_t)sum32);
   int i;
   for (i = 0; i < 4; ++i) {
     _mm_storel_epi64((__m128i *)dst, row);
@@ -842,11 +842,11 @@
   const __m128i sum_left = dc_sum_4(left);
   const __m128i sum_above = dc_sum_8(above);
   const __m128i sum = _mm_add_epi16(sum_above, sum_left);
-  uint32_t sum32 = _mm_cvtsi128_si32(sum);
+  uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
   sum32 >>= 16;
   sum32 += 6;
   sum32 /= 12;
-  const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+  const __m128i row = _mm_set1_epi16((int16_t)sum32);
 
   _mm_store_si128((__m128i *)dst, row);
   dst += stride;
@@ -867,10 +867,10 @@
   sum_left = _mm_unpacklo_epi16(sum_left, zero);
   sum_above = _mm_unpacklo_epi16(sum_above, zero);
   const __m128i sum = _mm_add_epi32(sum_left, sum_above);
-  uint32_t sum32 = _mm_cvtsi128_si32(sum);
+  uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
   sum32 += 12;
   sum32 /= 24;
-  const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+  const __m128i row = _mm_set1_epi16((int16_t)sum32);
   int i;
   for (i = 0; i < 4; ++i) {
     _mm_store_si128((__m128i *)dst, row);
@@ -894,10 +894,10 @@
   sum_left = _mm_unpacklo_epi16(sum_left, zero);
   sum_above = _mm_unpacklo_epi16(sum_above, zero);
   const __m128i sum = _mm_add_epi32(sum_left, sum_above);
-  uint32_t sum32 = _mm_cvtsi128_si32(sum);
+  uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
   sum32 += 12;
   sum32 /= 24;
-  const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+  const __m128i row = _mm_set1_epi16((int16_t)sum32);
   int i;
   for (i = 0; i < 2; ++i) {
     _mm_store_si128((__m128i *)dst, row);
@@ -924,10 +924,10 @@
   const __m128i zero = _mm_setzero_si128();
   sum_above = _mm_unpacklo_epi16(sum_above, zero);
   const __m128i sum = _mm_add_epi32(sum_left, sum_above);
-  uint32_t sum32 = _mm_cvtsi128_si32(sum);
+  uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
   sum32 += 24;
   sum32 /= 48;
-  const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+  const __m128i row = _mm_set1_epi16((int16_t)sum32);
   int i;
   for (i = 0; i < 8; ++i) {
     _mm_store_si128((__m128i *)dst, row);
@@ -954,10 +954,10 @@
   const __m128i zero = _mm_setzero_si128();
   sum_left = _mm_unpacklo_epi16(sum_left, zero);
   const __m128i sum = _mm_add_epi32(sum_left, sum_above);
-  uint32_t sum32 = _mm_cvtsi128_si32(sum);
+  uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
   sum32 += 24;
   sum32 /= 48;
-  const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+  const __m128i row = _mm_set1_epi16((int16_t)sum32);
   int i;
   for (i = 0; i < 4; ++i) {
     _mm_store_si128((__m128i *)dst, row);
diff --git a/aom_dsp/x86/intrapred_avx2.c b/aom_dsp/x86/intrapred_avx2.c
index b4b5ce2..b5f7144 100644
--- a/aom_dsp/x86/intrapred_avx2.c
+++ b/aom_dsp/x86/intrapred_avx2.c
@@ -426,10 +426,10 @@
   const __m128i top_sum = dc_sum_32_sse2(above);
   __m128i left_sum = dc_sum_16_sse2(left);
   left_sum = _mm_add_epi16(top_sum, left_sum);
-  uint16_t sum = _mm_cvtsi128_si32(left_sum);
+  uint16_t sum = (uint16_t)_mm_cvtsi128_si32(left_sum);
   sum += 24;
   sum /= 48;
-  const __m256i row = _mm256_set1_epi8((uint8_t)sum);
+  const __m256i row = _mm256_set1_epi8((int8_t)sum);
   row_store_32xh(&row, 16, dst, stride);
 }
 
@@ -438,10 +438,10 @@
   const __m256i sum_above = dc_sum_32(above);
   __m256i sum_left = dc_sum_64(left);
   sum_left = _mm256_add_epi16(sum_left, sum_above);
-  uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+  uint16_t sum = (uint16_t)_mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
   sum += 48;
   sum /= 96;
-  const __m256i row = _mm256_set1_epi8((uint8_t)sum);
+  const __m256i row = _mm256_set1_epi8((int8_t)sum);
   row_store_32xh(&row, 64, dst, stride);
 }
 
@@ -450,10 +450,10 @@
   const __m256i sum_above = dc_sum_64(above);
   __m256i sum_left = dc_sum_64(left);
   sum_left = _mm256_add_epi16(sum_left, sum_above);
-  uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+  uint16_t sum = (uint16_t)_mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
   sum += 64;
   sum /= 128;
-  const __m256i row = _mm256_set1_epi8((uint8_t)sum);
+  const __m256i row = _mm256_set1_epi8((int8_t)sum);
   row_store_64xh(&row, 64, dst, stride);
 }
 
@@ -462,10 +462,10 @@
   const __m256i sum_above = dc_sum_64(above);
   __m256i sum_left = dc_sum_32(left);
   sum_left = _mm256_add_epi16(sum_left, sum_above);
-  uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+  uint16_t sum = (uint16_t)_mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
   sum += 48;
   sum /= 96;
-  const __m256i row = _mm256_set1_epi8((uint8_t)sum);
+  const __m256i row = _mm256_set1_epi8((int8_t)sum);
   row_store_64xh(&row, 32, dst, stride);
 }
 
@@ -474,10 +474,10 @@
   const __m256i sum_above = dc_sum_64(above);
   __m256i sum_left = _mm256_castsi128_si256(dc_sum_16_sse2(left));
   sum_left = _mm256_add_epi16(sum_left, sum_above);
-  uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+  uint16_t sum = (uint16_t)_mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
   sum += 40;
   sum /= 80;
-  const __m256i row = _mm256_set1_epi8((uint8_t)sum);
+  const __m256i row = _mm256_set1_epi8((int8_t)sum);
   row_store_64xh(&row, 16, dst, stride);
 }
 
@@ -3597,7 +3597,7 @@
 
   dr_prediction_z1_HxW_internal_avx2(4, N, dstvec, above, upsample_above, dx);
   for (int i = 0; i < N; i++) {
-    *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
+    *(int *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
   }
 }
 
@@ -3926,7 +3926,7 @@
     resy = _mm_srli_si128(resx, 4);
 
     resxy = _mm_blendv_epi8(resx, resy, *(__m128i *)BaseMask[base_min_diff]);
-    *(uint32_t *)(dst) = _mm_cvtsi128_si32(resxy);
+    *(int *)(dst) = _mm_cvtsi128_si32(resxy);
     dst += stride;
   }
 }
@@ -4338,10 +4338,10 @@
   transpose4x8_8x4_low_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3],
                             &d[0], &d[1], &d[2], &d[3]);
 
-  *(uint32_t *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
-  *(uint32_t *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
-  *(uint32_t *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
-  *(uint32_t *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
+  *(int *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
+  *(int *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
+  *(int *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
+  *(int *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
   return;
 }
 
@@ -4374,7 +4374,7 @@
   transpose4x8_8x4_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3], &d[0],
                         &d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]);
   for (int i = 0; i < 8; i++) {
-    *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+    *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
   }
 }
 
@@ -4434,7 +4434,7 @@
   dr_prediction_z1_HxW_internal_avx2(16, 4, dstvec, left, upsample_left, dy);
   transpose4x16_sse2(dstvec, d);
   for (int i = 0; i < 16; i++) {
-    *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+    *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
   }
 }
 
diff --git a/aom_dsp/x86/intrapred_sse2.c b/aom_dsp/x86/intrapred_sse2.c
index 5afef68..4786696 100644
--- a/aom_dsp/x86/intrapred_sse2.c
+++ b/aom_dsp/x86/intrapred_sse2.c
@@ -112,12 +112,12 @@
   __m128i sum_above = dc_sum_4(above);
   sum_above = _mm_add_epi16(sum_left, sum_above);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 6;
   sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X2);
 
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
-  const uint32_t pred = _mm_cvtsi128_si32(row);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
+  const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(row);
   dc_store_4xh(pred, 8, dst, stride);
 }
 
@@ -127,12 +127,12 @@
   __m128i sum_above = dc_sum_4(above);
   sum_above = _mm_add_epi16(sum_left, sum_above);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 10;
   sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X4);
 
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
-  const uint32_t pred = _mm_cvtsi128_si32(row);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
+  const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(row);
   dc_store_4xh(pred, 16, dst, stride);
 }
 
@@ -142,7 +142,7 @@
   __m128i sum_above = dc_sum_8(above);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 6;
   sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X2);
 
@@ -156,10 +156,10 @@
   __m128i sum_above = dc_sum_8(above);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 12;
   sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X2);
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_8xh(&row, 16, dst, stride);
 }
 
@@ -169,10 +169,10 @@
   __m128i sum_above = dc_sum_8(above);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 20;
   sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X4);
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_8xh(&row, 32, dst, stride);
 }
 
@@ -182,10 +182,10 @@
   __m128i sum_above = dc_sum_16_sse2(above);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 10;
   sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X4);
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_16xh(&row, 4, dst, stride);
 }
 
@@ -195,10 +195,10 @@
   __m128i sum_above = dc_sum_16_sse2(above);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 12;
   sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X2);
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_16xh(&row, 8, dst, stride);
 }
 
@@ -208,10 +208,10 @@
   __m128i sum_above = dc_sum_16_sse2(above);
   sum_above = _mm_add_epi16(sum_left, sum_above);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 24;
   sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X2);
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_16xh(&row, 32, dst, stride);
 }
 
@@ -221,10 +221,10 @@
   __m128i sum_above = dc_sum_16_sse2(above);
   sum_above = _mm_add_epi16(sum_left, sum_above);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 40;
   sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X4);
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_16xh(&row, 64, dst, stride);
 }
 
@@ -234,10 +234,10 @@
   const __m128i sum_left = dc_sum_8(left);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 20;
   sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X4);
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_32xh(&row, 8, dst, stride);
 }
 
@@ -247,10 +247,10 @@
   const __m128i sum_left = dc_sum_16_sse2(left);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 24;
   sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X2);
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_32xh(&row, 16, dst, stride);
 }
 
@@ -260,10 +260,10 @@
   const __m128i sum_left = dc_sum_64(left);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 48;
   sum = divide_using_multiply_shift(sum, 5, DC_MULTIPLIER_1X2);
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_32xh(&row, 64, dst, stride);
 }
 
@@ -273,10 +273,10 @@
   const __m128i sum_left = dc_sum_64(left);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 64;
   sum /= 128;
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_64xh(&row, 64, dst, stride);
 }
 
@@ -286,10 +286,10 @@
   const __m128i sum_left = dc_sum_32_sse2(left);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 48;
   sum = divide_using_multiply_shift(sum, 5, DC_MULTIPLIER_1X2);
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_64xh(&row, 32, dst, stride);
 }
 
@@ -299,10 +299,10 @@
   const __m128i sum_left = dc_sum_16_sse2(left);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
-  uint32_t sum = _mm_cvtsi128_si32(sum_above);
+  uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
   sum += 40;
   sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X4);
-  const __m128i row = _mm_set1_epi8((uint8_t)sum);
+  const __m128i row = _mm_set1_epi8((int8_t)sum);
   dc_store_64xh(&row, 16, dst, stride);
 }
 
@@ -319,7 +319,7 @@
   sum_above = _mm_shufflelo_epi16(sum_above, 0);
   sum_above = _mm_packus_epi16(sum_above, sum_above);
 
-  const uint32_t pred = _mm_cvtsi128_si32(sum_above);
+  const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_above);
   dc_store_4xh(pred, 8, dst, stride);
 }
 
@@ -333,7 +333,7 @@
   sum_above = _mm_shufflelo_epi16(sum_above, 0);
   sum_above = _mm_packus_epi16(sum_above, sum_above);
 
-  const uint32_t pred = _mm_cvtsi128_si32(sum_above);
+  const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_above);
   dc_store_4xh(pred, 16, dst, stride);
 }
 
@@ -523,7 +523,7 @@
   sum_left = _mm_shufflelo_epi16(sum_left, 0);
   sum_left = _mm_packus_epi16(sum_left, sum_left);
 
-  const uint32_t pred = _mm_cvtsi128_si32(sum_left);
+  const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_left);
   dc_store_4xh(pred, 8, dst, stride);
 }
 
@@ -538,7 +538,7 @@
   sum_left = _mm_shufflelo_epi16(sum_left, 0);
   sum_left = _mm_packus_epi16(sum_left, sum_left);
 
-  const uint32_t pred = _mm_cvtsi128_si32(sum_left);
+  const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_left);
   dc_store_4xh(pred, 16, dst, stride);
 }
 
@@ -990,26 +990,26 @@
   __m128i row1 = _mm_shufflelo_epi16(left_col, 0x55);
   __m128i row2 = _mm_shufflelo_epi16(left_col, 0xaa);
   __m128i row3 = _mm_shufflelo_epi16(left_col, 0xff);
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+  *(int *)dst = _mm_cvtsi128_si32(row0);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+  *(int *)dst = _mm_cvtsi128_si32(row1);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+  *(int *)dst = _mm_cvtsi128_si32(row2);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+  *(int *)dst = _mm_cvtsi128_si32(row3);
   dst += stride;
   left_col = _mm_unpackhi_epi64(left_col, left_col);
   row0 = _mm_shufflelo_epi16(left_col, 0);
   row1 = _mm_shufflelo_epi16(left_col, 0x55);
   row2 = _mm_shufflelo_epi16(left_col, 0xaa);
   row3 = _mm_shufflelo_epi16(left_col, 0xff);
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+  *(int *)dst = _mm_cvtsi128_si32(row0);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+  *(int *)dst = _mm_cvtsi128_si32(row1);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+  *(int *)dst = _mm_cvtsi128_si32(row2);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+  *(int *)dst = _mm_cvtsi128_si32(row3);
 }
 
 void aom_h_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
@@ -1023,13 +1023,13 @@
   __m128i row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
   __m128i row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
   __m128i row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+  *(int *)dst = _mm_cvtsi128_si32(row0);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+  *(int *)dst = _mm_cvtsi128_si32(row1);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+  *(int *)dst = _mm_cvtsi128_si32(row2);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+  *(int *)dst = _mm_cvtsi128_si32(row3);
   dst += stride;
 
   left_col_low = _mm_unpackhi_epi64(left_col_low, left_col_low);
@@ -1037,26 +1037,26 @@
   row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
   row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
   row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+  *(int *)dst = _mm_cvtsi128_si32(row0);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+  *(int *)dst = _mm_cvtsi128_si32(row1);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+  *(int *)dst = _mm_cvtsi128_si32(row2);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+  *(int *)dst = _mm_cvtsi128_si32(row3);
   dst += stride;
 
   row0 = _mm_shufflelo_epi16(left_col_high, 0);
   row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
   row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
   row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+  *(int *)dst = _mm_cvtsi128_si32(row0);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+  *(int *)dst = _mm_cvtsi128_si32(row1);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+  *(int *)dst = _mm_cvtsi128_si32(row2);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+  *(int *)dst = _mm_cvtsi128_si32(row3);
   dst += stride;
 
   left_col_high = _mm_unpackhi_epi64(left_col_high, left_col_high);
@@ -1064,13 +1064,13 @@
   row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
   row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
   row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+  *(int *)dst = _mm_cvtsi128_si32(row0);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+  *(int *)dst = _mm_cvtsi128_si32(row1);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+  *(int *)dst = _mm_cvtsi128_si32(row2);
   dst += stride;
-  *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+  *(int *)dst = _mm_cvtsi128_si32(row3);
 }
 
 void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
diff --git a/aom_dsp/x86/intrapred_sse4.c b/aom_dsp/x86/intrapred_sse4.c
index b732580..21fb1bb 100644
--- a/aom_dsp/x86/intrapred_sse4.c
+++ b/aom_dsp/x86/intrapred_sse4.c
@@ -210,7 +210,7 @@
 
   dr_prediction_z1_HxW_internal_sse4_1(4, N, dstvec, above, upsample_above, dx);
   for (int i = 0; i < N; i++) {
-    *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
+    *(int *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
   }
 }
 
@@ -571,7 +571,7 @@
     resy = _mm_srli_si128(resx, 4);
 
     resxy = _mm_blendv_epi8(resx, resy, *(__m128i *)Mask[0][base_min_diff]);
-    *(uint32_t *)(dst) = _mm_cvtsi128_si32(resxy);
+    *(int *)(dst) = _mm_cvtsi128_si32(resxy);
     dst += stride;
   }
 }
@@ -938,10 +938,10 @@
   transpose4x8_8x4_low_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3],
                             &d[0], &d[1], &d[2], &d[3]);
 
-  *(uint32_t *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
-  *(uint32_t *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
-  *(uint32_t *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
-  *(uint32_t *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
+  *(int *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
+  *(int *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
+  *(int *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
+  *(int *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
   return;
 }
 
@@ -974,7 +974,7 @@
   transpose4x8_8x4_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3], &d[0],
                         &d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]);
   for (int i = 0; i < 8; i++) {
-    *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+    *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
   }
 }
 
@@ -1034,7 +1034,7 @@
   dr_prediction_z1_HxW_internal_sse4_1(16, 4, dstvec, left, upsample_left, dy);
   transpose4x16_sse2(dstvec, d);
   for (int i = 0; i < 16; i++) {
-    *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+    *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
   }
 }
 
diff --git a/aom_dsp/x86/intrapred_ssse3.c b/aom_dsp/x86/intrapred_ssse3.c
index f0bd040..a75616e 100644
--- a/aom_dsp/x86/intrapred_ssse3.c
+++ b/aom_dsp/x86/intrapred_ssse3.c
@@ -56,7 +56,7 @@
     const __m128i l16 = _mm_shuffle_epi8(l, rep);
     const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
 
-    *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
+    *(int *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
     dst += stride;
     rep = _mm_add_epi16(rep, one);
   }
@@ -77,7 +77,7 @@
     const __m128i l16 = _mm_shuffle_epi8(l, rep);
     const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
 
-    *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
+    *(int *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
     dst += stride;
     rep = _mm_add_epi16(rep, one);
   }
@@ -97,7 +97,7 @@
     const __m128i l16 = _mm_shuffle_epi8(l, rep);
     const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
 
-    *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
+    *(int *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
     dst += stride;
     rep = _mm_add_epi16(rep, one);
   }
@@ -656,7 +656,7 @@
     sum = _mm_srai_epi32(sum, 1 + SMOOTH_WEIGHT_LOG2_SCALE);
 
     sum = _mm_shuffle_epi8(sum, gat);
-    *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
+    *(int *)dst = _mm_cvtsi128_si32(sum);
     dst += stride;
 
     rep = _mm_add_epi16(rep, one);
@@ -1071,7 +1071,7 @@
     sum = _mm_add_epi32(sum, pred_round);
     sum = _mm_srai_epi32(sum, SMOOTH_WEIGHT_LOG2_SCALE);
     sum = _mm_shuffle_epi8(sum, gat);
-    *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
+    *(int *)dst = _mm_cvtsi128_si32(sum);
     dst += stride;
     d = _mm_add_epi16(d, inc);
   }
@@ -1417,7 +1417,7 @@
     sum = _mm_srai_epi32(sum, SMOOTH_WEIGHT_LOG2_SCALE);
 
     sum = _mm_shuffle_epi8(sum, gat);
-    *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
+    *(int *)dst = _mm_cvtsi128_si32(sum);
     dst += stride;
 
     rep = _mm_add_epi16(rep, one);
diff --git a/aom_dsp/x86/jnt_sad_ssse3.c b/aom_dsp/x86/jnt_sad_ssse3.c
index 4e6fe8f..357f70a 100644
--- a/aom_dsp/x86/jnt_sad_ssse3.c
+++ b/aom_dsp/x86/jnt_sad_ssse3.c
@@ -53,7 +53,8 @@
 
   // At this point, we have two 32-bit partial SADs at bit[0:31] and [64:95].
   const unsigned int res =
-      _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+      (unsigned int)(_mm_cvtsi128_si32(sad) +
+                     _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
 
   return res;
 }
@@ -84,7 +85,8 @@
   }
 
   const unsigned int res =
-      _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+      (unsigned int)(_mm_cvtsi128_si32(sad) +
+                     _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
 
   return res;
 }
@@ -108,7 +110,8 @@
   }
 
   const unsigned int res =
-      _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+      (unsigned int)(_mm_cvtsi128_si32(sad) +
+                     _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
 
   return res;
 }
@@ -134,7 +137,8 @@
   }
 
   const unsigned int res =
-      _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+      (unsigned int)(_mm_cvtsi128_si32(sad) +
+                     _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
 
   return res;
 }
@@ -160,7 +164,8 @@
   }
 
   const unsigned int res =
-      _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+      (unsigned int)(_mm_cvtsi128_si32(sad) +
+                     _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
 
   return res;
 }
@@ -186,7 +191,8 @@
   }
 
   const unsigned int res =
-      _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+      (unsigned int)(_mm_cvtsi128_si32(sad) +
+                     _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
 
   return res;
 }
diff --git a/aom_dsp/x86/masked_sad_intrin_ssse3.c b/aom_dsp/x86/masked_sad_intrin_ssse3.c
index 7168277..fd5352c 100644
--- a/aom_dsp/x86/masked_sad_intrin_ssse3.c
+++ b/aom_dsp/x86/masked_sad_intrin_ssse3.c
@@ -132,8 +132,8 @@
     m_ptr += m_stride;
   }
   // At this point, we have two 32-bit partial SADs in lanes 0 and 2 of 'res'.
-  int32_t sad =
-      _mm_cvtsi128_si32(res) + _mm_cvtsi128_si32(_mm_srli_si128(res, 8));
+  unsigned int sad = (unsigned int)(_mm_cvtsi128_si32(res) +
+                                    _mm_cvtsi128_si32(_mm_srli_si128(res, 8)));
   return sad;
 }
 
@@ -177,8 +177,8 @@
     b_ptr += b_stride * 2;
     m_ptr += m_stride * 2;
   }
-  int32_t sad =
-      _mm_cvtsi128_si32(res) + _mm_cvtsi128_si32(_mm_srli_si128(res, 8));
+  unsigned int sad = (unsigned int)(_mm_cvtsi128_si32(res) +
+                                    _mm_cvtsi128_si32(_mm_srli_si128(res, 8)));
   return sad;
 }
 
@@ -222,8 +222,7 @@
     m_ptr += m_stride * 2;
   }
   // At this point, the SAD is stored in lane 0 of 'res'
-  int32_t sad = _mm_cvtsi128_si32(res);
-  return sad;
+  return (unsigned int)_mm_cvtsi128_si32(res);
 }
 
 // For width a multiple of 8
diff --git a/aom_dsp/x86/masked_variance_intrin_ssse3.c b/aom_dsp/x86/masked_variance_intrin_ssse3.c
index bfd86ee..ac0e576 100644
--- a/aom_dsp/x86/masked_variance_intrin_ssse3.c
+++ b/aom_dsp/x86/masked_variance_intrin_ssse3.c
@@ -452,7 +452,7 @@
   sum = _mm_hadd_epi32(sum, sum_sq);
   sum = _mm_hadd_epi32(sum, sum);
   *sum_ = _mm_cvtsi128_si32(sum);
-  *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+  *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
 }
 
 static void masked_variance8xh(const uint8_t *src_ptr, int src_stride,
@@ -482,7 +482,7 @@
   sum = _mm_hadd_epi32(sum, sum_sq);
   sum = _mm_hadd_epi32(sum, sum);
   *sum_ = _mm_cvtsi128_si32(sum);
-  *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+  *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
 }
 
 static void masked_variance4xh(const uint8_t *src_ptr, int src_stride,
@@ -514,7 +514,7 @@
   sum = _mm_hadd_epi32(sum, sum_sq);
   sum = _mm_hadd_epi32(sum, sum);
   *sum_ = _mm_cvtsi128_si32(sum);
-  *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+  *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
 }
 
 #if CONFIG_AV1_HIGHBITDEPTH
@@ -1024,7 +1024,7 @@
   sum = _mm_hadd_epi32(sum, sum_sq);
   sum = _mm_hadd_epi32(sum, zero);
   *sum_ = _mm_cvtsi128_si32(sum);
-  *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+  *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
 }
 #endif  // CONFIG_AV1_HIGHBITDEPTH
 
diff --git a/aom_dsp/x86/obmc_variance_avx2.c b/aom_dsp/x86/obmc_variance_avx2.c
index bfec0e8..b2df8a9 100644
--- a/aom_dsp/x86/obmc_variance_avx2.c
+++ b/aom_dsp/x86/obmc_variance_avx2.c
@@ -77,7 +77,7 @@
   v_d = _mm_hadd_epi32(v_sum_d, v_sse_d);
   v_d = _mm_hadd_epi32(v_d, v_d);
   *sum = _mm_cvtsi128_si32(v_d);
-  *sse = _mm_cvtsi128_si32(_mm_srli_si128(v_d, 4));
+  *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(v_d, 4));
 }
 
 static INLINE void obmc_variance_w16n(const uint8_t *pre, const int pre_stride,
@@ -147,7 +147,7 @@
   res0 = _mm256_castsi256_si128(v_d);
   res0 = _mm_add_epi32(res0, _mm256_extractf128_si256(v_d, 1));
   *sum = _mm_cvtsi128_si32(res0);
-  *sse = _mm_cvtsi128_si32(_mm_srli_si128(res0, 4));
+  *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(res0, 4));
 }
 
 #define OBMCVARWXH(W, H)                                                \
diff --git a/aom_dsp/x86/sad_avx2.c b/aom_dsp/x86/sad_avx2.c
index ef3fdc1..24cea76 100644
--- a/aom_dsp/x86/sad_avx2.c
+++ b/aom_dsp/x86/sad_avx2.c
@@ -17,7 +17,7 @@
 static INLINE unsigned int sad64xh_avx2(const uint8_t *src_ptr, int src_stride,
                                         const uint8_t *ref_ptr, int ref_stride,
                                         int h) {
-  int i, res;
+  int i;
   __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;
   __m256i sum_sad = _mm256_setzero_si256();
   __m256i sum_sad_h;
@@ -37,7 +37,7 @@
   sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);
   sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);
   sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);
-  res = _mm_cvtsi128_si32(sum_sad128);
+  unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128);
   _mm256_zeroupper();
   return res;
 }
@@ -45,7 +45,7 @@
 static INLINE unsigned int sad32xh_avx2(const uint8_t *src_ptr, int src_stride,
                                         const uint8_t *ref_ptr, int ref_stride,
                                         int h) {
-  int i, res;
+  int i;
   __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;
   __m256i sum_sad = _mm256_setzero_si256();
   __m256i sum_sad_h;
@@ -68,7 +68,7 @@
   sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);
   sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);
   sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);
-  res = _mm_cvtsi128_si32(sum_sad128);
+  unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128);
   _mm256_zeroupper();
   return res;
 }
@@ -129,7 +129,7 @@
   unsigned int aom_sad64x##h##_avg_avx2(                                      \
       const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,         \
       int ref_stride, const uint8_t *second_pred) {                           \
-    int i, res;                                                               \
+    int i;                                                                    \
     __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;                           \
     __m256i sum_sad = _mm256_setzero_si256();                                 \
     __m256i sum_sad_h;                                                        \
@@ -155,7 +155,7 @@
     sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);                           \
     sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);                        \
     sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);  \
-    res = _mm_cvtsi128_si32(sum_sad128);                                      \
+    unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128);           \
     _mm256_zeroupper();                                                       \
     return res;                                                               \
   }
@@ -164,7 +164,7 @@
   unsigned int aom_sad32x##h##_avg_avx2(                                      \
       const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,         \
       int ref_stride, const uint8_t *second_pred) {                           \
-    int i, res;                                                               \
+    int i;                                                                    \
     __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;                           \
     __m256i sum_sad = _mm256_setzero_si256();                                 \
     __m256i sum_sad_h;                                                        \
@@ -194,7 +194,7 @@
     sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);                           \
     sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);                        \
     sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);  \
-    res = _mm_cvtsi128_si32(sum_sad128);                                      \
+    unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128);           \
     _mm256_zeroupper();                                                       \
     return res;                                                               \
   }
diff --git a/aom_dsp/x86/sad_impl_avx2.c b/aom_dsp/x86/sad_impl_avx2.c
index 2afae4b..c5da6e9 100644
--- a/aom_dsp/x86/sad_impl_avx2.c
+++ b/aom_dsp/x86/sad_impl_avx2.c
@@ -34,7 +34,7 @@
   sum = _mm256_add_epi32(sum, _mm256_srli_si256(sum, 8));
   sum_i128 = _mm_add_epi32(_mm256_extracti128_si256(sum, 1),
                            _mm256_castsi256_si128(sum));
-  return _mm_cvtsi128_si32(sum_i128);
+  return (unsigned int)_mm_cvtsi128_si32(sum_i128);
 }
 
 static unsigned int sad64x32(const uint8_t *src_ptr, int src_stride,
@@ -112,7 +112,7 @@
                                      const uint8_t *ref_ptr, int ref_stride,
                                      const int h, const uint8_t *second_pred,
                                      const int second_pred_stride) {
-  int i, res;
+  int i;
   __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;
   __m256i sum_sad = _mm256_setzero_si256();
   __m256i sum_sad_h;
@@ -137,9 +137,7 @@
   sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);
   sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);
   sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);
-  res = _mm_cvtsi128_si32(sum_sad128);
-
-  return res;
+  return (unsigned int)_mm_cvtsi128_si32(sum_sad128);
 }
 
 unsigned int aom_sad64x128_avg_avx2(const uint8_t *src_ptr, int src_stride,
diff --git a/aom_dsp/x86/variance_sse2.c b/aom_dsp/x86/variance_sse2.c
index c36eeee..a0223a9 100644
--- a/aom_dsp/x86/variance_sse2.c
+++ b/aom_dsp/x86/variance_sse2.c
@@ -32,7 +32,7 @@
 
   vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
   vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
-  return _mm_cvtsi128_si32(vsum);
+  return (unsigned int)_mm_cvtsi128_si32(vsum);
 }
 
 static INLINE __m128i load4x2_sse2(const uint8_t *const p, const int stride) {
@@ -50,7 +50,7 @@
 static INLINE unsigned int add32x4_sse2(__m128i val) {
   val = _mm_add_epi32(val, _mm_srli_si128(val, 8));
   val = _mm_add_epi32(val, _mm_srli_si128(val, 4));
-  return _mm_cvtsi128_si32(val);
+  return (unsigned int)_mm_cvtsi128_si32(val);
 }
 
 // Accumulate 8 16bit in sum to 4 32bit number
diff --git a/av1/common/x86/av1_convolve_scale_sse4.c b/av1/common/x86/av1_convolve_scale_sse4.c
index 1966181..0bdf49f 100644
--- a/av1/common/x86/av1_convolve_scale_sse4.c
+++ b/av1/common/x86/av1_convolve_scale_sse4.c
@@ -187,7 +187,7 @@
           const __m128i subbed = _mm_sub_epi16(shifted_16, sub);
           result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift);
           const __m128i result_8 = _mm_packus_epi16(result, result);
-          *(uint32_t *)dst_x = _mm_cvtsi128_si32(result_8);
+          *(int *)dst_x = _mm_cvtsi128_si32(result_8);
         } else {
           _mm_storel_epi64((__m128i *)dst_16_x, shifted_16);
         }
@@ -195,7 +195,7 @@
         const __m128i subbed = _mm_sub_epi16(shifted_16, sub);
         result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift);
         const __m128i result_8 = _mm_packus_epi16(result, result);
-        *(uint32_t *)dst_x = _mm_cvtsi128_si32(result_8);
+        *(int *)dst_x = _mm_cvtsi128_si32(result_8);
       }
     }
     for (; x < w; ++x) {
diff --git a/av1/common/x86/av1_inv_txfm_ssse3.c b/av1/common/x86/av1_inv_txfm_ssse3.c
index a2a43f8..f9bfb37 100644
--- a/av1/common/x86/av1_inv_txfm_ssse3.c
+++ b/av1/common/x86/av1_inv_txfm_ssse3.c
@@ -2249,7 +2249,7 @@
     const __m128i v = _mm_cvtsi32_si128(*((uint32_t *)(output + i * stride)));
     __m128i u = _mm_adds_epi16(in[j], _mm_unpacklo_epi8(v, zero));
     u = _mm_packus_epi16(u, zero);
-    *((uint32_t *)(output + i * stride)) = _mm_cvtsi128_si32(u);
+    *((int *)(output + i * stride)) = _mm_cvtsi128_si32(u);
   }
 }
 
diff --git a/av1/common/x86/convolve_2d_sse2.c b/av1/common/x86/convolve_2d_sse2.c
index ca88bd7..1b85f37 100644
--- a/av1/common/x86/convolve_2d_sse2.c
+++ b/av1/common/x86/convolve_2d_sse2.c
@@ -426,7 +426,7 @@
           if (w == 2) {
             *(uint16_t *)p = (uint16_t)_mm_cvtsi128_si32(res);
           } else if (w == 4) {
-            *(uint32_t *)p = _mm_cvtsi128_si32(res);
+            *(int *)p = _mm_cvtsi128_si32(res);
           } else {
             _mm_storel_epi64(p, res);
           }
@@ -534,7 +534,7 @@
           if (w > 4)
             _mm_storel_epi64((__m128i *)(&dst0[j]), res_8);
           else
-            *(uint32_t *)(&dst0[j]) = _mm_cvtsi128_si32(res_8);
+            *(int *)(&dst0[j]) = _mm_cvtsi128_si32(res_8);
         } else {
           _mm_store_si128((__m128i *)(&dst[j]), res_unsigned);
         }
diff --git a/av1/common/x86/convolve_avx2.c b/av1/common/x86/convolve_avx2.c
index c7d1141..89e0a4c 100644
--- a/av1/common/x86/convolve_avx2.c
+++ b/av1/common/x86/convolve_avx2.c
@@ -376,10 +376,9 @@
           const __m128i res_0 = _mm256_extracti128_si256(res_8b_lo, 0);
           const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1);
           if (w - j > 2) {
-            *(uint32_t *)&dst[i * dst_stride + j] =
-                (uint32_t)_mm_cvtsi128_si32(res_0);
-            *(uint32_t *)&dst[i * dst_stride + j + dst_stride] =
-                (uint32_t)_mm_cvtsi128_si32(res_1);
+            *(int *)&dst[i * dst_stride + j] = _mm_cvtsi128_si32(res_0);
+            *(int *)&dst[i * dst_stride + j + dst_stride] =
+                _mm_cvtsi128_si32(res_1);
           } else {
             *(uint16_t *)&dst[i * dst_stride + j] =
                 (uint16_t)_mm_cvtsi128_si32(res_0);
@@ -767,11 +766,9 @@
         const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1);
         if (w > 2) {
           // 00 01 02 03
-          *(uint32_t *)&dst[i * dst_stride] =
-              (uint32_t)_mm_cvtsi128_si32(res_0);
+          *(int *)&dst[i * dst_stride] = _mm_cvtsi128_si32(res_0);
           // 10 11 12 13
-          *(uint32_t *)&dst[i * dst_stride + dst_stride] =
-              (uint32_t)_mm_cvtsi128_si32(res_1);
+          *(int *)&dst[i * dst_stride + dst_stride] = _mm_cvtsi128_si32(res_1);
         } else {
           // 00 01
           *(uint16_t *)&dst[i * dst_stride] =
@@ -824,10 +821,8 @@
           __m256i res_8b_lo = _mm256_packus_epi16(res_16b_lo, res_16b_lo);
           const __m128i res_0 = _mm256_extracti128_si256(res_8b_lo, 0);
           const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1);
-          *(uint32_t *)&dst[i * dst_stride + j] =
-              (uint32_t)_mm_cvtsi128_si32(res_0);
-          *(uint32_t *)&dst[i * dst_stride + j + 4] =
-              (uint32_t)_mm_cvtsi128_si32(res_1);
+          *(int *)&dst[i * dst_stride + j] = _mm_cvtsi128_si32(res_0);
+          *(int *)&dst[i * dst_stride + j + 4] = _mm_cvtsi128_si32(res_1);
         }
       }
     }
@@ -869,8 +864,8 @@
         } else {
           __m128i *const p_0 = (__m128i *)&dst[i * dst_stride];
           __m128i *const p_1 = (__m128i *)&dst[i * dst_stride + dst_stride];
-          *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0);
-          *(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1);
+          *(uint16_t *)p_0 = (uint16_t)_mm_cvtsi128_si32(res_0);
+          *(uint16_t *)p_1 = (uint16_t)_mm_cvtsi128_si32(res_1);
         }
       }
     } else {
diff --git a/av1/common/x86/convolve_sse2.c b/av1/common/x86/convolve_sse2.c
index cd5521e..0e77822 100644
--- a/av1/common/x86/convolve_sse2.c
+++ b/av1/common/x86/convolve_sse2.c
@@ -199,7 +199,7 @@
 
     if (w <= 4) {
       __m128i s[8], src6, res, res_round, res16;
-      uint32_t res_int;
+      int res_int;
       src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride));
       s[0] = _mm_unpacklo_epi8(
           _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)),
@@ -234,7 +234,7 @@
         if (w == 2)
           *(uint16_t *)dst = (uint16_t)res_int;
         else
-          *(uint32_t *)dst = res_int;
+          *(int *)dst = res_int;
 
         src_ptr += src_stride;
         dst += dst_stride;
@@ -247,7 +247,7 @@
         if (w == 2)
           *(uint16_t *)dst = (uint16_t)res_int;
         else
-          *(uint32_t *)dst = res_int;
+          *(int *)dst = res_int;
 
         src_ptr += src_stride;
         dst += dst_stride;
@@ -443,11 +443,11 @@
         const __m128i res16 = _mm_packs_epi32(res_lo_round, res_lo_round);
         const __m128i res = _mm_packus_epi16(res16, res16);
 
-        uint32_t r = _mm_cvtsi128_si32(res);
+        int r = _mm_cvtsi128_si32(res);
         if (w == 2)
           *(uint16_t *)dst = (uint16_t)r;
         else
-          *(uint32_t *)dst = r;
+          *(int *)dst = r;
 
         src_ptr += src_stride;
         dst += dst_stride;
diff --git a/av1/common/x86/highbd_convolve_2d_ssse3.c b/av1/common/x86/highbd_convolve_2d_ssse3.c
index 148543f..8324044 100644
--- a/av1/common/x86/highbd_convolve_2d_ssse3.c
+++ b/av1/common/x86/highbd_convolve_2d_ssse3.c
@@ -211,10 +211,10 @@
             res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
             res_a_round1 = _mm_max_epi16(res_a_round1, zero);
 
-            *((uint32_t *)(&dst[i * dst_stride + j])) =
+            *((int *)(&dst[i * dst_stride + j])) =
                 _mm_cvtsi128_si32(res_a_round0);
 
-            *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
+            *((int *)(&dst[i * dst_stride + j + dst_stride])) =
                 _mm_cvtsi128_si32(res_a_round1);
           }
           s[0] = s[1];
@@ -384,10 +384,10 @@
             res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
             res_a_round1 = _mm_max_epi16(res_a_round1, zero);
 
-            *((uint32_t *)(&dst[i * dst_stride + j])) =
+            *((int *)(&dst[i * dst_stride + j])) =
                 _mm_cvtsi128_si32(res_a_round0);
 
-            *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
+            *((int *)(&dst[i * dst_stride + j + dst_stride])) =
                 _mm_cvtsi128_si32(res_a_round1);
           }
           s[0] = s[1];
diff --git a/av1/common/x86/jnt_convolve_avx2.c b/av1/common/x86/jnt_convolve_avx2.c
index 7a13d4a..8ea856e 100644
--- a/av1/common/x86/jnt_convolve_avx2.c
+++ b/av1/common/x86/jnt_convolve_avx2.c
@@ -110,9 +110,8 @@
             _mm_storel_epi64(
                 (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
           } else {
-            *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
-                _mm_cvtsi128_si32(res_0);
-            *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+            *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+            *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
                 _mm_cvtsi128_si32(res_1);
           }
         } else {
@@ -165,9 +164,8 @@
             _mm_storel_epi64(
                 (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
           } else {
-            *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
-                _mm_cvtsi128_si32(res_0);
-            *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+            *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+            *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
                 _mm_cvtsi128_si32(res_1);
           }
         } else {
@@ -304,9 +302,8 @@
                   (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])),
                   res_1);
             } else {
-              *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
-                  _mm_cvtsi128_si32(res_0);
-              *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+              *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+              *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
                   _mm_cvtsi128_si32(res_1);
             }
           } else {
@@ -483,9 +480,8 @@
                   (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])),
                   res_1);
             } else {
-              *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
-                  _mm_cvtsi128_si32(res_0);
-              *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+              *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+              *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
                   _mm_cvtsi128_si32(res_1);
             }
           } else {
@@ -754,9 +750,8 @@
             const __m128i res_0 = _mm256_castsi256_si128(res_8);
             const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
 
-            *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
-                _mm_cvtsi128_si32(res_0);
-            *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+            *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+            *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
                 _mm_cvtsi128_si32(res_1);
 
           } else {
@@ -883,9 +878,8 @@
             _mm_storel_epi64(
                 (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
           } else {
-            *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
-                _mm_cvtsi128_si32(res_0);
-            *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+            *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+            *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
                 _mm_cvtsi128_si32(res_1);
           }
         } else {
diff --git a/av1/common/x86/jnt_convolve_sse2.c b/av1/common/x86/jnt_convolve_sse2.c
index b8400c0..581d150 100644
--- a/av1/common/x86/jnt_convolve_sse2.c
+++ b/av1/common/x86/jnt_convolve_sse2.c
@@ -79,7 +79,7 @@
             &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
 
         const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
-        *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+        *(int *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
       } else {
         _mm_store_si128((__m128i *)(&dst[0]), res_unsigned);
       }
@@ -223,7 +223,7 @@
             &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
 
         const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
-        *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+        *(int *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
 
       } else {
         _mm_store_si128((__m128i *)dst, res_unsigned);
@@ -252,7 +252,7 @@
             &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
 
         const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
-        *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+        *(int *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
 
       } else {
         _mm_store_si128((__m128i *)dst, res_unsigned);
@@ -596,8 +596,7 @@
           if (w > 4)
             _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
           else
-            *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
-                _mm_cvtsi128_si32(res_8);
+            *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_8);
         } else {
           _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
         }
diff --git a/av1/common/x86/jnt_convolve_ssse3.c b/av1/common/x86/jnt_convolve_ssse3.c
index f45e3b2..d0cf763 100644
--- a/av1/common/x86/jnt_convolve_ssse3.c
+++ b/av1/common/x86/jnt_convolve_ssse3.c
@@ -220,8 +220,7 @@
           if (w > 4)
             _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
           else
-            *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
-                _mm_cvtsi128_si32(res_8);
+            *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_8);
         } else {
           _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
         }
diff --git a/av1/common/x86/reconinter_sse4.c b/av1/common/x86/reconinter_sse4.c
index 5171ca4..a503532 100644
--- a/av1/common/x86/reconinter_sse4.c
+++ b/av1/common/x86/reconinter_sse4.c
@@ -46,8 +46,8 @@
       const __m128i m16 = calc_mask(mask_base, s0, s1);
       const __m128i m8 = _mm_packus_epi16(m16, m16);
 
-      *(uint32_t *)mask = _mm_cvtsi128_si32(m8);
-      *(uint32_t *)(mask + w) = _mm_extract_epi32(m8, 1);
+      *(int *)mask = _mm_cvtsi128_si32(m8);
+      *(int *)(mask + w) = _mm_extract_epi32(m8, 1);
       src0 += (stride0 << 1);
       src1 += (stride1 << 1);
       mask += 8;
@@ -146,7 +146,7 @@
       if ((w - j) > 4) {
         _mm_storel_epi64(dst, res_8);
       } else {  // w==4
-        *(uint32_t *)dst = _mm_cvtsi128_si32(res_8);
+        *(int *)dst = _mm_cvtsi128_si32(res_8);
       }
     }
   }
diff --git a/av1/common/x86/warp_plane_avx2.c b/av1/common/x86/warp_plane_avx2.c
index f6aaa88..b0c9a93 100644
--- a/av1/common/x86/warp_plane_avx2.c
+++ b/av1/common/x86/warp_plane_avx2.c
@@ -699,8 +699,8 @@
       const __m256i res_8_lo = _mm256_packus_epi16(res_lo_16, res_lo_16);
       const __m128i res_8_lo_0 = _mm256_castsi256_si128(res_8_lo);
       const __m128i res_8_lo_1 = _mm256_extracti128_si256(res_8_lo, 1);
-      *(uint32_t *)dst8_0 = _mm_cvtsi128_si32(res_8_lo_0);
-      *(uint32_t *)dst8_1 = _mm_cvtsi128_si32(res_8_lo_1);
+      *(int *)dst8_0 = _mm_cvtsi128_si32(res_8_lo_0);
+      *(int *)dst8_1 = _mm_cvtsi128_si32(res_8_lo_1);
     } else {
       const __m128i temp_lo_16_0 = _mm256_castsi256_si128(temp_lo_16);
       const __m128i temp_lo_16_1 = _mm256_extracti128_si256(temp_lo_16, 1);
@@ -742,8 +742,8 @@
         __m256i res_8_hi = _mm256_packus_epi16(res_hi_16, res_hi_16);
         const __m128i res_8_hi_0 = _mm256_castsi256_si128(res_8_hi);
         const __m128i res_8_hi_1 = _mm256_extracti128_si256(res_8_hi, 1);
-        *(uint32_t *)dst8_4_0 = _mm_cvtsi128_si32(res_8_hi_0);
-        *(uint32_t *)dst8_4_1 = _mm_cvtsi128_si32(res_8_hi_1);
+        *(int *)dst8_4_0 = _mm_cvtsi128_si32(res_8_hi_0);
+        *(int *)dst8_4_1 = _mm_cvtsi128_si32(res_8_hi_1);
       } else {
         const __m128i temp_hi_16_0 = _mm256_castsi256_si128(temp_hi_16);
         const __m128i temp_hi_16_1 = _mm256_extracti128_si256(temp_hi_16, 1);
@@ -767,8 +767,8 @@
     __m128i *const p1 = (__m128i *)&pred[(i + (k + 1) + 4) * p_stride + j];
 
     if (p_width == 4) {
-      *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit0);
-      *(uint32_t *)p1 = _mm_cvtsi128_si32(res_8bit1);
+      *(int *)p = _mm_cvtsi128_si32(res_8bit0);
+      *(int *)p1 = _mm_cvtsi128_si32(res_8bit1);
     } else {
       _mm_storel_epi64(p, res_8bit0);
       _mm_storel_epi64(p1, res_8bit1);
diff --git a/av1/common/x86/warp_plane_sse4.c b/av1/common/x86/warp_plane_sse4.c
index b1df486..e35b557 100644
--- a/av1/common/x86/warp_plane_sse4.c
+++ b/av1/common/x86/warp_plane_sse4.c
@@ -613,7 +613,7 @@
       res_lo_16 = _mm_srai_epi16(_mm_add_epi16(res_lo_16, *round_bits_const),
                                  round_bits);
       __m128i res_8_lo = _mm_packus_epi16(res_lo_16, res_lo_16);
-      *(uint32_t *)dst8 = _mm_cvtsi128_si32(res_8_lo);
+      *(int *)dst8 = _mm_cvtsi128_si32(res_8_lo);
     } else {
       _mm_storel_epi64(p, temp_lo_16);
     }
@@ -645,7 +645,7 @@
         res_hi_16 = _mm_srai_epi16(_mm_add_epi16(res_hi_16, *round_bits_const),
                                    round_bits);
         __m128i res_8_hi = _mm_packus_epi16(res_hi_16, res_hi_16);
-        *(uint32_t *)dst8_4 = _mm_cvtsi128_si32(res_8_hi);
+        *(int *)dst8_4 = _mm_cvtsi128_si32(res_8_hi);
 
       } else {
         _mm_storel_epi64(p4, temp_hi_16);
@@ -667,7 +667,7 @@
     // to only output 4 pixels at this point, to avoid encode/decode
     // mismatches when encoding with multiple threads.
     if (p_width == 4) {
-      *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
+      *(int *)p = _mm_cvtsi128_si32(res_8bit);
     } else {
       _mm_storel_epi64(p, res_8bit);
     }
diff --git a/av1/encoder/x86/temporal_filter_avx2.c b/av1/encoder/x86/temporal_filter_avx2.c
index 8aa0764..a9c80040 100644
--- a/av1/encoder/x86/temporal_filter_avx2.c
+++ b/av1/encoder/x86/temporal_filter_avx2.c
@@ -51,7 +51,7 @@
 
     _mm256_storeu_si256((__m256i *)(dst), vsqdiff1);
     // Set zero to uninitialized memory to avoid uninitialized loads later
-    *(uint32_t *)(dst + 16) = _mm_cvtsi128_si32(_mm_setzero_si128());
+    *(int *)(dst + 16) = _mm_cvtsi128_si32(_mm_setzero_si128());
 
     src1 += stride, src2 += stride2;
     dst += sse_stride;
@@ -85,7 +85,7 @@
     _mm256_storeu_si256((__m256i *)(dst), vres1);
     _mm256_storeu_si256((__m256i *)(dst + 16), vres2);
     // Set zero to uninitialized memory to avoid uninitialized loads later
-    *(uint32_t *)(dst + 32) = _mm_cvtsi128_si32(_mm_setzero_si128());
+    *(int *)(dst + 32) = _mm_cvtsi128_si32(_mm_setzero_si128());
 
     src1 += stride;
     src2 += stride2;
diff --git a/av1/encoder/x86/temporal_filter_sse2.c b/av1/encoder/x86/temporal_filter_sse2.c
index 26c3926..8be7164 100644
--- a/av1/encoder/x86/temporal_filter_sse2.c
+++ b/av1/encoder/x86/temporal_filter_sse2.c
@@ -42,7 +42,7 @@
   for (int i = 0; i < block_height; i++) {
     for (int j = 0; j < block_width; j += 16) {
       // Set zero to uninitialized memory to avoid uninitialized loads later
-      *(uint32_t *)(dst) = _mm_cvtsi128_si32(_mm_setzero_si128());
+      *(int *)(dst) = _mm_cvtsi128_si32(_mm_setzero_si128());
 
       __m128i vsrc1 = _mm_loadu_si128((__m128i *)(src1 + j));
       __m128i vsrc2 = _mm_loadu_si128((__m128i *)(src2 + j));
@@ -63,8 +63,7 @@
     }
 
     // Set zero to uninitialized memory to avoid uninitialized loads later
-    *(uint32_t *)(dst + block_width + 2) =
-        _mm_cvtsi128_si32(_mm_setzero_si128());
+    *(int *)(dst + block_width + 2) = _mm_cvtsi128_si32(_mm_setzero_si128());
 
     src1 += stride;
     src2 += stride2;