Revert "x86: normalize types used with _mm_cvtsi128_si32"
This reverts commit cbbc5cc3ab19003937bd97ebcea811961eb42736.
Reason for revert: The reverted change causes some assertion and fuzzing failures (see the bugs listed below).
Bug: oss-fuzz:49590
Bug: oss-fuzz:49595
Change-Id: I3f14d14c106c43c3ac67e7f48e563d230b66f21f
diff --git a/aom_dsp/x86/aom_convolve_copy_sse2.c b/aom_dsp/x86/aom_convolve_copy_sse2.c
index e78845e..f7b468a 100644
--- a/aom_dsp/x86/aom_convolve_copy_sse2.c
+++ b/aom_dsp/x86/aom_convolve_copy_sse2.c
@@ -207,11 +207,11 @@
if (w == 2) {
do {
__m128i s = _mm_loadl_epi64((__m128i *)src);
- *(int *)dst = _mm_cvtsi128_si32(s);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(s);
src += src_stride;
dst += dst_stride;
s = _mm_loadl_epi64((__m128i *)src);
- *(int *)dst = _mm_cvtsi128_si32(s);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(s);
src += src_stride;
dst += dst_stride;
h -= 2;
diff --git a/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c b/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c
index 22f2e69..d8d353c 100644
--- a/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c
+++ b/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c
@@ -43,8 +43,8 @@
static INLINE void xx_storeu2_epi32(const uint8_t *output_ptr,
const ptrdiff_t stride, const __m256i *a) {
- *((int *)(output_ptr)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*a));
- *((int *)(output_ptr + stride)) =
+ *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*a));
+ *((uint32_t *)(output_ptr + stride)) =
_mm_cvtsi128_si32(_mm256_extracti128_si256(*a, 1));
}
@@ -151,7 +151,7 @@
srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
// save 4 bytes
- *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
+ *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
}
}
@@ -256,7 +256,7 @@
srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
// save 4 bytes
- *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
+ *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
}
}
diff --git a/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c b/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
index 5c36b68..cff7f43 100644
--- a/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
+++ b/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
@@ -477,7 +477,7 @@
src_ptr += src_pixels_per_line;
- *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
+ *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
output_ptr += output_pitch;
}
@@ -555,8 +555,8 @@
src_ptr += src_stride;
- *((int *)(output_ptr)) = _mm_cvtsi128_si32(resReg23_34_45_56);
- *((int *)(output_ptr + out_pitch)) =
+ *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(resReg23_34_45_56);
+ *((uint32_t *)(output_ptr + out_pitch)) =
_mm_cvtsi128_si32(_mm_srli_si128(resReg23_34_45_56, 4));
output_ptr += dst_stride;
diff --git a/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c b/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c
index 5823059..8a18279 100644
--- a/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c
+++ b/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c
@@ -108,7 +108,7 @@
src_ptr += src_pixels_per_line;
- *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
+ *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
output_ptr += output_pitch;
}
}
@@ -185,8 +185,8 @@
src_ptr += src_stride;
- *((int *)(output_ptr)) = _mm_cvtsi128_si32(resReglo);
- *((int *)(output_ptr + out_pitch)) = _mm_cvtsi128_si32(resReghi);
+ *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(resReglo);
+ *((uint32_t *)(output_ptr + out_pitch)) = _mm_cvtsi128_si32(resReghi);
output_ptr += dst_stride;
diff --git a/aom_dsp/x86/convolve_avx2.h b/aom_dsp/x86/convolve_avx2.h
index a00ede2..785ba39 100644
--- a/aom_dsp/x86/convolve_avx2.h
+++ b/aom_dsp/x86/convolve_avx2.h
@@ -576,8 +576,9 @@
const __m128i res_0 = _mm256_castsi256_si128(res_8); \
const __m128i res_1 = _mm256_extracti128_si256(res_8, 1); \
\
- *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0); \
- *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) = \
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) = \
+ _mm_cvtsi128_si32(res_0); \
+ *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) = \
_mm_cvtsi128_si32(res_1); \
\
} else { \
diff --git a/aom_dsp/x86/highbd_convolve_ssse3.c b/aom_dsp/x86/highbd_convolve_ssse3.c
index 21389db..5293e27 100644
--- a/aom_dsp/x86/highbd_convolve_ssse3.c
+++ b/aom_dsp/x86/highbd_convolve_ssse3.c
@@ -136,10 +136,10 @@
res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
res_a_round1 = _mm_max_epi16(res_a_round1, zero);
- *((int *)(&dst[i * dst_stride + j])) =
+ *((uint32_t *)(&dst[i * dst_stride + j])) =
_mm_cvtsi128_si32(res_a_round0);
- *((int *)(&dst[i * dst_stride + j + dst_stride])) =
+ *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
_mm_cvtsi128_si32(res_a_round1);
}
@@ -264,10 +264,10 @@
res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
res_a_round1 = _mm_max_epi16(res_a_round1, zero);
- *((int *)(&dst[i * dst_stride + j])) =
+ *((uint32_t *)(&dst[i * dst_stride + j])) =
_mm_cvtsi128_si32(res_a_round0);
- *((int *)(&dst[i * dst_stride + j + dst_stride])) =
+ *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
_mm_cvtsi128_si32(res_a_round1);
}
@@ -375,7 +375,7 @@
} else if (w == 4) {
_mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res);
} else {
- *((int *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
+ *((uint32_t *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
}
}
}
@@ -430,7 +430,7 @@
} else if (w == 4) {
_mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res);
} else {
- *((int *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
+ *((uint32_t *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
}
}
}
diff --git a/aom_dsp/x86/highbd_intrapred_sse2.c b/aom_dsp/x86/highbd_intrapred_sse2.c
index 0b187cd..5a55736 100644
--- a/aom_dsp/x86/highbd_intrapred_sse2.c
+++ b/aom_dsp/x86/highbd_intrapred_sse2.c
@@ -821,11 +821,11 @@
const __m128i sum_above = dc_sum_4(above);
const __m128i sum_left = dc_sum_8(left);
const __m128i sum = _mm_add_epi16(sum_above, sum_left);
- int sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
sum32 >>= 16;
sum32 += 6;
sum32 /= 12;
- const __m128i row = _mm_set1_epi16((int16_t)sum32);
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
int i;
for (i = 0; i < 4; ++i) {
_mm_storel_epi64((__m128i *)dst, row);
@@ -842,11 +842,11 @@
const __m128i sum_left = dc_sum_4(left);
const __m128i sum_above = dc_sum_8(above);
const __m128i sum = _mm_add_epi16(sum_above, sum_left);
- int sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
sum32 >>= 16;
sum32 += 6;
sum32 /= 12;
- const __m128i row = _mm_set1_epi16((int16_t)sum32);
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
_mm_store_si128((__m128i *)dst, row);
dst += stride;
@@ -867,10 +867,10 @@
sum_left = _mm_unpacklo_epi16(sum_left, zero);
sum_above = _mm_unpacklo_epi16(sum_above, zero);
const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- int sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
sum32 += 12;
sum32 /= 24;
- const __m128i row = _mm_set1_epi16((int16_t)sum32);
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
int i;
for (i = 0; i < 4; ++i) {
_mm_store_si128((__m128i *)dst, row);
@@ -894,10 +894,10 @@
sum_left = _mm_unpacklo_epi16(sum_left, zero);
sum_above = _mm_unpacklo_epi16(sum_above, zero);
const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- int sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
sum32 += 12;
sum32 /= 24;
- const __m128i row = _mm_set1_epi16((int16_t)sum32);
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
int i;
for (i = 0; i < 2; ++i) {
_mm_store_si128((__m128i *)dst, row);
@@ -924,10 +924,10 @@
const __m128i zero = _mm_setzero_si128();
sum_above = _mm_unpacklo_epi16(sum_above, zero);
const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- int sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
sum32 += 24;
sum32 /= 48;
- const __m128i row = _mm_set1_epi16((int16_t)sum32);
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
int i;
for (i = 0; i < 8; ++i) {
_mm_store_si128((__m128i *)dst, row);
@@ -954,10 +954,10 @@
const __m128i zero = _mm_setzero_si128();
sum_left = _mm_unpacklo_epi16(sum_left, zero);
const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- int sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
sum32 += 24;
sum32 /= 48;
- const __m128i row = _mm_set1_epi16((int16_t)sum32);
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
int i;
for (i = 0; i < 4; ++i) {
_mm_store_si128((__m128i *)dst, row);
diff --git a/aom_dsp/x86/intrapred_avx2.c b/aom_dsp/x86/intrapred_avx2.c
index 0a2c308..b4b5ce2 100644
--- a/aom_dsp/x86/intrapred_avx2.c
+++ b/aom_dsp/x86/intrapred_avx2.c
@@ -426,10 +426,10 @@
const __m128i top_sum = dc_sum_32_sse2(above);
__m128i left_sum = dc_sum_16_sse2(left);
left_sum = _mm_add_epi16(top_sum, left_sum);
- int sum = _mm_cvtsi128_si32(left_sum);
+ uint16_t sum = _mm_cvtsi128_si32(left_sum);
sum += 24;
sum /= 48;
- const __m256i row = _mm256_set1_epi8((int8_t)sum);
+ const __m256i row = _mm256_set1_epi8((uint8_t)sum);
row_store_32xh(&row, 16, dst, stride);
}
@@ -438,10 +438,10 @@
const __m256i sum_above = dc_sum_32(above);
__m256i sum_left = dc_sum_64(left);
sum_left = _mm256_add_epi16(sum_left, sum_above);
- int sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+ uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
sum += 48;
sum /= 96;
- const __m256i row = _mm256_set1_epi8((int8_t)sum);
+ const __m256i row = _mm256_set1_epi8((uint8_t)sum);
row_store_32xh(&row, 64, dst, stride);
}
@@ -450,10 +450,10 @@
const __m256i sum_above = dc_sum_64(above);
__m256i sum_left = dc_sum_64(left);
sum_left = _mm256_add_epi16(sum_left, sum_above);
- int sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+ uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
sum += 64;
sum /= 128;
- const __m256i row = _mm256_set1_epi8((int8_t)sum);
+ const __m256i row = _mm256_set1_epi8((uint8_t)sum);
row_store_64xh(&row, 64, dst, stride);
}
@@ -462,10 +462,10 @@
const __m256i sum_above = dc_sum_64(above);
__m256i sum_left = dc_sum_32(left);
sum_left = _mm256_add_epi16(sum_left, sum_above);
- int sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+ uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
sum += 48;
sum /= 96;
- const __m256i row = _mm256_set1_epi8((int8_t)sum);
+ const __m256i row = _mm256_set1_epi8((uint8_t)sum);
row_store_64xh(&row, 32, dst, stride);
}
@@ -474,10 +474,10 @@
const __m256i sum_above = dc_sum_64(above);
__m256i sum_left = _mm256_castsi128_si256(dc_sum_16_sse2(left));
sum_left = _mm256_add_epi16(sum_left, sum_above);
- int sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+ uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
sum += 40;
sum /= 80;
- const __m256i row = _mm256_set1_epi8((int8_t)sum);
+ const __m256i row = _mm256_set1_epi8((uint8_t)sum);
row_store_64xh(&row, 16, dst, stride);
}
@@ -3597,7 +3597,7 @@
dr_prediction_z1_HxW_internal_avx2(4, N, dstvec, above, upsample_above, dx);
for (int i = 0; i < N; i++) {
- *(int *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
+ *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
}
}
@@ -3926,7 +3926,7 @@
resy = _mm_srli_si128(resx, 4);
resxy = _mm_blendv_epi8(resx, resy, *(__m128i *)BaseMask[base_min_diff]);
- *(int *)(dst) = _mm_cvtsi128_si32(resxy);
+ *(uint32_t *)(dst) = _mm_cvtsi128_si32(resxy);
dst += stride;
}
}
@@ -4338,10 +4338,10 @@
transpose4x8_8x4_low_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3],
&d[0], &d[1], &d[2], &d[3]);
- *(int *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
- *(int *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
- *(int *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
- *(int *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
+ *(uint32_t *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
+ *(uint32_t *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
+ *(uint32_t *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
+ *(uint32_t *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
return;
}
@@ -4374,7 +4374,7 @@
transpose4x8_8x4_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3], &d[0],
&d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]);
for (int i = 0; i < 8; i++) {
- *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+ *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
}
}
@@ -4434,7 +4434,7 @@
dr_prediction_z1_HxW_internal_avx2(16, 4, dstvec, left, upsample_left, dy);
transpose4x16_sse2(dstvec, d);
for (int i = 0; i < 16; i++) {
- *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+ *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
}
}
diff --git a/aom_dsp/x86/intrapred_sse2.c b/aom_dsp/x86/intrapred_sse2.c
index 08e6114..5afef68 100644
--- a/aom_dsp/x86/intrapred_sse2.c
+++ b/aom_dsp/x86/intrapred_sse2.c
@@ -112,12 +112,12 @@
__m128i sum_above = dc_sum_4(above);
sum_above = _mm_add_epi16(sum_left, sum_above);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 6;
sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
- const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(row);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const uint32_t pred = _mm_cvtsi128_si32(row);
dc_store_4xh(pred, 8, dst, stride);
}
@@ -127,12 +127,12 @@
__m128i sum_above = dc_sum_4(above);
sum_above = _mm_add_epi16(sum_left, sum_above);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 10;
sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
- const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(row);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const uint32_t pred = _mm_cvtsi128_si32(row);
dc_store_4xh(pred, 16, dst, stride);
}
@@ -142,7 +142,7 @@
__m128i sum_above = dc_sum_8(above);
sum_above = _mm_add_epi16(sum_above, sum_left);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 6;
sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X2);
@@ -156,10 +156,10 @@
__m128i sum_above = dc_sum_8(above);
sum_above = _mm_add_epi16(sum_above, sum_left);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 12;
sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_8xh(&row, 16, dst, stride);
}
@@ -169,10 +169,10 @@
__m128i sum_above = dc_sum_8(above);
sum_above = _mm_add_epi16(sum_above, sum_left);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 20;
sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_8xh(&row, 32, dst, stride);
}
@@ -182,10 +182,10 @@
__m128i sum_above = dc_sum_16_sse2(above);
sum_above = _mm_add_epi16(sum_above, sum_left);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 10;
sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_16xh(&row, 4, dst, stride);
}
@@ -195,10 +195,10 @@
__m128i sum_above = dc_sum_16_sse2(above);
sum_above = _mm_add_epi16(sum_above, sum_left);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 12;
sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_16xh(&row, 8, dst, stride);
}
@@ -208,10 +208,10 @@
__m128i sum_above = dc_sum_16_sse2(above);
sum_above = _mm_add_epi16(sum_left, sum_above);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 24;
sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_16xh(&row, 32, dst, stride);
}
@@ -221,10 +221,10 @@
__m128i sum_above = dc_sum_16_sse2(above);
sum_above = _mm_add_epi16(sum_left, sum_above);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 40;
sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_16xh(&row, 64, dst, stride);
}
@@ -234,10 +234,10 @@
const __m128i sum_left = dc_sum_8(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 20;
sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_32xh(&row, 8, dst, stride);
}
@@ -247,10 +247,10 @@
const __m128i sum_left = dc_sum_16_sse2(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 24;
sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_32xh(&row, 16, dst, stride);
}
@@ -260,10 +260,10 @@
const __m128i sum_left = dc_sum_64(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 48;
sum = divide_using_multiply_shift(sum, 5, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_32xh(&row, 64, dst, stride);
}
@@ -273,10 +273,10 @@
const __m128i sum_left = dc_sum_64(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 64;
sum /= 128;
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_64xh(&row, 64, dst, stride);
}
@@ -286,10 +286,10 @@
const __m128i sum_left = dc_sum_32_sse2(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 48;
sum = divide_using_multiply_shift(sum, 5, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_64xh(&row, 32, dst, stride);
}
@@ -299,10 +299,10 @@
const __m128i sum_left = dc_sum_16_sse2(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- int sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
sum += 40;
sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
dc_store_64xh(&row, 16, dst, stride);
}
@@ -319,7 +319,7 @@
sum_above = _mm_shufflelo_epi16(sum_above, 0);
sum_above = _mm_packus_epi16(sum_above, sum_above);
- const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_above);
+ const uint32_t pred = _mm_cvtsi128_si32(sum_above);
dc_store_4xh(pred, 8, dst, stride);
}
@@ -333,7 +333,7 @@
sum_above = _mm_shufflelo_epi16(sum_above, 0);
sum_above = _mm_packus_epi16(sum_above, sum_above);
- const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_above);
+ const uint32_t pred = _mm_cvtsi128_si32(sum_above);
dc_store_4xh(pred, 16, dst, stride);
}
@@ -523,7 +523,7 @@
sum_left = _mm_shufflelo_epi16(sum_left, 0);
sum_left = _mm_packus_epi16(sum_left, sum_left);
- const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_left);
+ const uint32_t pred = _mm_cvtsi128_si32(sum_left);
dc_store_4xh(pred, 8, dst, stride);
}
@@ -538,7 +538,7 @@
sum_left = _mm_shufflelo_epi16(sum_left, 0);
sum_left = _mm_packus_epi16(sum_left, sum_left);
- const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_left);
+ const uint32_t pred = _mm_cvtsi128_si32(sum_left);
dc_store_4xh(pred, 16, dst, stride);
}
@@ -990,26 +990,26 @@
__m128i row1 = _mm_shufflelo_epi16(left_col, 0x55);
__m128i row2 = _mm_shufflelo_epi16(left_col, 0xaa);
__m128i row3 = _mm_shufflelo_epi16(left_col, 0xff);
- *(int *)dst = _mm_cvtsi128_si32(row0);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row1);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row2);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row3);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
dst += stride;
left_col = _mm_unpackhi_epi64(left_col, left_col);
row0 = _mm_shufflelo_epi16(left_col, 0);
row1 = _mm_shufflelo_epi16(left_col, 0x55);
row2 = _mm_shufflelo_epi16(left_col, 0xaa);
row3 = _mm_shufflelo_epi16(left_col, 0xff);
- *(int *)dst = _mm_cvtsi128_si32(row0);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row1);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row2);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row3);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
}
void aom_h_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
@@ -1023,13 +1023,13 @@
__m128i row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
__m128i row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
__m128i row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
- *(int *)dst = _mm_cvtsi128_si32(row0);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row1);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row2);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row3);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
dst += stride;
left_col_low = _mm_unpackhi_epi64(left_col_low, left_col_low);
@@ -1037,26 +1037,26 @@
row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
- *(int *)dst = _mm_cvtsi128_si32(row0);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row1);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row2);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row3);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
dst += stride;
row0 = _mm_shufflelo_epi16(left_col_high, 0);
row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
- *(int *)dst = _mm_cvtsi128_si32(row0);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row1);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row2);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row3);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
dst += stride;
left_col_high = _mm_unpackhi_epi64(left_col_high, left_col_high);
@@ -1064,13 +1064,13 @@
row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
- *(int *)dst = _mm_cvtsi128_si32(row0);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row1);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row2);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(int *)dst = _mm_cvtsi128_si32(row3);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
}
void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
diff --git a/aom_dsp/x86/intrapred_sse4.c b/aom_dsp/x86/intrapred_sse4.c
index 21fb1bb..b732580 100644
--- a/aom_dsp/x86/intrapred_sse4.c
+++ b/aom_dsp/x86/intrapred_sse4.c
@@ -210,7 +210,7 @@
dr_prediction_z1_HxW_internal_sse4_1(4, N, dstvec, above, upsample_above, dx);
for (int i = 0; i < N; i++) {
- *(int *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
+ *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
}
}
@@ -571,7 +571,7 @@
resy = _mm_srli_si128(resx, 4);
resxy = _mm_blendv_epi8(resx, resy, *(__m128i *)Mask[0][base_min_diff]);
- *(int *)(dst) = _mm_cvtsi128_si32(resxy);
+ *(uint32_t *)(dst) = _mm_cvtsi128_si32(resxy);
dst += stride;
}
}
@@ -938,10 +938,10 @@
transpose4x8_8x4_low_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3],
&d[0], &d[1], &d[2], &d[3]);
- *(int *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
- *(int *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
- *(int *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
- *(int *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
+ *(uint32_t *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
+ *(uint32_t *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
+ *(uint32_t *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
+ *(uint32_t *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
return;
}
@@ -974,7 +974,7 @@
transpose4x8_8x4_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3], &d[0],
&d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]);
for (int i = 0; i < 8; i++) {
- *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+ *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
}
}
@@ -1034,7 +1034,7 @@
dr_prediction_z1_HxW_internal_sse4_1(16, 4, dstvec, left, upsample_left, dy);
transpose4x16_sse2(dstvec, d);
for (int i = 0; i < 16; i++) {
- *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+ *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
}
}
diff --git a/aom_dsp/x86/intrapred_ssse3.c b/aom_dsp/x86/intrapred_ssse3.c
index a75616e..f0bd040 100644
--- a/aom_dsp/x86/intrapred_ssse3.c
+++ b/aom_dsp/x86/intrapred_ssse3.c
@@ -56,7 +56,7 @@
const __m128i l16 = _mm_shuffle_epi8(l, rep);
const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
- *(int *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
+ *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
dst += stride;
rep = _mm_add_epi16(rep, one);
}
@@ -77,7 +77,7 @@
const __m128i l16 = _mm_shuffle_epi8(l, rep);
const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
- *(int *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
+ *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
dst += stride;
rep = _mm_add_epi16(rep, one);
}
@@ -97,7 +97,7 @@
const __m128i l16 = _mm_shuffle_epi8(l, rep);
const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
- *(int *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
+ *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
dst += stride;
rep = _mm_add_epi16(rep, one);
}
@@ -656,7 +656,7 @@
sum = _mm_srai_epi32(sum, 1 + SMOOTH_WEIGHT_LOG2_SCALE);
sum = _mm_shuffle_epi8(sum, gat);
- *(int *)dst = _mm_cvtsi128_si32(sum);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
dst += stride;
rep = _mm_add_epi16(rep, one);
@@ -1071,7 +1071,7 @@
sum = _mm_add_epi32(sum, pred_round);
sum = _mm_srai_epi32(sum, SMOOTH_WEIGHT_LOG2_SCALE);
sum = _mm_shuffle_epi8(sum, gat);
- *(int *)dst = _mm_cvtsi128_si32(sum);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
dst += stride;
d = _mm_add_epi16(d, inc);
}
@@ -1417,7 +1417,7 @@
sum = _mm_srai_epi32(sum, SMOOTH_WEIGHT_LOG2_SCALE);
sum = _mm_shuffle_epi8(sum, gat);
- *(int *)dst = _mm_cvtsi128_si32(sum);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
dst += stride;
rep = _mm_add_epi16(rep, one);
diff --git a/aom_dsp/x86/jnt_sad_ssse3.c b/aom_dsp/x86/jnt_sad_ssse3.c
index 357f70a..4e6fe8f 100644
--- a/aom_dsp/x86/jnt_sad_ssse3.c
+++ b/aom_dsp/x86/jnt_sad_ssse3.c
@@ -53,8 +53,7 @@
// At this point, we have two 32-bit partial SADs at bit[0:31] and [64:95].
const unsigned int res =
- (unsigned int)(_mm_cvtsi128_si32(sad) +
- _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
+ _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
return res;
}
@@ -85,8 +84,7 @@
}
const unsigned int res =
- (unsigned int)(_mm_cvtsi128_si32(sad) +
- _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
+ _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
return res;
}
@@ -110,8 +108,7 @@
}
const unsigned int res =
- (unsigned int)(_mm_cvtsi128_si32(sad) +
- _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
+ _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
return res;
}
@@ -137,8 +134,7 @@
}
const unsigned int res =
- (unsigned int)(_mm_cvtsi128_si32(sad) +
- _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
+ _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
return res;
}
@@ -164,8 +160,7 @@
}
const unsigned int res =
- (unsigned int)(_mm_cvtsi128_si32(sad) +
- _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
+ _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
return res;
}
@@ -191,8 +186,7 @@
}
const unsigned int res =
- (unsigned int)(_mm_cvtsi128_si32(sad) +
- _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
+ _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
return res;
}
diff --git a/aom_dsp/x86/masked_sad_intrin_ssse3.c b/aom_dsp/x86/masked_sad_intrin_ssse3.c
index fd5352c..7168277 100644
--- a/aom_dsp/x86/masked_sad_intrin_ssse3.c
+++ b/aom_dsp/x86/masked_sad_intrin_ssse3.c
@@ -132,8 +132,8 @@
m_ptr += m_stride;
}
// At this point, we have two 32-bit partial SADs in lanes 0 and 2 of 'res'.
- unsigned int sad = (unsigned int)(_mm_cvtsi128_si32(res) +
- _mm_cvtsi128_si32(_mm_srli_si128(res, 8)));
+ int32_t sad =
+ _mm_cvtsi128_si32(res) + _mm_cvtsi128_si32(_mm_srli_si128(res, 8));
return sad;
}
@@ -177,8 +177,8 @@
b_ptr += b_stride * 2;
m_ptr += m_stride * 2;
}
- unsigned int sad = (unsigned int)(_mm_cvtsi128_si32(res) +
- _mm_cvtsi128_si32(_mm_srli_si128(res, 8)));
+ int32_t sad =
+ _mm_cvtsi128_si32(res) + _mm_cvtsi128_si32(_mm_srli_si128(res, 8));
return sad;
}
@@ -222,7 +222,8 @@
m_ptr += m_stride * 2;
}
// At this point, the SAD is stored in lane 0 of 'res'
- return (unsigned int)_mm_cvtsi128_si32(res);
+ int32_t sad = _mm_cvtsi128_si32(res);
+ return sad;
}
// For width a multiple of 8
diff --git a/aom_dsp/x86/masked_variance_intrin_ssse3.c b/aom_dsp/x86/masked_variance_intrin_ssse3.c
index ac0e576..bfd86ee 100644
--- a/aom_dsp/x86/masked_variance_intrin_ssse3.c
+++ b/aom_dsp/x86/masked_variance_intrin_ssse3.c
@@ -452,7 +452,7 @@
sum = _mm_hadd_epi32(sum, sum_sq);
sum = _mm_hadd_epi32(sum, sum);
*sum_ = _mm_cvtsi128_si32(sum);
- *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+ *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
}
static void masked_variance8xh(const uint8_t *src_ptr, int src_stride,
@@ -482,7 +482,7 @@
sum = _mm_hadd_epi32(sum, sum_sq);
sum = _mm_hadd_epi32(sum, sum);
*sum_ = _mm_cvtsi128_si32(sum);
- *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+ *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
}
static void masked_variance4xh(const uint8_t *src_ptr, int src_stride,
@@ -514,7 +514,7 @@
sum = _mm_hadd_epi32(sum, sum_sq);
sum = _mm_hadd_epi32(sum, sum);
*sum_ = _mm_cvtsi128_si32(sum);
- *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+ *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
}
#if CONFIG_AV1_HIGHBITDEPTH
@@ -1024,7 +1024,7 @@
sum = _mm_hadd_epi32(sum, sum_sq);
sum = _mm_hadd_epi32(sum, zero);
*sum_ = _mm_cvtsi128_si32(sum);
- *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+ *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
}
#endif // CONFIG_AV1_HIGHBITDEPTH
diff --git a/aom_dsp/x86/obmc_variance_avx2.c b/aom_dsp/x86/obmc_variance_avx2.c
index b2df8a9..bfec0e8 100644
--- a/aom_dsp/x86/obmc_variance_avx2.c
+++ b/aom_dsp/x86/obmc_variance_avx2.c
@@ -77,7 +77,7 @@
v_d = _mm_hadd_epi32(v_sum_d, v_sse_d);
v_d = _mm_hadd_epi32(v_d, v_d);
*sum = _mm_cvtsi128_si32(v_d);
- *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(v_d, 4));
+ *sse = _mm_cvtsi128_si32(_mm_srli_si128(v_d, 4));
}
static INLINE void obmc_variance_w16n(const uint8_t *pre, const int pre_stride,
@@ -147,7 +147,7 @@
res0 = _mm256_castsi256_si128(v_d);
res0 = _mm_add_epi32(res0, _mm256_extractf128_si256(v_d, 1));
*sum = _mm_cvtsi128_si32(res0);
- *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(res0, 4));
+ *sse = _mm_cvtsi128_si32(_mm_srli_si128(res0, 4));
}
#define OBMCVARWXH(W, H) \
diff --git a/aom_dsp/x86/sad_avx2.c b/aom_dsp/x86/sad_avx2.c
index 24cea76..ef3fdc1 100644
--- a/aom_dsp/x86/sad_avx2.c
+++ b/aom_dsp/x86/sad_avx2.c
@@ -17,7 +17,7 @@
static INLINE unsigned int sad64xh_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
int h) {
- int i;
+ int i, res;
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;
__m256i sum_sad = _mm256_setzero_si256();
__m256i sum_sad_h;
@@ -37,7 +37,7 @@
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);
- unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128);
+ res = _mm_cvtsi128_si32(sum_sad128);
_mm256_zeroupper();
return res;
}
@@ -45,7 +45,7 @@
static INLINE unsigned int sad32xh_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
int h) {
- int i;
+ int i, res;
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;
__m256i sum_sad = _mm256_setzero_si256();
__m256i sum_sad_h;
@@ -68,7 +68,7 @@
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);
- unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128);
+ res = _mm_cvtsi128_si32(sum_sad128);
_mm256_zeroupper();
return res;
}
@@ -129,7 +129,7 @@
unsigned int aom_sad64x##h##_avg_avx2( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, const uint8_t *second_pred) { \
- int i; \
+ int i, res; \
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
__m256i sum_sad = _mm256_setzero_si256(); \
__m256i sum_sad_h; \
@@ -155,7 +155,7 @@
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
- unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128); \
+ res = _mm_cvtsi128_si32(sum_sad128); \
_mm256_zeroupper(); \
return res; \
}
@@ -164,7 +164,7 @@
unsigned int aom_sad32x##h##_avg_avx2( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, const uint8_t *second_pred) { \
- int i; \
+ int i, res; \
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
__m256i sum_sad = _mm256_setzero_si256(); \
__m256i sum_sad_h; \
@@ -194,7 +194,7 @@
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
- unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128); \
+ res = _mm_cvtsi128_si32(sum_sad128); \
_mm256_zeroupper(); \
return res; \
}
diff --git a/aom_dsp/x86/sad_impl_avx2.c b/aom_dsp/x86/sad_impl_avx2.c
index c5da6e9..2afae4b 100644
--- a/aom_dsp/x86/sad_impl_avx2.c
+++ b/aom_dsp/x86/sad_impl_avx2.c
@@ -34,7 +34,7 @@
sum = _mm256_add_epi32(sum, _mm256_srli_si256(sum, 8));
sum_i128 = _mm_add_epi32(_mm256_extracti128_si256(sum, 1),
_mm256_castsi256_si128(sum));
- return (unsigned int)_mm_cvtsi128_si32(sum_i128);
+ return _mm_cvtsi128_si32(sum_i128);
}
static unsigned int sad64x32(const uint8_t *src_ptr, int src_stride,
@@ -112,7 +112,7 @@
const uint8_t *ref_ptr, int ref_stride,
const int h, const uint8_t *second_pred,
const int second_pred_stride) {
- int i;
+ int i, res;
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;
__m256i sum_sad = _mm256_setzero_si256();
__m256i sum_sad_h;
@@ -137,7 +137,9 @@
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);
- return (unsigned int)_mm_cvtsi128_si32(sum_sad128);
+ res = _mm_cvtsi128_si32(sum_sad128);
+
+ return res;
}
unsigned int aom_sad64x128_avg_avx2(const uint8_t *src_ptr, int src_stride,
diff --git a/aom_dsp/x86/variance_sse2.c b/aom_dsp/x86/variance_sse2.c
index a0223a9..c36eeee 100644
--- a/aom_dsp/x86/variance_sse2.c
+++ b/aom_dsp/x86/variance_sse2.c
@@ -32,7 +32,7 @@
vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
- return (unsigned int)_mm_cvtsi128_si32(vsum);
+ return _mm_cvtsi128_si32(vsum);
}
static INLINE __m128i load4x2_sse2(const uint8_t *const p, const int stride) {
@@ -50,7 +50,7 @@
static INLINE unsigned int add32x4_sse2(__m128i val) {
val = _mm_add_epi32(val, _mm_srli_si128(val, 8));
val = _mm_add_epi32(val, _mm_srli_si128(val, 4));
- return (unsigned int)_mm_cvtsi128_si32(val);
+ return _mm_cvtsi128_si32(val);
}
// Accumulate 8 16bit in sum to 4 32bit number
diff --git a/av1/common/x86/av1_convolve_scale_sse4.c b/av1/common/x86/av1_convolve_scale_sse4.c
index 0bdf49f..1966181 100644
--- a/av1/common/x86/av1_convolve_scale_sse4.c
+++ b/av1/common/x86/av1_convolve_scale_sse4.c
@@ -187,7 +187,7 @@
const __m128i subbed = _mm_sub_epi16(shifted_16, sub);
result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift);
const __m128i result_8 = _mm_packus_epi16(result, result);
- *(int *)dst_x = _mm_cvtsi128_si32(result_8);
+ *(uint32_t *)dst_x = _mm_cvtsi128_si32(result_8);
} else {
_mm_storel_epi64((__m128i *)dst_16_x, shifted_16);
}
@@ -195,7 +195,7 @@
const __m128i subbed = _mm_sub_epi16(shifted_16, sub);
result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift);
const __m128i result_8 = _mm_packus_epi16(result, result);
- *(int *)dst_x = _mm_cvtsi128_si32(result_8);
+ *(uint32_t *)dst_x = _mm_cvtsi128_si32(result_8);
}
}
for (; x < w; ++x) {
diff --git a/av1/common/x86/av1_inv_txfm_ssse3.c b/av1/common/x86/av1_inv_txfm_ssse3.c
index f9bfb37..a2a43f8 100644
--- a/av1/common/x86/av1_inv_txfm_ssse3.c
+++ b/av1/common/x86/av1_inv_txfm_ssse3.c
@@ -2249,7 +2249,7 @@
const __m128i v = _mm_cvtsi32_si128(*((uint32_t *)(output + i * stride)));
__m128i u = _mm_adds_epi16(in[j], _mm_unpacklo_epi8(v, zero));
u = _mm_packus_epi16(u, zero);
- *((int *)(output + i * stride)) = _mm_cvtsi128_si32(u);
+ *((uint32_t *)(output + i * stride)) = _mm_cvtsi128_si32(u);
}
}
diff --git a/av1/common/x86/convolve_2d_sse2.c b/av1/common/x86/convolve_2d_sse2.c
index 1b85f37..ca88bd7 100644
--- a/av1/common/x86/convolve_2d_sse2.c
+++ b/av1/common/x86/convolve_2d_sse2.c
@@ -426,7 +426,7 @@
if (w == 2) {
*(uint16_t *)p = (uint16_t)_mm_cvtsi128_si32(res);
} else if (w == 4) {
- *(int *)p = _mm_cvtsi128_si32(res);
+ *(uint32_t *)p = _mm_cvtsi128_si32(res);
} else {
_mm_storel_epi64(p, res);
}
@@ -534,7 +534,7 @@
if (w > 4)
_mm_storel_epi64((__m128i *)(&dst0[j]), res_8);
else
- *(int *)(&dst0[j]) = _mm_cvtsi128_si32(res_8);
+ *(uint32_t *)(&dst0[j]) = _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)(&dst[j]), res_unsigned);
}
diff --git a/av1/common/x86/convolve_avx2.c b/av1/common/x86/convolve_avx2.c
index 89e0a4c..c7d1141 100644
--- a/av1/common/x86/convolve_avx2.c
+++ b/av1/common/x86/convolve_avx2.c
@@ -376,9 +376,10 @@
const __m128i res_0 = _mm256_extracti128_si256(res_8b_lo, 0);
const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1);
if (w - j > 2) {
- *(int *)&dst[i * dst_stride + j] = _mm_cvtsi128_si32(res_0);
- *(int *)&dst[i * dst_stride + j + dst_stride] =
- _mm_cvtsi128_si32(res_1);
+ *(uint32_t *)&dst[i * dst_stride + j] =
+ (uint32_t)_mm_cvtsi128_si32(res_0);
+ *(uint32_t *)&dst[i * dst_stride + j + dst_stride] =
+ (uint32_t)_mm_cvtsi128_si32(res_1);
} else {
*(uint16_t *)&dst[i * dst_stride + j] =
(uint16_t)_mm_cvtsi128_si32(res_0);
@@ -766,9 +767,11 @@
const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1);
if (w > 2) {
// 00 01 02 03
- *(int *)&dst[i * dst_stride] = _mm_cvtsi128_si32(res_0);
+ *(uint32_t *)&dst[i * dst_stride] =
+ (uint32_t)_mm_cvtsi128_si32(res_0);
// 10 11 12 13
- *(int *)&dst[i * dst_stride + dst_stride] = _mm_cvtsi128_si32(res_1);
+ *(uint32_t *)&dst[i * dst_stride + dst_stride] =
+ (uint32_t)_mm_cvtsi128_si32(res_1);
} else {
// 00 01
*(uint16_t *)&dst[i * dst_stride] =
@@ -821,8 +824,10 @@
__m256i res_8b_lo = _mm256_packus_epi16(res_16b_lo, res_16b_lo);
const __m128i res_0 = _mm256_extracti128_si256(res_8b_lo, 0);
const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1);
- *(int *)&dst[i * dst_stride + j] = _mm_cvtsi128_si32(res_0);
- *(int *)&dst[i * dst_stride + j + 4] = _mm_cvtsi128_si32(res_1);
+ *(uint32_t *)&dst[i * dst_stride + j] =
+ (uint32_t)_mm_cvtsi128_si32(res_0);
+ *(uint32_t *)&dst[i * dst_stride + j + 4] =
+ (uint32_t)_mm_cvtsi128_si32(res_1);
}
}
}
@@ -864,8 +869,8 @@
} else {
__m128i *const p_0 = (__m128i *)&dst[i * dst_stride];
__m128i *const p_1 = (__m128i *)&dst[i * dst_stride + dst_stride];
- *(uint16_t *)p_0 = (uint16_t)_mm_cvtsi128_si32(res_0);
- *(uint16_t *)p_1 = (uint16_t)_mm_cvtsi128_si32(res_1);
+ *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0);
+ *(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1);
}
}
} else {
diff --git a/av1/common/x86/convolve_sse2.c b/av1/common/x86/convolve_sse2.c
index 0e77822..cd5521e 100644
--- a/av1/common/x86/convolve_sse2.c
+++ b/av1/common/x86/convolve_sse2.c
@@ -199,7 +199,7 @@
if (w <= 4) {
__m128i s[8], src6, res, res_round, res16;
- int res_int;
+ uint32_t res_int;
src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride));
s[0] = _mm_unpacklo_epi8(
_mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)),
@@ -234,7 +234,7 @@
if (w == 2)
*(uint16_t *)dst = (uint16_t)res_int;
else
- *(int *)dst = res_int;
+ *(uint32_t *)dst = res_int;
src_ptr += src_stride;
dst += dst_stride;
@@ -247,7 +247,7 @@
if (w == 2)
*(uint16_t *)dst = (uint16_t)res_int;
else
- *(int *)dst = res_int;
+ *(uint32_t *)dst = res_int;
src_ptr += src_stride;
dst += dst_stride;
@@ -443,11 +443,11 @@
const __m128i res16 = _mm_packs_epi32(res_lo_round, res_lo_round);
const __m128i res = _mm_packus_epi16(res16, res16);
- int r = _mm_cvtsi128_si32(res);
+ uint32_t r = _mm_cvtsi128_si32(res);
if (w == 2)
*(uint16_t *)dst = (uint16_t)r;
else
- *(int *)dst = r;
+ *(uint32_t *)dst = r;
src_ptr += src_stride;
dst += dst_stride;
diff --git a/av1/common/x86/highbd_convolve_2d_ssse3.c b/av1/common/x86/highbd_convolve_2d_ssse3.c
index 8324044..148543f 100644
--- a/av1/common/x86/highbd_convolve_2d_ssse3.c
+++ b/av1/common/x86/highbd_convolve_2d_ssse3.c
@@ -211,10 +211,10 @@
res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
res_a_round1 = _mm_max_epi16(res_a_round1, zero);
- *((int *)(&dst[i * dst_stride + j])) =
+ *((uint32_t *)(&dst[i * dst_stride + j])) =
_mm_cvtsi128_si32(res_a_round0);
- *((int *)(&dst[i * dst_stride + j + dst_stride])) =
+ *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
_mm_cvtsi128_si32(res_a_round1);
}
s[0] = s[1];
@@ -384,10 +384,10 @@
res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
res_a_round1 = _mm_max_epi16(res_a_round1, zero);
- *((int *)(&dst[i * dst_stride + j])) =
+ *((uint32_t *)(&dst[i * dst_stride + j])) =
_mm_cvtsi128_si32(res_a_round0);
- *((int *)(&dst[i * dst_stride + j + dst_stride])) =
+ *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
_mm_cvtsi128_si32(res_a_round1);
}
s[0] = s[1];
diff --git a/av1/common/x86/jnt_convolve_avx2.c b/av1/common/x86/jnt_convolve_avx2.c
index 8ea856e..7a13d4a 100644
--- a/av1/common/x86/jnt_convolve_avx2.c
+++ b/av1/common/x86/jnt_convolve_avx2.c
@@ -110,8 +110,9 @@
_mm_storel_epi64(
(__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
} else {
- *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
- *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_0);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
_mm_cvtsi128_si32(res_1);
}
} else {
@@ -164,8 +165,9 @@
_mm_storel_epi64(
(__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
} else {
- *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
- *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_0);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
_mm_cvtsi128_si32(res_1);
}
} else {
@@ -302,8 +304,9 @@
(__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])),
res_1);
} else {
- *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
- *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_0);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
_mm_cvtsi128_si32(res_1);
}
} else {
@@ -480,8 +483,9 @@
(__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])),
res_1);
} else {
- *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
- *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_0);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
_mm_cvtsi128_si32(res_1);
}
} else {
@@ -750,8 +754,9 @@
const __m128i res_0 = _mm256_castsi256_si128(res_8);
const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
- *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
- *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_0);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
_mm_cvtsi128_si32(res_1);
} else {
@@ -878,8 +883,9 @@
_mm_storel_epi64(
(__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
} else {
- *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
- *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_0);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
_mm_cvtsi128_si32(res_1);
}
} else {
diff --git a/av1/common/x86/jnt_convolve_sse2.c b/av1/common/x86/jnt_convolve_sse2.c
index 581d150..b8400c0 100644
--- a/av1/common/x86/jnt_convolve_sse2.c
+++ b/av1/common/x86/jnt_convolve_sse2.c
@@ -79,7 +79,7 @@
&comp_avg_res, &offset_const, &rounding_const, rounding_shift);
const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- *(int *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+ *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)(&dst[0]), res_unsigned);
}
@@ -223,7 +223,7 @@
&comp_avg_res, &offset_const, &rounding_const, rounding_shift);
const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- *(int *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+ *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)dst, res_unsigned);
@@ -252,7 +252,7 @@
&comp_avg_res, &offset_const, &rounding_const, rounding_shift);
const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- *(int *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+ *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)dst, res_unsigned);
@@ -596,7 +596,8 @@
if (w > 4)
_mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
else
- *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_8);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
}
diff --git a/av1/common/x86/jnt_convolve_ssse3.c b/av1/common/x86/jnt_convolve_ssse3.c
index d0cf763..f45e3b2 100644
--- a/av1/common/x86/jnt_convolve_ssse3.c
+++ b/av1/common/x86/jnt_convolve_ssse3.c
@@ -220,7 +220,8 @@
if (w > 4)
_mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
else
- *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_8);
+ *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
+ _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
}
diff --git a/av1/common/x86/reconinter_sse4.c b/av1/common/x86/reconinter_sse4.c
index a503532..5171ca4 100644
--- a/av1/common/x86/reconinter_sse4.c
+++ b/av1/common/x86/reconinter_sse4.c
@@ -46,8 +46,8 @@
const __m128i m16 = calc_mask(mask_base, s0, s1);
const __m128i m8 = _mm_packus_epi16(m16, m16);
- *(int *)mask = _mm_cvtsi128_si32(m8);
- *(int *)(mask + w) = _mm_extract_epi32(m8, 1);
+ *(uint32_t *)mask = _mm_cvtsi128_si32(m8);
+ *(uint32_t *)(mask + w) = _mm_extract_epi32(m8, 1);
src0 += (stride0 << 1);
src1 += (stride1 << 1);
mask += 8;
@@ -146,7 +146,7 @@
if ((w - j) > 4) {
_mm_storel_epi64(dst, res_8);
} else { // w==4
- *(int *)dst = _mm_cvtsi128_si32(res_8);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(res_8);
}
}
}
diff --git a/av1/common/x86/warp_plane_avx2.c b/av1/common/x86/warp_plane_avx2.c
index b0c9a93..f6aaa88 100644
--- a/av1/common/x86/warp_plane_avx2.c
+++ b/av1/common/x86/warp_plane_avx2.c
@@ -699,8 +699,8 @@
const __m256i res_8_lo = _mm256_packus_epi16(res_lo_16, res_lo_16);
const __m128i res_8_lo_0 = _mm256_castsi256_si128(res_8_lo);
const __m128i res_8_lo_1 = _mm256_extracti128_si256(res_8_lo, 1);
- *(int *)dst8_0 = _mm_cvtsi128_si32(res_8_lo_0);
- *(int *)dst8_1 = _mm_cvtsi128_si32(res_8_lo_1);
+ *(uint32_t *)dst8_0 = _mm_cvtsi128_si32(res_8_lo_0);
+ *(uint32_t *)dst8_1 = _mm_cvtsi128_si32(res_8_lo_1);
} else {
const __m128i temp_lo_16_0 = _mm256_castsi256_si128(temp_lo_16);
const __m128i temp_lo_16_1 = _mm256_extracti128_si256(temp_lo_16, 1);
@@ -742,8 +742,8 @@
__m256i res_8_hi = _mm256_packus_epi16(res_hi_16, res_hi_16);
const __m128i res_8_hi_0 = _mm256_castsi256_si128(res_8_hi);
const __m128i res_8_hi_1 = _mm256_extracti128_si256(res_8_hi, 1);
- *(int *)dst8_4_0 = _mm_cvtsi128_si32(res_8_hi_0);
- *(int *)dst8_4_1 = _mm_cvtsi128_si32(res_8_hi_1);
+ *(uint32_t *)dst8_4_0 = _mm_cvtsi128_si32(res_8_hi_0);
+ *(uint32_t *)dst8_4_1 = _mm_cvtsi128_si32(res_8_hi_1);
} else {
const __m128i temp_hi_16_0 = _mm256_castsi256_si128(temp_hi_16);
const __m128i temp_hi_16_1 = _mm256_extracti128_si256(temp_hi_16, 1);
@@ -767,8 +767,8 @@
__m128i *const p1 = (__m128i *)&pred[(i + (k + 1) + 4) * p_stride + j];
if (p_width == 4) {
- *(int *)p = _mm_cvtsi128_si32(res_8bit0);
- *(int *)p1 = _mm_cvtsi128_si32(res_8bit1);
+ *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit0);
+ *(uint32_t *)p1 = _mm_cvtsi128_si32(res_8bit1);
} else {
_mm_storel_epi64(p, res_8bit0);
_mm_storel_epi64(p1, res_8bit1);
diff --git a/av1/common/x86/warp_plane_sse4.c b/av1/common/x86/warp_plane_sse4.c
index e35b557..b1df486 100644
--- a/av1/common/x86/warp_plane_sse4.c
+++ b/av1/common/x86/warp_plane_sse4.c
@@ -613,7 +613,7 @@
res_lo_16 = _mm_srai_epi16(_mm_add_epi16(res_lo_16, *round_bits_const),
round_bits);
__m128i res_8_lo = _mm_packus_epi16(res_lo_16, res_lo_16);
- *(int *)dst8 = _mm_cvtsi128_si32(res_8_lo);
+ *(uint32_t *)dst8 = _mm_cvtsi128_si32(res_8_lo);
} else {
_mm_storel_epi64(p, temp_lo_16);
}
@@ -645,7 +645,7 @@
res_hi_16 = _mm_srai_epi16(_mm_add_epi16(res_hi_16, *round_bits_const),
round_bits);
__m128i res_8_hi = _mm_packus_epi16(res_hi_16, res_hi_16);
- *(int *)dst8_4 = _mm_cvtsi128_si32(res_8_hi);
+ *(uint32_t *)dst8_4 = _mm_cvtsi128_si32(res_8_hi);
} else {
_mm_storel_epi64(p4, temp_hi_16);
@@ -667,7 +667,7 @@
// to only output 4 pixels at this point, to avoid encode/decode
// mismatches when encoding with multiple threads.
if (p_width == 4) {
- *(int *)p = _mm_cvtsi128_si32(res_8bit);
+ *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
} else {
_mm_storel_epi64(p, res_8bit);
}
diff --git a/av1/encoder/x86/temporal_filter_avx2.c b/av1/encoder/x86/temporal_filter_avx2.c
index a9c80040..8aa0764 100644
--- a/av1/encoder/x86/temporal_filter_avx2.c
+++ b/av1/encoder/x86/temporal_filter_avx2.c
@@ -51,7 +51,7 @@
_mm256_storeu_si256((__m256i *)(dst), vsqdiff1);
// Set zero to uninitialized memory to avoid uninitialized loads later
- *(int *)(dst + 16) = _mm_cvtsi128_si32(_mm_setzero_si128());
+ *(uint32_t *)(dst + 16) = _mm_cvtsi128_si32(_mm_setzero_si128());
src1 += stride, src2 += stride2;
dst += sse_stride;
@@ -85,7 +85,7 @@
_mm256_storeu_si256((__m256i *)(dst), vres1);
_mm256_storeu_si256((__m256i *)(dst + 16), vres2);
// Set zero to uninitialized memory to avoid uninitialized loads later
- *(int *)(dst + 32) = _mm_cvtsi128_si32(_mm_setzero_si128());
+ *(uint32_t *)(dst + 32) = _mm_cvtsi128_si32(_mm_setzero_si128());
src1 += stride;
src2 += stride2;
diff --git a/av1/encoder/x86/temporal_filter_sse2.c b/av1/encoder/x86/temporal_filter_sse2.c
index 8be7164..26c3926 100644
--- a/av1/encoder/x86/temporal_filter_sse2.c
+++ b/av1/encoder/x86/temporal_filter_sse2.c
@@ -42,7 +42,7 @@
for (int i = 0; i < block_height; i++) {
for (int j = 0; j < block_width; j += 16) {
// Set zero to uninitialized memory to avoid uninitialized loads later
- *(int *)(dst) = _mm_cvtsi128_si32(_mm_setzero_si128());
+ *(uint32_t *)(dst) = _mm_cvtsi128_si32(_mm_setzero_si128());
__m128i vsrc1 = _mm_loadu_si128((__m128i *)(src1 + j));
__m128i vsrc2 = _mm_loadu_si128((__m128i *)(src2 + j));
@@ -63,7 +63,8 @@
}
// Set zero to uninitialized memory to avoid uninitialized loads later
- *(int *)(dst + block_width + 2) = _mm_cvtsi128_si32(_mm_setzero_si128());
+ *(uint32_t *)(dst + block_width + 2) =
+ _mm_cvtsi128_si32(_mm_setzero_si128());
src1 += stride;
src2 += stride2;