mem_sse2.h: loadu_uint{16,32,64} -> loadu_int*
This changes the return types to signed, which matches the types
expected where these calls are used as input to _mm_cvtsi32_si128(),
_mm_set_epi32(), etc. It fixes implicit conversion warnings with
clang-11 -fsanitize=undefined.
Bug: aomedia:3136
Bug: b/229626362
Change-Id: I1425f12d4f79155dd5d7af0eb00fbdb9f1940544
diff --git a/aom_dsp/x86/mem_sse2.h b/aom_dsp/x86/mem_sse2.h
index dacb613..085a572 100644
--- a/aom_dsp/x86/mem_sse2.h
+++ b/aom_dsp/x86/mem_sse2.h
@@ -19,20 +19,20 @@
#include "aom/aom_integer.h"
-static INLINE uint16_t loadu_uint16(const void *src) {
- uint16_t v;
+static INLINE int16_t loadu_int16(const void *src) {
+ int16_t v;
memcpy(&v, src, sizeof(v));
return v;
}
-static INLINE uint32_t loadu_uint32(const void *src) {
- uint32_t v;
+static INLINE int32_t loadu_int32(const void *src) {
+ int32_t v;
memcpy(&v, src, sizeof(v));
return v;
}
-static INLINE uint64_t loadu_uint64(const void *src) {
- uint64_t v;
+static INLINE int64_t loadu_int64(const void *src) {
+ int64_t v;
memcpy(&v, src, sizeof(v));
return v;
}
@@ -48,10 +48,10 @@
static INLINE __m128i load_8bit_4x4_to_1_reg_sse2(const void *const src,
const int byte_stride) {
- return _mm_setr_epi32(loadu_uint32((int8_t *)src + 0 * byte_stride),
- loadu_uint32((int8_t *)src + 1 * byte_stride),
- loadu_uint32((int8_t *)src + 2 * byte_stride),
- loadu_uint32((int8_t *)src + 3 * byte_stride));
+ return _mm_setr_epi32(loadu_int32((int8_t *)src + 0 * byte_stride),
+ loadu_int32((int8_t *)src + 1 * byte_stride),
+ loadu_int32((int8_t *)src + 2 * byte_stride),
+ loadu_int32((int8_t *)src + 3 * byte_stride));
}
static INLINE __m128i load_8bit_8x2_to_1_reg_sse2(const void *const src,
diff --git a/aom_dsp/x86/variance_sse2.c b/aom_dsp/x86/variance_sse2.c
index d49d115..a62a089 100644
--- a/aom_dsp/x86/variance_sse2.c
+++ b/aom_dsp/x86/variance_sse2.c
@@ -36,8 +36,8 @@
}
static INLINE __m128i load4x2_sse2(const uint8_t *const p, const int stride) {
- const __m128i p0 = _mm_cvtsi32_si128(loadu_uint32(p + 0 * stride));
- const __m128i p1 = _mm_cvtsi32_si128(loadu_uint32(p + 1 * stride));
+ const __m128i p0 = _mm_cvtsi32_si128(loadu_int32(p + 0 * stride));
+ const __m128i p1 = _mm_cvtsi32_si128(loadu_int32(p + 1 * stride));
return _mm_unpacklo_epi8(_mm_unpacklo_epi32(p0, p1), _mm_setzero_si128());
}
diff --git a/av1/encoder/x86/pickrst_avx2.c b/av1/encoder/x86/pickrst_avx2.c
index d53b128..86f0734 100644
--- a/av1/encoder/x86/pickrst_avx2.c
+++ b/av1/encoder/x86/pickrst_avx2.c
@@ -59,7 +59,7 @@
M_int[k][l] += D1 * X1 + D2 * X2;
const __m256i kl =
- _mm256_cvtepu8_epi16(_mm_set1_epi16(loadu_uint16(dgd_ijk + l)));
+ _mm256_cvtepu8_epi16(_mm_set1_epi16(loadu_int16(dgd_ijk + l)));
acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
@@ -260,7 +260,7 @@
// Load two u16 values from dgd_ijkl combined as a u32,
// then broadcast to 8x u32 slots of a 256
- const __m256i dgd_ijkl = _mm256_set1_epi32(loadu_uint32(dgd_ijk + l));
+ const __m256i dgd_ijkl = _mm256_set1_epi32(loadu_int32(dgd_ijk + l));
// dgd_ijkl = [y x y x y x y x] [y x y x y x y x] where each is a u16
acc_stat_highbd_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
@@ -408,7 +408,7 @@
// Load two u16 values from dgd_ijkl combined as a u32,
// then broadcast to 8x u32 slots of a 256
- const __m256i dgd_ijkl = _mm256_set1_epi32(loadu_uint32(dgd_ijk + l));
+ const __m256i dgd_ijkl = _mm256_set1_epi32(loadu_int32(dgd_ijk + l));
// dgd_ijkl = [x y x y x y x y] [x y x y x y x y] where each is a u16
acc_stat_highbd_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
@@ -569,7 +569,7 @@
M_int[k][l] += D1 * X1 + D2 * X2;
const __m256i kl =
- _mm256_cvtepu8_epi16(_mm_set1_epi16(loadu_uint16(dgd_ijk + l)));
+ _mm256_cvtepu8_epi16(_mm_set1_epi16(loadu_int16(dgd_ijk + l)));
acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
diff --git a/av1/encoder/x86/rdopt_avx2.c b/av1/encoder/x86/rdopt_avx2.c
index 3bc763c..a0ab394 100644
--- a/av1/encoder/x86/rdopt_avx2.c
+++ b/av1/encoder/x86/rdopt_avx2.c
@@ -31,8 +31,8 @@
// [ m n o p ]
const __m256i pixels = _mm256_set_epi64x(
- loadu_uint64(&diff[0 * stride]), loadu_uint64(&diff[1 * stride]),
- loadu_uint64(&diff[2 * stride]), loadu_uint64(&diff[3 * stride]));
+ loadu_int64(&diff[0 * stride]), loadu_int64(&diff[1 * stride]),
+ loadu_int64(&diff[2 * stride]), loadu_int64(&diff[3 * stride]));
// pixels = [d c b a h g f e] [l k j i p o n m] as i16
const __m256i slli = _mm256_slli_epi64(pixels, 16);