x86: normalize types used with _mm{,256}_set?_epi8
With clang -fsanitize=integer, this fixes warnings of the form:
implicit conversion from type 'uint8_t' (aka 'unsigned char') of value
128 (8-bit, unsigned) to type 'char' changed the value to -128 (8-bit,
signed)
Bug: aomedia:3136
Bug: b/229626362
Change-Id: I1d42bd4971a58c081d1a9b5a7f2d78a3281026b3
diff --git a/aom_dsp/simd/v128_intrinsics_x86.h b/aom_dsp/simd/v128_intrinsics_x86.h
index 3cc6c02..dbd2442 100644
--- a/aom_dsp/simd/v128_intrinsics_x86.h
+++ b/aom_dsp/simd/v128_intrinsics_x86.h
@@ -81,7 +81,7 @@
SIMD_INLINE v128 v128_zero() { return _mm_setzero_si128(); }
-SIMD_INLINE v128 v128_dup_8(uint8_t x) { return _mm_set1_epi8(x); }
+SIMD_INLINE v128 v128_dup_8(uint8_t x) { return _mm_set1_epi8((char)x); }
SIMD_INLINE v128 v128_dup_16(uint16_t x) { return _mm_set1_epi16(x); }
@@ -534,7 +534,7 @@
SIMD_INLINE v128 v128_cmpeq_16(v128 a, v128 b) { return _mm_cmpeq_epi16(a, b); }
SIMD_INLINE v128 v128_shl_8(v128 a, unsigned int c) {
- return _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << c)),
+ return _mm_and_si128(_mm_set1_epi8((char)(0xff << c)),
_mm_sll_epi16(a, _mm_cvtsi32_si128((int)c)));
}
@@ -593,9 +593,9 @@
#define v128_shl_n_byte(a, c) _mm_slli_si128(a, (c)&127)
#define v128_shr_n_byte(a, c) _mm_srli_si128(a, (c)&127)
#define v128_shl_n_8(a, c) \
- _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << (c))), _mm_slli_epi16(a, c))
+ _mm_and_si128(_mm_set1_epi8((char)(0xff << (c))), _mm_slli_epi16(a, c))
#define v128_shr_n_u8(a, c) \
- _mm_and_si128(_mm_set1_epi8(0xff >> (c)), _mm_srli_epi16(a, c))
+ _mm_and_si128(_mm_set1_epi8((char)(0xff >> (c))), _mm_srli_epi16(a, c))
#define v128_shr_n_s8(a, c) \
_mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), \
_mm_srai_epi16(_mm_unpackhi_epi8(a, a), (c) + 8))
diff --git a/aom_dsp/simd/v256_intrinsics_x86.h b/aom_dsp/simd/v256_intrinsics_x86.h
index e10846b..10f4f8a 100644
--- a/aom_dsp/simd/v256_intrinsics_x86.h
+++ b/aom_dsp/simd/v256_intrinsics_x86.h
@@ -78,7 +78,7 @@
SIMD_INLINE v256 v256_zero(void) { return _mm256_setzero_si256(); }
-SIMD_INLINE v256 v256_dup_8(uint8_t x) { return _mm256_set1_epi8(x); }
+SIMD_INLINE v256 v256_dup_8(uint8_t x) { return _mm256_set1_epi8((char)x); }
SIMD_INLINE v256 v256_dup_16(uint16_t x) { return _mm256_set1_epi16(x); }
@@ -596,7 +596,7 @@
}
SIMD_INLINE v256 v256_shl_8(v256 a, unsigned int c) {
- return _mm256_and_si256(_mm256_set1_epi8((uint8_t)(0xff << c)),
+ return _mm256_and_si256(_mm256_set1_epi8((char)(0xff << c)),
_mm256_sll_epi16(a, _mm_cvtsi32_si128((int)c)));
}
@@ -677,11 +677,12 @@
#define v256_align(a, b, c) \
((c) ? v256_or(v256_shr_n_byte(b, c), v256_shl_n_byte(a, 32 - (c))) : b)
-#define v256_shl_n_8(a, c) \
- _mm256_and_si256(_mm256_set1_epi8((uint8_t)(0xff << (c))), \
+#define v256_shl_n_8(a, c) \
+ _mm256_and_si256(_mm256_set1_epi8((char)(0xff << (c))), \
_mm256_slli_epi16(a, c))
-#define v256_shr_n_u8(a, c) \
- _mm256_and_si256(_mm256_set1_epi8(0xff >> (c)), _mm256_srli_epi16(a, c))
+#define v256_shr_n_u8(a, c) \
+ _mm256_and_si256(_mm256_set1_epi8((char)(0xff >> (c))), \
+ _mm256_srli_epi16(a, c))
#define v256_shr_n_s8(a, c) \
_mm256_packs_epi16(_mm256_srai_epi16(_mm256_unpacklo_epi8(a, a), (c) + 8), \
_mm256_srai_epi16(_mm256_unpackhi_epi8(a, a), (c) + 8))
diff --git a/aom_dsp/simd/v64_intrinsics_x86.h b/aom_dsp/simd/v64_intrinsics_x86.h
index 42b602b..95a8765 100644
--- a/aom_dsp/simd/v64_intrinsics_x86.h
+++ b/aom_dsp/simd/v64_intrinsics_x86.h
@@ -101,7 +101,7 @@
SIMD_INLINE v64 v64_zero(void) { return _mm_setzero_si128(); }
-SIMD_INLINE v64 v64_dup_8(uint8_t x) { return _mm_set1_epi8(x); }
+SIMD_INLINE v64 v64_dup_8(uint8_t x) { return _mm_set1_epi8((char)x); }
SIMD_INLINE v64 v64_dup_16(uint16_t x) { return _mm_set1_epi16(x); }
@@ -433,7 +433,7 @@
SIMD_INLINE v64 v64_cmpeq_16(v64 a, v64 b) { return _mm_cmpeq_epi16(a, b); }
SIMD_INLINE v64 v64_shl_8(v64 a, unsigned int c) {
- return _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << c)),
+ return _mm_and_si128(_mm_set1_epi8((char)(0xff << c)),
_mm_sll_epi16(a, _mm_cvtsi32_si128((int)c)));
}
@@ -477,9 +477,9 @@
#define v64_shl_n_byte(a, c) _mm_slli_si128(a, c)
#define v64_shr_n_byte(a, c) _mm_srli_si128(_mm_unpacklo_epi64(a, a), c + 8)
#define v64_shl_n_8(a, c) \
- _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << (c))), _mm_slli_epi16(a, c))
+ _mm_and_si128(_mm_set1_epi8((char)(0xff << (c))), _mm_slli_epi16(a, c))
#define v64_shr_n_u8(a, c) \
- _mm_and_si128(_mm_set1_epi8(0xff >> (c)), _mm_srli_epi16(a, c))
+ _mm_and_si128(_mm_set1_epi8((char)(0xff >> (c))), _mm_srli_epi16(a, c))
#define v64_shr_n_s8(a, c) \
_mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), a)
#define v64_shl_n_16(a, c) _mm_slli_epi16(a, c)
diff --git a/aom_dsp/x86/intrapred_avx2.c b/aom_dsp/x86/intrapred_avx2.c
index b5f7144..597e3bd 100644
--- a/aom_dsp/x86/intrapred_avx2.c
+++ b/aom_dsp/x86/intrapred_avx2.c
@@ -361,7 +361,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
row_store_32xh(&row, 32, dst, stride);
}
@@ -628,7 +628,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
row_store_32xh(&row, 16, dst, stride);
}
@@ -637,7 +637,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
row_store_32xh(&row, 64, dst, stride);
}
@@ -646,7 +646,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
row_store_64xh(&row, 64, dst, stride);
}
@@ -655,7 +655,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
row_store_64xh(&row, 32, dst, stride);
}
@@ -664,7 +664,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
row_store_64xh(&row, 16, dst, stride);
}
@@ -3537,7 +3537,7 @@
__m128i a_mbase_x;
a16 = _mm256_set1_epi16(16);
- a_mbase_x = _mm_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm_set1_epi8((int8_t)above[max_base_x]);
c3f = _mm256_set1_epi16(0x3f);
int x = dx;
@@ -3640,7 +3640,7 @@
__m256i a_mbase_x, diff, c3f;
a16 = _mm256_set1_epi16(16);
- a_mbase_x = _mm256_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm256_set1_epi8((int8_t)above[max_base_x]);
c3f = _mm256_set1_epi16(0x3f);
int x = dx;
@@ -3722,7 +3722,7 @@
__m128i max_base_x128, base_inc128, mask128;
a16 = _mm256_set1_epi16(16);
- a_mbase_x = _mm256_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm256_set1_epi8((int8_t)above[max_base_x]);
max_base_x128 = _mm_set1_epi8(max_base_x);
c3f = _mm256_set1_epi16(0x3f);
@@ -3766,14 +3766,14 @@
_mm256_extracti128_si256(res, 1))); // 16 8bit values
base_inc128 =
- _mm_setr_epi8((uint8_t)(base + j), (uint8_t)(base + j + 1),
- (uint8_t)(base + j + 2), (uint8_t)(base + j + 3),
- (uint8_t)(base + j + 4), (uint8_t)(base + j + 5),
- (uint8_t)(base + j + 6), (uint8_t)(base + j + 7),
- (uint8_t)(base + j + 8), (uint8_t)(base + j + 9),
- (uint8_t)(base + j + 10), (uint8_t)(base + j + 11),
- (uint8_t)(base + j + 12), (uint8_t)(base + j + 13),
- (uint8_t)(base + j + 14), (uint8_t)(base + j + 15));
+ _mm_setr_epi8((int8_t)(base + j), (int8_t)(base + j + 1),
+ (int8_t)(base + j + 2), (int8_t)(base + j + 3),
+ (int8_t)(base + j + 4), (int8_t)(base + j + 5),
+ (int8_t)(base + j + 6), (int8_t)(base + j + 7),
+ (int8_t)(base + j + 8), (int8_t)(base + j + 9),
+ (int8_t)(base + j + 10), (int8_t)(base + j + 11),
+ (int8_t)(base + j + 12), (int8_t)(base + j + 13),
+ (int8_t)(base + j + 14), (int8_t)(base + j + 15));
mask128 = _mm_cmpgt_epi8(_mm_subs_epu8(max_base_x128, base_inc128),
_mm_setzero_si128());
diff --git a/aom_dsp/x86/intrapred_sse2.c b/aom_dsp/x86/intrapred_sse2.c
index d42f185..ccbce1e 100644
--- a/aom_dsp/x86/intrapred_sse2.c
+++ b/aom_dsp/x86/intrapred_sse2.c
@@ -146,7 +146,7 @@
sum += 6;
sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_8xh(&row, 4, dst, stride);
}
@@ -743,7 +743,7 @@
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_8xh(&row, 4, dst, stride);
}
@@ -751,7 +751,7 @@
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_8xh(&row, 16, dst, stride);
}
@@ -759,7 +759,7 @@
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_8xh(&row, 32, dst, stride);
}
@@ -767,7 +767,7 @@
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_16xh(&row, 4, dst, stride);
}
@@ -775,7 +775,7 @@
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_16xh(&row, 8, dst, stride);
}
@@ -784,7 +784,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_16xh(&row, 32, dst, stride);
}
@@ -793,7 +793,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_16xh(&row, 64, dst, stride);
}
@@ -801,7 +801,7 @@
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_32xh(&row, 8, dst, stride);
}
@@ -810,7 +810,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_32xh(&row, 16, dst, stride);
}
@@ -819,7 +819,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_32xh(&row, 64, dst, stride);
}
@@ -828,7 +828,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_64xh(&row, 64, dst, stride);
}
@@ -837,7 +837,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_64xh(&row, 32, dst, stride);
}
@@ -846,7 +846,7 @@
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_64xh(&row, 16, dst, stride);
}
diff --git a/aom_dsp/x86/intrapred_sse4.c b/aom_dsp/x86/intrapred_sse4.c
index 21fb1bb..f2c6188 100644
--- a/aom_dsp/x86/intrapred_sse4.c
+++ b/aom_dsp/x86/intrapred_sse4.c
@@ -141,7 +141,7 @@
__m128i a_mbase_x;
a16 = _mm_set1_epi16(16);
- a_mbase_x = _mm_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm_set1_epi8((char)above[max_base_x]);
c3f = _mm_set1_epi16(0x3f);
int x = dx;
@@ -255,7 +255,7 @@
__m128i a_mbase_x, diff, c3f;
a16 = _mm_set1_epi16(16);
- a_mbase_x = _mm_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm_set1_epi8((char)above[max_base_x]);
c3f = _mm_set1_epi16(0x3f);
int x = dx;
@@ -353,7 +353,7 @@
__m128i max_base, base_inc, mask;
a16 = _mm_set1_epi16(16);
- a_mbase_x = _mm_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm_set1_epi8((char)above[max_base_x]);
max_base = _mm_set1_epi8(max_base_x);
c3f = _mm_set1_epi16(0x3f);
@@ -412,14 +412,14 @@
res = _mm_packus_epi16(res, res1); // 16 8bit values
base_inc =
- _mm_setr_epi8((uint8_t)(base + j), (uint8_t)(base + j + 1),
- (uint8_t)(base + j + 2), (uint8_t)(base + j + 3),
- (uint8_t)(base + j + 4), (uint8_t)(base + j + 5),
- (uint8_t)(base + j + 6), (uint8_t)(base + j + 7),
- (uint8_t)(base + j + 8), (uint8_t)(base + j + 9),
- (uint8_t)(base + j + 10), (uint8_t)(base + j + 11),
- (uint8_t)(base + j + 12), (uint8_t)(base + j + 13),
- (uint8_t)(base + j + 14), (uint8_t)(base + j + 15));
+ _mm_setr_epi8((int8_t)(base + j), (int8_t)(base + j + 1),
+ (int8_t)(base + j + 2), (int8_t)(base + j + 3),
+ (int8_t)(base + j + 4), (int8_t)(base + j + 5),
+ (int8_t)(base + j + 6), (int8_t)(base + j + 7),
+ (int8_t)(base + j + 8), (int8_t)(base + j + 9),
+ (int8_t)(base + j + 10), (int8_t)(base + j + 11),
+ (int8_t)(base + j + 12), (int8_t)(base + j + 13),
+ (int8_t)(base + j + 14), (int8_t)(base + j + 15));
mask = _mm_cmpgt_epi8(_mm_subs_epu8(max_base, base_inc),
_mm_setzero_si128());
diff --git a/aom_dsp/x86/jnt_variance_ssse3.c b/aom_dsp/x86/jnt_variance_ssse3.c
index 6ec5dd8..ee88e1e 100644
--- a/aom_dsp/x86/jnt_variance_ssse3.c
+++ b/aom_dsp/x86/jnt_variance_ssse3.c
@@ -49,8 +49,8 @@
int ref_stride,
const DIST_WTD_COMP_PARAMS *jcp_param) {
int i;
- const uint8_t w0 = (uint8_t)jcp_param->fwd_offset;
- const uint8_t w1 = (uint8_t)jcp_param->bck_offset;
+ const int8_t w0 = (int8_t)jcp_param->fwd_offset;
+ const int8_t w1 = (int8_t)jcp_param->bck_offset;
const __m128i w = _mm_set_epi8(w1, w0, w1, w0, w1, w0, w1, w0, w1, w0, w1, w0,
w1, w0, w1, w0);
const uint16_t round = ((1 << DIST_PRECISION_BITS) >> 1);
@@ -95,10 +95,10 @@
assert(!(width & 3));
assert(!(height & 3));
for (i = 0; i < height; i += 4) {
- const uint8_t *row0 = ref + 0 * ref_stride;
- const uint8_t *row1 = ref + 1 * ref_stride;
- const uint8_t *row2 = ref + 2 * ref_stride;
- const uint8_t *row3 = ref + 3 * ref_stride;
+ const int8_t *row0 = (const int8_t *)ref + 0 * ref_stride;
+ const int8_t *row1 = (const int8_t *)ref + 1 * ref_stride;
+ const int8_t *row2 = (const int8_t *)ref + 2 * ref_stride;
+ const int8_t *row3 = (const int8_t *)ref + 3 * ref_stride;
__m128i p0 =
_mm_setr_epi8(row0[0], row0[1], row0[2], row0[3], row1[0], row1[1],
diff --git a/aom_dsp/x86/variance_impl_ssse3.c b/aom_dsp/x86/variance_impl_ssse3.c
index 66b0d7d..6990021 100644
--- a/aom_dsp/x86/variance_impl_ssse3.c
+++ b/aom_dsp/x86/variance_impl_ssse3.c
@@ -25,8 +25,8 @@
// Change {128, 0} to {64, 0} and reduce FILTER_BITS by 1 to avoid overflow.
const int16_t round = (1 << (FILTER_BITS - 1)) >> 1;
const __m128i r = _mm_set1_epi16(round);
- const uint8_t f0 = filter[0] >> 1;
- const uint8_t f1 = filter[1] >> 1;
+ const int8_t f0 = (int8_t)(filter[0] >> 1);
+ const int8_t f1 = (int8_t)(filter[1] >> 1);
const __m128i filters = _mm_setr_epi8(f0, f1, f0, f1, f0, f1, f0, f1, f0, f1,
f0, f1, f0, f1, f0, f1);
unsigned int i, j;
diff --git a/av1/common/x86/intra_edge_sse4.c b/av1/common/x86/intra_edge_sse4.c
index fc69f41..f025f79 100644
--- a/av1/common/x86/intra_edge_sse4.c
+++ b/av1/common/x86/intra_edge_sse4.c
@@ -33,7 +33,7 @@
// Extend the first and last samples to simplify the loop for the 5-tap case
p[-1] = p[0];
- __m128i last = _mm_set1_epi8(p[sz - 1]);
+ __m128i last = _mm_set1_epi8((char)p[sz - 1]);
_mm_storeu_si128((__m128i *)&p[sz], last);
// Adjust input pointer for filter support area
diff --git a/av1/encoder/x86/reconinter_enc_ssse3.c b/av1/encoder/x86/reconinter_enc_ssse3.c
index 7ac0f0d..e3be996 100644
--- a/av1/encoder/x86/reconinter_enc_ssse3.c
+++ b/av1/encoder/x86/reconinter_enc_ssse3.c
@@ -48,8 +48,8 @@
assert(!(width * height & 15));
n = width * height >> 4;
- const uint8_t w0 = (uint8_t)jcp_param->fwd_offset;
- const uint8_t w1 = (uint8_t)jcp_param->bck_offset;
+ const int8_t w0 = (int8_t)jcp_param->fwd_offset;
+ const int8_t w1 = (int8_t)jcp_param->bck_offset;
const __m128i w = _mm_set_epi8(w1, w0, w1, w0, w1, w0, w1, w0, w1, w0, w1, w0,
w1, w0, w1, w0);
const uint16_t round = ((1 << DIST_PRECISION_BITS) >> 1);