Constrain the range of immediate constants
Improves on the solution introduced in commit 8a99b5f.
BUG=aomedia:1945
Change-Id: I6c72494544919943dbce799f2fb046b1ef33abb0
diff --git a/aom_dsp/simd/v128_intrinsics_x86.h b/aom_dsp/simd/v128_intrinsics_x86.h
index 72a2261..f9043fe 100644
--- a/aom_dsp/simd/v128_intrinsics_x86.h
+++ b/aom_dsp/simd/v128_intrinsics_x86.h
@@ -12,6 +12,7 @@
#ifndef _V128_INTRINSICS_H
#define _V128_INTRINSICS_H
+#include <stdint.h>
#include "aom_dsp/simd/v64_intrinsics_x86.h"
typedef __m128i v128;
@@ -71,7 +72,7 @@
#endif
#else
#if defined(__SSSE3__)
-#define v128_align(a, b, c) ((c) ? _mm_alignr_epi8(a, b, c) : (b))
+#define v128_align(a, b, c) ((c) ? _mm_alignr_epi8(a, b, (uint8_t)(c)) : (b))
#else
#define v128_align(a, b, c) \
((c) ? _mm_or_si128(_mm_srli_si128(b, c), _mm_slli_si128(a, 16 - (c))) : (b))
@@ -588,8 +589,8 @@
/* These intrinsics require immediate values, so we must use #defines
to enforce that. */
-#define v128_shl_n_byte(a, c) _mm_slli_si128(a, c)
-#define v128_shr_n_byte(a, c) _mm_srli_si128(a, c)
+#define v128_shl_n_byte(a, c) _mm_slli_si128(a, (c)&127)
+#define v128_shr_n_byte(a, c) _mm_srli_si128(a, (c)&127)
#define v128_shl_n_8(a, c) \
_mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << (c))), _mm_slli_epi16(a, c))
#define v128_shr_n_u8(a, c) \
diff --git a/aom_dsp/simd/v256_intrinsics_x86.h b/aom_dsp/simd/v256_intrinsics_x86.h
index 6bfc0b0..05f2051 100644
--- a/aom_dsp/simd/v256_intrinsics_x86.h
+++ b/aom_dsp/simd/v256_intrinsics_x86.h
@@ -658,12 +658,12 @@
to enforce that. */
// _mm256_slli_si256 works on 128 bit lanes and can't be used
#define v256_shl_n_byte(a, n) \
- ((n) < 16 ? v256_from_v128(v128_align(v256_high_v128(a), v256_low_v128(a), \
- (16 - (n)) & 15), \
- v128_shl_n_byte(v256_low_v128(a), n)) \
+ ((n) < 16 ? v256_from_v128( \
+ v128_align(v256_high_v128(a), v256_low_v128(a), 16 - (n)), \
+ v128_shl_n_byte(v256_low_v128(a), n)) \
: _mm256_inserti128_si256( \
_mm256_setzero_si256(), \
- v128_shl_n_byte(v256_low_v128(a), (n)&15), 1))
+ v128_shl_n_byte(v256_low_v128(a), (n)-16), 1))
// _mm256_srli_si256 works on 128 bit lanes and can't be used
#define v256_shr_n_byte(a, n) \