v{64,128}_intrinsics_arm.h: add some missing casts

Fixes warnings of the form:
aom_dsp/simd/v128_intrinsics_arm.h(767,35): warning C4146: unary minus
operator applied to unsigned type, result still unsigned

when building with Microsoft's cl compiler.
Additional casts are added to other functions for consistency.
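
A minimal sketch of the pattern being fixed (the function names here are
illustrative only, not part of the change). Since every shift amount is
range-checked before use, the narrowing cast does not change any lane value;
it only makes the negation happen on a signed type:

  #include <arm_neon.h>

  /* Before: unary minus applies to the unsigned 'c' (warning C4146);
   * the wrapped unsigned result is then implicitly narrowed to int16_t. */
  int16x8_t shr_u16_before(int16x8_t v, unsigned int c) {
    return vshlq_s16(v, vdupq_n_s16(-c));
  }

  /* After: cast to the signed lane type first, then negate. */
  int16x8_t shr_u16_after(int16x8_t v, unsigned int c) {
    return vshlq_s16(v, vdupq_n_s16(-(int16_t)c));
  }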

Change-Id: I4a5372ab880f2bec13842086d23b17579ee3c5f3
diff --git a/aom_dsp/simd/v128_intrinsics_arm.h b/aom_dsp/simd/v128_intrinsics_arm.h
index 5ee35bf..fb89d60 100644
--- a/aom_dsp/simd/v128_intrinsics_arm.h
+++ b/aom_dsp/simd/v128_intrinsics_arm.h
@@ -699,72 +699,72 @@
 
 SIMD_INLINE v128 v128_shl_8(v128 a, unsigned int c) {
   return (c > 7) ? v128_zero()
-                 : vreinterpretq_s64_u8(
-                       vshlq_u8(vreinterpretq_u8_s64(a), vdupq_n_s8(c)));
+                 : vreinterpretq_s64_u8(vshlq_u8(vreinterpretq_u8_s64(a),
+                                                 vdupq_n_s8((int8_t)c)));
 }
 
 SIMD_INLINE v128 v128_shr_u8(v128 a, unsigned int c) {
   return (c > 7) ? v128_zero()
-                 : vreinterpretq_s64_u8(
-                       vshlq_u8(vreinterpretq_u8_s64(a), vdupq_n_s8(-c)));
+                 : vreinterpretq_s64_u8(vshlq_u8(vreinterpretq_u8_s64(a),
+                                                 vdupq_n_s8(-(int8_t)c)));
 }
 
 SIMD_INLINE v128 v128_shr_s8(v128 a, unsigned int c) {
   return (c > 7) ? v128_ones()
-                 : vreinterpretq_s64_s8(
-                       vshlq_s8(vreinterpretq_s8_s64(a), vdupq_n_s8(-c)));
+                 : vreinterpretq_s64_s8(vshlq_s8(vreinterpretq_s8_s64(a),
+                                                 vdupq_n_s8(-(int8_t)c)));
 }
 
 SIMD_INLINE v128 v128_shl_16(v128 a, unsigned int c) {
   return (c > 15) ? v128_zero()
-                  : vreinterpretq_s64_u16(
-                        vshlq_u16(vreinterpretq_u16_s64(a), vdupq_n_s16(c)));
+                  : vreinterpretq_s64_u16(vshlq_u16(vreinterpretq_u16_s64(a),
+                                                    vdupq_n_s16((int16_t)c)));
 }
 
 SIMD_INLINE v128 v128_shr_u16(v128 a, unsigned int c) {
   return (c > 15) ? v128_zero()
-                  : vreinterpretq_s64_u16(
-                        vshlq_u16(vreinterpretq_u16_s64(a), vdupq_n_s16(-c)));
+                  : vreinterpretq_s64_u16(vshlq_u16(vreinterpretq_u16_s64(a),
+                                                    vdupq_n_s16(-(int16_t)c)));
 }
 
 SIMD_INLINE v128 v128_shr_s16(v128 a, unsigned int c) {
   return (c > 15) ? v128_ones()
-                  : vreinterpretq_s64_s16(
-                        vshlq_s16(vreinterpretq_s16_s64(a), vdupq_n_s16(-c)));
+                  : vreinterpretq_s64_s16(vshlq_s16(vreinterpretq_s16_s64(a),
+                                                    vdupq_n_s16(-(int16_t)c)));
 }
 
 SIMD_INLINE v128 v128_shl_32(v128 a, unsigned int c) {
   return (c > 31) ? v128_zero()
-                  : vreinterpretq_s64_u32(
-                        vshlq_u32(vreinterpretq_u32_s64(a), vdupq_n_s32(c)));
+                  : vreinterpretq_s64_u32(vshlq_u32(vreinterpretq_u32_s64(a),
+                                                    vdupq_n_s32((int32_t)c)));
 }
 
 SIMD_INLINE v128 v128_shr_u32(v128 a, unsigned int c) {
   return (c > 31) ? v128_zero()
-                  : vreinterpretq_s64_u32(
-                        vshlq_u32(vreinterpretq_u32_s64(a), vdupq_n_s32(-c)));
+                  : vreinterpretq_s64_u32(vshlq_u32(vreinterpretq_u32_s64(a),
+                                                    vdupq_n_s32(-(int32_t)c)));
 }
 
 SIMD_INLINE v128 v128_shr_s32(v128 a, unsigned int c) {
   return (c > 31) ? v128_ones()
-                  : vreinterpretq_s64_s32(
-                        vshlq_s32(vreinterpretq_s32_s64(a), vdupq_n_s32(-c)));
+                  : vreinterpretq_s64_s32(vshlq_s32(vreinterpretq_s32_s64(a),
+                                                    vdupq_n_s32(-(int32_t)c)));
 }
 
 SIMD_INLINE v128 v128_shl_64(v128 a, unsigned int c) {
   return (c > 63) ? v128_zero()
-                  : vreinterpretq_s64_u64(
-                        vshlq_u64(vreinterpretq_u64_s64(a), vdupq_n_s64(c)));
+                  : vreinterpretq_s64_u64(vshlq_u64(vreinterpretq_u64_s64(a),
+                                                    vdupq_n_s64((int64_t)c)));
 }
 
 SIMD_INLINE v128 v128_shr_u64(v128 a, unsigned int c) {
   return (c > 63) ? v128_zero()
-                  : vreinterpretq_s64_u64(
-                        vshlq_u64(vreinterpretq_u64_s64(a), vdupq_n_s64(-c)));
+                  : vreinterpretq_s64_u64(vshlq_u64(vreinterpretq_u64_s64(a),
+                                                    vdupq_n_s64(-(int64_t)c)));
 }
 
 SIMD_INLINE v128 v128_shr_s64(v128 a, unsigned int c) {
-  return (c > 63) ? v128_ones() : vshlq_s64(a, vdupq_n_s64(-c));
+  return (c > 63) ? v128_ones() : vshlq_s64(a, vdupq_n_s64(-(int64_t)c));
 }
 
 #if defined(__OPTIMIZE__) && __OPTIMIZE__ && !defined(__clang__)
diff --git a/aom_dsp/simd/v64_intrinsics_arm.h b/aom_dsp/simd/v64_intrinsics_arm.h
index 1576b8f..265ebed 100644
--- a/aom_dsp/simd/v64_intrinsics_arm.h
+++ b/aom_dsp/simd/v64_intrinsics_arm.h
@@ -559,43 +559,48 @@
 }
 
 SIMD_INLINE v64 v64_shl_8(v64 a, unsigned int c) {
-  return vreinterpret_s64_u8(vshl_u8(vreinterpret_u8_s64(a), vdup_n_s8(c)));
+  return vreinterpret_s64_u8(
+      vshl_u8(vreinterpret_u8_s64(a), vdup_n_s8((int8_t)c)));
 }
 
 SIMD_INLINE v64 v64_shr_u8(v64 a, unsigned int c) {
-  return vreinterpret_s64_u8(vshl_u8(vreinterpret_u8_s64(a), vdup_n_s8(-c)));
+  return vreinterpret_s64_u8(
+      vshl_u8(vreinterpret_u8_s64(a), vdup_n_s8(-(int8_t)c)));
 }
 
 SIMD_INLINE v64 v64_shr_s8(v64 a, unsigned int c) {
-  return vreinterpret_s64_s8(vshl_s8(vreinterpret_s8_s64(a), vdup_n_s8(-c)));
+  return vreinterpret_s64_s8(
+      vshl_s8(vreinterpret_s8_s64(a), vdup_n_s8(-(int8_t)c)));
 }
 
 SIMD_INLINE v64 v64_shl_16(v64 a, unsigned int c) {
-  return vreinterpret_s64_u16(vshl_u16(vreinterpret_u16_s64(a), vdup_n_s16(c)));
+  return vreinterpret_s64_u16(
+      vshl_u16(vreinterpret_u16_s64(a), vdup_n_s16((int16_t)c)));
 }
 
 SIMD_INLINE v64 v64_shr_u16(v64 a, unsigned int c) {
   return vreinterpret_s64_u16(
-      vshl_u16(vreinterpret_u16_s64(a), vdup_n_s16(-(int)c)));
+      vshl_u16(vreinterpret_u16_s64(a), vdup_n_s16(-(int16_t)c)));
 }
 
 SIMD_INLINE v64 v64_shr_s16(v64 a, unsigned int c) {
   return vreinterpret_s64_s16(
-      vshl_s16(vreinterpret_s16_s64(a), vdup_n_s16(-(int)c)));
+      vshl_s16(vreinterpret_s16_s64(a), vdup_n_s16(-(int16_t)c)));
 }
 
 SIMD_INLINE v64 v64_shl_32(v64 a, unsigned int c) {
-  return vreinterpret_s64_u32(vshl_u32(vreinterpret_u32_s64(a), vdup_n_s32(c)));
+  return vreinterpret_s64_u32(
+      vshl_u32(vreinterpret_u32_s64(a), vdup_n_s32((int32_t)c)));
 }
 
 SIMD_INLINE v64 v64_shr_u32(v64 a, unsigned int c) {
   return vreinterpret_s64_u32(
-      vshl_u32(vreinterpret_u32_s64(a), vdup_n_s32(-(int)c)));
+      vshl_u32(vreinterpret_u32_s64(a), vdup_n_s32(-(int32_t)c)));
 }
 
 SIMD_INLINE v64 v64_shr_s32(v64 a, unsigned int c) {
   return vreinterpret_s64_s32(
-      vshl_s32(vreinterpret_s32_s64(a), vdup_n_s32(-(int)c)));
+      vshl_s32(vreinterpret_s32_s64(a), vdup_n_s32(-(int32_t)c)));
 }
 
 // The following functions require an immediate.