arm: Fix building intrinsics with clang This fixes compiler errors such as the following with clang (latest trunk version): "argument to '__builtin_neon_vld1_lane_v' must be a constant integer" Convert the inline functions into macros, so that the compiler sees the lane arguments as immediate values rather than as function parameters that could be variable. Change-Id: Ib1ae336d32512c91265c7f8ecf873cabe763c257
diff --git a/aom_dsp/arm/loopfilter_neon.c b/aom_dsp/arm/loopfilter_neon.c index dd81007..bdc6762 100644 --- a/aom_dsp/arm/loopfilter_neon.c +++ b/aom_dsp/arm/loopfilter_neon.c
@@ -785,7 +785,7 @@ const uint8_t *limit, const uint8_t *thresh) { uint32x2x2_t p1q0_p0q1, p1q1_p0q0, p1p0_q1q0; uint32x2_t pq_rev; - uint8x8_t p1p0, q0q1, p0q0, p1q1; + uint8x8_t UNINITIALIZED_IS_SAFE(p1p0), q0q1, p0q0, p1q1; // row0: p1 p0 | q0 q1 // row1: p1 p0 | q0 q1 @@ -823,7 +823,7 @@ void aom_lpf_horizontal_14_neon(uint8_t *src, int stride, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { - uint8x8_t p0q0, p1q1, p2q2, p3q3, p4q4, p5q5, p6q6; + uint8x8_t p0q0, p1q1, p2q2, p3q3, p4q4, p5q5, UNINITIALIZED_IS_SAFE(p6q6); load_u8_4x1(src - 7 * stride, &p6q6, 0); load_u8_4x1(src - 6 * stride, &p5q5, 0); @@ -912,7 +912,7 @@ void aom_lpf_horizontal_4_neon(uint8_t *src, int stride, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { - uint8x8_t p0q0, p1q1; + uint8x8_t p0q0, UNINITIALIZED_IS_SAFE(p1q1); load_u8_4x1(src - 2 * stride, &p1q1, 0); load_u8_4x1(src - 1 * stride, &p0q0, 0);
diff --git a/av1/common/arm/mem_neon.h b/av1/common/arm/mem_neon.h index 6e76b57..4bf45a5 100644 --- a/av1/common/arm/mem_neon.h +++ b/av1/common/arm/mem_neon.h
@@ -22,11 +22,13 @@ s += p; } -static INLINE void load_u8_4x1(const uint8_t *s, uint8x8_t *const s0, - int lane) { - *s0 = vreinterpret_u8_u32( - vld1_lane_u32((uint32_t *)s, vreinterpret_u32_u8(*s0), lane)); -} +/* These intrinsics require immediate values, so we must use #defines + to enforce that. */ +#define load_u8_4x1(s, s0, lane) \ + do { \ + *(s0) = vreinterpret_u8_u32( \ + vld1_lane_u32((uint32_t *)(s), vreinterpret_u32_u8(*(s0)), lane)); \ + } while (0) static INLINE void load_u8_8x8(const uint8_t *s, ptrdiff_t p, uint8x8_t *const s0, uint8x8_t *const s1, @@ -134,10 +136,12 @@ *s3 = vld1_s16(s); } -static INLINE void store_u8_4x1(const uint8_t *s, uint8x8_t const s0, - int lane) { - vst1_lane_u32((uint32_t *)s, vreinterpret_u32_u8(s0), lane); -} +/* These intrinsics require immediate values, so we must use #defines + to enforce that. */ +#define store_u8_4x1(s, s0, lane) \ + do { \ + vst1_lane_u32((uint32_t *)(s), vreinterpret_u32_u8(s0), lane); \ + } while (0) static INLINE void store_u8_8x8(uint8_t *s, ptrdiff_t p, const uint8x8_t s0, const uint8x8_t s1, const uint8x8_t s2,