arm: Fix building intrinsics with clang
This fixes compiler errors such as the following when building with
clang (latest trunk version):
"argument to '__builtin_neon_vld1_lane_v' must be a constant integer"
Convert the inline functions into macros so that the compiler sees the
lane argument as an immediate value rather than as a function parameter
that could hold a variable.
Change-Id: Ib1ae336d32512c91265c7f8ecf873cabe763c257
diff --git a/aom_dsp/arm/loopfilter_neon.c b/aom_dsp/arm/loopfilter_neon.c
index dd81007..bdc6762 100644
--- a/aom_dsp/arm/loopfilter_neon.c
+++ b/aom_dsp/arm/loopfilter_neon.c
@@ -785,7 +785,7 @@
const uint8_t *limit, const uint8_t *thresh) {
uint32x2x2_t p1q0_p0q1, p1q1_p0q0, p1p0_q1q0;
uint32x2_t pq_rev;
- uint8x8_t p1p0, q0q1, p0q0, p1q1;
+ uint8x8_t UNINITIALIZED_IS_SAFE(p1p0), q0q1, p0q0, p1q1;
// row0: p1 p0 | q0 q1
// row1: p1 p0 | q0 q1
@@ -823,7 +823,7 @@
void aom_lpf_horizontal_14_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
- uint8x8_t p0q0, p1q1, p2q2, p3q3, p4q4, p5q5, p6q6;
+ uint8x8_t p0q0, p1q1, p2q2, p3q3, p4q4, p5q5, UNINITIALIZED_IS_SAFE(p6q6);
load_u8_4x1(src - 7 * stride, &p6q6, 0);
load_u8_4x1(src - 6 * stride, &p5q5, 0);
@@ -912,7 +912,7 @@
void aom_lpf_horizontal_4_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
- uint8x8_t p0q0, p1q1;
+ uint8x8_t p0q0, UNINITIALIZED_IS_SAFE(p1q1);
load_u8_4x1(src - 2 * stride, &p1q1, 0);
load_u8_4x1(src - 1 * stride, &p0q0, 0);
diff --git a/av1/common/arm/mem_neon.h b/av1/common/arm/mem_neon.h
index 6e76b57..4bf45a5 100644
--- a/av1/common/arm/mem_neon.h
+++ b/av1/common/arm/mem_neon.h
@@ -22,11 +22,13 @@
s += p;
}
-static INLINE void load_u8_4x1(const uint8_t *s, uint8x8_t *const s0,
- int lane) {
- *s0 = vreinterpret_u8_u32(
- vld1_lane_u32((uint32_t *)s, vreinterpret_u32_u8(*s0), lane));
-}
+/* vld1_lane_u32 requires an immediate (compile-time constant) lane index,
+ so this must be a macro; note that s0 is evaluated twice. */
+#define load_u8_4x1(s, s0, lane) \
+ do { \
+ *(s0) = vreinterpret_u8_u32( \
+ vld1_lane_u32((uint32_t *)(s), vreinterpret_u32_u8(*(s0)), lane)); \
+ } while (0)
static INLINE void load_u8_8x8(const uint8_t *s, ptrdiff_t p,
uint8x8_t *const s0, uint8x8_t *const s1,
@@ -134,10 +136,12 @@
*s3 = vld1_s16(s);
}
-static INLINE void store_u8_4x1(const uint8_t *s, uint8x8_t const s0,
- int lane) {
- vst1_lane_u32((uint32_t *)s, vreinterpret_u32_u8(s0), lane);
-}
+/* vst1_lane_u32 requires an immediate (compile-time constant) lane index,
+ so this must be a macro rather than an inline function. */
+#define store_u8_4x1(s, s0, lane) \
+ do { \
+ vst1_lane_u32((uint32_t *)(s), vreinterpret_u32_u8(s0), lane); \
+ } while (0)
static INLINE void store_u8_8x8(uint8_t *s, ptrdiff_t p, const uint8x8_t s0,
const uint8x8_t s1, const uint8x8_t s2,