Add AArch64 SVE2 runtime feature detection
Add runtime detection of the Armv9-A SVE2 feature, read from AT_HWCAP2
via getauxval(), in preparation for adding SVE2-only code in future
commits. Also add the SVE2 build flavor and enable running the SVE2
unit tests.
Change-Id: I0b3fdebea287f187c0e5be1c1e769e236e9e09f7
diff --git a/aom_ports/aarch64_cpudetect.c b/aom_ports/aarch64_cpudetect.c
index 13299a6..3490d68 100644
--- a/aom_ports/aarch64_cpudetect.c
+++ b/aom_ports/aarch64_cpudetect.c
@@ -104,6 +104,7 @@
#define AOM_AARCH64_HWCAP_CRC32 (1 << 7)
#define AOM_AARCH64_HWCAP_ASIMDDP (1 << 20)
#define AOM_AARCH64_HWCAP_SVE (1 << 22)
+#define AOM_AARCH64_HWCAP2_SVE2 (1 << 1)
#define AOM_AARCH64_HWCAP2_I8MM (1 << 13)
static int arm_get_cpu_caps(void) {
@@ -111,7 +112,7 @@
#if HAVE_ARM_CRC32 || HAVE_NEON_DOTPROD || HAVE_SVE
unsigned long hwcap = getauxval(AT_HWCAP);
#endif
-#if HAVE_NEON_I8MM
+#if HAVE_NEON_I8MM || HAVE_SVE2
unsigned long hwcap2 = getauxval(AT_HWCAP2);
#endif
@@ -130,6 +131,9 @@
#if HAVE_SVE
if (hwcap & AOM_AARCH64_HWCAP_SVE) flags |= HAS_SVE;
#endif // HAVE_SVE
+#if HAVE_SVE2
+ if (hwcap2 & AOM_AARCH64_HWCAP2_SVE2) flags |= HAS_SVE2;
+#endif // HAVE_SVE2
return flags;
}
@@ -189,5 +193,8 @@
if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_SVE;
if (!(flags & HAS_NEON_I8MM)) flags &= ~HAS_SVE;
+ // Restrict flags: SVE2 assumes that FEAT_SVE is available.
+ if (!(flags & HAS_SVE)) flags &= ~HAS_SVE2;
+
return flags;
}
diff --git a/aom_ports/arm.h b/aom_ports/arm.h
index 853741d..a575108 100644
--- a/aom_ports/arm.h
+++ b/aom_ports/arm.h
@@ -29,6 +29,8 @@
#define HAS_NEON_I8MM (1 << 3)
// Armv8.2-A optional SVE instructions, mandatory from Armv9.0-A.
#define HAS_SVE (1 << 4)
+// Armv9.0-A SVE2 instructions.
+#define HAS_SVE2 (1 << 5)
int aom_arm_cpu_caps(void);
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index da7de4b..5b01ea2 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -37,6 +37,7 @@
set_aom_detect_var(HAVE_NEON_I8MM 0
"Enables Armv8.2-A Neon i8mm intrinsics optimizations.")
set_aom_detect_var(HAVE_SVE 0 "Enables Armv8.2-A SVE intrinsics optimizations.")
+set_aom_detect_var(HAVE_SVE2 0 "Enables Armv9-A SVE2 intrinsics optimizations.")
# PPC feature flags.
set_aom_detect_var(HAVE_VSX 0 "Enables VSX optimizations.")
@@ -209,6 +210,8 @@
"Enables Armv8.2-A Neon i8mm optimizations on AArch64 targets." ON)
set_aom_option_var(ENABLE_SVE
"Enables Armv8.2-A SVE optimizations on AArch64 targets." ON)
+set_aom_option_var(ENABLE_SVE2
+ "Enables Armv9-A SVE2 optimizations on AArch64 targets." ON)
# VSX intrinsics flags.
set_aom_option_var(ENABLE_VSX "Enables VSX optimizations on PowerPC targets."
diff --git a/build/cmake/cpu.cmake b/build/cmake/cpu.cmake
index bd13d03..489dbcb 100644
--- a/build/cmake/cpu.cmake
+++ b/build/cmake/cpu.cmake
@@ -14,11 +14,12 @@
set(AOM_ARCH_AARCH64 1)
set(RTCD_ARCH_ARM "yes")
- set(ARM64_FLAVORS "NEON;ARM_CRC32;NEON_DOTPROD;NEON_I8MM;SVE")
+ set(ARM64_FLAVORS "NEON;ARM_CRC32;NEON_DOTPROD;NEON_I8MM;SVE;SVE2")
set(AOM_ARM_CRC32_DEFAULT_FLAG "-march=armv8-a+crc")
set(AOM_NEON_DOTPROD_DEFAULT_FLAG "-march=armv8.2-a+dotprod")
set(AOM_NEON_I8MM_DEFAULT_FLAG "-march=armv8.2-a+dotprod+i8mm")
set(AOM_SVE_DEFAULT_FLAG "-march=armv8.2-a+dotprod+i8mm+sve")
+ set(AOM_SVE2_DEFAULT_FLAG "-march=armv9-a+sve2") # SVE2 is a v9-only feature
# Check that the compiler flag to enable each flavor is supported by the
# compiler. This may not be the case for new architecture features on old
@@ -45,8 +46,8 @@
endif()
endforeach()
- # SVE requires that the Neon-SVE bridge header is also available.
- if(ENABLE_SVE)
+ # SVE and SVE2 require that the Neon-SVE bridge header is also available.
+ if(ENABLE_SVE OR ENABLE_SVE2)
set(OLD_CMAKE_REQURED_FLAGS ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AOM_SVE_FLAG}")
aom_check_source_compiles("arm_neon_sve_bridge_available" "
@@ -58,6 +59,7 @@
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQURED_FLAGS})
if(HAVE_SVE_HEADERS EQUAL 0)
set(ENABLE_SVE 0)
+ set(ENABLE_SVE2 0)
endif()
endif()
diff --git a/build/cmake/rtcd.pl b/build/cmake/rtcd.pl
index 1cf52f0..f4a7084 100755
--- a/build/cmake/rtcd.pl
+++ b/build/cmake/rtcd.pl
@@ -392,7 +392,7 @@
@ALL_ARCHS = filter(qw/neon/);
arm;
} elsif ($opts{arch} eq 'arm64' ) {
- @ALL_ARCHS = filter(qw/neon arm_crc32 neon_dotprod neon_i8mm sve/);
+ @ALL_ARCHS = filter(qw/neon arm_crc32 neon_dotprod neon_i8mm sve sve2/);
@REQUIRES = filter(qw/neon/);
&require(@REQUIRES);
arm;
diff --git a/test/test_libaom.cc b/test/test_libaom.cc
index fbd7f2e..26abbb0 100644
--- a/test/test_libaom.cc
+++ b/test/test_libaom.cc
@@ -62,6 +62,7 @@
if (!(caps & HAS_NEON_DOTPROD)) append_negative_gtest_filter("NEON_DOTPROD");
if (!(caps & HAS_NEON_I8MM)) append_negative_gtest_filter("NEON_I8MM");
if (!(caps & HAS_SVE)) append_negative_gtest_filter("SVE");
+ if (!(caps & HAS_SVE2)) append_negative_gtest_filter("SVE2");
#elif AOM_ARCH_ARM
const int caps = aom_arm_cpu_caps();
if (!(caps & HAS_NEON)) append_negative_gtest_filter("NEON");