Use dedicated Arm hardware instructions to compute CRC32C hash
Add an implementation of the CRC32C hash function that uses the
dedicated CRC32C instructions in the Armv8-A architecture.
The Arm CRC32C instructions are optional in Armv8.0-A but mandatory
in all architecture revisions starting from Armv8.1-A. For the time
being, we detect the presence of the CRC32C instructions at compile
time, but run-time feature detection ought to be implemented for
platforms like Android, where an architecture baseline of Armv8.1-A
cannot be assumed.
Change-Id: I30894ec77dd79c87eeafcdefc9e4b061cd5106d6
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 72dc3e7..fc99b25 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -367,6 +367,9 @@
"${AOM_ROOT}/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c"
"${AOM_ROOT}/av1/encoder/arm/neon/wedge_utils_neon.c")
+list(APPEND AOM_AV1_ENCODER_INTRIN_CRC32 # AV1 encoder sources using Arm CRC32
+ "${AOM_ROOT}/av1/encoder/arm/crc32/hash_crc32.c")
+
list(APPEND AOM_AV1_ENCODER_INTRIN_MSA
"${AOM_ROOT}/av1/encoder/mips/msa/error_msa.c"
"${AOM_ROOT}/av1/encoder/mips/msa/fdct4x4_msa.c"
@@ -632,6 +635,16 @@
"AOM_AV1_ENCODER_INTRIN_NEON")
endif()
endif()
+
+  # Pass the Arm CRC32 encoder source list to add_intrinsics_object_library
+  # together with AOM_ARM_CRC32_FLAG. This only happens when HAVE_ARM_CRC32 is
+  # on, the AV1 encoder is configured, and AOM_AV1_ENCODER_INTRIN_CRC32 is
+  # set.
+  if(HAVE_ARM_CRC32 AND CONFIG_AV1_ENCODER AND AOM_AV1_ENCODER_INTRIN_CRC32)
+    add_intrinsics_object_library("${AOM_ARM_CRC32_FLAG}" "crc32"
+                                  "aom_av1_encoder"
+                                  "AOM_AV1_ENCODER_INTRIN_CRC32")
+  endif()
endif()
if(HAVE_VSX)
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 97912be..fb650a8 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -450,7 +450,7 @@
# hash
add_proto qw/uint32_t av1_get_crc32c_value/, "void *crc_calculator, uint8_t *p, size_t length";
- specialize qw/av1_get_crc32c_value sse4_2/;
+ specialize qw/av1_get_crc32c_value sse4_2 arm_crc32/;
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
add_proto qw/void av1_compute_stats/, "int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int h_start, int h_end, int v_start, int v_end, int dgd_stride, int src_stride, int64_t *M, int64_t *H, int use_downsampled_wiener_stats";
diff --git a/av1/encoder/arm/crc32/hash_crc32.c b/av1/encoder/arm/crc32/hash_crc32.c
new file mode 100644
index 0000000..dd8685d
--- /dev/null
+++ b/av1/encoder/arm/crc32/hash_crc32.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <arm_acle.h>
+
+#define CRC_LOOP(op, crc, type, buf, len)                                  \
+  while ((len) >= sizeof(type)) { /* Fold in each whole `type` chunk. */   \
+    (crc) = op((crc), *(const type *)(buf)); /* Keeps buf's constness. */  \
+    (len) -= sizeof(type);                                                 \
+    (buf) += sizeof(type); /* buf is expected to be a byte pointer. */     \
+  }
+
+#define CRC_SINGLE(op, crc, type, buf, len)                                \
+  if ((len) >= sizeof(type)) { /* At most one `type`-sized chunk. */       \
+    (crc) = op((crc), *(const type *)(buf)); /* Keeps buf's constness. */  \
+    (len) -= sizeof(type);                                                 \
+    (buf) += sizeof(type); /* buf is expected to be a byte pointer. */     \
+  }
+
+/* Compute the CRC32C checksum (polynomial 0x1EDC6F41) of the len bytes at p
+ * using the Arm CRC32 instructions. crc_calculator is ignored; it is only
+ * accepted so the signature matches the av1_get_crc32c_value prototype.
+ */
+uint32_t av1_get_crc32c_value_arm_crc32(void *crc_calculator, uint8_t *p,
+                                        size_t len) {
+  const uint8_t *cursor = p;
+  uint32_t hash = UINT32_MAX;  // All-ones start value; inverted on return.
+  (void)crc_calculator;
+
+#ifndef __aarch64__
+  // 32-bit builds only: consume single bytes until the cursor is 8-byte
+  // aligned, so the wider loads below are aligned.
+  for (; len && ((uintptr_t)cursor & 7); --len) {
+    hash = __crc32cb(hash, *cursor++);
+  }
+#endif
+
+  // Widest chunks first, then at most one 4-, 2- and 1-byte remainder.
+  CRC_LOOP(__crc32cd, hash, uint64_t, cursor, len)
+  CRC_SINGLE(__crc32cw, hash, uint32_t, cursor, len)
+  CRC_SINGLE(__crc32ch, hash, uint16_t, cursor, len)
+  CRC_SINGLE(__crc32cb, hash, uint8_t, cursor, len)
+  return hash ^ UINT32_MAX;
+}
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 84159fb..b8e68f8 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -31,6 +31,7 @@
# ARM feature flags.
set_aom_detect_var(HAVE_NEON 0 "Enables NEON intrinsics optimizations.")
+set_aom_detect_var(HAVE_ARM_CRC32 0 "Enables Arm CRC32 optimizations.")
# MIPS feature flags.
set_aom_detect_var(HAVE_DSPR2 0 "Enables DSPR2 optimizations.")
diff --git a/build/cmake/cpu.cmake b/build/cmake/cpu.cmake
index ef2d755..c818ec6 100644
--- a/build/cmake/cpu.cmake
+++ b/build/cmake/cpu.cmake
@@ -20,6 +20,19 @@
set(HAVE_NEON 0)
set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-neon)
endif()
+
+  check_c_source_compiles("
+ #if !defined(__ARM_FEATURE_CRC32) || __ARM_FEATURE_CRC32 != 1
+ #error \"CRC32 is unavailable.\"
+ #endif
+ int main(void) { return 0; }" HAVE_CRC32) # Compile-time probe only.
+  if(NOT HAVE_CRC32) # Probe failed: __ARM_FEATURE_CRC32 is not defined as 1.
+    set(HAVE_ARM_CRC32 0)
+    set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-arm_crc32)
+  else()
+    set(HAVE_ARM_CRC32 1)
+  endif()
+
elseif("${AOM_TARGET_CPU}" MATCHES "^mips")
set(ARCH_MIPS 1)
set(RTCD_ARCH_MIPS "yes")
diff --git a/build/cmake/rtcd.pl b/build/cmake/rtcd.pl
index e9f75dd..7f961ca 100755
--- a/build/cmake/rtcd.pl
+++ b/build/cmake/rtcd.pl
@@ -432,8 +432,8 @@
@ALL_ARCHS = filter(qw/neon/);
arm;
} elsif ($opts{arch} eq 'arm64' ) {
- @ALL_ARCHS = filter(qw/neon/);
- &require("neon");
+ @ALL_ARCHS = filter(qw/neon arm_crc32/);
+ &require(@ALL_ARCHS);
arm;
} elsif ($opts{arch} eq 'ppc') {
@ALL_ARCHS = filter(qw/vsx/);
diff --git a/test/hash_test.cc b/test/hash_test.cc
index 5ce0fbb..61e0b51 100644
--- a/test/hash_test.cc
+++ b/test/hash_test.cc
@@ -131,4 +131,11 @@
::testing::ValuesIn(kValidBlockSize)));
#endif
+#if HAVE_ARM_CRC32
+INSTANTIATE_TEST_SUITE_P(
+ ARM_CRC32, AV1Crc32cHashTest,  // Arm CRC32 variant x each kValidBlockSize
+ ::testing::Combine(::testing::Values(&av1_get_crc32c_value_arm_crc32),
+ ::testing::ValuesIn(kValidBlockSize)));
+#endif  // HAVE_ARM_CRC32
+
} // namespace
diff --git a/test/test.cmake b/test/test.cmake
index ac8681d..ea99a3a 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -322,7 +322,7 @@
endif()
- if(HAVE_SSE4_2)
+ if(HAVE_SSE4_2 OR HAVE_ARM_CRC32)
list(APPEND AOM_UNIT_TEST_ENCODER_SOURCES "${AOM_ROOT}/test/hash_test.cc")
endif()
@@ -498,6 +498,10 @@
add_intrinsics_source_to_target("${AOM_NEON_INTRIN_FLAG}" "test_libaom"
"AOM_UNIT_TEST_COMMON_INTRIN_NEON")
endif()
+ if(HAVE_ARM_CRC32) # NOTE(review): this patch never sets the list named below.
+ add_intrinsics_source_to_target("${AOM_ARM_CRC32_FLAG}" "test_libaom"
+ "AOM_UNIT_TEST_COMMON_INTRIN_CRC32")
+ endif() # TODO confirm AOM_UNIT_TEST_COMMON_INTRIN_CRC32 is defined elsewhere.
if(ENABLE_TESTDATA)
make_test_data_lists("${AOM_UNIT_TEST_DATA_LIST_FILE}" test_files