ARMv8 defines a set of optional CRC32/CRC32C instructions.
This patch defines an optimized function that uses these
instructions when available rather than table-based lookup.
Optimized function based on a Hadoop patch by Ed Nevill.
Autotools updated to check for compiler support.
Optimized function is selected at runtime based on HWCAP_CRC32.
Added crc32c "performance" unit test and arch unit test.
Tested on AMD Seattle.
Passes all crc32c unit tests.
Unit test shows ~4x performance increase versus sctp.
Signed-off-by: Yazen Ghannam <yazen.ghannam@linaro.org>
Reviewed-by: Steve Capper <steve.capper@linaro.org>
# Find supported SIMD / NEON / SSE extensions supported by the compiler
AX_ARM_FEATURES()
AM_CONDITIONAL(HAVE_NEON, [ test "x$ax_cv_support_neon_ext" = "xyes"])
+AM_CONDITIONAL(HAVE_ARMV8_CRC, [ test "x$ax_cv_support_crc_ext" = "xyes"])
AX_INTEL_FEATURES()
AM_CONDITIONAL(HAVE_SSSE3, [ test "x$ax_cv_support_ssse3_ext" = "xyes"])
AM_CONDITIONAL(HAVE_SSE4_PCLMUL, [ test "x$ax_cv_support_pclmuldq_ext" = "xyes"])
fi
;;
aarch64*)
+ AX_CHECK_COMPILE_FLAG(-march=armv8-a, ax_cv_support_armv8=yes, [])
+ if test x"$ax_cv_support_armv8" = x"yes"; then
+ ARM_ARCH_FLAGS="-march=armv8-a"
+ ARM_DEFINE_FLAGS="-DARCH_AARCH64"
+ fi
AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, ax_cv_support_neon_ext=yes, [])
if test x"$ax_cv_support_neon_ext" = x"yes"; then
+ ARM_ARCH_FLAGS="$ARM_ARCH_FLAGS+simd"
+ ARM_DEFINE_FLAGS="$ARM_DEFINE_FLAGS -DARM_NEON"
ARM_NEON_FLAGS="-march=armv8-a+simd -DARCH_AARCH64 -DARM_NEON"
- AC_SUBST(ARM_NEON_FLAGS)
- ARM_FLAGS="$ARM_FLAGS $ARM_NEON_FLAGS"
AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
+ AC_SUBST(ARM_NEON_FLAGS)
+ fi
+ AX_CHECK_COMPILE_FLAG(-march=armv8-a+crc, ax_cv_support_crc_ext=yes, [])
+ if test x"$ax_cv_support_crc_ext" = x"yes"; then
+ ARM_ARCH_FLAGS="$ARM_ARCH_FLAGS+crc"
+ ARM_CRC_FLAGS="-march=armv8-a+crc -DARCH_AARCH64"
+ AC_DEFINE(HAVE_ARMV8_CRC,,[Support ARMv8 CRC instructions])
+ AC_SUBST(ARM_CRC_FLAGS)
fi
+ ARM_FLAGS="$ARM_ARCH_FLAGS $ARM_DEFINE_FLAGS"
;;
esac
/* flags we export */
int ceph_arch_neon = 0;
+int ceph_arch_aarch64_crc32 = 0;
#include <stdio.h>
ceph_arch_neon = (get_hwcap() & HWCAP_NEON) == HWCAP_NEON;
#elif __aarch64__ && __linux__
ceph_arch_neon = (get_hwcap() & HWCAP_ASIMD) == HWCAP_ASIMD;
+ ceph_arch_aarch64_crc32 = (get_hwcap() & HWCAP_CRC32) == HWCAP_CRC32;
#else
if (0)
get_hwcap(); // make compiler shut up
#endif
extern int ceph_arch_neon; /* true if we have ARM NEON or ASIMD abilities */
+extern int ceph_arch_aarch64_crc32; /* true if we have AArch64 CRC32/CRC32C abilities */
extern int ceph_arch_arm_probe(void);
LIBCOMMON_DEPS += libcommon_crc.la
noinst_LTLIBRARIES += libcommon_crc.la
+if HAVE_ARMV8_CRC
+libcommon_crc_aarch64_la_SOURCES = common/crc32c_aarch64.c
+libcommon_crc_aarch64_la_CFLAGS = $(AM_CFLAGS) $(ARM_CRC_FLAGS)
+LIBCOMMON_DEPS += libcommon_crc_aarch64.la
+noinst_LTLIBRARIES += libcommon_crc_aarch64.la
+endif
+
noinst_HEADERS += \
common/bloom_filter.hpp \
common/sctp_crc32.h \
common/crc32c_intel_baseline.h \
- common/crc32c_intel_fast.h
+ common/crc32c_intel_fast.h \
+ common/crc32c_aarch64.h
# important; libmsg before libauth!
#include "arch/probe.h"
#include "arch/intel.h"
+#include "arch/arm.h"
#include "common/sctp_crc32.h"
#include "common/crc32c_intel_baseline.h"
#include "common/crc32c_intel_fast.h"
+#include "common/crc32c_aarch64.h"
/*
* choose best implementation based on the CPU architecture.
return ceph_crc32c_intel_fast;
}
+ if (ceph_arch_aarch64_crc32){
+ return ceph_crc32c_aarch64;
+ }
+
// default
return ceph_crc32c_sctp;
}
--- /dev/null
+#include "acconfig.h"
+#include "include/int_types.h"
+#include "common/crc32c_aarch64.h"
+
+#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+
+uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+ int64_t length = len;
+
+ if (!buffer) {
+
+ while ((length -= sizeof(uint64_t)) >= 0)
+ CRC32CX(crc, 0);
+
+ /* The following is more efficient than the straight loop */
+ if (length & sizeof(uint32_t))
+ CRC32CW(crc, 0);
+
+ if (length & sizeof(uint16_t))
+ CRC32CH(crc, 0);
+
+ if (length & sizeof(uint8_t))
+ CRC32CB(crc, 0);
+ } else {
+ while ((length -= sizeof(uint64_t)) >= 0) {
+ CRC32CX(crc, *(uint64_t *)buffer);
+ buffer += sizeof(uint64_t);
+ }
+
+ /* The following is more efficient than the straight loop */
+ if (length & sizeof(uint32_t)) {
+ CRC32CW(crc, *(uint32_t *)buffer);
+ buffer += sizeof(uint32_t);
+ }
+ if (length & sizeof(uint16_t)) {
+ CRC32CH(crc, *(uint16_t *)buffer);
+ buffer += sizeof(uint16_t);
+ }
+ if (length & sizeof(uint8_t))
+ CRC32CB(crc, *buffer);
+ }
+ return crc;
+}
--- /dev/null
+#ifndef CEPH_COMMON_CRC32C_AARCH64_H
+#define CEPH_COMMON_CRC32C_AARCH64_H
+
+#include "arch/arm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_ARMV8_CRC
+
+extern uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len);
+
+#else
+
+static inline uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+ return 0;
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
#include "common/sctp_crc32.h"
#include "common/crc32c_intel_baseline.h"
+#include "common/crc32c_aarch64.h"
TEST(Crc32c, Small) {
const char *a = "foo bar baz";
std::cout << "intel baseline = " << rate << " MB/sec" << std::endl;
ASSERT_EQ(261108528u, val);
}
+ if (ceph_arch_aarch64_crc32) // Skip if CRC32C instructions are not defined.
+ {
+ utime_t start = ceph_clock_now(NULL);
+ unsigned val = ceph_crc32c_aarch64(0, (unsigned char *)a, len);
+ utime_t end = ceph_clock_now(NULL);
+ float rate = (float)len / (float)(1024*1024) / (float)(end - start);
+ std::cout << "aarch64 = " << rate << " MB/sec" << std::endl;
+ ASSERT_EQ(261108528u, val);
+ }
}
int expected;
+#if (__arm__ || __aarch64__)
+
expected = (strstr(flags, " neon ") || strstr(flags, " asimd ")) ? 1 : 0;
EXPECT_EQ(expected, ceph_arch_neon);
+#endif
+#if (__aarch64__)
+
+ expected = strstr(flags, " crc32 ") ? 1 : 0;
+ EXPECT_EQ(expected, ceph_arch_aarch64_crc32);
+
+#endif
+#if (__x86_64__)
+
expected = strstr(flags, " pclmulqdq ") ? 1 : 0;
EXPECT_EQ(expected, ceph_arch_intel_pclmul);
expected = strstr(flags, " sse2 ") ? 1 : 0;
EXPECT_EQ(expected, ceph_arch_intel_sse2);
+
+#endif
+
#endif
}