git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crc32c: add aarch64 optimized crc32c implementation 3604/head
author: Yazen Ghannam <yazen.ghannam@linaro.org>
Wed, 21 Jan 2015 03:29:05 +0000 (21:29 -0600)
committer: Yazen Ghannam <yazen.ghannam@linaro.org>
Tue, 17 Mar 2015 18:56:52 +0000 (13:56 -0500)
ARMv8 defines a set of optional CRC32/CRC32C instructions.
This patch defines an optimized function that uses these
instructions when available rather than table-based lookup.
Optimized function based on a Hadoop patch by Ed Nevill.

Autotools updated to check for compiler support.
Optimized function is selected at runtime based on HWCAP_CRC32.
Added crc32c "performance" unit test and arch unit test.

Tested on AMD Seattle.
Passes all crc32c unit tests.
Unit test shows ~4x performance increase versus sctp.

Signed-off-by: Yazen Ghannam <yazen.ghannam@linaro.org>
Reviewed-by: Steve Capper <steve.capper@linaro.org>
configure.ac
m4/ax_arm.m4
src/arch/arm.c
src/arch/arm.h
src/common/Makefile.am
src/common/crc32c.cc
src/common/crc32c_aarch64.c [new file with mode: 0644]
src/common/crc32c_aarch64.h [new file with mode: 0644]
src/test/common/test_crc32c.cc
src/test/test_arch.cc

index 05f0cf9cd91c4f5b9a2f9acca56eb7a9348888ab..78e57a861c0f1e96c7cd8d3078dc8d626cf4a27c 100644 (file)
@@ -606,6 +606,7 @@ AC_LANG_POP([C++])
 # Find supported SIMD / NEON / SSE extensions supported by the compiler
 AX_ARM_FEATURES()
 AM_CONDITIONAL(HAVE_NEON, [ test "x$ax_cv_support_neon_ext" = "xyes"])
+AM_CONDITIONAL(HAVE_ARMV8_CRC, [ test "x$ax_cv_support_crc_ext" = "xyes"])
 AX_INTEL_FEATURES()
 AM_CONDITIONAL(HAVE_SSSE3, [ test "x$ax_cv_support_ssse3_ext" = "xyes"])
 AM_CONDITIONAL(HAVE_SSE4_PCLMUL, [ test "x$ax_cv_support_pclmuldq_ext" = "xyes"])
index 2ccc9a977f823151d60c15659b86ab6f8db9ea50..37ea0aaf1d16a41baba741cb6ba528fa151d96f1 100644 (file)
@@ -13,13 +13,27 @@ AC_DEFUN([AX_ARM_FEATURES],
       fi
     ;;
     aarch64*)
+      AX_CHECK_COMPILE_FLAG(-march=armv8-a, ax_cv_support_armv8=yes, [])
+      if test x"$ax_cv_support_armv8" = x"yes"; then
+        ARM_ARCH_FLAGS="-march=armv8-a"
+        ARM_DEFINE_FLAGS="-DARCH_AARCH64"
+      fi
       AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, ax_cv_support_neon_ext=yes, [])
       if test x"$ax_cv_support_neon_ext" = x"yes"; then
+        ARM_ARCH_FLAGS="$ARM_ARCH_FLAGS+simd"
+        ARM_DEFINE_FLAGS="$ARM_DEFINE_FLAGS -DARM_NEON"
         ARM_NEON_FLAGS="-march=armv8-a+simd -DARCH_AARCH64 -DARM_NEON"
-        AC_SUBST(ARM_NEON_FLAGS)
-        ARM_FLAGS="$ARM_FLAGS $ARM_NEON_FLAGS"
         AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
+        AC_SUBST(ARM_NEON_FLAGS)
+      fi
+      AX_CHECK_COMPILE_FLAG(-march=armv8-a+crc, ax_cv_support_crc_ext=yes, [])
+      if test x"$ax_cv_support_crc_ext" = x"yes"; then
+        ARM_ARCH_FLAGS="$ARM_ARCH_FLAGS+crc"
+        ARM_CRC_FLAGS="-march=armv8-a+crc -DARCH_AARCH64"
+        AC_DEFINE(HAVE_ARMV8_CRC,,[Support ARMv8 CRC instructions])
+        AC_SUBST(ARM_CRC_FLAGS)
       fi
+        ARM_FLAGS="$ARM_ARCH_FLAGS $ARM_DEFINE_FLAGS"
     ;;
   esac
 
index 93d079ade965acd8819f1452b66048359b91acdb..5a47e33492351fdaa456072182c6f851956f1d26 100644 (file)
@@ -2,6 +2,7 @@
 
 /* flags we export */
 int ceph_arch_neon = 0;
+int ceph_arch_aarch64_crc32 = 0;
 
 #include <stdio.h>
 
@@ -47,6 +48,7 @@ int ceph_arch_arm_probe(void)
        ceph_arch_neon = (get_hwcap() & HWCAP_NEON) == HWCAP_NEON;
 #elif __aarch64__ && __linux__
        ceph_arch_neon = (get_hwcap() & HWCAP_ASIMD) == HWCAP_ASIMD;
+       ceph_arch_aarch64_crc32 = (get_hwcap() & HWCAP_CRC32) == HWCAP_CRC32;
 #else
        if (0)
                get_hwcap();  // make compiler shut up
index f61343833d20f195401f6fecb16b295f1b5195b8..1659b2e94dec42cf6d1d9038f76dec7538d11863 100644 (file)
@@ -6,6 +6,7 @@ extern "C" {
 #endif
 
 extern int ceph_arch_neon;  /* true if we have ARM NEON or ASIMD abilities */
+extern int ceph_arch_aarch64_crc32;  /* true if we have AArch64 CRC32/CRC32C abilities */
 
 extern int ceph_arch_arm_probe(void);
 
index a0736ba821fa1ae3d70c0b5411c5d333068138cf..1c37ae46b6a9308a46b75da9918e86c41723d0ee 100644 (file)
@@ -118,11 +118,19 @@ endif
 LIBCOMMON_DEPS += libcommon_crc.la
 noinst_LTLIBRARIES += libcommon_crc.la
 
+if HAVE_ARMV8_CRC
+libcommon_crc_aarch64_la_SOURCES = common/crc32c_aarch64.c
+libcommon_crc_aarch64_la_CFLAGS = $(AM_CFLAGS) $(ARM_CRC_FLAGS)
+LIBCOMMON_DEPS += libcommon_crc_aarch64.la
+noinst_LTLIBRARIES += libcommon_crc_aarch64.la
+endif
+
 noinst_HEADERS += \
        common/bloom_filter.hpp \
        common/sctp_crc32.h \
        common/crc32c_intel_baseline.h \
-       common/crc32c_intel_fast.h
+       common/crc32c_intel_fast.h \
+       common/crc32c_aarch64.h
 
 
 # important; libmsg before libauth!
index e2e81a42f4579c7e082e9ef0118b5f199875a464..45432f5687ed7fd7fabc2f66980d384d1ff50610 100644 (file)
@@ -5,9 +5,11 @@
 
 #include "arch/probe.h"
 #include "arch/intel.h"
+#include "arch/arm.h"
 #include "common/sctp_crc32.h"
 #include "common/crc32c_intel_baseline.h"
 #include "common/crc32c_intel_fast.h"
+#include "common/crc32c_aarch64.h"
 
 /*
  * choose best implementation based on the CPU architecture.
@@ -24,6 +26,10 @@ ceph_crc32c_func_t ceph_choose_crc32(void)
     return ceph_crc32c_intel_fast;
   }
 
+  if (ceph_arch_aarch64_crc32){
+    return ceph_crc32c_aarch64;
+  }
+
   // default
   return ceph_crc32c_sctp;
 }
diff --git a/src/common/crc32c_aarch64.c b/src/common/crc32c_aarch64.c
new file mode 100644 (file)
index 0000000..d33827d
--- /dev/null
@@ -0,0 +1,47 @@
+#include "acconfig.h"
+#include "include/int_types.h"
+#include "common/crc32c_aarch64.h"
+
+#include <string.h>
+
+/*
+ * Wrappers around the ARMv8 CRC32C instructions.  Each macro folds `value`
+ * (64, 32, 16 or 8 bits wide respectively) into the running checksum `crc`,
+ * which is updated in place.
+ */
+#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+
+/*
+ * Load helpers.  memcpy() is used instead of casting `buffer` to a wider
+ * pointer type, so the reads do not cast away const and do not depend on the
+ * buffer being aligned for, or aliasable as, a wider integer type.
+ */
+static inline uint64_t crc32c_load64(unsigned char const *p)
+{
+       uint64_t v;
+
+       memcpy(&v, p, sizeof(v));
+       return v;
+}
+
+static inline uint32_t crc32c_load32(unsigned char const *p)
+{
+       uint32_t v;
+
+       memcpy(&v, p, sizeof(v));
+       return v;
+}
+
+static inline uint16_t crc32c_load16(unsigned char const *p)
+{
+       uint16_t v;
+
+       memcpy(&v, p, sizeof(v));
+       return v;
+}
+
+/*
+ * Continue a CRC32C computation using the ARMv8 CRC32C instructions.
+ *
+ * crc:    running checksum to continue from
+ * buffer: bytes to fold in; if NULL, `len` zero bytes are folded in instead
+ * len:    number of bytes to process
+ *
+ * Returns the updated checksum.  No initial or final inversion is applied
+ * here.
+ */
+uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+       /* Bytes still to be processed. */
+       unsigned remaining = len;
+
+       if (!buffer) {
+               /* A 64-bit zero, so the full X register operand is defined. */
+               const uint64_t zero64 = 0;
+
+               for (; remaining >= sizeof(uint64_t); remaining -= sizeof(uint64_t))
+                       CRC32CX(crc, zero64);
+
+               /*
+                * At most 7 bytes are left; each set bit of `remaining`
+                * selects one 4-, 2- or 1-byte step.  This is more efficient
+                * than a byte-by-byte loop.
+                */
+               if (remaining & sizeof(uint32_t))
+                       CRC32CW(crc, 0);
+
+               if (remaining & sizeof(uint16_t))
+                       CRC32CH(crc, 0);
+
+               if (remaining & sizeof(uint8_t))
+                       CRC32CB(crc, 0);
+       } else {
+               for (; remaining >= sizeof(uint64_t); remaining -= sizeof(uint64_t)) {
+                       CRC32CX(crc, crc32c_load64(buffer));
+                       buffer += sizeof(uint64_t);
+               }
+
+               /*
+                * At most 7 bytes are left; each set bit of `remaining`
+                * selects one 4-, 2- or 1-byte step.  This is more efficient
+                * than a byte-by-byte loop.
+                */
+               if (remaining & sizeof(uint32_t)) {
+                       CRC32CW(crc, crc32c_load32(buffer));
+                       buffer += sizeof(uint32_t);
+               }
+               if (remaining & sizeof(uint16_t)) {
+                       CRC32CH(crc, crc32c_load16(buffer));
+                       buffer += sizeof(uint16_t);
+               }
+               if (remaining & sizeof(uint8_t))
+                       CRC32CB(crc, *buffer);
+       }
+       return crc;
+}
diff --git a/src/common/crc32c_aarch64.h b/src/common/crc32c_aarch64.h
new file mode 100644 (file)
index 0000000..3727f54
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef CEPH_COMMON_CRC32C_AARCH64_H
+#define CEPH_COMMON_CRC32C_AARCH64_H
+
+/*
+ * HAVE_ARMV8_CRC is tested below, so pull in the configure-generated header
+ * here (the same one crc32c_aarch64.c includes).  Otherwise the declaration
+ * picked would depend on what the including file happened to include first.
+ */
+#include "acconfig.h"
+#include "arch/arm.h"
+
+#include <stdint.h>  /* uint32_t */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_ARMV8_CRC
+
+/*
+ * Continue a CRC32C computation using the ARMv8 CRC32C instructions.
+ *
+ * crc:    running checksum to continue from
+ * buffer: bytes to fold in; if NULL, `len` zero bytes are folded in instead
+ * len:    number of bytes to process
+ *
+ * Returns the updated checksum.
+ */
+extern uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len);
+
+#else
+
+/*
+ * Placeholder used when the build has no ARMv8 CRC support.  It exists only
+ * so that references to ceph_crc32c_aarch64 still compile: it ignores its
+ * arguments and always returns 0, i.e. it does NOT compute a checksum.
+ *
+ * NOTE(review): callers must not select this function unless the build
+ * defines HAVE_ARMV8_CRC; confirm that every selection site checks more than
+ * the runtime ceph_arch_aarch64_crc32 flag.
+ */
+static inline uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+       (void)crc;
+       (void)buffer;
+       (void)len;
+       return 0;
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
index b4297c610771b13bfd29052fc62dc073c1f1b135..a31161620c92d9343fc5f0bc2e099d83234443e2 100644 (file)
@@ -13,6 +13,7 @@
 
 #include "common/sctp_crc32.h"
 #include "common/crc32c_intel_baseline.h"
+#include "common/crc32c_aarch64.h"
 
 TEST(Crc32c, Small) {
   const char *a = "foo bar baz";
@@ -80,6 +81,15 @@ TEST(Crc32c, Performance) {
     std::cout << "intel baseline = " << rate << " MB/sec" << std::endl;
     ASSERT_EQ(261108528u, val);
   }
+  if (ceph_arch_aarch64_crc32) // Skip if CRC32C instructions are not defined.
+  {
+    utime_t start = ceph_clock_now(NULL);
+    unsigned val = ceph_crc32c_aarch64(0, (unsigned char *)a, len);
+    utime_t end = ceph_clock_now(NULL);
+    float rate = (float)len / (float)(1024*1024) / (float)(end - start);
+    std::cout << "aarch64 = " << rate << " MB/sec" << std::endl;
+    ASSERT_EQ(261108528u, val);
+  }
 
 }
 
index b129262af277cb2abd1f618958a100d33e9d3e6d..e2c225b20ac6ae7580c54ebd00b815d3aaa43c4a 100644 (file)
@@ -47,9 +47,20 @@ TEST(Arch, all)
 
   int expected;
 
+#if (__arm__ || __aarch64__)
+
   expected = (strstr(flags, " neon ") || strstr(flags, " asimd ")) ? 1 : 0;
   EXPECT_EQ(expected, ceph_arch_neon);
 
+#endif
+#if (__aarch64__)
+
+  expected = strstr(flags, " crc32 ") ? 1 : 0;
+  EXPECT_EQ(expected, ceph_arch_aarch64_crc32);
+
+#endif
+#if (__x86_64__)
+
   expected = strstr(flags, " pclmulqdq ") ? 1 : 0;
   EXPECT_EQ(expected, ceph_arch_intel_pclmul);
 
@@ -67,6 +78,9 @@ TEST(Arch, all)
 
   expected = strstr(flags, " sse2 ") ? 1 : 0;
   EXPECT_EQ(expected, ceph_arch_intel_sse2);
+
+#endif
+
 #endif
 }