-macro(check_nasm_support _object_format _support_x64 _support_x64_and_avx2 _support_x64_and_avx512)
+macro(check_nasm_support _object_format _support_x64 _support_x64_and_avx2 _support_x64_and_avx512
+ _support_x64_and_avx512_vpclmul)
execute_process(
COMMAND which nasm
RESULT_VARIABLE no_nasm
if(NOT rt)
set(${_support_x64_and_avx512} TRUE)
endif()
+ # Probe whether nasm can assemble crc32_iscsi_by16_10.asm with
+ # AS_FEATURE_LEVEL=10 defined. The object file is discarded (-o /dev/null);
+ # only nasm's exit status is inspected.
+ # The output format comes from the macro's own _object_format parameter
+ # rather than from a caller-scope variable that merely shares the name.
+ execute_process(COMMAND nasm -D AS_FEATURE_LEVEL=10 -f ${_object_format}
+   -i ${CMAKE_SOURCE_DIR}/src/isa-l/include/
+   ${CMAKE_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_by16_10.asm
+   -o /dev/null
+   RESULT_VARIABLE rt
+   OUTPUT_QUIET
+   ERROR_QUIET)
+ # rt is nasm's exit status; a false (zero) value means the file assembled.
+ if(NOT rt)
+   set(${_support_x64_and_avx512_vpclmul} TRUE)
+ endif()
endif(${_support_x64})
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64")
endif(NOT no_nasm)
message(STATUS "Could NOT find nasm")
elseif(NOT ${_support_x64})
message(STATUS "Found nasm: but x86_64 with x32 ABI is not supported")
+ elseif(${_support_x64_and_avx512_vpclmul})
+ message(STATUS "Found nasm: best of best -- capable of assembling AVX512 & VPCLMUL")
elseif(${_support_x64_and_avx512})
message(STATUS "Found nasm: best -- capable of assembling AVX512")
elseif(${_support_x64_and_avx2})
check_nasm_support(${object_format}
HAVE_NASM_X64
HAVE_NASM_X64_AVX2
- HAVE_NASM_X64_AVX512)
+ HAVE_NASM_X64_AVX512
+ HAVE_NASM_X64_AVX512_VPCLMUL)
endif()
endif()
#include "arch/probe.h"
/* flags we export */
+int ceph_arch_intel_avx512_vpclmul = 0;
int ceph_arch_intel_pclmul = 0;
int ceph_arch_intel_sse42 = 0;
int ceph_arch_intel_sse41 = 0;
#ifdef __x86_64__
#include <cpuid.h>
+#include <x86intrin.h>
/* http://en.wikipedia.org/wiki/CPUID#EAX.3D1:_Processor_Info_and_Feature_Bits */
#define CPUID_SSSE3 (1 << 9)
#define CPUID_SSE3 (1)
#define CPUID_SSE2 (1 << 26)
-#define CPUID_AESNI (1 << 25)
+#define CPUID_AESNI (1 << 25)
+#define CPUID_OSXSAVE (1 << 27)
+
+/* XCR0 state-component bits: SSE:[1] AVX:[2] Opmask:[5] ZMM_Hi256:[6] Hi16_ZMM:[7] */
+#define XCR0_AVX512 (0x000000E6ULL)
+
+/* Match ISA-L requirements since we call into it. May be stricter than necessary. */
+/* AVX512F:[16] DQ:[17] CD:[28] BW:[30] VL:[31] */
+#define CPUID7_0_AVX512_EBX (0xD0030000UL)
+/* AVX512VBMI2:[6] GFNI:[8] VAES:[9] VPCLMULQDQ:[10] VNNI:[11] BITALG:[12] VPOPCNTDQ:[14] */
+#define CPUID7_0_AVX512_ECX (0x00005F40UL)
+
+/*
+ * Thin wrapper around the _xgetbv intrinsic: reads the extended control
+ * register selected by xcr_index and returns its value.
+ * The "xsave" target attribute is applied to this function only.
+ */
+__attribute__((__target__("xsave")))
+unsigned long long ceph_xgetbv(unsigned int xcr_index)
+{
+  const unsigned long long xcr_value = _xgetbv(xcr_index);
+  return xcr_value;
+}
int ceph_arch_intel_probe(void)
{
if ((edx & CPUID_SSE2) != 0) {
ceph_arch_intel_sse2 = 1;
}
- if ((ecx & CPUID_AESNI) != 0) {
- ceph_arch_intel_aesni = 1;
- }
+ if ((ecx & CPUID_AESNI) != 0) {
+ ceph_arch_intel_aesni = 1;
+ }
+ /*
+ * AVX512 feature: check these conditions IN ORDER
+ * a. OSXSAVE/XGETBV is available
+ * b. AVX512 state is enabled in XCR0
+ * c. CPUID leaf 7 exists
+ * d. required AVX512 features present
+ */
+ unsigned int eax_7_0 = 0, ebx_7_0 = 0, ecx_7_0 = 0, edx_7_0 = 0;
+ if ((ecx & CPUID_OSXSAVE) &&
+ ((ceph_xgetbv(0) & XCR0_AVX512) == XCR0_AVX512) &&
+ (__get_cpuid_count(7, 0, &eax_7_0, &ebx_7_0, &ecx_7_0, &edx_7_0)) &&
+ ((ebx_7_0 & CPUID7_0_AVX512_EBX) == CPUID7_0_AVX512_EBX) &&
+ ((ecx_7_0 & CPUID7_0_AVX512_ECX) == CPUID7_0_AVX512_ECX)) {
+ ceph_arch_intel_avx512_vpclmul = 1;
+ }
return 0;
}
extern "C" {
#endif
+extern int ceph_arch_intel_avx512_vpclmul; /* true if we have AVX512+VPCLMUL features */
extern int ceph_arch_intel_pclmul; /* true if we have PCLMUL features */
extern int ceph_arch_intel_sse42; /* true if we have sse 4.2 features */
extern int ceph_arch_intel_sse41; /* true if we have sse 4.1 features */
${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_01.asm
crc32c_intel_fast_zero_asm.s)
endif(HAVE_NASM_X64)
+ # When nasm was found capable of assembling the AVX512+VPCLMUL kernel,
+ # add crc32_iscsi_by16_10.asm to the crc32 sources and prepend
+ # -DAS_FEATURE_LEVEL=10 to CMAKE_ASM_FLAGS.
+ if(HAVE_NASM_X64_AVX512_VPCLMUL)
+   list(APPEND crc32_srcs
+     ${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_by16_10.asm)
+   set(CMAKE_ASM_FLAGS "-DAS_FEATURE_LEVEL=10 ${CMAKE_ASM_FLAGS}")
+ endif()
elseif(HAVE_POWER8)
list(APPEND crc32_srcs
crc32c_ppc.c)
// if the CPU supports it, *and* the fast version is compiled in,
// use that.
#if defined(__i386__) || defined(__x86_64__)
+ if (ceph_arch_intel_avx512_vpclmul && ceph_crc32c_intel_fast_avx512_vpclmul_exists()) {
+ return ceph_crc32c_intel_fast_avx512_vpclmul;
+ }
if (ceph_arch_intel_sse42 && ceph_crc32c_intel_fast_exists()) {
if (ceph_arch_intel_pclmul) {
return ceph_crc32c_intel_fast_pclmul;
#include "common/crc32c_intel_baseline.h"
extern unsigned int crc32_iscsi_01(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_01");
+extern unsigned int crc32_iscsi_by16_10(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_by16_10");
extern unsigned int crc32_iscsi_zero_00(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_zero_00");
+#if defined(HAVE_NASM_X64_AVX512_VPCLMUL)
+
+/*
+ * Forwards to the assembly routine crc32_iscsi_by16_10 and returns its
+ * result. A NULL buffer is routed to crc32_iscsi_zero_00 instead
+ * (presumably the CRC over `len` zero bytes -- confirm against that routine).
+ */
+uint32_t ceph_crc32c_intel_fast_avx512_vpclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+	return buffer ? crc32_iscsi_by16_10(buffer, len, crc)
+	              : crc32_iscsi_zero_00(buffer, len, crc);
+}
+
+/* Returns 1: the AVX512+VPCLMUL implementation is compiled in. */
+int ceph_crc32c_intel_fast_avx512_vpclmul_exists(void)
+{
+	const int compiled_in = 1;
+	return compiled_in;
+}
+
+#else /* !HAVE_NASM_X64_AVX512_VPCLMUL */
+
+/*
+ * Placeholder used when HAVE_NASM_X64_AVX512_VPCLMUL is not defined:
+ * ignores its arguments and always returns 0.
+ */
+uint32_t ceph_crc32c_intel_fast_avx512_vpclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+	(void)crc;
+	(void)buffer;
+	(void)len;
+	return 0;
+}
+
+/* Returns 0: the AVX512+VPCLMUL implementation is not compiled in. */
+int ceph_crc32c_intel_fast_avx512_vpclmul_exists(void)
+{
+	const int compiled_in = 0;
+	return compiled_in;
+}
+
+#endif /* HAVE_NASM_X64_AVX512_VPCLMUL */
+
+
#ifdef HAVE_NASM_X64
uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
/* is the fast version compiled in */
extern int ceph_crc32c_intel_fast_exists(void);
+extern int ceph_crc32c_intel_fast_avx512_vpclmul_exists(void);
#ifdef __x86_64__
+extern uint32_t ceph_crc32c_intel_fast_avx512_vpclmul(uint32_t crc, unsigned char const *buffer, unsigned len);
extern uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len);
extern uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len);
#else
+/* Placeholder for the non-__x86_64__ branch: ignores its arguments and returns 0. */
+static inline uint32_t ceph_crc32c_intel_fast_avx512_vpclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+	(void)crc;
+	(void)buffer;
+	(void)len;
+	return 0;
+}
+
static inline uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
{
return 0;
/* nasm can also build the isa-l:avx512 */
#cmakedefine HAVE_NASM_X64_AVX512
+/* nasm can also build the isa-l:avx512 & vpclmulqdq */
+#cmakedefine HAVE_NASM_X64_AVX512_VPCLMUL
+
/* Define if the erasure code isa-l plugin is compiled */
#cmakedefine WITH_EC_ISA_PLUGIN