From: Maodi Ma
Date: Wed, 5 Nov 2025 02:35:46 +0000 (+0000)
Subject: common: enable AVX512+VPCLMULQDQ for crc32c performance on x86
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e941e8931811c0c88c12db5a829390c01c8f9c40;p=ceph.git

common: enable AVX512+VPCLMULQDQ for crc32c performance on x86

- Add crc32_iscsi_by16_10 in src/isa-l into candidates for ceph_crc32c
- Add hardware capability check for AVX512 instr before register
- Add NASM feature check to ensure compatibility and to enable AS_FEATURE_LEVEL in crc32_iscsi_by16_10.asm

Signed-off-by: Maodi Ma
---

NOTE(review): this copy of the patch was recovered from a whitespace-collapsed
page. Indentation is reconstructed, and the header names of the two #include
lines in src/arch/intel.c (<cpuid.h>, <immintrin.h>) are inferred from the
functions the file calls (__get_cpuid_count, _xgetbv) -- confirm against the
upstream commit. ceph_xgetbv() is given internal linkage here because no
header in this patch declares it.

diff --git a/cmake/modules/CheckNasm.cmake b/cmake/modules/CheckNasm.cmake
index 8a45bf38bfbc..e7951f6f354a 100644
--- a/cmake/modules/CheckNasm.cmake
+++ b/cmake/modules/CheckNasm.cmake
@@ -1,4 +1,5 @@
-macro(check_nasm_support _object_format _support_x64 _support_x64_and_avx2 _support_x64_and_avx512)
+macro(check_nasm_support _object_format _support_x64 _support_x64_and_avx2 _support_x64_and_avx512
+  _support_x64_and_avx512_vpclmul)
   execute_process(
     COMMAND which nasm
     RESULT_VARIABLE no_nasm
@@ -37,6 +38,16 @@ macro(check_nasm_support _object_format _support_x64 _support_x64_and_avx2 _supp
         if(NOT rt)
           set(${_support_x64_and_avx512} TRUE)
         endif()
+        execute_process(COMMAND nasm -D AS_FEATURE_LEVEL=10 -f ${object_format}
+          -i ${CMAKE_SOURCE_DIR}/src/isa-l/include/
+          ${CMAKE_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_by16_10.asm
+          -o /dev/null
+          RESULT_VARIABLE rt
+          OUTPUT_QUIET
+          ERROR_QUIET)
+        if(NOT rt)
+          set(${_support_x64_and_avx512_vpclmul} TRUE)
+        endif()
       endif(${_support_x64})
     endif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64")
   endif(NOT no_nasm)
@@ -44,6 +55,8 @@ macro(check_nasm_support _object_format _support_x64 _support_x64_and_avx2 _supp
     message(STATUS "Could NOT find nasm")
   elseif(NOT ${_support_x64})
     message(STATUS "Found nasm: but x86_64 with x32 ABI is not supported")
+  elseif(${_support_x64_and_avx512_vpclmul})
+    message(STATUS "Found nasm: best of best -- capable of assembling AVX512 & VPCLMUL")
   elseif(${_support_x64_and_avx512})
     message(STATUS "Found nasm: best -- capable of assembling AVX512")
   elseif(${_support_x64_and_avx2})
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 925004b9685d..e13613869599 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -239,7 +239,8 @@ if(HAVE_INTEL)
     check_nasm_support(${object_format}
       HAVE_NASM_X64
       HAVE_NASM_X64_AVX2
-      HAVE_NASM_X64_AVX512)
+      HAVE_NASM_X64_AVX512
+      HAVE_NASM_X64_AVX512_VPCLMUL)
   endif()
 endif()
 
diff --git a/src/arch/intel.c b/src/arch/intel.c
index 5c483dccbdd2..963d944c6434 100644
--- a/src/arch/intel.c
+++ b/src/arch/intel.c
@@ -16,6 +16,7 @@
 #include "arch/probe.h"
 
 /* flags we export */
+int ceph_arch_intel_avx512_vpclmul = 0;
 int ceph_arch_intel_pclmul = 0;
 int ceph_arch_intel_sse42 = 0;
 int ceph_arch_intel_sse41 = 0;
@@ -26,6 +27,7 @@ int ceph_arch_intel_aesni = 0;
 
 #ifdef __x86_64__
 #include <cpuid.h>
+#include <immintrin.h>
 
 /* http://en.wikipedia.org/wiki/CPUID#EAX.3D1:_Processor_Info_and_Feature_Bits */
 
@@ -35,7 +37,22 @@ int ceph_arch_intel_aesni = 0;
 #define CPUID_SSSE3	(1 << 9)
 #define CPUID_SSE3	(1)
 #define CPUID_SSE2	(1 << 26)
-#define CPUID_AESNI (1 << 25)
+#define CPUID_AESNI	(1 << 25)
+#define CPUID_OSXSAVE	(1 << 27)
+
+/* SSE:[1] AVX:[2] Opmask:[5] ZMM_HI256:[6] ZMM16-31:[7]*/
+#define XCR0_AVX512	(0x000000E6ULL)
+
+/* Match ISA-L requirements since we call into it. May be stricter than necessary. */
+/* AVX512F:[16] DQ:[17] CD:[28] BW:[30] VL:[31] */
+#define CPUID7_0_AVX512_EBX	(0xD0030000UL)
+/* AVX512VBMI2:[6] GFNI:[8] VAES:[9] VPCLMULQDQ:[10] VNNI:[11] BITALG:[12] VPOPCNTDQ:[14] */
+#define CPUID7_0_AVX512_ECX	(0x00005F40UL)
+
+static __attribute__((__target__("xsave")))
+unsigned long long ceph_xgetbv(unsigned int xcr_index) {
+	return _xgetbv(xcr_index);
+}
 
 int ceph_arch_intel_probe(void)
 {
@@ -62,10 +79,25 @@ int ceph_arch_intel_probe(void)
 	if ((edx & CPUID_SSE2) != 0) {
 		ceph_arch_intel_sse2 = 1;
 	}
-  if ((ecx & CPUID_AESNI) != 0) {
-    ceph_arch_intel_aesni = 1;
-  }
+	if ((ecx & CPUID_AESNI) != 0) {
+		ceph_arch_intel_aesni = 1;
+	}
+	/*
+	 * AVX512 feature: check these conditions IN ORDER
+	 * a. OSXSAVE/XGETBV is available
+	 * b. AVX512 state is enabled in XCR0
+	 * c. CPUID leaf 7 exists
+	 * d. required AVX512 features present
+	 */
+	unsigned int eax_7_0 = 0, ebx_7_0 = 0, ecx_7_0 = 0, edx_7_0 = 0;
+	if ((ecx & CPUID_OSXSAVE) &&
+	    ((ceph_xgetbv(0) & XCR0_AVX512) == XCR0_AVX512) &&
+	    (__get_cpuid_count(7, 0, &eax_7_0, &ebx_7_0, &ecx_7_0, &edx_7_0)) &&
+	    ((ebx_7_0 & CPUID7_0_AVX512_EBX) == CPUID7_0_AVX512_EBX) &&
+	    ((ecx_7_0 & CPUID7_0_AVX512_ECX) == CPUID7_0_AVX512_ECX)) {
+		ceph_arch_intel_avx512_vpclmul = 1;
+	}
 
 	return 0;
 }
 
diff --git a/src/arch/intel.h b/src/arch/intel.h
index 3f4ae9482c04..48f51ce811b1 100644
--- a/src/arch/intel.h
+++ b/src/arch/intel.h
@@ -5,6 +5,7 @@
 extern "C" {
 #endif
 
+extern int ceph_arch_intel_avx512_vpclmul;  /* true if we have AVX512+VPCLMUL features */
 extern int ceph_arch_intel_pclmul; /* true if we have PCLMUL features */
 extern int ceph_arch_intel_sse42;  /* true if we have sse 4.2 features */
 extern int ceph_arch_intel_sse41;  /* true if we have sse 4.1 features */
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 6c197b1b513e..fa6771c2357b 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -237,6 +237,10 @@ if(HAVE_INTEL)
       ${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_01.asm
       crc32c_intel_fast_zero_asm.s)
   endif(HAVE_NASM_X64)
+  if(HAVE_NASM_X64_AVX512_VPCLMUL)
+    set(CMAKE_ASM_FLAGS "-DAS_FEATURE_LEVEL=10 ${CMAKE_ASM_FLAGS}")
+    list(APPEND crc32_srcs ${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_by16_10.asm)
+  endif(HAVE_NASM_X64_AVX512_VPCLMUL)
 elseif(HAVE_POWER8)
   list(APPEND crc32_srcs
     crc32c_ppc.c)
diff --git a/src/common/crc32c.cc b/src/common/crc32c.cc
index a0878d4280cc..f6f29987c35b 100644
--- a/src/common/crc32c.cc
+++ b/src/common/crc32c.cc
@@ -27,6 +27,9 @@ ceph_crc32c_func_t ceph_choose_crc32(void)
   // if the CPU supports it, *and* the fast version is compiled in,
   // use that.
 #if defined(__i386__) || defined(__x86_64__)
+  if (ceph_arch_intel_avx512_vpclmul && ceph_crc32c_intel_fast_avx512_vpclmul_exists()) {
+    return ceph_crc32c_intel_fast_avx512_vpclmul;
+  }
   if (ceph_arch_intel_sse42 && ceph_crc32c_intel_fast_exists()) {
     if (ceph_arch_intel_pclmul) {
       return ceph_crc32c_intel_fast_pclmul;
diff --git a/src/common/crc32c_intel_fast.c b/src/common/crc32c_intel_fast.c
index 1ce4340589b6..922eb420955c 100644
--- a/src/common/crc32c_intel_fast.c
+++ b/src/common/crc32c_intel_fast.c
@@ -2,8 +2,39 @@
 #include "common/crc32c_intel_baseline.h"
 
 extern unsigned int crc32_iscsi_01(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_01");
+extern unsigned int crc32_iscsi_by16_10(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_by16_10");
 extern unsigned int crc32_iscsi_zero_00(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_zero_00");
 
+#ifdef HAVE_NASM_X64_AVX512_VPCLMUL
+
+uint32_t ceph_crc32c_intel_fast_avx512_vpclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+	if (!buffer)
+	{
+		return crc32_iscsi_zero_00(buffer, len, crc);
+	}
+	return crc32_iscsi_by16_10(buffer, len, crc);
+}
+
+int ceph_crc32c_intel_fast_avx512_vpclmul_exists(void)
+{
+	return 1;
+}
+
+#else
+
+uint32_t ceph_crc32c_intel_fast_avx512_vpclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+	return 0;
+}
+
+int ceph_crc32c_intel_fast_avx512_vpclmul_exists(void)
+{
+	return 0;
+}
+
+#endif
+
 #ifdef HAVE_NASM_X64
 
 uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
diff --git a/src/common/crc32c_intel_fast.h b/src/common/crc32c_intel_fast.h
index 81c6e494f0c3..8886a9ac5a17 100644
--- a/src/common/crc32c_intel_fast.h
+++ b/src/common/crc32c_intel_fast.h
@@ -7,14 +7,21 @@ extern "C" {
 
 /* is the fast version compiled in */
 extern int ceph_crc32c_intel_fast_exists(void);
+extern int ceph_crc32c_intel_fast_avx512_vpclmul_exists(void);
 
 #ifdef __x86_64__
 
+extern uint32_t ceph_crc32c_intel_fast_avx512_vpclmul(uint32_t crc, unsigned char const *buffer, unsigned len);
 extern uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len);
 extern uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len);
 
 #else
 
+static inline uint32_t ceph_crc32c_intel_fast_avx512_vpclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+	return 0;
+}
+
 static inline uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
 {
 	return 0;
diff --git a/src/include/config-h.in.cmake b/src/include/config-h.in.cmake
index 4938ec419893..1ae58be6201b 100644
--- a/src/include/config-h.in.cmake
+++ b/src/include/config-h.in.cmake
@@ -256,6 +256,9 @@
 /* nasm can also build the isa-l:avx512 */
 #cmakedefine HAVE_NASM_X64_AVX512
 
+/* nasm can also build the isa-l:avx512 & vpclmulqdq */
+#cmakedefine HAVE_NASM_X64_AVX512_VPCLMUL
+
 /* Define if the erasure code isa-l plugin is compiled */
 #cmakedefine WITH_EC_ISA_PLUGIN
 