From: lvshuo2016 Date: Wed, 22 Oct 2025 10:09:52 +0000 (+0800) Subject: common,arch,cmake: add RISC-V crc32c support X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=01dc12ad5651f0957a0b46cd311a7ab8e7100d86;p=ceph.git common,arch,cmake: add RISC-V crc32c support This adds hardware-accelerated crc32c support for the RISC-V architecture. It includes the feature implementation, necessary CMake configuration, and plumbing in src/arch/riscv.c to correctly detect and select the optimized instructions. Signed-off-by: lvshuo2016 --- diff --git a/cmake/modules/SIMDExt.cmake b/cmake/modules/SIMDExt.cmake index 35b52e64200..d72998b1ac6 100644 --- a/cmake/modules/SIMDExt.cmake +++ b/cmake/modules/SIMDExt.cmake @@ -109,6 +109,26 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(powerpc|ppc)") if(HAVE_POWER8) message(STATUS " HAVE_POWER8 yes") endif() +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64|RISCV64") + set(HAVE_RISCV 1) + include(CheckCCompilerFlag) + + CHECK_C_COMPILER_FLAG("-march=rv64gcv_zbc_zvbc" HAVE_RISCV_ZVBC) + if(HAVE_RISCV_ZVBC) + set(SIMD_COMPILE_FLAGS "${SIMD_COMPILE_FLAGS} -march=rv64gcv_zbc_zvbc") + set(HAVE_RISCV_RVV TRUE) + set(HAVE_RISCV_ZVBC TRUE) + message(STATUS " RISC-V Extension: Vector + Zbc + Zvbc detected (Best for CRC32)") + else() + CHECK_C_COMPILER_FLAG("-march=rv64gcv" HAVE_RISCV_RVV_ONLY) + if(HAVE_RISCV_RVV_ONLY) + set(SIMD_COMPILE_FLAGS "${SIMD_COMPILE_FLAGS} -march=rv64gcv") + set(HAVE_RISCV_RVV TRUE) + message(STATUS " RISC-V Extension: Standard Vector (rv64gcv) detected") + else() + message(WARNING " RISC-V Vector extension NOT detected by compiler.") + endif() + endif() elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(s390x|S390X|s390|S390)") set(HAVE_S390X 1) message(STATUS " we are s390x") diff --git a/src/arch/CMakeLists.txt b/src/arch/CMakeLists.txt index e95d9bbb81f..35c67b81210 100644 --- a/src/arch/CMakeLists.txt +++ b/src/arch/CMakeLists.txt @@ -7,6 +7,8 @@ elseif(HAVE_INTEL) list(APPEND arch_srcs intel.c) 
elseif(HAVE_PPC64LE OR HAVE_PPC64 OR HAVE_PPC) list(APPEND arch_srcs ppc.c) +elseif(HAVE_RISCV) + list(APPEND arch_srcs riscv.c) elseif(HAVE_S390X) list(APPEND arch_srcs s390x.c) endif() diff --git a/src/arch/probe.cc b/src/arch/probe.cc index 2189fd68dfb..fe050921cd3 100644 --- a/src/arch/probe.cc +++ b/src/arch/probe.cc @@ -7,6 +7,7 @@ #include "arch/arm.h" #include "arch/ppc.h" #include "arch/s390x.h" +#include "arch/riscv.h" int ceph_arch_probe(void) { @@ -18,6 +19,8 @@ int ceph_arch_probe(void) ceph_arch_arm_probe(); #elif defined(__powerpc__) || defined(__ppc__) ceph_arch_ppc_probe(); +#elif defined(__riscv) + ceph_arch_riscv_probe(); #elif defined(__s390__) ceph_arch_s390x_probe(); #endif diff --git a/src/arch/riscv.c b/src/arch/riscv.c new file mode 100644 index 00000000000..d7c306c6e16 --- /dev/null +++ b/src/arch/riscv.c @@ -0,0 +1,36 @@ +/** + * Runtime detection of RISC-V vector crypto support. + */ + +#include <asm/hwprobe.h> +#include <sys/syscall.h> +#include <unistd.h> + +int ceph_arch_riscv_zbc = 0; +int ceph_arch_riscv_zvbc = 0; + +#ifndef RISCV_HWPROBE_EXT_ZBC +#define RISCV_HWPROBE_EXT_ZBC (1ULL << 15) +#endif + +#ifndef RISCV_HWPROBE_EXT_ZVBC +#define RISCV_HWPROBE_EXT_ZVBC (1ULL << 20) +#endif + +static int do_hwprobe(struct riscv_hwprobe *pairs, size_t count) +{ + return syscall(__NR_riscv_hwprobe, pairs, count, 0, NULL, 0); +} + +void ceph_arch_riscv_probe(void) +{ + struct riscv_hwprobe pairs[] = { + { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 }, + }; + + if (do_hwprobe(pairs, 1) == 0) { + unsigned long long ext = pairs[0].value; + ceph_arch_riscv_zbc = (ext & RISCV_HWPROBE_EXT_ZBC); + ceph_arch_riscv_zvbc = (ext & RISCV_HWPROBE_EXT_ZVBC); + } +} diff --git a/src/arch/riscv.h b/src/arch/riscv.h new file mode 100644 index 00000000000..2f90cb24c1d --- /dev/null +++ b/src/arch/riscv.h @@ -0,0 +1,26 @@ +/* + * Copyright 2025 sanechips Corporation + * + * This is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License version 2.1, as 
published by + * the Free Software Foundation. See file COPYING. + */ + +#ifndef CEPH_ARCH_RISCV_H +#define CEPH_ARCH_RISCV_H + +#ifdef __cplusplus +extern "C" { +#endif + +extern int ceph_arch_riscv_zbc; +extern int ceph_arch_riscv_zvbc; + +extern void ceph_arch_riscv_probe(void); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 22ea30bf88f..f25625f389d 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -255,6 +255,8 @@ elseif(HAVE_S390X) crc32c_s390x.c crc32c_s390x_le-vx.S ) +elseif(HAVE_RISCV_ZVBC) + list(APPEND crc32_srcs crc32c_riscv.c) endif(HAVE_INTEL) add_library(crc32 OBJECT ${crc32_srcs}) @@ -263,6 +265,9 @@ if(HAVE_ARMV8_CRC) set_target_properties(crc32 PROPERTIES COMPILE_FLAGS "${CMAKE_C_FLAGS} ${ARMV8_CRC_COMPILE_FLAGS}") endif() +if(HAVE_RISCV) + set_target_properties(crc32 PROPERTIES COMPILE_FLAGS "${CMAKE_C_FLAGS} ${SIMD_COMPILE_FLAGS}") +endif() target_link_libraries(crc32 arch) diff --git a/src/common/crc32c.cc b/src/common/crc32c.cc index 7ef99467b34..761bd2cd52a 100644 --- a/src/common/crc32c.cc +++ b/src/common/crc32c.cc @@ -7,11 +7,13 @@ #include "arch/arm.h" #include "arch/ppc.h" #include "arch/s390x.h" +#include "arch/riscv.h" #include "common/sctp_crc32.h" #include "common/crc32c_intel_fast.h" #include "common/crc32c_aarch64.h" #include "common/crc32c_ppc.h" #include "common/crc32c_s390x.h" +#include "common/crc32c_riscv.h" /* * choose best implementation based on the CPU architecture. 
@@ -41,6 +43,10 @@ ceph_crc32c_func_t ceph_choose_crc32(void) if (ceph_arch_ppc_crc32) { return ceph_crc32c_ppc; } +#elif defined(__riscv) && defined(HAVE_RISCV_ZVBC) + if (ceph_arch_riscv_zbc && ceph_arch_riscv_zvbc) { + return ceph_crc32c_riscv; + } #elif defined(__s390__) if (ceph_arch_s390x_crc32) { return ceph_crc32c_s390x; diff --git a/src/common/crc32c_riscv.c b/src/common/crc32c_riscv.c new file mode 100644 index 00000000000..a03e967b321 --- /dev/null +++ b/src/common/crc32c_riscv.c @@ -0,0 +1,188 @@ +/* Copyright (C) 2025 sanechips Technologies Co., Ltd. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <stddef.h> +#include <stdint.h> +#include "common/sctp_crc32.h" +#include "common/likely.h" + +// CRC32C polynomial constants +#define CRC32C_CONST_0 0xdd45aab8U +#define CRC32C_CONST_1 0x493c7d27U +#define CRC32C_CONST_QUO 0x0dea713f1ULL +#define CRC32C_CONST_POLY 0x105ec76f1ULL + +// Folding constants for CRC32C +static const uint64_t crc32c_fold_const[4] __attribute__((aligned(16))) = { + 0x00000000740eef02ULL, 0x000000009e4addf8ULL, + 0x00000000f20c0dfeULL, 0x00000000493c7d27ULL +}; + +/** + * Hardware-accelerated CRC32C using RISC-V vector crypto extensions. + * This uses the reflected polynomial version compatible with standard CRC32C. 
+ */ +uint32_t ceph_crc32c_riscv(uint32_t crc, unsigned char const *buf, unsigned len) { + if (unlikely(len < 64) || unlikely(!buf)) { + // Fall back to table-based implementation for small buffers + return ceph_crc32c_sctp(crc, buf, len); + } + + uint32_t result; + const uint64_t *fold_consts = crc32c_fold_const; + uint64_t tmp_buf[2] __attribute__((aligned(16))); + + __asm__ __volatile__( + // Initialize CRC + "li t5, 0xffffffff\n\t" + "and %[crc], %[crc], t5\n\t" + "li a3, 0\n\t" + "li t1, 64\n\t" + + // Set vector configuration for 128-bit elements + "vsetivli zero, 2, e64, m1, ta, ma\n\t" + + // Load first 64 bytes and initialize + "mv a4, %[buf]\n\t" + "vle64.v v0, 0(a4)\n\t" + "addi a4, a4, 16\n\t" + "vle64.v v1, 0(a4)\n\t" + "addi a4, a4, 16\n\t" + "vle64.v v2, 0(a4)\n\t" + "addi a4, a4, 16\n\t" + "vle64.v v3, 0(a4)\n\t" + "addi a4, a4, 16\n\t" + "andi a3, %[len], ~63\n\t" + "addi t0, a3, -64\n\t" + + // XOR initial CRC into first vector + "vmv.s.x v4, zero\n\t" + "vmv.s.x v5, %[crc]\n\t" + "vslideup.vi v5, v4, 1\n\t" + "vxor.vv v0, v0, v5\n\t" + "vmv.s.x v8, zero\n\t" + + // Load folding constant + "add a5, a4, t0\n\t" + "mv t4, %[consts]\n\t" + "vle64.v v5, 0(t4)\n\t" + + // Check if we need main loop + "addi t0, %[len], -64\n\t" + "bltu t0, t1, 2f\n\t" + + // Main loop - process 64 bytes at a time + "1:\n\t" + "vle64.v v7, 0(a4)\n\t" + "vclmul.vv v4, v0, v5\n\t" + "vclmulh.vv v0, v0, v5\n\t" + "vredxor.vs v0, v0, v8\n\t" + "vredxor.vs v4, v4, v8\n\t" + "vslideup.vi v4, v0, 1\n\t" + "vxor.vv v0, v4, v7\n\t" + + "addi a4, a4, 16\n\t" + "vle64.v v7, 0(a4)\n\t" + "vclmul.vv v4, v1, v5\n\t" + "vclmulh.vv v1, v1, v5\n\t" + "vredxor.vs v1, v1, v8\n\t" + "vredxor.vs v4, v4, v8\n\t" + "vslideup.vi v4, v1, 1\n\t" + "vxor.vv v1, v4, v7\n\t" + + "addi a4, a4, 16\n\t" + "vle64.v v7, 0(a4)\n\t" + "vclmul.vv v4, v2, v5\n\t" + "vclmulh.vv v2, v2, v5\n\t" + "vredxor.vs v2, v2, v8\n\t" + "vredxor.vs v4, v4, v8\n\t" + "vslideup.vi v4, v2, 1\n\t" + "vxor.vv v2, v4, v7\n\t" 
+ + "addi a4, a4, 16\n\t" + "vle64.v v7, 0(a4)\n\t" + "vclmul.vv v4, v3, v5\n\t" + "vclmulh.vv v3, v3, v5\n\t" + "vredxor.vs v3, v3, v8\n\t" + "vredxor.vs v4, v4, v8\n\t" + "vslideup.vi v4, v3, 1\n\t" + "vxor.vv v3, v4, v7\n\t" + + "addi a4, a4, 16\n\t" + "bne a4, a5, 1b\n\t" + + // Fold 512 bits to 128 bits + "2:\n\t" + "addi t4, t4, 16\n\t" + "vle64.v v5, 0(t4)\n\t" + "vclmul.vv v6, v0, v5\n\t" + "vclmulh.vv v7, v0, v5\n\t" + "vredxor.vs v6, v6, v8\n\t" + "vredxor.vs v7, v7, v8\n\t" + "vslideup.vi v6, v7, 1\n\t" + "vxor.vv v0, v6, v1\n\t" + + "vclmul.vv v6, v0, v5\n\t" + "vclmulh.vv v7, v0, v5\n\t" + "vredxor.vs v6, v6, v8\n\t" + "vredxor.vs v7, v7, v8\n\t" + "vslideup.vi v6, v7, 1\n\t" + "vxor.vv v0, v6, v2\n\t" + + "vclmul.vv v6, v0, v5\n\t" + "vclmulh.vv v7, v0, v5\n\t" + "vredxor.vs v6, v6, v8\n\t" + "vredxor.vs v7, v7, v8\n\t" + "vslideup.vi v6, v7, 1\n\t" + "vxor.vv v0, v6, v3\n\t" + + // Extract 128-bit result from vector register + "vse64.v v0, (%[tmp_ptr])\n\t" + "ld t0, 0(%[tmp_ptr])\n\t" + "ld t1, 8(%[tmp_ptr])\n\t" + + // Barrett reduction + "li t2, %[const0]\n\t" + "and t2, t2, t5\n\t" + "li t3, %[const1]\n\t" + + "clmul t4, t0, t3\n\t" + "clmulh t3, t0, t3\n\t" + "xor t1, t1, t4\n\t" + "and t4, t1, t5\n\t" + "srli t1, t1, 32\n\t" + "clmul t0, t4, t2\n\t" + "slli t3, t3, 32\n\t" + "xor t3, t3, t1\n\t" + "xor t3, t3, t0\n\t" + + // Final Barrett reduction + "and t4, t3, t5\n\t" + "li t2, %[quo]\n\t" + "li t1, %[poly]\n\t" + "clmul t4, t4, t2\n\t" + "and t4, t4, t5\n\t" + "clmul t4, t4, t1\n\t" + "xor t4, t3, t4\n\t" + "srai %[result], t4, 32\n\t" + "and %[result], %[result], t5\n\t" + + : [result] "=r" (result),[crc] "+r" (crc) + : [buf] "r" (buf), [len] "r" (len), [consts] "r" (fold_consts), + [const0] "i" (CRC32C_CONST_0), [const1] "i" (CRC32C_CONST_1), + [quo] "i" (CRC32C_CONST_QUO), [poly] "i" (CRC32C_CONST_POLY), + [tmp_ptr] "r" (tmp_buf) + : "a3", "a4", "a5", "t0", "t1", "t2", "t3", "t4", "t5", "v0", "v1", "v2", "v3", + "v4", "v5", "v6", "v7", 
"v8", "memory" ); size_t tail_len = len % 64; if (tail_len > 0) { result = ceph_crc32c_sctp(result, buf + len - tail_len, tail_len); } return result; } diff --git a/src/common/crc32c_riscv.h b/src/common/crc32c_riscv.h new file mode 100644 index 00000000000..092c266113c --- /dev/null +++ b/src/common/crc32c_riscv.h @@ -0,0 +1,29 @@ +/* Copyright (C) 2025 sanechips Technologies Co., Ltd. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef CEPH_COMMON_CRC32C_RISCV_H +#define CEPH_COMMON_CRC32C_RISCV_H + +#include <stdint.h> + +#if defined(__riscv) && defined(HAVE_RISCV_ZVBC) + +#ifdef __cplusplus +extern "C" { +#endif + +extern uint32_t ceph_crc32c_riscv(uint32_t crc, unsigned char const *buffer, unsigned len); + +#ifdef __cplusplus +} +#endif + +#endif + +#endif + diff --git a/src/include/config-h.in.cmake b/src/include/config-h.in.cmake index 7285210c7b1..039b62a29d2 100644 --- a/src/include/config-h.in.cmake +++ b/src/include/config-h.in.cmake @@ -295,6 +295,12 @@ /* Support ARMv8 CRC and CRYPTO intrinsics */ #cmakedefine HAVE_ARMV8_CRC_CRYPTO_INTRINSICS +/* Define if you have RISC-V Vector extension */ +#cmakedefine HAVE_RISCV_RVV 1 + +/* Define if you have RISC-V ZVBC extension */ +#cmakedefine HAVE_RISCV_ZVBC 1 + /* Define if you have struct stat.st_mtimespec.tv_nsec */ #cmakedefine HAVE_STAT_ST_MTIMESPEC_TV_NSEC