lib/crc: riscv: Migrate optimized CRC code into lib/crc/
author     Eric Biggers <ebiggers@kernel.org>
           Sat, 7 Jun 2025 20:04:50 +0000 (13:04 -0700)
committer  Eric Biggers <ebiggers@kernel.org>
           Mon, 30 Jun 2025 16:31:57 +0000 (09:31 -0700)
Move the riscv-optimized CRC code from arch/riscv/lib/crc* into its new
location in lib/crc/riscv/, and wire it up in the new way.  This new way
of organizing the CRC code eliminates the need to artificially split the
code for each CRC variant into separate arch and generic modules,
enabling better inlining and dead code elimination.  For more details,
see "lib/crc: Prepare for arch-optimized code in subdirs of lib/crc/".

Reviewed-by: "Martin K. Petersen" <martin.petersen@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: "Jason A. Donenfeld" <Jason@zx2c4.com>
Link: https://lore.kernel.org/r/20250607200454.73587-9-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
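
For context, the new scheme replaces the per-arch modules with static inline *_arch() helpers in lib/crc/riscv/*.h that the main lib/crc/ modules include directly (the Makefile hunks below add -I$(src)/$(SRCARCH) plus the riscv objects). A minimal sketch of the pattern, under the assumption that it mirrors lib/crc/crc32-main.c rather than quoting it:

  /* Illustrative only; not the literal lib/crc/crc32-main.c. */
  #include <linux/crc32.h>

  #ifdef CONFIG_CRC32_ARCH
  #include "crc32.h"	/* lib/crc/riscv/crc32.h, via -I$(src)/$(SRCARCH) */
  #endif

  u32 crc32_le(u32 crc, const void *p, size_t len)
  {
  #ifdef CONFIG_CRC32_ARCH
  	return crc32_le_arch(crc, p, len);	/* inlined from the arch header */
  #else
  	return crc32_le_base(crc, p, len);	/* generic table-based fallback */
  #endif
  }

Because crc32_le_arch() is a static inline in the same translation unit, the compiler can inline it and discard whichever path is unused, which is the inlining and dead code elimination the message refers to.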
26 files changed:
arch/riscv/Kconfig
arch/riscv/lib/Makefile
arch/riscv/lib/crc-clmul-consts.h [deleted file]
arch/riscv/lib/crc-clmul-template.h [deleted file]
arch/riscv/lib/crc-clmul.h [deleted file]
arch/riscv/lib/crc-t10dif.c [deleted file]
arch/riscv/lib/crc16_msb.c [deleted file]
arch/riscv/lib/crc32.c [deleted file]
arch/riscv/lib/crc32_lsb.c [deleted file]
arch/riscv/lib/crc32_msb.c [deleted file]
arch/riscv/lib/crc64.c [deleted file]
arch/riscv/lib/crc64_lsb.c [deleted file]
arch/riscv/lib/crc64_msb.c [deleted file]
lib/crc/Kconfig
lib/crc/Makefile
lib/crc/riscv/crc-clmul-consts.h [new file with mode: 0644]
lib/crc/riscv/crc-clmul-template.h [new file with mode: 0644]
lib/crc/riscv/crc-clmul.h [new file with mode: 0644]
lib/crc/riscv/crc-t10dif.h [new file with mode: 0644]
lib/crc/riscv/crc16_msb.c [new file with mode: 0644]
lib/crc/riscv/crc32.h [new file with mode: 0644]
lib/crc/riscv/crc32_lsb.c [new file with mode: 0644]
lib/crc/riscv/crc32_msb.c [new file with mode: 0644]
lib/crc/riscv/crc64.h [new file with mode: 0644]
lib/crc/riscv/crc64_lsb.c [new file with mode: 0644]
lib/crc/riscv/crc64_msb.c [new file with mode: 0644]

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 36061f4732b7496a9c68a9a10f9959849dc2a95c..d963d8faf2aeb7dca3bc6ccb00dd021c54d0fe93 100644 (file)
@@ -24,9 +24,6 @@ config RISCV
        select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
        select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
        select ARCH_HAS_BINFMT_FLAT
-       select ARCH_HAS_CRC32 if RISCV_ISA_ZBC
-       select ARCH_HAS_CRC64 if 64BIT && RISCV_ISA_ZBC
-       select ARCH_HAS_CRC_T10DIF if RISCV_ISA_ZBC
        select ARCH_HAS_CURRENT_STACK_POINTER
        select ARCH_HAS_DEBUG_VIRTUAL if MMU
        select ARCH_HAS_DEBUG_VM_PGTABLE
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 0baec92d2f55b8320281bded76442f9a31581622..a4f4b48ed3a47334e546c44a4d9ec692f83cacda 100644 (file)
@@ -16,12 +16,6 @@ endif
 lib-$(CONFIG_MMU)      += uaccess.o
 lib-$(CONFIG_64BIT)    += tishift.o
 lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
-obj-$(CONFIG_CRC32_ARCH)       += crc32-riscv.o
-crc32-riscv-y := crc32.o crc32_msb.o crc32_lsb.o
-obj-$(CONFIG_CRC64_ARCH) += crc64-riscv.o
-crc64-riscv-y := crc64.o crc64_msb.o crc64_lsb.o
-obj-$(CONFIG_CRC_T10DIF_ARCH)  += crc-t10dif-riscv.o
-crc-t10dif-riscv-y := crc-t10dif.o crc16_msb.o
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 lib-$(CONFIG_RISCV_ISA_V)      += xor.o
 lib-$(CONFIG_RISCV_ISA_V)      += riscv_v_helpers.o
diff --git a/arch/riscv/lib/crc-clmul-consts.h b/arch/riscv/lib/crc-clmul-consts.h
deleted file mode 100644 (file)
index 8d73449..0000000
+++ /dev/null
@@ -1,122 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * CRC constants generated by:
- *
- *     ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5
- *
- * Do not edit manually.
- */
-
-struct crc_clmul_consts {
-       unsigned long fold_across_2_longs_const_hi;
-       unsigned long fold_across_2_longs_const_lo;
-       unsigned long barrett_reduction_const_1;
-       unsigned long barrett_reduction_const_2;
-};
-
-/*
- * Constants generated for most-significant-bit-first CRC-16 using
- * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
- */
-static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
-       .fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */
-       .fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */
-       .barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */
-       .barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */
-#else
-       .fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */
-       .fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */
-       .barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */
-       .barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */
-#endif
-};
-
-/*
- * Constants generated for most-significant-bit-first CRC-32 using
- * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
- *        x^5 + x^4 + x^2 + x^1 + x^0
- */
-static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
-       .fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */
-       .fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */
-       .barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */
-       .barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */
-#else
-       .fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */
-       .fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */
-       .barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */
-       .barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */
-#endif
-};
-
-/*
- * Constants generated for least-significant-bit-first CRC-32 using
- * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
- *        x^5 + x^4 + x^2 + x^1 + x^0
- */
-static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
-       .fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */
-       .fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */
-       .barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */
-       .barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */
-#else
-       .fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */
-       .fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */
-       .barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */
-       .barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */
-#endif
-};
-
-/*
- * Constants generated for least-significant-bit-first CRC-32 using
- * G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 +
- *        x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0
- */
-static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
-       .fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */
-       .fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */
-       .barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */
-       .barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */
-#else
-       .fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */
-       .fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */
-       .barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */
-       .barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */
-#endif
-};
-
-/*
- * Constants generated for most-significant-bit-first CRC-64 using
- * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
- *        x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
- *        x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
- *        x^7 + x^4 + x^1 + x^0
- */
-#ifdef CONFIG_64BIT
-static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = {
-       .fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */
-       .fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */
-       .barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */
-       .barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */
-};
-#endif
-
-/*
- * Constants generated for least-significant-bit-first CRC-64 using
- * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 +
- *        x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 +
- *        x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 +
- *        x^4 + x^3 + x^0
- */
-#ifdef CONFIG_64BIT
-static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = {
-       .fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */
-       .fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */
-       .barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */
-       .barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */
-};
-#endif
diff --git a/arch/riscv/lib/crc-clmul-template.h b/arch/riscv/lib/crc-clmul-template.h
deleted file mode 100644 (file)
index 77187e7..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* Copyright 2025 Google LLC */
-
-/*
- * This file is a "template" that generates a CRC function optimized using the
- * RISC-V Zbc (scalar carryless multiplication) extension.  The includer of this
- * file must define the following parameters to specify the type of CRC:
- *
- *     crc_t: the data type of the CRC, e.g. u32 for a 32-bit CRC
- *     LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural
- *              mapping between bits and polynomial coefficients
- *              1 for a lsb (least-significant-bit) first CRC, i.e. reflected
- *              mapping between bits and polynomial coefficients
- */
-
-#include <asm/byteorder.h>
-#include <linux/minmax.h>
-
-#define CRC_BITS       (8 * sizeof(crc_t))     /* a.k.a. 'n' */
-
-static inline unsigned long clmul(unsigned long a, unsigned long b)
-{
-       unsigned long res;
-
-       asm(".option push\n"
-           ".option arch,+zbc\n"
-           "clmul %0, %1, %2\n"
-           ".option pop\n"
-           : "=r" (res) : "r" (a), "r" (b));
-       return res;
-}
-
-static inline unsigned long clmulh(unsigned long a, unsigned long b)
-{
-       unsigned long res;
-
-       asm(".option push\n"
-           ".option arch,+zbc\n"
-           "clmulh %0, %1, %2\n"
-           ".option pop\n"
-           : "=r" (res) : "r" (a), "r" (b));
-       return res;
-}
-
-static inline unsigned long clmulr(unsigned long a, unsigned long b)
-{
-       unsigned long res;
-
-       asm(".option push\n"
-           ".option arch,+zbc\n"
-           "clmulr %0, %1, %2\n"
-           ".option pop\n"
-           : "=r" (res) : "r" (a), "r" (b));
-       return res;
-}
-
-/*
- * crc_load_long() loads one "unsigned long" of aligned data bytes, producing a
- * polynomial whose bit order matches the CRC's bit order.
- */
-#ifdef CONFIG_64BIT
-#  if LSB_CRC
-#    define crc_load_long(x)   le64_to_cpup(x)
-#  else
-#    define crc_load_long(x)   be64_to_cpup(x)
-#  endif
-#else
-#  if LSB_CRC
-#    define crc_load_long(x)   le32_to_cpup(x)
-#  else
-#    define crc_load_long(x)   be32_to_cpup(x)
-#  endif
-#endif
-
-/* XOR @crc into the end of @msgpoly that represents the high-order terms. */
-static inline unsigned long
-crc_clmul_prep(crc_t crc, unsigned long msgpoly)
-{
-#if LSB_CRC
-       return msgpoly ^ crc;
-#else
-       return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS));
-#endif
-}
-
-/*
- * Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it
- * modulo the generator polynomial G.  This gives the CRC of @msgpoly.
- */
-static inline crc_t
-crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts)
-{
-       unsigned long tmp;
-
-       /*
-        * First step of Barrett reduction with integrated multiplication by
-        * x^n: calculate floor((msgpoly * x^n) / G).  This is the value by
-        * which G needs to be multiplied to cancel out the x^n and higher terms
-        * of msgpoly * x^n.  Do it using the following formula:
-        *
-        * msb-first:
-        *    floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1))
-        * lsb-first:
-        *    floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG)
-        *
-        * barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G),
-        * which fits a long exactly.  Using any lower power of x there would
-        * not carry enough precision through the calculation, while using any
-        * higher power of x would require extra instructions to handle a wider
-        * multiplication.  In the msb-first case, using this power of x results
-        * in needing a floored division by x^(BITS_PER_LONG-1), which matches
-        * what clmulr produces.  In the lsb-first case, a factor of x gets
-        * implicitly introduced by each carryless multiplication (shown as
-        * '* x' above), and the floored division instead needs to be by
-        * x^BITS_PER_LONG which matches what clmul produces.
-        */
-#if LSB_CRC
-       tmp = clmul(msgpoly, consts->barrett_reduction_const_1);
-#else
-       tmp = clmulr(msgpoly, consts->barrett_reduction_const_1);
-#endif
-
-       /*
-        * Second step of Barrett reduction:
-        *
-        *    crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G))
-        *
-        * This reduces (msgpoly * x^n) modulo G by adding the appropriate
-        * multiple of G to it.  The result uses only the x^0..x^(n-1) terms.
-        * HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those
-        * terms in the first place, it is more efficient to do the equivalent:
-        *
-        *    crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n
-        *
-        * In the lsb-first case further modify it to the following which avoids
-        * a shift, as the crc ends up in the physically low n bits from clmulr:
-        *
-        *    product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x
-        *    crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n
-        *
-        * barrett_reduction_const_2 contains the constant multiplier (G - x^n)
-        * or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above.  The
-        * cast of the result to crc_t is essential, as it applies the mod x^n!
-        */
-#if LSB_CRC
-       return clmulr(tmp, consts->barrett_reduction_const_2);
-#else
-       return clmul(tmp, consts->barrett_reduction_const_2);
-#endif
-}
-
-/* Update @crc with the data from @msgpoly. */
-static inline crc_t
-crc_clmul_update_long(crc_t crc, unsigned long msgpoly,
-                     const struct crc_clmul_consts *consts)
-{
-       return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts);
-}
-
-/* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */
-static inline crc_t
-crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len,
-                        const struct crc_clmul_consts *consts)
-{
-       unsigned long msgpoly;
-       size_t i;
-
-#if LSB_CRC
-       msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8);
-       for (i = 1; i < len; i++)
-               msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8));
-#else
-       msgpoly = p[0];
-       for (i = 1; i < len; i++)
-               msgpoly = (msgpoly << 8) ^ p[i];
-#endif
-
-       if (len >= sizeof(crc_t)) {
-       #if LSB_CRC
-               msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
-       #else
-               msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS);
-       #endif
-               return crc_clmul_long(msgpoly, consts);
-       }
-#if LSB_CRC
-       msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
-       return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len));
-#else
-       msgpoly ^= crc >> (CRC_BITS - 8*len);
-       return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len));
-#endif
-}
-
-static inline crc_t
-crc_clmul(crc_t crc, const void *p, size_t len,
-         const struct crc_clmul_consts *consts)
-{
-       size_t align;
-
-       /* This implementation assumes that the CRC fits in an unsigned long. */
-       BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long));
-
-       /* If the buffer is not long-aligned, align it. */
-       align = (unsigned long)p % sizeof(unsigned long);
-       if (align && len) {
-               align = min(sizeof(unsigned long) - align, len);
-               crc = crc_clmul_update_partial(crc, p, align, consts);
-               p += align;
-               len -= align;
-       }
-
-       if (len >= 4 * sizeof(unsigned long)) {
-               unsigned long m0, m1;
-
-               m0 = crc_clmul_prep(crc, crc_load_long(p));
-               m1 = crc_load_long(p + sizeof(unsigned long));
-               p += 2 * sizeof(unsigned long);
-               len -= 2 * sizeof(unsigned long);
-               /*
-                * Main loop.  Each iteration starts with a message polynomial
-                * (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two
-                * more longs of data to form x^(3*BITS_PER_LONG)*m0 +
-                * x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then
-                * "folds" that back into a congruent (modulo G) value that uses
-                * just m0 and m1 again.  This is done by multiplying m0 by the
-                * precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by
-                * the precomputed constant (x^(2*BITS_PER_LONG) mod G), then
-                * adding the results to m2 and m3 as appropriate.  Each such
-                * multiplication produces a result twice the length of a long,
-                * which in RISC-V is two instructions clmul and clmulh.
-                *
-                * This could be changed to fold across more than 2 longs at a
-                * time if there is a CPU that can take advantage of it.
-                */
-               do {
-                       unsigned long p0, p1, p2, p3;
-
-                       p0 = clmulh(m0, consts->fold_across_2_longs_const_hi);
-                       p1 = clmul(m0, consts->fold_across_2_longs_const_hi);
-                       p2 = clmulh(m1, consts->fold_across_2_longs_const_lo);
-                       p3 = clmul(m1, consts->fold_across_2_longs_const_lo);
-                       m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p);
-                       m1 = (LSB_CRC ? p0 ^ p2 : p1 ^ p3) ^
-                            crc_load_long(p + sizeof(unsigned long));
-
-                       p += 2 * sizeof(unsigned long);
-                       len -= 2 * sizeof(unsigned long);
-               } while (len >= 2 * sizeof(unsigned long));
-
-               crc = crc_clmul_long(m0, consts);
-               crc = crc_clmul_update_long(crc, m1, consts);
-       }
-
-       while (len >= sizeof(unsigned long)) {
-               crc = crc_clmul_update_long(crc, crc_load_long(p), consts);
-               p += sizeof(unsigned long);
-               len -= sizeof(unsigned long);
-       }
-
-       if (len)
-               crc = crc_clmul_update_partial(crc, p, len, consts);
-
-       return crc;
-}
diff --git a/arch/riscv/lib/crc-clmul.h b/arch/riscv/lib/crc-clmul.h
deleted file mode 100644 (file)
index dd17362..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* Copyright 2025 Google LLC */
-
-#ifndef _RISCV_CRC_CLMUL_H
-#define _RISCV_CRC_CLMUL_H
-
-#include <linux/types.h>
-#include "crc-clmul-consts.h"
-
-u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
-                   const struct crc_clmul_consts *consts);
-u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
-                   const struct crc_clmul_consts *consts);
-u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
-                   const struct crc_clmul_consts *consts);
-#ifdef CONFIG_64BIT
-u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
-                   const struct crc_clmul_consts *consts);
-u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
-                   const struct crc_clmul_consts *consts);
-#endif
-
-#endif /* _RISCV_CRC_CLMUL_H */
diff --git a/arch/riscv/lib/crc-t10dif.c b/arch/riscv/lib/crc-t10dif.c
deleted file mode 100644 (file)
index e6b0051..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized CRC-T10DIF function
- *
- * Copyright 2025 Google LLC
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <linux/crc-t10dif.h>
-#include <linux/module.h>
-
-#include "crc-clmul.h"
-
-u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
-{
-       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
-               return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts);
-       return crc_t10dif_generic(crc, p, len);
-}
-EXPORT_SYMBOL(crc_t10dif_arch);
-
-MODULE_DESCRIPTION("RISC-V optimized CRC-T10DIF function");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crc16_msb.c b/arch/riscv/lib/crc16_msb.c
deleted file mode 100644 (file)
index 554d295..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized most-significant-bit-first CRC16
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u16 crc_t;
-#define LSB_CRC 0
-#include "crc-clmul-template.h"
-
-u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
-                   const struct crc_clmul_consts *consts)
-{
-       return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c
deleted file mode 100644 (file)
index a3188b7..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized CRC32 functions
- *
- * Copyright 2025 Google LLC
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <linux/crc32.h>
-#include <linux/module.h>
-
-#include "crc-clmul.h"
-
-u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
-{
-       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
-               return crc32_lsb_clmul(crc, p, len,
-                                      &crc32_lsb_0xedb88320_consts);
-       return crc32_le_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_le_arch);
-
-u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
-{
-       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
-               return crc32_msb_clmul(crc, p, len,
-                                      &crc32_msb_0x04c11db7_consts);
-       return crc32_be_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_be_arch);
-
-u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
-{
-       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
-               return crc32_lsb_clmul(crc, p, len,
-                                      &crc32_lsb_0x82f63b78_consts);
-       return crc32c_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32c_arch);
-
-u32 crc32_optimizations(void)
-{
-       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
-               return CRC32_LE_OPTIMIZATION |
-                      CRC32_BE_OPTIMIZATION |
-                      CRC32C_OPTIMIZATION;
-       return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_DESCRIPTION("RISC-V optimized CRC32 functions");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crc32_lsb.c b/arch/riscv/lib/crc32_lsb.c
deleted file mode 100644 (file)
index 72fd67e..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized least-significant-bit-first CRC32
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u32 crc_t;
-#define LSB_CRC 1
-#include "crc-clmul-template.h"
-
-u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
-                   const struct crc_clmul_consts *consts)
-{
-       return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc32_msb.c b/arch/riscv/lib/crc32_msb.c
deleted file mode 100644 (file)
index fdbeacc..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized most-significant-bit-first CRC32
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u32 crc_t;
-#define LSB_CRC 0
-#include "crc-clmul-template.h"
-
-u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
-                   const struct crc_clmul_consts *consts)
-{
-       return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc64.c b/arch/riscv/lib/crc64.c
deleted file mode 100644 (file)
index f0015a2..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized CRC64 functions
- *
- * Copyright 2025 Google LLC
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <linux/crc64.h>
-#include <linux/module.h>
-
-#include "crc-clmul.h"
-
-u64 crc64_be_arch(u64 crc, const u8 *p, size_t len)
-{
-       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
-               return crc64_msb_clmul(crc, p, len,
-                                      &crc64_msb_0x42f0e1eba9ea3693_consts);
-       return crc64_be_generic(crc, p, len);
-}
-EXPORT_SYMBOL(crc64_be_arch);
-
-u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
-{
-       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
-               return crc64_lsb_clmul(crc, p, len,
-                                      &crc64_lsb_0x9a6c9329ac4bc9b5_consts);
-       return crc64_nvme_generic(crc, p, len);
-}
-EXPORT_SYMBOL(crc64_nvme_arch);
-
-MODULE_DESCRIPTION("RISC-V optimized CRC64 functions");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crc64_lsb.c b/arch/riscv/lib/crc64_lsb.c
deleted file mode 100644 (file)
index c5371bb..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized least-significant-bit-first CRC64
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u64 crc_t;
-#define LSB_CRC 1
-#include "crc-clmul-template.h"
-
-u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
-                   const struct crc_clmul_consts *consts)
-{
-       return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc64_msb.c b/arch/riscv/lib/crc64_msb.c
deleted file mode 100644 (file)
index 1925d1d..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized most-significant-bit-first CRC64
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u64 crc_t;
-#define LSB_CRC 0
-#include "crc-clmul-template.h"
-
-u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
-                   const struct crc_clmul_consts *consts)
-{
-       return crc_clmul(crc, p, len, consts);
-}
diff --git a/lib/crc/Kconfig b/lib/crc/Kconfig
index bfcf43c2592a09a1512dde456db770538b463a4b..b8894c451aca9d31631f691b5ea403b08cd0f40e 100644 (file)
@@ -53,6 +53,7 @@ config CRC_T10DIF_ARCH
        default y if ARM && KERNEL_MODE_NEON
        default y if ARM64 && KERNEL_MODE_NEON
        default y if PPC64 && ALTIVEC
+       default y if RISCV && RISCV_ISA_ZBC
 
 config CRC32
        tristate
@@ -72,6 +73,7 @@ config CRC32_ARCH
        default y if LOONGARCH
        default y if MIPS && CPU_MIPSR6
        default y if PPC64 && ALTIVEC
+       default y if RISCV && RISCV_ISA_ZBC
 
 config CRC64
        tristate
@@ -85,6 +87,7 @@ config ARCH_HAS_CRC64
 config CRC64_ARCH
        bool
        depends on CRC64 && CRC_OPTIMIZATIONS
+       default y if RISCV && RISCV_ISA_ZBC && 64BIT
 
 config CRC_OPTIMIZATIONS
        bool "Enable optimized CRC implementations" if EXPERT
diff --git a/lib/crc/Makefile b/lib/crc/Makefile
index 555fd3fb6d19789a09b2bdcfcb6ab0ce624963bf..190f889adf556e98daf332c3993204c94e0f0e47 100644 (file)
@@ -16,6 +16,7 @@ CFLAGS_crc-t10dif-main.o += -I$(src)/$(SRCARCH)
 crc-t10dif-$(CONFIG_ARM) += arm/crc-t10dif-core.o
 crc-t10dif-$(CONFIG_ARM64) += arm64/crc-t10dif-core.o
 crc-t10dif-$(CONFIG_PPC) += powerpc/crct10dif-vpmsum_asm.o
+crc-t10dif-$(CONFIG_RISCV) += riscv/crc16_msb.o
 endif
 
 obj-$(CONFIG_CRC32) += crc32.o
@@ -25,12 +26,14 @@ CFLAGS_crc32-main.o += -I$(src)/$(SRCARCH)
 crc32-$(CONFIG_ARM) += arm/crc32-core.o
 crc32-$(CONFIG_ARM64) += arm64/crc32-core.o
 crc32-$(CONFIG_PPC) += powerpc/crc32c-vpmsum_asm.o
+crc32-$(CONFIG_RISCV) += riscv/crc32_lsb.o riscv/crc32_msb.o
 endif
 
 obj-$(CONFIG_CRC64) += crc64.o
 crc64-y := crc64-main.o
 ifeq ($(CONFIG_CRC64_ARCH),y)
 CFLAGS_crc64-main.o += -I$(src)/$(SRCARCH)
+crc64-$(CONFIG_RISCV) += riscv/crc64_lsb.o riscv/crc64_msb.o
 endif
 
 obj-y += tests/
diff --git a/lib/crc/riscv/crc-clmul-consts.h b/lib/crc/riscv/crc-clmul-consts.h
new file mode 100644 (file)
index 0000000..8d73449
--- /dev/null
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CRC constants generated by:
+ *
+ *     ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5
+ *
+ * Do not edit manually.
+ */
+
+struct crc_clmul_consts {
+       unsigned long fold_across_2_longs_const_hi;
+       unsigned long fold_across_2_longs_const_lo;
+       unsigned long barrett_reduction_const_1;
+       unsigned long barrett_reduction_const_2;
+};
+
+/*
+ * Constants generated for most-significant-bit-first CRC-16 using
+ * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
+ */
+static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = {
+#ifdef CONFIG_64BIT
+       .fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */
+       .fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */
+       .barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */
+       .barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */
+#else
+       .fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */
+       .fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */
+       .barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */
+       .barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */
+#endif
+};
+
+/*
+ * Constants generated for most-significant-bit-first CRC-32 using
+ * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
+ *        x^5 + x^4 + x^2 + x^1 + x^0
+ */
+static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = {
+#ifdef CONFIG_64BIT
+       .fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */
+       .fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */
+       .barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */
+       .barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */
+#else
+       .fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */
+       .fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */
+       .barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */
+       .barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */
+#endif
+};
+
+/*
+ * Constants generated for least-significant-bit-first CRC-32 using
+ * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
+ *        x^5 + x^4 + x^2 + x^1 + x^0
+ */
+static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = {
+#ifdef CONFIG_64BIT
+       .fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */
+       .fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */
+       .barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */
+       .barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */
+#else
+       .fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */
+       .fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */
+       .barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */
+       .barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */
+#endif
+};
+
+/*
+ * Constants generated for least-significant-bit-first CRC-32 using
+ * G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 +
+ *        x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0
+ */
+static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = {
+#ifdef CONFIG_64BIT
+       .fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */
+       .fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */
+       .barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */
+       .barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */
+#else
+       .fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */
+       .fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */
+       .barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */
+       .barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */
+#endif
+};
+
+/*
+ * Constants generated for most-significant-bit-first CRC-64 using
+ * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ *        x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ *        x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ *        x^7 + x^4 + x^1 + x^0
+ */
+#ifdef CONFIG_64BIT
+static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = {
+       .fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */
+       .fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */
+       .barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */
+       .barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */
+};
+#endif
+
+/*
+ * Constants generated for least-significant-bit-first CRC-64 using
+ * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 +
+ *        x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 +
+ *        x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 +
+ *        x^4 + x^3 + x^0
+ */
+#ifdef CONFIG_64BIT
+static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = {
+       .fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */
+       .fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */
+       .barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */
+       .barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */
+};
+#endif
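
These constants can be spot-checked without Zbc hardware, since each is just a polynomial computation mod G. For instance, the 32-bit fold constant x^64 mod G for CRC-16 (0xf249 above) falls out of 64 multiply-by-x steps with reduction; a user-space sketch:

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
  	uint16_t r = 1;	/* the polynomial x^0 */

  	/* Multiply by x 64 times; whenever an x^16 term is shifted out,
  	 * reduce via x^16 == 0x8bb7 (mod G), i.e. G - x^16. */
  	for (int i = 0; i < 64; i++)
  		r = (r & 0x8000) ? (uint16_t)((r << 1) ^ 0x8bb7)
  				 : (uint16_t)(r << 1);

  	printf("x^64 mod G = 0x%04x\n", r);	/* expect 0xf249 */
  	return 0;
  }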
diff --git a/lib/crc/riscv/crc-clmul-template.h b/lib/crc/riscv/crc-clmul-template.h
new file mode 100644 (file)
index 0000000..77187e7
--- /dev/null
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright 2025 Google LLC */
+
+/*
+ * This file is a "template" that generates a CRC function optimized using the
+ * RISC-V Zbc (scalar carryless multiplication) extension.  The includer of this
+ * file must define the following parameters to specify the type of CRC:
+ *
+ *     crc_t: the data type of the CRC, e.g. u32 for a 32-bit CRC
+ *     LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural
+ *              mapping between bits and polynomial coefficients
+ *              1 for a lsb (least-significant-bit) first CRC, i.e. reflected
+ *              mapping between bits and polynomial coefficients
+ */
+
+#include <asm/byteorder.h>
+#include <linux/minmax.h>
+
+#define CRC_BITS       (8 * sizeof(crc_t))     /* a.k.a. 'n' */
+
+static inline unsigned long clmul(unsigned long a, unsigned long b)
+{
+       unsigned long res;
+
+       asm(".option push\n"
+           ".option arch,+zbc\n"
+           "clmul %0, %1, %2\n"
+           ".option pop\n"
+           : "=r" (res) : "r" (a), "r" (b));
+       return res;
+}
+
+static inline unsigned long clmulh(unsigned long a, unsigned long b)
+{
+       unsigned long res;
+
+       asm(".option push\n"
+           ".option arch,+zbc\n"
+           "clmulh %0, %1, %2\n"
+           ".option pop\n"
+           : "=r" (res) : "r" (a), "r" (b));
+       return res;
+}
+
+static inline unsigned long clmulr(unsigned long a, unsigned long b)
+{
+       unsigned long res;
+
+       asm(".option push\n"
+           ".option arch,+zbc\n"
+           "clmulr %0, %1, %2\n"
+           ".option pop\n"
+           : "=r" (res) : "r" (a), "r" (b));
+       return res;
+}
+
+/*
+ * crc_load_long() loads one "unsigned long" of aligned data bytes, producing a
+ * polynomial whose bit order matches the CRC's bit order.
+ */
+#ifdef CONFIG_64BIT
+#  if LSB_CRC
+#    define crc_load_long(x)   le64_to_cpup(x)
+#  else
+#    define crc_load_long(x)   be64_to_cpup(x)
+#  endif
+#else
+#  if LSB_CRC
+#    define crc_load_long(x)   le32_to_cpup(x)
+#  else
+#    define crc_load_long(x)   be32_to_cpup(x)
+#  endif
+#endif
+
+/* XOR @crc into the end of @msgpoly that represents the high-order terms. */
+static inline unsigned long
+crc_clmul_prep(crc_t crc, unsigned long msgpoly)
+{
+#if LSB_CRC
+       return msgpoly ^ crc;
+#else
+       return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS));
+#endif
+}
+
+/*
+ * Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it
+ * modulo the generator polynomial G.  This gives the CRC of @msgpoly.
+ */
+static inline crc_t
+crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts)
+{
+       unsigned long tmp;
+
+       /*
+        * First step of Barrett reduction with integrated multiplication by
+        * x^n: calculate floor((msgpoly * x^n) / G).  This is the value by
+        * which G needs to be multiplied to cancel out the x^n and higher terms
+        * of msgpoly * x^n.  Do it using the following formula:
+        *
+        * msb-first:
+        *    floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1))
+        * lsb-first:
+        *    floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG)
+        *
+        * barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G),
+        * which fits a long exactly.  Using any lower power of x there would
+        * not carry enough precision through the calculation, while using any
+        * higher power of x would require extra instructions to handle a wider
+        * multiplication.  In the msb-first case, using this power of x results
+        * in needing a floored division by x^(BITS_PER_LONG-1), which matches
+        * what clmulr produces.  In the lsb-first case, a factor of x gets
+        * implicitly introduced by each carryless multiplication (shown as
+        * '* x' above), and the floored division instead needs to be by
+        * x^BITS_PER_LONG which matches what clmul produces.
+        */
+#if LSB_CRC
+       tmp = clmul(msgpoly, consts->barrett_reduction_const_1);
+#else
+       tmp = clmulr(msgpoly, consts->barrett_reduction_const_1);
+#endif
+
+       /*
+        * Second step of Barrett reduction:
+        *
+        *    crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G))
+        *
+        * This reduces (msgpoly * x^n) modulo G by adding the appropriate
+        * multiple of G to it.  The result uses only the x^0..x^(n-1) terms.
+        * HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those
+        * terms in the first place, it is more efficient to do the equivalent:
+        *
+        *    crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n
+        *
+        * In the lsb-first case further modify it to the following which avoids
+        * a shift, as the crc ends up in the physically low n bits from clmulr:
+        *
+        *    product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x
+        *    crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n
+        *
+        * barrett_reduction_const_2 contains the constant multiplier (G - x^n)
+        * or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above.  The
+        * cast of the result to crc_t is essential, as it applies the mod x^n!
+        */
+#if LSB_CRC
+       return clmulr(tmp, consts->barrett_reduction_const_2);
+#else
+       return clmul(tmp, consts->barrett_reduction_const_2);
+#endif
+}
+
+/* Update @crc with the data from @msgpoly. */
+static inline crc_t
+crc_clmul_update_long(crc_t crc, unsigned long msgpoly,
+                     const struct crc_clmul_consts *consts)
+{
+       return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts);
+}
+
+/* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */
+static inline crc_t
+crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len,
+                        const struct crc_clmul_consts *consts)
+{
+       unsigned long msgpoly;
+       size_t i;
+
+#if LSB_CRC
+       msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8);
+       for (i = 1; i < len; i++)
+               msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8));
+#else
+       msgpoly = p[0];
+       for (i = 1; i < len; i++)
+               msgpoly = (msgpoly << 8) ^ p[i];
+#endif
+
+       if (len >= sizeof(crc_t)) {
+       #if LSB_CRC
+               msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
+       #else
+               msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS);
+       #endif
+               return crc_clmul_long(msgpoly, consts);
+       }
+#if LSB_CRC
+       msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
+       return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len));
+#else
+       msgpoly ^= crc >> (CRC_BITS - 8*len);
+       return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len));
+#endif
+}
+
+static inline crc_t
+crc_clmul(crc_t crc, const void *p, size_t len,
+         const struct crc_clmul_consts *consts)
+{
+       size_t align;
+
+       /* This implementation assumes that the CRC fits in an unsigned long. */
+       BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long));
+
+       /* If the buffer is not long-aligned, align it. */
+       align = (unsigned long)p % sizeof(unsigned long);
+       if (align && len) {
+               align = min(sizeof(unsigned long) - align, len);
+               crc = crc_clmul_update_partial(crc, p, align, consts);
+               p += align;
+               len -= align;
+       }
+
+       if (len >= 4 * sizeof(unsigned long)) {
+               unsigned long m0, m1;
+
+               m0 = crc_clmul_prep(crc, crc_load_long(p));
+               m1 = crc_load_long(p + sizeof(unsigned long));
+               p += 2 * sizeof(unsigned long);
+               len -= 2 * sizeof(unsigned long);
+               /*
+                * Main loop.  Each iteration starts with a message polynomial
+                * (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two
+                * more longs of data to form x^(3*BITS_PER_LONG)*m0 +
+                * x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then
+                * "folds" that back into a congruent (modulo G) value that uses
+                * just m0 and m1 again.  This is done by multiplying m0 by the
+                * precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by
+                * the precomputed constant (x^(2*BITS_PER_LONG) mod G), then
+                * adding the results to m2 and m3 as appropriate.  Each such
+                * multiplication produces a result twice the length of a long,
+                * which in RISC-V is two instructions clmul and clmulh.
+                *
+                * This could be changed to fold across more than 2 longs at a
+                * time if there is a CPU that can take advantage of it.
+                */
+               do {
+                       unsigned long p0, p1, p2, p3;
+
+                       p0 = clmulh(m0, consts->fold_across_2_longs_const_hi);
+                       p1 = clmul(m0, consts->fold_across_2_longs_const_hi);
+                       p2 = clmulh(m1, consts->fold_across_2_longs_const_lo);
+                       p3 = clmul(m1, consts->fold_across_2_longs_const_lo);
+                       m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p);
+                       m1 = (LSB_CRC ? p0 ^ p2 : p1 ^ p3) ^
+                            crc_load_long(p + sizeof(unsigned long));
+
+                       p += 2 * sizeof(unsigned long);
+                       len -= 2 * sizeof(unsigned long);
+               } while (len >= 2 * sizeof(unsigned long));
+
+               crc = crc_clmul_long(m0, consts);
+               crc = crc_clmul_update_long(crc, m1, consts);
+       }
+
+       while (len >= sizeof(unsigned long)) {
+               crc = crc_clmul_update_long(crc, crc_load_long(p), consts);
+               p += sizeof(unsigned long);
+               len -= sizeof(unsigned long);
+       }
+
+       if (len)
+               crc = crc_clmul_update_partial(crc, p, len, consts);
+
+       return crc;
+}
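
The two Barrett steps above can also be exercised in user space by emulating the Zbc instructions (clmul keeps bits 63..0 of the 128-bit carry-less product, clmulh bits 127..64, clmulr bits 126..63) and comparing one aligned msb-first CRC-16 block against a bit-at-a-time reference. A self-contained sanity-check sketch using the 64-bit constants from crc-clmul-consts.h:

  #include <stdint.h>
  #include <stdio.h>

  /* Portable stand-ins for the Zbc instructions (XLEN = 64). */
  static uint64_t clmul64(uint64_t a, uint64_t b)
  {
  	uint64_t r = 0;

  	for (int i = 0; i < 64; i++)
  		if ((b >> i) & 1)
  			r ^= a << i;
  	return r;
  }

  static uint64_t clmulh64(uint64_t a, uint64_t b)
  {
  	uint64_t r = 0;

  	for (int i = 1; i < 64; i++)
  		if ((b >> i) & 1)
  			r ^= a >> (64 - i);
  	return r;
  }

  /* clmulr: bits 126..63 of the 128-bit product. */
  static uint64_t clmulr64(uint64_t a, uint64_t b)
  {
  	return (clmulh64(a, b) << 1) | (clmul64(a, b) >> 63);
  }

  /* Bit-at-a-time msb-first CRC-16 reference, G = 0x8bb7 (+ x^16). */
  static uint16_t crc16_ref(uint16_t crc, const uint8_t *p, size_t len)
  {
  	while (len--) {
  		crc ^= (uint16_t)(*p++) << 8;
  		for (int i = 0; i < 8; i++)
  			crc = (crc & 0x8000) ? (crc << 1) ^ 0x8bb7 : crc << 1;
  	}
  	return crc;
  }

  int main(void)
  {
  	static const uint8_t data[8] = "12345678";
  	uint64_t msgpoly = 0, tmp;
  	uint16_t crc = 0, out;

  	/* crc_load_long() + crc_clmul_prep() for msb-first: big-endian
  	 * load, then XOR the CRC into the high-order terms. */
  	for (int i = 0; i < 8; i++)
  		msgpoly = (msgpoly << 8) | data[i];
  	msgpoly ^= (uint64_t)crc << 48;

  	/* Barrett step 1: floor((msgpoly * x^16) / G) via clmulr with
  	 * floor(x^79 / G); step 2: multiply by G - x^16, truncate to 16
  	 * bits (the "mod x^n" that the cast to crc_t performs). */
  	tmp = clmulr64(msgpoly, 0xfb2d2bfc0e99d245ULL);
  	out = (uint16_t)clmul64(tmp, 0x0000000000008bb7ULL);

  	printf("%s (clmul=0x%04x ref=0x%04x)\n",
  	       out == crc16_ref(0, data, 8) ? "PASS" : "FAIL",
  	       out, crc16_ref(0, data, 8));
  	return 0;
  }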
diff --git a/lib/crc/riscv/crc-clmul.h b/lib/crc/riscv/crc-clmul.h
new file mode 100644 (file)
index 0000000..dd17362
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright 2025 Google LLC */
+
+#ifndef _RISCV_CRC_CLMUL_H
+#define _RISCV_CRC_CLMUL_H
+
+#include <linux/types.h>
+#include "crc-clmul-consts.h"
+
+u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
+                   const struct crc_clmul_consts *consts);
+u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
+                   const struct crc_clmul_consts *consts);
+u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
+                   const struct crc_clmul_consts *consts);
+#ifdef CONFIG_64BIT
+u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
+                   const struct crc_clmul_consts *consts);
+u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
+                   const struct crc_clmul_consts *consts);
+#endif
+
+#endif /* _RISCV_CRC_CLMUL_H */
diff --git a/lib/crc/riscv/crc-t10dif.h b/lib/crc/riscv/crc-t10dif.h
new file mode 100644 (file)
index 0000000..cd6136c
--- /dev/null
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized CRC-T10DIF function
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+
+#include "crc-clmul.h"
+
+static inline u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
+{
+       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+               return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts);
+       return crc_t10dif_generic(crc, p, len);
+}
diff --git a/lib/crc/riscv/crc16_msb.c b/lib/crc/riscv/crc16_msb.c
new file mode 100644 (file)
index 0000000..554d295
--- /dev/null
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized most-significant-bit-first CRC16
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include "crc-clmul.h"
+
+typedef u16 crc_t;
+#define LSB_CRC 0
+#include "crc-clmul-template.h"
+
+u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
+                   const struct crc_clmul_consts *consts)
+{
+       return crc_clmul(crc, p, len, consts);
+}
diff --git a/lib/crc/riscv/crc32.h b/lib/crc/riscv/crc32.h
new file mode 100644 (file)
index 0000000..3ec6eee
--- /dev/null
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized CRC32 functions
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+
+#include "crc-clmul.h"
+
+static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+               return crc32_lsb_clmul(crc, p, len,
+                                      &crc32_lsb_0xedb88320_consts);
+       return crc32_le_base(crc, p, len);
+}
+
+static inline u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+               return crc32_msb_clmul(crc, p, len,
+                                      &crc32_msb_0x04c11db7_consts);
+       return crc32_be_base(crc, p, len);
+}
+
+static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
+{
+       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+               return crc32_lsb_clmul(crc, p, len,
+                                      &crc32_lsb_0x82f63b78_consts);
+       return crc32c_base(crc, p, len);
+}
+
+static inline u32 crc32_optimizations_arch(void)
+{
+       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+               return CRC32_LE_OPTIMIZATION |
+                      CRC32_BE_OPTIMIZATION |
+                      CRC32C_OPTIMIZATION;
+       return 0;
+}
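
With these helpers inlined into the crc32 module, consumers are unaffected by the move: they keep calling the generic entry points from <linux/crc32.h>, which dispatch to the _arch variants above when CONFIG_CRC32_ARCH=y and Zbc is present at runtime. A hypothetical caller, for illustration only:

  #include <linux/crc32.h>

  /* Hypothetical consumer: checksum a buffer with CRC32C, using the
   * conventional ~0 seed and final inversion. */
  static u32 my_csum(const void *buf, size_t len)
  {
  	return ~crc32c(~0, buf, len);
  }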
diff --git a/lib/crc/riscv/crc32_lsb.c b/lib/crc/riscv/crc32_lsb.c
new file mode 100644 (file)
index 0000000..72fd67e
--- /dev/null
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized least-significant-bit-first CRC32
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include "crc-clmul.h"
+
+typedef u32 crc_t;
+#define LSB_CRC 1
+#include "crc-clmul-template.h"
+
+u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
+                   const struct crc_clmul_consts *consts)
+{
+       return crc_clmul(crc, p, len, consts);
+}
diff --git a/lib/crc/riscv/crc32_msb.c b/lib/crc/riscv/crc32_msb.c
new file mode 100644 (file)
index 0000000..fdbeacc
--- /dev/null
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized most-significant-bit-first CRC32
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include "crc-clmul.h"
+
+typedef u32 crc_t;
+#define LSB_CRC 0
+#include "crc-clmul-template.h"
+
+u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
+                   const struct crc_clmul_consts *consts)
+{
+       return crc_clmul(crc, p, len, consts);
+}
diff --git a/lib/crc/riscv/crc64.h b/lib/crc/riscv/crc64.h
new file mode 100644 (file)
index 0000000..a1b7873
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized CRC64 functions
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+
+#include "crc-clmul.h"
+
+static inline u64 crc64_be_arch(u64 crc, const u8 *p, size_t len)
+{
+       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+               return crc64_msb_clmul(crc, p, len,
+                                      &crc64_msb_0x42f0e1eba9ea3693_consts);
+       return crc64_be_generic(crc, p, len);
+}
+
+static inline u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
+{
+       if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+               return crc64_lsb_clmul(crc, p, len,
+                                      &crc64_lsb_0x9a6c9329ac4bc9b5_consts);
+       return crc64_nvme_generic(crc, p, len);
+}
diff --git a/lib/crc/riscv/crc64_lsb.c b/lib/crc/riscv/crc64_lsb.c
new file mode 100644 (file)
index 0000000..c5371bb
--- /dev/null
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized least-significant-bit-first CRC64
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include "crc-clmul.h"
+
+typedef u64 crc_t;
+#define LSB_CRC 1
+#include "crc-clmul-template.h"
+
+u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
+                   const struct crc_clmul_consts *consts)
+{
+       return crc_clmul(crc, p, len, consts);
+}
diff --git a/lib/crc/riscv/crc64_msb.c b/lib/crc/riscv/crc64_msb.c
new file mode 100644 (file)
index 0000000..1925d1d
--- /dev/null
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RISC-V optimized most-significant-bit-first CRC64
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include "crc-clmul.h"
+
+typedef u64 crc_t;
+#define LSB_CRC 0
+#include "crc-clmul-template.h"
+
+u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
+                   const struct crc_clmul_consts *consts)
+{
+       return crc_clmul(crc, p, len, consts);
+}