ISA-L provides a few different CRC32C implementations, of
which Ceph has only ever linked against one
(crc32_iscsi_00).
The second CRC32C implementation provided by ISA-L
(crc32_iscsi_01) improves upon the one Ceph currently
uses in a couple of ways:
1) crc32_iscsi_01 explicitly checks for and handles
buffers smaller than 8 bytes, computing the CRC32C
value with the hardware-accelerated CRC32 instruction.
In comparison, crc32_iscsi_00 prefetches too far on
such small buffers, forcing the Ceph code to detect
this case and handle it separately in software. That
software fallback carries its own set of LUTs (lookup
tables) and is less efficient, as it does not use the
CRC32 instruction at all (a rough sketch of the
instruction-based approach follows this list).
2) crc32_iscsi_00 relies on large LUTs (lookup tables)
to perform the modular reduction required to produce
the CRC32C value. In contrast, crc32_iscsi_01 uses the
PCLMUL instruction to fold 128 bits at a time with much
smaller LUTs, resulting in greater throughput and less
data cache pollution.
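For illustration only, here is a hedged sketch (not part of
this patch, and not ISA-L's assembly) of how a small-buffer
CRC32C update looks when it leans on the SSE4.2 CRC32
instruction via compiler intrinsics; the helper name
crc32c_hw_small is hypothetical. The 128-bit PCLMUL folding
from point 2 lives in ISA-L's crc32_iscsi_01.asm and is not
reproduced here. Seed/inversion conventions are left to the
caller, as with the ISA-L entry points.

/* Build with SSE4.2 enabled, e.g. -msse4.2. */
#include <stddef.h>
#include <stdint.h>
#include <nmmintrin.h>  /* _mm_crc32_u8 (SSE4.2) */

/* Hypothetical helper: update a CRC32C value over a short
 * buffer one byte at a time with the hardware CRC32
 * instruction, roughly what crc32_iscsi_01's prelude does
 * for < 8 byte inputs. */
static uint32_t crc32c_hw_small(uint32_t crc,
                                const unsigned char *buf,
                                size_t len)
{
  while (len--)
    crc = _mm_crc32_u8(crc, *buf++);
  return crc;
}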
Fixes: https://tracker.ceph.com/issues/65791
Signed-off-by: Tyler Stachecki <tstachecki@bloomberg.net>
(cherry picked from commit 948392a41511f5a04b13a8bad43ddb6d2731a197)
set(CMAKE_ASM_FLAGS "-i ${PROJECT_SOURCE_DIR}/src/isa-l/include/ ${CMAKE_ASM_FLAGS}")
list(APPEND crc32_srcs
${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_00.asm
+ ${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_01.asm
crc32c_intel_fast_zero_asm.s)
endif(HAVE_NASM_X64)
elseif(HAVE_POWER8)
// use that.
#if defined(__i386__) || defined(__x86_64__)
if (ceph_arch_intel_sse42 && ceph_crc32c_intel_fast_exists()) {
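+    // Prefer the PCLMUL-based implementation when the CPU supports carry-less multiplication.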
+ if (ceph_arch_intel_pclmul) {
+ return ceph_crc32c_intel_fast_pclmul;
+ }
return ceph_crc32c_intel_fast;
}
#elif defined(__arm__) || defined(__aarch64__)
#include "common/crc32c_intel_baseline.h"
extern unsigned int crc32_iscsi_00(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_00");
+extern unsigned int crc32_iscsi_01(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_01");
extern unsigned int crc32_iscsi_zero_00(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_zero_00");
#ifdef HAVE_NASM_X64
+uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+ if (!buffer)
+ {
+ return crc32_iscsi_zero_00(buffer, len, crc);
+ }
+
+ /* Unlike crc32_iscsi_00, crc32_iscsi_01 handles the case where the
+ * input buffer is less than 8 bytes in its prelude, and does not
+ * prefetch beyond said buffer.
+ */
+ return crc32_iscsi_01(buffer, len, crc);
+}
+
uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
{
uint32_t v;
return 0;
}
+uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+ return 0;
+}
+
uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
{
return 0;
#ifdef __x86_64__
+extern uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len);
extern uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len);
#else
+static inline uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
+{
+ return 0;
+}
+
static inline uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
{
return 0;