/*
 * crc32c-intel.c - CRC32C using the Intel SSE4.2 CRC32 instruction.
 *
 * Provenance (from the originating commit):
 *   From: Mark Nelson
 *   Date: Mon, 24 Jun 2013 03:18:03 +0000 (-0500)
 *   Subject: Initial Intel SSE4 crc32c implementation.
 *   X-Git-Tag: v0.67-rc1~158^2~1
 *   X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7c59288d9168ddef3b3dc570464ae9a1f180d18c;p=ceph.git
 *   Signed-off-by: Mark Nelson
 *
 * Based on a posting to lkml by Austin Zhang.
 * Further based on the fio crc32c-intel.c implementation by Jens Axboe.
 *
 * Uses the hardware-provided CRC32 instruction to accelerate CRC32C
 * computation.
 * CRC32C polynomial: 0x1EDC6F41 (BE) / 0x82F63B78 (LE)
 * CRC32 is an instruction introduced with Intel SSE4.2; the reference can be
 * found at:
 *   http://www.intel.com/products/processor/manuals/
 *   Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 *   Volume 2A: Instruction Set Reference, A-M
 */
#include <stdint.h>
#include <string.h>

/* Reflected (little-endian) CRC32C polynomial, as listed in the header. */
#define CRC32C_POLY_LE 0x82F63B78u

/* The inline assembly below is only valid on x86 with a GNU-style compiler. */
#if (defined(__x86_64__) || defined(__i386__)) && defined(__GNUC__)
#define CRC32C_INTEL_ARCH 1
#else
#define CRC32C_INTEL_ARCH 0
#endif

/* Non-zero once crc32c_intel_probe() has found SSE4.2 CRC32 support. */
int crc32c_intel_available = 0;

/* Non-zero once crc32c_intel_probe() has run. */
static int crc32c_probed;

/*
 * Portable bit-at-a-time CRC32C update.
 *
 * Applies no initial or final inversion, matching what the hardware
 * instruction computes, so both paths are interchangeable.
 *
 * crc:    running CRC value
 * data:   bytes to fold in (not dereferenced when length is 0)
 * length: number of bytes at data
 * Returns the updated CRC.
 */
static uint32_t crc32c_sw(uint32_t crc, unsigned char const *data,
			  unsigned length)
{
	while (length--) {
		crc ^= *data++;
		for (int bit = 0; bit < 8; bit++)
			crc = (crc >> 1) ^ (CRC32C_POLY_LE & (0u - (crc & 1u)));
	}

	return crc;
}

#if CRC32C_INTEL_ARCH

/*
 * Word size used by the main loop. Keyed on __x86_64__ because
 * BITS_PER_LONG is a kernel macro that is not defined in userspace, which
 * made the 64-bit path unreachable.
 */
#if defined(__x86_64__)
typedef uint64_t crc32c_word_t;
#define REX_PRE "0x48, "
#define SCALE_F 8
#else
typedef uint32_t crc32c_word_t;
#define REX_PRE
#define SCALE_F 4
#endif

/*
 * Folds length bytes into crc one byte at a time using the hardware
 * instruction. The opcode bytes encode "crc32b %cl, %esi", hence the fixed
 * "S" (esi) and "c" (ecx) register constraints.
 */
static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data,
					unsigned length)
{
	while (length--) {
		__asm__ __volatile__(
			".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
			:"=S"(crc)
			:"0"(crc), "c"(*data)
		);
		data++;
	}

	return crc;
}

/*
 * Hardware CRC32C update: consumes the buffer one machine word at a time
 * (8 bytes on x86-64, 4 bytes on i386) and finishes any tail bytes with the
 * byte-wide instruction.
 *
 * The caller must ensure the CPU supports SSE4.2 (see crc32c_intel_probe());
 * otherwise the instruction is undefined on that CPU.
 *
 * crc:    running CRC value (no inversion is applied here)
 * data:   bytes to fold in; may be unaligned
 * length: number of bytes at data
 * Returns the updated CRC.
 */
uint32_t crc32c_intel(uint32_t crc, unsigned char const *data, unsigned length)
{
	unsigned int words = length / SCALE_F;
	unsigned int tail = length % SCALE_F;

	while (words--) {
		crc32c_word_t word;

		/* memcpy avoids unaligned / aliasing-violating pointer casts. */
		memcpy(&word, data, sizeof word);
		/* Encodes "crc32 %ecx, %esi" (REX.W-prefixed on x86-64). */
		__asm__ __volatile__(
			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
			:"=S"(crc)
			:"0"(crc), "c"(word)
		);
		data += sizeof word;
	}

	if (tail)
		crc = crc32c_intel_le_hw_byte(crc, data, tail);

	return crc;
}

/*
 * Executes CPUID for the leaf given in *eax (sub-leaf 0) and stores the four
 * result registers through the pointers. Only *eax is read as input.
 */
static void do_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
		     unsigned int *edx)
{
	unsigned int leaf = *eax;
	unsigned int a, b, c, d;

#if defined(__i386__) && defined(__PIC__)
	/* ebx holds the GOT pointer in i386 PIC code; preserve it. */
	__asm__ __volatile__(
		"xchgl %%ebx, %1\n\t"
		"cpuid\n\t"
		"xchgl %%ebx, %1"
		: "=a" (a), "=&r" (b), "=c" (c), "=d" (d)
		: "0" (leaf), "2" (0u));
#else
	__asm__ __volatile__(
		"cpuid"
		: "=a" (a), "=b" (b), "=c" (c), "=d" (d)
		: "0" (leaf), "2" (0u));
#endif

	*eax = a;
	*ebx = b;
	*ecx = c;
	*edx = d;
}

#else /* !CRC32C_INTEL_ARCH */

/*
 * No CRC32 instruction on this architecture: keep the entry point available
 * and compute the same value in software.
 */
uint32_t crc32c_intel(uint32_t crc, unsigned char const *data, unsigned length)
{
	return crc32c_sw(crc, data, length);
}

#endif /* CRC32C_INTEL_ARCH */

/*
 * Detects SSE4.2 support once and records the result in
 * crc32c_intel_available. Subsequent calls return immediately.
 *
 * Not synchronized; call it once before spawning threads if concurrent use
 * is expected.
 */
void crc32c_intel_probe(void)
{
	if (crc32c_probed)
		return;

#if CRC32C_INTEL_ARCH
	unsigned int eax = 1, ebx = 0, ecx = 0, edx = 0;

	do_cpuid(&eax, &ebx, &ecx, &edx);
	/* CPUID leaf 1, ECX bit 20 is the SSE4.2 feature flag. */
	crc32c_intel_available = (ecx & (1u << 20)) != 0;
#else
	crc32c_intel_available = 0;
#endif
	crc32c_probed = 1;
}

/*
 * CRC32C update entry point. Uses the hardware instruction when the CPU
 * provides it and the software routine otherwise, so it is safe to call on
 * any CPU.
 *
 * crc:    running CRC value (callers apply any initial/final inversion)
 * data:   bytes to fold in
 * length: number of bytes at data
 * Returns the updated CRC.
 */
uint32_t ceph_crc32c_le(uint32_t crc, unsigned char const *data, unsigned length)
{
	crc32c_intel_probe();

	if (crc32c_intel_available)
		return crc32c_intel(crc, data, length);

	return crc32c_sw(crc, data, length);
}