From: Adam Kupczyk Date: Mon, 14 Nov 2016 13:55:28 +0000 (+0100) Subject: common, performance: Created special function for calculating CRC for data that conta... X-Git-Tag: v12.1.1~65^2~10^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=172fae2291d0308a5566195ea758f73ed640ade1;p=ceph.git common, performance: Created special function for calculating CRC for data that contains all zeros. Expected number of operations is 'count of 1s in length' * 16. Generally it is O(log(length)). Added test for performance comparison of "0" crc. Added test to evaluate single crc calculation time. Signed-off-by: Adam Kupczyk --- diff --git a/src/common/crc32c.cc b/src/common/crc32c.cc index 5e857abb89b1..04f2f5eb4c7e 100644 --- a/src/common/crc32c.cc +++ b/src/common/crc32c.cc @@ -52,3 +52,61 @@ ceph_crc32c_func_t ceph_choose_crc32(void) */ ceph_crc32c_func_t ceph_crc32c_func = ceph_choose_crc32(); +struct crc_turbo_struct { + uint32_t val[32][32]; +}; + +/* + * Look: http://crcutil.googlecode.com/files/crc-doc.1.0.pdf + * Here is implementation that goes 1 logical step further, + * it splits calculating CRC into jumps of length 1, 2, 4, 8, .... + * Each jump is performed on single input bit separately, xor-ed after that. 
+ */ +crc_turbo_struct create_turbo_table() +{ + crc_turbo_struct table; + for (int bit = 0 ; bit < 32 ; bit++) { + table.val[0][bit] = ceph_crc32c_sctp(1UL << bit, nullptr, 1); + } + for (int range = 1; range <32 ; range++) { + for (int bit = 0 ; bit < 32 ; bit++) { + uint32_t crc_x = table.val[range-1][bit]; + uint32_t crc_y = 0; + for (int b = 0 ; b < 32 ; b++) { + if ( (crc_x & (1UL << b)) != 0 ) { + crc_y = crc_y ^ table.val[range-1][b]; + } + } + table.val[range][bit] = crc_y; + } + } + return table; +} + +static crc_turbo_struct crc_turbo_table = create_turbo_table(); + +uint32_t ceph_crc32c_zeros(uint32_t crc, unsigned len) +{ + int range = 0; + unsigned remainder = len & 15; + len = len >> 4; + range = 4; + while (len != 0) { + uint32_t crc1 = 0; + if ((len & 1) == 1) { + uint32_t* ptr = crc_turbo_table.val[range]; + while (crc != 0) { + uint32_t mask = ~((crc & 1) - 1); + crc1 = crc1 ^ (mask & *ptr); + crc = crc >> 1; + ptr++; + } + crc = crc1; + } + len = len >> 1; + range++; + } + if (remainder > 0) + crc = ceph_crc32c(crc, nullptr, remainder); + return crc; +} diff --git a/src/common/crc32c_intel_fast.c b/src/common/crc32c_intel_fast.c index af081a9946b5..9f950d7f4016 100644 --- a/src/common/crc32c_intel_fast.c +++ b/src/common/crc32c_intel_fast.c @@ -1,6 +1,7 @@ #include "acconfig.h" #include "include/int_types.h" #include "common/crc32c_intel_baseline.h" +#include "include/crc32c.h" extern unsigned int crc32_iscsi_00(unsigned char const *buffer, int len, unsigned int crc); extern unsigned int crc32_iscsi_zero_00(unsigned char const *buffer, int len, unsigned int crc); @@ -12,9 +13,10 @@ uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsig uint32_t v; unsigned left; - if (!buffer) - return crc32_iscsi_zero_00(buffer, len, crc); + { + return crc32_iscsi_zero_00(buffer, len, crc); + } /* * the crc32_iscsi_00 method reads past buffer+len (because it diff --git a/src/include/crc32c.h b/src/include/crc32c.h index 
a568edabe191..f2a3b39d43f5 100644 --- a/src/include/crc32c.h +++ b/src/include/crc32c.h @@ -4,6 +4,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + typedef uint32_t (*ceph_crc32c_func_t)(uint32_t crc, unsigned char const *data, unsigned length); /* @@ -14,6 +18,16 @@ extern ceph_crc32c_func_t ceph_crc32c_func; extern ceph_crc32c_func_t ceph_choose_crc32(void); +/** + * calculate crc32c for data that is entirely 0 (ZERO) + * + * Note: works the same as \ref ceph_crc32c for data == nullptr, but faster + * + * @param crc initial value + * @param length length of buffer + */ +uint32_t ceph_crc32c_zeros(uint32_t crc, unsigned length); + /** * calculate crc32c * @@ -26,7 +40,13 @@ extern ceph_crc32c_func_t ceph_choose_crc32(void); */ static inline uint32_t ceph_crc32c(uint32_t crc, unsigned char const *data, unsigned length) { + if (!data && length > 16) + return ceph_crc32c_zeros(crc, length); return ceph_crc32c_func(crc, data, length); } +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/test/bufferlist.cc b/src/test/bufferlist.cc index af3999e55a02..907a194229e4 100644 --- a/src/test/bufferlist.cc +++ b/src/test/bufferlist.cc @@ -36,6 +36,8 @@ #include "stdlib.h" #include "fcntl.h" #include "sys/stat.h" +#include "include/crc32c.h" +#include "common/sctp_crc32.h" #define MAX_TEST 1000000 #define FILENAME "bufferlist" @@ -2547,6 +2549,30 @@ TEST(BufferList, crc32c_append) { ASSERT_EQ(bl1.crc32c(0), bl2.crc32c(0)); } +TEST(BufferList, crc32c_zeros) { + char buffer[4*1024]; + for (size_t i=0; i < sizeof(buffer); i++) + { + buffer[i] = i; + } + + bufferlist bla; + bufferlist blb; + + for (size_t j=0; j < 1000; j++) + { + bufferptr a(buffer, sizeof(buffer)); + + bla.push_back(a); + uint32_t crca = bla.crc32c(111); + + blb.push_back(a); + uint32_t crcb = ceph_crc32c(111, (unsigned char*)blb.c_str(), blb.length()); + + EXPECT_EQ(crca, crcb); + } +} + TEST(BufferList, crc32c_append_perf) { int len = 256 * 1024 * 1024; bufferptr a(len); @@ -2614,7 
+2640,6 @@ TEST(BufferList, crc32c_append_perf) { } assert(buffer::get_cached_crc() == 1 + base_cached); assert(buffer::get_cached_crc_adjusted() == 2 + base_cached_adjusted); - { utime_t start = ceph_clock_now(); uint32_t r = blb.crc32c(0); diff --git a/src/test/common/test_crc32c.cc b/src/test/common/test_crc32c.cc index 562ba84a31d5..c51006732e85 100644 --- a/src/test/common/test_crc32c.cc +++ b/src/test/common/test_crc32c.cc @@ -266,3 +266,83 @@ TEST(Crc32c, RangeNull) { ASSERT_EQ(crc, *check); } } + +double estimate_clock_resolution() +{ + volatile char* p = (volatile char*)malloc(1024); + utime_t start; + utime_t end; + std::set S; + for(int j=10; j<200; j+=1) { + start = ceph_clock_now(); + for (int i=0; i