The expected number of operations is (number of 1 bits in length) * 16; in general it is O(log(length)).
Added a test comparing the performance of the "0" (all-zero data) crc.
Added a test to evaluate the time of a single crc calculation.
Signed-off-by: Adam Kupczyk <akupczyk@mirantis.com>
*/
ceph_crc32c_func_t ceph_crc32c_func = ceph_choose_crc32();
+struct crc_turbo_struct { // lookup table consumed by ceph_crc32c_zeros()
+ uint32_t val[32][32]; // val[range][bit]: first index is the jump level, second is the bit position of the input crc
+};
+
+/*
+ * Build the lookup table used to advance a crc value over long runs of
+ * data without processing them piece by piece.
+ *
+ * See: http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
+ * This implementation goes one logical step further:
+ * it splits the CRC calculation into jumps of length 1, 2, 4, 8, ....
+ * Each jump is evaluated for every single input bit separately, and the
+ * per-bit results are xor-ed together afterwards.
+ *
+ * Row 0 holds, for each bit, the value returned by ceph_crc32c_sctp for a
+ * start crc of (1 << bit), a null data pointer and length 1.
+ * Row N is built by applying row N-1 to its own output, so every row
+ * covers twice the length of the previous one.
+ */
+crc_turbo_struct create_turbo_table()
+{
+ crc_turbo_struct table;
+ for (int bit = 0 ; bit < 32 ; bit++) {
+ table.val[0][bit] = ceph_crc32c_sctp(1UL << bit, nullptr, 1); // level 0: one step of length 1 from a crc with only this bit set
+ }
+ for (int range = 1; range <32 ; range++) {
+ for (int bit = 0 ; bit < 32 ; bit++) {
+ uint32_t crc_x = table.val[range-1][bit]; // first half of the jump: previous level applied to this bit
+ uint32_t crc_y = 0;
+ for (int b = 0 ; b < 32 ; b++) { // second half: previous level applied to every set bit of crc_x
+ if ( (crc_x & (1UL << b)) != 0 ) {
+ crc_y = crc_y ^ table.val[range-1][b];
+ }
+ }
+ table.val[range][bit] = crc_y;
+ }
+ }
+ return table;
+}
+
+static crc_turbo_struct crc_turbo_table = create_turbo_table(); // filled by create_turbo_table(); read by ceph_crc32c_zeros()
+
+uint32_t ceph_crc32c_zeros(uint32_t crc, unsigned len) // advances crc over len bytes of all-zero data using crc_turbo_table; returns the new crc
+{
+ int range = 0; // NOTE(review): this initial value is overwritten with 4 below before it is ever read
+ unsigned remainder = len & 15; // low 4 bits of len are handled at the end through ceph_crc32c
+ len = len >> 4; // bit k of the shifted len now selects table row 4 + k
+ range = 4;
+ while (len != 0) {
+ uint32_t crc1 = 0;
+ if ((len & 1) == 1) {
+ uint32_t* ptr = crc_turbo_table.val[range];
+ while (crc != 0) { // walk the crc bits from lowest to highest; stops once no set bits remain
+ uint32_t mask = ~((crc & 1) - 1); // all ones when the low bit of crc is set, zero otherwise
+ crc1 = crc1 ^ (mask & *ptr); // xor in the table entry for this bit only when the bit is set
+ crc = crc >> 1;
+ ptr++;
+ }
+ crc = crc1;
+ }
+ len = len >> 1;
+ range++;
+ }
+ if (remainder > 0)
+ crc = ceph_crc32c(crc, nullptr, remainder); // remainder is at most 15 here
+ return crc;
+}
#include "acconfig.h"
#include "include/int_types.h"
#include "common/crc32c_intel_baseline.h"
+#include "include/crc32c.h"
extern unsigned int crc32_iscsi_00(unsigned char const *buffer, int len, unsigned int crc);
extern unsigned int crc32_iscsi_zero_00(unsigned char const *buffer, int len, unsigned int crc);
uint32_t v;
unsigned left;
-
if (!buffer)
- return crc32_iscsi_zero_00(buffer, len, crc);
+ {
+ return crc32_iscsi_zero_00(buffer, len, crc);
+ }
/*
* the crc32_iscsi_00 method reads past buffer+len (because it
#include <inttypes.h>
#include <string.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
typedef uint32_t (*ceph_crc32c_func_t)(uint32_t crc, unsigned char const *data, unsigned length);
/*
extern ceph_crc32c_func_t ceph_choose_crc32(void);
+/**
+ * calculate crc32c for data that is entirely 0 (ZERO)
+ *
+ * Note: works the same as \ref ceph_crc32c for data == nullptr, but faster
+ *
+ * @param crc initial value
+ * @param length length of buffer
+ */
+uint32_t ceph_crc32c_zeros(uint32_t crc, unsigned length);
+
/**
* calculate crc32c
*
*/
static inline uint32_t ceph_crc32c(uint32_t crc, unsigned char const *data, unsigned length)
{
+ if (!data && length > 16) // null data pointer and length above 16: use the dedicated zeros routine
+ return ceph_crc32c_zeros(crc, length);
return ceph_crc32c_func(crc, data, length);
}
+#ifdef __cplusplus
+}
+#endif
+
#endif
#include "stdlib.h"
#include "fcntl.h"
#include "sys/stat.h"
+#include "include/crc32c.h"
+#include "common/sctp_crc32.h"
#define MAX_TEST 1000000
#define FILENAME "bufferlist"
ASSERT_EQ(bl1.crc32c(0), bl2.crc32c(0));
}
+TEST(BufferList, crc32c_zeros) { // compares bufferlist::crc32c with ceph_crc32c run over the same content via c_str()
+ char buffer[4*1024];
+ for (size_t i=0; i < sizeof(buffer); i++)
+ {
+ buffer[i] = i; // NOTE(review): the buffer holds index-derived values (i converted to char), not zeros, despite the test name
+ }
+
+ bufferlist bla;
+ bufferlist blb;
+
+ for (size_t j=0; j < 1000; j++) // both lists grow by one more 4 KiB bufferptr on every round
+ {
+ bufferptr a(buffer, sizeof(buffer));
+
+ bla.push_back(a);
+ uint32_t crca = bla.crc32c(111);
+
+ blb.push_back(a);
+ uint32_t crcb = ceph_crc32c(111, (unsigned char*)blb.c_str(), blb.length()); // presumably c_str() yields one contiguous view of the list - confirm
+
+ EXPECT_EQ(crca, crcb);
+ }
+}
+
TEST(BufferList, crc32c_append_perf) {
int len = 256 * 1024 * 1024;
bufferptr a(len);
}
assert(buffer::get_cached_crc() == 1 + base_cached);
assert(buffer::get_cached_crc_adjusted() == 2 + base_cached_adjusted);
-
{
utime_t start = ceph_clock_now();
uint32_t r = blb.crc32c(0);
ASSERT_EQ(crc, *check);
}
}
+
+double estimate_clock_resolution() // returns the largest gap between neighbouring distinct timings in the middle of the sample
+{
+ volatile char* p = (volatile char*)malloc(1024); // NOTE(review): the malloc result is not checked before use
+ utime_t start;
+ utime_t end;
+ std::set<double> S; // distinct measured durations, kept in ascending order by the set
+ for(int j=10; j<200; j+=1) {
+ start = ceph_clock_now();
+ for (int i=0; i<j; i++) // timed work: j writes through the volatile pointer
+ p[i]=1;
+ end = ceph_clock_now();
+ S.insert((double)(end - start));
+ }
+ auto head = S.begin();
+ auto tail = S.end();
+ for (size_t i=0; i<S.size()/4; i++) { // skip a quarter of the distinct values at each end
+ ++head;
+ --tail;
+ }
+ double v = *(head++);
+ double range=0;
+ while (head != tail) { // keep the widest step between consecutive remaining values
+ range = max(range, *head - v);
+ v = *head;
+ head++;
+ }
+ free((void*)p);
+ return range;
+}
+
+TEST(Crc32c, zeros_performance_compare) { // times ceph_crc32c against ceph_crc32c_func for null data and checks that both return the same crc
+ double resolution = estimate_clock_resolution();
+ utime_t start;
+ utime_t pre_start;
+ utime_t end;
+ double time_adjusted;
+ using namespace std::chrono;
+ high_resolution_clock::now(); // NOTE(review): the returned time point is discarded
+ for (size_t scale=1; scale < 31; scale++)
+ {
+ size_t size = (1<<scale) + rand()%(1<<scale); // random size in [2^scale, 2^(scale+1))
+ pre_start = ceph_clock_now(); // two back-to-back clock reads give the cost of reading the clock itself
+ start = ceph_clock_now();
+ uint32_t crc_a = ceph_crc32c(111, nullptr, size);
+ end = ceph_clock_now();
+ time_adjusted = (end - start) - (start - pre_start); // measured time minus the clock-read cost
+ std::cout << "regular method. size=" << size << " time= " << (double)(end-start)
+ << " at " << (double)size/(1024*1024)/(time_adjusted) << " MB/sec"
+ << " error=" << resolution / time_adjusted * 100 << "%" << std::endl;
+
+ pre_start = ceph_clock_now();
+ start = ceph_clock_now();
+ uint32_t crc_b = ceph_crc32c_func(111, nullptr, size); // calls the function pointer directly, bypassing the check in ceph_crc32c
+ end = ceph_clock_now();
+ time_adjusted = (end - start) - (start - pre_start);
+ std::cout << "fallback method. size=" << size << " time=" << (double)(end-start)
+ << " at " << (double)size/(1024*1024)/(time_adjusted) << " MB/sec"
+ << " error=" << resolution / time_adjusted * 100 << "%" << std::endl;
+ EXPECT_EQ(crc_a, crc_b);
+ }
+}
+
+// Times ITER rounds of ceph_crc32c() calls made with a null data pointer.
+// Each round issues one call per scale in [SCALE_BEGIN, SCALE_END) with a
+// random size in [2^scale, 2^(scale+1)) and a random start crc, then the
+// total number of calls and the elapsed time are printed.
+TEST(Crc32c, zeros_performance) {
+ constexpr size_t ITER=100000;
+ constexpr size_t SCALE_BEGIN=1;
+ constexpr size_t SCALE_END=31; // exclusive upper bound
+ utime_t start;
+ utime_t end;
+
+ start = ceph_clock_now();
+ for (size_t i=0; i<ITER; i++)
+ for (size_t scale=SCALE_BEGIN; scale < SCALE_END; scale++)
+ {
+ size_t size = (1<<scale) + rand() % (1<<scale);
+ ceph_crc32c(rand(), nullptr, size);
+ }
+ end = ceph_clock_now();
+ // Report the number of calls actually made: the inner loop runs
+ // SCALE_END - SCALE_BEGIN (30) times per round, not 31.
+ std::cout << "iterations="<< ITER*(SCALE_END-SCALE_BEGIN) << " time=" << (double)(end-start) << std::endl;
+
+}
+