From afb5b81adb69489766c174326c784c0aac8092dc Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 21 May 2020 10:03:28 -0500 Subject: [PATCH] common/FastCDC: add some comments Map terms back to those in the paper. Signed-off-by: Sage Weil --- src/common/FastCDC.cc | 29 ++++++++++++++++++++++------- src/common/FastCDC.h | 14 +++++++++++--- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/common/FastCDC.cc b/src/common/FastCDC.cc index b83a83749f8..9576124619d 100644 --- a/src/common/FastCDC.cc +++ b/src/common/FastCDC.cc @@ -7,17 +7,32 @@ #include "rabin.h" -// if we are close to the target, use the target mask. if we are very -// small or very large, use an adjusted mask. this tries to keep /most/ -// cut points using the same mask. - -// how many bits to set/clear in the small/large masks +// Unlike FastCDC as described in the paper, if we are close to the +// target, use the target mask. If we are very small or very large, +// use an adjusted mask--like the paper. This tries to keep more +// cut points using the same mask, and fewer using the small or large +// masks. + +// How many more/fewer bits to set in the small/large masks. +// +// This is the "normalization level" or "NC level" in the FastCDC +// paper. #define TARGET_WINDOW_MASK_BITS 2 -// how big the 'target window' is (in which we use the target mask) +// How big the 'target window' is (in which we use the target mask). +// +// In the FastCDC paper, this is always 0: there is no "target +// window," and either small_mask (maskS) or large_mask (maskL) is +// used--never target_mask (maskA). #define TARGET_WINDOW_BITS 1 -// hard limits on size +// How many bits larger/smaller than target for hard limits on chunk +// size. +// +// We assume the min and max sizes are always this many bits +// larger/smaller than the target. 
(Note that the FastCDC paper 8KB +// example has a min of 2KB (2 bits smaller) and max of 64 KB (3 bits +// larger), although it is not clear why they chose those values.) #define SIZE_WINDOW_BITS 2 void FastCDC::_setup(int target, int size_window_bits) diff --git a/src/common/FastCDC.h b/src/common/FastCDC.h index ce2ce95df6c..b9156f551f3 100644 --- a/src/common/FastCDC.h +++ b/src/common/FastCDC.h @@ -23,11 +23,19 @@ class FastCDC : public CDC { private: - int target_bits, min_bits, max_bits; - uint64_t target_mask, small_mask, large_mask; + int target_bits; ///< target chunk size bits (1 << target_bits) + int min_bits; ///< hard minimum chunk size bits (1 << min_bits) + int max_bits; ///< hard maximum chunk size bits (1 << max_bits) + + uint64_t target_mask; ///< maskA in the paper (target_bits set) + uint64_t small_mask; ///< maskS in the paper (more bits set) + uint64_t large_mask; ///< maskL in the paper (fewer bits set) + + /// lookup table with pseudorandom values for each byte uint64_t table[256]; - const size_t window = 64; + /// window size in bytes + const size_t window = sizeof(uint64_t)*8; // bits in uint64_t void _setup(int target, int window_bits); -- 2.39.5