From d07f5f358841546391c4df54ae83742533aadfd8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 3 Oct 2013 16:30:29 -0700 Subject: [PATCH] common/bloom_filter: drop raw_table_size_ member We were storing table_size_ and raw_table_size_, where one is the size in bits and the other is the size in bytes. This is silly. Store only the size in bytes. Also, bytes are always 8 bits, so use bit shifts and drop some of that silliness too. Move the member declarations to the top of the class so you read them before the methods. Fix some annoying whitespace. Avoid allocating a 0-length array. Mark the encoding incompatible with v1. Signed-off-by: Sage Weil --- src/common/bloom_filter.cc | 51 ++++++++++++------------- src/common/bloom_filter.hpp | 76 ++++++++++++++++++------------------- 2 files changed, 61 insertions(+), 66 deletions(-) diff --git a/src/common/bloom_filter.cc b/src/common/bloom_filter.cc index a14434076aa25..a7a4c1d0796b1 100644 --- a/src/common/bloom_filter.cc +++ b/src/common/bloom_filter.cc @@ -6,25 +6,22 @@ void bloom_filter::encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 2, bl); ::encode((uint64_t)salt_count_, bl); - ::encode((uint64_t)table_size_, bl); ::encode((uint64_t)inserted_element_count_, bl); ::encode((uint64_t)random_seed_, bl); - bufferptr bp((const char*)bit_table_, raw_table_size_); + bufferptr bp((const char*)bit_table_, table_size_); ::encode(bp, bl); ENCODE_FINISH(bl); } void bloom_filter::decode(bufferlist::iterator& p) { - DECODE_START(1, p); + DECODE_START(2, p); uint64_t v; ::decode(v, p); salt_count_ = v; ::decode(v, p); - table_size_ = v; - ::decode(v, p); inserted_element_count_ = v; ::decode(v, p); random_seed_ = v; @@ -33,11 +30,14 @@ void bloom_filter::decode(bufferlist::iterator& p) salt_.clear(); generate_unique_salt(); - raw_table_size_ = t.length(); - assert(raw_table_size_ == table_size_ / bits_per_char); + table_size_ = t.length(); delete bit_table_; - bit_table_ = new cell_type[raw_table_size_]; - t.copy(0, raw_table_size_, (char *)bit_table_); + if (table_size_) { + bit_table_ = new cell_type[table_size_]; + t.copy(0, table_size_, (char *)bit_table_); + } else { + bit_table_ = NULL; + } DECODE_FINISH(p); } @@ -46,7 +46,6 @@ void bloom_filter::dump(Formatter *f) const { f->dump_unsigned("salt_count", salt_count_); f->dump_unsigned("table_size", table_size_); - f->dump_unsigned("raw_table_size", raw_table_size_); f->dump_unsigned("insert_count", inserted_element_count_); f->dump_unsigned("random_seed", random_seed_); @@ -56,7 +55,7 @@ void bloom_filter::dump(Formatter *f) const f->close_section(); f->open_array_section("bit_table"); - for (unsigned i = 0; i < raw_table_size_; ++i) + for (unsigned i = 0; i < table_size_; ++i) f->dump_unsigned("byte", (unsigned)bit_table_[i]); f->close_section(); } @@ -78,7 +77,7 @@ void bloom_filter::generate_test_instances(list& ls) void compressible_bloom_filter::encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 2, bl); ::encode((uint64_t)salt_count_, bl); uint32_t s = size_list.size(); ::encode(s, bl); @@ -87,14 +86,14 @@ void compressible_bloom_filter::encode(bufferlist& bl) const ::encode((uint64_t)*p, bl); ::encode((uint64_t)inserted_element_count_, bl); ::encode((uint64_t)random_seed_, bl); - bufferptr bp((const char*)bit_table_, raw_table_size_); + bufferptr bp((const char*)bit_table_, table_size_); ::encode(bp, bl); ENCODE_FINISH(bl); } void compressible_bloom_filter::decode(bufferlist::iterator& p) { - DECODE_START(1, p); + DECODE_START(2, p); uint64_t v; ::decode(v, p); salt_count_ = v; @@ -106,10 +105,6 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p) ::decode(v, p); size_list[i] = v; } - if (size_list.size()) - table_size_ = size_list.back(); - else - table_size_ = 0; ::decode(v, p); inserted_element_count_ = v; @@ -120,11 +115,16 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p) salt_.clear(); generate_unique_salt(); - raw_table_size_ = t.length(); - assert(raw_table_size_ == table_size_ / bits_per_char); + table_size_ = t.length(); + assert((!table_size_ && size_list.empty()) || + table_size_ == size_list.back()); delete bit_table_; - bit_table_ = new cell_type[raw_table_size_]; - t.copy(0, raw_table_size_, (char *)bit_table_); + if (table_size_) { + bit_table_ = new cell_type[table_size_]; + t.copy(0, table_size_, (char *)bit_table_); + } else { + bit_table_ = NULL; + } DECODE_FINISH(p); } @@ -132,10 +132,9 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p) void compressible_bloom_filter::dump(Formatter *f) const { f->dump_unsigned("salt_count", salt_count_); - f->dump_unsigned("raw_table_size", raw_table_size_); f->dump_unsigned("insert_count", inserted_element_count_); f->dump_unsigned("random_seed", random_seed_); - + f->dump_unsigned("table_size", table_size_); f->open_array_section("table_sizes"); for (vector::const_iterator p = size_list.begin(); p != size_list.end(); ++p) @@ -148,7 +147,7 @@ void compressible_bloom_filter::dump(Formatter *f) const f->close_section(); f->open_array_section("bit_table"); - for (unsigned i = 0; i < raw_table_size_; ++i) + for (unsigned i = 0; i < table_size_; ++i) f->dump_unsigned("byte", (unsigned)bit_table_[i]); f->close_section(); } diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp index 1c988fadbd112..50349560febd2 100644 --- a/src/common/bloom_filter.hpp +++ b/src/common/bloom_filter.hpp @@ -53,13 +53,19 @@ protected: typedef unsigned int bloom_type; typedef unsigned char cell_type; + unsigned char* bit_table_; ///< pointer to bit map + std::vector salt_; ///< vector of salts + std::size_t salt_count_; ///< number of salts + std::size_t table_size_; ///< bit table size in bytes + std::size_t inserted_element_count_; ///< insertion count + std::size_t random_seed_; ///< random seed + public: bloom_filter() : bit_table_(0), salt_count_(0), table_size_(0), - raw_table_size_(0), inserted_element_count_(0), random_seed_(0) {} @@ -76,7 +82,8 @@ public: init(); } - bloom_filter(const std::size_t& salt_count, std::size_t table_size, + bloom_filter(const std::size_t& salt_count, + std::size_t table_size, const std::size_t& random_seed) : bit_table_(0), salt_count_(salt_count), @@ -89,9 +96,8 @@ public: void init() { generate_unique_salt(); - raw_table_size_ = table_size_ / bits_per_char; - bit_table_ = new cell_type[raw_table_size_]; - std::fill_n(bit_table_,raw_table_size_,0x00); + bit_table_ = new cell_type[table_size_]; + std::fill_n(bit_table_, table_size_, 0x00); } bloom_filter(const bloom_filter& filter) @@ -104,12 +110,11 @@ public: if (this != &filter) { salt_count_ = filter.salt_count_; table_size_ = filter.table_size_; - raw_table_size_ = filter.raw_table_size_; inserted_element_count_ = filter.inserted_element_count_; random_seed_ = filter.random_seed_; delete[] bit_table_; - bit_table_ = new cell_type[raw_table_size_]; - std::copy(filter.bit_table_,filter.bit_table_ + raw_table_size_,bit_table_); + bit_table_ = new cell_type[table_size_]; + std::copy(filter.bit_table_, filter.bit_table_ + table_size_, bit_table_); salt_ = filter.salt_; } return *this; @@ -127,7 +132,7 @@ public: inline void clear() { - std::fill_n(bit_table_,raw_table_size_,0x00); + std::fill_n(bit_table_, table_size_, 0x00); inserted_element_count_ = 0; } @@ -146,7 +151,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(val,salt_[i]),bit_index,bit); - bit_table_[bit_index / bits_per_char] |= bit_mask[bit]; + bit_table_[bit_index >> 3] |= bit_mask[bit]; } ++inserted_element_count_; } @@ -158,7 +163,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); - bit_table_[bit_index / bits_per_char] |= bit_mask[bit]; + bit_table_[bit_index >> 3] |= bit_mask[bit]; } ++inserted_element_count_; } @@ -207,7 +212,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(val,salt_[i]),bit_index,bit); - if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit]) + if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) { return false; } @@ -222,7 +227,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); - if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit]) + if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) { return false; } @@ -278,7 +283,7 @@ public: inline virtual std::size_t size() const { - return table_size_; + return table_size_ * bits_per_char; } inline std::size_t element_count() const @@ -306,7 +311,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] &= filter.bit_table_[i]; } } @@ -321,7 +326,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] |= filter.bit_table_[i]; } } @@ -336,7 +341,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] ^= filter.bit_table_[i]; } } @@ -352,8 +357,8 @@ protected: inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const { - bit_index = hash % table_size_; - bit = bit_index % bits_per_char; + bit_index = hash % (table_size_ << 3); + bit = bit_index & 7; } void generate_unique_salt() @@ -418,7 +423,8 @@ protected: } else { - std::copy(predef_salt,predef_salt + predef_salt_count,std::back_inserter(salt_)); + std::copy(predef_salt,predef_salt + predef_salt_count, + std::back_inserter(salt_)); srand(static_cast(random_seed_)); while (salt_.size() < salt_count_) { @@ -466,8 +472,8 @@ protected: *salt_count = static_cast(min_k); size_t t = static_cast(min_m); - t += (((t % bits_per_char) != 0) ? (bits_per_char - (t % bits_per_char)) : 0); - *table_size = t; + t += (((t & 7) != 0) ? (bits_per_char - (t & 7)) : 0); + *table_size = t >> 3; } inline bloom_type hash_ap(uint32_t val, bloom_type hash) const @@ -507,14 +513,6 @@ protected: return hash; } - std::vector salt_; - unsigned char* bit_table_; - std::size_t salt_count_; - std::size_t table_size_; - std::size_t raw_table_size_; - std::size_t inserted_element_count_; - std::size_t random_seed_; - public: void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); @@ -569,7 +567,7 @@ public: inline virtual std::size_t size() const { - return size_list.back(); + return size_list.back() * bits_per_char; } inline bool compress(const double& percentage) @@ -581,17 +579,16 @@ public: std::size_t original_table_size = size_list.back(); std::size_t new_table_size = static_cast((size_list.back() * (1.0 - (percentage / 100.0)))); - new_table_size -= (((new_table_size % bits_per_char) != 0) ? (new_table_size % bits_per_char) : 0); - if ((bits_per_char > new_table_size) || (new_table_size >= original_table_size)) + if ((!new_table_size) || (new_table_size >= original_table_size)) { return false; } - cell_type* tmp = new cell_type[new_table_size / bits_per_char]; - std::copy(bit_table_, bit_table_ + (new_table_size / bits_per_char), tmp); - cell_type* itr = bit_table_ + (new_table_size / bits_per_char); - cell_type* end = bit_table_ + (original_table_size / bits_per_char); + cell_type* tmp = new cell_type[new_table_size]; + std::copy(bit_table_, bit_table_ + (new_table_size), tmp); + cell_type* itr = bit_table_ + (new_table_size); + cell_type* end = bit_table_ + (original_table_size); cell_type* itr_tmp = tmp; while (end != itr) @@ -603,7 +600,6 @@ public: bit_table_ = tmp; size_list.push_back(new_table_size); table_size_ = new_table_size; - raw_table_size_ = table_size_ / bits_per_char; return true; } @@ -615,9 +611,9 @@ private: bit_index = hash; for (std::size_t i = 0; i < size_list.size(); ++i) { - bit_index %= size_list[i]; + bit_index %= size_list[i] << 3; } - bit = bit_index % bits_per_char; + bit = bit_index & 7; } std::vector size_list; -- 2.39.5