]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
common/bloom_filter: remember original target size
authorSage Weil <sage@inktank.com>
Sat, 5 Oct 2013 05:50:30 +0000 (22:50 -0700)
committerSage Weil <sage@inktank.com>
Sat, 5 Oct 2013 05:57:35 +0000 (22:57 -0700)
This isn't strictly needed for core functionality, but it is convenient to
know.

Signed-off-by: Sage Weil <sage@inktank.com>
src/common/bloom_filter.cc
src/common/bloom_filter.hpp

index a7a4c1d0796b14932c51524c1272451a16e45992..41cad1749092a3a0573c3e19338460031ea59922 100644 (file)
@@ -8,7 +8,8 @@ void bloom_filter::encode(bufferlist& bl) const
 {
   ENCODE_START(2, 2, bl);
   ::encode((uint64_t)salt_count_, bl);
-  ::encode((uint64_t)inserted_element_count_, bl);
+  ::encode((uint64_t)insert_count_, bl);
+  ::encode((uint64_t)target_element_count_, bl);
   ::encode((uint64_t)random_seed_, bl);
   bufferptr bp((const char*)bit_table_, table_size_);
   ::encode(bp, bl);
@@ -22,7 +23,9 @@ void bloom_filter::decode(bufferlist::iterator& p)
   ::decode(v, p);
   salt_count_ = v;
   ::decode(v, p);
-  inserted_element_count_ = v;
+  insert_count_ = v;
+  ::decode(v, p);
+  target_element_count_ = v;
   ::decode(v, p);
   random_seed_ = v;
   bufferlist t;
@@ -46,7 +49,8 @@ void bloom_filter::dump(Formatter *f) const
 {
   f->dump_unsigned("salt_count", salt_count_);
   f->dump_unsigned("table_size", table_size_);
-  f->dump_unsigned("insert_count", inserted_element_count_);
+  f->dump_unsigned("insert_count", insert_count_);
+  f->dump_unsigned("target_element_count", target_element_count_);
   f->dump_unsigned("random_seed", random_seed_);
 
   f->open_array_section("salt_table");
@@ -62,11 +66,11 @@ void bloom_filter::dump(Formatter *f) const
 
 void bloom_filter::generate_test_instances(list<bloom_filter*>& ls)
 {
-  ls.push_back(new bloom_filter(10, .5, 1));
-  ls.push_back(new bloom_filter(10, .5, 1));
+  ls.push_back(new bloom_filter(10, .5, 1, 5));
+  ls.push_back(new bloom_filter(10, .5, 1, 5));
   ls.back()->insert("foo");
   ls.back()->insert("bar");
-  ls.push_back(new bloom_filter(50, .5, 1));
+  ls.push_back(new bloom_filter(50, .5, 1, 5));
   ls.back()->insert("foo");
   ls.back()->insert("bar");
   ls.back()->insert("baz");
@@ -84,7 +88,8 @@ void compressible_bloom_filter::encode(bufferlist& bl) const
   for (vector<size_t>::const_iterator p = size_list.begin();
        p != size_list.end(); ++p)
     ::encode((uint64_t)*p, bl);
-  ::encode((uint64_t)inserted_element_count_, bl);
+  ::encode((uint64_t)insert_count_, bl);
+  ::encode((uint64_t)target_element_count_, bl);
   ::encode((uint64_t)random_seed_, bl);
   bufferptr bp((const char*)bit_table_, table_size_);
   ::encode(bp, bl);
@@ -107,7 +112,9 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p)
   }
 
   ::decode(v, p);
-  inserted_element_count_ = v;
+  insert_count_ = v;
+  ::decode(v, p);
+  target_element_count_ = v;
   ::decode(v, p);
   random_seed_ = v;
   bufferlist t;
@@ -132,7 +139,8 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p)
 void compressible_bloom_filter::dump(Formatter *f) const
 {
   f->dump_unsigned("salt_count", salt_count_);
-  f->dump_unsigned("insert_count", inserted_element_count_);
+  f->dump_unsigned("insert_count", insert_count_);
+  f->dump_unsigned("target_element_count", target_element_count_);
   f->dump_unsigned("random_seed", random_seed_);
   f->dump_unsigned("table_size", table_size_);
   f->open_array_section("table_sizes");
@@ -154,11 +162,11 @@ void compressible_bloom_filter::dump(Formatter *f) const
 
 void compressible_bloom_filter::generate_test_instances(list<compressible_bloom_filter*>& ls)
 {
-  ls.push_back(new compressible_bloom_filter(10, .5, 1));
-  ls.push_back(new compressible_bloom_filter(10, .5, 1));
+  ls.push_back(new compressible_bloom_filter(10, .5, 1, 4));
+  ls.push_back(new compressible_bloom_filter(10, .5, 1, 4));
   ls.back()->insert("foo");
   ls.back()->insert("bar");
-  ls.push_back(new compressible_bloom_filter(50, .5, 1));
+  ls.push_back(new compressible_bloom_filter(50, .5, 1, 4));
   ls.back()->insert("foo");
   ls.back()->insert("bar");
   ls.back()->insert("baz");
index 50349560febd2364bec0a51b4685b8e414fca6f8..c336eef8b6a4951614134f1b7f342fc58a5176e2 100644 (file)
@@ -57,7 +57,8 @@ protected:
   std::vector<bloom_type> salt_;     ///< vector of salts
   std::size_t         salt_count_;   ///< number of salts
   std::size_t         table_size_;   ///< bit table size in bytes
-  std::size_t         inserted_element_count_;  ///< insertion count
+  std::size_t         insert_count_;  ///< insertion count
+  std::size_t         target_element_count_;  ///< target number of unique insertions
   std::size_t         random_seed_;  ///< random seed
 
 public:
@@ -66,7 +67,8 @@ public:
     : bit_table_(0),
       salt_count_(0),
       table_size_(0),
-      inserted_element_count_(0),
+      insert_count_(0),
+      target_element_count_(0),
       random_seed_(0)
   {}
 
@@ -74,7 +76,8 @@ public:
               const double& false_positive_probability,
               const std::size_t& random_seed)
     : bit_table_(0),
-      inserted_element_count_(0),
+      insert_count_(0),
+      target_element_count_(predicted_inserted_element_count),
       random_seed_((random_seed) ? random_seed : 0xA5A5A5A5)
   {
     find_optimal_parameters(predicted_inserted_element_count, false_positive_probability,
@@ -84,11 +87,13 @@ public:
 
   bloom_filter(const std::size_t& salt_count,
               std::size_t table_size,
-              const std::size_t& random_seed)
+              const std::size_t& random_seed,
+              std::size_t target_element_count)
     : bit_table_(0),
       salt_count_(salt_count),
       table_size_(table_size),
-      inserted_element_count_(0),
+      insert_count_(0),
+      target_element_count_(target_element_count),
       random_seed_((random_seed) ? random_seed : 0xA5A5A5A5)
   {
     init();
@@ -110,7 +115,7 @@ public:
     if (this != &filter) {
       salt_count_ = filter.salt_count_;
       table_size_ = filter.table_size_;
-      inserted_element_count_ = filter.inserted_element_count_;
+      insert_count_ = filter.insert_count_;
       random_seed_ = filter.random_seed_;
       delete[] bit_table_;
       bit_table_ = new cell_type[table_size_];
@@ -133,7 +138,7 @@ public:
   inline void clear()
   {
     std::fill_n(bit_table_, table_size_, 0x00);
-    inserted_element_count_ = 0;
+    insert_count_ = 0;
   }
 
   /**
@@ -153,7 +158,7 @@ public:
       compute_indices(hash_ap(val,salt_[i]),bit_index,bit);
       bit_table_[bit_index >> 3] |= bit_mask[bit];
     }
-    ++inserted_element_count_;
+    ++insert_count_;
   }
 
   inline void insert(const unsigned char* key_begin, const std::size_t& length)
@@ -165,7 +170,7 @@ public:
       compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit);
       bit_table_[bit_index >> 3] |= bit_mask[bit];
     }
-    ++inserted_element_count_;
+    ++insert_count_;
   }
 
   template<typename T>
@@ -288,7 +293,9 @@ public:
 
   inline std::size_t element_count() const
   {
-    return inserted_element_count_;
+    return insert_count_;
+  }
+
   }
 
   inline double effective_fpp() const
@@ -300,7 +307,7 @@ public:
       the current number of inserted elements - not the user defined
       predicated/expected number of inserted elements.
     */
-    return std::pow(1.0 - std::exp(-1.0 * salt_.size() * inserted_element_count_ / size()), 1.0 * salt_.size());
+    return std::pow(1.0 - std::exp(-1.0 * salt_.size() * insert_count_ / size()), 1.0 * salt_.size());
   }
 
   inline bloom_filter& operator &= (const bloom_filter& filter)
@@ -552,15 +559,16 @@ public:
   compressible_bloom_filter(const std::size_t& predicted_element_count,
                            const double& false_positive_probability,
                            const std::size_t& random_seed)
-    : bloom_filter(predicted_element_count,false_positive_probability,random_seed)
+    : bloom_filter(predicted_element_count, false_positive_probability, random_seed)
   {
     size_list.push_back(table_size_);
   }
 
   compressible_bloom_filter(const std::size_t& salt_count,
                            std::size_t table_size,
-                           const std::size_t& random_seed)
-    : bloom_filter(salt_count, table_size, random_seed)
+                           const std::size_t& random_seed,
+                           std::size_t target_count)
+    : bloom_filter(salt_count, table_size, random_seed, target_count)
   {
     size_list.push_back(table_size_);
   }