// Diff excerpt of the "Empty" test: a default-constructed bloom_filter must
// report no membership for any of the integers 0..99, whether the value is
// queried as a number or as the string produced by stringify().
TEST(BloomFilter, Empty) {
bloom_filter bf;
for (int i=0; i<100; ++i) {
// The changed line passes the loop index with an explicit uint32_t cast
// (presumably to select the uint32_t overload of contains() -- confirm
// against the bloom_filter header).
- ASSERT_FALSE(bf.contains(i));
+ ASSERT_FALSE(bf.contains((uint32_t) i));
ASSERT_FALSE(bf.contains(stringify(i)));
}
}
}
// Diff excerpt of the "SweepInt" test. For each filter configuration it
// inserts `max` values, probes the filter with 100x as many further values,
// prints one row of statistics, and asserts bounds on the measured rate and
// on the filter density.
// The loop headers and the declarations of `max`, `fpp`, `bf`, `bl`, `actual`
// and `byte_per_insert` are in lines that this excerpt does not show.
TEST(BloomFilter, SweepInt) {
+ unsigned int seed = 0;
// Print floating-point columns in fixed notation with 5 digits of precision.
std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
std::cout.precision(5);
// Header row of the tab-separated table; the matching data row is printed
// further down in this test.
std::cout << "# max\tfpp\tactual\tsize\tB/insert\tdensity\tapprox_element_count" << std::endl;
ASSERT_TRUE(123);
ASSERT_TRUE(456);
+ // In Ceph code, the uint32_t input routines to the bloom filter
+ // are used with hash values that are uniformly distributed over
+ // the uint32_t range. To model this behavior in the test, we
+ // pass in values generated by a pseudo-random generator.
+ // To make the test reproducible anyway, use a fixed seed here,
+ // but a different one in each instance.
// srand() receives the current seed, and the post-increment advances it for
// the next time this statement runs.
// NOTE(review): rand() only yields values in [0, RAND_MAX], which may be
// narrower than the full uint32_t range -- confirm this is an acceptable
// model of the hash inputs described above.
+ srand(seed++);
+
// Populate the filter with `max` values. The old code inserted 0..max-1; the
// new code inserts pseudo-random values.
for (int n = 0; n < max; n++)
- bf.insert(n);
+ bf.insert((uint32_t) rand());
// Probe the filter max * 100 times and count how many probes it reports as
// present.
// NOTE(review): the random probes are not checked against the inserted
// values, so a probe that happens to equal an inserted value also counts as
// a hit -- confirm this is negligible for the asserted bounds.
int test = max * 100;
int hit = 0;
for (int n = 0; n < test; n++)
- if (bf.contains(100000 + n))
+ if (bf.contains((uint32_t) rand()))
hit++;
ASSERT_TRUE(123);
// Data row matching the header printed at the top of the test.
std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert
<< "\t" << bf.density() << "\t" << bf.approx_unique_element_count() << std::endl;
// `actual` (presumably hit / test, computed in lines not shown -- confirm)
// must now lie within a factor of 3 of the target `fpp`; the old bounds
// allowed a factor of 10.
- ASSERT_TRUE(actual < fpp * 10);
- ASSERT_TRUE(actual > fpp / 10);
+ ASSERT_TRUE(actual < fpp * 3);
+ ASSERT_TRUE(actual > fpp / 3);
// The density reported by the filter must stay strictly between 0.40 and 0.60.
ASSERT_TRUE(bf.density() > 0.40);
ASSERT_TRUE(bf.density() < 0.60);
}
TEST(BloomFilter, CompressibleSweep) {
+ unsigned int seed = 0;
std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
std::cout.precision(5);
std::cout << "# max\tins\test ins\tafter\ttgtfpp\tactual\tsize\tb/elem\n";
int max = 1024;
for (int div = 1; div < 10; div++) {
compressible_bloom_filter bf(max, fpp, 1);
+
+ // See the comment in SweepInt.
+ srand(seed++);
+
+ std::vector<uint32_t> values;
int t = max/div;
- for (int n = 0; n < t; n++)
- bf.insert(n);
+ for (int n = 0; n < t; n++) {
+ uint32_t val = (uint32_t) rand();
+ bf.insert(val);
+ values.push_back(val);
+ }
unsigned est = bf.approx_unique_element_count();
if (div > 1)
bf.compress(1.0 / div);
- for (int n = 0; n < t; n++)
- ASSERT_TRUE(bf.contains(n));
+ for (auto val : values)
+ ASSERT_TRUE(bf.contains(val));
int test = max * 100;
int hit = 0;
for (int n = 0; n < test; n++)
- if (bf.contains(100000 + n))
+ if (bf.contains((uint32_t) rand()))
hit++;
double actual = (double)hit / (double)test;