From: Radheshyam Balasundaram Date: Thu, 28 Aug 2014 17:42:23 +0000 (-0700) Subject: Implementing a cache friendly version of Cuckoo Hash X-Git-Tag: v3.5~3^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7f7144838846495b766812dd70ee0163b1584a98;p=rocksdb.git Implementing a cache friendly version of Cuckoo Hash Summary: This implements a cache friendly version of Cuckoo Hash in which, in case of collision, we try to insert in next few locations. The size of the neighborhood to check is taken as an input parameter in builder and stored in the table. Test Plan: make check all cuckoo_table_{db,reader,builder}_test Reviewers: sdong, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D22455 --- diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 66556e7ed..3a47ed939 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -227,15 +227,46 @@ extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options = PlainTableOptions()); struct CuckooTablePropertyNames { + // The key that is used to fill empty buckets. static const std::string kEmptyKey; + // Fixed length of value. static const std::string kValueLength; - static const std::string kNumHashTable; - static const std::string kMaxNumBuckets; + // Number of hash functions used in Cuckoo Hash. + static const std::string kNumHashFunc; + // It denotes the number of buckets in a Cuckoo Block. Given a key and a + // particular hash function, a Cuckoo Block is a set of consecutive buckets, + // where starting bucket id is given by the hash function on the key. In case + // of a collision during inserting the key, the builder tries to insert the + // key in other locations of the cuckoo block before using the next hash + // function. This reduces cache miss during read operation in case of + // collision. + static const std::string kCuckooBlockSize; + // Size of the hash table. 
Use this number to compute the modulo of hash + // function. The actual number of buckets will be kHashTableSize + + // kCuckooBlockSize - 1. The last kCuckooBlockSize-1 buckets are used to + // accommodate the Cuckoo Block from end of hash table, due to cache friendly + // implementation. + static const std::string kHashTableSize; + // Denotes if the key stored in the file is Internal Key (if false) + // or User Key only (if true). static const std::string kIsLastLevel; }; +// Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing +// @hash_table_ratio: Determines the utilization of hash tables. Smaller values +// result in larger hash tables with fewer collisions. +// @max_search_depth: A property used by builder to determine the depth to go to +// to search for a path to displace elements in case of +// collision. See Builder.MakeSpaceForKey method. Higher +// values result in more efficient hash tables with fewer +// lookups but take more time to build. +// @cuckoo_block_size: In case of collision while inserting, the builder +// attempts to insert in the next cuckoo_block_size +// locations before skipping over to the next Cuckoo hash +// function. This makes lookups more cache friendly in case +// of collisions. 
extern TableFactory* NewCuckooTableFactory(double hash_table_ratio = 0.9, - uint32_t max_search_depth = 100); + uint32_t max_search_depth = 100, uint32_t cuckoo_block_size = 5); #endif // ROCKSDB_LITE diff --git a/table/cuckoo_table_builder.cc b/table/cuckoo_table_builder.cc index 9e02bb04e..de39fc30f 100644 --- a/table/cuckoo_table_builder.cc +++ b/table/cuckoo_table_builder.cc @@ -24,14 +24,16 @@ namespace rocksdb { const std::string CuckooTablePropertyNames::kEmptyKey = "rocksdb.cuckoo.bucket.empty.key"; -const std::string CuckooTablePropertyNames::kNumHashTable = +const std::string CuckooTablePropertyNames::kNumHashFunc = "rocksdb.cuckoo.hash.num"; -const std::string CuckooTablePropertyNames::kMaxNumBuckets = - "rocksdb.cuckoo.bucket.maxnum"; +const std::string CuckooTablePropertyNames::kHashTableSize = + "rocksdb.cuckoo.hash.size"; const std::string CuckooTablePropertyNames::kValueLength = "rocksdb.cuckoo.value.length"; const std::string CuckooTablePropertyNames::kIsLastLevel = "rocksdb.cuckoo.file.islastlevel"; +const std::string CuckooTablePropertyNames::kCuckooBlockSize = + "rocksdb.cuckoo.hash.cuckooblocksize"; // Obtained by running echo rocksdb.table.cuckoo | sha1sum extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull; @@ -39,13 +41,14 @@ extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull; CuckooTableBuilder::CuckooTableBuilder( WritableFile* file, double hash_table_ratio, uint32_t max_num_hash_table, uint32_t max_search_depth, - const Comparator* user_comparator, + const Comparator* user_comparator, uint32_t cuckoo_block_size, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) - : num_hash_table_(2), + : num_hash_func_(2), file_(file), hash_table_ratio_(hash_table_ratio), - max_num_hash_table_(max_num_hash_table), + max_num_hash_func_(max_num_hash_table), max_search_depth_(max_search_depth), + cuckoo_block_size_(std::max(1U, cuckoo_block_size)), is_last_level_file_(false), has_seen_first_key_(false), 
ucomp_(user_comparator), @@ -101,8 +104,8 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { } Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { - uint64_t num_buckets = kvs_.size() / hash_table_ratio_; - buckets->resize(num_buckets); + uint64_t hash_table_size = kvs_.size() / hash_table_ratio_; + buckets->resize(hash_table_size + cuckoo_block_size_ - 1); uint64_t make_space_for_key_call_id = 0; for (uint32_t vector_idx = 0; vector_idx < kvs_.size(); vector_idx++) { uint64_t bucket_id; @@ -110,39 +113,49 @@ Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { autovector hash_vals; Slice user_key = is_last_level_file_ ? kvs_[vector_idx].first : ExtractUserKey(kvs_[vector_idx].first); - for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) { - uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets); - if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) { - bucket_id = hash_val; - bucket_found = true; - break; - } else { - if (ucomp_->Compare(user_key, is_last_level_file_ - ? Slice(kvs_[(*buckets)[hash_val].vector_idx].first) - : ExtractUserKey( - kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) { - return Status::NotSupported("Same key is being inserted again."); + for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found; + ++hash_cnt) { + uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, hash_table_size); + // If there is a collision, check next cuckoo_block_size_ locations for + // empty locations. While checking, if we reach end of the hash table, + // stop searching and proceed for next hash function. + for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; + ++block_idx, ++hash_val) { + if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) { + bucket_id = hash_val; + bucket_found = true; + break; + } else { + if (ucomp_->Compare(user_key, is_last_level_file_ + ? 
Slice(kvs_[(*buckets)[hash_val].vector_idx].first) + : ExtractUserKey( + kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) { + return Status::NotSupported("Same key is being inserted again."); + } + hash_vals.push_back(hash_val); } - hash_vals.push_back(hash_val); } } while (!bucket_found && !MakeSpaceForKey(hash_vals, - ++make_space_for_key_call_id, buckets, &bucket_id)) { + hash_table_size, ++make_space_for_key_call_id, buckets, &bucket_id)) { // Rehash by increashing number of hash tables. - if (num_hash_table_ >= max_num_hash_table_) { - return Status::NotSupported("Too many collissions. Unable to hash."); + if (num_hash_func_ >= max_num_hash_func_) { + return Status::NotSupported("Too many collisions. Unable to hash."); } // We don't really need to rehash the entire table because old hashes are // still valid and we only increased the number of hash functions. uint64_t hash_val = get_slice_hash_(user_key, - num_hash_table_, num_buckets); - ++num_hash_table_; - if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) { - bucket_found = true; - bucket_id = hash_val; - break; - } else { - hash_vals.push_back(hash_val); + num_hash_func_, hash_table_size); + ++num_hash_func_; + for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; + ++block_idx, ++hash_val) { + if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) { + bucket_found = true; + bucket_id = hash_val; + break; + } else { + hash_vals.push_back(hash_val); + } } } (*buckets)[bucket_id].vector_idx = vector_idx; @@ -226,16 +239,22 @@ Status CuckooTableBuilder::Finish() { properties_.user_collected_properties[ CuckooTablePropertyNames::kEmptyKey] = unused_bucket; properties_.user_collected_properties[ - CuckooTablePropertyNames::kNumHashTable].assign( - reinterpret_cast(&num_hash_table_), sizeof(num_hash_table_)); - uint64_t num_buckets = buckets.size(); + CuckooTablePropertyNames::kNumHashFunc].assign( + reinterpret_cast(&num_hash_func_), sizeof(num_hash_func_)); + + uint64_t hash_table_size = 
buckets.size() - cuckoo_block_size_ + 1; properties_.user_collected_properties[ - CuckooTablePropertyNames::kMaxNumBuckets].assign( - reinterpret_cast(&num_buckets), sizeof(num_buckets)); + CuckooTablePropertyNames::kHashTableSize].assign( + reinterpret_cast(&hash_table_size), + sizeof(hash_table_size)); properties_.user_collected_properties[ CuckooTablePropertyNames::kIsLastLevel].assign( reinterpret_cast(&is_last_level_file_), sizeof(is_last_level_file_)); + properties_.user_collected_properties[ + CuckooTablePropertyNames::kCuckooBlockSize].assign( + reinterpret_cast(&cuckoo_block_size_), + sizeof(cuckoo_block_size_)); // Write meta blocks. MetaIndexBuilder meta_index_builder; @@ -307,6 +326,7 @@ uint64_t CuckooTableBuilder::FileSize() const { // If tree depth exceedes max depth, we return false indicating failure. bool CuckooTableBuilder::MakeSpaceForKey( const autovector& hash_vals, + const uint64_t hash_table_size, const uint64_t make_space_for_key_call_id, std::vector* buckets, uint64_t* bucket_id) { @@ -322,12 +342,13 @@ bool CuckooTableBuilder::MakeSpaceForKey( std::vector tree; // We want to identify already visited buckets in the current method call so // that we don't add same buckets again for exploration in the tree. - // We do this by maintaining a count of current method call, which acts as a - // unique id for this invocation of the method. We store this number into - // the nodes that we explore in current method call. + // We do this by maintaining a count of current method call in + // make_space_for_key_call_id, which acts as a unique id for this invocation + // of the method. We store this number into the nodes that we explore in + // current method call. // It is unlikely for the increment operation to overflow because the maximum - // no. of times this will be called is <= max_num_hash_table_ + kvs_.size(). - for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) { + // no. 
of times this will be called is <= max_num_hash_func_ + kvs_.size(). + for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) { uint64_t bucket_id = hash_vals[hash_cnt]; (*buckets)[bucket_id].make_space_for_key_call_id = make_space_for_key_call_id; @@ -342,22 +363,26 @@ bool CuckooTableBuilder::MakeSpaceForKey( break; } CuckooBucket& curr_bucket = (*buckets)[curr_node.bucket_id]; - for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) { + for (uint32_t hash_cnt = 0; + hash_cnt < num_hash_func_ && !null_found; ++hash_cnt) { uint64_t child_bucket_id = get_slice_hash_( is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first : ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first)), - hash_cnt, buckets->size()); - if ((*buckets)[child_bucket_id].make_space_for_key_call_id == - make_space_for_key_call_id) { - continue; - } - (*buckets)[child_bucket_id].make_space_for_key_call_id = - make_space_for_key_call_id; - tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1, - curr_pos)); - if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) { - null_found = true; - break; + hash_cnt, hash_table_size); + for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; + ++block_idx, ++child_bucket_id) { + if ((*buckets)[child_bucket_id].make_space_for_key_call_id == + make_space_for_key_call_id) { + continue; + } + (*buckets)[child_bucket_id].make_space_for_key_call_id = + make_space_for_key_call_id; + tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1, + curr_pos)); + if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) { + null_found = true; + break; + } } } ++curr_pos; @@ -367,10 +392,10 @@ bool CuckooTableBuilder::MakeSpaceForKey( // There is an empty node in tree.back(). Now, traverse the path from this // empty node to top of the tree and at every node in the path, replace // child with the parent. 
Stop when first level is reached in the tree - // (happens when 0 <= bucket_to_replace_pos < num_hash_table_) and return + // (happens when 0 <= bucket_to_replace_pos < num_hash_func_) and return // this location in first level for target key to be inserted. uint32_t bucket_to_replace_pos = tree.size()-1; - while (bucket_to_replace_pos >= num_hash_table_) { + while (bucket_to_replace_pos >= num_hash_func_) { CuckooNode& curr_node = tree[bucket_to_replace_pos]; (*buckets)[curr_node.bucket_id] = (*buckets)[tree[curr_node.parent_pos].bucket_id]; diff --git a/table/cuckoo_table_builder.h b/table/cuckoo_table_builder.h index 92f5c9cee..15f976d23 100644 --- a/table/cuckoo_table_builder.h +++ b/table/cuckoo_table_builder.h @@ -23,6 +23,7 @@ class CuckooTableBuilder: public TableBuilder { CuckooTableBuilder( WritableFile* file, double hash_table_ratio, uint32_t max_num_hash_table, uint32_t max_search_depth, const Comparator* user_comparator, + uint32_t cuckoo_block_size, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)); // REQUIRES: Either Finish() or Abandon() has been called. @@ -60,7 +61,7 @@ class CuckooTableBuilder: public TableBuilder { CuckooBucket() : vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {} uint32_t vector_idx; - // This number will not exceed kvs_.size() + max_num_hash_table_. + // This number will not exceed kvs_.size() + max_num_hash_func_. // We assume number of items is <= 2^32. 
uint32_t make_space_for_key_call_id; }; @@ -68,16 +69,18 @@ class CuckooTableBuilder: public TableBuilder { bool MakeSpaceForKey( const autovector& hash_vals, + const uint64_t hash_table_size, const uint64_t call_id, std::vector* buckets, uint64_t* bucket_id); Status MakeHashTable(std::vector* buckets); - uint32_t num_hash_table_; + uint32_t num_hash_func_; WritableFile* file_; const double hash_table_ratio_; - const uint32_t max_num_hash_table_; + const uint32_t max_num_hash_func_; const uint32_t max_search_depth_; + const uint32_t cuckoo_block_size_; bool is_last_level_file_; Status status_; std::vector> kvs_; diff --git a/table/cuckoo_table_builder_test.cc b/table/cuckoo_table_builder_test.cc index 047f35ce1..5de97a52b 100644 --- a/table/cuckoo_table_builder_test.cc +++ b/table/cuckoo_table_builder_test.cc @@ -37,8 +37,9 @@ class CuckooBuilderTest { void CheckFileContents(const std::vector& keys, const std::vector& values, const std::vector& expected_locations, - std::string expected_unused_bucket, uint64_t expected_max_buckets, - uint32_t expected_num_hash_fun, bool expected_is_last_level) { + std::string expected_unused_bucket, uint64_t expected_table_size, + uint32_t expected_num_hash_func, bool expected_is_last_level, + uint32_t expected_cuckoo_block_size = 1) { // Read file unique_ptr read_file; ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_)); @@ -51,7 +52,8 @@ class CuckooBuilderTest { kCuckooTableMagicNumber, env_, nullptr, &props)); ASSERT_EQ(props->num_entries, keys.size()); ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size()); - ASSERT_EQ(props->data_size, keys.size()*expected_unused_bucket.size()); + ASSERT_EQ(props->data_size, expected_unused_bucket.size() * + (expected_table_size + expected_cuckoo_block_size - 1)); ASSERT_EQ(props->raw_key_size, keys.size()*props->fixed_key_len); // Check unused bucket. 
@@ -65,14 +67,18 @@ class CuckooBuilderTest { CuckooTablePropertyNames::kValueLength].data()); ASSERT_EQ(values.empty() ? 0 : values[0].size(), value_len_found); ASSERT_EQ(props->raw_value_size, values.size()*value_len_found); - const uint64_t max_buckets = + const uint64_t table_size = *reinterpret_cast(props->user_collected_properties[ - CuckooTablePropertyNames::kMaxNumBuckets].data()); - ASSERT_EQ(expected_max_buckets, max_buckets); - const uint32_t num_hash_fun_found = + CuckooTablePropertyNames::kHashTableSize].data()); + ASSERT_EQ(expected_table_size, table_size); + const uint32_t num_hash_func_found = *reinterpret_cast(props->user_collected_properties[ - CuckooTablePropertyNames::kNumHashTable].data()); - ASSERT_EQ(expected_num_hash_fun, num_hash_fun_found); + CuckooTablePropertyNames::kNumHashFunc].data()); + ASSERT_EQ(expected_num_hash_func, num_hash_func_found); + const uint32_t cuckoo_block_size = + *reinterpret_cast(props->user_collected_properties[ + CuckooTablePropertyNames::kCuckooBlockSize].data()); + ASSERT_EQ(expected_cuckoo_block_size, cuckoo_block_size); const bool is_last_level_found = *reinterpret_cast(props->user_collected_properties[ CuckooTablePropertyNames::kIsLastLevel].data()); @@ -82,7 +88,7 @@ class CuckooBuilderTest { // Check contents of the bucket. 
std::vector keys_found(keys.size(), false); uint32_t bucket_size = expected_unused_bucket.size(); - for (uint32_t i = 0; i < max_buckets; ++i) { + for (uint32_t i = 0; i < table_size + cuckoo_block_size - 1; ++i) { Slice read_slice; ASSERT_OK(read_file->Read(i*bucket_size, bucket_size, &read_slice, nullptr)); @@ -119,7 +125,7 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) { fname = test::TmpDir() + "/NoCollisionFullKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - 4, 100, BytewiseComparator(), GetSliceHash); + 4, 100, BytewiseComparator(), 1, GetSliceHash); ASSERT_OK(builder.status()); ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); @@ -146,7 +152,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { fname = test::TmpDir() + "/NoCollisionFullKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -156,11 +162,11 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); - uint32_t expected_max_buckets = keys.size() / kHashTableRatio; + uint32_t expected_table_size = keys.size() / kHashTableRatio; std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, expected_locations, - expected_unused_bucket, expected_max_buckets, 2, false); + expected_unused_bucket, expected_table_size, 2, false); } TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { @@ -183,7 +189,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { fname = test::TmpDir() + 
"/WithCollisionFullKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -193,11 +199,49 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); - uint32_t expected_max_buckets = keys.size() / kHashTableRatio; + uint32_t expected_table_size = keys.size() / kHashTableRatio; std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, expected_locations, - expected_unused_bucket, expected_max_buckets, 4, false); + expected_unused_bucket, expected_table_size, 4, false); +} + +TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) { + uint32_t num_hash_fun = 4; + std::vector user_keys = {"key01", "key02", "key03", "key04"}; + std::vector values = {"v01", "v02", "v03", "v04"}; + hash_map = { + {user_keys[0], {0, 1, 2, 3}}, + {user_keys[1], {0, 1, 2, 3}}, + {user_keys[2], {0, 1, 2, 3}}, + {user_keys[3], {0, 1, 2, 3}}, + }; + std::vector expected_locations = {0, 1, 2, 3}; + std::vector keys; + for (auto& user_key : user_keys) { + keys.push_back(GetInternalKey(user_key, false)); + } + + unique_ptr writable_file; + uint32_t cuckoo_block_size = 2; + fname = test::TmpDir() + "/WithCollisionFullKey2"; + ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); + CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, + num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, GetSliceHash); + ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(keys[i]), Slice(values[i])); + 
ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + ASSERT_OK(builder.Finish()); + ASSERT_OK(writable_file->Close()); + + uint32_t expected_table_size = keys.size() / kHashTableRatio; + std::string expected_unused_bucket = GetInternalKey("key00", true); + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(keys, values, expected_locations, + expected_unused_bucket, expected_table_size, 3, false, cuckoo_block_size); } TEST(CuckooBuilderTest, WithCollisionPathFullKey) { @@ -225,7 +269,46 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) { fname = test::TmpDir() + "/WithCollisionPathFullKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); + ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(keys[i]), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + ASSERT_OK(builder.Finish()); + ASSERT_OK(writable_file->Close()); + + uint32_t expected_table_size = keys.size() / kHashTableRatio; + std::string expected_unused_bucket = GetInternalKey("key00", true); + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(keys, values, expected_locations, + expected_unused_bucket, expected_table_size, 2, false); +} + +TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) { + uint32_t num_hash_fun = 2; + std::vector user_keys = {"key01", "key02", "key03", + "key04", "key05"}; + std::vector values = {"v01", "v02", "v03", "v04", "v05"}; + hash_map = { + {user_keys[0], {0, 1}}, + {user_keys[1], {1, 2}}, + {user_keys[2], {3, 4}}, + {user_keys[3], {4, 5}}, + {user_keys[4], {0, 3}}, + }; + std::vector expected_locations = {2, 1, 3, 4, 0}; + std::vector keys; + for (auto& user_key : user_keys) { + 
keys.push_back(GetInternalKey(user_key, false)); + } + + unique_ptr writable_file; + fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock"; + ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); + CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, + num_hash_fun, 100, BytewiseComparator(), 2, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -235,11 +318,11 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) { ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); - uint32_t expected_max_buckets = keys.size() / kHashTableRatio; + uint32_t expected_table_size = keys.size() / kHashTableRatio; std::string expected_unused_bucket = GetInternalKey("key00", true); expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(keys, values, expected_locations, - expected_unused_bucket, expected_max_buckets, 2, false); + expected_unused_bucket, expected_table_size, 2, false, 2); } TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { @@ -258,7 +341,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { fname = test::TmpDir() + "/NoCollisionUserKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); @@ -268,11 +351,11 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); - uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio; + uint32_t expected_table_size = user_keys.size() / kHashTableRatio; std::string expected_unused_bucket = "key00"; expected_unused_bucket += 
std::string(values[0].size(), 'a'); CheckFileContents(user_keys, values, expected_locations, - expected_unused_bucket, expected_max_buckets, 2, true); + expected_unused_bucket, expected_table_size, 2, true); } TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { @@ -291,7 +374,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { fname = test::TmpDir() + "/WithCollisionUserKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); @@ -301,11 +384,11 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); - uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio; + uint32_t expected_table_size = user_keys.size() / kHashTableRatio; std::string expected_unused_bucket = "key00"; expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(user_keys, values, expected_locations, - expected_unused_bucket, expected_max_buckets, 4, true); + expected_unused_bucket, expected_table_size, 4, true); } TEST(CuckooBuilderTest, WithCollisionPathUserKey) { @@ -326,7 +409,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) { fname = test::TmpDir() + "/WithCollisionPathUserKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 2, BytewiseComparator(), GetSliceHash); + num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); @@ -336,11 +419,11 @@ 
TEST(CuckooBuilderTest, WithCollisionPathUserKey) { ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); - uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio; + uint32_t expected_table_size = user_keys.size() / kHashTableRatio; std::string expected_unused_bucket = "key00"; expected_unused_bucket += std::string(values[0].size(), 'a'); CheckFileContents(user_keys, values, expected_locations, - expected_unused_bucket, expected_max_buckets, 2, true); + expected_unused_bucket, expected_table_size, 2, true); } TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) { @@ -362,7 +445,7 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) { fname = test::TmpDir() + "/WithCollisionPathUserKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 2, BytewiseComparator(), GetSliceHash); + num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value")); @@ -382,7 +465,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) { fname = test::TmpDir() + "/FailWhenSameKeyInserted"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash); ASSERT_OK(builder.status()); builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1")); diff --git a/table/cuckoo_table_factory.cc b/table/cuckoo_table_factory.cc index 71893702b..a967e5258 100644 --- a/table/cuckoo_table_factory.cc +++ b/table/cuckoo_table_factory.cc @@ -49,7 +49,7 @@ TableBuilder* CuckooTableFactory::NewTableBuilder( WritableFile* file, CompressionType compression_type) const { return new CuckooTableBuilder(file, hash_table_ratio_, kMaxNumHashTable, 
max_search_depth_, internal_comparator.user_comparator(), - GetSliceMurmurHash); + cuckoo_block_size_, GetSliceMurmurHash); } std::string CuckooTableFactory::GetPrintableTableOptions() const { @@ -68,8 +68,9 @@ std::string CuckooTableFactory::GetPrintableTableOptions() const { } TableFactory* NewCuckooTableFactory(double hash_table_ratio, - uint32_t max_search_depth) { - return new CuckooTableFactory(hash_table_ratio, max_search_depth); + uint32_t max_search_depth, uint32_t cuckoo_block_size) { + return new CuckooTableFactory( + hash_table_ratio, max_search_depth, cuckoo_block_size); } } // namespace rocksdb diff --git a/table/cuckoo_table_factory.h b/table/cuckoo_table_factory.h index 573d769e8..4e24bef4e 100644 --- a/table/cuckoo_table_factory.h +++ b/table/cuckoo_table_factory.h @@ -23,9 +23,11 @@ extern uint64_t GetSliceMurmurHash(const Slice& s, uint32_t index, // - Does not support Merge operations. class CuckooTableFactory : public TableFactory { public: - CuckooTableFactory(double hash_table_ratio, uint32_t max_search_depth) + CuckooTableFactory(double hash_table_ratio, uint32_t max_search_depth, + uint32_t cuckoo_block_size) : hash_table_ratio_(hash_table_ratio), - max_search_depth_(max_search_depth) {} + max_search_depth_(max_search_depth), + cuckoo_block_size_(cuckoo_block_size) {} ~CuckooTableFactory() {} const char* Name() const override { return "CuckooTable"; } @@ -50,6 +52,7 @@ class CuckooTableFactory : public TableFactory { private: const double hash_table_ratio_; const uint32_t max_search_depth_; + const uint32_t cuckoo_block_size_; }; } // namespace rocksdb diff --git a/table/cuckoo_table_reader.cc b/table/cuckoo_table_reader.cc index 636db5bfa..39b91235d 100644 --- a/table/cuckoo_table_reader.cc +++ b/table/cuckoo_table_reader.cc @@ -21,6 +21,9 @@ #include "util/coding.h" namespace rocksdb { +namespace { + static const uint64_t CACHE_LINE_MASK = ~(CACHE_LINE_SIZE - 1); +} extern const uint64_t kCuckooTableMagicNumber; @@ -44,12 +47,12 @@ 
CuckooTableReader::CuckooTableReader( } table_props_.reset(props); auto& user_props = props->user_collected_properties; - auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashTable); + auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashFunc); if (hash_funs == user_props.end()) { status_ = Status::InvalidArgument("Number of hash functions not found"); return; } - num_hash_fun_ = *reinterpret_cast(hash_funs->second.data()); + num_hash_func_ = *reinterpret_cast(hash_funs->second.data()); auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey); if (unused_key == user_props.end()) { status_ = Status::InvalidArgument("Empty bucket value not found"); @@ -67,18 +70,29 @@ CuckooTableReader::CuckooTableReader( value_length->second.data()); bucket_length_ = key_length_ + value_length_; - auto num_buckets = user_props.find(CuckooTablePropertyNames::kMaxNumBuckets); - if (num_buckets == user_props.end()) { - status_ = Status::InvalidArgument("Num buckets not found"); + auto hash_table_size = user_props.find( + CuckooTablePropertyNames::kHashTableSize); + if (hash_table_size == user_props.end()) { + status_ = Status::InvalidArgument("Hash table size not found"); return; } - num_buckets_ = *reinterpret_cast(num_buckets->second.data()); + hash_table_size_ = *reinterpret_cast( + hash_table_size->second.data()); auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel); if (is_last_level == user_props.end()) { status_ = Status::InvalidArgument("Is last level not found"); return; } is_last_level_ = *reinterpret_cast(is_last_level->second.data()); + auto cuckoo_block_size = user_props.find( + CuckooTablePropertyNames::kCuckooBlockSize); + if (cuckoo_block_size == user_props.end()) { + status_ = Status::InvalidArgument("Cuckoo block size not found"); + return; + } + cuckoo_block_size_ = *reinterpret_cast( + cuckoo_block_size->second.data()); + cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1; status_ 
= file_->Read(0, file_size, &file_data_, nullptr); } @@ -89,40 +103,45 @@ Status CuckooTableReader::Get( void (*mark_key_may_exist_handler)(void* handle_context)) { assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0)); Slice user_key = ExtractUserKey(key); - for (uint32_t hash_cnt = 0; hash_cnt < num_hash_fun_; ++hash_cnt) { - uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets_); - assert(hash_val < num_buckets_); - const char* bucket = &file_data_.data()[hash_val * bucket_length_]; - if (ucomp_->Compare(Slice(unused_key_.data(), user_key.size()), - Slice(bucket, user_key.size())) == 0) { - return Status::OK(); - } - // Here, we compare only the user key part as we support only one entry - // per user key and we don't support sanpshot. - if (ucomp_->Compare(user_key, Slice(bucket, user_key.size())) == 0) { - Slice value = Slice(&bucket[key_length_], value_length_); - if (is_last_level_) { - ParsedInternalKey found_ikey(Slice(bucket, key_length_), 0, kTypeValue); - result_handler(handle_context, found_ikey, value); - } else { - Slice full_key(bucket, key_length_); - ParsedInternalKey found_ikey; - ParseInternalKey(full_key, &found_ikey); - result_handler(handle_context, found_ikey, value); + for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) { + uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, hash_table_size_); + assert(hash_val < hash_table_size_); + for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; + ++block_idx, ++hash_val) { + const char* bucket = &file_data_.data()[hash_val * bucket_length_]; + if (ucomp_->Compare(Slice(unused_key_.data(), user_key.size()), + Slice(bucket, user_key.size())) == 0) { + return Status::OK(); + } + // Here, we compare only the user key part as we support only one entry + // per user key and we don't support snapshot.
+ if (ucomp_->Compare(user_key, Slice(bucket, user_key.size())) == 0) { + Slice value = Slice(&bucket[key_length_], value_length_); + if (is_last_level_) { + ParsedInternalKey found_ikey( + Slice(bucket, key_length_), 0, kTypeValue); + result_handler(handle_context, found_ikey, value); + } else { + Slice full_key(bucket, key_length_); + ParsedInternalKey found_ikey; + ParseInternalKey(full_key, &found_ikey); + result_handler(handle_context, found_ikey, value); + } + // We don't support merge operations. So, we return here. + return Status::OK(); } - // We don't support merge operations. So, we return here. - return Status::OK(); } } return Status::OK(); } void CuckooTableReader::Prepare(const Slice& key) { - Slice user_key = ExtractUserKey(key); - // Prefetching first location also helps improve Get performance. - for (uint32_t hash_cnt = 0; hash_cnt < num_hash_fun_; ++hash_cnt) { - uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets_); - PREFETCH(&file_data_.data()[hash_val * bucket_length_], 0, 3); + // Prefetch the first Cuckoo Block. 
+ uint64_t addr = reinterpret_cast<uint64_t>(file_data_.data()) + bucket_length_ + * get_slice_hash_(ExtractUserKey(key), 0, hash_table_size_); + uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_; + for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) { + PREFETCH(reinterpret_cast<const char*>(addr), 0, 3); } } @@ -186,7 +205,9 @@ CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader) void CuckooTableIterator::LoadKeysFromReader() { key_to_bucket_id_.reserve(reader_->GetTableProperties()->num_entries); - for (uint32_t bucket_id = 0; bucket_id < reader_->num_buckets_; bucket_id++) { + uint64_t num_buckets = reader_->hash_table_size_ + + reader_->cuckoo_block_size_ - 1; + for (uint32_t bucket_id = 0; bucket_id < num_buckets; bucket_id++) { Slice read_key; status_ = reader_->file_->Read(bucket_id * reader_->bucket_length_, reader_->key_length_, &read_key, nullptr); diff --git a/table/cuckoo_table_reader.h b/table/cuckoo_table_reader.h index ad5d4ec47..bf31365db 100644 --- a/table/cuckoo_table_reader.h +++ b/table/cuckoo_table_reader.h @@ -65,12 +65,14 @@ class CuckooTableReader: public TableReader { bool is_last_level_; std::shared_ptr<const TableProperties> table_props_; Status status_; - uint32_t num_hash_fun_; + uint32_t num_hash_func_; std::string unused_key_; uint32_t key_length_; uint32_t value_length_; uint32_t bucket_length_; - uint64_t num_buckets_; + uint32_t cuckoo_block_size_; + uint32_t cuckoo_block_bytes_minus_one_; + uint64_t hash_table_size_; const Comparator* ucomp_; uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index, uint64_t max_num_buckets); diff --git a/table/cuckoo_table_reader_test.cc b/table/cuckoo_table_reader_test.cc index c026a2742..dc97bf8d2 100644 --- a/table/cuckoo_table_reader_test.cc +++ b/table/cuckoo_table_reader_test.cc @@ -109,7 +109,7 @@ class CuckooReaderTest { std::unique_ptr<WritableFile> writable_file; ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); CuckooTableBuilder builder( - writable_file.get(), 0.9, kNumHashFunc,
100, ucomp, GetSliceHash); + writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) { builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); @@ -420,7 +420,7 @@ void WriteFile(const std::vector<std::string>& keys, ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); CuckooTableBuilder builder( writable_file.get(), hash_ratio, - kMaxNumHashTable, 1000, test::Uint64Comparator(), GetSliceMurmurHash); + kMaxNumHashTable, 1000, test::Uint64Comparator(), 5, GetSliceMurmurHash); ASSERT_OK(builder.status()); for (uint64_t key_idx = 0; key_idx < num; ++key_idx) { // Value is just a part of key. @@ -446,7 +446,7 @@ void WriteFile(const std::vector<std::string>& keys, int cnt = 0; ASSERT_OK(reader.Get(r_options, Slice(key), &cnt, CheckValue, nullptr)); if (cnt != 1) { - fprintf(stderr, "%" PRIx64 " not found.\n", + fprintf(stderr, "%" PRIu64 " not found.\n", *reinterpret_cast<const uint64_t*>(key.data())); ASSERT_EQ(1, cnt); } @@ -473,7 +473,7 @@ void ReadKeys(const std::vector<std::string>& keys, uint64_t num, const UserCollectedProperties user_props = reader.GetTableProperties()->user_collected_properties; const uint32_t num_hash_fun = *reinterpret_cast<const uint32_t*>( - user_props.at(CuckooTablePropertyNames::kNumHashTable).data()); + user_props.at(CuckooTablePropertyNames::kNumHashFunc).data()); fprintf(stderr, "With %" PRIu64 " items and hash table ratio %f, number of" " hash functions used: %u.\n", num, hash_ratio, num_hash_fun); ReadOptions r_options;