From: Guido Tagliavini Ponce
Date: Tue, 14 Jun 2022 03:29:00 +0000 (-0700)
Subject: Make the per-shard hash table fixed-size. (#10154)
X-Git-Tag: v7.4.3~39
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f105e1a501c730168fcbdcc5d7a8722b0b997890;p=rocksdb.git

Make the per-shard hash table fixed-size. (#10154)

Summary:
We make the size of the per-shard hash table fixed. The base level of the hash table is now preallocated with the required capacity. The user must provide an estimate of the size of the values. Notice that even though the base level becomes fixed, the chains are still dynamic. Overall, the shard capacity mechanisms haven't changed, so we don't need to test this.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10154

Test Plan: `make -j24 check`

Reviewed By: pdillinger

Differential Revision: D37124451

Pulled By: guidotag

fbshipit-source-id: cba6ac76052fe0ec60b8ff4211b3de7650e80d0c
---
diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index 4f739be2a..504f8f77f 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -287,7 +287,9 @@ class CacheBench { exit(1); } } else if (FLAGS_cache_type == "fast_lru_cache") { - cache_ = NewFastLRUCache(FLAGS_cache_size, FLAGS_num_shard_bits); + cache_ = NewFastLRUCache( + FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits, + false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy); } else if (FLAGS_cache_type == "lru_cache") { LRUCacheOptions opts(FLAGS_cache_size, FLAGS_num_shard_bits, false, 0.5); #ifndef ROCKSDB_LITE diff --git a/cache/cache_test.cc b/cache/cache_test.cc index d7b50c87a..bf24c859e 100644 --- a/cache/cache_test.cc +++ b/cache/cache_test.cc @@ -114,7 +114,9 @@ class CacheTest : public testing::TestWithParam { return NewClockCache(capacity); } if (type == kFast) { - return NewFastLRUCache(capacity); + return NewFastLRUCache( + capacity, 1 /*estimated_value_size*/, -1 /*num_shard_bits*/, + false
/*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy); } return nullptr; } @@ -137,7 +139,8 @@ class CacheTest : public testing::TestWithParam { charge_policy); } if (type == kFast) { - return NewFastLRUCache(capacity, num_shard_bits, strict_capacity_limit, + return NewFastLRUCache(capacity, 1 /*estimated_value_size*/, + num_shard_bits, strict_capacity_limit, charge_policy); } return nullptr; diff --git a/cache/fast_lru_cache.cc b/cache/fast_lru_cache.cc index 31bdbfffd..ba4c0e1e1 100644 --- a/cache/fast_lru_cache.cc +++ b/cache/fast_lru_cache.cc @@ -18,15 +18,17 @@ #include "port/lang.h" #include "util/mutexlock.h" +#define KEY_LENGTH \ + 16 // TODO(guido) Make use of this symbol in other parts of the source code + // (e.g., cache_key.h, cache_test.cc, etc.) + namespace ROCKSDB_NAMESPACE { namespace fast_lru_cache { -LRUHandleTable::LRUHandleTable(int max_upper_hash_bits) - : length_bits_(/* historical starting size*/ 4), - list_(new LRUHandle* [size_t{1} << length_bits_] {}), - elems_(0), - max_length_bits_(max_upper_hash_bits) {} +LRUHandleTable::LRUHandleTable(int hash_bits) + : length_bits_(hash_bits), + list_(new LRUHandle* [size_t{1} << length_bits_] {}) {} LRUHandleTable::~LRUHandleTable() { ApplyToEntriesRange( @@ -42,19 +44,15 @@ LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) { return *FindPointer(key, hash); } +inline LRUHandle** LRUHandleTable::Head(uint32_t hash) { + return &list_[hash >> (32 - length_bits_)]; +} + LRUHandle* LRUHandleTable::Insert(LRUHandle* h) { LRUHandle** ptr = FindPointer(h->key(), h->hash); LRUHandle* old = *ptr; h->next_hash = (old == nullptr ? nullptr : old->next_hash); *ptr = h; - if (old == nullptr) { - ++elems_; - if ((elems_ >> length_bits_) > 0) { // elems_ >= length - // Since each cache entry is fairly large, we aim for a small - // average linked list length (<= 1). 
- Resize(); - } - } return old; } @@ -63,7 +61,6 @@ LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) { LRUHandle* result = *ptr; if (result != nullptr) { *ptr = result->next_hash; - --elems_; } return result; } @@ -76,46 +73,13 @@ LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) { return ptr; } -void LRUHandleTable::Resize() { - if (length_bits_ >= max_length_bits_) { - // Due to reaching limit of hash information, if we made the table bigger, - // we would allocate more addresses but only the same number would be used. - return; - } - if (length_bits_ >= 31) { - // Avoid undefined behavior shifting uint32_t by 32. - return; - } - - uint32_t old_length = uint32_t{1} << length_bits_; - int new_length_bits = length_bits_ + 1; - std::unique_ptr new_list { - new LRUHandle* [size_t{1} << new_length_bits] {} - }; - uint32_t count = 0; - for (uint32_t i = 0; i < old_length; i++) { - LRUHandle* h = list_[i]; - while (h != nullptr) { - LRUHandle* next = h->next_hash; - uint32_t hash = h->hash; - LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)]; - h->next_hash = *ptr; - *ptr = h; - h = next; - count++; - } - } - assert(elems_ == count); - list_ = std::move(new_list); - length_bits_ = new_length_bits; -} - -LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit, - CacheMetadataChargePolicy metadata_charge_policy, - int max_upper_hash_bits) +LRUCacheShard::LRUCacheShard(size_t capacity, size_t estimated_value_size, + bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy) : capacity_(0), strict_capacity_limit_(strict_capacity_limit), - table_(max_upper_hash_bits), + table_( + GetHashBits(capacity, estimated_value_size, metadata_charge_policy)), usage_(0), lru_usage_(0) { set_metadata_charge_policy(metadata_charge_policy); @@ -220,6 +184,27 @@ void LRUCacheShard::EvictFromLRU(size_t charge, } } +int LRUCacheShard::GetHashBits( + size_t capacity, size_t estimated_value_size, + 
CacheMetadataChargePolicy metadata_charge_policy) { + LRUHandle* e = reinterpret_cast( + new char[sizeof(LRUHandle) - 1 + KEY_LENGTH]); + e->key_length = KEY_LENGTH; + e->deleter = nullptr; + e->refs = 0; + e->flags = 0; + e->refs = 0; + + e->CalcTotalCharge(estimated_value_size, metadata_charge_policy); + size_t num_entries = capacity / e->total_charge; + e->Free(); + int num_hash_bits = 0; + while (num_entries >>= 1) { + ++num_hash_bits; + } + return num_hash_bits; +} + void LRUCacheShard::SetCapacity(size_t capacity) { autovector last_reference_list; { @@ -368,8 +353,9 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value, size_t charge, Cache::DeleterFn deleter, Cache::Handle** handle, Cache::Priority /*priority*/) { - if (key.size() != 16) { - return Status::NotSupported("FastLRUCache only supports key size 16B."); + if (key.size() != KEY_LENGTH) { + return Status::NotSupported("FastLRUCache only supports key size " + + std::to_string(KEY_LENGTH) + "B"); } // Allocate the memory here outside of the mutex. 
@@ -431,8 +417,8 @@ size_t LRUCacheShard::GetPinnedUsage() const { std::string LRUCacheShard::GetPrintableOptions() const { return std::string{}; } -LRUCache::LRUCache(size_t capacity, int num_shard_bits, - bool strict_capacity_limit, +LRUCache::LRUCache(size_t capacity, size_t estimated_value_size, + int num_shard_bits, bool strict_capacity_limit, CacheMetadataChargePolicy metadata_charge_policy) : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) { num_shards_ = 1 << num_shard_bits; @@ -441,8 +427,8 @@ LRUCache::LRUCache(size_t capacity, int num_shard_bits, size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_; for (int i = 0; i < num_shards_; i++) { new (&shards_[i]) - LRUCacheShard(per_shard, strict_capacity_limit, metadata_charge_policy, - /* max_upper_hash_bits */ 32 - num_shard_bits); + LRUCacheShard(per_shard, estimated_value_size, strict_capacity_limit, + metadata_charge_policy); } } @@ -497,7 +483,8 @@ void LRUCache::DisownData() { } // namespace fast_lru_cache std::shared_ptr NewFastLRUCache( - size_t capacity, int num_shard_bits, bool strict_capacity_limit, + size_t capacity, size_t estimated_value_size, int num_shard_bits, + bool strict_capacity_limit, CacheMetadataChargePolicy metadata_charge_policy) { if (num_shard_bits >= 20) { return nullptr; // The cache cannot be sharded into too many fine pieces. @@ -506,7 +493,8 @@ std::shared_ptr NewFastLRUCache( num_shard_bits = GetDefaultCacheShardBits(capacity); } return std::make_shared( - capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy); + capacity, estimated_value_size, num_shard_bits, strict_capacity_limit, + metadata_charge_policy); } } // namespace ROCKSDB_NAMESPACE diff --git a/cache/fast_lru_cache.h b/cache/fast_lru_cache.h index 9d3989ac7..00e2de9c7 100644 --- a/cache/fast_lru_cache.h +++ b/cache/fast_lru_cache.h @@ -114,10 +114,7 @@ struct LRUHandle { // 4.4.3's builtin hashtable. 
class LRUHandleTable { public: - // If the table uses more hash bits than `max_upper_hash_bits`, - // it will eat into the bits used for sharding, which are constant - // for a given LRUHandleTable. - explicit LRUHandleTable(int max_upper_hash_bits); + explicit LRUHandleTable(int hash_bits); ~LRUHandleTable(); LRUHandle* Lookup(const Slice& key, uint32_t hash); @@ -139,14 +136,16 @@ class LRUHandleTable { int GetLengthBits() const { return length_bits_; } + // Return the address of the head of the chain in the bucket given + // by the hash. + inline LRUHandle** Head(uint32_t hash); + private: // Return a pointer to slot that points to a cache entry that // matches key/hash. If there is no such cache entry, return a // pointer to the trailing slot in the corresponding linked list. LRUHandle** FindPointer(const Slice& key, uint32_t hash); - void Resize(); - // Number of hash bits (upper because lower bits used for sharding) // used for table index. Length == 1 << length_bits_ int length_bits_; @@ -154,20 +153,14 @@ class LRUHandleTable { // The table consists of an array of buckets where each bucket is // a linked list of cache entries that hash into the bucket. std::unique_ptr list_; - - // Number of elements currently in the table. - uint32_t elems_; - - // Set from max_upper_hash_bits (see constructor). - const int max_length_bits_; }; // A single shard of sharded cache. class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard { public: - LRUCacheShard(size_t capacity, bool strict_capacity_limit, - CacheMetadataChargePolicy metadata_charge_policy, - int max_upper_hash_bits); + LRUCacheShard(size_t capacity, size_t estimated_value_size, + bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy); ~LRUCacheShard() override = default; // Separate from constructor so caller can easily make an array of LRUCache @@ -239,6 +232,11 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard { // holding the mutex_. 
void EvictFromLRU(size_t charge, autovector* deleted); + // Returns the number of bits used to hash an element in the per-shard + // table. + static int GetHashBits(size_t capacity, size_t estimated_value_size, + CacheMetadataChargePolicy metadata_charge_policy); + // Initialized before use. size_t capacity_; @@ -284,7 +282,8 @@ class LRUCache #endif : public ShardedCache { public: - LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, + LRUCache(size_t capacity, size_t estimated_value_size, int num_shard_bits, + bool strict_capacity_limit, CacheMetadataChargePolicy metadata_charge_policy = kDontChargeCacheMetadata); ~LRUCache() override; @@ -304,9 +303,8 @@ class LRUCache } // namespace fast_lru_cache std::shared_ptr NewFastLRUCache( - size_t capacity, int num_shard_bits = -1, - bool strict_capacity_limit = false, - CacheMetadataChargePolicy metadata_charge_policy = - kDefaultCacheMetadataChargePolicy); + size_t capacity, size_t estimated_value_size, int num_shard_bits, + bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy); } // namespace ROCKSDB_NAMESPACE diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index 953ceac7a..8af05c5d7 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -226,8 +226,8 @@ class FastLRUCacheTest : public testing::Test { cache_ = reinterpret_cast( port::cacheline_aligned_alloc(sizeof(fast_lru_cache::LRUCacheShard))); new (cache_) fast_lru_cache::LRUCacheShard( - capacity, false /*strict_capcity_limit*/, kDontChargeCacheMetadata, - 24 /*max_upper_hash_bits*/); + capacity, 1 /*estimated_value_size*/, false /*strict_capacity_limit*/, + kDontChargeCacheMetadata); } Status Insert(const std::string& key) { diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 3dcdb505e..9ab2d3db6 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -936,7 +936,9 @@ TEST_F(DBBlockCacheTest, AddRedundantStats) { for (std::shared_ptr base_cache : 
{NewLRUCache(capacity, num_shard_bits), NewClockCache(capacity, num_shard_bits), - NewFastLRUCache(capacity, num_shard_bits)}) { + NewFastLRUCache(capacity, 1 /*estimated_value_size*/, num_shard_bits, + false /*strict_capacity_limit*/, + kDefaultCacheMetadataChargePolicy)}) { if (!base_cache) { // Skip clock cache when not supported continue; diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index dc7c71c34..627dd3164 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -130,7 +130,9 @@ std::shared_ptr StressTest::NewCache(size_t capacity, } return cache; } else if (FLAGS_cache_type == "fast_lru_cache") { - return NewFastLRUCache((size_t)capacity, num_shard_bits); + return NewFastLRUCache(static_cast(capacity), FLAGS_block_size, + num_shard_bits, false /*strict_capacity_limit*/, + kDefaultCacheMetadataChargePolicy); } else if (FLAGS_cache_type == "lru_cache") { LRUCacheOptions opts; opts.capacity = capacity; diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 377460258..750333966 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -2945,8 +2945,10 @@ class Benchmark { } return cache; } else if (FLAGS_cache_type == "fast_lru_cache") { - return NewFastLRUCache(static_cast(capacity), - FLAGS_cache_numshardbits); + return NewFastLRUCache(static_cast(capacity), FLAGS_block_size, + FLAGS_cache_numshardbits, + false /*strict_capacity_limit*/, + kDefaultCacheMetadataChargePolicy); } else if (FLAGS_cache_type == "lru_cache") { LRUCacheOptions opts( static_cast(capacity), FLAGS_cache_numshardbits,