From: Mark Nelson Date: Mon, 25 Jun 2018 21:17:44 +0000 (-0500) Subject: cache:Add support for an erasure pool. X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3c60382d1159ee14b301b807956b52438e55b2b2;p=rocksdb.git cache:Add support for an erasure pool. Signed-off-by: Mark Nelson --- diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc index 28b93800..bbf7d81f 100644 --- a/cache/lru_cache.cc +++ b/cache/lru_cache.cc @@ -107,7 +107,8 @@ LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit, high_pri_pool_ratio_(high_pri_pool_ratio), high_pri_pool_capacity_(0), usage_(0), - lru_usage_(0) { + lru_usage_(0), + erased_usage_(0) { // Make empty circular linked list lru_.next = &lru_; lru_.prev = &lru_; @@ -193,19 +194,36 @@ void LRUCacheShard::LRU_Remove(LRUHandle* e) { if (e->InHighPriPool()) { assert(high_pri_pool_usage_ >= e->charge); high_pri_pool_usage_ -= e->charge; + e->SetInHighPriPool(false); + } + if (e->IsFlaggedForErasure()) { + assert(erased_usage_ >= e->charge); + erased_usage_ -= e->charge; + e->SetFlaggedForErasure(false); } } void LRUCacheShard::LRU_Insert(LRUHandle* e) { assert(e->next == nullptr); assert(e->prev == nullptr); - if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) { + if (e->IsErased()) { + // Insert "e" to the tail of the LRU list. + e->next = lru_.next; + e->prev = &lru_; + e->prev->next = e; + e->next->prev = e; + e->SetInHighPriPool(false); + e->SetFlaggedForErasure(true); + lru_low_pri_ = e; + erased_usage_ += e->charge; + } else if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) { // Inset "e" to head of LRU list. e->next = &lru_; e->prev = lru_.prev; e->prev->next = e; e->next->prev = e; e->SetInHighPriPool(true); + e->SetFlaggedForErasure(false); high_pri_pool_usage_ += e->charge; MaintainPoolSize(); } else { @@ -216,11 +234,36 @@ void LRUCacheShard::LRU_Insert(LRUHandle* e) { e->prev->next = e; e->next->prev = e; e->SetInHighPriPool(false); + e->SetFlaggedForErasure(false); lru_low_pri_ = e; } lru_usage_ += e->charge; } +void LRUCacheShard::LRU_Demote(LRUHandle* e) { + assert(e->next != nullptr); + assert(e->prev != nullptr); + if (lru_low_pri_ == e) { + lru_low_pri_ = e->prev; + } + e->next->prev = e->prev; + e->prev->next = e->next; + e->next = lru_.next; + e->prev = &lru_; + e->next->prev = e; + e->prev->next = e; + + if (e->InHighPriPool()) { + assert(high_pri_pool_usage_ >= e->charge); + high_pri_pool_usage_ -= e->charge; + e->SetInHighPriPool(false); + } + if (!e->IsErased()) { + erased_usage_ += e->charge; + e->SetErased(true); + } +} + void LRUCacheShard::MaintainPoolSize() { while (high_pri_pool_usage_ > high_pri_pool_capacity_) { // Overflow last entry in high-pri pool to low-pri pool. @@ -316,6 +359,7 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) { } if (e->refs == 1 && e->InCache()) { // The item is still in cache, and nobody else holds a reference to it + if (usage_ > capacity_ || force_erase) { // the cache is full // The LRU list must be empty since the cache is full @@ -337,6 +381,7 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) { if (last_reference) { e->Free(); } + return last_reference; } @@ -429,9 +474,12 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) { if (last_reference) { usage_ -= e->charge; } + if (last_reference && e->InCache()) { LRU_Remove(e); } + + e->SetErased(true); e->SetInCache(false); } } @@ -441,6 +489,7 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) { if (last_reference) { e->Free(); } + } size_t LRUCacheShard::GetUsage() const { @@ -464,6 +513,11 @@ size_t LRUCacheShard::GetHighPriPoolUsage() const { return high_pri_pool_usage_; } +size_t LRUCacheShard::GetErasedUsage() const { + MutexLock l(&mutex_); + return erased_usage_; +} + std::string LRUCacheShard::GetPrintableOptions() const { const int kBufferSize = 200; char buffer[kBufferSize]; @@ -560,6 +614,15 @@ void LRUCache::SetHighPriPoolRatio(double high_pri_pool_ratio) { } } +size_t LRUCache::GetErasedUsage() const { + size_t size = 0; + for (int i = 0; i < num_shards_; i++) { + size += shards_[i].GetErasedUsage(); + } + return size; +} + + std::shared_ptr NewLRUCache(const LRUCacheOptions& cache_opts) { return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits, cache_opts.strict_capacity_limit, diff --git a/cache/lru_cache.h b/cache/lru_cache.h index c80594a1..26ce9cb8 100644 --- a/cache/lru_cache.h +++ b/cache/lru_cache.h @@ -58,6 +58,8 @@ struct LRUHandle { // in_cache: whether this entry is referenced by the hash table. // is_high_pri: whether this entry is high priority entry. // in_high_pri_pool: whether this entry is in high-pri pool. + // is_erased: whether this entry is erased (but may still have references!) + // is_flagged_for_erasure: whether this entry is flagged for erasure char flags; uint32_t hash; // Hash of key(); used for fast sharding and comparisons @@ -78,6 +80,8 @@ struct LRUHandle { bool IsHighPri() { return flags & 2; } bool InHighPriPool() { return flags & 4; } bool HasHit() { return flags & 8; } + bool IsErased() { return flags & 16; } + bool IsFlaggedForErasure() { return flags & 32; } void SetInCache(bool in_cache) { if (in_cache) { @@ -105,6 +109,22 @@ struct LRUHandle { void SetHit() { flags |= 8; } + void SetErased(bool erased) { + if (erased) { + flags |= 16; + } else { + flags &= ~16; + } + } + + void SetFlaggedForErasure(bool erased) { + if (erased) { + flags |= 32; + } else { + flags &= ~32; + } + } + void Free() { assert((refs == 1 && InCache()) || (refs == 0 && !InCache())); if (deleter) { @@ -211,9 +231,12 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard { virtual size_t GetHighPriPoolUsage() const; virtual double GetHighPriPoolRatio() const; + virtual size_t GetErasedUsage() const; + private: void LRU_Remove(LRUHandle* e); void LRU_Insert(LRUHandle* e); + void LRU_Demote(LRUHandle* e); // Overflow the last entry in high-pri pool to low-pri pool until size of // high-pri pool is no larger than the size specify by high_pri_pool_pct. @@ -275,6 +298,9 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard { // Memory size for entries residing only in the LRU list size_t lru_usage_; + // Memory size for entries that will be erased + size_t erased_usage_; + // mutex_ protects the following state. // We don't count mutex_ as the cache's internal state so semantically we // don't mind mutex_ invoking the non-const actions. @@ -297,6 +323,8 @@ class LRUCache : public ShardedCache { virtual size_t GetHighPriPoolUsage() const override; virtual double GetHighPriPoolRatio() const override; virtual void SetHighPriPoolRatio(double high_pri_pool_ratio) override; + + virtual size_t GetErasedUsage() const override; // Retrieves number of elements in LRU, for unit test purpose only size_t TEST_GetLRUSize(); diff --git a/cache/sharded_cache.h b/cache/sharded_cache.h index 54d0c377..fb3fe9c6 100644 --- a/cache/sharded_cache.h +++ b/cache/sharded_cache.h @@ -74,6 +74,7 @@ class ShardedCache : public Cache { virtual size_t GetUsage() const override; virtual size_t GetUsage(Handle* handle) const override; virtual size_t GetPinnedUsage() const override; + virtual size_t GetErasedUsage() const override = 0; virtual size_t GetHighPriPoolUsage() const override = 0; virtual double GetHighPriPoolRatio() const override = 0; diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 46661c79..a44b174b 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -220,6 +220,11 @@ class Cache { return 0; } + // returns the memory size for the entries that are to be erased. + virtual size_t GetErasedUsage() const { + return 0; + } + // returns the ratio of memory usaged by the high priority pool virtual double GetHighPriPoolRatio() const { // default implementation returns 0