From: Mark Nelson <mnelson@redhat.com>
Date: Mon, 25 Jun 2018 21:17:44 +0000 (-0500)
Subject: cache: Add support for an erasure pool.
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3c60382d1159ee14b301b807956b52438e55b2b2;p=rocksdb.git

cache: Add support for an erasure pool.

Signed-off-by: Mark Nelson <mnelson@redhat.com>
---

diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc
index 28b93800..bbf7d81f 100644
--- a/cache/lru_cache.cc
+++ b/cache/lru_cache.cc
@@ -107,7 +107,8 @@ LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
       high_pri_pool_ratio_(high_pri_pool_ratio),
       high_pri_pool_capacity_(0),
       usage_(0),
-      lru_usage_(0) {
+      lru_usage_(0),
+      erased_usage_(0) {
   // Make empty circular linked list
   lru_.next = &lru_;
   lru_.prev = &lru_;
@@ -193,19 +194,36 @@ void LRUCacheShard::LRU_Remove(LRUHandle* e) {
   if (e->InHighPriPool()) {
     assert(high_pri_pool_usage_ >= e->charge);
     high_pri_pool_usage_ -= e->charge;
+    e->SetInHighPriPool(false);
+  }
+  if (e->IsFlaggedForErasure()) {
+   assert(erased_usage_ >= e->charge);
+   erased_usage_ -= e->charge;
+   e->SetFlaggedForErasure(false);
   }
 }
 
 void LRUCacheShard::LRU_Insert(LRUHandle* e) {
   assert(e->next == nullptr);
   assert(e->prev == nullptr);
-  if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
+  if (e->IsErased()) {
+    // Insert "e" to the tail of the LRU list.
+    e->next = lru_.next;
+    e->prev = &lru_;
+    e->prev->next = e;
+    e->next->prev = e;
+    e->SetInHighPriPool(false);
+    e->SetFlaggedForErasure(true);
+    lru_low_pri_ = e;
+    erased_usage_ += e->charge;
+  } else if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
     // Inset "e" to head of LRU list.
     e->next = &lru_;
     e->prev = lru_.prev;
     e->prev->next = e;
     e->next->prev = e;
     e->SetInHighPriPool(true);
+    e->SetFlaggedForErasure(false);
     high_pri_pool_usage_ += e->charge;
     MaintainPoolSize();
   } else {
@@ -216,11 +234,36 @@ void LRUCacheShard::LRU_Insert(LRUHandle* e) {
     e->prev->next = e;
     e->next->prev = e;
     e->SetInHighPriPool(false);
+    e->SetFlaggedForErasure(false);
     lru_low_pri_ = e;
   }
   lru_usage_ += e->charge;
 }
 
+// Moves e to the tail of the LRU list and counts it once in erased_usage_.
+void LRUCacheShard::LRU_Demote(LRUHandle* e) {
+  assert(e->next != nullptr && e->prev != nullptr);  // e must be linked
+  if (lru_low_pri_ == e) {
+    lru_low_pri_ = e->prev;
+  }  // NOTE(review): if this yields &lru_, should e become lru_low_pri_?
+  e->next->prev = e->prev;  // unlink e from its current position
+  e->prev->next = e->next;
+  e->next = lru_.next;  // relink e directly after the list head lru_
+  e->prev = &lru_;
+  e->next->prev = e;
+  e->prev->next = e;
+  if (e->InHighPriPool()) {
+    assert(high_pri_pool_usage_ >= e->charge);
+    high_pri_pool_usage_ -= e->charge;
+    e->SetInHighPriPool(false);
+  }
+  e->SetErased(true);
+  if (!e->IsFlaggedForErasure()) {  // LRU_Remove subtracts only when flagged
+    erased_usage_ += e->charge;
+    e->SetFlaggedForErasure(true);
+  }
+}
+
 void LRUCacheShard::MaintainPoolSize() {
   while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
     // Overflow last entry in high-pri pool to low-pri pool.
@@ -316,6 +359,7 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
     }
     if (e->refs == 1 && e->InCache()) {
       // The item is still in cache, and nobody else holds a reference to it
+
       if (usage_ > capacity_ || force_erase) {
         // the cache is full
         // The LRU list must be empty since the cache is full
@@ -337,6 +381,7 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
   if (last_reference) {
     e->Free();
   }
+
   return last_reference;
 }
 
@@ -429,9 +474,12 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
       if (last_reference) {
         usage_ -= e->charge;
       }
+
       if (last_reference && e->InCache()) {
         LRU_Remove(e);
       }
+
+      e->SetErased(true);
       e->SetInCache(false);
     }
   }
@@ -441,6 +489,7 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
   if (last_reference) {
     e->Free();
   }
+
 }
 
 size_t LRUCacheShard::GetUsage() const {
@@ -464,6 +513,11 @@ size_t LRUCacheShard::GetHighPriPoolUsage() const {
   return high_pri_pool_usage_;
 }
 
+size_t LRUCacheShard::GetErasedUsage() const {
+  MutexLock lock(&mutex_);  // hold mutex_ for the duration of the read
+  return erased_usage_;     // this shard's erased-entry charge total
+}
+
 std::string LRUCacheShard::GetPrintableOptions() const {
   const int kBufferSize = 200;
   char buffer[kBufferSize];
@@ -560,6 +614,15 @@ void LRUCache::SetHighPriPoolRatio(double high_pri_pool_ratio) {
   }
 }
 
+// Returns the sum of GetErasedUsage() over all num_shards_ shards.
+size_t LRUCache::GetErasedUsage() const {
+  size_t total = 0;
+  for (int shard = 0; shard < num_shards_; ++shard) {
+    total += shards_[shard].GetErasedUsage();
+  }
+  return total;
+}
+
 std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
   return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
                      cache_opts.strict_capacity_limit,
diff --git a/cache/lru_cache.h b/cache/lru_cache.h
index c80594a1..26ce9cb8 100644
--- a/cache/lru_cache.h
+++ b/cache/lru_cache.h
@@ -58,6 +58,8 @@ struct LRUHandle {
   //   in_cache:    whether this entry is referenced by the hash table.
   //   is_high_pri: whether this entry is high priority entry.
   //   in_high_pri_pool: whether this entry is in high-pri pool.
+  //   is_erased: whether this entry is erased (but may still have references!)
+  //   is_flagged_for_erasure: whether this entry is flagged for erasure
   char flags;
 
   uint32_t hash;     // Hash of key(); used for fast sharding and comparisons
@@ -78,6 +80,8 @@ struct LRUHandle {
   bool IsHighPri() { return flags & 2; }
   bool InHighPriPool() { return flags & 4; }
   bool HasHit() { return flags & 8; }
+  bool IsErased() { return flags & 16; }  // tests the is_erased bit
+  bool IsFlaggedForErasure() { return flags & 32; }  // is_flagged_for_erasure
 
   void SetInCache(bool in_cache) {
     if (in_cache) {
@@ -105,6 +109,22 @@ struct LRUHandle {
 
   void SetHit() { flags |= 8; }
 
+  // Sets (erased == true) or clears (erased == false) the is_erased bit,
+  // mask 16 in flags, leaving every other flag bit untouched.
+  void SetErased(bool erased) {
+    const int kErasedMask = 16;
+    flags = static_cast<char>(erased ? (flags | kErasedMask)
+                                     : (flags & ~kErasedMask));
+  }
+
+  // Sets (erased == true) or clears (erased == false) the
+  // is_flagged_for_erasure bit, mask 32 in flags; other bits are unchanged.
+  void SetFlaggedForErasure(bool erased) {
+    const int kFlaggedMask = 32;
+    flags = static_cast<char>(erased ? (flags | kFlaggedMask)
+                                     : (flags & ~kFlaggedMask));
+  }
+
   void Free() {
     assert((refs == 1 && InCache()) || (refs == 0 && !InCache()));
     if (deleter) {
@@ -211,9 +231,12 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard {
   virtual size_t GetHighPriPoolUsage() const;
   virtual double GetHighPriPoolRatio() const;
 
+  virtual size_t GetErasedUsage() const;
+
  private:
   void LRU_Remove(LRUHandle* e);
   void LRU_Insert(LRUHandle* e);
+  void LRU_Demote(LRUHandle* e);
 
   // Overflow the last entry in high-pri pool to low-pri pool until size of
   // high-pri pool is no larger than the size specify by high_pri_pool_pct.
@@ -275,6 +298,9 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard {
   // Memory size for entries residing only in the LRU list
   size_t lru_usage_;
 
+  // Memory size for entries that will be erased
+  size_t erased_usage_;
+
   // mutex_ protects the following state.
   // We don't count mutex_ as the cache's internal state so semantically we
   // don't mind mutex_ invoking the non-const actions.
@@ -297,6 +323,8 @@ class LRUCache : public ShardedCache {
   virtual size_t GetHighPriPoolUsage() const override;
   virtual double GetHighPriPoolRatio() const override;
   virtual void SetHighPriPoolRatio(double high_pri_pool_ratio) override;
+
+  virtual size_t GetErasedUsage() const override;
   // Retrieves number of elements in LRU, for unit test purpose only
   size_t TEST_GetLRUSize();
 
diff --git a/cache/sharded_cache.h b/cache/sharded_cache.h
index 54d0c377..fb3fe9c6 100644
--- a/cache/sharded_cache.h
+++ b/cache/sharded_cache.h
@@ -74,6 +74,7 @@ class ShardedCache : public Cache {
   virtual size_t GetUsage() const override;
   virtual size_t GetUsage(Handle* handle) const override;
   virtual size_t GetPinnedUsage() const override;
+  virtual size_t GetErasedUsage() const override = 0;
   virtual size_t GetHighPriPoolUsage() const override = 0;
   virtual double GetHighPriPoolRatio() const override = 0;
 
diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h
index 46661c79..a44b174b 100644
--- a/include/rocksdb/cache.h
+++ b/include/rocksdb/cache.h
@@ -220,6 +220,11 @@ class Cache {
     return 0;
   }
 
+  // Returns the memory size for the entries that are to be erased.
+  // The default implementation reports 0; cache implementations that track
+  // such entries override it.
+  virtual size_t GetErasedUsage() const { return 0; }
+
   // returns the ratio of memory usaged by the high priority pool
   virtual double GetHighPriPoolRatio() const {
     // default implementation returns 0