]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd/cache/pwl/ssd: solve competition between read and retire
authorFeng Hualong <hualong.feng@intel.com>
Fri, 30 Jul 2021 12:24:05 +0000 (20:24 +0800)
committerDeepika Upadhyay <dupadhya@redhat.com>
Fri, 5 Nov 2021 09:22:02 +0000 (14:52 +0530)
SSD read is not like rwl's. SSD need aio read. Therefore,
we cannot guarantee that the data will not be retire
during the period from sending the read request to the SSD
and receiving the data to the memory, which may cause
the corresponding data on the SSD to be overwritten.

Fixes: https://tracker.ceph.com/issues/52236
Signed-off-by: Feng Hualong <hualong.feng@intel.com>
(cherry picked from commit dfdb452aa996af40f7b0ec684670ccf9a9b2d4c1)

src/librbd/cache/pwl/AbstractWriteLog.cc
src/librbd/cache/pwl/AbstractWriteLog.h
src/librbd/cache/pwl/LogEntry.cc
src/librbd/cache/pwl/LogEntry.h
src/librbd/cache/pwl/rwl/LogEntry.cc
src/librbd/cache/pwl/rwl/LogEntry.h
src/librbd/cache/pwl/rwl/WriteLog.cc
src/librbd/cache/pwl/rwl/WriteLog.h
src/librbd/cache/pwl/ssd/LogEntry.h
src/librbd/cache/pwl/ssd/WriteLog.cc
src/librbd/cache/pwl/ssd/WriteLog.h

index f78c4ca9cf72bc0848340c959943eca43cbb82c2..98869d8b23d62c19853ac1f16e08eb05bb7df508 100644 (file)
@@ -696,7 +696,7 @@ void AbstractWriteLog<I>::read(Extents&& image_extents,
   bl->clear();
   m_perfcounter->inc(l_librbd_pwl_rd_req, 1);
 
-  std::vector<WriteLogCacheEntry*> log_entries_to_read;
+  std::vector<std::shared_ptr<GenericWriteLogEntry>> log_entries_to_read;
   std::vector<bufferlist*> bls_to_read;
 
   m_async_op_tracker.start_op();
index e61f44967b82e217fe87d57791458792b310825a..e2ed571e9c80bdc562b6ce8ba2bf39f314405402 100644 (file)
@@ -366,11 +366,11 @@ protected:
                                pwl::DeferredContexts &later) = 0;
   virtual void collect_read_extents(
       uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
-      std::vector<WriteLogCacheEntry*> &log_entries_to_read,
+      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
       std::vector<bufferlist*> &bls_to_read, uint64_t entry_hit_length,
       Extent hit_extent, pwl::C_ReadRequest *read_ctx) = 0;
   virtual void complete_read(
-      std::vector<WriteLogCacheEntry*> &log_entries_to_read,
+      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
       std::vector<bufferlist*> &bls_to_read, Context *ctx) = 0;
   virtual void write_data_to_buffer(
       std::shared_ptr<pwl::WriteLogEntry> ws_entry,
index 98224241b557e384e48da8e63ef4c507fec8786c..63873f748d9080ba91a51741bbcbcdf2b5a7ecbc 100644 (file)
@@ -88,14 +88,6 @@ void WriteLogEntry::init(bool has_data,
   ram_entry.discard = 0;
 }
 
-unsigned int WriteLogEntry::reader_count() const {
-  if (cache_bp.have_raw()) {
-    return (cache_bp.raw_nref() - bl_refs - 1);
-  } else {
-    return 0;
-  }
-}
-
 std::ostream& WriteLogEntry::format(std::ostream &os) const {
   os << "(Write) ";
   GenericWriteLogEntry::format(os);
index b29d7fb88bcb3439fd2da04f53d90ef19c3055b7..b0d27b5aefacc7f506cf08cbe43145d4dd79db37 100644 (file)
@@ -217,7 +217,7 @@ public:
   virtual buffer::list &get_cache_bl() = 0;
 
   BlockExtent block_extent();
-  unsigned int reader_count() const;
+  virtual unsigned int reader_count() const = 0;
   /* Constructs a new bl containing copies of cache_bp */
   void copy_cache_bl(bufferlist *out_bl) override {};
   bool can_retire() const override {
index 7325bef78a95892eb8839aad5668769133a8a1cc..056701fb514880d1902034eb72a30dc54d432e1e 100644 (file)
@@ -80,6 +80,14 @@ void WriteLogEntry::copy_cache_bl(bufferlist *out_bl) {
   this->init_bl(cloned_bp, *out_bl);
 }
 
+unsigned int WriteLogEntry::reader_count() const {
+  if (cache_bp.have_raw()) {
+    return (cache_bp.raw_nref() - bl_refs - 1);
+  } else {
+    return 0;
+  }
+}
+
 void WriteSameLogEntry::writeback(
     librbd::cache::ImageWritebackInterface &image_writeback, Context *ctx) {
   bufferlist entry_bl;
index 0eacb5aeec9ab6ce21411bcccc4985375e2bb791..a4675c5fbd824983357c6f153605206bf8c040ab 100644 (file)
@@ -39,6 +39,7 @@ public:
       std::vector<WriteBufferAllocation>::iterator allocation) override;
   buffer::list &get_cache_bl() override;
   void copy_cache_bl(bufferlist *out_bl) override;
+  unsigned int reader_count() const override;
 };
 
 class WriteSameLogEntry : public WriteLogEntry {
index 5e12e6f1ba5ef14b06d2c5ce46ba097bc9acc878..a76ced1ec3daca8c35730064def0f5b9ec650a80 100644 (file)
@@ -60,7 +60,7 @@ WriteLog<I>::~WriteLog() {
 template <typename I>
 void WriteLog<I>::collect_read_extents(
       uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
-      std::vector<WriteLogCacheEntry*> &log_entries_to_read,
+      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
       std::vector<bufferlist*> &bls_to_read, uint64_t entry_hit_length,
       Extent hit_extent, pwl::C_ReadRequest *read_ctx) {
   /* Make a bl for this hit extent. This will add references to the
@@ -82,7 +82,7 @@ void WriteLog<I>::collect_read_extents(
 
 template <typename I>
 void WriteLog<I>::complete_read(
-    std::vector<WriteLogCacheEntry*> &log_entries_to_read,
+    std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
     std::vector<bufferlist*> &bls_to_read, Context *ctx) {
   ctx->complete(0);
 }
index a72e668dc3ecabd52104d75bf68101755c830103..912368ee9a413d9db2ab02d06eb5262a17cda670 100644 (file)
@@ -88,11 +88,11 @@ protected:
                      bool &alloc_succeeds, bool &no_space) override;
   void collect_read_extents(
       uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
-      std::vector<WriteLogCacheEntry*> &log_entries_to_read,
+      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
       std::vector<bufferlist*> &bls_to_read, uint64_t entry_hit_length,
       Extent hit_extent, pwl::C_ReadRequest *read_ctx) override;
   void complete_read(
-      std::vector<WriteLogCacheEntry*> &log_entries_to_read,
+      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
       std::vector<bufferlist*> &bls_to_read, Context *ctx) override;
   bool retire_entries(const unsigned long int frees_per_tx) override;
   void persist_last_flushed_sync_gen() override;
index 666398440a22155ffbe57dfc68d2a54ef0452095..86121473b82edb07225a9f71c87880bc8db4c3f9 100644 (file)
@@ -40,6 +40,11 @@ public:
   buffer::list &get_cache_bl() override;
   void remove_cache_bl() override;
   unsigned int get_aligned_data_size() const override;
+  void inc_bl_refs() { bl_refs++; };
+  void dec_bl_refs() { bl_refs--; };
+  unsigned int reader_count() const override {
+    return bl_refs;
+  }
 };
 
 class WriteSameLogEntry : public WriteLogEntry {
index 6aeb08ad18cefec7362342a873e1e42e13e96a0b..777b542d32f2c7f357fb8e94c10a17f6feaf678b 100644 (file)
@@ -69,32 +69,33 @@ WriteLog<I>::~WriteLog() {
 template <typename I>
 void WriteLog<I>::collect_read_extents(
     uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
-    std::vector<WriteLogCacheEntry*> &log_entries_to_read,
+    std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
     std::vector<bufferlist*> &bls_to_read,
     uint64_t entry_hit_length, Extent hit_extent,
     pwl::C_ReadRequest *read_ctx) {
-    // Make a bl for this hit extent. This will add references to the
-    // write_entry->cache_bl */
-    ldout(m_image_ctx.cct, 5) << dendl;
-    auto write_entry = static_pointer_cast<WriteLogEntry>(map_entry.log_entry);
-    buffer::list hit_bl;
-    hit_bl = write_entry->get_cache_bl();
-    bool writesame = write_entry->is_writesame_entry();
-    auto hit_extent_buf = std::make_shared<ImageExtentBuf>(
-        hit_extent, hit_bl, true, read_buffer_offset, writesame);
-    read_ctx->read_extents.push_back(hit_extent_buf);
-
-    if(!hit_bl.length()) {
-      ldout(m_image_ctx.cct, 5) << "didn't hit RAM" << dendl;
-      auto read_extent = read_ctx->read_extents.back();
-      log_entries_to_read.push_back(&write_entry->ram_entry);
-      bls_to_read.push_back(&read_extent->m_bl);
-    }
+  // Make a bl for this hit extent. This will add references to the
+  // write_entry->cache_bl */
+  ldout(m_image_ctx.cct, 5) << dendl;
+  auto write_entry = static_pointer_cast<WriteLogEntry>(map_entry.log_entry);
+  buffer::list hit_bl;
+  hit_bl = write_entry->get_cache_bl();
+  bool writesame = write_entry->is_writesame_entry();
+  auto hit_extent_buf = std::make_shared<ImageExtentBuf>(
+      hit_extent, hit_bl, true, read_buffer_offset, writesame);
+  read_ctx->read_extents.push_back(hit_extent_buf);
+
+  if (!hit_bl.length()) {
+    ldout(m_image_ctx.cct, 5) << "didn't hit RAM" << dendl;
+    auto read_extent = read_ctx->read_extents.back();
+    write_entry->inc_bl_refs();
+    log_entries_to_read.push_back(std::move(write_entry));
+    bls_to_read.push_back(&read_extent->m_bl);
+  }
 }
 
 template <typename I>
 void WriteLog<I>::complete_read(
-    std::vector<WriteLogCacheEntry*> &log_entries_to_read,
+    std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
     std::vector<bufferlist*> &bls_to_read,
     Context *ctx) {
   if (!log_entries_to_read.empty()) {
@@ -529,9 +530,11 @@ Context* WriteLog<I>::construct_flush_entry_ctx(
                                     std::move(captured_entry_bl));
           }), 0);
       });
-      ctx = new LambdaContext(
-        [this, log_entry, read_bl_ptr, ctx](int r) {
-          aio_read_data_block(&log_entry->ram_entry, read_bl_ptr, ctx);
+    ctx = new LambdaContext(
+      [this, log_entry, read_bl_ptr, ctx](int r) {
+        auto write_entry = static_pointer_cast<WriteLogEntry>(log_entry);
+        write_entry->inc_bl_refs();
+        aio_read_data_block(std::move(write_entry), read_bl_ptr, ctx);
       });
     return ctx;
   } else {
@@ -979,29 +982,32 @@ int WriteLog<I>::update_pool_root_sync(
 }
 
 template <typename I>
-void WriteLog<I>::aio_read_data_block(WriteLogCacheEntry *log_entry,
+void WriteLog<I>::aio_read_data_block(std::shared_ptr<GenericWriteLogEntry> log_entry,
                                       bufferlist *bl, Context *ctx) {
-  std::vector<WriteLogCacheEntry*> log_entries {log_entry};
+  std::vector<std::shared_ptr<GenericWriteLogEntry>> log_entries = {std::move(log_entry)};
   std::vector<bufferlist *> bls {bl};
   aio_read_data_blocks(log_entries, bls, ctx);
 }
 
 template <typename I>
 void WriteLog<I>::aio_read_data_blocks(
-    std::vector<WriteLogCacheEntry*> &log_entries,
+    std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries,
     std::vector<bufferlist *> &bls, Context *ctx) {
   ceph_assert(log_entries.size() == bls.size());
 
   //get the valid part
   Context *read_ctx = new LambdaContext(
-    [this, log_entries, bls, ctx](int r) {
+    [log_entries, bls, ctx](int r) {
       for (unsigned int i = 0; i < log_entries.size(); i++) {
         bufferlist valid_data_bl;
-        auto length = log_entries[i]->is_write() ? log_entries[i]->write_bytes :
-                                                   log_entries[i]->ws_datalen;
+        auto write_entry = static_pointer_cast<WriteLogEntry>(log_entries[i]);
+        auto length = write_entry->ram_entry.is_write() ? write_entry->ram_entry.write_bytes
+                                                        : write_entry->ram_entry.ws_datalen;
+
         valid_data_bl.substr_of(*bls[i], 0, length);
         bls[i]->clear();
         bls[i]->append(valid_data_bl);
+        write_entry->dec_bl_refs();
       }
      ctx->complete(r);
     });
@@ -1009,7 +1015,7 @@ void WriteLog<I>::aio_read_data_blocks(
   CephContext *cct = m_image_ctx.cct;
   AioTransContext *aio = new AioTransContext(cct, read_ctx);
   for (unsigned int i = 0; i < log_entries.size(); i++) {
-    auto log_entry = log_entries[i];
+    WriteLogCacheEntry *log_entry = &log_entries[i]->ram_entry;
 
     ceph_assert(log_entry->is_write() || log_entry->is_writesame());
     uint64_t len = log_entry->is_write() ? log_entry->write_bytes :
index 9c3a2bbd6a8dba535889aa87760dd22be42158e0..e64bd15efd8017e55123550df4e53716aa77d9c5 100644 (file)
@@ -109,11 +109,11 @@ private:
   void load_existing_entries(pwl::DeferredContexts &later);
   void collect_read_extents(
       uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
-      std::vector<WriteLogCacheEntry*> &log_entries_to_read,
+      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
       std::vector<bufferlist*> &bls_to_read, uint64_t entry_hit_length,
       Extent hit_extent, pwl::C_ReadRequest *read_ctx) override;
   void complete_read(
-      std::vector<WriteLogCacheEntry*> &log_entries_to_read,
+      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
       std::vector<bufferlist*> &bls_to_read, Context *ctx) override;
   void enlist_op_appender();
   bool retire_entries(const unsigned long int frees_per_tx);
@@ -133,9 +133,9 @@ private:
   int update_pool_root_sync(std::shared_ptr<pwl::WriteLogPoolRoot> root);
   void update_pool_root(std::shared_ptr<WriteLogPoolRoot> root,
                                           AioTransContext *aio);
-  void aio_read_data_block(WriteLogCacheEntry *log_entry, bufferlist *bl,
-                           Context *ctx);
-  void aio_read_data_blocks(std::vector<WriteLogCacheEntry*> &log_entries,
+  void aio_read_data_block(std::shared_ptr<GenericWriteLogEntry> log_entry,
+                           bufferlist *bl, Context *ctx);
+  void aio_read_data_blocks(std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries,
                             std::vector<bufferlist *> &bls, Context *ctx);
   static void aio_cache_cb(void *priv, void *priv2) {
     AioTransContext *c = static_cast<AioTransContext*>(priv2);