From: Feng Hualong Date: Fri, 30 Jul 2021 12:24:05 +0000 (+0800) Subject: librbd/cache/pwl/ssd: solve competition between read and retire X-Git-Tag: v17.1.0~1114^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=dfdb452aa996af40f7b0ec684670ccf9a9b2d4c1;p=ceph.git librbd/cache/pwl/ssd: solve competition between read and retire SSD read is not like rwl's. SSD need aio read. Therefore, we cannot guarantee that the data will not be retire during the period from sending the read request to the SSD and receiving the data to the memory, which may cause the corresponding data on the SSD to be overwritten. Fixes: https://tracker.ceph.com/issues/52236 Signed-off-by: Feng Hualong --- diff --git a/src/librbd/cache/pwl/AbstractWriteLog.cc b/src/librbd/cache/pwl/AbstractWriteLog.cc index 20a03e6b350f..b7341edd08f8 100644 --- a/src/librbd/cache/pwl/AbstractWriteLog.cc +++ b/src/librbd/cache/pwl/AbstractWriteLog.cc @@ -691,7 +691,7 @@ void AbstractWriteLog::read(Extents&& image_extents, bl->clear(); m_perfcounter->inc(l_librbd_pwl_rd_req, 1); - std::vector log_entries_to_read; + std::vector> log_entries_to_read; std::vector bls_to_read; m_async_op_tracker.start_op(); diff --git a/src/librbd/cache/pwl/AbstractWriteLog.h b/src/librbd/cache/pwl/AbstractWriteLog.h index 2ca831f1f99c..a0a20cd69457 100644 --- a/src/librbd/cache/pwl/AbstractWriteLog.h +++ b/src/librbd/cache/pwl/AbstractWriteLog.h @@ -366,11 +366,11 @@ protected: pwl::DeferredContexts &later) = 0; virtual void collect_read_extents( uint64_t read_buffer_offset, LogMapEntry map_entry, - std::vector &log_entries_to_read, + std::vector> &log_entries_to_read, std::vector &bls_to_read, uint64_t entry_hit_length, Extent hit_extent, pwl::C_ReadRequest *read_ctx) = 0; virtual void complete_read( - std::vector &log_entries_to_read, + std::vector> &log_entries_to_read, std::vector &bls_to_read, Context *ctx) = 0; virtual void write_data_to_buffer( std::shared_ptr ws_entry, diff --git a/src/librbd/cache/pwl/LogEntry.cc b/src/librbd/cache/pwl/LogEntry.cc index 98224241b557..63873f748d90 100644 --- a/src/librbd/cache/pwl/LogEntry.cc +++ b/src/librbd/cache/pwl/LogEntry.cc @@ -88,14 +88,6 @@ void WriteLogEntry::init(bool has_data, ram_entry.discard = 0; } -unsigned int WriteLogEntry::reader_count() const { - if (cache_bp.have_raw()) { - return (cache_bp.raw_nref() - bl_refs - 1); - } else { - return 0; - } -} - std::ostream& WriteLogEntry::format(std::ostream &os) const { os << "(Write) "; GenericWriteLogEntry::format(os); diff --git a/src/librbd/cache/pwl/LogEntry.h b/src/librbd/cache/pwl/LogEntry.h index b29d7fb88bcb..b0d27b5aefac 100644 --- a/src/librbd/cache/pwl/LogEntry.h +++ b/src/librbd/cache/pwl/LogEntry.h @@ -217,7 +217,7 @@ public: virtual buffer::list &get_cache_bl() = 0; BlockExtent block_extent(); - unsigned int reader_count() const; + virtual unsigned int reader_count() const = 0; /* Constructs a new bl containing copies of cache_bp */ void copy_cache_bl(bufferlist *out_bl) override {}; bool can_retire() const override { diff --git a/src/librbd/cache/pwl/rwl/LogEntry.cc b/src/librbd/cache/pwl/rwl/LogEntry.cc index 7325bef78a95..056701fb5148 100644 --- a/src/librbd/cache/pwl/rwl/LogEntry.cc +++ b/src/librbd/cache/pwl/rwl/LogEntry.cc @@ -80,6 +80,14 @@ void WriteLogEntry::copy_cache_bl(bufferlist *out_bl) { this->init_bl(cloned_bp, *out_bl); } +unsigned int WriteLogEntry::reader_count() const { + if (cache_bp.have_raw()) { + return (cache_bp.raw_nref() - bl_refs - 1); + } else { + return 0; + } +} + void WriteSameLogEntry::writeback( librbd::cache::ImageWritebackInterface &image_writeback, Context *ctx) { bufferlist entry_bl; diff --git a/src/librbd/cache/pwl/rwl/LogEntry.h b/src/librbd/cache/pwl/rwl/LogEntry.h index 0eacb5aeec9a..a4675c5fbd82 100644 --- a/src/librbd/cache/pwl/rwl/LogEntry.h +++ b/src/librbd/cache/pwl/rwl/LogEntry.h @@ -39,6 +39,7 @@ public: std::vector::iterator allocation) override; buffer::list &get_cache_bl() override; void copy_cache_bl(bufferlist *out_bl) override; + unsigned int reader_count() const override; }; class WriteSameLogEntry : public WriteLogEntry { diff --git a/src/librbd/cache/pwl/rwl/WriteLog.cc b/src/librbd/cache/pwl/rwl/WriteLog.cc index ef0735ba65b9..d4bbe08b01e6 100644 --- a/src/librbd/cache/pwl/rwl/WriteLog.cc +++ b/src/librbd/cache/pwl/rwl/WriteLog.cc @@ -60,7 +60,7 @@ WriteLog::~WriteLog() { template void WriteLog::collect_read_extents( uint64_t read_buffer_offset, LogMapEntry map_entry, - std::vector &log_entries_to_read, + std::vector> &log_entries_to_read, std::vector &bls_to_read, uint64_t entry_hit_length, Extent hit_extent, pwl::C_ReadRequest *read_ctx) { /* Make a bl for this hit extent. This will add references to the @@ -82,7 +82,7 @@ void WriteLog::collect_read_extents( template void WriteLog::complete_read( - std::vector &log_entries_to_read, + std::vector> &log_entries_to_read, std::vector &bls_to_read, Context *ctx) { ctx->complete(0); } diff --git a/src/librbd/cache/pwl/rwl/WriteLog.h b/src/librbd/cache/pwl/rwl/WriteLog.h index 92989ac21a5d..a29047e8e375 100644 --- a/src/librbd/cache/pwl/rwl/WriteLog.h +++ b/src/librbd/cache/pwl/rwl/WriteLog.h @@ -88,11 +88,11 @@ protected: bool &alloc_succeeds, bool &no_space) override; void collect_read_extents( uint64_t read_buffer_offset, LogMapEntry map_entry, - std::vector &log_entries_to_read, + std::vector> &log_entries_to_read, std::vector &bls_to_read, uint64_t entry_hit_length, Extent hit_extent, pwl::C_ReadRequest *read_ctx) override; void complete_read( - std::vector &log_entries_to_read, + std::vector> &log_entries_to_read, std::vector &bls_to_read, Context *ctx) override; bool retire_entries(const unsigned long int frees_per_tx) override; void persist_last_flushed_sync_gen() override; diff --git a/src/librbd/cache/pwl/ssd/LogEntry.h b/src/librbd/cache/pwl/ssd/LogEntry.h index 666398440a22..86121473b82e 100644 --- a/src/librbd/cache/pwl/ssd/LogEntry.h +++ b/src/librbd/cache/pwl/ssd/LogEntry.h @@ -40,6 +40,11 @@ public: buffer::list &get_cache_bl() override; void remove_cache_bl() override; unsigned int get_aligned_data_size() const override; + void inc_bl_refs() { bl_refs++; }; + void dec_bl_refs() { bl_refs--; }; + unsigned int reader_count() const override { + return bl_refs; + } }; class WriteSameLogEntry : public WriteLogEntry { diff --git a/src/librbd/cache/pwl/ssd/WriteLog.cc b/src/librbd/cache/pwl/ssd/WriteLog.cc index 69837d07310e..774dae0a8edf 100644 --- a/src/librbd/cache/pwl/ssd/WriteLog.cc +++ b/src/librbd/cache/pwl/ssd/WriteLog.cc @@ -69,7 +69,7 @@ WriteLog::~WriteLog() { template void WriteLog::collect_read_extents( uint64_t read_buffer_offset, LogMapEntry map_entry, - std::vector &log_entries_to_read, + std::vector> &log_entries_to_read, std::vector &bls_to_read, uint64_t entry_hit_length, Extent hit_extent, pwl::C_ReadRequest *read_ctx) { @@ -87,14 +87,15 @@ void WriteLog::collect_read_extents( if (!hit_bl.length()) { ldout(m_image_ctx.cct, 5) << "didn't hit RAM" << dendl; auto read_extent = read_ctx->read_extents.back(); - log_entries_to_read.push_back(&write_entry->ram_entry); + write_entry->inc_bl_refs(); + log_entries_to_read.push_back(std::move(write_entry)); bls_to_read.push_back(&read_extent->m_bl); } } template void WriteLog::complete_read( - std::vector &log_entries_to_read, + std::vector> &log_entries_to_read, std::vector &bls_to_read, Context *ctx) { if (!log_entries_to_read.empty()) { @@ -510,7 +511,9 @@ Context* WriteLog::construct_flush_entry_ctx( }); ctx = new LambdaContext( [this, log_entry, read_bl_ptr, ctx](int r) { - aio_read_data_block(&log_entry->ram_entry, read_bl_ptr, ctx); + auto write_entry = static_pointer_cast(log_entry); + write_entry->inc_bl_refs(); + aio_read_data_block(std::move(write_entry), read_bl_ptr, ctx); }); return ctx; } else { @@ -957,29 +960,32 @@ int WriteLog::update_pool_root_sync( } template -void WriteLog::aio_read_data_block(WriteLogCacheEntry *log_entry, +void WriteLog::aio_read_data_block(std::shared_ptr log_entry, bufferlist *bl, Context *ctx) { - std::vector log_entries {log_entry}; + std::vector> log_entries = {std::move(log_entry)}; std::vector bls {bl}; aio_read_data_blocks(log_entries, bls, ctx); } template void WriteLog::aio_read_data_blocks( - std::vector &log_entries, + std::vector> &log_entries, std::vector &bls, Context *ctx) { ceph_assert(log_entries.size() == bls.size()); //get the valid part Context *read_ctx = new LambdaContext( - [this, log_entries, bls, ctx](int r) { + [log_entries, bls, ctx](int r) { for (unsigned int i = 0; i < log_entries.size(); i++) { bufferlist valid_data_bl; - auto length = log_entries[i]->is_write() ? log_entries[i]->write_bytes : - log_entries[i]->ws_datalen; + auto write_entry = static_pointer_cast(log_entries[i]); + auto length = write_entry->ram_entry.is_write() ? write_entry->ram_entry.write_bytes + : write_entry->ram_entry.ws_datalen; + valid_data_bl.substr_of(*bls[i], 0, length); bls[i]->clear(); bls[i]->append(valid_data_bl); + write_entry->dec_bl_refs(); } ctx->complete(r); }); @@ -987,7 +993,7 @@ void WriteLog::aio_read_data_blocks( CephContext *cct = m_image_ctx.cct; AioTransContext *aio = new AioTransContext(cct, read_ctx); for (unsigned int i = 0; i < log_entries.size(); i++) { - auto log_entry = log_entries[i]; + WriteLogCacheEntry *log_entry = &log_entries[i]->ram_entry; ceph_assert(log_entry->is_write() || log_entry->is_writesame()); uint64_t len = log_entry->is_write() ? log_entry->write_bytes : diff --git a/src/librbd/cache/pwl/ssd/WriteLog.h b/src/librbd/cache/pwl/ssd/WriteLog.h index c4b4c178af60..f4750fd2c374 100644 --- a/src/librbd/cache/pwl/ssd/WriteLog.h +++ b/src/librbd/cache/pwl/ssd/WriteLog.h @@ -108,11 +108,11 @@ private: void load_existing_entries(pwl::DeferredContexts &later); void collect_read_extents( uint64_t read_buffer_offset, LogMapEntry map_entry, - std::vector &log_entries_to_read, + std::vector> &log_entries_to_read, std::vector &bls_to_read, uint64_t entry_hit_length, Extent hit_extent, pwl::C_ReadRequest *read_ctx) override; void complete_read( - std::vector &log_entries_to_read, + std::vector> &log_entries_to_read, std::vector &bls_to_read, Context *ctx) override; void enlist_op_appender(); bool retire_entries(const unsigned long int frees_per_tx); @@ -132,9 +132,9 @@ private: int update_pool_root_sync(std::shared_ptr root); void update_pool_root(std::shared_ptr root, AioTransContext *aio); - void aio_read_data_block(WriteLogCacheEntry *log_entry, bufferlist *bl, - Context *ctx); - void aio_read_data_blocks(std::vector &log_entries, + void aio_read_data_block(std::shared_ptr log_entry, + bufferlist *bl, Context *ctx); + void aio_read_data_blocks(std::vector> &log_entries, std::vector &bls, Context *ctx); static void aio_cache_cb(void *priv, void *priv2) { AioTransContext *c = static_cast(priv2);