From ccd1047b256e8600d45ea9f6b07a9834b6246efd Mon Sep 17 00:00:00 2001 From: Mahati Chamarthy Date: Mon, 4 Jan 2021 15:57:59 +0530 Subject: [PATCH] librbd/cache: Implement aio_read operation ... and retire entries i.e. flush to OSD. Support writesame, compare_and_write, discard and invalidate IO operations with tests. Signed-off-by: Lisa Li Signed-off-by: Mahati Chamarthy Signed-off-by: Changcheng Liu --- src/librbd/CMakeLists.txt | 3 +- src/librbd/cache/pwl/AbstractWriteLog.cc | 63 +-- src/librbd/cache/pwl/AbstractWriteLog.h | 9 + src/librbd/cache/pwl/Builder.h | 3 + src/librbd/cache/pwl/LogEntry.h | 6 + src/librbd/cache/pwl/ReadRequest.h | 4 +- src/librbd/cache/pwl/Types.h | 18 +- src/librbd/cache/pwl/rwl/Builder.h | 5 + src/librbd/cache/pwl/{ => rwl}/ReadRequest.cc | 4 +- src/librbd/cache/pwl/rwl/ReadRequest.h | 34 ++ src/librbd/cache/pwl/rwl/WriteLog.cc | 30 ++ src/librbd/cache/pwl/rwl/WriteLog.h | 9 + src/librbd/cache/pwl/ssd/Builder.h | 5 + src/librbd/cache/pwl/ssd/ReadRequest.cc | 92 +++++ src/librbd/cache/pwl/ssd/ReadRequest.h | 34 ++ src/librbd/cache/pwl/ssd/WriteLog.cc | 211 +++++++++- src/librbd/cache/pwl/ssd/WriteLog.h | 10 + .../librbd/cache/pwl/test_mock_SSDWriteLog.cc | 378 +++++++++++++++++- 18 files changed, 876 insertions(+), 42 deletions(-) rename src/librbd/cache/pwl/{ => rwl}/ReadRequest.cc (94%) create mode 100644 src/librbd/cache/pwl/rwl/ReadRequest.h create mode 100644 src/librbd/cache/pwl/ssd/ReadRequest.cc create mode 100644 src/librbd/cache/pwl/ssd/ReadRequest.h diff --git a/src/librbd/CMakeLists.txt b/src/librbd/CMakeLists.txt index 44663f4af67e8..c1bacb991dfad 100644 --- a/src/librbd/CMakeLists.txt +++ b/src/librbd/CMakeLists.txt @@ -265,7 +265,6 @@ if(WITH_RBD_RWL OR WITH_RBD_SSD_CACHE) cache/pwl/LogEntry.cc cache/pwl/LogMap.cc cache/pwl/LogOperation.cc - cache/pwl/ReadRequest.cc cache/pwl/Request.cc cache/pwl/ShutdownRequest.cc cache/pwl/SyncPoint.cc @@ -276,6 +275,7 @@ if(WITH_RBD_RWL OR WITH_RBD_SSD_CACHE) set(rbd_plugin_pwl_srcs ${rbd_plugin_pwl_srcs} cache/pwl/ssd/LogEntry.cc + cache/pwl/ssd/ReadRequest.cc cache/pwl/ssd/Request.cc cache/pwl/ssd/WriteLog.cc) endif() @@ -285,6 +285,7 @@ if(WITH_RBD_RWL OR WITH_RBD_SSD_CACHE) cache/pwl/rwl/WriteLog.cc cache/pwl/rwl/LogEntry.cc cache/pwl/rwl/LogOperation.cc + cache/pwl/rwl/ReadRequest.cc cache/pwl/rwl/Request.cc) endif() diff --git a/src/librbd/cache/pwl/AbstractWriteLog.cc b/src/librbd/cache/pwl/AbstractWriteLog.cc index 075a8b15bfb63..987f5258df8cc 100644 --- a/src/librbd/cache/pwl/AbstractWriteLog.cc +++ b/src/librbd/cache/pwl/AbstractWriteLog.cc @@ -17,7 +17,6 @@ #include "librbd/asio/ContextWQ.h" #include "librbd/cache/pwl/ImageCacheState.h" #include "librbd/cache/pwl/LogEntry.h" -#include "librbd/cache/pwl/ReadRequest.h" #include "librbd/plugin/Api.h" #include #include @@ -670,7 +669,8 @@ void AbstractWriteLog::read(Extents&& image_extents, // TODO: handle writesame and discard case in later PRs CephContext *cct = m_image_ctx.cct; utime_t now = ceph_clock_now(); - C_ReadRequest *read_ctx = new C_ReadRequest(cct, now, m_perfcounter, bl, on_finish); + C_ReadRequest *read_ctx = m_builder->create_read_request( + cct, now, m_perfcounter, bl, on_finish); ldout(cct, 20) << "name: " << m_image_ctx.name << " id: " << m_image_ctx.id << "image_extents=" << image_extents << ", " << "bl=" << bl << ", " @@ -680,6 +680,22 @@ void AbstractWriteLog::read(Extents&& image_extents, bl->clear(); m_perfcounter->inc(l_librbd_pwl_rd_req, 1); + std::vector log_entries_to_read; + std::vector bls_to_read; + + Context *ctx = new 
LambdaContext( + [this, read_ctx, fadvise_flags](int r) { + if (read_ctx->miss_extents.empty()) { + /* All of this read comes from RWL */ + read_ctx->complete(0); + } else { + /* Pass the read misses on to the layer below RWL */ + m_image_writeback.aio_read( + std::move(read_ctx->miss_extents), &read_ctx->miss_bl, + fadvise_flags, read_ctx); + } + }); + /* * The strategy here is to look up all the WriteLogMapEntries that overlap * this read, and iterate through those to separate this read into hits and @@ -699,14 +715,16 @@ void AbstractWriteLog::read(Extents&& image_extents, for (auto &extent : image_extents) { uint64_t extent_offset = 0; RWLock::RLocker entry_reader_locker(m_entry_reader_lock); - WriteLogMapEntries map_entries = m_blocks_to_log_entries.find_map_entries(block_extent(extent)); + WriteLogMapEntries map_entries = m_blocks_to_log_entries.find_map_entries( + block_extent(extent)); for (auto &map_entry : map_entries) { Extent entry_image_extent(pwl::image_extent(map_entry.block_extent)); /* If this map entry starts after the current image extent offset ... */ if (entry_image_extent.first > extent.first + extent_offset) { /* ... add range before map_entry to miss extents */ uint64_t miss_extent_start = extent.first + extent_offset; - uint64_t miss_extent_length = entry_image_extent.first - miss_extent_start; + uint64_t miss_extent_length = entry_image_extent.first - + miss_extent_start; Extent miss_extent(miss_extent_start, miss_extent_length); read_ctx->miss_extents.push_back(miss_extent); /* Add miss range to read extents */ @@ -726,10 +744,13 @@ void AbstractWriteLog::read(Extents&& image_extents, uint64_t entry_hit_length = min(entry_image_extent.second - entry_offset, extent.second - extent_offset); Extent hit_extent(entry_image_extent.first, entry_hit_length); - if (0 == map_entry.log_entry->write_bytes() && 0 < map_entry.log_entry->bytes_dirty()) { + if (0 == map_entry.log_entry->write_bytes() && + 0 < map_entry.log_entry->bytes_dirty()) { /* discard log entry */ auto discard_entry = map_entry.log_entry; - ldout(cct, 20) << "read hit on discard entry: log_entry=" << *discard_entry << dendl; + ldout(cct, 20) << "read hit on discard entry: log_entry=" + << *discard_entry + << dendl; /* Discards read as zero, so we'll construct a bufferlist of zeros */ bufferlist zero_bl; zero_bl.append_zero(entry_hit_length); @@ -739,24 +760,14 @@ void AbstractWriteLog::read(Extents&& image_extents, } else { /* write and writesame log entry */ /* Offset of the map entry into the log entry's buffer */ - uint64_t map_entry_buffer_offset = entry_image_extent.first - map_entry.log_entry->ram_entry.image_offset_bytes; + uint64_t map_entry_buffer_offset = entry_image_extent.first - + map_entry.log_entry->ram_entry.image_offset_bytes; /* Offset into the log entry buffer of this read hit */ uint64_t read_buffer_offset = map_entry_buffer_offset + entry_offset; - /* Create buffer object referring to cache pool for this read hit */ - auto write_entry = map_entry.log_entry; - - /* Make a bl for this hit extent. 
This will add references to the write_entry->pmem_bp */ - buffer::list hit_bl; - - buffer::list entry_bl_copy; - write_entry->copy_cache_bl(&entry_bl_copy); - entry_bl_copy.begin(read_buffer_offset).copy(entry_hit_length, hit_bl); - - ceph_assert(hit_bl.length() == entry_hit_length); - - /* Add hit extent to read extents */ - ImageExtentBuf hit_extent_buf(hit_extent, hit_bl); - read_ctx->read_extents.push_back(hit_extent_buf); + /* Create buffer object referring to pmem pool for this read hit */ + collect_read_extents( + read_buffer_offset, map_entry, log_entries_to_read, bls_to_read, + entry_hit_length, hit_extent, read_ctx); } /* Exclude RWL hit range from buffer and extent */ extent_offset += entry_hit_length; @@ -779,13 +790,7 @@ void AbstractWriteLog::read(Extents&& image_extents, ldout(cct, 20) << "miss_extents=" << read_ctx->miss_extents << ", " << "miss_bl=" << read_ctx->miss_bl << dendl; - if (read_ctx->miss_extents.empty()) { - /* All of this read comes from RWL */ - read_ctx->complete(0); - } else { - /* Pass the read misses on to the layer below RWL */ - m_image_writeback.aio_read(std::move(read_ctx->miss_extents), &read_ctx->miss_bl, fadvise_flags, read_ctx); - } + complete_read(log_entries_to_read, bls_to_read, ctx); } template diff --git a/src/librbd/cache/pwl/AbstractWriteLog.h b/src/librbd/cache/pwl/AbstractWriteLog.h index dc0058bf97fcf..52b24dee3c298 100644 --- a/src/librbd/cache/pwl/AbstractWriteLog.h +++ b/src/librbd/cache/pwl/AbstractWriteLog.h @@ -12,6 +12,7 @@ #include "librbd/BlockGuard.h" #include "librbd/cache/Types.h" #include "librbd/cache/pwl/LogOperation.h" +#include "librbd/cache/pwl/ReadRequest.h" #include "librbd/cache/pwl/Request.h" #include "librbd/cache/pwl/LogMap.h" #include "librbd/cache/pwl/Builder.h" @@ -365,6 +366,14 @@ protected: virtual void remove_pool_file() = 0; virtual void initialize_pool(Context *on_finish, pwl::DeferredContexts &later) = 0; + virtual void collect_read_extents( + uint64_t read_buffer_offset, LogMapEntry map_entry, + std::vector &log_entries_to_read, + std::vector &bls_to_read, uint64_t entry_hit_length, + Extent hit_extent, pwl::C_ReadRequest *read_ctx) = 0; + virtual void complete_read( + std::vector &log_entries_to_read, + std::vector &bls_to_read, Context *ctx) = 0; virtual void write_data_to_buffer( std::shared_ptr ws_entry, pwl::WriteLogCacheEntry *cache_entry) {} diff --git a/src/librbd/cache/pwl/Builder.h b/src/librbd/cache/pwl/Builder.h index 6cea7ac0d6a21..b108d0e1845a7 100644 --- a/src/librbd/cache/pwl/Builder.h +++ b/src/librbd/cache/pwl/Builder.h @@ -45,6 +45,9 @@ public: WriteLogOperationSet &set, uint64_t image_offset_bytes, uint64_t write_bytes, uint32_t data_len, CephContext *cct, std::shared_ptr writesame_log_entry) = 0; + virtual C_ReadRequest *create_read_request(CephContext *cct, utime_t arrived, + PerfCounters *perfcounter, ceph::bufferlist *bl, Context *on_finish) = 0; + }; } // namespace pwl diff --git a/src/librbd/cache/pwl/LogEntry.h b/src/librbd/cache/pwl/LogEntry.h index 7c216ad9e97b5..b29d7fb88bcb3 100644 --- a/src/librbd/cache/pwl/LogEntry.h +++ b/src/librbd/cache/pwl/LogEntry.h @@ -62,6 +62,9 @@ public: virtual bool is_write_entry() const { return false; } + virtual bool is_writesame_entry() const { + return false; + } virtual bool is_sync_point() const { return false; } @@ -223,6 +226,9 @@ public: bool is_write_entry() const override { return true; } + bool is_writesame_entry() const override { + return is_writesame; + } std::ostream &format(std::ostream &os) const; friend std::ostream 
&operator<<(std::ostream &os, const WriteLogEntry &entry); diff --git a/src/librbd/cache/pwl/ReadRequest.h b/src/librbd/cache/pwl/ReadRequest.h index 7c953547c875d..c188733e10753 100644 --- a/src/librbd/cache/pwl/ReadRequest.h +++ b/src/librbd/cache/pwl/ReadRequest.h @@ -24,13 +24,11 @@ public: m_arrived_time(arrived), m_perfcounter(perfcounter) {} ~C_ReadRequest() {} - void finish(int r) override; - const char *get_name() const { return "C_ReadRequest"; } -private: +protected: CephContext *m_cct; Context *m_on_finish; bufferlist *m_out_bl; diff --git a/src/librbd/cache/pwl/Types.h b/src/librbd/cache/pwl/Types.h index d8bdcfa7ddbd3..0f4f9c0018213 100644 --- a/src/librbd/cache/pwl/Types.h +++ b/src/librbd/cache/pwl/Types.h @@ -364,10 +364,20 @@ Context * override_ctx(int r, Context *ctx); class ImageExtentBuf : public io::Extent { public: bufferlist m_bl; - ImageExtentBuf(io::Extent extent) - : io::Extent(extent) { } - ImageExtentBuf(io::Extent extent, bufferlist bl) - : io::Extent(extent), m_bl(bl) { } + bool need_to_truncate; + int truncate_offset; + bool writesame; + ImageExtentBuf() {} + ImageExtentBuf(io::Extent extent, + bool need_to_truncate = false, uint64_t truncate_offset = 0, + bool writesame = false) + : io::Extent(extent), need_to_truncate(need_to_truncate), + truncate_offset(truncate_offset), writesame(writesame) {} + ImageExtentBuf(io::Extent extent, bufferlist bl, + bool need_to_truncate = false, uint64_t truncate_offset = 0, + bool writesame = false) + : io::Extent(extent), m_bl(bl), need_to_truncate(need_to_truncate), + truncate_offset(truncate_offset), writesame(writesame) {} }; std::string unique_lock_name(const std::string &name, void *address); diff --git a/src/librbd/cache/pwl/rwl/Builder.h b/src/librbd/cache/pwl/rwl/Builder.h index 9665a83afd39f..1321d711b9272 100644 --- a/src/librbd/cache/pwl/rwl/Builder.h +++ b/src/librbd/cache/pwl/rwl/Builder.h @@ -6,6 +6,7 @@ #include #include "LogEntry.h" +#include "ReadRequest.h" #include "Request.h" #include "LogOperation.h" @@ -84,6 +85,10 @@ public: set, image_offset_bytes, write_bytes, data_len, cct, writesame_log_entry); } + C_ReadRequest *create_read_request(CephContext *cct, utime_t arrived, + PerfCounters *perfcounter, ceph::bufferlist *bl, Context *on_finish) { + return new C_ReadRequest(cct, arrived, perfcounter, bl, on_finish); + } }; } // namespace rwl diff --git a/src/librbd/cache/pwl/ReadRequest.cc b/src/librbd/cache/pwl/rwl/ReadRequest.cc similarity index 94% rename from src/librbd/cache/pwl/ReadRequest.cc rename to src/librbd/cache/pwl/rwl/ReadRequest.cc index 766e33febf17a..c4530237c5d6f 100644 --- a/src/librbd/cache/pwl/ReadRequest.cc +++ b/src/librbd/cache/pwl/rwl/ReadRequest.cc @@ -5,12 +5,13 @@ #define dout_subsys ceph_subsys_rbd_pwl #undef dout_prefix -#define dout_prefix *_dout << "librbd::cache::pwl::ReadRequest: " << this << " " \ +#define dout_prefix *_dout << "librbd::cache::pwl::rwl::ReadRequest: " << this << " " \ << __func__ << ": " namespace librbd { namespace cache { namespace pwl { +namespace rwl { void C_ReadRequest::finish(int r) { ldout(m_cct, 20) << "(" << get_name() << "): r=" << r << dendl; @@ -63,6 +64,7 @@ void C_ReadRequest::finish(int r) { } } +} // namespace rwl } // namespace pwl } // namespace cache } // namespace librbd diff --git a/src/librbd/cache/pwl/rwl/ReadRequest.h b/src/librbd/cache/pwl/rwl/ReadRequest.h new file mode 100644 index 0000000000000..25168e83b9452 --- /dev/null +++ b/src/librbd/cache/pwl/rwl/ReadRequest.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; 
c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_CACHE_PWL_RWL_READ_REQUEST_H +#define CEPH_LIBRBD_CACHE_PWL_RWL_READ_REQUEST_H + +#include "librbd/cache/pwl/ReadRequest.h" + +namespace librbd { +namespace cache { +namespace pwl { +namespace rwl { + +typedef std::vector ImageExtentBufs; + +class C_ReadRequest : public pwl::C_ReadRequest { +protected: + using pwl::C_ReadRequest::m_cct; + using pwl::C_ReadRequest::m_on_finish; + using pwl::C_ReadRequest::m_out_bl; + using pwl::C_ReadRequest::m_arrived_time; + using pwl::C_ReadRequest::m_perfcounter; +public: + C_ReadRequest(CephContext *cct, utime_t arrived, PerfCounters *perfcounter, bufferlist *out_bl, Context *on_finish) + : pwl::C_ReadRequest(cct, arrived, perfcounter, out_bl, on_finish) {} + void finish(int r) override; +}; + +} // namespace rwl +} // namespace pwl +} // namespace cache +} // namespace librbd + +#endif // CEPH_LIBRBD_CACHE_PWL_RWL_READ_REQUEST_H diff --git a/src/librbd/cache/pwl/rwl/WriteLog.cc b/src/librbd/cache/pwl/rwl/WriteLog.cc index f05962863e80b..c597464f9a87b 100644 --- a/src/librbd/cache/pwl/rwl/WriteLog.cc +++ b/src/librbd/cache/pwl/rwl/WriteLog.cc @@ -57,6 +57,36 @@ WriteLog::~WriteLog() { delete m_builderobj; } +template +void WriteLog::collect_read_extents( + uint64_t read_buffer_offset, LogMapEntry map_entry, + std::vector &log_entries_to_read, + std::vector &bls_to_read, uint64_t entry_hit_length, + Extent hit_extent, pwl::C_ReadRequest *read_ctx) { + /* Make a bl for this hit extent. This will add references to the + * write_entry->pmem_bp */ + buffer::list hit_bl; + + /* Create buffer object referring to pmem pool for this read hit */ + auto write_entry = map_entry.log_entry; + + buffer::list entry_bl_copy; + write_entry->copy_cache_bl(&entry_bl_copy); + entry_bl_copy.begin(read_buffer_offset).copy(entry_hit_length, hit_bl); + ceph_assert(hit_bl.length() == entry_hit_length); + + /* Add hit extent to read extents */ + ImageExtentBuf hit_extent_buf(hit_extent, hit_bl); + read_ctx->read_extents.push_back(hit_extent_buf); +} + +template +void WriteLog::complete_read( + std::vector &log_entries_to_read, + std::vector &bls_to_read, Context *ctx) { + ctx->complete(0); +} + /* * Allocate the (already reserved) write log entries for a set of operations. 
* diff --git a/src/librbd/cache/pwl/rwl/WriteLog.h b/src/librbd/cache/pwl/rwl/WriteLog.h index 4d65a1de8b0f8..39d63776eca1d 100644 --- a/src/librbd/cache/pwl/rwl/WriteLog.h +++ b/src/librbd/cache/pwl/rwl/WriteLog.h @@ -42,6 +42,7 @@ public: WriteLog(const WriteLog&) = delete; WriteLog &operator=(const WriteLog&) = delete; + typedef io::Extent Extent; using This = AbstractWriteLog; using C_WriteRequestT = pwl::C_WriteRequest; using C_WriteSameRequestT = pwl::C_WriteSameRequest; @@ -83,6 +84,14 @@ protected: void schedule_append_ops(pwl::GenericLogOperations &ops) override; void append_scheduled_ops(void) override; void reserve_cache(C_BlockIORequestT *req, bool &alloc_succeeds, bool &no_space) override; + void collect_read_extents( + uint64_t read_buffer_offset, LogMapEntry map_entry, + std::vector &log_entries_to_read, + std::vector &bls_to_read, uint64_t entry_hit_length, + Extent hit_extent, pwl::C_ReadRequest *read_ctx) override; + void complete_read( + std::vector &log_entries_to_read, + std::vector &bls_to_read, Context *ctx) override; bool retire_entries(const unsigned long int frees_per_tx) override; void persist_last_flushed_sync_gen() override; bool alloc_resources(C_BlockIORequestT *req) override; diff --git a/src/librbd/cache/pwl/ssd/Builder.h b/src/librbd/cache/pwl/ssd/Builder.h index f79d6855dfe83..e761d4815efb1 100644 --- a/src/librbd/cache/pwl/ssd/Builder.h +++ b/src/librbd/cache/pwl/ssd/Builder.h @@ -6,6 +6,7 @@ #include #include "LogEntry.h" +#include "ReadRequest.h" #include "Request.h" #include "librbd/cache/ImageWriteback.h" @@ -83,6 +84,10 @@ public: set, image_offset_bytes, write_bytes, data_len, cct, writesame_log_entry); } + C_ReadRequest *create_read_request(CephContext *cct, utime_t arrived, + PerfCounters *perfcounter, ceph::bufferlist *bl, Context *on_finish) { + return new C_ReadRequest(cct, arrived, perfcounter, bl, on_finish); + } }; diff --git a/src/librbd/cache/pwl/ssd/ReadRequest.cc b/src/librbd/cache/pwl/ssd/ReadRequest.cc new file mode 100644 index 0000000000000..c04fdd83cd25e --- /dev/null +++ b/src/librbd/cache/pwl/ssd/ReadRequest.cc @@ -0,0 +1,92 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ReadRequest.h" + +#define dout_subsys ceph_subsys_rbd_pwl +#undef dout_prefix +#define dout_prefix *_dout << "librbd::cache::pwl::ssd::ReadRequest: " << this << " " \ + << __func__ << ": " + +namespace librbd { +namespace cache { +namespace pwl { +namespace ssd { + +void C_ReadRequest::finish(int r) { + ldout(m_cct, 20) << "(" << get_name() << "): r=" << r << dendl; + int hits = 0; + int misses = 0; + int hit_bytes = 0; + int miss_bytes = 0; + if (r >= 0) { + /* + * At this point the miss read has completed. We'll iterate through + * m_read_extents and produce *m_out_bl by assembling pieces of m_miss_bl + * and the individual hit extent bufs in the read extents that represent + * hits. 
+ */ + uint64_t miss_bl_offset = 0; + for (auto &extent : read_extents) { + if (extent.m_bl.length()) { + /* This was a hit */ + bufferlist data_bl; + if (extent.writesame) { + int data_len = extent.m_bl.length(); + int read_buffer_offset = extent.truncate_offset; + if (extent.need_to_truncate && extent.truncate_offset >= data_len) { + read_buffer_offset = (extent.truncate_offset) % data_len; + } + // build data and truncate + bufferlist temp_bl; + uint64_t total_left_bytes = read_buffer_offset + extent.second; + while (total_left_bytes > 0) { + temp_bl.append(extent.m_bl); + total_left_bytes = total_left_bytes - data_len; + } + data_bl.substr_of(temp_bl, read_buffer_offset, extent.second); + m_out_bl->claim_append(data_bl); + } else if (extent.need_to_truncate) { + assert(extent.m_bl.length() >= extent.truncate_offset + extent.second); + data_bl.substr_of(extent.m_bl, extent.truncate_offset, extent.second); + m_out_bl->claim_append(data_bl); + } else { + assert(extent.second == extent.m_bl.length()); + m_out_bl->claim_append(extent.m_bl); + } + ++hits; + hit_bytes += extent.second; + } else { + /* This was a miss. */ + ++misses; + miss_bytes += extent.second; + bufferlist miss_extent_bl; + miss_extent_bl.substr_of(miss_bl, miss_bl_offset, extent.second); + /* Add this read miss bufferlist to the output bufferlist */ + m_out_bl->claim_append(miss_extent_bl); + /* Consume these bytes in the read miss bufferlist */ + miss_bl_offset += extent.second; + } + } + } + ldout(m_cct, 20) << "(" << get_name() << "): r=" << r << " bl=" << *m_out_bl << dendl; + utime_t now = ceph_clock_now(); + ceph_assert((int)m_out_bl->length() == hit_bytes + miss_bytes); + m_on_finish->complete(r); + m_perfcounter->inc(l_librbd_pwl_rd_bytes, hit_bytes + miss_bytes); + m_perfcounter->inc(l_librbd_pwl_rd_hit_bytes, hit_bytes); + m_perfcounter->tinc(l_librbd_pwl_rd_latency, now - m_arrived_time); + if (!misses) { + m_perfcounter->inc(l_librbd_pwl_rd_hit_req, 1); + m_perfcounter->tinc(l_librbd_pwl_rd_hit_latency, now - m_arrived_time); + } else { + if (hits) { + m_perfcounter->inc(l_librbd_pwl_rd_part_hit_req, 1); + } + } +} + +} // namespace ssd +} // namespace pwl +} // namespace cache +} // namespace librbd diff --git a/src/librbd/cache/pwl/ssd/ReadRequest.h b/src/librbd/cache/pwl/ssd/ReadRequest.h new file mode 100644 index 0000000000000..345c4aa65e0d4 --- /dev/null +++ b/src/librbd/cache/pwl/ssd/ReadRequest.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_CACHE_PWL_SSD_READ_REQUEST_H +#define CEPH_LIBRBD_CACHE_PWL_SSD_READ_REQUEST_H + +#include "librbd/cache/pwl/ReadRequest.h" + +namespace librbd { +namespace cache { +namespace pwl { +namespace ssd { + +typedef std::vector ImageExtentBufs; + +class C_ReadRequest : public pwl::C_ReadRequest { +protected: + using pwl::C_ReadRequest::m_cct; + using pwl::C_ReadRequest::m_on_finish; + using pwl::C_ReadRequest::m_out_bl; + using pwl::C_ReadRequest::m_arrived_time; + using pwl::C_ReadRequest::m_perfcounter; +public: + C_ReadRequest(CephContext *cct, utime_t arrived, PerfCounters *perfcounter, bufferlist *out_bl, Context *on_finish) + : pwl::C_ReadRequest(cct, arrived, perfcounter, out_bl, on_finish) {} + void finish(int r) override; +}; + +} // namespace ssd +} // namespace pwl +} // namespace cache +} // namespace librbd + +#endif // CEPH_LIBRBD_CACHE_PWL_SSD_READ_REQUEST_H diff --git a/src/librbd/cache/pwl/ssd/WriteLog.cc b/src/librbd/cache/pwl/ssd/WriteLog.cc index 
6efd85ba1f5a2..c7d56f3dd3d53 100644 --- a/src/librbd/cache/pwl/ssd/WriteLog.cc +++ b/src/librbd/cache/pwl/ssd/WriteLog.cc @@ -56,6 +56,50 @@ WriteLog::~WriteLog() { delete m_builderobj; } +template +void WriteLog::collect_read_extents( + uint64_t read_buffer_offset, LogMapEntry map_entry, + std::vector &log_entries_to_read, + std::vector &bls_to_read, + uint64_t entry_hit_length, Extent hit_extent, + pwl::C_ReadRequest *read_ctx) { + // Make a bl for this hit extent. This will add references to the + // write_entry->pmem_bp */ + auto write_entry = static_pointer_cast(map_entry.log_entry); + buffer::list hit_bl; + hit_bl = write_entry->get_cache_bl(); + + if(!hit_bl.length()) { + ImageExtentBuf hit_extent_buf; + bool writesame = write_entry->is_writesame_entry(); + hit_extent_buf = ImageExtentBuf( + {hit_extent, true, read_buffer_offset, writesame}); + read_ctx->read_extents.push_back(hit_extent_buf); + ImageExtentBuf &read_extent = read_ctx->read_extents.back(); + + log_entries_to_read.push_back(&write_entry->ram_entry); + bls_to_read.push_back(&read_extent.m_bl); + } else { + buffer::list new_bl; + new_bl.substr_of(hit_bl, read_buffer_offset, entry_hit_length); + assert(new_bl.length() == entry_hit_length); + ImageExtentBuf hit_extent_buf(hit_extent, new_bl); + read_ctx->read_extents.push_back(hit_extent_buf); + } +} + +template +void WriteLog::complete_read( + std::vector &log_entries_to_read, + std::vector &bls_to_read, + Context *ctx) { + if (!log_entries_to_read.empty()) { + aio_read_data_block(log_entries_to_read, bls_to_read, ctx); + } else { + ctx->complete(0); + } +} + template void WriteLog::initialize_pool(Context *on_finish, pwl::DeferredContexts &later) { @@ -490,6 +534,8 @@ void WriteLog::process_work() { CephContext *cct = m_image_ctx.cct; int max_iterations = 4; bool wake_up_requested = false; + uint64_t aggressive_high_water_bytes = m_log_pool_ring_buffer_size * AGGRESSIVE_RETIRE_HIGH_WATER; + uint64_t aggressive_high_water_entries = this->m_total_log_entries * AGGRESSIVE_RETIRE_HIGH_WATER; uint64_t high_water_bytes = m_log_pool_ring_buffer_size * RETIRE_HIGH_WATER; uint64_t high_water_entries = this->m_total_log_entries * RETIRE_HIGH_WATER; @@ -509,11 +555,10 @@ void WriteLog::process_work() { << ", allocated_entries > high_water=" << (m_log_entries.size() > high_water_entries) << dendl; - //TODO: Implement and uncomment this in next PR - /*retire_entries((this->m_shutting_down || this->m_invalidating || + retire_entries((this->m_shutting_down || this->m_invalidating || (m_bytes_allocated > aggressive_high_water_bytes) || (m_log_entries.size() > aggressive_high_water_entries)) - ? MAX_ALLOC_PER_TRANSACTION : MAX_FREE_PER_TRANSACTION);*/ + ? MAX_ALLOC_PER_TRANSACTION : MAX_FREE_PER_TRANSACTION); } this->dispatch_deferred_writes(); this->process_writeback_dirty_entries(); @@ -533,6 +578,166 @@ void WriteLog::process_work() { } } +/** + * Retire up to MAX_ALLOC_PER_TRANSACTION of the oldest log entries + * that are eligible to be retired. Returns true if anything was + * retired. 
+ * +*/ +template +bool WriteLog::retire_entries(const unsigned long int frees_per_tx) { + CephContext *cct = m_image_ctx.cct; + GenericLogEntriesVector retiring_entries; + uint32_t initial_first_valid_entry; + uint32_t first_valid_entry; + + std::lock_guard retire_locker(this->m_log_retire_lock); + ldout(cct, 20) << "Look for entries to retire" << dendl; + { + // Entry readers can't be added while we hold m_entry_reader_lock + RWLock::WLocker entry_reader_locker(this->m_entry_reader_lock); + std::lock_guard locker(m_lock); + initial_first_valid_entry = m_first_valid_entry; + first_valid_entry = m_first_valid_entry; + while (retiring_entries.size() < frees_per_tx && !m_log_entries.empty()) { + GenericLogEntriesVector retiring_subentries; + auto entry = m_log_entries.front(); + uint64_t control_block_pos = entry->log_entry_index; + uint64_t data_length = 0; + for (auto it = m_log_entries.begin(); it != m_log_entries.end(); ++it) { + if (this->can_retire_entry(*it)) { + // log_entry_index is valid after appending to SSD + if ((*it)->log_entry_index != control_block_pos) { + ldout(cct, 20) << "Old log_entry_index is " << control_block_pos + << ",New log_entry_index is " + << (*it)->log_entry_index + << ",data length is " << data_length << dendl; + ldout(cct, 20) << "The log entry is " << *(*it) << dendl; + if ((*it)->log_entry_index < control_block_pos) { + ceph_assert((*it)->log_entry_index == + (control_block_pos + data_length + MIN_WRITE_ALLOC_SSD_SIZE) + % this->m_log_pool_config_size + DATA_RING_BUFFER_OFFSET); + } else { + ceph_assert((*it)->log_entry_index == control_block_pos + + data_length + MIN_WRITE_ALLOC_SSD_SIZE); + } + break; + } else { + retiring_subentries.push_back(*it); + if ((*it)->is_write_entry()) { + data_length += (*it)->get_aligned_data_size(); + } + } + } else { + retiring_subentries.clear(); + break; + } + } + // SSD: retiring_subentries in a span + if (!retiring_subentries.empty()) { + for (auto it = retiring_subentries.begin(); + it != retiring_subentries.end(); it++) { + ceph_assert(m_log_entries.front() == *it); + m_log_entries.pop_front(); + if (entry->is_write_entry()) { + auto write_entry = static_pointer_cast(entry); + this->m_blocks_to_log_entries.remove_log_entry(write_entry); + } + } + retiring_entries.insert( + retiring_entries.end(), retiring_subentries.begin(), + retiring_subentries.end()); + } else { + break; + } + } + } + if (retiring_entries.size()) { + ldout(cct, 1) << "Retiring " << retiring_entries.size() + << " entries" << dendl; + + // Advance first valid entry and release buffers + uint64_t flushed_sync_gen; + std::lock_guard append_locker(this->m_log_append_lock); + { + std::lock_guard locker(m_lock); + flushed_sync_gen = this->m_flushed_sync_gen; + } + + //calculate new first_valid_entry based on last entry to retire + auto entry = retiring_entries.back(); + if (entry->is_write_entry() || entry->is_writesame_entry()) { + first_valid_entry = entry->ram_entry.write_data_pos + + entry->get_aligned_data_size(); + } else { + first_valid_entry = entry->log_entry_index + MIN_WRITE_ALLOC_SSD_SIZE; + } + if (first_valid_entry >= this->m_log_pool_config_size) { + first_valid_entry = first_valid_entry % this->m_log_pool_config_size + + DATA_RING_BUFFER_OFFSET; + } + ceph_assert(first_valid_entry != initial_first_valid_entry); + auto new_root = std::make_shared(pool_root); + new_root->flushed_sync_gen = flushed_sync_gen; + new_root->first_valid_entry = first_valid_entry; + pool_root.flushed_sync_gen = flushed_sync_gen; + pool_root.first_valid_entry = 
first_valid_entry; + + Context *ctx = new LambdaContext( + [this, flushed_sync_gen, first_valid_entry, + initial_first_valid_entry, retiring_entries](int r) { + uint64_t allocated_bytes = 0; + uint64_t cached_bytes = 0; + uint64_t former_log_pos = 0; + for (auto &entry : retiring_entries) { + ceph_assert(entry->log_entry_index != 0); + if (entry->log_entry_index != former_log_pos ) { + // Space for control blocks + allocated_bytes += MIN_WRITE_ALLOC_SSD_SIZE; + former_log_pos = entry->log_entry_index; + } + if (entry->is_write_entry()) { + cached_bytes += entry->write_bytes(); + //space for userdata + allocated_bytes += entry->get_aligned_data_size(); + } + } + { + std::lock_guard locker(m_lock); + m_first_valid_entry = first_valid_entry; + ceph_assert(m_first_valid_entry % MIN_WRITE_ALLOC_SSD_SIZE == 0); + this->m_free_log_entries += retiring_entries.size(); + ceph_assert(this->m_bytes_cached >= cached_bytes); + this->m_bytes_cached -= cached_bytes; + + ldout(m_image_ctx.cct, 20) + << "Finished root update: " << "initial_first_valid_entry=" + << initial_first_valid_entry << ", " << "m_first_valid_entry=" + << m_first_valid_entry << "," << "release space = " + << allocated_bytes << "," << "m_bytes_allocated=" + << m_bytes_allocated << "," << "release cached space=" + << allocated_bytes << "," << "m_bytes_cached=" + << this->m_bytes_cached << dendl; + + this->m_alloc_failed_since_retire = false; + this->wake_up(); + m_async_update_superblock--; + this->m_async_op_tracker.finish_op(); + } + + this->dispatch_deferred_writes(); + this->process_writeback_dirty_entries(); + }); + + std::lock_guard locker(m_lock); + schedule_update_root(new_root, ctx); + } else { + ldout(cct, 20) << "Nothing to retire" << dendl; + return false; + } + return true; +} + template void WriteLog::append_ops(GenericLogOperations &ops, Context *ctx, uint64_t* new_first_free_entry, diff --git a/src/librbd/cache/pwl/ssd/WriteLog.h b/src/librbd/cache/pwl/ssd/WriteLog.h index 3bc72bb5c0766..e8236be76df3c 100644 --- a/src/librbd/cache/pwl/ssd/WriteLog.h +++ b/src/librbd/cache/pwl/ssd/WriteLog.h @@ -42,6 +42,7 @@ public: WriteLog(const WriteLog&) = delete; WriteLog &operator=(const WriteLog&) = delete; + typedef io::Extent Extent; using This = AbstractWriteLog; using C_BlockIORequestT = pwl::C_BlockIORequest; using C_WriteRequestT = pwl::C_WriteRequest; @@ -106,7 +107,16 @@ private: Builder* create_builder(); void load_existing_entries(pwl::DeferredContexts &later); + void collect_read_extents( + uint64_t read_buffer_offset, LogMapEntry map_entry, + std::vector &log_entries_to_read, + std::vector &bls_to_read, uint64_t entry_hit_length, + Extent hit_extent, pwl::C_ReadRequest *read_ctx) override; + void complete_read( + std::vector &log_entries_to_read, + std::vector &bls_to_read, Context *ctx) override; void enlist_op_appender(); + bool retire_entries(const unsigned long int frees_per_tx); bool has_sync_point_logs(GenericLogOperations &ops); void append_op_log_entries(GenericLogOperations &ops); void alloc_op_log_entries(GenericLogOperations &ops); diff --git a/src/test/librbd/cache/pwl/test_mock_SSDWriteLog.cc b/src/test/librbd/cache/pwl/test_mock_SSDWriteLog.cc index 4eac15e0b7e88..22a9dcb1d0607 100644 --- a/src/test/librbd/cache/pwl/test_mock_SSDWriteLog.cc +++ b/src/test/librbd/cache/pwl/test_mock_SSDWriteLog.cc @@ -67,7 +67,8 @@ struct TestMockCacheSSDWriteLog : public TestMockFixture { MockImageCacheStateSSD *get_cache_state( MockImageCtx& mock_image_ctx, MockApi& mock_api) { - MockImageCacheStateSSD *rwl_state = new 
MockImageCacheStateSSD(&mock_image_ctx, mock_api); + MockImageCacheStateSSD *rwl_state = new MockImageCacheStateSSD( + &mock_image_ctx, mock_api); return rwl_state; } @@ -233,6 +234,381 @@ TEST_F(TestMockCacheSSDWriteLog, write) { ASSERT_EQ(0, finish_ctx3.wait()); } +TEST_F(TestMockCacheSSDWriteLog, read_hit_rwl_cache) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockImageCtx mock_image_ctx(*ictx); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockSSDWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); + expect_op_work_queue(mock_image_ctx); + expect_metadata_set(mock_image_ctx); + + MockContextSSD finish_ctx1; + expect_context_complete(finish_ctx1, 0); + rwl.init(&finish_ctx1); + ASSERT_EQ(0, finish_ctx1.wait()); + + MockContextSSD finish_ctx2; + expect_context_complete(finish_ctx2, 0); + Extents image_extents{{0, 4096}}; + bufferlist bl; + bl.append(std::string(4096, '1')); + bufferlist bl_copy = bl; + int fadvise_flags = 0; + rwl.write(std::move(image_extents), std::move(bl), + fadvise_flags, &finish_ctx2); + ASSERT_EQ(0, finish_ctx2.wait()); + + MockContextSSD finish_ctx_read; + expect_context_complete(finish_ctx_read, 0); + Extents image_extents_read{{0, 4096}}; + bufferlist read_bl; + rwl.read(std::move(image_extents_read), &read_bl, + fadvise_flags, &finish_ctx_read); + ASSERT_EQ(0, finish_ctx_read.wait()); + ASSERT_EQ(4096, read_bl.length()); + ASSERT_TRUE(bl_copy.contents_equal(read_bl)); + + MockContextSSD finish_ctx3; + expect_context_complete(finish_ctx3, 0); + rwl.shut_down(&finish_ctx3); + ASSERT_EQ(0, finish_ctx3.wait()); +} + +TEST_F(TestMockCacheSSDWriteLog, read_hit_part_rwl_cache) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockImageCtx mock_image_ctx(*ictx); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockSSDWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); + expect_op_work_queue(mock_image_ctx); + expect_metadata_set(mock_image_ctx); + + MockContextSSD finish_ctx1; + expect_context_complete(finish_ctx1, 0); + rwl.init(&finish_ctx1); + ASSERT_EQ(0, finish_ctx1.wait()); + + MockContextSSD finish_ctx2; + expect_context_complete(finish_ctx2, 0); + Extents image_extents{{0, 8192}}; + bufferlist bl; + bl.append(std::string(8192, '1')); + bufferlist bl_copy = bl; + int fadvise_flags = 0; + rwl.write(std::move(image_extents), std::move(bl), + fadvise_flags, &finish_ctx2); + ASSERT_EQ(0, finish_ctx2.wait()); + + MockContextSSD finish_ctx_read; + Extents image_extents_read{{4096, 4096}}; + bufferlist hit_bl; + bl_copy.begin(4095).copy(4096, hit_bl); + expect_context_complete(finish_ctx_read, 0); + bufferlist read_bl; + rwl.read(std::move(image_extents_read), &read_bl, + fadvise_flags, &finish_ctx_read); + ASSERT_EQ(0, finish_ctx_read.wait()); + ASSERT_EQ(4096, read_bl.length()); + bufferlist read_bl_hit; + read_bl.begin(0).copy(4096, read_bl_hit); + ASSERT_TRUE(hit_bl.contents_equal(read_bl_hit)); + + MockContextSSD finish_ctx3; + expect_context_complete(finish_ctx3, 0); + rwl.shut_down(&finish_ctx3); + ASSERT_EQ(0, finish_ctx3.wait()); +} + +TEST_F(TestMockCacheSSDWriteLog, read_miss_rwl_cache) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockImageCtx mock_image_ctx(*ictx); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockSSDWriteLog rwl( + 
mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); + expect_op_work_queue(mock_image_ctx); + expect_metadata_set(mock_image_ctx); + + MockContextSSD finish_ctx1; + expect_context_complete(finish_ctx1, 0); + rwl.init(&finish_ctx1); + ASSERT_EQ(0, finish_ctx1.wait()); + + MockContextSSD finish_ctx2; + expect_context_complete(finish_ctx2, 0); + Extents image_extents{{0, 4096}}; + bufferlist bl; + bl.append(std::string(4096, '1')); + int fadvise_flags = 0; + rwl.write(std::move(image_extents), std::move(bl), + fadvise_flags, &finish_ctx2); + ASSERT_EQ(0, finish_ctx2.wait()); + + MockContextSSD finish_ctx_read; + Extents image_extents_read{{4096, 4096}}; + expect_context_complete(finish_ctx_read, 4096); + bufferlist read_bl; + ASSERT_EQ(0, read_bl.length()); + rwl.read(std::move(image_extents_read), &read_bl, + fadvise_flags, &finish_ctx_read); + ASSERT_EQ(4096, finish_ctx_read.wait()); + ASSERT_EQ(4096, read_bl.length()); + + MockContextSSD finish_ctx3; + expect_context_complete(finish_ctx3, 0); + rwl.shut_down(&finish_ctx3); + ASSERT_EQ(0, finish_ctx3.wait()); +} + +TEST_F(TestMockCacheSSDWriteLog, compare_and_write_compare_matched) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockImageCtx mock_image_ctx(*ictx); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockSSDWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); + expect_op_work_queue(mock_image_ctx); + expect_metadata_set(mock_image_ctx); + + MockContextSSD finish_ctx1; + expect_context_complete(finish_ctx1, 0); + rwl.init(&finish_ctx1); + ASSERT_EQ(0, finish_ctx1.wait()); + + MockContextSSD finish_ctx2; + expect_context_complete(finish_ctx2, 0); + Extents image_extents{{0, 4096}}; + bufferlist bl1; + bl1.append(std::string(4096, '1')); + bufferlist com_bl = bl1; + int fadvise_flags = 0; + rwl.write(std::move(image_extents), std::move(bl1), fadvise_flags, &finish_ctx2); + ASSERT_EQ(0, finish_ctx2.wait()); + + MockContextSSD finish_ctx_cw; + bufferlist bl2; + bl2.append(std::string(4096, '2')); + bufferlist bl2_copy = bl2; + uint64_t mismatch_offset = -1; + expect_context_complete(finish_ctx_cw, 0); + rwl.compare_and_write({{0, 4096}}, std::move(com_bl), std::move(bl2), + &mismatch_offset, fadvise_flags, &finish_ctx_cw); + ASSERT_EQ(0, finish_ctx_cw.wait()); + ASSERT_EQ(0, mismatch_offset); + + MockContextSSD finish_ctx_read; + bufferlist read_bl; + expect_context_complete(finish_ctx_read, 0); + rwl.read({{0, 4096}}, &read_bl, fadvise_flags, &finish_ctx_read); + ASSERT_EQ(0, finish_ctx_read.wait()); + ASSERT_EQ(4096, read_bl.length()); + ASSERT_TRUE(bl2_copy.contents_equal(read_bl)); + + MockContextSSD finish_ctx3; + expect_context_complete(finish_ctx3, 0); + rwl.shut_down(&finish_ctx3); + + ASSERT_EQ(0, finish_ctx3.wait()); +} + +TEST_F(TestMockCacheSSDWriteLog, compare_and_write_compare_failed) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockImageCtx mock_image_ctx(*ictx); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockSSDWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); + expect_op_work_queue(mock_image_ctx); + expect_metadata_set(mock_image_ctx); + + MockContextSSD finish_ctx1; + expect_context_complete(finish_ctx1, 0); + rwl.init(&finish_ctx1); + ASSERT_EQ(0, finish_ctx1.wait()); + + MockContextSSD finish_ctx2; + 
expect_context_complete(finish_ctx2, 0); + Extents image_extents{{0, 4096}}; + bufferlist bl1; + bl1.append(std::string(4096, '1')); + bufferlist bl1_copy = bl1; + int fadvise_flags = 0; + rwl.write(std::move(image_extents), std::move(bl1), fadvise_flags, &finish_ctx2); + ASSERT_EQ(0, finish_ctx2.wait()); + + MockContextSSD finish_ctx_cw; + bufferlist bl2; + bl2.append(std::string(4096, '2')); + bufferlist com_bl = bl2; + uint64_t mismatch_offset = -1; + expect_context_complete(finish_ctx_cw, -EILSEQ); + rwl.compare_and_write({{0, 4096}}, std::move(com_bl), std::move(bl2), + &mismatch_offset, fadvise_flags, &finish_ctx_cw); + ASSERT_EQ(-EILSEQ, finish_ctx_cw.wait()); + ASSERT_EQ(0, mismatch_offset); + + MockContextSSD finish_ctx_read; + bufferlist read_bl; + expect_context_complete(finish_ctx_read, 0); + rwl.read({{0, 4096}}, &read_bl, fadvise_flags, &finish_ctx_read); + ASSERT_EQ(0, finish_ctx_read.wait()); + ASSERT_EQ(4096, read_bl.length()); + ASSERT_TRUE(bl1_copy.contents_equal(read_bl)); + + MockContextSSD finish_ctx3; + expect_context_complete(finish_ctx3, 0); + rwl.shut_down(&finish_ctx3); + ASSERT_EQ(0, finish_ctx3.wait()); +} + +TEST_F(TestMockCacheSSDWriteLog, writesame) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockImageCtx mock_image_ctx(*ictx); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockSSDWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); + expect_op_work_queue(mock_image_ctx); + expect_metadata_set(mock_image_ctx); + + MockContextSSD finish_ctx1; + expect_context_complete(finish_ctx1, 0); + rwl.init(&finish_ctx1); + ASSERT_EQ(0, finish_ctx1.wait()); + + MockContextSSD finish_ctx2; + expect_context_complete(finish_ctx2, 0); + bufferlist bl, test_bl; + bl.append(std::string(512, '1')); + test_bl.append(std::string(4096, '1')); + int fadvise_flags = 0; + rwl.writesame(0, 4096, std::move(bl), fadvise_flags, &finish_ctx2); + ASSERT_EQ(0, finish_ctx2.wait()); + + MockContextSSD finish_ctx_read; + bufferlist read_bl; + expect_context_complete(finish_ctx_read, 0); + rwl.read({{0, 4096}}, &read_bl, fadvise_flags, &finish_ctx_read); + ASSERT_EQ(0, finish_ctx_read.wait()); + ASSERT_EQ(4096, read_bl.length()); + ASSERT_TRUE(test_bl.contents_equal(read_bl)); + + MockContextSSD finish_ctx3; + expect_context_complete(finish_ctx3, 0); + rwl.shut_down(&finish_ctx3); + + ASSERT_EQ(0, finish_ctx3.wait()); +} + +TEST_F(TestMockCacheSSDWriteLog, discard) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockImageCtx mock_image_ctx(*ictx); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockSSDWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); + expect_op_work_queue(mock_image_ctx); + expect_metadata_set(mock_image_ctx); + + MockContextSSD finish_ctx1; + expect_context_complete(finish_ctx1, 0); + rwl.init(&finish_ctx1); + ASSERT_EQ(0, finish_ctx1.wait()); + + MockContextSSD finish_ctx2; + expect_context_complete(finish_ctx2, 0); + Extents image_extents{{0, 4096}}; + bufferlist bl; + bl.append(std::string(4096, '1')); + bufferlist bl_copy = bl; + int fadvise_flags = 0; + rwl.write(std::move(image_extents), std::move(bl), fadvise_flags, &finish_ctx2); + ASSERT_EQ(0, finish_ctx2.wait()); + + MockContextSSD finish_ctx_discard; + expect_context_complete(finish_ctx_discard, 0); + rwl.discard(0, 4096, 1, &finish_ctx_discard); + ASSERT_EQ(0, 
finish_ctx_discard.wait()); + + MockContextSSD finish_ctx_read; + bufferlist read_bl; + expect_context_complete(finish_ctx_read, 0); + rwl.read({{0, 4096}}, &read_bl, fadvise_flags, &finish_ctx_read); + ASSERT_EQ(0, finish_ctx_read.wait()); + ASSERT_EQ(4096, read_bl.length()); + ASSERT_TRUE(read_bl.is_zero()); + + MockContextSSD finish_ctx3; + expect_context_complete(finish_ctx3, 0); + rwl.shut_down(&finish_ctx3); + + ASSERT_EQ(0, finish_ctx3.wait()); +} + +TEST_F(TestMockCacheSSDWriteLog, invalidate) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockImageCtx mock_image_ctx(*ictx); + MockImageWriteback mock_image_writeback(mock_image_ctx); + MockApi mock_api; + MockSSDWriteLog rwl( + mock_image_ctx, get_cache_state(mock_image_ctx, mock_api), + mock_image_writeback, mock_api); + expect_op_work_queue(mock_image_ctx); + expect_metadata_set(mock_image_ctx); + + MockContextSSD finish_ctx1; + expect_context_complete(finish_ctx1, 0); + rwl.init(&finish_ctx1); + ASSERT_EQ(0, finish_ctx1.wait()); + + MockContextSSD finish_ctx2; + expect_context_complete(finish_ctx2, 0); + Extents image_extents{{0, 4096}}; + bufferlist bl; + bl.append(std::string(4096, '1')); + bufferlist bl_copy = bl; + int fadvise_flags = 0; + rwl.write(std::move(image_extents), std::move(bl), fadvise_flags, &finish_ctx2); + ASSERT_EQ(0, finish_ctx2.wait()); + + MockContextSSD finish_ctx_invalidate; + expect_context_complete(finish_ctx_invalidate, 0); + rwl.invalidate(&finish_ctx_invalidate); + ASSERT_EQ(0, finish_ctx_invalidate.wait()); + + MockContextSSD finish_ctx3; + expect_context_complete(finish_ctx3, 0); + rwl.shut_down(&finish_ctx3); + + ASSERT_EQ(0, finish_ctx3.wait()); +} + } // namespace pwl } // namespace cache } // namespace librbd -- 2.39.5
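
Editor's note (not part of the patch): the idea shared by both C_ReadRequest::finish() implementations above is to reassemble the caller's output buffer from an ordered list of per-extent hit buffers plus one contiguous miss buffer returned by the layer below the cache. The standalone sketch below illustrates only that assembly loop, with plain std::string standing in for ceph::bufferlist; the names ExtentBuf and assemble_read are illustrative and do not exist in librbd.

```cpp
// Illustrative sketch only -- not taken from the patch. std::string stands in
// for ceph::bufferlist; ExtentBuf/assemble_read are hypothetical names.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct ExtentBuf {
  uint64_t offset;   // image offset of this extent
  uint64_t length;   // length of this extent
  std::string buf;   // non-empty => cache hit data; empty => read miss
};

// Assemble the final read buffer: hit extents contribute their cached data,
// miss extents consume the next 'length' bytes of the contiguous miss buffer
// returned by the lower layer (misses were issued to it in extent order).
std::string assemble_read(const std::vector<ExtentBuf> &extents,
                          const std::string &miss_buf) {
  std::string out;
  uint64_t miss_off = 0;
  for (const auto &e : extents) {
    if (!e.buf.empty()) {
      out.append(e.buf);                         // hit: copy cached data
    } else {
      out.append(miss_buf, miss_off, e.length);  // miss: slice miss buffer
      miss_off += e.length;
    }
  }
  return out;
}

int main() {
  // One 4 KiB hit followed by one 4 KiB miss, mirroring the unit tests above.
  std::vector<ExtentBuf> extents = {
    {0,    4096, std::string(4096, '1')},  // served from the write log
    {4096, 4096, ""},                      // forwarded to the image below
  };
  std::string miss_buf(4096, '0');         // data returned by the lower layer
  std::string result = assemble_read(extents, miss_buf);
  std::cout << "assembled " << result.size() << " bytes\n";  // prints 8192
  return 0;
}
```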