From 6200281e1cd9454e4088728c0c838e62badb058d Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Thu, 11 May 2023 10:53:24 +0000 Subject: [PATCH] os/bluestore: Resurrect ExtentMap::dup 1. Rename ExtentMap::dup to ExtentMap::dup_esb 2. Ressurect ExtentMap::dup Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueStore.cc | 120 +++++++++++++++++++++++++++++++++- src/os/bluestore/BlueStore.h | 8 +-- 2 files changed, 122 insertions(+), 6 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 3d8fa59cff25..03e28518f3a9 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -3199,6 +3199,125 @@ void BlueStore::ExtentMap::make_range_shared_maybe_merge( } void BlueStore::ExtentMap::dup(BlueStore* b, TransContext* txc, + CollectionRef& c, OnodeRef& oldo, OnodeRef& newo, uint64_t& srcoff, + uint64_t& length, uint64_t& dstoff) { + //_dup_writing needs cache lock + BufferCacheShard* bcs = c->cache; + bcs->lock.lock(); + while(bcs != c->cache) { + bcs->lock.unlock(); + bcs = c->cache; + bcs->lock.lock(); + } + + vector id_to_blob(oldo->extent_map.extent_map.size()); + for (auto& e : oldo->extent_map.extent_map) { + e.blob->last_encoded_id = -1; + } + + int n = 0; + uint64_t end = srcoff + length; + uint32_t dirty_range_begin = 0; + uint32_t dirty_range_end = 0; + bool src_dirty = false; + for (auto ep = oldo->extent_map.seek_lextent(srcoff); + ep != oldo->extent_map.extent_map.end(); + ++ep) { + auto& e = *ep; + if (e.logical_offset >= end) { + break; + } + dout(20) << __func__ << " src " << e << dendl; + BlobRef cb; + bool blob_duped = true; + if (e.blob->last_encoded_id >= 0) { + cb = id_to_blob[e.blob->last_encoded_id]; + blob_duped = false; + } else { + // dup the blob + const bluestore_blob_t& blob = e.blob->get_blob(); + // make sure it is shared + if (!blob.is_shared()) { + c->make_blob_shared(b->_assign_blobid(txc), e.blob); + if (!src_dirty) { + src_dirty = true; + dirty_range_begin = e.logical_offset; + } + ceph_assert(e.logical_end() > 0); + // -1 to exclude next potential shard + dirty_range_end = e.logical_end() - 1; + } else { + c->load_shared_blob(e.blob->shared_blob); + } + cb = new Blob(); + e.blob->last_encoded_id = n; + id_to_blob[n] = cb; + e.blob->dup(*cb); + // By default do not copy buffers to clones, and let them read data by themselves. + // The exception are 'writing' buffers, which are not yet stable on device. + bool some_copied = e.blob->bc._dup_writing(cb->shared_blob->get_cache(), &cb->bc); + if (some_copied) { + // Pretend we just wrote those buffers; + // we need to get _finish_write called, so we can clear then from writing list. + // Otherwise it will be stuck until someone does write-op on clone. + txc->blobs_written.insert(cb); + } + + // bump the extent refs on the copied blob's extents + for (auto p : blob.get_extents()) { + if (p.is_valid()) { + e.blob->shared_blob->get_ref(p.offset, p.length); + } + } + txc->write_shared_blob(e.blob->shared_blob); + dout(20) << __func__ << " new " << *cb << dendl; + } + + int skip_front, skip_back; + if (e.logical_offset < srcoff) { + skip_front = srcoff - e.logical_offset; + } else { + skip_front = 0; + } + if (e.logical_end() > end) { + skip_back = e.logical_end() - end; + } else { + skip_back = 0; + } + + Extent* ne = new Extent(e.logical_offset + skip_front + dstoff - srcoff, + e.blob_offset + skip_front, e.length - skip_front - skip_back, cb); + newo->extent_map.extent_map.insert(*ne); + ne->blob->get_ref(c.get(), ne->blob_offset, ne->length); + // fixme: we may leave parts of new blob unreferenced that could + // be freed (relative to the shared_blob). + txc->statfs_delta.stored() += ne->length; + if (e.blob->get_blob().is_compressed()) { + txc->statfs_delta.compressed_original() += ne->length; + if (blob_duped) { + txc->statfs_delta.compressed() += + cb->get_blob().get_compressed_payload_length(); + } + } + dout(20) << __func__ << " dst " << *ne << dendl; + ++n; + } + if (src_dirty) { + oldo->extent_map.dirty_range(dirty_range_begin, + dirty_range_end - dirty_range_begin); + txc->write_onode(oldo); + } + txc->write_onode(newo); + + if (dstoff + length > newo->onode.size) { + newo->onode.size = dstoff + length; + } + newo->extent_map.dirty_range(dstoff, length); + //_dup_writing needs cache lock + bcs->lock.unlock(); +} + +void BlueStore::ExtentMap::dup_esb(BlueStore* b, TransContext* txc, CollectionRef& c, OnodeRef& oldo, OnodeRef& newo, uint64_t& srcoff, uint64_t& length, uint64_t& dstoff) { BufferCacheShard* bcs = c->cache; @@ -13755,7 +13874,6 @@ void BlueStore::_txc_finish(TransContext *txc) sb->finish_write(txc->seq); } txc->blobs_written.clear(); - while (!txc->removed_collections.empty()) { _queue_reap_collection(txc->removed_collections.front()); txc->removed_collections.pop_front(); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index bd9a1561de84..780ec9b44b4d 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -64,8 +64,6 @@ class SimpleBitmap; //#define DEBUG_CACHE //#define DEBUG_DEFERRED - - // constants for Buffer::optimize() #define MAX_BUFFER_SLOP_RATIO_DEN 8 // so actually 1/N #define CEPH_BLUESTORE_TOOL_RESTORE_ALLOCATION @@ -466,7 +464,7 @@ public: discard(cache, offset, (uint32_t)-1 - offset); } - bool _dup_writing(BufferCacheShard* cache, BufferSpace* bc); + bool _dup_writing(BufferCacheShard* cache, BufferSpace* to); void split(BufferCacheShard* cache, size_t pos, BufferSpace &r); void dump(BufferCacheShard* cache, ceph::Formatter *f) const { @@ -524,7 +522,6 @@ public: /// put logical references, and get back any released extents void put_ref(uint64_t offset, uint32_t length, PExtentVector *r, bool *unshare); - friend bool operator==(const SharedBlob &l, const SharedBlob &r) { return l.get_sbid() == r.get_sbid(); } @@ -920,6 +917,8 @@ public: void dup(BlueStore* b, TransContext*, CollectionRef&, OnodeRef&, OnodeRef&, uint64_t&, uint64_t&, uint64_t&); + void dup_esb(BlueStore* b, TransContext*, CollectionRef&, OnodeRef&, OnodeRef&, + uint64_t&, uint64_t&, uint64_t&); bool needs_reshard() const { return needs_reshard_end > needs_reshard_begin; @@ -1830,7 +1829,6 @@ private: std::set shared_blobs; ///< these need to be updated/written std::set blobs_written; ///< update these on io completion - KeyValueDB::Transaction t; ///< then we will commit this std::list oncommits; ///< more commit completions std::list removed_collections; ///< colls we removed -- 2.47.3