From: Adam Kupczyk Date: Sun, 27 Nov 2022 11:44:10 +0000 (+0000) Subject: os/bluestore: Refactor, add _dup_writing when cloning X-Git-Tag: v19.0.0~486^2~24 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6213a94a83899bdc90d8aecaab410ba03b10b6eb;p=ceph.git os/bluestore: Refactor, add _dup_writing when cloning With BufferSpace now attached to Blob (was SharedBlob inside it), on-the-fly 'writing' buffers must be copied to clones. Otherwise those objects will read data from disk before it is written there. Signed-off-by: Adam Kupczyk --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 58fd3233ba29..0378dbf555f0 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -1890,6 +1890,27 @@ void BlueStore::BufferSpace::_finish_write(BufferCacheShard* cache, uint64_t seq cache->_audit("finish_write end"); } +/* + copy Buffers that are in writing queue + returns: + true if something copied + false if nothing copied +*/ +bool BlueStore::BufferSpace::_dup_writing(BufferCacheShard* cache, BufferSpace* to) +{ + bool copied = false; + if (!writing.empty()) { + copied = true; + for (auto it = writing.begin(); it != writing.end(); ++it) { + Buffer& b = *it; + Buffer* to_b = new Buffer(to, b.state, b.seq, b.offset, b.data, b.flags); + ceph_assert(to_b->is_writing()); + to->_add_buffer(cache, to_b, 0, nullptr); + } + } + return copied; +} + void BlueStore::BufferSpace::split(BufferCacheShard* cache, size_t pos, BlueStore::BufferSpace &r) { std::lock_guard lk(cache->lock); @@ -2580,6 +2601,16 @@ void BlueStore::ExtentMap::dup(BlueStore* b, TransContext* txc, e.blob->last_encoded_id = n; id_to_blob[n] = cb; e.blob->dup(*cb); + // By default do not copy buffers to clones, and let them read data by themselves. + // The exception is 'writing' buffers, which are not yet stable on device. 
+ bool some_copied = e.blob->bc._dup_writing(cb->shared_blob->get_cache(), &cb->bc); + if (some_copied) { + // Pretend we just wrote those buffers; + // we need to get _finish_write called, so we can clear them from writing list. + // Otherwise it will be stuck until someone does write-op on clone. + txc->blobs_written.insert(cb); + } + // bump the extent refs on the copied blob's extents for (auto p : blob.get_extents()) { if (p.is_valid()) { diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index b0449451c15f..24b5cd143b86 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -464,6 +464,7 @@ public: discard(cache, offset, (uint32_t)-1 - offset); } + bool _dup_writing(BufferCacheShard* cache, BufferSpace* bc); void split(BufferCacheShard* cache, size_t pos, BufferSpace &r); void dump(BufferCacheShard* cache, ceph::Formatter *f) const {