From 801c87c185c0e67a73f8d9c689c56ab6452cf580 Mon Sep 17 00:00:00 2001
From: Adam Kupczyk
Date: Mon, 16 Jan 2023 11:16:43 +0000
Subject: [PATCH] os/bluestore: Add functions for partial blob copy

Add functions that can copy parts of blobs. This is necessary for
merging blobs together, which happens on cloning (ExtentMap::dup).

Fixed:
Blob::copy_extents_over_empty was faulty when the insertion was
targeting the last extent and that extent was invalid (empty).

Added dup() for bluestore_blob_t and bluestore_blob_use_tracker_t.

Changed:
Modified Blob::copy_from for better readability.
Added bluestore_blob_t::adjust_to initialization that conforms to the
other blob's specifics.
Moved the assert for is_mutable() out of bluestore_blob_t::add_tail,
so it can be used on blobs that are shared.
Modified bluestore_blob_use_tracker_t::get to automatically expand
when accessing more AUs than originally declared.

Signed-off-by: Adam Kupczyk
---
 src/os/bluestore/BlueStore.cc       | 220 ++++++++++++++++++++++++++++
 src/os/bluestore/BlueStore.h        |   6 +
 src/os/bluestore/bluestore_types.cc |  57 ++++++-
 src/os/bluestore/bluestore_types.h  |  11 +-
 4 files changed, 291 insertions(+), 3 deletions(-)

diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 2de13e899620f..6b81f97b25d25 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -2414,6 +2414,7 @@ bool BlueStore::Blob::can_reuse_blob(uint32_t min_alloc_size,
     }
     if (new_blen > blen) {
+      ceph_assert(dirty_blob().is_mutable());
       dirty_blob().add_tail(new_blen);
       used_in_blob.add_tail(new_blen,
                             get_blob().get_release_size(min_alloc_size));
@@ -2422,6 +2423,225 @@ bool BlueStore::Blob::can_reuse_blob(uint32_t min_alloc_size,
   return true;
 }
 
+void BlueStore::Blob::dup(const Blob& from, bool copy_used_in_blob)
+{
+  set_shared_blob(from.shared_blob);
+  blob.dup(from.blob);
+  if (copy_used_in_blob) {
+    used_in_blob = from.used_in_blob;
+  } else {
+    ceph_assert(from.blob.is_compressed());
+    ceph_assert(from.used_in_blob.num_au <= 1);
+    used_in_blob.init(from.used_in_blob.au_size, from.used_in_blob.au_size);
+  }
+  for (auto p : blob.get_extents()) {
+    if (p.is_valid()) {
+      shared_blob->get_ref(p.offset, p.length);
+    }
+  }
+}
+
+// copies part of a Blob
+// it is used to create a consistent blob out of parts of other blobs
+void BlueStore::Blob::copy_from(
+  CephContext* cct, const Blob& from, uint32_t min_release_size, uint32_t start, uint32_t len)
+{
+  dout(20) << __func__ << " to=" << *this << " from=" << from
+           << " [" << std::hex << start << "~" << len
+           << "] min_release=" << min_release_size << std::dec << dendl;
+
+  auto& bto = blob;
+  auto& bfrom = from.blob;
+  ceph_assert(!bfrom.is_compressed()); // not suitable for compressed (immutable) blobs
+  ceph_assert(!bfrom.has_unused());
+  // the two asserts below are not required for the function to work;
+  // they check that it is run in the desired context
+  ceph_assert(bfrom.is_shared());
+  ceph_assert(shared_blob);
+  ceph_assert(shared_blob == from.shared_blob);
+
+  // split len into pre_len, main_len, post_len
+  uint32_t start_aligned = p2align(start, min_release_size);
+  uint32_t start_roundup = p2roundup(start, min_release_size);
+  uint32_t end_aligned = p2align(start + len, min_release_size);
+  uint32_t end_roundup = p2roundup(start + len, min_release_size);
+  dout(25) << __func__ << " extent split:"
+           << std::hex << start_aligned << "~" << start_roundup << "~"
+           << end_aligned << "~" << end_roundup << std::dec << dendl;
+
+  if (bto.get_logical_length() == 0) {
+    // this is initialization
+    bto.adjust_to(from.blob, end_roundup);
+    ceph_assert(min_release_size == from.used_in_blob.au_size);
+    used_in_blob.init(end_roundup, min_release_size);
+  } else if (bto.get_logical_length() < end_roundup) {
+    ceph_assert(!bto.is_compressed());
+    bto.add_tail(end_roundup);
+    used_in_blob.add_tail(end_roundup, used_in_blob.au_size);
+  }
+
+  if (end_aligned >= start_roundup) {
+    copy_extents(cct, from, start_aligned,
+                 start_roundup - start_aligned,/*pre_len*/
+                 end_aligned - start_roundup,/*main_len*/
+                 end_roundup - end_aligned/*post_len*/);
+  } else {
+    // it is uncommon case that [start, start + len) fits within a single allocation unit
+    copy_extents(cct, from, start_aligned,
+                 start_roundup - start_aligned,/*pre_len*/
+                 0,/*main_len*/
+                 0/*post_len*/);
+  }
+  // copy csum
+  if (bfrom.has_csum()) {
+    ceph_assert(bfrom.get_csum_type() == bto.get_csum_type());
+    ceph_assert(bfrom.get_csum_chunk_size() == bto.get_csum_chunk_size());
+    size_t csd_value_size = bto.get_csum_value_size();
+    size_t csd_item_start = p2align(start, uint32_t(1 << bto.csum_chunk_order)) >> bto.csum_chunk_order;
+    size_t csd_item_end = p2roundup(start + len, uint32_t(1 << bto.csum_chunk_order)) >> bto.csum_chunk_order;
+    ceph_assert(bto.  csum_data.length() >= csd_item_end * csd_value_size);
+    ceph_assert(bfrom.csum_data.length() >= csd_item_end * csd_value_size);
+    memcpy(bto.  csum_data.c_str() + csd_item_start * csd_value_size,
+           bfrom.csum_data.c_str() + csd_item_start * csd_value_size,
+           (csd_item_end - csd_item_start) * csd_value_size);
+  }
+  used_in_blob.get(start, len);
+  dout(20) << __func__ << " result=" << *this << dendl;
+}
+
+void BlueStore::Blob::copy_extents(
+  CephContext* cct, const Blob& from, uint32_t start,
+  uint32_t pre_len, uint32_t main_len, uint32_t post_len)
+{
+  constexpr uint64_t invalid = bluestore_pextent_t::INVALID_OFFSET;
+  auto at = [&](const PExtentVector& e, uint32_t pos, uint32_t len) -> uint64_t {
+    auto it = e.begin();
+    while (it != e.end() && pos >= it->length) {
+      pos -= it->length;
+      ++it;
+    }
+    if (it == e.end()) {
+      return invalid;
+    }
+    if (!it->is_valid()) {
+      return invalid;
+    }
+    ceph_assert(pos + len <= it->length); // post_len should be a single au, and we do not split
+    return it->offset + pos;
+  };
+  const PExtentVector& exfrom = from.blob.get_extents();
+  PExtentVector& exto = blob.dirty_extents();
+  dout(20) << __func__ << " 0x" << std::hex << start << " "
+           << pre_len << "/" << main_len << "/" << post_len << std::dec << dendl;
+
+  // extents that cover the same area must be the same
+  if (pre_len > 0) {
+    uint64_t au_from = at(exfrom, start, pre_len);
+    ceph_assert(au_from != bluestore_pextent_t::INVALID_OFFSET);
+    uint64_t au_to = at(exto, start, pre_len);
+    if (au_to == bluestore_pextent_t::INVALID_OFFSET) {
+      main_len += pre_len; // also copy pre_len
+    } else {
+      ceph_assert(au_from == au_to);
+      start += pre_len; // skip, already there
+    }
+  }
+  if (post_len > 0) {
+    uint64_t au_from = at(exfrom, start + main_len, post_len);
+    ceph_assert(au_from != bluestore_pextent_t::INVALID_OFFSET);
+    uint64_t au_to = at(exto, start + main_len, post_len);
+    if (au_to == bluestore_pextent_t::INVALID_OFFSET) {
+      main_len += post_len; // also copy post_len
+    } else {
+      ceph_assert(au_from == au_to);
+      // skip, already there
+    }
+  }
+  // it is possible that there is nothing to copy
+  if (main_len > 0) {
+    copy_extents_over_empty(cct, from, start, main_len);
+  }
+}
+
+// assumes that the target (this->extents) has a hole in the relevant location
+void BlueStore::Blob::copy_extents_over_empty(
+  CephContext* cct, const Blob& from, uint32_t start, uint32_t len)
+{
+  dout(20) << __func__ << " to=" << *this << " from=" << from
+           << "[0x" << std::hex << start << "~" << len << std::dec << "]" << dendl;
+  uint32_t padding;
+  auto& exto = blob.dirty_extents();
+  auto ito = exto.begin();
+  PExtentVector::iterator prev = exto.end();
+  uint32_t sto = start;
+
+  auto try_append = [&](PExtentVector::iterator& it, uint64_t disk_offset, uint32_t disk_len) {
+    if (prev != exto.end()) {
+      if (prev->is_valid()) {
+        if (prev->offset + prev->length == disk_offset) {
+          shared_blob->get_ref(disk_offset, disk_len);
+          prev->length += disk_len;
+          return;
+        }
+      }
+    }
+    it = exto.insert(it, bluestore_pextent_t(disk_offset, disk_len));
+    prev = it;
+    ++it;
+    shared_blob->get_ref(disk_offset, disk_len);
+  };
+
+  while (ito != exto.end() && sto >= ito->length) {
+    sto -= ito->length;
+    prev = ito;
+    ++ito;
+  }
+  if (ito == exto.end()) {
+    // putting data after end, just expand / push back
+    if (sto > 0) {
+      exto.emplace_back(bluestore_pextent_t::INVALID_OFFSET, sto);
+      ito = exto.end();
+      prev = ito;
+    }
+    padding = 0;
+  } else {
+    ceph_assert(!ito->is_valid()); // there can be no collision
+    ceph_assert(ito->length >= sto + len); // for at least len, starting with remainder sto
+    padding = ito->length - (sto + len); // add this much after copying
+    ito = exto.erase(ito); // cut a hole
+    if (sto > 0) {
+      ito = exto.insert(ito, bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, sto));
+      prev = ito;
+      ++ito;
+    }
+  }
+
+  const auto& exfrom = from.blob.get_extents();
+  auto itf = exfrom.begin();
+  uint32_t sf = start;
+  while (itf != exfrom.end() && sf >= itf->length) {
+    sf -= itf->length;
+    ++itf;
+  }
+
+  uint32_t skip_on_first = sf;
+  while (itf != exfrom.end() && len > 0) {
+    ceph_assert(itf->is_valid());
+    uint32_t to_copy = std::min(itf->length - skip_on_first, len);
+    try_append(ito, itf->offset + skip_on_first, to_copy);
+    len -= to_copy;
+    skip_on_first = 0;
+    ++itf;
+  }
+  ceph_assert(len == 0);
+
+  if (padding > 0) {
+    exto.insert(ito, bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, padding));
+  }
+  dout(20) << __func__ << " result=" << *this << dendl;
+}
+
+
+#undef dout_context
+#define dout_context coll->store->cct
+
 void BlueStore::Blob::finish_write(uint64_t seq)
 {
   while (true) {
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 39babed2f7922..9cae4d017b404 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -662,6 +662,12 @@ public:
       o.blob_bl = blob_bl;
 #endif
     }
+    void dup(const Blob& from, bool copy_used_in_blob);
+    void copy_from(CephContext* cct, const Blob& from,
+                   uint32_t min_release_size, uint32_t start, uint32_t len);
+    void copy_extents(CephContext* cct, const Blob& from, uint32_t start,
+                      uint32_t pre_len, uint32_t main_len, uint32_t post_len);
+    void copy_extents_over_empty(CephContext* cct, const Blob& from, uint32_t start, uint32_t len);
 
     inline const bluestore_blob_t& get_blob() const {
       return blob;
diff --git a/src/os/bluestore/bluestore_types.cc b/src/os/bluestore/bluestore_types.cc
index f891e2a7bc129..229215fc07835 100644
--- a/src/os/bluestore/bluestore_types.cc
+++ b/src/os/bluestore/bluestore_types.cc
@@ -441,7 +441,9 @@ void bluestore_blob_use_tracker_t::get(
     total_bytes += length;
   } else {
     auto end = offset + length;
-
+    if (end / au_size >= num_au) {
+      add_tail(end, au_size);
+    }
     while (offset < end) {
       auto phase = offset % au_size;
       bytes_per_au[offset / au_size] +=
@@ -537,6 +539,19 @@
   }
 }
 
+void bluestore_blob_use_tracker_t::dup(const bluestore_blob_use_tracker_t& from,
+  uint32_t start, uint32_t len)
+{
+  uint32_t end = start + len;
+  ceph_assert(from.total_bytes >= end);
+  init(end, from.au_size);
+  uint32_t* array = dirty_au_array();
+  const uint32_t* afrom = from.get_au_array();
+  for (uint32_t i = start / au_size, pos = start; pos < end; i++, pos += au_size) {
+    array[i] = afrom[i];
+  }
+}
+
 bool bluestore_blob_use_tracker_t::equal(
   const bluestore_blob_use_tracker_t& other) const
 {
@@ -669,6 +684,29 @@ string bluestore_blob_t::get_flags_string(unsigned flags)
   return s;
 }
 
+void bluestore_blob_t::adjust_to(const bluestore_blob_t& other, uint32_t target_length)
+{
+  // there is no way to expand compressed
+  ceph_assert(!is_compressed());
+  // never import data from other compressed
+  ceph_assert(!other.is_compressed());
+  // unused is wonky, as it is based on logical_length size;
+  // it could be cleared here, but it feels better to force the caller
+  // to be aware that unused is unacceptable
+  ceph_assert(!has_unused());
+  ceph_assert(logical_length == 0); // not initialized yet
+  ceph_assert(target_length <= other.logical_length);
+
+  logical_length = target_length;
+  ceph_assert(!has_csum());
+  if (other.has_csum()) {
+    init_csum(other.csum_type, other.csum_chunk_order, logical_length);
+    ceph_assert(csum_data.length() <= other.csum_data.length());
+    memcpy(csum_data.c_str(), other.csum_data.c_str(), csum_data.length());
+  }
+  compressed_length = 0;
+}
+
 size_t bluestore_blob_t::get_csum_value_size() const
 {
   return Checksummer::get_csum_value_size(csum_type);
 }
@@ -1059,6 +1097,23 @@ void bluestore_blob_t::split(uint32_t blob_offset, bluestore_blob_t& rb)
   }
 }
 
+void bluestore_blob_t::dup(const bluestore_blob_t& from)
+{
+  extents = from.extents;
+  logical_length = from.logical_length;
+  compressed_length = from.compressed_length;
+  flags = from.flags;
+  unused = from.unused;
+  csum_type = from.csum_type;
+  csum_chunk_order = from.csum_chunk_order;
+  if (from.csum_data.length()) {
+    csum_data = ceph::buffer::ptr(from.csum_data.c_str(), from.csum_data.length());
+    csum_data.reassign_to_mempool(mempool::mempool_bluestore_cache_other);
+  } else {
+    csum_data = ceph::buffer::ptr();
+  }
+}
+
 // bluestore_shared_blob_t
 MEMPOOL_DEFINE_OBJECT_FACTORY(bluestore_shared_blob_t, bluestore_shared_blob_t,
                               bluestore_shared_blob);
diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h
index 4c96e8903260f..36216620f5bde 100644
--- a/src/os/bluestore/bluestore_types.h
+++ b/src/os/bluestore/bluestore_types.h
@@ -372,7 +372,8 @@ struct bluestore_blob_use_tracker_t {
   void split(
     uint32_t blob_offset,
     bluestore_blob_use_tracker_t* r);
-
+  void dup(const bluestore_blob_use_tracker_t& from,
+    uint32_t start, uint32_t len);
   bool equal(
     const bluestore_blob_use_tracker_t& other) const;
@@ -460,6 +461,11 @@ public:
   bluestore_blob_t(uint32_t f = 0) : flags(f) {}
 
+  void dup(const bluestore_blob_t& from);
+
+  // initialize the blob to accommodate data from another blob, but do not copy yet
+  void adjust_to(const bluestore_blob_t& other, uint32_t new_logical_length);
+
   const PExtentVector& get_extents() const {
     return extents;
   }
@@ -859,10 +865,10 @@ public:
       csum_data = ceph::buffer::ptr(t.c_str(),
                                     get_logical_length() / get_csum_chunk_size() *
                                     get_csum_value_size());
+      csum_data.reassign_to_mempool(mempool::mempool_bluestore_cache_other);
     }
   }
   void add_tail(uint32_t new_len) {
-    ceph_assert(is_mutable());
     ceph_assert(!has_unused());
     ceph_assert(new_len > logical_length);
     extents.emplace_back(
@@ -877,6 +883,7 @@
         get_csum_value_size() * logical_length / get_csum_chunk_size());
       csum_data.copy_in(0, t.length(), t.c_str());
      csum_data.zero(t.length(), csum_data.length() - t.length());
+      csum_data.reassign_to_mempool(mempool::mempool_bluestore_cache_other);
     }
   }
   uint32_t get_release_size(uint32_t min_alloc_size) const {
-- 
2.39.5
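
Note (illustration, not part of the patch): a minimal standalone sketch of the
pre/main/post split that copy_from performs. The p2align/p2roundup helpers are
local stand-ins for the ones in include/intarith.h, and all values here are
hypothetical; min_release_size is assumed to be a power of two, as BlueStore
allocation units are.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Local stand-ins for the p2align/p2roundup helpers; 'align' must be a power of two.
static uint32_t p2align(uint32_t x, uint32_t align)   { return x & ~(align - 1); }
static uint32_t p2roundup(uint32_t x, uint32_t align) { return (x + align - 1) & ~(align - 1); }

int main() {
  const uint32_t min_release_size = 0x1000; // hypothetical 4 KiB allocation unit
  uint32_t start = 0x1800, len = 0x3000;    // copy [0x1800, 0x4800)

  uint32_t start_aligned = p2align(start, min_release_size);         // 0x1000
  uint32_t start_roundup = p2roundup(start, min_release_size);       // 0x2000
  uint32_t end_aligned   = p2align(start + len, min_release_size);   // 0x4000
  uint32_t end_roundup   = p2roundup(start + len, min_release_size); // 0x5000

  if (end_aligned >= start_roundup) {
    // common case: partially used first AU / fully covered middle / partially used last AU
    uint32_t pre_len  = start_roundup - start_aligned; // 0x1000
    uint32_t main_len = end_aligned - start_roundup;   // 0x2000
    uint32_t post_len = end_roundup - end_aligned;     // 0x1000
    assert(pre_len + main_len + post_len == end_roundup - start_aligned);
    printf("pre=0x%x main=0x%x post=0x%x\n", pre_len, main_len, post_len);
  } else {
    // uncommon case: [start, start + len) lies inside a single AU, so the
    // "first" and "last" AU are the same one and must be copied only once
    printf("single-AU case: pre=0x%x\n", start_roundup - start_aligned);
  }
  return 0;
}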
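
Note (illustration, not part of the patch): a simplified model of the changed
bluestore_blob_use_tracker_t::get() behavior. UseTrackerModel is a hypothetical
type sketched here only to show the auto-expand semantics; the real tracker
stores per-AU byte counts similarly but lives in bluestore_types.h and expands
via add_tail().

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical, simplified stand-in for bluestore_blob_use_tracker_t.
struct UseTrackerModel {
  uint32_t au_size = 0;
  std::vector<uint32_t> bytes_per_au; // one byte counter per allocation unit

  void init(uint32_t full_length, uint32_t _au_size) {
    au_size = _au_size;
    bytes_per_au.assign((full_length + au_size - 1) / au_size, 0);
  }
  // Reference [offset, offset + length); grows past the declared size
  // instead of asserting, mirroring the patched get().
  void get(uint32_t offset, uint32_t length) {
    uint32_t end = offset + length;
    uint32_t need_au = (end + au_size - 1) / au_size;
    if (need_au > bytes_per_au.size()) {
      bytes_per_au.resize(need_au, 0); // auto-expand, like add_tail()
    }
    while (offset < end) {
      uint32_t phase = offset % au_size;
      uint32_t step = std::min(au_size - phase, end - offset);
      bytes_per_au[offset / au_size] += step;
      offset += step;
    }
  }
};

int main() {
  UseTrackerModel t;
  t.init(0x2000, 0x1000); // two declared 4 KiB AUs
  t.get(0x1800, 0x1000);  // crosses into a third AU -> tracker expands
  assert(t.bytes_per_au.size() == 3);
  assert(t.bytes_per_au[1] == 0x800 && t.bytes_per_au[2] == 0x800);
  return 0;
}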
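
Note (illustration, not part of the patch): the checksum-slice arithmetic used
in the csum-copy step of copy_from, sketched standalone. Checksum values are
fixed-size items, one per (1 << csum_chunk_order) bytes, so copying logical
bytes [start, start + len) means copying whole csum items
[csd_item_start, csd_item_end). All names and sizes here are hypothetical.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

int main() {
  const uint32_t csum_chunk_order = 12;  // hypothetical: one csum item per 4 KiB
  const uint32_t chunk = 1u << csum_chunk_order;
  const size_t csd_value_size = 4;       // e.g. crc32c: 4 bytes per item

  uint32_t start = 0x1800, len = 0x3000; // logical range being copied
  size_t csd_item_start = (start & ~(chunk - 1)) >> csum_chunk_order;                     // p2align
  size_t csd_item_end   = ((start + len + chunk - 1) & ~(chunk - 1)) >> csum_chunk_order; // p2roundup
  assert(csd_item_start == 1 && csd_item_end == 5);

  std::vector<uint8_t> from_csum(8 * csd_value_size, 0xAB); // source blob csum data
  std::vector<uint8_t> to_csum(8 * csd_value_size, 0x00);   // target blob csum data
  memcpy(to_csum.data() + csd_item_start * csd_value_size,
         from_csum.data() + csd_item_start * csd_value_size,
         (csd_item_end - csd_item_start) * csd_value_size);
  // items 1..4 copied, item 5 untouched
  assert(to_csum[4] == 0xAB && to_csum[19] == 0xAB && to_csum[20] == 0x00);
  return 0;
}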