From 93bb26360a66535272ac77129b20089bb194369a Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Tue, 14 Nov 2023 16:25:01 +0000 Subject: [PATCH] os/bluestore: Refactor of write path. New punch_hole_2 function. Introducing new logic of Onode processing during write. New punch_hole_2 function empties range, but keeps track of elements: - allocations that are no longer used - blobs that are now empty - shared blobs that got modified - statfs changes to apply later This change allows allocations to be reused freely for deferred writes, which means that we can use an allocation in deferred mode in a blob other than the one it came from. Signed-off-by: Adam Kupczyk --- src/os/CMakeLists.txt | 1 + src/os/bluestore/BlueStore.h | 14 ++++ src/os/bluestore/Writer.cc | 113 +++++++++++++++++++++++++++++ src/os/bluestore/bluestore_types.h | 4 +- 4 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 src/os/bluestore/Writer.cc diff --git a/src/os/CMakeLists.txt b/src/os/CMakeLists.txt index 9353ceaa63f..e9c698eae26 100644 --- a/src/os/CMakeLists.txt +++ b/src/os/CMakeLists.txt @@ -25,6 +25,7 @@ if(WITH_BLUESTORE) bluestore/AvlAllocator.cc bluestore/BtreeAllocator.cc bluestore/HybridAllocator.cc + bluestore/Writer.cc ) endif(WITH_BLUESTORE) diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 2cb7229942d..e96547da990 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -747,6 +747,11 @@ public: /// put logical references, and get back any released extents bool put_ref(Collection *coll, uint32_t offset, uint32_t length, PExtentVector *r); + uint32_t put_ref_accumulate( + Collection *coll, + uint32_t offset, + uint32_t length, + PExtentVector *released_disk); /// split the blob void split(Collection *coll, uint32_t blob_offset, Blob *o); @@ -3702,6 +3707,15 @@ private: uint64_t loffs_end, uint64_t min_alloc_size); }; + BlueStore::extent_map_t::iterator _punch_hole_2( + Collection* c, + OnodeRef& o, + uint32_t offset, + uint32_t length, +
PExtentVector& released,
+    std::vector<BlobRef>& pruned_blobs,
+    std::set<SharedBlobRef>& shared_changed,
+    volatile_statfs& statfs_delta);
   void _do_write_small(
     TransContext *txc,
     CollectionRef &c,
diff --git a/src/os/bluestore/Writer.cc b/src/os/bluestore/Writer.cc
new file mode 100644
index 00000000000..ffb0dca280c
--- /dev/null
+++ b/src/os/bluestore/Writer.cc
@@ -0,0 +1,113 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2023 IBM
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "os/bluestore/bluestore_types.h"
+#include "BlueStore.h"
+#include "Allocator.h"
+
+/// Signals that a range [offset~length] is no longer used.
+/// Collects allocation units that became unused into *released_disk.
+/// length must be greater than zero (asserted).
+/// coll is not used by this function.
+/// Returns:
+///   disk space size to release (0 when put_simple yields an empty range)
+uint32_t BlueStore::Blob::put_ref_accumulate(
+  Collection *coll,
+  uint32_t offset,
+  uint32_t length,
+  PExtentVector *released_disk)
+{
+  ceph_assert(length > 0);
+  auto [in_blob_offset, in_blob_length] = used_in_blob.put_simple(offset, length);
+  if (in_blob_length == 0) {
+    return 0; // put_simple reported an empty range: no extents to release
+  }
+  // the return value of release_extents is the size handed back to the caller
+  return dirty_blob().release_extents(in_blob_offset, in_blob_length, released_disk);
+}
+
+/// Empties range [offset~length] of object o that is in collection c.
+/// Collects unused elements:
+/// released - sequence of allocation units that are no longer used
+/// pruned_blobs - blobs that are left without disk extents (has_disk() == false)
+/// shared_changed - set of shared blobs that are modified,
+///                  including the case of shared blob being empty
+/// statfs_delta - delta of stats
+/// Returns: iterator where the scan stopped - the first extent that does not
+///          start below offset+length, or extent_map.end()
+BlueStore::extent_map_t::iterator BlueStore::_punch_hole_2(
+  Collection* c,
+  OnodeRef& o,
+  uint32_t offset,
+  uint32_t length,
+  PExtentVector& released,
+  std::vector<BlobRef>& pruned_blobs, //completely emptied out blobs
+  std::set<SharedBlobRef>& shared_changed, //shared blobs that have changed
+  volatile_statfs& statfs_delta)
+{
+  ExtentMap& emap = o->extent_map;
+  uint32_t end = offset + length;
+  auto p = emap.maybe_split_at(offset);
+  while (p != emap.extent_map.end() && p->logical_offset < end) {
+    // extent reaches past end: split it there and step back to the inner part
+    if (end < p->logical_end()) {
+      p = emap.split_at(p, end);
+      --p;
+    }
+    // here always whole lextent to drop
+    auto& bblob = p->blob->dirty_blob();
+    uint32_t released_size = 0;
+    if (!bblob.is_shared()) {
+      released_size = p->blob->put_ref_accumulate(c, p->blob_offset, p->length, &released);
+    } else {
+      // make sure shared blob is loaded
+      c->load_shared_blob(p->blob->get_shared_blob());
+      // more complicated shared blob release
+      PExtentVector local_released;  //no longer used by local blob
+      PExtentVector shared_released; //no longer used by shared blob too
+      p->blob->put_ref_accumulate(c, p->blob_offset, p->length, &local_released);
+      // filter locally released regions through SharedBlob's multi-ref ref_map
+      bool unshare = false; //is there a chance that shared blob can be unshared?
+      // TODO - make put_ref return released_size directly
+      for (const auto& de : local_released) {
+        p->blob->get_shared_blob()->put_ref(de.offset, de.length, &shared_released, &unshare);
+      }
+      for (const auto& de : shared_released) {
+        released_size += de.length;
+      }
+      released.insert(released.end(), shared_released.begin(), shared_released.end());
+      shared_changed.insert(p->blob->get_shared_blob());
+    }
+    statfs_delta.allocated() -= released_size;
+    statfs_delta.stored() -= p->length;
+    if (bblob.is_compressed()) {
+      statfs_delta.compressed_allocated() -= released_size;
+      statfs_delta.compressed_original() -= p->length;
+      if (!bblob.has_disk()) {
+        statfs_delta.compressed() -= bblob.get_compressed_payload_length();
+      }
+    }
+    if (!bblob.has_disk()) {
+      pruned_blobs.push_back(p->blob);
+      if (p->blob->is_spanning()) {
+        emap.spanning_blob_map.erase(p->blob->id);
+        p->blob->id = -1;
+      }
+    }
+    Extent* e = &(*p);
+    p = emap.extent_map.erase(p);
+    delete e; // the extent is freed only after it was removed from the map
+  }
+  return p;
+}
diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h
index 78293d8eaba..66a22689ae5 100644
--- a/src/os/bluestore/bluestore_types.h
+++ b/src/os/bluestore/bluestore_types.h
@@ -621,7 +621,9 @@ public:
   bool is_shared() const {
     return has_flag(FLAG_SHARED);
   }
-
+  bool has_disk() const { // false for no extents or a single invalid extent
+    return !extents.empty() && (extents.size() > 1 || extents.begin()->is_valid());
+  }
   /// return chunk (i.e. min readable block) size for the blob
   uint64_t get_chunk_size(uint64_t dev_block_size) const {
     return has_csum() ?
-- 
2.39.5