]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
os/bluestore: Refactor of write path. New punch_hole_2 function.
authorAdam Kupczyk <akupczyk@ibm.com>
Tue, 14 Nov 2023 16:25:01 +0000 (16:25 +0000)
committerAdam Kupczyk <akupczyk@ibm.com>
Wed, 7 Aug 2024 10:55:45 +0000 (10:55 +0000)
Introduce new logic for Onode processing during write.
The new punch_hole_2 function empties a range, but keeps track of these elements:
- allocations that are no longer used
- blobs that are now empty
- shared blobs that got modified
- statfs changes to apply later

This change allows allocations to be reused freely in deferred mode, which means
that an allocation can be used in deferred mode by a blob other than the one it came from.

Signed-off-by: Adam Kupczyk <akupczyk@ibm.com>
src/os/CMakeLists.txt
src/os/bluestore/BlueStore.h
src/os/bluestore/Writer.cc [new file with mode: 0644]
src/os/bluestore/bluestore_types.h

index 9353ceaa63fc861ebddba5bdaca41db19216188a..e9c698eae261af4690a7e42fd25848ccf9b436e7 100644 (file)
@@ -25,6 +25,7 @@ if(WITH_BLUESTORE)
     bluestore/AvlAllocator.cc
     bluestore/BtreeAllocator.cc
     bluestore/HybridAllocator.cc
+    bluestore/Writer.cc
   )
 endif(WITH_BLUESTORE)
 
index 2cb7229942d8a93080da3963d0918db6ed6c21fd..e96547da9903c7629fb2c9ba6be3712fe5ce159c 100644 (file)
@@ -747,6 +747,11 @@ public:
     /// put logical references, and get back any released extents
     bool put_ref(Collection *coll, uint32_t offset, uint32_t length,
                 PExtentVector *r);
+    uint32_t put_ref_accumulate(  // returns the disk space size to release; defined in Writer.cc
+      Collection *coll,
+      uint32_t offset,  // start of the range [offset~length] that is no longer used
+      uint32_t length,  // must be > 0 (asserted in the definition)
+      PExtentVector *released_disk);  // collects allocation units that became unused
     /// split the blob
     void split(Collection *coll, uint32_t blob_offset, Blob *o);
 
@@ -3702,6 +3707,15 @@ private:
       uint64_t loffs_end,
       uint64_t min_alloc_size);
   };
+  BlueStore::extent_map_t::iterator _punch_hole_2(  // empties [offset~length] of object o; defined in Writer.cc
+    Collection* c,
+    OnodeRef& o,
+    uint32_t offset,
+    uint32_t length,
+    PExtentVector& released,  // out: allocation units that are no longer used
+    std::vector<BlobRef>& pruned_blobs,  // out: blobs that are now empty
+    std::set<SharedBlobRef>& shared_changed,  // out: shared blobs that got modified
+    volatile_statfs& statfs_delta);  // out: statfs changes to apply later
   void _do_write_small(
     TransContext *txc,
     CollectionRef &c,
diff --git a/src/os/bluestore/Writer.cc b/src/os/bluestore/Writer.cc
new file mode 100644 (file)
index 0000000..ffb0dca
--- /dev/null
@@ -0,0 +1,113 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2023 IBM
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "os/bluestore/bluestore_types.h"
+#include "BlueStore.h"
+#include "Allocator.h"
+
+/// Marks blob range [offset~length] as no longer used.
+/// Allocation units that became unused as a result are collected
+/// into *released_disk.
+/// The `coll` argument is accepted but not used by this function.
+/// Returns:
+///   disk space size to release; 0 when put_simple() reports a
+///   zero-length range
+uint32_t BlueStore::Blob::put_ref_accumulate(
+  Collection *coll,
+  uint32_t offset,
+  uint32_t length,
+  PExtentVector *released_disk)
+{
+  ceph_assert(length > 0);
+  auto [blob_off, blob_len] = used_in_blob.put_simple(offset, length);
+  if (blob_len == 0) {
+    // put_simple() reported nothing to release; the blob is left untouched
+    return 0;
+  }
+  // dirty_blob() is requested only when extents are actually released
+  return dirty_blob().release_extents(blob_off, blob_len, released_disk);
+}
+
+/// Empties range [offset~length] of object o that is in collection c: every extent inside the range is erased and deleted.
+/// What became unused is reported to the caller through the output arguments:
+/// released - allocation units that are no longer used (for shared blobs, only what the shared blob's put_ref reports back)
+/// pruned_blobs - blobs for which has_disk() is false after the release (a vector, appended to)
+/// shared_changed - set of shared blobs that are modified,
+///                  including the case of shared blob being empty
+/// statfs_delta - stats delta: allocated and stored, plus compressed_allocated/compressed_original/compressed for compressed blobs
+BlueStore::extent_map_t::iterator BlueStore::_punch_hole_2(
+  Collection* c,
+  OnodeRef& o,
+  uint32_t offset,
+  uint32_t length,
+  PExtentVector& released,
+  std::vector<BlobRef>& pruned_blobs,       //completely emptied out blobs
+  std::set<SharedBlobRef>& shared_changed,  //shared blobs that have changed
+  volatile_statfs& statfs_delta)
+{
+  ExtentMap& emap = o->extent_map;
+  uint32_t end = offset + length;  // NOTE(review): 32-bit sum; presumably callers keep offset + length below 2^32 - confirm
+  auto p = emap.maybe_split_at(offset);  // presumably splits an extent straddling offset and returns the first extent at/after it - confirm
+  while (p != emap.extent_map.end() && p->logical_offset < end) {
+    // split the tail extent when it reaches past the end of the range
+    if (end < p->logical_end()) {
+      p = emap.split_at(p, end);
+      --p;  // step back to the piece inside the range (assumes split_at returns the piece starting at end - confirm)
+    }
+    // from here on, p is always a whole lextent to drop
+    auto& bblob = p->blob->dirty_blob();  // mutable blob data (dirty_blob() presumably flags the blob as modified - confirm)
+    uint32_t released_size = 0;  // disk bytes given up by this extent, used for the statfs update below
+    if (!bblob.is_shared()) {
+      released_size =
+        p->blob->put_ref_accumulate(c, p->blob_offset, p->length, &released);
+    } else {
+      // make sure shared blob is loaded
+      c->load_shared_blob(p->blob->get_shared_blob());
+      // more complicated shared blob release: two-stage filtering
+      PExtentVector local_released;  //no longer used by local blob
+      PExtentVector shared_released; //no longer used by shared blob too
+      p->blob->put_ref_accumulate(c, p->blob_offset, p->length, &local_released);  // return value ignored; size is recomputed below
+      // filter local release disk regions
+      // through SharedBlob's multi-ref ref_map disk regions
+      bool unshare = false; //is there a chance that shared blob can be unshared? (only passed to put_ref, not read afterwards)
+      // TODO - make put_ref return released_size directly
+      for (auto de: local_released) {  // de is a per-iteration copy of each extent
+        p->blob->get_shared_blob()->put_ref(de.offset, de.length, &shared_released, &unshare);
+      }
+      for (auto& de : shared_released) {
+        released_size += de.length;
+      }
+      released.insert(released.end(), shared_released.begin(), shared_released.end());
+      shared_changed.insert(p->blob->get_shared_blob());  // inserted even when shared_released is empty
+    }
+    statfs_delta.allocated() -= released_size;
+    statfs_delta.stored() -= p->length;
+    if (bblob.is_compressed()) {
+      statfs_delta.compressed_allocated() -= released_size;
+      statfs_delta.compressed_original() -= p->length;
+      if (!bblob.has_disk()) {  // the compressed payload is subtracted only once has_disk() turns false
+        statfs_delta.compressed() -= bblob.get_compressed_payload_length();
+      }
+    }
+    if (!bblob.has_disk()) {
+      pruned_blobs.push_back(p->blob);
+      if (p->blob->is_spanning()) {
+        emap.spanning_blob_map.erase(p->blob->id);
+        p->blob->id = -1;  // reset the id after removal from spanning_blob_map
+      }
+    }
+    Extent* e = &(*p);  // remember the address; the extent is deleted explicitly after being erased from the map
+    p = emap.extent_map.erase(p);  // p now refers to the element that followed the erased extent
+    delete e;
+  }
+  return p;  // extent_map.end(), or the first extent whose logical_offset is >= offset + length
+}
index 78293d8eaba8313391aab96b62b733623b41ef5c..66a22689ae5dd0ad38732ae698293acda7a74370 100644 (file)
@@ -621,7 +621,9 @@ public:
   bool is_shared() const {
     return has_flag(FLAG_SHARED);
   }
-
+  /// Returns true when `extents` has more than one entry, or when its
+  /// only entry reports is_valid(). An empty `extents` yields false
+  /// instead of dereferencing begin() of an empty container.
+  bool has_disk() const {
+    return !extents.empty() &&
+      (extents.size() > 1 || extents.begin()->is_valid());
+  }
   /// return chunk (i.e. min readable block) size for the blob
   uint64_t get_chunk_size(uint64_t dev_block_size) const {
     return has_csum() ?