]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
os/bluestore/bluefs: Make sure write buffer is aligned
author Adam Kupczyk <akupczyk@ibm.com>
Thu, 27 Nov 2025 11:02:08 +0000 (11:02 +0000)
committer Adam Kupczyk <akupczyk@ibm.com>
Tue, 3 Feb 2026 13:29:27 +0000 (14:29 +0100)
In envelope mode, the envelope header could cause the write buffer to become unaligned.
This change fixes that.

Fixes: https://tracker.ceph.com/issues/74010
Signed-off-by: Adam Kupczyk <akupczyk@ibm.com>
src/include/buffer.h
src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h

index 1f1604e43bceb4d6e341ceaa156010ed6fa7bb49..73b7309c15ad203cf788d6bdd89a13e5ce542004 100644 (file)
@@ -890,6 +890,10 @@ struct error_code;
        bl.obtain_contiguous_space(0);
       }
 
+      void refill() {
+        _refill(min_alloc);
+      }
+
       void append(const char* buf, size_t entire_len) {
         _append_common(entire_len,
                        [buf, this] (const size_t chunk_len) mutable {
index 36fde6d8b06bfcf47bd1bd14e813ef53ce1aa217..6be3105035837fc7bb7efe237e0c97265c71a687 100644 (file)
@@ -3833,6 +3833,17 @@ ceph::bufferlist BlueFS::FileWriter::flush_buffer(
     // The alternative approach would be to place the entire tail and
     // padding on a dedicated, 4 KB long memory chunk. This shouldn't
     // trigger the rebuild while still being less expensive.
+    if (file->envelope_mode() &&
+      buffer.get_append_buffer_unused_tail_length() <= super.block_size &&
+      p2phase(tail, super.block_size) >
+      p2phase<unsigned>(tail + File::envelope_t::head_size(), super.block_size)
+    ) {
+      // The envelope mode header must fit completely in a single buffer::ptr;
+      // otherwise append_hole() will allocate a new, unaligned buffer.
+      // Clearing the buffer forces buffer_appender to allocate fresh pages.
+      // Their size is at least 2 * super.block_size, so the header will fit.
+      buffer.clear();
+    }
     buffer_appender.substr_of(bl, bl.length() - padding_len - tail, tail);
     buffer.splice(buffer.length() - tail, tail, &tail_block);
   } else {
@@ -4099,8 +4110,8 @@ void BlueFS::append_try_flush(FileWriter *h, const char* buf, size_t len)/*_WF_L
   {
     std::unique_lock hl(h->lock);
     if (h->file->envelope_mode() && h->get_buffer_length() == 0) {
-      uint32_t pos1 = h->get_effective_write_pos();
       h->envelope_head_filler = h->append_hole(File::envelope_t::head_size());
+      uint32_t pos1 = h->get_effective_write_pos() - File::envelope_t::head_size();
       uint32_t pos2 = reinterpret_cast<uintptr_t>(h->envelope_head_filler.c_str());
       ceph_assert(p2aligned(pos1 ^ pos2, CEPH_PAGE_SIZE));
     }
@@ -4706,7 +4717,7 @@ int BlueFS::open_for_write(
 
 BlueFS::FileWriter *BlueFS::_create_writer(FileRef f)
 {
-  FileWriter *w = new FileWriter(f);
+  FileWriter *w = new FileWriter(f, super.block_size);
   for (unsigned i = 0; i < MAX_BDEV; ++i) {
     if (bdev[i]) {
       w->iocv[i] = new IOContext(cct, NULL);
index a889a19122a2a5407b8a6c48b1390c54405f11c6..30c146c00ab4a01707e4e81583853628eecf48d5 100644 (file)
@@ -426,10 +426,11 @@ public:
     std::array<IOContext*,MAX_BDEV> iocv; ///< for each bdev
     std::array<bool, MAX_BDEV> dirty_devs;
 
-    FileWriter(FileRef f)
-      : file(std::move(f)),
-       buffer_appender(buffer.get_page_aligned_appender(
-                         g_conf()->bluefs_alloc_size / CEPH_PAGE_SIZE)), envelope_head_filler() {
+    FileWriter(FileRef f, unsigned super_block_size)
+      : file(std::move(f))
+      , buffer_appender(buffer.get_page_aligned_appender(
+        std::max<uint64_t>(g_conf()->bluefs_alloc_size, 2 * super_block_size) / CEPH_PAGE_SIZE))
+      , envelope_head_filler() {
       ++file->num_writers;
       iocv.fill(nullptr);
       dirty_devs.fill(false);
@@ -473,6 +474,10 @@ public:
     }
 
     bufferlist::contiguous_filler append_hole(uint64_t len) {
+      if (buffer.get_append_buffer_unused_tail_length() < len) {
+        ceph_assert(buffer.length() == 0);
+        buffer_appender.refill();
+      }
       return buffer.append_hole(len);
     }