From: Adam Kupczyk Date: Thu, 27 Nov 2025 11:02:08 +0000 (+0000) Subject: os/bluestore/bluefs: Make sure write buffer is aligned X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=dafa173733cc073597fa5e5be4c82a99c8730cbb;p=ceph.git os/bluestore/bluefs: Make sure write buffer is aligned In envelope mode, the header could cause the write buffer to become unaligned. This change fixes it. Fixes: https://tracker.ceph.com/issues/74010 Signed-off-by: Adam Kupczyk --- diff --git a/src/include/buffer.h b/src/include/buffer.h index 1f1604e43bce..73b7309c15ad 100644 --- a/src/include/buffer.h +++ b/src/include/buffer.h @@ -890,6 +890,10 @@ struct error_code; bl.obtain_contiguous_space(0); } + void refill() { + _refill(min_alloc); + } + void append(const char* buf, size_t entire_len) { _append_common(entire_len, [buf, this] (const size_t chunk_len) mutable { diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 6d1ea61086b4..252c062771a1 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -3851,6 +3851,17 @@ ceph::bufferlist BlueFS::FileWriter::flush_buffer( // The alternative approach would be to place the entire tail and // padding on a dedicated, 4 KB long memory chunk. This shouldn't // trigger the rebuild while still being less expensive. + if (file->envelope_mode() && + buffer.get_append_buffer_unused_tail_length() < tail + File::envelope_t::head_size()) { + // The envelope mode header must fit completely in a single buffer::ptr; + // otherwise append_hole() will allocate a new, unaligned buffer. + // |4K ......... ~6k ........ 8K-1 | 8K .............................. 12K| + // <-----tail----><00000000000000000> <-----buffer_unused_tail_length-------> + // <------to-be-flushed-to-disk-----> <----------tail-----------> + // Clearing the buffer is a way to force buffer_appender to allocate fresh + // pages. The allocation is at least 2 * super.block_size, so the header will fit. 
+ buffer.clear(); + } buffer_appender.substr_of(bl, bl.length() - padding_len - tail, tail); buffer.splice(buffer.length() - tail, tail, &tail_block); } else { @@ -4117,8 +4128,8 @@ void BlueFS::append_try_flush(FileWriter *h, const char* buf, size_t len)/*_WF_L { std::unique_lock hl(h->lock); if (h->file->envelope_mode() && h->get_buffer_length() == 0) { - uint32_t pos1 = h->get_effective_write_pos(); h->envelope_head_filler = h->append_hole(File::envelope_t::head_size()); + uint32_t pos1 = h->get_effective_write_pos() - File::envelope_t::head_size(); uint32_t pos2 = reinterpret_cast(h->envelope_head_filler.c_str()); ceph_assert(p2aligned(pos1 ^ pos2, CEPH_PAGE_SIZE)); } @@ -4724,7 +4735,7 @@ int BlueFS::open_for_write( BlueFS::FileWriter *BlueFS::_create_writer(FileRef f) { - FileWriter *w = new FileWriter(f); + FileWriter *w = new FileWriter(f, super.block_size); for (unsigned i = 0; i < MAX_BDEV; ++i) { if (bdev[i]) { w->iocv[i] = new IOContext(cct, NULL); diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 8b766a5ef60d..450d6766acba 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -432,10 +432,11 @@ public: std::array iocv; ///< for each bdev std::array dirty_devs; - FileWriter(FileRef f) - : file(std::move(f)), - buffer_appender(buffer.get_page_aligned_appender( - g_conf()->bluefs_alloc_size / CEPH_PAGE_SIZE)), envelope_head_filler() { + FileWriter(FileRef f, unsigned super_block_size) + : file(std::move(f)) + , buffer_appender(buffer.get_page_aligned_appender( + std::max(g_conf()->bluefs_alloc_size, 2 * super_block_size) / CEPH_PAGE_SIZE)) + , envelope_head_filler() { ++file->num_writers; iocv.fill(nullptr); dirty_devs.fill(false); @@ -479,6 +480,10 @@ public: } bufferlist::contiguous_filler append_hole(uint64_t len) { + if (buffer.get_append_buffer_unused_tail_length() < len) { + ceph_assert(buffer.length() == 0); + buffer_appender.refill(); + } return buffer.append_hole(len); }