From 4bc0f61d23299724fad2d8e6f2858734f1db6e5a Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Wed, 9 Feb 2022 16:19:56 +0100 Subject: [PATCH] os/bluestore/bluefs: Fix improper vselector tracking in _flush_special() Moves vselector size tracking outside _flush_special(). Function _compact_log_async...() updated sizes twice. The problem could not be solved by turning the second size modification into a plain update, as that could disrupt the vselector consistency check (_vselector_check()). The feature that tracks vselector consistency relies on the fact that either log.lock or nodes.lock is taken when the check is performed, which is not true for _compact_log_async...(). Now _flush_special() does not update vselector sizes by itself but leaves the update to the caller. Fixes: https://tracker.ceph.com/issues/54248 Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueFS.cc | 20 ++++++++++---------- src/os/bluestore/BlueFS.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index e74cbc6308501..3b7d329f79d97 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -2495,8 +2495,8 @@ void BlueFS::_rewrite_log_and_layout_sync_LNF_LD(bool allocate_with_fallback, log.writer = _create_writer(log_file); log.writer->append(bl); - r = _flush_special(log.writer); - ceph_assert(r == 0); + uint64_t new_data = _flush_special(log.writer); + vselector->add_usage(log_file->vselector_hint, new_data); #ifdef HAVE_LIBAIO if (!cct->_conf->bluefs_sync_write) { list completed_ios; @@ -2648,8 +2648,7 @@ void BlueFS::_compact_log_async_LD_LNF_D() //also locks FW for new_writer new_log_writer->append(bl); // 3. flush - r = _flush_special(new_log_writer); - ceph_assert(r == 0); + _flush_special(new_log_writer); // 4. 
wait _flush_bdev(new_log_writer); @@ -2863,8 +2862,8 @@ void BlueFS::_flush_and_sync_log_core(int64_t runway) log.t.clear(); log.t.seq = log.seq_live; - int r = _flush_special(log.writer); - ceph_assert(r == 0); + uint64_t new_data = _flush_special(log.writer); + vselector->add_usage(log.writer->file->vselector_hint, new_data); } // Clears dirty.files up to (including) seq_stable. @@ -3341,18 +3340,19 @@ int BlueFS::_flush_F(FileWriter *h, bool force, bool *flushed) // we do not need to dirty the log file (or it's compacting // replacement) when the file size changes because replay is // smart enough to discover it on its own. -int BlueFS::_flush_special(FileWriter *h) +uint64_t BlueFS::_flush_special(FileWriter *h) { ceph_assert(h->file->fnode.ino <= 1); uint64_t length = h->get_buffer_length(); uint64_t offset = h->pos; + uint64_t new_data = 0; ceph_assert(length + offset <= h->file->fnode.get_allocated()); if (h->file->fnode.size < offset + length) { - vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size); + new_data = offset + length - h->file->fnode.size; h->file->fnode.size = offset + length; - vselector->add_usage(h->file->vselector_hint, h->file->fnode.size); } - return _flush_data(h, offset, length, false); + _flush_data(h, offset, length, false); + return new_data; } int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 20dfbb5c4cfc1..4307507d13b95 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -427,7 +427,7 @@ private: int _flush_range_F(FileWriter *h, uint64_t offset, uint64_t length); int _flush_data(FileWriter *h, uint64_t offset, uint64_t length, bool buffered); int _flush_F(FileWriter *h, bool force, bool *flushed = nullptr); - int _flush_special(FileWriter *h); + uint64_t _flush_special(FileWriter *h); int _fsync(FileWriter *h); #ifdef HAVE_LIBAIO -- 2.39.5