From: Adam Kupczyk
Date: Wed, 9 Feb 2022 15:19:56 +0000 (+0100)
Subject: os/bluestore/bluefs: Fix improper vselector tracking in _flush_special()
X-Git-Tag: v16.2.14~23^2~23
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bd6cef1e403e174c0e6544029f4de748bb8d8d34;p=ceph.git

os/bluestore/bluefs: Fix improper vselector tracking in _flush_special()

Moves vselector size tracking outside of _flush_special().
The function _compact_log_async...() updated sizes twice.
The problem could not be solved by turning the second size modification
into a plain update, as that could disrupt the vselector consistency check
(_vselector_check()).
The feature that tracks vselector consistency relies on the fact that either
log.lock or nodes.lock is taken when the check is performed, which is not
true for _compact_log_async...().

Now _flush_special() does not update vselector sizes by itself but leaves
the update to the caller.

Fixes: https://tracker.ceph.com/issues/54248

Signed-off-by: Adam Kupczyk
(cherry picked from commit 4bc0f61d23299724fad2d8e6f2858734f1db6e5a)
---

diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index bb4f46c9dc2..59f8e682b5b 100644
--- a/src/os/bluestore/BlueFS.cc
+++ b/src/os/bluestore/BlueFS.cc
@@ -2360,8 +2360,8 @@ void BlueFS::_rewrite_log_and_layout_sync_LN_LD(bool allocate_with_fallback,
 
   log.writer = _create_writer(log_file);
   log.writer->append(bl);
-  r = _flush_special(log.writer);
-  ceph_assert(r == 0);
+  uint64_t new_data = _flush_special(log.writer);
+  vselector->add_usage(log_file->vselector_hint, new_data);
 #ifdef HAVE_LIBAIO
   if (!cct->_conf->bluefs_sync_write) {
     list completed_ios;
@@ -2518,8 +2518,7 @@ void BlueFS::_compact_log_async_LD_NF_D() //also locks FW for new_writer
   new_log_writer->lock.lock();
   new_log->lock.lock();
   // 3. flush
-  r = _flush_special(new_log_writer);
-  ceph_assert(r == 0);
+  _flush_special(new_log_writer);
 
   // 4. wait
   _flush_bdev(new_log_writer);
@@ -2738,8 +2737,8 @@ void BlueFS::_flush_and_sync_log_core(int64_t runway)
   log.t.clear();
   log.t.seq = log.seq_live;
 
-  int r = _flush_special(log.writer);
-  ceph_assert(r == 0);
+  uint64_t new_data = _flush_special(log.writer);
+  vselector->add_usage(log.writer->file->vselector_hint, new_data);
 }
 
 // Clears dirty.files up to (including) seq_stable.
@@ -3217,18 +3216,19 @@ int BlueFS::_flush_F(FileWriter *h, bool force, bool *flushed)
 // we do not need to dirty the log file (or it's compacting
 // replacement) when the file size changes because replay is
 // smart enough to discover it on its own.
-int BlueFS::_flush_special(FileWriter *h)
+uint64_t BlueFS::_flush_special(FileWriter *h)
 {
   ceph_assert(h->file->fnode.ino <= 1);
   uint64_t length = h->get_buffer_length();
   uint64_t offset = h->pos;
+  uint64_t new_data = 0;
   ceph_assert(length + offset <= h->file->fnode.get_allocated());
   if (h->file->fnode.size < offset + length) {
-    vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size);
+    new_data = offset + length - h->file->fnode.size;
     h->file->fnode.size = offset + length;
-    vselector->add_usage(h->file->vselector_hint, h->file->fnode.size);
   }
-  return _flush_data(h, offset, length, false);
+  _flush_data(h, offset, length, false);
+  return new_data;
 }
 
 int BlueFS::truncate(FileWriter *h, uint64_t offset)
diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h
index 65fad533c98..2d32640db98 100644
--- a/src/os/bluestore/BlueFS.h
+++ b/src/os/bluestore/BlueFS.h
@@ -408,7 +408,7 @@ private:
   int _flush_range_F(FileWriter *h, uint64_t offset, uint64_t length);
   int _flush_data(FileWriter *h, uint64_t offset, uint64_t length, bool buffered);
   int _flush_F(FileWriter *h, bool force, bool *flushed = nullptr);
-  int _flush_special(FileWriter *h);
+  uint64_t _flush_special(FileWriter *h);
   int _fsync(FileWriter *h);
 
 #ifdef HAVE_LIBAIO