From 8c541963b60ce957f112f3a5ae0da68b8a7ccd47 Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Tue, 1 Apr 2025 14:01:23 +0000 Subject: [PATCH] os/bluestore/bluefs: Fix race condition between truncate() and unlink() It was possible for unlink() to interrupt ongoing truncate(). As the result, unlink() finishes properly, but truncate() is not aware of it and does: 1) updates file that is already removed 2) releases same allocations again Now fixed by checking if file is deleted under FILE lock. https://tracker.ceph.com/issues/70856 Signed-off-by: Adam Kupczyk (cherry picked from commit e5f8892a1249a0ce631082d1fbf8884237434a0f) --- src/os/bluestore/BlueFS.cc | 33 ++++++++++++++++++--------------- src/os/bluestore/BlueFS.h | 2 +- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 90f336e1ff8c2..17e44353e9139 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -2110,7 +2110,7 @@ std::ostream& operator<<(std::ostream& out, const lock_fnode_print& to_lock) { return out; } -void BlueFS::_drop_link_D(FileRef file) +void BlueFS::_drop_link_DF(FileRef file) { dout(20) << __func__ << " had refs " << file->refs << " on " << lock_fnode_print(file) << dendl; @@ -2126,9 +2126,11 @@ void BlueFS::_drop_link_D(FileRef file) log.t.op_file_remove(file->fnode.ino); nodes.file_map.erase(file->fnode.ino); logger->set(l_bluefs_num_files, nodes.file_map.size()); - file->deleted = true; - std::lock_guard dl(dirty.lock); + std::lock_guard fl(file->lock); + // setting `deleted` is under log+nodes+dirty+file locks + // so it is enough to held one of those locks to make sure it does not change + file->deleted = true; for (auto& r : file->fnode.extents) { dirty.pending_release[r.bdev].insert(r.offset, r.length); } @@ -3446,10 +3448,6 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length) << " to " << h->file->fnode << " hint " << h->file->vselector_hint << dendl; - if (h->file->deleted) { - dout(10) << __func__ << " deleted, no-op" << dendl; - return 0; - } bool buffered = cct->_conf->bluefs_buffered_io; @@ -3463,6 +3461,10 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length) << dendl; } std::lock_guard file_lock(h->file->lock); + if (h->file->deleted) { + dout(10) << __func__ << " deleted, no-op" << dendl; + return 0; + } ceph_assert(offset <= h->file->fnode.size); uint64_t allocated = h->file->fnode.get_allocated(); @@ -3719,16 +3721,13 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ auto t0 = mono_clock::now(); std::lock_guard hl(h->lock); auto& fnode = h->file->fnode; - dout(10) << __func__ << " 0x" << std::hex << offset << std::dec - << " file " << fnode << dendl; - if (h->file->deleted) { - dout(10) << __func__ << " deleted, no-op" << dendl; - return 0; - } // we never truncate internal log files ceph_assert(fnode.ino > 1); + dout(10) << __func__ << " 0x" << std::hex << offset << std::dec + << " file " << fnode << dendl; + // truncate off unflushed data? if (h->pos < offset && h->pos + h->get_buffer_length() > offset) { @@ -3749,6 +3748,10 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ { std::lock_guard ll(log.lock); std::lock_guard dl(dirty.lock); + if (h->file->deleted) { + dout(10) << __func__ << " deleted, no-op" << dendl; + return 0; + } bool changed_extents = false; vselector->sub_usage(h->file->vselector_hint, fnode); uint64_t x_off = 0; @@ -4339,7 +4342,7 @@ int BlueFS::rename( << " already exists, unlinking" << dendl; ceph_assert(q->second != file); log.t.op_dir_unlink(new_dirname, new_filename); - _drop_link_D(q->second); + _drop_link_DF(q->second); } dout(10) << __func__ << " " << new_dirname << "/" << new_filename << " " @@ -4534,7 +4537,7 @@ int BlueFS::unlink(std::string_view dirname, std::string_view filename)/*_LND*/ } dir->file_map.erase(q); log.t.op_dir_unlink(dirname, filename); - _drop_link_D(file); + _drop_link_DF(file); logger->tinc(l_bluefs_unlink_lat, mono_clock::now() - t0); return 0; diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 079c880cca72a..913cee9cbb751 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -558,7 +558,7 @@ private: FileRef _get_file(uint64_t ino); - void _drop_link_D(FileRef f); + void _drop_link_DF(FileRef f); unsigned _get_slow_device_id() { return bdev[BDEV_SLOW] ? BDEV_SLOW : BDEV_DB; -- 2.39.5