From: Adam Kupczyk Date: Tue, 1 Apr 2025 14:01:23 +0000 (+0000) Subject: os/bluestore/bluefs: Fix race condition between truncate() and unlink() X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=4f83038f13e60f02d2a14c59df232b392b57a904;p=ceph.git os/bluestore/bluefs: Fix race condition between truncate() and unlink() It was possible for unlink() to interrupt ongoing truncate(). As the result, unlink() finishes properly, but truncate() is not aware of it and does: 1) updates file that is already removed 2) releases same allocations again Now fixed by checking if file is deleted under FILE lock. Fixes: https://tracker.ceph.com/issues/70855 Signed-off-by: Adam Kupczyk (cherry picked from commit e5f8892a1249a0ce631082d1fbf8884237434a0f) --- diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 0e9941cea4b21..8aafe20c9a86a 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -2080,7 +2080,7 @@ std::ostream& operator<<(std::ostream& out, const lock_fnode_print& to_lock) { return out; } -void BlueFS::_drop_link_D(FileRef file) +void BlueFS::_drop_link_DF(FileRef file) { dout(20) << __func__ << " had refs " << file->refs << " on " << lock_fnode_print(file) << dendl; @@ -2096,9 +2096,11 @@ void BlueFS::_drop_link_D(FileRef file) log.t.op_file_remove(file->fnode.ino); nodes.file_map.erase(file->fnode.ino); logger->set(l_bluefs_num_files, nodes.file_map.size()); - file->deleted = true; - std::lock_guard dl(dirty.lock); + std::lock_guard fl(file->lock); + // setting `deleted` is under log+nodes+dirty+file locks + // so it is enough to held one of those locks to make sure it does not change + file->deleted = true; for (auto& r : file->fnode.extents) { dirty.pending_release[r.bdev].insert(r.offset, r.length); } @@ -3410,10 +3412,6 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length) << " to " << h->file->fnode << " hint " << h->file->vselector_hint << dendl; - if (h->file->deleted) { - dout(10) << __func__ << " deleted, no-op" << dendl; - return 0; - } bool buffered = cct->_conf->bluefs_buffered_io; @@ -3427,6 +3425,10 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length) << dendl; } std::lock_guard file_lock(h->file->lock); + if (h->file->deleted) { + dout(10) << __func__ << " deleted, no-op" << dendl; + return 0; + } ceph_assert(offset <= h->file->fnode.size); uint64_t allocated = h->file->fnode.get_allocated(); @@ -3681,16 +3683,13 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ { std::lock_guard hl(h->lock); auto& fnode = h->file->fnode; - dout(10) << __func__ << " 0x" << std::hex << offset << std::dec - << " file " << fnode << dendl; - if (h->file->deleted) { - dout(10) << __func__ << " deleted, no-op" << dendl; - return 0; - } // we never truncate internal log files ceph_assert(fnode.ino > 1); + dout(10) << __func__ << " 0x" << std::hex << offset << std::dec + << " file " << fnode << dendl; + // truncate off unflushed data? if (h->pos < offset && h->pos + h->get_buffer_length() > offset) { @@ -3711,6 +3710,10 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ { std::lock_guard ll(log.lock); std::lock_guard dl(dirty.lock); + if (h->file->deleted) { + dout(10) << __func__ << " deleted, no-op" << dendl; + return 0; + } bool changed_extents = false; vselector->sub_usage(h->file->vselector_hint, fnode); uint64_t x_off = 0; @@ -4298,7 +4301,7 @@ int BlueFS::rename( << " already exists, unlinking" << dendl; ceph_assert(q->second != file); log.t.op_dir_unlink(new_dirname, new_filename); - _drop_link_D(q->second); + _drop_link_DF(q->second); } dout(10) << __func__ << " " << new_dirname << "/" << new_filename << " " @@ -4492,7 +4495,8 @@ int BlueFS::unlink(std::string_view dirname, std::string_view filename)/*_LND*/ } dir->file_map.erase(string{filename}); log.t.op_dir_unlink(dirname, filename); - _drop_link_D(file); + _drop_link_DF(file); + return 0; } diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index d31f7e5d2ddd5..f0f46d3d86656 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -522,7 +522,7 @@ private: FileRef _get_file(uint64_t ino); - void _drop_link_D(FileRef f); + void _drop_link_DF(FileRef f); unsigned _get_slow_device_id() { return bdev[BDEV_SLOW] ? BDEV_SLOW : BDEV_DB;