From f825c88690dacd45afd5601bb8b4ebb131a9065f Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Mon, 29 Sep 2025 06:41:23 +0000 Subject: [PATCH] client: adjust `Fb` cap ref count check during synchronous fsync() The cephfs client holds a ref on Fb caps when handing out a write delegation[0]. As a result, an fsync() from a (Ganesha) client holding a write delegation will block indefinitely[1] waiting for the Fb cap ref count to drop to 0, which will never happen until the delegation is returned/recalled. [0]: https://github.com/ceph/ceph/blob/main/src/client/Delegation.cc#L71 [1]: https://github.com/ceph/ceph/blob/main/src/client/Client.cc#L12438 If an inode has been write delegated, adjust the cap reference count check in fsync() to account for the ref held by the delegation. Note: This only works for synchronous fsync() since `client_lock` is held for the entire duration of the call (at least along the path leading up to the reference count check). Asynchronous fsync() needs to be fixed separately (as that can drop `client_lock`). Fixes: https://tracker.ceph.com/issues/73298 Signed-off-by: Venky Shankar --- src/client/Client.cc | 8 ++++++-- src/client/Delegation.h | 3 +++ src/client/Inode.cc | 15 +++++++++++++++ src/client/Inode.h | 1 + 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 4d802fa5d60..899f513ca09 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -12435,9 +12435,13 @@ int Client::_fsync(Inode *in, bool syncdataonly) ldout(cct, 15) << "got " << r << " from flush writeback" << dendl; } else { // FIXME: this can starve - while (in->cap_refs[CEPH_CAP_FILE_BUFFER] > 0) { + int nr_refs = 0; + if (in->is_write_delegated()) { + ++nr_refs; + } + while (in->cap_refs[CEPH_CAP_FILE_BUFFER] > nr_refs) { ldout(cct, 10) << "ino " << in->ino << " has " << in->cap_refs[CEPH_CAP_FILE_BUFFER] - << " uncommitted, waiting" << dendl; + << " uncommitted (nrefs: " << nr_refs << "), waiting" << dendl; wait_on_context_list(in->waitfor_commit); } } diff --git 
a/src/client/Delegation.h b/src/client/Delegation.h index d24a02487e1..85391cb3727 100644 --- a/src/client/Delegation.h +++ b/src/client/Delegation.h @@ -28,6 +28,9 @@ public: Fh *get_fh() { return fh; } unsigned get_type() { return type; } bool is_recalled() { return !recall_time.is_zero(); } + bool is_write_delegated() { + return type == CEPH_DELEGATION_WR; + } void reinit(unsigned _type, ceph_deleg_cb_t _recall_cb, void *_priv); void recall(bool skip_read); diff --git a/src/client/Inode.cc b/src/client/Inode.cc index 60932d606b5..aabed0a3ad3 100644 --- a/src/client/Inode.cc +++ b/src/client/Inode.cc @@ -640,6 +640,21 @@ bool Inode::has_recalled_deleg() return deleg.is_recalled(); } +bool Inode::is_write_delegated() +{ + if (delegations.empty()) { + return false; + } + + for (auto& deleg : delegations) { + if (deleg.is_write_delegated() && !deleg.is_recalled()) { + return true; + } + } + + return false; +} + void Inode::recall_deleg(bool skip_read) { if (delegations.empty()) diff --git a/src/client/Inode.h b/src/client/Inode.h index 5a92df5bea0..1e2e22a2828 100644 --- a/src/client/Inode.h +++ b/src/client/Inode.h @@ -349,6 +349,7 @@ struct Inode : RefCountedObject { void recall_deleg(bool skip_read); bool has_recalled_deleg(); + bool is_write_delegated(); int set_deleg(Fh *fh, unsigned type, ceph_deleg_cb_t cb, void *priv); void unset_deleg(Fh *fh); -- 2.39.5