From 3ebe97484d26cf5d9cd78636ee4718c075a2897b Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Thu, 26 Sep 2024 10:50:32 +0530 Subject: [PATCH] client: Fix libcephfs aio metadata corruption. Problem: With cephfs nfs-ganesha, there were following asserts hit while doing write on a file. 1. FAILED ceph_assert((bool)_front == (bool)_size) 2. FAILED ceph_assert(cap_refs[c] > 0) Cause: In aio path, the client_lock was not being held in the internal callback after the io is done where it's expected to be taken leading to corruption. Fix: Take client_lock in the callback Fixes: https://tracker.ceph.com/issues/68146 Signed-off-by: Kotresh HR --- src/client/Client.cc | 19 +++++++++++++++++-- src/client/Client.h | 15 +++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index e208cf7667577..1bc67ce38bb87 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -11399,10 +11399,18 @@ int64_t Client::_write_success(Fh *f, utime_t start, uint64_t fpos, return r; } +void Client::C_Lock_Client_Finisher::finish(int r) +{ + std::scoped_lock lock(clnt->client_lock); + onfinish->complete(r); +} + void Client::C_Write_Finisher::finish_io(int r) { bool fini; + ceph_assert(ceph_mutex_is_locked_by_me(clnt->client_lock)); + clnt->put_cap_ref(in, CEPH_CAP_FILE_BUFFER); if (r >= 0) { @@ -11438,6 +11446,8 @@ void Client::C_Write_Finisher::finish_fsync(int r) bool fini; client_t const whoami = clnt->whoami; // For the benefit of ldout prefix + ceph_assert(ceph_mutex_is_locked_by_me(clnt->client_lock)); + ldout(clnt->cct, 3) << "finish_fsync r = " << r << dendl; fsync_finished = true; @@ -11598,6 +11608,7 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, std::unique_ptr iofinish = nullptr; std::unique_ptr cwf = nullptr; + std::unique_ptr filer_iofinish = nullptr; if (in->inline_version < CEPH_INLINE_NONE) { if (endoff > cct->_conf->client_max_inline_size || @@ -11709,7 +11720,10 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, if (onfinish == nullptr) { // We need a safer condition to wait on. cond_iofinish = new C_SaferCond(); - iofinish.reset(cond_iofinish); + filer_iofinish.reset(cond_iofinish); + } else { + //Register a wrapper callback for the C_Write_Finisher which takes 'client_lock' + filer_iofinish.reset(new C_Lock_Client_Finisher(this, iofinish.get())); } get_cap_ref(in, CEPH_CAP_FILE_BUFFER); @@ -11717,11 +11731,12 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, filer->write_trunc(in->ino, &in->layout, in->snaprealm->get_snap_context(), offset, size, bl, ceph::real_clock::now(), 0, in->truncate_size, in->truncate_seq, - iofinish.get()); + filer_iofinish.get()); if (onfinish) { // handle non-blocking caller (onfinish != nullptr), we can now safely // release all the managed pointers + filer_iofinish.release(); iofinish.release(); onuninline.release(); cwf.release(); diff --git a/src/client/Client.h b/src/client/Client.h index 5a1e69394d02a..f8c39e2fdd6ab 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -1409,6 +1409,21 @@ private: void finish(int r) override; }; + // A wrapper callback which takes the 'client_lock' and finishes the context. + // One of the usecase is the filer->write_trunc which doesn't hold client_lock + // in the call back passed. So, use this wrapper in such cases. + class C_Lock_Client_Finisher : public Context { + public: + C_Lock_Client_Finisher(Client *clnt, Context *onfinish) + : clnt(clnt), onfinish(onfinish) {} + + private: + Client *clnt; + Context *onfinish; + + void finish(int r) override; + }; + class C_Write_Finisher : public Context { public: void finish_io(int r); -- 2.39.5