From 7ab995b715968a4d03cf91aa7c6f44e25757a45e Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Mon, 2 Jun 2025 05:05:44 +0000 Subject: [PATCH] client: synthetically delay write operation Add a dev-only config option, client_inject_write_delay_secs, that makes Client::_write() sleep (with client_lock dropped) after taking the Fb cap reference. This allows the client to hold Fb caps for an extended period of time, so that an asynchronous fsync can intervene and block, which helps to reproduce and hunt [0]. The option defaults to 0 (no delay). [0]: https://tracker.ceph.com/issues/71510 Signed-off-by: Venky Shankar --- src/client/Client.cc | 15 +++++++++++++++ src/client/Client.h | 5 +++++ src/common/options/mds-client.yaml.in | 11 ++++++++++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 27e81beccb5..92b0daf3cac 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -425,6 +425,9 @@ Client::Client(Messenger *m, MonClient *mc, Objecter *objecter_) caps_release_delay = cct->_conf.get_val( "client_caps_release_delay"); + injected_write_delay_secs = std::chrono::duration( + cct->_conf.get_val("client_inject_write_delay_secs")).count(); + if (cct->_conf->client_acl_type == "posix_acl") acl_type = POSIX_ACL; @@ -11978,6 +11981,13 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, bufferlist bl, get_cap_ref(in, CEPH_CAP_FILE_BUFFER); + auto delay = get_injected_write_delay_secs(); + if (unlikely(delay > 0)) { + ldout(cct, 20) << __func__ << ": delaying write for " << delay << " seconds" << dendl; + client_lock.unlock(); + sleep(delay); + client_lock.lock(); + } filer->write_trunc(in->ino, &in->layout, in->snaprealm->get_snap_context(), offset, size, bl, ceph::real_clock::now(), 0, in->truncate_size, in->truncate_seq, @@ -17589,6 +17599,7 @@ std::vector Client::get_tracked_keys() const noexcept "client_caps_release_delay", "client_deleg_break_on_open", "client_deleg_timeout", + "client_inject_write_delay_secs", "client_mount_timeout", "client_oc_max_dirty", "client_oc_max_dirty_age", @@ -17650,6 +17661,10 @@ void Client::handle_conf_change(const ConfigProxy& conf, mount_timeout = cct->_conf.get_val( 
"client_mount_timeout"); } + if (changed.count("client_inject_write_delay_secs")) { + injected_write_delay_secs = std::chrono::duration( + cct->_conf.get_val("client_inject_write_delay_secs")).count(); + } } void intrusive_ptr_add_ref(Inode *in) diff --git a/src/client/Client.h b/src/client/Client.h index 49477ac1cb0..6b7b4c7dbfa 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -1350,6 +1350,10 @@ protected: struct mount_state_t mount_state; struct initialize_state_t initialize_state; + int get_injected_write_delay_secs() const { + return injected_write_delay_secs; + } + private: class C_Read_Finisher : public Context { public: @@ -1962,6 +1966,7 @@ private: ceph::coarse_mono_time last_auto_reconnect; std::chrono::seconds caps_release_delay, mount_timeout; + int injected_write_delay_secs; // trace generation std::ofstream traceout; diff --git a/src/common/options/mds-client.yaml.in b/src/common/options/mds-client.yaml.in index a35c3c76489..f80562d08c0 100644 --- a/src/common/options/mds-client.yaml.in +++ b/src/common/options/mds-client.yaml.in @@ -597,4 +597,13 @@ options: default: 16 services: - mds_client - min: 1 \ No newline at end of file + min: 1 +- name: client_inject_write_delay_secs + type: secs + level: dev + desc: induce delay in write operation for testing + long_desc: Inject a delay in write operation after grabbing required cap references (Fb caps in this case). This config is disabled by default (value of 0) and is to be used for the purpose of validating a race case bug with concurrent fsync. + default: 0 + services: + - mds_client + min: 0 -- 2.39.5