From a3aa90e9e5a4a570e4ba0c9e9fd3ac47d5c01656 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 27 Oct 2020 22:40:15 -0400 Subject: [PATCH] client: try to renew caps and flush old cap releases In the multi-threaded case, if more than one thread is waiting on the client_lock, which one is woken up is not deterministic. So in the worst case tick() may be stuck for a long time. When the MDS daemons are running out of resources they will recall caps and tell the client to release them, so the client may receive a surge of thousands of recall_state requests. Fixes: https://tracker.ceph.com/issues/47842 Signed-off-by: Xiubo Li --- src/client/Client.cc | 36 ++++++++++++++++++++++++++++-------- src/client/Client.h | 1 + 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 35dbd94058c..cdd0add312b 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2177,6 +2177,18 @@ void Client::handle_client_session(const MConstRef& m) break; case CEPH_SESSION_RECALL_STATE: + /* + * Call the renew caps and flush cap releases just before + * trimming the caps in case the tick() won't get a chance + * to run them, which could cause the client to be blocklisted + * and MDS daemons trying to recall the caps again and + * again. + * + * In most cases it will do nothing, and the new cap releases + * added by trim_caps() followed will be deferred flushing + * by tick(). + */ + renew_and_flush_cap_releases(); trim_caps(session, m->get_max_caps()); break; @@ -6420,6 +6432,20 @@ void Client::flush_cap_releases() } } +void Client::renew_and_flush_cap_releases() +{ + ceph_assert(ceph_mutex_is_locked_by_me(client_lock)); + + if (!mount_aborted && mdsmap->get_epoch()) { + // renew caps? 
+ utime_t el = ceph_clock_now() - last_cap_renew; + if (unlikely(el > mdsmap->get_session_timeout() / 3.0)) + renew_caps(); + + flush_cap_releases(); + } +} + void Client::tick() { ldout(cct, 20) << "tick" << dendl; @@ -6447,6 +6473,7 @@ void Client::tick() */ if (is_mounting() && !mds_requests.empty()) { MetaRequest *req = mds_requests.begin()->second; + if (req->op_stamp + cct->_conf->client_mount_timeout < now) { req->abort(-ETIMEDOUT); if (req->caller_cond) { @@ -6460,14 +6487,7 @@ void Client::tick() } } - if (!mount_aborted && mdsmap->get_epoch()) { - // renew caps? - utime_t el = now - last_cap_renew; - if (el > mdsmap->get_session_timeout() / 3.0) - renew_caps(); - - flush_cap_releases(); - } + renew_and_flush_cap_releases(); // delayed caps xlist::iterator p = delayed_list.begin(); diff --git a/src/client/Client.h b/src/client/Client.h index 5bb8e613284..84ad4d553a5 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -737,6 +737,7 @@ public: void renew_caps(); void renew_caps(MetaSession *session); void flush_cap_releases(); + void renew_and_flush_cap_releases(); void tick(); void inc_dentry_nr() { -- 2.47.3