From 7c716be4b94d7640d051d427f5d7e97fefe5baf2 Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Wed, 3 Jun 2020 03:17:38 -0400 Subject: [PATCH] client: introduce timeout for client shutdown Client::shutdown() could indefinitely wait when tearing down MDS sessions if an MDS is unreachable during shutdown, but a valid session existed, i.e., Client::mount() was successful. These failures were initially observed in mgr/volumes tests in teuthology and lately during rados/mgr selftests where manager plugins do not respawn as volumes plugin waits for cleaning up libcephfs handles via Client::shutdown(). Fixes: http://tracker.ceph.com/issues/44276 Signed-off-by: Venky Shankar --- src/client/Client.cc | 19 +++++++++++++++++-- src/client/Client.h | 1 + src/common/options.cc | 10 +++++++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index c2129b01652b..dde2d3454331 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2081,6 +2081,7 @@ void Client::_closed_mds_session(MetaSession *s, int err, bool rejected) mount_cond.notify_all(); remove_session_caps(s, err); kick_requests_closed(s); + mds_ranks_closing.erase(s->mds_num); if (s->state == MetaSession::STATE_CLOSED) mds_sessions.erase(s->mds_num); } @@ -6057,13 +6058,27 @@ void Client::_close_sessions() for (auto &p : mds_sessions) { if (p.second.state != MetaSession::STATE_CLOSING) { _close_mds_session(&p.second); + mds_ranks_closing.insert(p.first); } } // wait for sessions to close - ldout(cct, 2) << "waiting for " << mds_sessions.size() << " mds sessions to close" << dendl; + double timo = cct->_conf.get_val("client_shutdown_timeout").count(); + ldout(cct, 2) << "waiting for " << mds_ranks_closing.size() << " mds session(s) to close (timeout: " + << timo << "s)" << dendl; std::unique_lock l{client_lock, std::adopt_lock}; - mount_cond.wait(l); + if (!timo) { + mount_cond.wait(l); + } else if (!mount_cond.wait_for(l, ceph::make_timespan(timo), 
[this] { return mds_ranks_closing.empty(); })) { + ldout(cct, 1) << mds_ranks_closing.size() << " mds(s) did not respond to session close -- timing out." << dendl; + while (!mds_ranks_closing.empty()) { + auto session = mds_sessions.at(*mds_ranks_closing.begin()); + // this prunes entry from mds_sessions and mds_ranks_closing + _closed_mds_session(&session, -ETIMEDOUT); + } + } + + mds_ranks_closing.clear(); l.release(); } } diff --git a/src/client/Client.h b/src/client/Client.h index 401c13481d27..4e63237c6f58 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -1212,6 +1212,7 @@ private: // mds sessions map mds_sessions; // mds -> push seq + std::set mds_ranks_closing; // mds ranks currently tearing down sessions std::list waiting_for_mdsmap; // FSMap, for when using mds_command diff --git a/src/common/options.cc b/src/common/options.cc index 5f25c0435053..c2803245a723 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -8622,8 +8622,16 @@ std::vector