From: Venky Shankar Date: Wed, 3 Jun 2020 07:17:38 +0000 (-0400) Subject: client: introduce timeout for client shutdown X-Git-Tag: v15.2.5~24^2~9 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d5a742a0b2747858a905f0f31206b341abd678fb;p=ceph.git client: introduce timeout for client shutdown Client::shutdown() could indefinitely wait when tearing down MDS sessions if an MDS is unreachable during shutdown, but a valid session existed, i.e., Client::mount() was successful. These failures were initially observed in mgr/volumes tests in teuthology and lately during rados/mgr selftests where manager plugins do not respawn as volumes plugin waits for cleaning up libcephfs handles via Client::shutdown(). Fixes: http://tracker.ceph.com/issues/44276 Signed-off-by: Venky Shankar (cherry picked from commit 7c716be4b94d7640d051d427f5d7e97fefe5baf2) Conflicts: src/common/options.cc - octopus does not have "client_asio_thread_count", so omit that part of the master commit --- diff --git a/src/client/Client.cc b/src/client/Client.cc index 6cf7be9f837..c28ac4b504a 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2075,6 +2075,7 @@ void Client::_closed_mds_session(MetaSession *s, bool rejected) mount_cond.notify_all(); remove_session_caps(s); kick_requests_closed(s); + mds_ranks_closing.erase(s->mds_num); if (s->state == MetaSession::STATE_CLOSED) mds_sessions.erase(s->mds_num); } @@ -6025,13 +6026,27 @@ void Client::_close_sessions() for (auto &p : mds_sessions) { if (p.second.state != MetaSession::STATE_CLOSING) { _close_mds_session(&p.second); + mds_ranks_closing.insert(p.first); } } // wait for sessions to close - ldout(cct, 2) << "waiting for " << mds_sessions.size() << " mds sessions to close" << dendl; + double timo = cct->_conf.get_val("client_shutdown_timeout").count(); + ldout(cct, 2) << "waiting for " << mds_ranks_closing.size() << " mds session(s) to close (timeout: " + << timo << "s)" << dendl; std::unique_lock 
l{client_lock, std::adopt_lock}; - mount_cond.wait(l); + if (!timo) { + mount_cond.wait(l); + } else if (!mount_cond.wait_for(l, ceph::make_timespan(timo), [this] { return mds_ranks_closing.empty(); })) { + ldout(cct, 1) << mds_ranks_closing.size() << " mds(s) did not respond to session close -- timing out." << dendl; + while (!mds_ranks_closing.empty()) { + auto session = mds_sessions.at(*mds_ranks_closing.begin()); + // this prunes entry from mds_sessions and mds_ranks_closing + _closed_mds_session(&session, -ETIMEDOUT); + } + } + + mds_ranks_closing.clear(); l.release(); } } diff --git a/src/client/Client.h b/src/client/Client.h index e8fecf9753e..7ca604200eb 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -1212,6 +1212,7 @@ private: // mds sessions map mds_sessions; // mds -> push seq + std::set mds_ranks_closing; // mds ranks currently tearing down sessions std::list waiting_for_mdsmap; // FSMap, for when using mds_command diff --git a/src/common/options.cc b/src/common/options.cc index c0941ceb503..372f6997267 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -8444,6 +8444,14 @@ std::vector