From: Venky Shankar Date: Wed, 3 Jun 2020 07:17:38 +0000 (-0400) Subject: client: introduce timeout for client shutdown X-Git-Tag: v14.2.11~19^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=a58e642a167e3ea8f2eef03044d031ce3422b4ff;p=ceph.git client: introduce timeout for client shutdown Client::shutdown() could wait indefinitely when tearing down MDS sessions if an MDS is unreachable during shutdown, but a valid session existed, i.e., Client::mount() was successful. These failures were initially observed in mgr/volumes tests in teuthology and lately during rados/mgr selftests where manager plugins do not respawn as the volumes plugin waits for cleaning up libcephfs handles via Client::shutdown(). Fixes: http://tracker.ceph.com/issues/44276 Signed-off-by: Venky Shankar (cherry picked from commit 7c716be4b94d7640d051d427f5d7e97fefe5baf2) Conflicts: src/client/Client.cc src/client/Client.h src/common/options.cc Condition variables in nautilus use WaitInterval() rather than wait_for(). Client::_closed_mds_session() does not accept an errno parameter in nautilus -- so adjust for that. Also, fix up config option conflict. 
--- diff --git a/src/client/Client.cc b/src/client/Client.cc index 3a02b72c2ba29..5a4d3135b4c8a 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -2083,6 +2083,7 @@ void Client::_closed_mds_session(MetaSession *s) mount_cond.Signal(); remove_session_caps(s); kick_requests_closed(s); + mds_ranks_closing.erase(s->mds_num); mds_sessions.erase(s->mds_num); } @@ -6006,12 +6007,34 @@ void Client::_close_sessions() for (auto &p : mds_sessions) { if (p.second.state != MetaSession::STATE_CLOSING) { _close_mds_session(&p.second); + mds_ranks_closing.insert(p.first); } } // wait for sessions to close - ldout(cct, 2) << "waiting for " << mds_sessions.size() << " mds sessions to close" << dendl; - mount_cond.Wait(client_lock); + double timo = cct->_conf.get_val("client_shutdown_timeout").count(); + ldout(cct, 2) << "waiting for " << mds_ranks_closing.size() << " mds session(s) to close (timeout: " + << timo << "s)" << dendl; + if (!timo) { + mount_cond.Wait(client_lock); + } else { + int r = 0; + utime_t t; + t.set_from_double(timo); + while (!mds_ranks_closing.empty() && r == 0) { + r = mount_cond.WaitInterval(client_lock, t); + } + if (r != 0) { + ldout(cct, 1) << mds_ranks_closing.size() << " mds(s) did not respond to session close -- timing out." 
<< dendl; + while (!mds_ranks_closing.empty()) { + auto session = mds_sessions.at(*mds_ranks_closing.begin()); + // this prunes entry from mds_sessions and mds_ranks_closing + _closed_mds_session(&session); + } + } + } + + mds_ranks_closing.clear(); } } diff --git a/src/client/Client.h b/src/client/Client.h index 6e34e4ba1bbb8..0747baab91863 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -1210,6 +1210,8 @@ private: // mds sessions map mds_sessions; // mds -> push seq + + std::set mds_ranks_closing; // mds ranks currently tearing down sessions list waiting_for_mdsmap; // FSMap, for when using mds_command diff --git a/src/common/options.cc b/src/common/options.cc index 149d14db7c8fa..43f10eccec474 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -8441,6 +8441,14 @@ std::vector