From ec3840b29196ac56131bb974462593caf29426ce Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 17 Jun 2019 12:58:58 +0800 Subject: [PATCH] mds: don't mark unresponsive sessions holding no caps stale When an unresponsive MDS session holds no caps, do not mark it stale even after session_timeout; at session_autoclose, evict it directly. Fixes: http://tracker.ceph.com/issues/17854 Signed-off-by: Rishabh Dave (cherry picked from commit 98af31d10f362c05ea8ed57495973b08599431e7) Conflicts: src/mds/Server.cc --- src/mds/Locker.cc | 8 ++++++ src/mds/Locker.h | 1 + src/mds/Server.cc | 60 +++++++++++++++++++++++++++----------------- src/mds/SessionMap.h | 6 +++++ 4 files changed, 52 insertions(+), 23 deletions(-) diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 6126f3193149..4af5d42e08ca 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -2669,6 +2669,14 @@ void Locker::mark_need_snapflush_inode(CInode *in) } } +bool Locker::is_revoking_any_caps_from(client_t client) +{ + auto it = revoking_caps_by_client.find(client); + if (it == revoking_caps_by_client.end()) + return false; + return !it->second.empty(); +} + void Locker::_do_null_snapflush(CInode *head_in, client_t client, snapid_t last) { dout(10) << "_do_null_snapflush client." << client << " on " << *head_in << dendl; diff --git a/src/mds/Locker.h b/src/mds/Locker.h index ce9aeb22044b..5f8fa9c720f6 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -214,6 +214,7 @@ protected: public: void snapflush_nudge(CInode *in); void mark_need_snapflush_inode(CInode *in); + bool is_revoking_any_caps_from(client_t client); // local public: diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 82ad9c8c92f4..680a479e9512 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -782,6 +782,8 @@ void Server::find_idle_sessions() double queue_max_age = mds->get_dispatch_queue_max_age(ceph_clock_now()); double cutoff = queue_max_age + mds->mdsmap->get_session_timeout(); + std::vector to_evict; + const auto sessions_p1 = mds->sessionmap.by_state.find(Session::STATE_OPEN); if (sessions_p1 != mds->sessionmap.by_state.end() && !sessions_p1->second->empty()) { std::vector new_stale; @@ -803,6 +805,21 @@ void Server::find_idle_sessions() } } + if (last_cap_renew_span >= mds->mdsmap->get_session_autoclose()) { + dout(20) << "evicting session " << session->info.inst << " since autoclose " + "has arrived" << dendl; + // evict session without marking it stale + to_evict.push_back(session); + continue; + } + + if (!session->is_any_flush_waiter() && + !mds->locker->is_revoking_any_caps_from(session->get_client())) { + dout(20) << "deferring marking session " << session->info.inst << " stale " + "since it holds no caps" << dendl; + continue; + } + dout(10) << "new stale session " << session->info.inst << " last renewed caps " << last_cap_renew_span << "s ago" << dendl; new_stale.push_back(session); @@ -833,38 +850,35 @@ void Server::find_idle_sessions() } // Collect a list of sessions exceeding the autoclose threshold - std::vector to_evict; const auto sessions_p2 = mds->sessionmap.by_state.find(Session::STATE_STALE); - if (sessions_p2 == mds->sessionmap.by_state.end() || sessions_p2->second->empty()) { - return; + if (sessions_p2 != mds->sessionmap.by_state.end() && !sessions_p2->second->empty()) { + for (auto session : *(sessions_p2->second)) { + assert(session->is_stale()); + auto last_cap_renew_span = std::chrono::duration(now - session->last_cap_renew).count(); + if (last_cap_renew_span < cutoff) { + dout(20) << "oldest stale session is " << session->info.inst + << " and recently renewed caps " << last_cap_renew_span << "s ago" << dendl; + break; + } + to_evict.push_back(session); + } } - const auto &stale_sessions = sessions_p2->second; - assert(stale_sessions != nullptr); - for (const auto &session: *stale_sessions) { - auto last_cap_renew_span = std::chrono::duration(now-session->last_cap_renew).count(); + for (auto session: to_evict) { if (session->is_importing()) { - dout(10) << "stopping at importing session " << session->info.inst << dendl; - break; - } - assert(session->is_stale()); - if (last_cap_renew_span < cutoff) { - dout(20) << "oldest stale session is " << session->info.inst << " and recently renewed caps " << last_cap_renew_span << "s ago" << dendl; - break; + dout(10) << "skipping session " << session->info.inst << ", it's being imported" << dendl; + continue; } - to_evict.push_back(session); - } - - for (const auto &session: to_evict) { - auto last_cap_renew_span = std::chrono::duration(now-session->last_cap_renew).count(); - mds->clog->warn() << "evicting unresponsive client " << *session << ", after " << last_cap_renew_span << " seconds"; - dout(10) << "autoclosing stale session " << session->info.inst << " last renewed caps " << last_cap_renew_span << "s ago" << dendl; + auto last_cap_renew_span = std::chrono::duration(now - session->last_cap_renew).count(); + mds->clog->warn() << "evicting unresponsive client " << *session + << ", after " << last_cap_renew_span << " seconds"; + dout(10) << "autoclosing stale session " << session->info.inst + << " last renewed caps " << last_cap_renew_span << "s ago" << dendl; if (g_conf->mds_session_blacklist_on_timeout) { std::stringstream ss; - mds->evict_client(session->get_client().v, false, true, - ss, nullptr); + mds->evict_client(session->get_client().v, false, true, ss, nullptr); } else { kill_session(session, NULL); } diff --git a/src/mds/SessionMap.h b/src/mds/SessionMap.h index a404e19813c6..d528205c5b2c 100644 --- a/src/mds/SessionMap.h +++ b/src/mds/SessionMap.h @@ -271,13 +271,19 @@ public: void touch_cap(Capability *cap) { caps.push_front(&cap->item_session_caps); } + void touch_cap_bottom(Capability *cap) { caps.push_back(&cap->item_session_caps); } + void touch_lease(ClientLease *r) { leases.push_back(&r->item_session_lease); } + bool is_any_flush_waiter() { + return !waitfor_flush.empty(); + } + // -- leases -- uint32_t lease_seq; -- 2.47.3