From 2919c4cfdee18c7dacc37fb479558882bd1d99f2 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Thu, 2 May 2024 20:50:37 -0400 Subject: [PATCH] mds: raise health warning if client lacks feature for root_squash Rather than evict all clients lacking this feature bit, raise a health error that pushes the administrator to address it. This avoids the surprise of having all affected clients suddenly evicted in the cluster. Fixes: https://tracker.ceph.com/issues/65733 Fixes: 954ed30 Signed-off-by: Patrick Donnelly (cherry picked from commit 66ff5c9fc8d4664f18b2fa462e96e5548c35951f) Conflicts: src/messages/MMDSBeacon.h: missing health beacon type (cherry picked from commit 480b2cb199e6c933930ece59a3eca9c4cdf29c50) --- src/mds/Beacon.cc | 24 ++++++++++++++++++++++++ src/mds/Server.cc | 14 ++------------ src/mds/SessionMap.cc | 1 + src/mds/SessionMap.h | 12 ++++++++++++ src/messages/MMDSBeacon.h | 5 +++++ 5 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc index 5dd319a14057c..b67e0a31b4073 100644 --- a/src/mds/Beacon.cc +++ b/src/mds/Beacon.cc @@ -484,6 +484,30 @@ void Beacon::notify_health(MDSRank const *mds) health.metrics.push_back(m); } + // Report a health warning if clients have broken root_squash + if (auto c = mds->sessionmap.num_broken_root_squash_clients(); c > 0) { + std::vector<MDSHealthMetric> metrics; + + for (auto&& session : mds->sessionmap.get_broken_root_squash_clients()) { + CachedStackStringStream css; + *css << "Client " << session->get_human_name() << " has broken root_squash implementation"; + MDSHealthMetric m(MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH, HEALTH_ERR, css->strv()); + m.metadata["client_id"] = stringify(session->get_client()); + metrics.emplace_back(std::move(m)); + } + + if (metrics.size() <= (size_t)g_conf()->mds_health_summarize_threshold) { + health.metrics.insert(std::end(health.metrics), std::make_move_iterator(std::begin(metrics)), std::make_move_iterator(std::end(metrics))); + } else { + 
CachedStackStringStream css; + *css << "There are " << c << " clients with broken root_squash implementations"; + dout(20) << css->strv() << dendl; + MDSHealthMetric m(MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH, HEALTH_ERR, css->strv()); + m.metadata["client_count"] = stringify(c); + health.metrics.push_back(std::move(m)); + } + } + // Report if we have significantly exceeded our cache size limit if (mds->mdcache->cache_overfull()) { CachedStackStringStream css; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 23a3a82d4708a..2f8b376cea623 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -720,15 +720,7 @@ void Server::handle_client_session(const cref_t &m) std::string_view fs_name = mds->mdsmap->get_fs_name(); bool client_caps_check = client_metadata.features.test(CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK); if (session->auth_caps.root_squash_in_caps(fs_name) && !client_caps_check) { - CachedStackStringStream css; - *css << "client lacks CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK needed to enforce 'root_squash' MDS auth caps"; - send_reject_message(css->strv()); - mds->clog->warn() << "client session (" << session->info.inst - << ") lacks CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK " - << " needed to enforce 'root_squash' MDS auth caps"; - session->clear(); - break; - + mds->sessionmap.add_to_broken_root_squash_clients(session); } // Special case for the 'root' metadata path; validate that the claimed // root is actually within the caps of the session @@ -1578,9 +1570,7 @@ void Server::handle_client_reconnect(const cref_t &m) std::string_view fs_name = mds->mdsmap->get_fs_name(); bool client_caps_check = session->info.client_metadata.features.test(CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK); if (session->auth_caps.root_squash_in_caps(fs_name) && !client_caps_check) { - CachedStackStringStream css; - *css << "client lacks CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK needed to enforce 'root_squash' MDS auth caps"; - error_str = css->strv(); + 
mds->sessionmap.add_to_broken_root_squash_clients(session); } } diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc index 9cc2b013847e2..9bec67c245d61 100644 --- a/src/mds/SessionMap.cc +++ b/src/mds/SessionMap.cc @@ -705,6 +705,7 @@ void SessionMap::remove_session(Session *s) s->trim_completed_requests(0); s->item_session_list.remove_myself(); + broken_root_squash_clients.erase(s); session_map.erase(s->info.inst.name); dirty_sessions.erase(s->info.inst.name); null_sessions.insert(s->info.inst.name); diff --git a/src/mds/SessionMap.h b/src/mds/SessionMap.h index 360dd66a27bb5..623f20a0eb7a8 100644 --- a/src/mds/SessionMap.h +++ b/src/mds/SessionMap.h @@ -682,6 +682,16 @@ public: void remove_session(Session *s); void touch_session(Session *session); + void add_to_broken_root_squash_clients(Session* s) { + broken_root_squash_clients.insert(s); + } + uint64_t num_broken_root_squash_clients() const { + return broken_root_squash_clients.size(); + } + auto const& get_broken_root_squash_clients() const { + return broken_root_squash_clients; + } + Session *get_oldest_session(int state) { auto by_state_entry = by_state.find(state); if (by_state_entry == by_state.end() || by_state_entry->second->empty()) @@ -849,6 +859,8 @@ private: bool validate_and_encode_session(MDSRank *mds, Session *session, bufferlist& bl); void apply_blocklist(const std::set& victims); + + std::set<Session*> broken_root_squash_clients; }; std::ostream& operator<<(std::ostream &out, const Session &s); diff --git a/src/messages/MMDSBeacon.h b/src/messages/MMDSBeacon.h index d843b0e766107..2323f78d8a7ed 100644 --- a/src/messages/MMDSBeacon.h +++ b/src/messages/MMDSBeacon.h @@ -46,6 +46,8 @@ enum mds_metric_t { MDS_HEALTH_CACHE_OVERSIZED, MDS_HEALTH_SLOW_METADATA_IO, MDS_HEALTH_CLIENTS_LAGGY, + MDS_HEALTH_CLIENTS_LAGGY_MANY, + MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH, MDS_HEALTH_DUMMY, // not a real health warning, for testing }; @@ -65,6 +67,7 @@ inline const char *mds_metric_name(mds_metric_t m) case 
MDS_HEALTH_CACHE_OVERSIZED: return "MDS_CACHE_OVERSIZED"; case MDS_HEALTH_SLOW_METADATA_IO: return "MDS_SLOW_METADATA_IO"; case MDS_HEALTH_CLIENTS_LAGGY: return "MDS_CLIENTS_LAGGY"; + case MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH: return "MDS_CLIENTS_BROKEN_ROOTSQUASH"; case MDS_HEALTH_DUMMY: return "MDS_DUMMY"; default: return "???"; @@ -101,6 +104,8 @@ inline const char *mds_metric_summary(mds_metric_t m) return "%num% MDSs report slow metadata IOs"; case MDS_HEALTH_CLIENTS_LAGGY: return "%num% client(s) laggy due to laggy OSDs"; + case MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH: + return "%num% MDS report clients with broken root_squash implementation"; default: return "???"; } -- 2.39.5