git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: raise health warning if client lacks feature for root_squash
author: Patrick Donnelly <pdonnell@redhat.com>
Fri, 3 May 2024 00:50:37 +0000 (20:50 -0400)
committer: Patrick Donnelly <pdonnell@redhat.com>
Tue, 7 May 2024 12:19:28 +0000 (08:19 -0400)
Rather than evict all clients lacking this feature bit, raise a health error
that pushes the administrator to address it. This avoids the surprise of having
all affected clients suddenly evicted in the cluster.

Fixes: https://tracker.ceph.com/issues/65733
Fixes: 954ed30
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
src/mds/Beacon.cc
src/mds/Server.cc
src/mds/SessionMap.cc
src/mds/SessionMap.h
src/messages/MMDSBeacon.h

index 9c919d36b899b48e4668e44b094ef160469ac3c2..10f2b79e66419f8e3b1be3a720aa93d6b87403bf 100644 (file)
@@ -486,6 +486,30 @@ void Beacon::notify_health(MDSRank const *mds)
     health.metrics.push_back(m);
   }
 
+  // Report a health warning if clients have broken root_squash
+  if (auto c = mds->sessionmap.num_broken_root_squash_clients(); c > 0) {
+    std::vector<MDSHealthMetric> metrics;
+
+    for (auto&& session : mds->sessionmap.get_broken_root_squash_clients()) {
+      CachedStackStringStream css;
+      *css << "Client " << session->get_human_name() << " has broken root_squash implementation";
+      MDSHealthMetric m(MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH, HEALTH_ERR, css->strv());
+      m.metadata["client_id"] = stringify(session->get_client());
+      metrics.emplace_back(std::move(m));
+    }
+
+    if (metrics.size() <= (size_t)g_conf()->mds_health_summarize_threshold) {
+      health.metrics.insert(std::end(health.metrics), std::make_move_iterator(std::begin(metrics)), std::make_move_iterator(std::end(metrics)));
+    } else {
+      CachedStackStringStream css;
+      *css << "There are " << c << " clients with broken root_squash implementations";
+      dout(20) << css->strv() << dendl;
+      MDSHealthMetric m(MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH, HEALTH_ERR, css->strv());
+      m.metadata["client_count"] = stringify(c);
+      health.metrics.push_back(std::move(m));
+    }
+  }
+
   // Report if we have significantly exceeded our cache size limit
   if (mds->mdcache->cache_overfull()) {
     CachedStackStringStream css;
index 40fbccc56cd4e0cd2acd925ff3e6d539a827ce36..3dd2cc615601446bb22da38c23b796811130ba61 100644 (file)
@@ -722,15 +722,7 @@ void Server::handle_client_session(const cref_t<MClientSession> &m)
       std::string_view fs_name = mds->mdsmap->get_fs_name();
       bool client_caps_check = client_metadata.features.test(CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK);
       if (session->auth_caps.root_squash_in_caps(fs_name) && !client_caps_check) {
-       CachedStackStringStream css;
-       *css << "client lacks CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK needed to enforce 'root_squash' MDS auth caps";
-       send_reject_message(css->strv());
-       mds->clog->warn() << "client session (" << session->info.inst
-                          << ") lacks CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK "
-                          << " needed to enforce 'root_squash' MDS auth caps";
-       session->clear();
-       break;
-
+        mds->sessionmap.add_to_broken_root_squash_clients(session);
       }
       // Special case for the 'root' metadata path; validate that the claimed
       // root is actually within the caps of the session
@@ -1590,9 +1582,7 @@ void Server::handle_client_reconnect(const cref_t<MClientReconnect> &m)
       std::string_view fs_name = mds->mdsmap->get_fs_name();
       bool client_caps_check = session->info.client_metadata.features.test(CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK);
       if (session->auth_caps.root_squash_in_caps(fs_name) && !client_caps_check) {
-       CachedStackStringStream css;
-       *css << "client lacks CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK needed to enforce 'root_squash' MDS auth caps";
-       error_str = css->strv();
+        mds->sessionmap.add_to_broken_root_squash_clients(session);
       }
     }
 
index 9cc2b013847e276e6ed0e72034523d92adf4c781..9bec67c245d61e7cb7c7d97b5ec029c9ea309529 100644 (file)
@@ -705,6 +705,7 @@ void SessionMap::remove_session(Session *s)
 
   s->trim_completed_requests(0);
   s->item_session_list.remove_myself();
+  broken_root_squash_clients.erase(s);
   session_map.erase(s->info.inst.name);
   dirty_sessions.erase(s->info.inst.name);
   null_sessions.insert(s->info.inst.name);
index 360dd66a27bb5c24dc3696822d01381450cbf1c8..623f20a0eb7a80d00b9c165974b197138790fec4 100644 (file)
@@ -682,6 +682,16 @@ public:
   void remove_session(Session *s);
   void touch_session(Session *session);
 
+  void add_to_broken_root_squash_clients(Session* s) {
+    broken_root_squash_clients.insert(s);
+  }
+  uint64_t num_broken_root_squash_clients() const {
+    return broken_root_squash_clients.size();
+  }
+  auto const& get_broken_root_squash_clients() const {
+    return broken_root_squash_clients;
+  }
+
   Session *get_oldest_session(int state) {
     auto by_state_entry = by_state.find(state);
     if (by_state_entry == by_state.end() || by_state_entry->second->empty())
@@ -849,6 +859,8 @@ private:
 
   bool validate_and_encode_session(MDSRank *mds, Session *session, bufferlist& bl);
   void apply_blocklist(const std::set<entity_name_t>& victims);
+
+  std::set<Session*> broken_root_squash_clients;
 };
 
 std::ostream& operator<<(std::ostream &out, const Session &s);
index ba2d52d11ee3f98a5ff259c649c4ee4ce0b51daa..b87c3153af3df4110fb206d47a1adb831b2bae0c 100644 (file)
@@ -47,6 +47,7 @@ enum mds_metric_t {
   MDS_HEALTH_SLOW_METADATA_IO,
   MDS_HEALTH_CLIENTS_LAGGY,
   MDS_HEALTH_CLIENTS_LAGGY_MANY,
+  MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH,
   MDS_HEALTH_DUMMY, // not a real health warning, for testing
 };
 
@@ -67,6 +68,7 @@ inline const char *mds_metric_name(mds_metric_t m)
   case MDS_HEALTH_SLOW_METADATA_IO: return "MDS_SLOW_METADATA_IO";
   case MDS_HEALTH_CLIENTS_LAGGY: return "MDS_CLIENTS_LAGGY";
   case MDS_HEALTH_CLIENTS_LAGGY_MANY: return "MDS_CLIENTS_LAGGY_MANY";
+  case MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH: return "MDS_CLIENTS_BROKEN_ROOTSQUASH";
   case MDS_HEALTH_DUMMY: return "MDS_DUMMY";
   default:
     return "???";
@@ -103,6 +105,8 @@ inline const char *mds_metric_summary(mds_metric_t m)
     return "%num% MDSs report slow metadata IOs";
   case MDS_HEALTH_CLIENTS_LAGGY:
     return "%num% client(s) laggy due to laggy OSDs";  
+  case MDS_HEALTH_CLIENTS_BROKEN_ROOTSQUASH:
+    return "%num% MDS report clients with broken root_squash implementation";
   default:
     return "???";
   }