]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/HealthMonitor: raise AUTH_INSECURE_GLOBAL_ID_RENEWAL[_ALLOWED]
authorSage Weil <sage@newdream.net>
Thu, 25 Mar 2021 22:07:53 +0000 (18:07 -0400)
committerIlya Dryomov <idryomov@gmail.com>
Mon, 12 Apr 2021 18:56:35 +0000 (20:56 +0200)
Two new alerts:

- AUTH_INSECURE_GLOBAL_ID_RENEWAL_ALLOWED if we are allowing clients to reclaim
global_ids in an insecure manner (for backwards compatibility until
clients are upgraded)

- AUTH_INSECURE_GLBOAL_ID_RENEWAL if there are currently clients connected that
do not know how to securely renew their global_id, as exposed by
auth_expose_insecure_global_id_reclaim=true.  The client auth names and IPs
are listed the alert details (up to a limit, at least).

The docs recommend operators mute these alerts instead of silencing, but
we still include option that allow the alerts to be disabled entirely.

Signed-off-by: Sage Weil <sage@newdream.net>
(cherry picked from commit 18b343b06e5dd904af425dc99e2c848e12f3b552)

Conflicts:
src/mon/HealthMonitor.cc [ commit e4bf716bfa07 ("mon: store
  a reference as member variable") not in octopus ]

doc/rados/operations/health-checks.rst
src/common/options.cc
src/mon/HealthMonitor.cc

index 32169dab333d5b52c23f5bc39cdf0af13b194be4..87f4bd6c927c1e184fe2696b38ae687124f7d57a 100644 (file)
@@ -122,6 +122,73 @@ The warning threshold may be adjusted with::
 
   ceph config set global mon_data_size_warn <size>
 
+AUTH_INSECURE_GLOBAL_ID_RECLAIM
+_______________________________
+
+One or more clients or daemons are connected to the cluster that are
+not securely reclaiming their global_id (a unique number identifying
+each entity in the cluster) when reconnecting to a monitor.  The
+client is being permitted to connect anyway because the
+``auth_allow_insecure_global_id_reclaim`` option is set to true (which may
+be necessary until all ceph clients have been upgraded), and the
+``auth_expose_insecure_global_id_reclaim`` option set to ``true`` (which
+allows monitors to detect clients with insecure reclaim early by forcing them to
+reconnect right after they first authenticate).
+
+You can identify which client(s) are using unpatched ceph client code with::
+
+  ceph health detail
+
+Clients global_id reclaim rehavior can also seen in the
+``global_id_status`` field in the dump of clients connected to an
+individual monitor (``reclaim_insecure`` means the client is
+unpatched and is contributing to this health alert)::
+
+  ceph tell mon.\* sessions
+
+We strongly recommend that all clients in the system are upgraded to a
+newer version of Ceph that correctly reclaims global_id values.  Once
+all clients have been updated, you can stop allowing insecure reconnections
+with::
+
+  ceph config set mon auth_allow_insecure_global_id_reclaim false
+
+If it is impractical to upgrade all clients immediately, you can silence
+this warning temporarily with::
+
+  ceph health mute AUTH_INSECURE_GLOBAL_ID_RECLAIM 1w   # 1 week
+
+Although we do NOT recommend doing so, you can also disable this warning indefinitely
+with::
+
+  ceph config set mon mon_warn_on_insecure_global_id_reclaim false
+
+AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED
+_______________________________________
+
+Ceph is currently configured to allow clients to reconnect to monitors using
+an insecure process to reclaim their previous global_id because the setting
+``auth_allow_insecure_global_id_reclaim`` is set to ``true``.  It may be necessary to
+leave this setting enabled while existing Ceph clients are upgraded to newer
+versions of Ceph that correctly and securely reclaim their global_id.
+
+If the ``AUTH_INSECURE_GLOBAL_ID_RECLAIM`` health alert has not also been raised and
+the ``auth_expose_insecure_global_id_reclaim`` setting has not been disabled (it is
+on by default), then there are currently no clients connected that need to be
+upgraded, and it is safe to disallow insecure global_id reclaim with::
+
+  ceph config set mon auth_allow_insecure_global_id_reclaim false
+
+If there are still clients that need to be upgraded, then this alert can be
+silenced temporarily with::
+
+  ceph health mute AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED 1w   # 1 week
+
+Although we do NOT recommend doing so, you can also disable this warning indefinitely
+with::
+
+  ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false
+
 
 Manager
 -------
index 3d4a16edede8f66f175ad3761a613f75d86e315d..4ce2689c234e4a04cdfbfc6c517b39afbdbab5dd 100644 (file)
@@ -1699,6 +1699,22 @@ std::vector<Option> get_global_options() {
     .add_service("mon")
     .set_description("time before OSDs who do not report to the mons are marked down (seconds)"),
 
+    Option("mon_warn_on_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .add_service("mon")
+    .set_description("issue AUTH_INSECURE_GLOBAL_ID_RECLAIM health warning if any connected clients are insecurely reclaiming global_id")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim_allowed")
+    .add_see_also("auth_allow_insecure_global_id_reclaim")
+    .add_see_also("auth_expose_insecure_global_id_reclaim"),
+
+    Option("mon_warn_on_insecure_global_id_reclaim_allowed", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .add_service("mon")
+    .set_description("issue AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED health warning if insecure global_id reclaim is allowed")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim")
+    .add_see_also("auth_allow_insecure_global_id_reclaim")
+    .add_see_also("auth_expose_insecure_global_id_reclaim"),
+
     Option("mon_warn_on_msgr2_not_enabled", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(true)
     .add_service("mon")
@@ -2268,12 +2284,16 @@ std::vector<Option> get_global_options() {
     .set_default(true)
     .set_description("Allow reclaiming global_id without presenting a valid ticket proving previous possession of that global_id")
     .set_long_description("Allowing unauthorized global_id (re)use poses a security risk.  Unfortunately, older clients may omit their ticket on reconnects and therefore rely on this being allowed for preserving their global_id for the lifetime of the client instance.  Setting this value to false would immediately prevent new connections from those clients (assuming auth_expose_insecure_global_id_reclaim set to true) and eventually break existing sessions as well (regardless of auth_expose_insecure_global_id_reclaim setting).")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim_allowed")
     .add_see_also("auth_expose_insecure_global_id_reclaim"),
 
     Option("auth_expose_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(true)
     .set_description("Force older clients that may omit their ticket on reconnects to reconnect as part of establishing a session")
     .set_long_description("In permissive mode (auth_allow_insecure_global_id_reclaim set to true), this helps with identifying clients that are not patched.  In enforcing mode (auth_allow_insecure_global_id_reclaim set to false), this is a fail-fast mechanism: don't establish a session that will almost inevitably be broken later.")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim_allowed")
     .add_see_also("auth_allow_insecure_global_id_reclaim"),
 
     Option("auth_debug", Option::TYPE_BOOL, Option::LEVEL_DEV)
index 26ffad6e32e656105e9e657adebf4bfe26ddca3a..8ad6d798b4afe7b81165d41d0e7ade39c8e1e450 100644 (file)
@@ -540,6 +540,7 @@ bool HealthMonitor::check_member_health()
 {
   dout(20) << __func__ << dendl;
   bool changed = false;
+  const auto max = g_conf().get_val<uint64_t>("mon_health_max_detail");
 
   // snapshot of usage
   DataStats stats;
@@ -609,6 +610,44 @@ bool HealthMonitor::check_member_health()
     }
   }
 
+  // AUTH_INSECURE_GLOBAL_ID_RECLAIM
+  if (g_conf().get_val<bool>("mon_warn_on_insecure_global_id_reclaim") &&
+      g_conf().get_val<bool>("auth_allow_insecure_global_id_reclaim")) {
+    // Warn if there are any clients that are insecurely renewing their global_id
+    std::lock_guard l(mon->session_map_lock);
+    list<std::string> detail;
+    for (auto p = mon->session_map.sessions.begin();
+        p != mon->session_map.sessions.end();
+        ++p) {
+      if ((*p)->global_id_status == global_id_status_t::RECLAIM_INSECURE) {
+       ostringstream ds;
+       ds << (*p)->entity_name << " at " << (*p)->addrs
+          << " is using insecure global_id reclaim";
+       detail.push_back(ds.str());
+       if (detail.size() >= max) {
+         detail.push_back("...");
+         break;
+       }
+      }
+    }
+    if (!detail.empty()) {
+      ostringstream ss;
+      ss << "client%plurals% %isorare% using insecure global_id reclaim";
+      auto& d = next.add("AUTH_INSECURE_GLOBAL_ID_RECLAIM", HEALTH_WARN, ss.str(),
+                        detail.size());
+      d.detail.swap(detail);
+    }
+  }
+  // AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED
+  if (g_conf().get_val<bool>("mon_warn_on_insecure_global_id_reclaim_allowed") &&
+      g_conf().get_val<bool>("auth_allow_insecure_global_id_reclaim")) {
+    ostringstream ss, ds;
+    ss << "mon%plurals% %isorare% allowing insecure global_id reclaim";
+    auto& d = next.add("AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED", HEALTH_WARN, ss.str(), 1);
+    ds << "mon." << mon->name << " has auth_allow_insecure_global_id_reclaim set to true";
+    d.detail.push_back(ds.str());
+  }
+
   auto p = quorum_checks.find(mon->rank);
   if (p == quorum_checks.end()) {
     if (next.empty()) {