]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/HealthMonitor: raise AUTH_INSECURE_GLOBAL_ID_RENEWAL[_ALLOWED]
authorSage Weil <sage@newdream.net>
Thu, 25 Mar 2021 22:07:53 +0000 (18:07 -0400)
committerIlya Dryomov <idryomov@gmail.com>
Mon, 12 Apr 2021 19:59:42 +0000 (21:59 +0200)
Two new alerts:

- AUTH_INSECURE_GLOBAL_ID_RENEWAL_ALLOWED if we are allowing clients to reclaim
global_ids in an insecure manner (for backwards compatibility until
clients are upgraded)

- AUTH_INSECURE_GLBOAL_ID_RENEWAL if there are currently clients connected that
do not know how to securely renew their global_id, as exposed by
auth_expose_insecure_global_id_reclaim=true.  The client auth names and IPs
are listed the alert details (up to a limit, at least).

The docs recommend operators mute these alerts instead of silencing, but
we still include option that allow the alerts to be disabled entirely.

Signed-off-by: Sage Weil <sage@newdream.net>
(cherry picked from commit 18b343b06e5dd904af425dc99e2c848e12f3b552)

Conflicts:
doc/rados/operations/health-checks.rst [ MON_DISK_* alerts
  present but not documented in nautilus; "ceph health mute"
  not in nautilus -- silencing temporarily is not possible ]
src/mon/HealthMonitor.cc [ commits e4bf716bfa07 ("mon: store
  a reference as member variable") and d0eb22f3ba55
  ("mon/health_checks: associate a count with health_alert_t")
  not in nautilus ]

doc/rados/operations/health-checks.rst
src/common/options.cc
src/mon/HealthMonitor.cc

index b6c007a041c0a1d3a30f3eccf38e34f39e03115b..03e357f481b942a84e3a9d462ef45dd89fcb7e13 100644 (file)
@@ -70,6 +70,62 @@ listen for v2 connections on the new default 3300 port.
 
 If a monitor is configured to listen for v1 connections on a non-standard port (not 6789), then the monmap will need to be modified manually.
 
+AUTH_INSECURE_GLOBAL_ID_RECLAIM
+_______________________________
+
+One or more clients or daemons are connected to the cluster that are
+not securely reclaiming their global_id (a unique number identifying
+each entity in the cluster) when reconnecting to a monitor.  The
+client is being permitted to connect anyway because the
+``auth_allow_insecure_global_id_reclaim`` option is set to true (which may
+be necessary until all ceph clients have been upgraded), and the
+``auth_expose_insecure_global_id_reclaim`` option set to ``true`` (which
+allows monitors to detect clients with insecure reclaim early by forcing them to
+reconnect right after they first authenticate).
+
+You can identify which client(s) are using unpatched ceph client code with::
+
+  ceph health detail
+
+Clients global_id reclaim rehavior can also seen in the
+``global_id_status`` field in the dump of clients connected to an
+individual monitor (``reclaim_insecure`` means the client is
+unpatched and is contributing to this health alert)::
+
+  ceph tell mon.\* sessions
+
+We strongly recommend that all clients in the system are upgraded to a
+newer version of Ceph that correctly reclaims global_id values.  Once
+all clients have been updated, you can stop allowing insecure reconnections
+with::
+
+  ceph config set mon auth_allow_insecure_global_id_reclaim false
+
+Although we do NOT recommend doing so, you can disable this warning indefinitely
+with::
+
+  ceph config set mon mon_warn_on_insecure_global_id_reclaim false
+
+AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED
+_______________________________________
+
+Ceph is currently configured to allow clients to reconnect to monitors using
+an insecure process to reclaim their previous global_id because the setting
+``auth_allow_insecure_global_id_reclaim`` is set to ``true``.  It may be necessary to
+leave this setting enabled while existing Ceph clients are upgraded to newer
+versions of Ceph that correctly and securely reclaim their global_id.
+
+If the ``AUTH_INSECURE_GLOBAL_ID_RECLAIM`` health alert has not also been raised and
+the ``auth_expose_insecure_global_id_reclaim`` setting has not been disabled (it is
+on by default), then there are currently no clients connected that need to be
+upgraded, and it is safe to disallow insecure global_id reclaim with::
+
+  ceph config set mon auth_allow_insecure_global_id_reclaim false
+
+Although we do NOT recommend doing so, you can disable this warning indefinitely
+with::
+
+  ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false
 
 
 Manager
index 8c743eee9e958f0408ee763fde0eca8cd131eb9a..768d6505d9d876a575e35e79e4722ea7932c3297 100644 (file)
@@ -1709,6 +1709,22 @@ std::vector<Option> get_global_options() {
     .add_service("mon")
     .set_description("time before OSDs who do not report to the mons are marked down (seconds)"),
 
+    Option("mon_warn_on_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .add_service("mon")
+    .set_description("issue AUTH_INSECURE_GLOBAL_ID_RECLAIM health warning if any connected clients are insecurely reclaiming global_id")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim_allowed")
+    .add_see_also("auth_allow_insecure_global_id_reclaim")
+    .add_see_also("auth_expose_insecure_global_id_reclaim"),
+
+    Option("mon_warn_on_insecure_global_id_reclaim_allowed", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .add_service("mon")
+    .set_description("issue AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED health warning if insecure global_id reclaim is allowed")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim")
+    .add_see_also("auth_allow_insecure_global_id_reclaim")
+    .add_see_also("auth_expose_insecure_global_id_reclaim"),
+
     Option("mon_warn_on_msgr2_not_enabled", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(true)
     .add_service("mon")
@@ -2269,12 +2285,16 @@ std::vector<Option> get_global_options() {
     .set_default(true)
     .set_description("Allow reclaiming global_id without presenting a valid ticket proving previous possession of that global_id")
     .set_long_description("Allowing unauthorized global_id (re)use poses a security risk.  Unfortunately, older clients may omit their ticket on reconnects and therefore rely on this being allowed for preserving their global_id for the lifetime of the client instance.  Setting this value to false would immediately prevent new connections from those clients (assuming auth_expose_insecure_global_id_reclaim set to true) and eventually break existing sessions as well (regardless of auth_expose_insecure_global_id_reclaim setting).")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim_allowed")
     .add_see_also("auth_expose_insecure_global_id_reclaim"),
 
     Option("auth_expose_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(true)
     .set_description("Force older clients that may omit their ticket on reconnects to reconnect as part of establishing a session")
     .set_long_description("In permissive mode (auth_allow_insecure_global_id_reclaim set to true), this helps with identifying clients that are not patched.  In enforcing mode (auth_allow_insecure_global_id_reclaim set to false), this is a fail-fast mechanism: don't establish a session that will almost inevitably be broken later.")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim")
+    .add_see_also("mon_warn_on_insecure_global_id_reclaim_allowed")
     .add_see_also("auth_allow_insecure_global_id_reclaim"),
 
     Option("auth_debug", Option::TYPE_BOOL, Option::LEVEL_DEV)
index 692bfdf4e7a6941c288dc375f99ee349a2628896..c48ea51723d9f1361bff88eea605f356fd548cbe 100644 (file)
@@ -199,6 +199,7 @@ bool HealthMonitor::check_member_health()
 {
   dout(20) << __func__ << dendl;
   bool changed = false;
+  const auto max = g_conf().get_val<uint64_t>("mon_health_max_detail");
 
   // snapshot of usage
   DataStats stats;
@@ -268,6 +269,43 @@ bool HealthMonitor::check_member_health()
     }
   }
 
+  // AUTH_INSECURE_GLOBAL_ID_RECLAIM
+  if (g_conf().get_val<bool>("mon_warn_on_insecure_global_id_reclaim") &&
+      g_conf().get_val<bool>("auth_allow_insecure_global_id_reclaim")) {
+    // Warn if there are any clients that are insecurely renewing their global_id
+    std::lock_guard l(mon->session_map_lock);
+    list<std::string> detail;
+    for (auto p = mon->session_map.sessions.begin();
+        p != mon->session_map.sessions.end();
+        ++p) {
+      if ((*p)->global_id_status == global_id_status_t::RECLAIM_INSECURE) {
+       ostringstream ds;
+       ds << (*p)->entity_name << " at " << (*p)->addrs
+          << " is using insecure global_id reclaim";
+       detail.push_back(ds.str());
+       if (detail.size() >= max) {
+         detail.push_back("...");
+         break;
+       }
+      }
+    }
+    if (!detail.empty()) {
+      ostringstream ss;
+      ss << "client%plurals% %isorare% using insecure global_id reclaim";
+      auto& d = next.add("AUTH_INSECURE_GLOBAL_ID_RECLAIM", HEALTH_WARN, ss.str());
+      d.detail.swap(detail);
+    }
+  }
+  // AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED
+  if (g_conf().get_val<bool>("mon_warn_on_insecure_global_id_reclaim_allowed") &&
+      g_conf().get_val<bool>("auth_allow_insecure_global_id_reclaim")) {
+    ostringstream ss, ds;
+    ss << "mon%plurals% %isorare% allowing insecure global_id reclaim";
+    auto& d = next.add("AUTH_INSECURE_GLOBAL_ID_RECLAIM_ALLOWED", HEALTH_WARN, ss.str());
+    ds << "mon." << mon->name << " has auth_allow_insecure_global_id_reclaim set to true";
+    d.detail.push_back(ds.str());
+  }
+
   auto p = quorum_checks.find(mon->rank);
   if (p == quorum_checks.end()) {
     if (next.empty()) {