git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: fail fast when unauthorized global_id (re)use is disallowed
author: Ilya Dryomov <idryomov@gmail.com>
Thu, 25 Mar 2021 19:59:13 +0000 (20:59 +0100)
committer: Ilya Dryomov <idryomov@gmail.com>
Mon, 12 Apr 2021 19:59:41 +0000 (21:59 +0200)
When unauthorized global_id (re)use is disallowed, we don't want to
let unpatched clients in because they wouldn't be able to reestablish
their monitor session later, resulting in subtle hangs and disrupted
user workloads.

Denying the initial connect for all legacy (CephXAuthenticate < v3)
clients is not feasible because a large subset of them never stopped
presenting their ticket on reconnects and are therefore compatible with
enforcing mode: most notably all kernel clients but also pre-luminous
userspace clients.  They don't need to be patched and excluding them
would significantly hamper the adoption of enforcing mode.

Instead, force clients that we are not sure about to reconnect shortly
after they go through authentication and obtain global_id.  This is
done in Monitor::dispatch_op() to capture both msgr1 and msgr2, most
likely instead of dispatching mon_subscribe.

We need to let mon_getmap through for "ceph ping" and "ceph tell" to
work.  This does mean that we share the monmap, which lets the client
return from MonClient::authenticate() considering authentication to be
finished and causing the potential reconnect error to not propagate to
the user -- the client would hang waiting for remaining cluster maps.
For msgr1, this is unavoidable because the monmap is sent immediately
after the final MAuthReply.  But for msgr2 this is rare: most of the
time we get to their mon_subscribe and cut the connection before they
process the monmap!

Regardless, the user doesn't get a chance to start a workload since
there is no proper higher-level session at that point.

To help with identifying clients that need patching, add global_id and
global_id_status to "sessions" output.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit 08766a17edebb7450cd9b17cc2dc01efc068bb94)

Conflicts:
src/mon/Monitor.cc [ commit e1163b445bbf ("mon: print
  entity_name along with caps to debug log") not in nautilus ]

src/auth/AuthServiceHandler.cc
src/auth/AuthServiceHandler.h
src/common/legacy_config_opts.h
src/common/options.cc
src/mon/Monitor.cc
src/mon/Session.h

index fa46748d139ebc4371a769e373c95d4a5a8ac7e0..742b307b28bcfcd64448852ba4d4086b2f10f246 100644 (file)
 #define dout_subsys ceph_subsys_auth
 
 
+std::ostream& operator<<(std::ostream& os,  // stream inserter: writes the status as text to os and returns os
+                        global_id_status_t global_id_status)  // status value to render
+{
+  switch (global_id_status) {  // each handled enumerator prints its own name in lower case
+  case global_id_status_t::NONE:
+    return os << "none";
+  case global_id_status_t::NEW_PENDING:
+    return os << "new_pending";
+  case global_id_status_t::NEW_OK:
+    return os << "new_ok";
+  case global_id_status_t::NEW_NOT_EXPOSED:
+    return os << "new_not_exposed";
+  case global_id_status_t::RECLAIM_PENDING:
+    return os << "reclaim_pending";
+  case global_id_status_t::RECLAIM_OK:
+    return os << "reclaim_ok";
+  case global_id_status_t::RECLAIM_INSECURE:
+    return os << "reclaim_insecure";
+  default:  // any value that matches none of the cases above
+    ceph_abort();  // nothing is written for such a value; ceph_abort() is called instead (presumably does not return -- confirm)
+  }
+}
+
 int AuthServiceHandler::start_session(const EntityName& entity_name,
                                      uint64_t global_id,
                                      bool is_new_global_id,
index 43b021983b2738fc5d0ff21746185d1593a37bbe..6bd50bf841937ad4edaf40f27a9dc8e0b2b6db11 100644 (file)
@@ -41,6 +41,9 @@ enum class global_id_status_t {
   RECLAIM_INSECURE
 };
 
+std::ostream& operator<<(std::ostream& os,
+                        global_id_status_t global_id_status);
+
 struct AuthServiceHandler {
 protected:
   CephContext *cct;
index 7a89f34759f4b734050ee9c2d98d6d8408863b68..1ff14bd61bd572e5ff02b8ec48ba1d6aaa9ecf90 100644 (file)
@@ -342,6 +342,7 @@ OPTION(cephx_sign_messages, OPT_BOOL)  // Default to signing session messages if
 OPTION(auth_mon_ticket_ttl, OPT_DOUBLE)
 OPTION(auth_service_ticket_ttl, OPT_DOUBLE)
 OPTION(auth_allow_insecure_global_id_reclaim, OPT_BOOL)
+OPTION(auth_expose_insecure_global_id_reclaim, OPT_BOOL)
 OPTION(auth_debug, OPT_BOOL)          // if true, assert when weird things happen
 OPTION(mon_client_hunt_parallel, OPT_U32)   // how many mons to try to connect to in parallel during hunt
 OPTION(mon_client_hunt_interval, OPT_DOUBLE)   // try new mon every N seconds until we connect
index 145e4cc9b6091d2fb7c704bac6f7636bb1d59f62..9cf884b265eb97311ae0248a26f04387a2f96b8d 100644 (file)
@@ -2268,7 +2268,14 @@ std::vector<Option> get_global_options() {
     Option("auth_allow_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(true)
     .set_description("Allow reclaiming global_id without presenting a valid ticket proving previous possession of that global_id")
-    .set_long_description("Allowing unauthorized global_id (re)use poses a security risk.  Unfortunately, older clients may omit their ticket on reconnects and therefore rely on this being allowed for preserving their global_id for the lifetime of the client instance."),
+    .set_long_description("Allowing unauthorized global_id (re)use poses a security risk.  Unfortunately, older clients may omit their ticket on reconnects and therefore rely on this being allowed for preserving their global_id for the lifetime of the client instance.  Setting this value to false would immediately prevent new connections from those clients (assuming auth_expose_insecure_global_id_reclaim set to true) and eventually break existing sessions as well (regardless of auth_expose_insecure_global_id_reclaim setting).")
+    .add_see_also("auth_expose_insecure_global_id_reclaim"),
+
+    Option("auth_expose_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description("Force older clients that may omit their ticket on reconnects to reconnect as part of establishing a session")
+    .set_long_description("In permissive mode (auth_allow_insecure_global_id_reclaim set to true), this helps with identifying clients that are not patched.  In enforcing mode (auth_allow_insecure_global_id_reclaim set to false), this is a fail-fast mechanism: don't establish a session that will almost inevitably be broken later.")
+    .add_see_also("auth_allow_insecure_global_id_reclaim"),
 
     Option("auth_debug", Option::TYPE_BOOL, Option::LEVEL_DEV)
     .set_default(false)
index 59881bbce90f12bbac28a4a7c2be385ef75af125..d7665a15d75b2a81d8bdfdbe9380f1aff7ab92d0 100644 (file)
@@ -4436,8 +4436,13 @@ void Monitor::_ms_dispatch(Message *m)
 
   if (s->auth_handler) {
     s->entity_name = s->auth_handler->get_entity_name();
+    s->global_id = s->auth_handler->get_global_id();
+    s->global_id_status = s->auth_handler->get_global_id_status();
   }
-  dout(20) << " caps " << s->caps.get_str() << dendl;
+  dout(20) << " entity_name " << s->entity_name
+          << " global_id " << s->global_id
+          << " (" << s->global_id_status
+          << ") caps " << s->caps.get_str() << dendl;
 
   if ((is_synchronizing() ||
        (!s->authenticated && !exited_quorum.is_zero())) &&
@@ -4484,6 +4489,34 @@ void Monitor::dispatch_op(MonOpRequestRef op)
     return;
   }
 
+  // global_id_status == NONE: all sessions for auth_none and krb,
+  // mon <-> mon sessions (including proxied sessions) for cephx
+  ceph_assert(s->global_id_status == global_id_status_t::NONE ||
+              s->global_id_status == global_id_status_t::NEW_OK ||
+              s->global_id_status == global_id_status_t::NEW_NOT_EXPOSED ||
+              s->global_id_status == global_id_status_t::RECLAIM_OK ||
+              s->global_id_status == global_id_status_t::RECLAIM_INSECURE);
+
+  // let mon_getmap through for "ping" (which doesn't reconnect)
+  // and "tell" (which reconnects but doesn't attempt to preserve
+  // its global_id and stays in NEW_NOT_EXPOSED, retrying until
+  // ->send_attempts reaches 0)
+  if (cct->_conf->auth_expose_insecure_global_id_reclaim &&
+      s->global_id_status == global_id_status_t::NEW_NOT_EXPOSED &&
+      op->get_req()->get_type() != CEPH_MSG_MON_GET_MAP) {
+    dout(5) << __func__ << " " << op->get_req()->get_source_inst()
+            << " may omit old_ticket on reconnects, discarding "
+            << *op->get_req() << " and forcing reconnect" << dendl;
+    ceph_assert(s->con && !s->proxy_con);
+    s->con->mark_down();
+    {
+      std::lock_guard l(session_map_lock);
+      remove_session(s);
+    }
+    op->mark_zap();
+    return;
+  }
+
   switch (op->get_req()->get_type()) {
     case CEPH_MSG_MON_GET_MAP:
       handle_mon_get_map(op);
index aab538465be045aaa80cbc29e140c67c6ad0ce53..43fe38ab8eeb13377eec585f33d7a90b7b0cc6e7 100644 (file)
@@ -59,6 +59,8 @@ struct MonSession : public RefCountedObject {
 
   AuthServiceHandler *auth_handler = nullptr;
   EntityName entity_name;
+  uint64_t global_id = 0;
+  global_id_status_t global_id_status = global_id_status_t::NONE;
 
   ConnectionRef proxy_con;
   uint64_t proxy_tid = 0;
@@ -118,6 +120,8 @@ struct MonSession : public RefCountedObject {
     f->dump_bool("open", !closed);
     f->dump_object("caps", caps);
     f->dump_bool("authenticated", authenticated);
+    f->dump_unsigned("global_id", global_id);
+    f->dump_stream("global_id_status") << global_id_status;
     f->dump_unsigned("osd_epoch", osd_epoch);
     f->dump_string("remote_host", remote_host);
   }