]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: fail fast when unauthorized global_id (re)use is disallowed
authorIlya Dryomov <idryomov@gmail.com>
Thu, 25 Mar 2021 19:59:13 +0000 (20:59 +0100)
committerIlya Dryomov <idryomov@gmail.com>
Mon, 12 Apr 2021 18:50:53 +0000 (20:50 +0200)
When unauthorized global_id (re)use is disallowed, we don't want to
let unpatched clients in because they wouldn't be able to reestablish
their monitor session later, resulting in subtle hangs and disrupted
user workloads.

Denying the initial connect for all legacy (CephXAuthenticate < v3)
clients is not feasible because a large subset of them never stopped
presenting their ticket on reconnects and are therefore compatible with
enforcing mode: most notably all kernel clients but also pre-luminous
userspace clients.  They don't need to be patched and excluding them
would significantly hamper the adoption of enforcing mode.

Instead, force clients that we are not sure about to reconnect shortly
after they go through authentication and obtain global_id.  This is
done in Monitor::dispatch_op() to capture both msgr1 and msgr2, most
likely instead of dispatching mon_subscribe.

We need to let mon_getmap through for "ceph ping" and "ceph tell" to
work.  This does mean that we share the monmap, which lets the client
return from MonClient::authenticate() considering authentication to be
finished and causing the potential reconnect error to not propagate to
the user -- the client would hang waiting for remaining cluster maps.
For msgr1, this is unavoidable because the monmap is sent immediately
after the final MAuthReply.  But for msgr2 this is rare: most of the
time we get to their mon_subscribe and cut the connection before they
process the monmap!

Regardless, the user doesn't get a chance to start a workload since
there is no proper higher-level session at that point.

To help with identifying clients that need patching, add global_id and
global_id_status to "sessions" output.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit 08766a17edebb7450cd9b17cc2dc01efc068bb94)

src/auth/AuthServiceHandler.cc
src/auth/AuthServiceHandler.h
src/common/legacy_config_opts.h
src/common/options.cc
src/mon/Monitor.cc
src/mon/Session.h

index 07e71298da69b3fff3fb7caaf1572d55cb025586..2d1297ee2509064f19e98b4e71f2258b5dde00f9 100644 (file)
 #define dout_subsys ceph_subsys_auth
 
 
+std::ostream& operator<<(std::ostream& os,
+                        global_id_status_t global_id_status)
+{
+  switch (global_id_status) {
+  case global_id_status_t::NONE:
+    return os << "none";
+  case global_id_status_t::NEW_PENDING:
+    return os << "new_pending";
+  case global_id_status_t::NEW_OK:
+    return os << "new_ok";
+  case global_id_status_t::NEW_NOT_EXPOSED:
+    return os << "new_not_exposed";
+  case global_id_status_t::RECLAIM_PENDING:
+    return os << "reclaim_pending";
+  case global_id_status_t::RECLAIM_OK:
+    return os << "reclaim_ok";
+  case global_id_status_t::RECLAIM_INSECURE:
+    return os << "reclaim_insecure";
+  default:
+    ceph_abort();
+  }
+}
+
 int AuthServiceHandler::start_session(const EntityName& entity_name,
                                      uint64_t global_id,
                                      bool is_new_global_id,
index e10c69491f7e1c40284c12c576d4f70d049e998b..4b3dcccbe9dd26bd379d0b8f8ce1eeaeae63a198 100644 (file)
@@ -41,6 +41,9 @@ enum class global_id_status_t {
   RECLAIM_INSECURE
 };
 
+std::ostream& operator<<(std::ostream& os,
+                        global_id_status_t global_id_status);
+
 struct AuthServiceHandler {
 protected:
   CephContext *cct;
index 6659dc7d725148061b950f07064c53f25bbec281..8e2438bf0f66cdb37b568a6b2a1bbfef420fdc7d 100644 (file)
@@ -330,6 +330,7 @@ OPTION(cephx_sign_messages, OPT_BOOL)  // Default to signing session messages if
 OPTION(auth_mon_ticket_ttl, OPT_DOUBLE)
 OPTION(auth_service_ticket_ttl, OPT_DOUBLE)
 OPTION(auth_allow_insecure_global_id_reclaim, OPT_BOOL)
+OPTION(auth_expose_insecure_global_id_reclaim, OPT_BOOL)
 OPTION(auth_debug, OPT_BOOL)          // if true, assert when weird things happen
 OPTION(mon_client_hunt_parallel, OPT_U32)   // how many mons to try to connect to in parallel during hunt
 OPTION(mon_client_hunt_interval, OPT_DOUBLE)   // try new mon every N seconds until we connect
index 0fd2cd9d3f310fd71a815d578adbf5ab9adfd7c3..386de014362ece46b9da8909b0db81d978713329 100644 (file)
@@ -2371,7 +2371,14 @@ std::vector<Option> get_global_options() {
     Option("auth_allow_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(true)
     .set_description("Allow reclaiming global_id without presenting a valid ticket proving previous possession of that global_id")
-    .set_long_description("Allowing unauthorized global_id (re)use poses a security risk.  Unfortunately, older clients may omit their ticket on reconnects and therefore rely on this being allowed for preserving their global_id for the lifetime of the client instance."),
+    .set_long_description("Allowing unauthorized global_id (re)use poses a security risk.  Unfortunately, older clients may omit their ticket on reconnects and therefore rely on this being allowed for preserving their global_id for the lifetime of the client instance.  Setting this value to false would immediately prevent new connections from those clients (assuming auth_expose_insecure_global_id_reclaim set to true) and eventually break existing sessions as well (regardless of auth_expose_insecure_global_id_reclaim setting).")
+    .add_see_also("auth_expose_insecure_global_id_reclaim"),
+
+    Option("auth_expose_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description("Force older clients that may omit their ticket on reconnects to reconnect as part of establishing a session")
+    .set_long_description("In permissive mode (auth_allow_insecure_global_id_reclaim set to true), this helps with identifying clients that are not patched.  In enforcing mode (auth_allow_insecure_global_id_reclaim set to false), this is a fail-fast mechanism: don't establish a session that will almost inevitably be broken later.")
+    .add_see_also("auth_allow_insecure_global_id_reclaim"),
 
     Option("auth_debug", Option::TYPE_BOOL, Option::LEVEL_DEV)
     .set_default(false)
index f97940f71c5572ba4a38c6e95958f9255eeb2954..b77a466d9b78e24bcdaf951525ef6e5d586bb3e0 100644 (file)
@@ -4416,9 +4416,13 @@ void Monitor::_ms_dispatch(Message *m)
 
   if (s->auth_handler) {
     s->entity_name = s->auth_handler->get_entity_name();
+    s->global_id = s->auth_handler->get_global_id();
+    s->global_id_status = s->auth_handler->get_global_id_status();
   }
-  dout(20) << " entity " << s->entity_name
-          << " caps " << s->caps.get_str() << dendl;
+  dout(20) << " entity_name " << s->entity_name
+          << " global_id " << s->global_id
+          << " (" << s->global_id_status
+          << ") caps " << s->caps.get_str() << dendl;
 
   if (!session_stretch_allowed(s, op)) {
     return;
@@ -4472,6 +4476,34 @@ void Monitor::dispatch_op(MonOpRequestRef op)
     return;
   }
 
+  // global_id_status == NONE: all sessions for auth_none and krb,
+  // mon <-> mon sessions (including proxied sessions) for cephx
+  ceph_assert(s->global_id_status == global_id_status_t::NONE ||
+              s->global_id_status == global_id_status_t::NEW_OK ||
+              s->global_id_status == global_id_status_t::NEW_NOT_EXPOSED ||
+              s->global_id_status == global_id_status_t::RECLAIM_OK ||
+              s->global_id_status == global_id_status_t::RECLAIM_INSECURE);
+
+  // let mon_getmap through for "ping" (which doesn't reconnect)
+  // and "tell" (which reconnects but doesn't attempt to preserve
+  // its global_id and stays in NEW_NOT_EXPOSED, retrying until
+  // ->send_attempts reaches 0)
+  if (cct->_conf->auth_expose_insecure_global_id_reclaim &&
+      s->global_id_status == global_id_status_t::NEW_NOT_EXPOSED &&
+      op->get_req()->get_type() != CEPH_MSG_MON_GET_MAP) {
+    dout(5) << __func__ << " " << op->get_req()->get_source_inst()
+            << " may omit old_ticket on reconnects, discarding "
+            << *op->get_req() << " and forcing reconnect" << dendl;
+    ceph_assert(s->con && !s->proxy_con);
+    s->con->mark_down();
+    {
+      std::lock_guard l(session_map_lock);
+      remove_session(s);
+    }
+    op->mark_zap();
+    return;
+  }
+
   switch (op->get_req()->get_type()) {
     case CEPH_MSG_MON_GET_MAP:
       handle_mon_get_map(op);
index e7b076b6bcb30e0dbbf93a5f962513fa374b4860..3009d023944731b1616f27942b0bad3783d4924e 100644 (file)
@@ -65,6 +65,8 @@ struct MonSession : public RefCountedObject {
 
   AuthServiceHandler *auth_handler = nullptr;
   EntityName entity_name;
+  uint64_t global_id = 0;
+  global_id_status_t global_id_status = global_id_status_t::NONE;
 
   ConnectionRef proxy_con;
   uint64_t proxy_tid = 0;
@@ -132,6 +134,8 @@ struct MonSession : public RefCountedObject {
     f->dump_bool("open", !closed);
     f->dump_object("caps", caps);
     f->dump_bool("authenticated", authenticated);
+    f->dump_unsigned("global_id", global_id);
+    f->dump_stream("global_id_status") << global_id_status;
     f->dump_unsigned("osd_epoch", osd_epoch);
     f->dump_string("remote_host", remote_host);
   }