]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: fail fast when unauthorized global_id (re)use is disallowed
authorIlya Dryomov <idryomov@gmail.com>
Thu, 25 Mar 2021 19:59:13 +0000 (20:59 +0100)
committerSage Weil <sage@newdream.net>
Tue, 6 Apr 2021 21:28:55 +0000 (17:28 -0400)
When unauthorized global_id (re)use is disallowed, we don't want to
let unpatched clients in because they wouldn't be able to reestablish
their monitor session later, resulting in subtle hangs and disrupted
user workloads.

Denying the initial connect for all legacy (CephXAuthenticate < v3)
clients is not feasible because a large subset of them never stopped
presenting their ticket on reconnects and are therefore compatible with
enforcing mode: most notably all kernel clients but also pre-luminous
userspace clients.  They don't need to be patched and excluding them
would significantly hamper the adoption of enforcing mode.

Instead, force clients that we are not sure about to reconnect shortly
after they go through authentication and obtain global_id.  This is
done in Monitor::dispatch_op() to capture both msgr1 and msgr2, most
likely instead of dispatching mon_subscribe.

We need to let mon_getmap through for "ceph ping" and "ceph tell" to
work.  This does mean that we share the monmap, which lets the client
return from MonClient::authenticate() considering authentication to be
finished and causing the potential reconnect error to not propagate to
the user -- the client would hang waiting for remaining cluster maps.
For msgr1, this is unavoidable because the monmap is sent immediately
after the final MAuthReply.  But for msgr2 this is rare: most of the
time we get to their mon_subscribe and cut the connection before they
process the monmap!

Regardless, the user doesn't get a chance to start a workload since
there is no proper higher-level session at that point.

To help with identifying clients that need patching, add global_id and
global_id_status to "sessions" output.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
src/auth/AuthServiceHandler.cc
src/auth/AuthServiceHandler.h
src/common/legacy_config_opts.h
src/common/options.cc
src/mon/Monitor.cc
src/mon/Session.h

index 07e71298da69b3fff3fb7caaf1572d55cb025586..2d1297ee2509064f19e98b4e71f2258b5dde00f9 100644 (file)
 #define dout_subsys ceph_subsys_auth
 
 
+std::ostream& operator<<(std::ostream& os,
+                        global_id_status_t global_id_status)
+{
+  switch (global_id_status) {
+  case global_id_status_t::NONE:
+    return os << "none";
+  case global_id_status_t::NEW_PENDING:
+    return os << "new_pending";
+  case global_id_status_t::NEW_OK:
+    return os << "new_ok";
+  case global_id_status_t::NEW_NOT_EXPOSED:
+    return os << "new_not_exposed";
+  case global_id_status_t::RECLAIM_PENDING:
+    return os << "reclaim_pending";
+  case global_id_status_t::RECLAIM_OK:
+    return os << "reclaim_ok";
+  case global_id_status_t::RECLAIM_INSECURE:
+    return os << "reclaim_insecure";
+  default:
+    ceph_abort();
+  }
+}
+
 int AuthServiceHandler::start_session(const EntityName& entity_name,
                                      uint64_t global_id,
                                      bool is_new_global_id,
index e10c69491f7e1c40284c12c576d4f70d049e998b..4b3dcccbe9dd26bd379d0b8f8ce1eeaeae63a198 100644 (file)
@@ -41,6 +41,9 @@ enum class global_id_status_t {
   RECLAIM_INSECURE
 };
 
+std::ostream& operator<<(std::ostream& os,
+                        global_id_status_t global_id_status);
+
 struct AuthServiceHandler {
 protected:
   CephContext *cct;
index c6acce3670fadcd460e17a28726b163d1ac6be1a..52820319bc54609b23330482d8699fcb80702737 100644 (file)
@@ -329,6 +329,7 @@ OPTION(cephx_sign_messages, OPT_BOOL)  // Default to signing session messages if
 OPTION(auth_mon_ticket_ttl, OPT_DOUBLE)
 OPTION(auth_service_ticket_ttl, OPT_DOUBLE)
 OPTION(auth_allow_insecure_global_id_reclaim, OPT_BOOL)
+OPTION(auth_expose_insecure_global_id_reclaim, OPT_BOOL)
 OPTION(auth_debug, OPT_BOOL)          // if true, assert when weird things happen
 OPTION(mon_client_hunt_parallel, OPT_U32)   // how many mons to try to connect to in parallel during hunt
 OPTION(mon_client_hunt_interval, OPT_DOUBLE)   // try new mon every N seconds until we connect
index 33e05e24210f2b41e6a54945a8fcd0bcdf86a866..a146adc041a079787a56057c15381b39b7667b5d 100644 (file)
@@ -2395,7 +2395,14 @@ std::vector<Option> get_global_options() {
     Option("auth_allow_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(true)
     .set_description("Allow reclaiming global_id without presenting a valid ticket proving previous possession of that global_id")
-    .set_long_description("Allowing unauthorized global_id (re)use poses a security risk.  Unfortunately, older clients may omit their ticket on reconnects and therefore rely on this being allowed for preserving their global_id for the lifetime of the client instance."),
+    .set_long_description("Allowing unauthorized global_id (re)use poses a security risk.  Unfortunately, older clients may omit their ticket on reconnects and therefore rely on this being allowed for preserving their global_id for the lifetime of the client instance.  Setting this value to false would immediately prevent new connections from those clients (assuming auth_expose_insecure_global_id_reclaim set to true) and eventually break existing sessions as well (regardless of auth_expose_insecure_global_id_reclaim setting).")
+    .add_see_also("auth_expose_insecure_global_id_reclaim"),
+
+    Option("auth_expose_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description("Force older clients that may omit their ticket on reconnects to reconnect as part of establishing a session")
+    .set_long_description("In permissive mode (auth_allow_insecure_global_id_reclaim set to true), this helps with identifying clients that are not patched.  In enforcing mode (auth_allow_insecure_global_id_reclaim set to false), this is a fail-fast mechanism: don't establish a session that will almost inevitably be broken later.")
+    .add_see_also("auth_allow_insecure_global_id_reclaim"),
 
     Option("auth_debug", Option::TYPE_BOOL, Option::LEVEL_DEV)
     .set_default(false)
index ef47e99604aaf5f534918cf44bdab35a2b7c60a5..f93a6035c40eb7cd7620974568bda6a8d360dc9b 100644 (file)
@@ -4439,9 +4439,13 @@ void Monitor::_ms_dispatch(Message *m)
 
   if (s->auth_handler) {
     s->entity_name = s->auth_handler->get_entity_name();
+    s->global_id = s->auth_handler->get_global_id();
+    s->global_id_status = s->auth_handler->get_global_id_status();
   }
-  dout(20) << " entity " << s->entity_name
-          << " caps " << s->caps.get_str() << dendl;
+  dout(20) << " entity_name " << s->entity_name
+          << " global_id " << s->global_id
+          << " (" << s->global_id_status
+          << ") caps " << s->caps.get_str() << dendl;
 
   if (!session_stretch_allowed(s, op)) {
     return;
@@ -4495,6 +4499,34 @@ void Monitor::dispatch_op(MonOpRequestRef op)
     return;
   }
 
+  // global_id_status == NONE: all sessions for auth_none and krb,
+  // mon <-> mon sessions (including proxied sessions) for cephx
+  ceph_assert(s->global_id_status == global_id_status_t::NONE ||
+              s->global_id_status == global_id_status_t::NEW_OK ||
+              s->global_id_status == global_id_status_t::NEW_NOT_EXPOSED ||
+              s->global_id_status == global_id_status_t::RECLAIM_OK ||
+              s->global_id_status == global_id_status_t::RECLAIM_INSECURE);
+
+  // let mon_getmap through for "ping" (which doesn't reconnect)
+  // and "tell" (which reconnects but doesn't attempt to preserve
+  // its global_id and stays in NEW_NOT_EXPOSED, retrying until
+  // ->send_attempts reaches 0)
+  if (cct->_conf->auth_expose_insecure_global_id_reclaim &&
+      s->global_id_status == global_id_status_t::NEW_NOT_EXPOSED &&
+      op->get_req()->get_type() != CEPH_MSG_MON_GET_MAP) {
+    dout(5) << __func__ << " " << op->get_req()->get_source_inst()
+            << " may omit old_ticket on reconnects, discarding "
+            << *op->get_req() << " and forcing reconnect" << dendl;
+    ceph_assert(s->con && !s->proxy_con);
+    s->con->mark_down();
+    {
+      std::lock_guard l(session_map_lock);
+      remove_session(s);
+    }
+    op->mark_zap();
+    return;
+  }
+
   switch (op->get_req()->get_type()) {
     case CEPH_MSG_MON_GET_MAP:
       handle_mon_get_map(op);
index e7b076b6bcb30e0dbbf93a5f962513fa374b4860..3009d023944731b1616f27942b0bad3783d4924e 100644 (file)
@@ -65,6 +65,8 @@ struct MonSession : public RefCountedObject {
 
   AuthServiceHandler *auth_handler = nullptr;
   EntityName entity_name;
+  uint64_t global_id = 0;
+  global_id_status_t global_id_status = global_id_status_t::NONE;
 
   ConnectionRef proxy_con;
   uint64_t proxy_tid = 0;
@@ -132,6 +134,8 @@ struct MonSession : public RefCountedObject {
     f->dump_bool("open", !closed);
     f->dump_object("caps", caps);
     f->dump_bool("authenticated", authenticated);
+    f->dump_unsigned("global_id", global_id);
+    f->dump_stream("global_id_status") << global_id_status;
     f->dump_unsigned("osd_epoch", osd_epoch);
     f->dump_string("remote_host", remote_host);
   }