When unauthorized global_id (re)use is disallowed, we don't want to
let unpatched clients in because they wouldn't be able to reestablish
their monitor session later, resulting in subtle hangs and disrupted
user workloads.
Denying the initial connect for all legacy (CephXAuthenticate < v3)
clients is not feasible because a large subset of them never stopped
presenting their ticket on reconnects and are therefore compatible with
enforcing mode: most notably all kernel clients but also pre-luminous
userspace clients. They don't need to be patched and excluding them
would significantly hamper the adoption of enforcing mode.
Instead, force clients that we are not sure about to reconnect shortly
after they go through authentication and obtain global_id. This is
done in Monitor::dispatch_op() to cover both msgr1 and msgr2; the
message that gets discarded is most likely the client's mon_subscribe.
We need to let mon_getmap through for "ceph ping" and "ceph tell" to
work. This does mean that we share the monmap, which lets the client
return from MonClient::authenticate() believing that authentication
has finished. As a result, the potential reconnect error does not
propagate to the user -- the client would instead hang waiting for the
remaining cluster maps.
For msgr1, this is unavoidable because the monmap is sent immediately
after the final MAuthReply. But for msgr2 this is rare: most of the
time we get to their mon_subscribe and cut the connection before they
process the monmap!
Regardless, the user doesn't get a chance to start a workload since
there is no proper higher-level session at that point.
To help with identifying clients that need patching, add global_id and
global_id_status to "sessions" output.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit 08766a17edebb7450cd9b17cc2dc01efc068bb94)
#define dout_subsys ceph_subsys_auth
+std::ostream& operator<<(std::ostream& os, // writes the status to os as its lowercase name and returns os
+ global_id_status_t global_id_status)
+{
+ switch (global_id_status) {
+ case global_id_status_t::NONE:
+ return os << "none";
+ case global_id_status_t::NEW_PENDING:
+ return os << "new_pending";
+ case global_id_status_t::NEW_OK:
+ return os << "new_ok";
+ case global_id_status_t::NEW_NOT_EXPOSED:
+ return os << "new_not_exposed";
+ case global_id_status_t::RECLAIM_PENDING:
+ return os << "reclaim_pending";
+ case global_id_status_t::RECLAIM_OK:
+ return os << "reclaim_ok";
+ case global_id_status_t::RECLAIM_INSECURE:
+ return os << "reclaim_insecure";
+ default: // value matches none of the enumerators handled above
+ ceph_abort();
+ }
+}
+
int AuthServiceHandler::start_session(const EntityName& entity_name,
uint64_t global_id,
bool is_new_global_id,
RECLAIM_INSECURE
};
+std::ostream& operator<<(std::ostream& os,
+ global_id_status_t global_id_status);
+
struct AuthServiceHandler {
protected:
CephContext *cct;
OPTION(auth_mon_ticket_ttl, OPT_DOUBLE)
OPTION(auth_service_ticket_ttl, OPT_DOUBLE)
OPTION(auth_allow_insecure_global_id_reclaim, OPT_BOOL)
+OPTION(auth_expose_insecure_global_id_reclaim, OPT_BOOL)
OPTION(auth_debug, OPT_BOOL) // if true, assert when weird things happen
OPTION(mon_client_hunt_parallel, OPT_U32) // how many mons to try to connect to in parallel during hunt
OPTION(mon_client_hunt_interval, OPT_DOUBLE) // try new mon every N seconds until we connect
Option("auth_allow_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.set_description("Allow reclaiming global_id without presenting a valid ticket proving previous possession of that global_id")
- .set_long_description("Allowing unauthorized global_id (re)use poses a security risk. Unfortunately, older clients may omit their ticket on reconnects and therefore rely on this being allowed for preserving their global_id for the lifetime of the client instance."),
+ .set_long_description("Allowing unauthorized global_id (re)use poses a security risk. Unfortunately, older clients may omit their ticket on reconnects and therefore rely on this being allowed for preserving their global_id for the lifetime of the client instance. Setting this value to false would immediately prevent new connections from those clients (assuming auth_expose_insecure_global_id_reclaim set to true) and eventually break existing sessions as well (regardless of auth_expose_insecure_global_id_reclaim setting).")
+ .add_see_also("auth_expose_insecure_global_id_reclaim"),
+
+ Option("auth_expose_insecure_global_id_reclaim", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+ .set_default(true)
+ .set_description("Force older clients that may omit their ticket on reconnects to reconnect as part of establishing a session")
+ .set_long_description("In permissive mode (auth_allow_insecure_global_id_reclaim set to true), this helps with identifying clients that are not patched. In enforcing mode (auth_allow_insecure_global_id_reclaim set to false), this is a fail-fast mechanism: don't establish a session that will almost inevitably be broken later.")
+ .add_see_also("auth_allow_insecure_global_id_reclaim"),
Option("auth_debug", Option::TYPE_BOOL, Option::LEVEL_DEV)
.set_default(false)
if (s->auth_handler) {
s->entity_name = s->auth_handler->get_entity_name();
+ s->global_id = s->auth_handler->get_global_id();
+ s->global_id_status = s->auth_handler->get_global_id_status();
}
- dout(20) << " entity " << s->entity_name
- << " caps " << s->caps.get_str() << dendl;
+ dout(20) << " entity_name " << s->entity_name
+ << " global_id " << s->global_id
+ << " (" << s->global_id_status
+ << ") caps " << s->caps.get_str() << dendl;
if (!session_stretch_allowed(s, op)) {
return;
return;
}
+ // global_id_status == NONE: all sessions for auth_none and krb,
+ // mon <-> mon sessions (including proxied sessions) for cephx
+ ceph_assert(s->global_id_status == global_id_status_t::NONE ||
+ s->global_id_status == global_id_status_t::NEW_OK ||
+ s->global_id_status == global_id_status_t::NEW_NOT_EXPOSED ||
+ s->global_id_status == global_id_status_t::RECLAIM_OK ||
+ s->global_id_status == global_id_status_t::RECLAIM_INSECURE);
+
+ // let mon_getmap through for "ping" (which doesn't reconnect)
+ // and "tell" (which reconnects but doesn't attempt to preserve
+ // its global_id and stays in NEW_NOT_EXPOSED, retrying until
+ // ->send_attempts reaches 0)
+ if (cct->_conf->auth_expose_insecure_global_id_reclaim &&
+ s->global_id_status == global_id_status_t::NEW_NOT_EXPOSED &&
+ op->get_req()->get_type() != CEPH_MSG_MON_GET_MAP) {
+ dout(5) << __func__ << " " << op->get_req()->get_source_inst()
+ << " may omit old_ticket on reconnects, discarding "
+ << *op->get_req() << " and forcing reconnect" << dendl;
+ ceph_assert(s->con && !s->proxy_con);
+ s->con->mark_down();
+ {
+ std::lock_guard l(session_map_lock);
+ remove_session(s);
+ }
+ op->mark_zap();
+ return;
+ }
+
switch (op->get_req()->get_type()) {
case CEPH_MSG_MON_GET_MAP:
handle_mon_get_map(op);
AuthServiceHandler *auth_handler = nullptr;
EntityName entity_name;
+ uint64_t global_id = 0;
+ global_id_status_t global_id_status = global_id_status_t::NONE;
ConnectionRef proxy_con;
uint64_t proxy_tid = 0;
f->dump_bool("open", !closed);
f->dump_object("caps", caps);
f->dump_bool("authenticated", authenticated);
+ f->dump_unsigned("global_id", global_id);
+ f->dump_stream("global_id_status") << global_id_status;
f->dump_unsigned("osd_epoch", osd_epoch);
f->dump_string("remote_host", remote_host);
}