From 6160ed75fcc2a648da4b696fd0ec20b95c4a0a61 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov
Date: Mon, 17 May 2021 21:16:16 +0200
Subject: [PATCH] mon/MonClient: tolerate a rotating key that is slightly out of date

Commit 918c12c2ab5d ("monclient: avoid key renew storm on clock skew")
made wait_auth_rotating() wait for a key set with a valid "current"
key (instead of any key set, including one with all keys expired if
the clocks are skewed).

While a good idea in general, this is a bit too stringent because the
monitors will hand out key sets with a "current" key that is _just_
about to expire.  There is nothing wrong with that as the "next" key
is also there, valid for the entire auth_service_ticket_ttl.  So even
if the daemon is talking to the leader, it is possible to get a key
set with an expired "current" key.  If the daemon is talking to a
peon, it is pretty easy to run into this in practice.

This, coupled with the fact that _check_auth_rotating() explicitly
allows the keys to go slightly out of date, can lead to
wait_auth_rotating() stalling the boot for up to 30 seconds:

  15:41:11.824+0000  1 ... ==== auth_reply(proto 2 0 (0) Success)
  15:41:41.824+0000  0 monclient: wait_auth_rotating timed out after 30
  15:41:41.824+0000 -1 mds.b unable to obtain rotating service keys; retrying

Apply the same 30 second or less tolerance in wait_auth_rotating().

Fixes: https://tracker.ceph.com/issues/50390 Signed-off-by: Ilya Dryomov --- src/mon/MonClient.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mon/MonClient.cc b/src/mon/MonClient.cc index 1819ecdc6d143..9ffa7d367df98 100644 --- a/src/mon/MonClient.cc +++ b/src/mon/MonClient.cc @@ -1102,10 +1102,11 @@ int MonClient::wait_auth_rotating(double timeout) return 0; ldout(cct, 10) << __func__ << " waiting for " << timeout << dendl; - utime_t now = ceph_clock_now(); - if (auth_cond.wait_for(l, ceph::make_timespan(timeout), [now, this] { + utime_t cutoff = ceph_clock_now(); + cutoff -= std::min(30.0, cct->_conf->auth_service_ticket_ttl / 4.0); + if (auth_cond.wait_for(l, ceph::make_timespan(timeout), [this, cutoff] { return (!auth_principal_needs_rotating_keys(entity_name) || - !rotating_secrets->need_new_secrets(now)); + !rotating_secrets->need_new_secrets(cutoff)); })) { ldout(cct, 10) << __func__ << " done" << dendl; return 0; -- 2.39.5