From cbc7c5ea7d4a12278003e0af37bd706a17bed130 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Mon, 6 Mar 2023 13:21:51 -0500 Subject: [PATCH] mon/MgrMonitor: plug PAXOS for batched MgrMap/OSDMap Plugging PAXOS has the effect of batching map updates into a single PAXOS transaction. Since we're updating the OSDMap several times and the MgrMap, plug PAXOS for efficiency. This also has the nice effect of reducing any delay between the active mgr getting dropped and the blocklisting of its clients. This doesn't resolve any race condition as the two maps are never processed in one unit. So the former active manager may process the OSDMap blocklists before learning it is dropped from the MgrMap. Fixes: https://tracker.ceph.com/issues/58923 Signed-off-by: Patrick Donnelly (cherry picked from commit 2e057bbf9ed4934443fb78e8cdd588aa2100969c) --- src/mon/MgrMonitor.cc | 45 ++++++++++++++++++++++++++++++++++--------- src/mon/MgrMonitor.h | 8 +++++++- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc index 10bdcb4e09679..d93edab808dd0 100644 --- a/src/mon/MgrMonitor.cc +++ b/src/mon/MgrMonitor.cc @@ -490,6 +490,10 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) auto m = op->get_req(); dout(4) << "beacon from " << m->get_gid() << dendl; + // Track whether we modified pending_map + bool updated = false; + bool plugged = false; + // See if we are seeing same name, new GID for the active daemon if (m->get_name() == pending_map.active_name && m->get_gid() != pending_map.active_gid) @@ -503,7 +507,8 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return false; } - drop_active(); + plugged |= drop_active(); + updated = true; } // See if we are seeing same name, new GID for any standbys @@ -514,15 +519,13 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) mon.clog->debug() << "Standby manager daemon " << m->get_name() << " restarted"; drop_standby(i.first); + updated = true; break; } } last_beacon[m->get_gid()] = ceph::coarse_mono_clock::now(); - // Track whether we modified pending_map - bool updated = false; - if (pending_map.active_gid == m->get_gid()) { if (pending_map.services != m->get_services()) { dout(4) << "updated services from mgr." << m->get_name() @@ -578,7 +581,7 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) dout(4) << "mgr thinks it is active but it is not, dropping!" << dendl; auto m = make_message(MgrMap::create_null_mgrmap()); mon.send_reply(op, m.detach()); - return true; + goto out; } else if (pending_map.active_gid == 0) { // There is no currently active daemon, select this one. if (pending_map.standbys.count(m->get_gid())) { @@ -633,6 +636,12 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) dout(10) << "no change" << dendl; } +out: + + if (plugged) { + paxos.unplug(); + } + return updated; } @@ -795,6 +804,7 @@ void MgrMonitor::tick() } bool propose = false; + bool plugged = false; for (auto i : dead_standbys) { dout(4) << "Dropping laggy standby " << i << dendl; @@ -806,7 +816,7 @@ void MgrMonitor::tick() && last_beacon.at(pending_map.active_gid) < cutoff && mon.osdmon()->is_writeable()) { const std::string old_active_name = pending_map.active_name; - drop_active(); + plugged |= drop_active(); propose = true; dout(4) << "Dropping active" << pending_map.active_gid << dendl; if (promote_standby()) { @@ -849,6 +859,11 @@ void MgrMonitor::tick() if (propose) { propose_pending(); } + if (plugged) { + paxos.unplug(); + ceph_assert(propose); + paxos.trigger_propose(); + } } void MgrMonitor::on_restart() @@ -883,10 +898,16 @@ bool MgrMonitor::promote_standby() } } -void MgrMonitor::drop_active() +bool MgrMonitor::drop_active() { ceph_assert(mon.osdmon()->is_writeable()); + bool plugged = false; + if (!paxos.is_plugged()) { + paxos.plug(); + plugged = true; + } + if (last_beacon.count(pending_map.active_gid) > 0) { last_beacon.erase(pending_map.active_gid); } @@ -926,6 +947,7 @@ void MgrMonitor::drop_active() // So that when new active mgr subscribes to mgrdigest, it will // get an immediate response instead of waiting for next timer cancel_timer(); + return plugged; } void MgrMonitor::drop_standby(uint64_t gid, bool drop_meta) @@ -1157,6 +1179,7 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op) const auto prefix = cmd_getval_or(cmdmap, "prefix", string{}); int r = 0; + bool plugged = false; if (prefix == "mgr fail") { string who; @@ -1178,7 +1201,7 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op) mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return false; } - drop_active(); + plugged |= drop_active(); changed = true; } else { gid = 0; @@ -1201,7 +1224,7 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op) mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); return false; } - drop_active(); + plugged |= drop_active(); changed = true; } else if (pending_map.standbys.count(gid) > 0) { drop_standby(gid); @@ -1291,6 +1314,10 @@ out: mon.reply_command(op, r, rs, rdata, get_last_committed()); } + if (plugged) { + paxos.unplug(); + } + return r >= 0; } diff --git a/src/mon/MgrMonitor.h b/src/mon/MgrMonitor.h index be75602abf9fa..79d4e50051d80 100644 --- a/src/mon/MgrMonitor.h +++ b/src/mon/MgrMonitor.h @@ -45,7 +45,13 @@ class MgrMonitor: public PaxosService * @return true if a standby was promoted */ bool promote_standby(); - void drop_active(); + + /** + * Drop the active daemon from the MgrMap. No promotion is performed. + * + * @return whether PAXOS was plugged by this method + */ + bool drop_active(); /** * Remove this gid from the list of standbys. By default, -- 2.39.5