From 8e9c5fd71df7e7276f1978722d92a01c57440f7c Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Wed, 10 Jan 2024 12:50:51 -0500 Subject: [PATCH] mon/MgrMonitor: add "down" setting to simplify testing This flag prevents promotion of a standby manager to the active. It also drops the current active. Signed-off-by: Patrick Donnelly (cherry picked from commit 2e2a3f15cdd9125029fca78b834b8b5bb53edbb1) --- src/mon/MgrMap.h | 13 ++++++-- src/mon/MgrMonitor.cc | 69 +++++++++++++++++++++++++++++++++---------- src/mon/MgrMonitor.h | 3 +- src/mon/MonCommands.h | 4 +++ 4 files changed, 70 insertions(+), 19 deletions(-) diff --git a/src/mon/MgrMap.h b/src/mon/MgrMap.h index f37ed97fd16c7..b36acef5a7f14 100644 --- a/src/mon/MgrMap.h +++ b/src/mon/MgrMap.h @@ -225,6 +225,10 @@ public: epoch_t epoch = 0; epoch_t last_failure_osd_epoch = 0; + + static const uint64_t FLAG_DOWN = (1<<0); + uint64_t flags = 0; + /// global_id of the ceph-mgr instance selected as a leader uint64_t active_gid = 0; /// server address reported by the leader once it is active @@ -401,7 +405,7 @@ public: ENCODE_FINISH(bl); return; } - ENCODE_START(12, 6, bl); + ENCODE_START(13, 6, bl); encode(epoch, bl); encode(active_addrs, bl, features); encode(active_gid, bl); @@ -425,13 +429,14 @@ public: // backwards compatible messsage for older monitors. encode(clients_addrs, bl, features); encode(clients_names, bl, features); + encode(flags, bl); ENCODE_FINISH(bl); return; } void decode(ceph::buffer::list::const_iterator& p) { - DECODE_START(12, p); + DECODE_START(13, p); decode(epoch, p); decode(active_addrs, p); decode(active_gid, p); @@ -498,11 +503,15 @@ public: } } } + if (struct_v >= 13) { + decode(flags, p); + } DECODE_FINISH(p); } void dump(ceph::Formatter *f) const { f->dump_int("epoch", epoch); + f->dump_int("flags", flags); f->dump_int("active_gid", get_active_gid()); f->dump_string("active_name", get_active_name()); f->dump_object("active_addrs", active_addrs); diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc index 2f6510846b3be..4e9fa7d023267 100644 --- a/src/mon/MgrMonitor.cc +++ b/src/mon/MgrMonitor.cc @@ -587,22 +587,23 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) if (pending_map.standbys.count(m->get_gid())) { drop_standby(m->get_gid(), false); } - dout(4) << "selecting new active " << m->get_gid() - << " " << m->get_name() - << " (was " << pending_map.active_gid << " " - << pending_map.active_name << ")" << dendl; - pending_map.active_gid = m->get_gid(); - pending_map.active_name = m->get_name(); - pending_map.active_change = ceph_clock_now(); - pending_map.active_mgr_features = m->get_mgr_features(); - pending_map.available_modules = m->get_available_modules(); - encode(m->get_metadata(), pending_metadata[m->get_name()]); - pending_metadata_rm.erase(m->get_name()); - - mon.clog->info() << "Activating manager daemon " - << pending_map.active_name; + if (!(pending_map.flags & MgrMap::FLAG_DOWN)) { + dout(4) << "selecting new active " << m->get_gid() + << " " << m->get_name() + << " (was " << pending_map.active_gid << " " + << pending_map.active_name << ")" << dendl; + pending_map.active_gid = m->get_gid(); + pending_map.active_name = m->get_name(); + pending_map.active_change = ceph_clock_now(); + pending_map.active_mgr_features = m->get_mgr_features(); + pending_map.available_modules = m->get_available_modules(); + encode(m->get_metadata(), pending_metadata[m->get_name()]); + pending_metadata_rm.erase(m->get_name()); - updated = true; + mon.clog->info() << "Activating manager daemon " + << pending_map.active_name; + updated = true; + } } else { if (pending_map.standbys.count(m->get_gid()) > 0) { dout(10) << "from existing standby " << m->get_gid() << dendl; @@ -877,6 +878,9 @@ void MgrMonitor::on_restart() bool MgrMonitor::promote_standby() { ceph_assert(pending_map.active_gid == 0); + if (pending_map.flags & MgrMap::FLAG_DOWN) { + return false; + } if (pending_map.standbys.size()) { // Promote a replacement (arbitrary choice of standby) auto replacement_gid = pending_map.standbys.begin()->first; @@ -890,6 +894,9 @@ bool MgrMonitor::promote_standby() pending_map.active_addrs = entity_addrvec_t(); pending_map.active_change = ceph_clock_now(); + mon.clog->info() << "Activating manager daemon " + << pending_map.active_name; + drop_standby(replacement_gid, false); return true; @@ -1181,7 +1188,37 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op) int r = 0; bool plugged = false; - if (prefix == "mgr fail") { + if (prefix == "mgr set") { + std::string var; + if (!cmd_getval(cmdmap, "var", var) || var.empty()) { + ss << "Invalid variable"; + return -EINVAL; + } + string val; + if (!cmd_getval(cmdmap, "val", val)) { + return -EINVAL; + } + + if (var == "down") { + bool enable_down = false; + int r = parse_bool(val, &enable_down, ss); + if (r != 0) { + return r; + } + if (enable_down) { + if (!mon.osdmon()->is_writeable()) { + mon.osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); + return false; + } + pending_map.flags |= MgrMap::FLAG_DOWN; + plugged |= drop_active(); + } else { + pending_map.flags &= ~(MgrMap::FLAG_DOWN); + } + } else { + return -EINVAL; + } + } else if (prefix == "mgr fail") { string who; if (!cmd_getval(cmdmap, "who", who)) { if (!map.active_gid) { diff --git a/src/mon/MgrMonitor.h b/src/mon/MgrMonitor.h index 79d4e50051d80..a2a84c141f710 100644 --- a/src/mon/MgrMonitor.h +++ b/src/mon/MgrMonitor.h @@ -21,8 +21,9 @@ #include "MgrMap.h" #include "PaxosService.h" #include "MonCommand.h" +#include "CommandHandler.h" -class MgrMonitor: public PaxosService +class MgrMonitor: public PaxosService, public CommandHandler { MgrMap map; MgrMap pending_map; diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 8fa39ed5df58f..24cd0ca7c25fb 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1249,6 +1249,10 @@ COMMAND("mgr dump " "name=epoch,type=CephInt,range=0,req=false", "dump the latest MgrMap", "mgr", "r") +COMMAND("mgr set " + "name=var,type=CephChoices,strings=down " + "name=val,type=CephString ", + "set mgr parameter to ", "mgr", "rw") COMMAND("mgr fail name=who,type=CephString,req=false", "treat the named manager daemon as failed", "mgr", "rw") COMMAND("mgr module ls", -- 2.39.5