From: Rishabh Dave Date: Thu, 18 Jul 2024 19:38:07 +0000 (+0530) Subject: mon/MgrMonitor: allow disabling always-on MGR modules X-Git-Tag: testing/wip-mchangir-testing-20250210.053633-reef-debug^2~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=ba3bb8d290738a7ebee1c0808d0fcc6211d01da7;p=ceph-ci.git mon/MgrMonitor: allow disabling always-on MGR modules Add a new command ("ceph mgr module force disable ") that allows forcibly disabling an always-on module. This command should ideally only be used for cluster recovery. Fixes: https://tracker.ceph.com/issues/66005 Signed-off-by: Rishabh Dave (cherry picked from commit 9962772358048a98a6e871dccf1bfd0a15b4d791) --- diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc index 7e294d518b7..f16174177e1 100644 --- a/src/mgr/PyModuleRegistry.cc +++ b/src/mgr/PyModuleRegistry.cc @@ -120,7 +120,8 @@ bool PyModuleRegistry::handle_mgr_map(const MgrMap &mgr_map_) return false; } else { bool modules_changed = mgr_map_.modules != mgr_map.modules || - mgr_map_.always_on_modules != mgr_map.always_on_modules; + mgr_map_.always_on_modules != mgr_map.always_on_modules || + mgr_map_.force_disabled_modules != mgr_map.force_disabled_modules; mgr_map = mgr_map_; if (standby_modules != nullptr) { @@ -209,6 +210,16 @@ void PyModuleRegistry::active_start( // Anything we're skipping because of !can_run will be flagged // to the user separately via get_health_checks if (!(i.second->is_enabled() && i.second->is_loaded())) { + dout(8) << __func__ << " Not starting module '" << i.first << "', it is " + << "not enabled and loaded" << dendl; + continue; + } + + // These are always-on modules but user force-disabled them. 
+ if (mgr_map.force_disabled_modules.find(i.first) != + mgr_map.force_disabled_modules.end()) { + dout(8) << __func__ << " Not starting module '" << i.first << "', it is " + << "force-disabled" << dendl; continue; } diff --git a/src/mon/MgrMap.h b/src/mon/MgrMap.h index b36acef5a7f..8bcd33d9130 100644 --- a/src/mon/MgrMap.h +++ b/src/mon/MgrMap.h @@ -254,6 +254,9 @@ public: // active version. std::map> always_on_modules; + // Modules which are always-on but have been force-disabled by user. + std::set force_disabled_modules; + // Modules which are reported to exist std::vector available_modules; @@ -405,7 +408,7 @@ public: ENCODE_FINISH(bl); return; } - ENCODE_START(13, 6, bl); + ENCODE_START(14, 6, bl); encode(epoch, bl); encode(active_addrs, bl, features); encode(active_gid, bl); @@ -430,13 +433,14 @@ public: encode(clients_addrs, bl, features); encode(clients_names, bl, features); encode(flags, bl); + encode(force_disabled_modules, bl); ENCODE_FINISH(bl); return; } void decode(ceph::buffer::list::const_iterator& p) { - DECODE_START(13, p); + DECODE_START(14, p); decode(epoch, p); decode(active_addrs, p); decode(active_gid, p); @@ -506,6 +510,11 @@ public: if (struct_v >= 13) { decode(flags, p); } + + if (struct_v >= 14) { + decode(force_disabled_modules, p); + } + DECODE_FINISH(p); } @@ -559,6 +568,13 @@ public: f->close_section(); } f->close_section(); // always_on_modules + + f->open_object_section("force_disabled_modules"); + for (auto& m : force_disabled_modules) { + f->dump_string("module", m); + } + f->close_section(); + f->dump_int("last_failure_osd_epoch", last_failure_osd_epoch); f->open_array_section("active_clients"); for (const auto& i : clients) { diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc index 7a331c67e6f..174cf3c49ce 100644 --- a/src/mon/MgrMonitor.cc +++ b/src/mon/MgrMonitor.cc @@ -1053,6 +1053,13 @@ bool MgrMonitor::preprocess_command(MonOpRequestRef op) f->dump_string("module", p); } f->close_section(); + + 
f->open_array_section("force_disabled_modules"); + for (auto& p : map.force_disabled_modules) { + f->dump_string("module", p); + } + f->close_section(); + f->open_array_section("enabled_modules"); for (auto& p : map.modules) { if (map.get_always_on_modules().count(p) > 0) @@ -1082,7 +1089,11 @@ bool MgrMonitor::preprocess_command(MonOpRequestRef op) for (auto& p : map.get_always_on_modules()) { tbl << p; - tbl << "on (always on)"; + if (map.force_disabled_modules.find(p) == map.force_disabled_modules.end()) { + tbl << "on (always on)"; + } else { + tbl << "off (always on but force-disabled)"; + } tbl << TextTable::endrow; } for (auto& p : map.modules) { @@ -1297,10 +1308,13 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op) r = -EINVAL; goto out; } - if (pending_map.get_always_on_modules().count(module) > 0) { + + if (pending_map.get_always_on_modules().count(module) > 0 && + !pending_map.force_disabled_modules.contains(module)) { ss << "module '" << module << "' is already enabled (always-on)"; goto out; } + bool force = false; cmd_getval_compat_cephbool(cmdmap, "force", force); if (!pending_map.all_support_module(module) && @@ -1324,7 +1338,12 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op) ss << "module '" << module << "' is already enabled"; r = 0; goto out; + } else if (pending_map.force_disabled_modules.contains(module)) { + pending_map.force_disabled_modules.erase(module); + r = 0; + goto out; } + pending_map.modules.insert(module); } else if (prefix == "mgr module disable") { string module; @@ -1334,8 +1353,9 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op) goto out; } if (pending_map.get_always_on_modules().count(module) > 0) { - ss << "module '" << module << "' cannot be disabled (always-on)"; - r = -EINVAL; + ss << "module '" << module << "' cannot be disabled (always-on), use " << + "'ceph mgr module force disable' command to disable an always-on module"; + r = -EPERM; goto out; } if (!pending_map.module_enabled(module)) { @@ -1348,6 
+1368,50 @@ bool MgrMonitor::prepare_command(MonOpRequestRef op) } dout(8) << __func__ << " disabling module " << module << " from new " << dendl; pending_map.modules.erase(module); + } else if (prefix == "mgr module force disable") { + string mod; + cmd_getval(cmdmap, "module", mod); + + bool confirmation_flag = false; + cmd_getval(cmdmap, "yes_i_really_mean_it", confirmation_flag); + + if (mod.empty()) { + ss << "Module name wasn't passed!"; + r = -EINVAL; + goto out; + } + + if (!pending_map.get_always_on_modules().contains(mod)) { + ss << "Always-on module named \"" << mod << "\" does not exist"; + r = -EINVAL; + goto out; + } else if (pending_map.modules.contains(mod)) { + ss << "Module '" << mod << "' is not an always-on module, only always-on " << + "modules can be disabled through this command."; + r = -EINVAL; + goto out; + } + + if (pending_map.force_disabled_modules.contains(mod)) { + ss << "Module \"" << mod << "\"is already disabled"; + r = 0; + goto out; + } + + if (!confirmation_flag) { + ss << "This command will disable operations and remove commands that " + << "other Ceph utilities expect to be available. Do not continue " + << "unless your cluster is already experiencing an event due to " + << "which it is advised to disable this module as part of " + << "troubleshooting. 
If you are sure that you wish to continue, " + << "run again with --yes-i-really-mean-it"; + r = -EPERM; + goto out; + } + + dout(8) << __func__ << " force-disabling module '" << mod << "'" << dendl; + pending_map.force_disabled_modules.insert(mod); + pending_map.modules.erase(mod); } else { ss << "Command '" << prefix << "' not implemented!"; r = -ENOSYS; diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index de5ab7030e7..f8265069d3d 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1317,6 +1317,10 @@ COMMAND("mgr module enable " COMMAND("mgr module disable " "name=module,type=CephString", "disable mgr module", "mgr", "rw") +COMMAND("mgr module force disable " + "name=module,type=CephString " + "name=yes_i_really_mean_it,type=CephBool,req=false", + "force disable a always-on mgr module", "mgr", "rw") COMMAND("mgr metadata name=who,type=CephString,req=false", "dump metadata for all daemons or a specific daemon", "mgr", "r")