From 42fbaf6543a20874e70127a619002fec545099d5 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Thu, 25 Jun 2020 10:41:30 +0800 Subject: [PATCH] mgr: avoid false alarm of MGR_MODULE_ERROR mgr sends a health report periodically; the report includes whether the always-on modules are loaded or not. but the modules are loaded in two steps: 1. load the options and commands exposed by modules. the options and commands are registered using static methods of the subclass of MgrModule. 2. create an instance of the subclass of MgrModule. this is performed in the background by a Finisher thread. upon finishing the construction of the instance, ActivePyModules::start_one() adds the module which successfully creates the class to `modules`. but there is a chance that when mgr sends the health report, the always-on module is still creating its instance of the MgrModule subclass, or that task is still pending in the finisher thread. in that case, mgr would add a false error message like ``` 4 mgr modules have failed (MGR_MODULE_ERROR) ``` in the health report. in this change, the set of modules in pending state is tracked, and mgr will not report a missing always-on module as failed while that module is still pending. Signed-off-by: Kefu Chai (cherry picked from commit 2d9b3abd1fc50e5fcd9ce2c05e8fac41d389b052) --- src/mgr/ActivePyModules.cc | 4 +++- src/mgr/ActivePyModules.h | 6 ++++++ src/mgr/PyModuleRegistry.cc | 3 +++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc index ab2bd2512bc41..d50c2cf1d71e9 100644 --- a/src/mgr/ActivePyModules.cc +++ b/src/mgr/ActivePyModules.cc @@ -438,15 +438,17 @@ void ActivePyModules::start_one(PyModuleRef py_module) const auto name = py_module->get_name(); auto active_module = std::make_shared(py_module, clog); + pending_modules.insert(name); // Send all python calls down a Finisher to avoid blocking // C++ code, and avoid any potential lock cycles. 
finisher.queue(new FunctionContext([this, active_module, name](int) { int r = active_module->load(this); + std::lock_guard l(lock); + pending_modules.erase(name); if (r != 0) { derr << "Failed to run module in active mode ('" << name << "')" << dendl; } else { - std::lock_guard l(lock); auto em = modules.emplace(name, active_module); ceph_assert(em.second); // actually inserted diff --git a/src/mgr/ActivePyModules.h b/src/mgr/ActivePyModules.h index 58b9434fc5e37..8a14a4671bfff 100644 --- a/src/mgr/ActivePyModules.h +++ b/src/mgr/ActivePyModules.h @@ -39,6 +39,9 @@ class PyModuleRegistry; class ActivePyModules { + // module class instances not yet created + std::set> pending_modules; + // module class instances already created std::map> modules; PyModuleConfig &module_config; std::map store_cache; @@ -155,6 +158,9 @@ public: const std::string ¬ify_id); void notify_all(const LogEntry &log_entry); + bool is_pending(std::string_view name) const { + return pending_modules.count(name) > 0; + } bool module_exists(const std::string &name) const { return modules.count(name) > 0; diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc index d74af406ed548..da4a5a3044001 100644 --- a/src/mgr/PyModuleRegistry.cc +++ b/src/mgr/PyModuleRegistry.cc @@ -375,6 +375,9 @@ void PyModuleRegistry::get_health_checks(health_check_map_t *checks) if (obsolete_modules.count(name)) { continue; } + if (active_modules->is_pending(name)) { + continue; + } if (!active_modules->module_exists(name)) { if (failed_modules.find(name) == failed_modules.end() && dependency_modules.find(name) == dependency_modules.end()) { -- 2.39.5