From 2d9b3abd1fc50e5fcd9ce2c05e8fac41d389b052 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Thu, 25 Jun 2020 10:41:30 +0800 Subject: [PATCH] mgr: avoid false alarm of MGR_MODULE_ERROR mgr sends health reports periodically, and each report includes the information whether the always-on modules are loaded or not. but the modules are loaded in two steps: 1. load the options and commands exposed by modules. the options and commands are registered using static methods of the subclass of MgrModule. 2. create an instance of the subclass of MgrModule. this is performed in the background by a Finisher thread. upon finishing the construction of the instance, ActivePyModules::start_one() adds the module which successfully creates the class to `modules`. but there is a chance that when mgr sends a health report, the always-on module is still creating its instance of the MgrModule subclass, or that task is still pending in the finisher thread. in that case, mgr would add a false error message like ``` 4 mgr modules have failed (MGR_MODULE_ERROR) ``` to the health report. in this change, the modules in pending state are tracked by name, and mgr will not report an always-on module as missing while that module is still pending. Signed-off-by: Kefu Chai --- src/mgr/ActivePyModules.cc | 4 +++- src/mgr/ActivePyModules.h | 6 ++++++ src/mgr/PyModuleRegistry.cc | 3 +++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc index 27d11f27af98f..210f775509085 100644 --- a/src/mgr/ActivePyModules.cc +++ b/src/mgr/ActivePyModules.cc @@ -448,15 +448,17 @@ void ActivePyModules::start_one(PyModuleRef py_module) const auto name = py_module->get_name(); auto active_module = std::make_shared(py_module, clog); + pending_modules.insert(name); // Send all python calls down a Finisher to avoid blocking // C++ code, and avoid any potential lock cycles. 
finisher.queue(new LambdaContext([this, active_module, name](int) { int r = active_module->load(this); + std::lock_guard l(lock); + pending_modules.erase(name); if (r != 0) { derr << "Failed to run module in active mode ('" << name << "')" << dendl; } else { - std::lock_guard l(lock); auto em = modules.emplace(name, active_module); ceph_assert(em.second); // actually inserted diff --git a/src/mgr/ActivePyModules.h b/src/mgr/ActivePyModules.h index 192360400c762..4892f2705fcda 100644 --- a/src/mgr/ActivePyModules.h +++ b/src/mgr/ActivePyModules.h @@ -39,6 +39,9 @@ class PyModuleRegistry; class ActivePyModules { + // module class instances not yet created + std::set> pending_modules; + // module class instances already created std::map> modules; PyModuleConfig &module_config; std::map store_cache; @@ -158,6 +161,9 @@ public: const std::string &notify_id); void notify_all(const LogEntry &log_entry); + bool is_pending(std::string_view name) const { + return pending_modules.count(name) > 0; + } bool module_exists(const std::string &name) const { return modules.count(name) > 0; diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc index 2e2e080aa76c0..5540339d6b0ab 100644 --- a/src/mgr/PyModuleRegistry.cc +++ b/src/mgr/PyModuleRegistry.cc @@ -377,6 +377,9 @@ void PyModuleRegistry::get_health_checks(health_check_map_t *checks) if (obsolete_modules.count(name)) { continue; } + if (active_modules->is_pending(name)) { + continue; + } if (!active_modules->module_exists(name)) { if (failed_modules.find(name) == failed_modules.end() && dependency_modules.find(name) == dependency_modules.end()) { -- 2.39.5