From: Kefu Chai Date: Thu, 25 Jun 2020 02:41:30 +0000 (+0800) Subject: mgr: avoid false alarm of MGR_MODULE_ERROR X-Git-Tag: v16.1.0~1903^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2d9b3abd1fc50e5fcd9ce2c05e8fac41d389b052;p=ceph.git mgr: avoid false alarm of MGR_MODULE_ERROR mgr sends a health report periodically, the report includes the information whether the always-on modules are loaded or not. but the modules are loaded with two steps: 1. load the options and commands exposed by modules. the options and commands are registered using static methods of the subclass of MgrModule. 2. create an instance of the subclass of MgrModule. this is performed in background by a Finisher thread. upon finishing of the construction of the instance, ActivePyModules::start_one() adds the module which successfully creates the class to `modules`. but there is a chance that when mgr sends the health report, the always-on module is still creating its instance of MgrModule subclass, or that task is still pending in the finisher thread. in that case, mgr would add a false error message like ``` 4 mgr modules have failed (MGR_MODULE_ERROR) ``` in the health report. in this change, the modules in pending state are tracked, and mgr will not take a missing always-on module into account while that module is still pending. Signed-off-by: Kefu Chai --- diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc index 27d11f27af98..210f77550908 100644 --- a/src/mgr/ActivePyModules.cc +++ b/src/mgr/ActivePyModules.cc @@ -448,15 +448,17 @@ void ActivePyModules::start_one(PyModuleRef py_module) const auto name = py_module->get_name(); auto active_module = std::make_shared<ActivePyModule>(py_module, clog); + pending_modules.insert(name); // Send all python calls down a Finisher to avoid blocking // C++ code, and avoid any potential lock cycles. 
finisher.queue(new LambdaContext([this, active_module, name](int) { int r = active_module->load(this); + std::lock_guard l(lock); + pending_modules.erase(name); if (r != 0) { derr << "Failed to run module in active mode ('" << name << "')" << dendl; } else { - std::lock_guard l(lock); auto em = modules.emplace(name, active_module); ceph_assert(em.second); // actually inserted diff --git a/src/mgr/ActivePyModules.h b/src/mgr/ActivePyModules.h index 192360400c76..4892f2705fcd 100644 --- a/src/mgr/ActivePyModules.h +++ b/src/mgr/ActivePyModules.h @@ -39,6 +39,9 @@ class PyModuleRegistry; class ActivePyModules { + // module class instances not yet created + std::set<std::string, std::less<>> pending_modules; + // module class instances already created std::map<std::string, std::shared_ptr<ActivePyModule>> modules; PyModuleConfig &module_config; std::map<std::string, std::string> store_cache; @@ -158,6 +161,9 @@ public: const std::string &notify_id); void notify_all(const LogEntry &log_entry); + bool is_pending(std::string_view name) const { + return pending_modules.count(name) > 0; + } bool module_exists(const std::string &name) const { return modules.count(name) > 0; diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc index 2e2e080aa76c..5540339d6b0a 100644 --- a/src/mgr/PyModuleRegistry.cc +++ b/src/mgr/PyModuleRegistry.cc @@ -377,6 +377,9 @@ void PyModuleRegistry::get_health_checks(health_check_map_t *checks) if (obsolete_modules.count(name)) { continue; } + if (active_modules->is_pending(name)) { + continue; + } if (!active_modules->module_exists(name)) { if (failed_modules.find(name) == failed_modules.end() && dependency_modules.find(name) == dependency_modules.end()) {