- Reduced data availability
- Degraded data redundancy
- objects misplaced
+ - Synthetic exception in serve
+ - influxdb python module not found
- cephfs_test_runner:
modules:
- tasks.mgr.test_module_selftest
"{1} are required".format(
len(self.mgr_cluster.mgr_ids), self.MGRS_REQUIRED))
- # Restart all the daemons
+ # Stop all the daemons
for daemon in self.mgr_cluster.mgr_daemons.values():
daemon.stop()
for mgr_id in self.mgr_cluster.mgr_ids:
self.mgr_cluster.mgr_fail(mgr_id)
+ # Unload all non-default plugins
+ loaded = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "module", "ls"))['enabled_modules']
+ unload_modules = set(loaded) - {"status", "restful"}
+
+ for m in unload_modules:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "module", "disable", m)
+
+ # Start all the daemons
for daemon in self.mgr_cluster.mgr_daemons.values():
daemon.restart()
"mgr", "self-test", "run"
)
self.assertEqual(exc_raised.exception.exitstatus, errno.EIO)
+
+ # A health alert should be raised for a module that has thrown
+ # an exception from its serve() method
+ self.wait_for_health(
+ "Module 'selftest' has failed: Synthetic exception in serve",
+ timeout=30)
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "module", "disable", "selftest")
+
+ self.wait_for_health_clear(timeout=30)
return result;
}
+void PyModuleRegistry::get_health_checks(health_check_map_t *checks)
+{
+ Mutex::Locker l(lock);
+ /*
+ * Fill *checks with the health checks passed through from the active
+ * modules, then add at most two summary checks of our own:
+ * - MGR_MODULE_DEPENDENCY (HEALTH_WARN)
+ * - MGR_MODULE_ERROR (HEALTH_ERR)
+ * When active_modules is not set, *checks is left untouched.
+ */
+ // Only the active mgr reports module issues
+ if (active_modules) {
+ active_modules->get_health_checks(checks); // pass through what the modules report
+
+ std::map<std::string, std::string> dependency_modules; // module name -> error string
+ std::map<std::string, std::string> failed_modules; // module name -> error string
+
+ /*
+ * Break up broken modules into two categories:
+ * - can_run=false: the module is working fine but explicitly
+ * telling you that a dependency is missing. Advise the user to
+ * read the message from the module and install what's missing.
+ * - failed=true or loaded=false: something unexpected is broken,
+ * either at runtime (from serve()) or at load time. This indicates
+ * a bug and the user should be guided to inspect the mgr log
+ * to investigate and gather evidence.
+ *
+ * The can_run test is evaluated first, so an enabled module with
+ * can_run=false is only ever counted as a dependency problem, even
+ * if it is also unloaded or failed. Note that is_failed() is
+ * honoured whether or not the module is enabled, whereas the
+ * can_run and is_loaded tests only apply to enabled modules.
+ */
+
+ for (const auto &i : modules) {
+ auto module = i.second; // NOTE(review): copies i.second; a const reference may suffice -- confirm the mapped type
+ if (module->is_enabled() && !module->get_can_run()) {
+ dependency_modules[module->get_name()] = module->get_error_string();
+ } else if ((module->is_enabled() && !module->is_loaded())
+ || module->is_failed()) {
+ failed_modules[module->get_name()] = module->get_error_string();
+ }
+ }
+
+ if (!dependency_modules.empty()) {
+ std::ostringstream ss;
+ if (dependency_modules.size() == 1) { // single module: name it and include its error string
+ auto iter = dependency_modules.begin();
+ ss << "Module '" << iter->first << "' has failed dependency: "
+ << iter->second;
+ } else if (dependency_modules.size() > 1) { // always true here: map is non-empty and size() != 1
+ ss << dependency_modules.size() << " modules have failed dependencies"; // count only; per-module error strings are not included
+ }
+ checks->add("MGR_MODULE_DEPENDENCY", HEALTH_WARN, ss.str()); // one check, however many modules are affected
+ }
+
+ if (!failed_modules.empty()) {
+ std::ostringstream ss;
+ if (failed_modules.size() == 1) { // single module: name it and include its error string
+ auto iter = failed_modules.begin();
+ ss << "Module '" << iter->first << "' has failed: "
+ << iter->second;
+ } else if (failed_modules.size() > 1) { // always true here: map is non-empty and size() != 1
+ ss << failed_modules.size() << " modules have failed"; // count only; per-module error strings are not included
+ }
+ checks->add("MGR_MODULE_ERROR", HEALTH_ERR, ss.str()); // one check, raised at error severity
+ }
+ }
+}
+
return modules.at(module_name);
}
+ /**
+ * Pass through command to the named module for execution.
+ *
+ * The command must exist in the COMMANDS reported by the module. If it
+ * doesn't then this will abort.
+ *
+ * If ActivePyModules has not been instantiated yet then this will
+ * return EAGAIN.
+ */
int handle_command(
std::string const &module_name,
const cmdmap_t &cmdmap,
std::stringstream *ds,
std::stringstream *ss);
+ /**
+ * Pass through health checks reported by modules, and report any
+ * modules that cannot run because of a missing dependency, that
+ * failed to load, or that have failed at runtime (e.g. an unhandled
+ * exception in serve()).
+ *
+ * Nothing is added to *checks unless the active modules have been
+ * instantiated.
+ */
+ void get_health_checks(health_check_map_t *checks);
+
// FIXME: breaking interface so that I don't have to go rewrite all
// the places that call into these (for now)
// >>>
}
}
- void get_health_checks(health_check_map_t *checks)
- {
- assert(active_modules);
- active_modules->get_health_checks(checks);
- }
std::map<std::string, std::string> get_services() const
{
assert(active_modules);