From 712ad57d09a2a96dc1e21cd245fbf1a6372867ba Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 16 Nov 2017 16:30:39 -0500 Subject: [PATCH] mgr: evaluate `can_run` method on modules ...and transmit the result to the monitor in our beacon. Fixes: http://tracker.ceph.com/issues/21502 Signed-off-by: John Spray --- doc/mgr/plugins.rst | 13 +++++ src/messages/MMgrBeacon.h | 89 +++++++++++++++++++++++++++++---- src/mgr/MgrStandby.cc | 17 +++++-- src/mgr/PyModule.cc | 31 +++++++++++- src/mgr/PyModule.h | 25 ++++++++- src/mgr/PyModuleRegistry.h | 8 +++ src/mgr/PyModuleRunner.cc | 3 ++ src/mon/MgrMap.h | 2 +- src/pybind/mgr/influx/module.py | 7 +++ src/pybind/mgr/mgr_module.py | 16 ++++++ 10 files changed, 195 insertions(+), 16 deletions(-) diff --git a/doc/mgr/plugins.rst b/doc/mgr/plugins.rst index 5069fa2a078..9e73b7a58cd 100644 --- a/doc/mgr/plugins.rst +++ b/doc/mgr/plugins.rst @@ -124,6 +124,19 @@ the monitor cluster, use this function: .. automethod:: MgrModule.have_mon_connection +Reporting if your module cannot run +----------------------------------- + +If your module cannot be run for any reason (such as a missing dependency), +then you can report that by implementing the ``can_run`` function. + +.. automethod:: MgrModule.can_run + +Note that this will only work properly if your module can always be imported: +if you are importing a dependency that may be absent, then do it in a +try/except block so that your module can be loaded far enough to use +``can_run`` even if the dependency is absent. + Sending commands ---------------- diff --git a/src/messages/MMgrBeacon.h b/src/messages/MMgrBeacon.h index 54d569cba87..660941d056d 100644 --- a/src/messages/MMgrBeacon.h +++ b/src/messages/MMgrBeacon.h @@ -21,19 +21,46 @@ #include "include/types.h" + class MMgrBeacon : public PaxosServiceMessage { - static const int HEAD_VERSION = 6; + static const int HEAD_VERSION = 7; static const int COMPAT_VERSION = 1; + public: + + class ModuleInfo + { + public: + std::string name; + bool can_run = true; + std::string error_string; + + // We do not include the module's `failed` field in the beacon, + // because it is exposed via health checks. + void encode(bufferlist &bl) const { + ENCODE_START(1, 1, bl); + ::encode(name, bl); + ::encode(can_run, bl); + ::encode(error_string, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::iterator &bl) { + DECODE_START(1, bl); + ::decode(name, bl); + ::decode(can_run, bl); + ::decode(error_string, bl); + DECODE_FINISH(bl); + } + }; + protected: uint64_t gid; entity_addr_t server_addr; bool available; std::string name; uuid_d fsid; - std::set available_modules; - map metadata; ///< misc metadata about this osd // From active daemon to populate MgrMap::services std::map services; @@ -41,6 +68,11 @@ protected: // Only populated during activation std::vector command_descs; + // Information about the modules found locally on this daemon + std::vector modules; + + map metadata; ///< misc metadata about this osd + public: MMgrBeacon() : PaxosServiceMessage(MSG_MGR_BEACON, 0, HEAD_VERSION, COMPAT_VERSION), @@ -50,11 +82,11 @@ public: MMgrBeacon(const uuid_d& fsid_, uint64_t gid_, const std::string &name_, entity_addr_t server_addr_, bool available_, - const std::set& module_list, - map&& metadata) + std::vector&& modules_, + map&& metadata_) : PaxosServiceMessage(MSG_MGR_BEACON, 0, HEAD_VERSION, COMPAT_VERSION), gid(gid_), server_addr(server_addr_), available(available_), name(name_), - fsid(fsid_), available_modules(module_list), metadata(std::move(metadata)) + fsid(fsid_), modules(std::move(modules_)), metadata(std::move(metadata_)) { } @@ -63,7 +95,7 @@ public: bool get_available() const { return available; } const std::string& get_name() const { return name; } const uuid_d& get_fsid() const { return fsid; } - std::set& get_available_modules() { return available_modules; } + std::vector& get_modules() { return modules; } const std::map& get_metadata() const { return metadata; } @@ -87,6 +119,18 @@ public: return command_descs; } + std::set get_available_modules() const + { + std::set result; + for (const auto &i : modules) { + result.insert(i.name); + } + + return result; + } + + + private: ~MMgrBeacon() override {} @@ -103,15 +147,26 @@ public: void encode_payload(uint64_t features) override { using ceph::encode; paxos_encode(); + encode(server_addr, payload, features); encode(gid, payload); encode(available, payload); encode(name, payload); encode(fsid, payload); - encode(available_modules, payload); + + // Fill out old-style list of module names (deprecated by + // later field of full ModuleInfos) + std::set module_names; + for (const auto &info : modules) { + module_names.insert(info.name); + } + encode(module_names, payload); + encode(command_descs, payload); encode(metadata, payload); encode(services, payload); + + encode(modules, payload); } void decode_payload() override { bufferlist::iterator p = payload.begin(); @@ -124,7 +179,17 @@ public: decode(fsid, p); } if (header.version >= 3) { - decode(available_modules, p); + std::set module_name_list; + decode(module_name_list, p); + // Only need to unpack this field if we won't have the full + // ModuleInfo structures added in v7 + if (header.version < 7) { + for (const auto &i : module_name_list) { + ModuleInfo info; + info.name = i; + modules.push_back(std::move(info)); + } + } } if (header.version >= 4) { decode(command_descs, p); @@ -135,7 +200,13 @@ public: if (header.version >= 6) { decode(services, p); } + if (header.version >= 7) { + decode(modules, p); + } } }; +WRITE_CLASS_ENCODER(MMgrBeacon::ModuleInfo); + + #endif diff --git a/src/mgr/MgrStandby.cc b/src/mgr/MgrStandby.cc index be0f6af1891..91f7737813b 100644 --- a/src/mgr/MgrStandby.cc +++ b/src/mgr/MgrStandby.cc @@ -151,8 +151,19 @@ void MgrStandby::send_beacon() assert(lock.is_locked_by_me()); dout(1) << state_str() << dendl; - set modules; - py_module_registry.list_modules(&modules); + std::list modules; + py_module_registry.get_modules(&modules); + + // Construct a list of the info about each loaded module + // which we will transmit to the monitor. + std::vector module_info; + for (const auto &module : modules) { + MMgrBeacon::ModuleInfo info; + info.name = module->get_name(); + info.error_string = module->get_error_string(); + info.can_run = module->get_can_run(); + module_info.push_back(std::move(info)); + } // Whether I think I am available (request MgrMonitor to set me // as available in the map) @@ -170,7 +181,7 @@ void MgrStandby::send_beacon() g_conf->name.get_id(), addr, available, - modules, + std::move(module_info), std::move(metadata)); if (available) { diff --git a/src/mgr/PyModule.cc b/src/mgr/PyModule.cc index 1e1d2659a46..3d4eb7e90fc 100644 --- a/src/mgr/PyModule.cc +++ b/src/mgr/PyModule.cc @@ -233,8 +233,37 @@ int PyModule::load(PyThreadState *pMainThreadState) // Populate can_run by interrogating the module's callback that // may check for dependencies etc - // TODO + PyObject *pCanRunTuple = PyObject_CallMethod(pClass, + const_cast("can_run"), const_cast("()")); + if (pCanRunTuple != nullptr) { + if (PyTuple_Check(pCanRunTuple) && PyTuple_Size(pCanRunTuple) == 2) { + PyObject *pCanRun = PyTuple_GetItem(pCanRunTuple, 0); + PyObject *can_run_str = PyTuple_GetItem(pCanRunTuple, 1); + if (!PyBool_Check(pCanRun) || !PyString_Check(can_run_str)) { + derr << "Module " << get_name() + << " returned wrong type in can_run" << dendl; + can_run = false; + } else { + can_run = (pCanRun == Py_True); + if (!can_run) { + error_string = PyString_AsString(can_run_str); + dout(4) << "Module " << get_name() + << " reported that it cannot run: " + << error_string << dendl; + } + } + } else { + derr << "Module " << get_name() + << " returned wrong type in can_run" << dendl; + can_run = false; + } + Py_DECREF(pCanRunTuple); + } else { + derr << "Exception calling can_run on " << get_name() << dendl; + derr << handle_pyerror() << dendl; + can_run = false; + } } return 0; } diff --git a/src/mgr/PyModule.h b/src/mgr/PyModule.h index ae56e11167e..d3c39f4abb3 100644 --- a/src/mgr/PyModule.h +++ b/src/mgr/PyModule.h @@ -45,6 +45,9 @@ private: // (e.g. throwing an exception from serve()) bool failed = false; + // Populated if loaded, can_run or failed indicates a problem + std::string error_string; + public: SafeThreadState pMyThreadState; PyObject *pClass = nullptr; @@ -59,9 +62,27 @@ public: int load(PyThreadState *pMainThreadState); + /** + * Mark the module as failed, recording the reason in the error + * string. + */ + void fail(const std::string &reason) + { + Mutex::Locker l(lock); + failed = true; + error_string = reason; + } + bool is_enabled() const { Mutex::Locker l(lock) ; return enabled; } - const std::string &get_name() const - { Mutex::Locker l(lock) ; return module_name; } + const std::string &get_name() const { + Mutex::Locker l(lock) ; return module_name; + } + const std::string &get_error_string() const { + Mutex::Locker l(lock) ; return error_string; + } + bool get_can_run() const { + Mutex::Locker l(lock) ; return can_run; + } }; typedef std::shared_ptr PyModuleRef; diff --git a/src/mgr/PyModuleRegistry.h b/src/mgr/PyModuleRegistry.h index b821cb5c8ac..3eaa9f64092 100644 --- a/src/mgr/PyModuleRegistry.h +++ b/src/mgr/PyModuleRegistry.h @@ -59,6 +59,14 @@ public: void list_modules(std::set *modules); + void get_modules(std::list *modules_out) + { + Mutex::Locker l(lock); + for (const auto &i : modules) { + modules_out->push_back(i.second); + } + } + PyModuleRegistry(LogChannelRef clog_) : clog(clog_) {} diff --git a/src/mgr/PyModuleRunner.cc b/src/mgr/PyModuleRunner.cc index 9e5a0da32dd..81d74473504 100644 --- a/src/mgr/PyModuleRunner.cc +++ b/src/mgr/PyModuleRunner.cc @@ -70,6 +70,9 @@ int PyModuleRunner::serve() << ": " << exc_msg; derr << get_name() << ".serve:" << dendl; derr << handle_pyerror() << dendl; + + py_module->fail(exc_msg); + return -EINVAL; } diff --git a/src/mon/MgrMap.h b/src/mon/MgrMap.h index 79d5b3b0c22..15d5c0cb291 100644 --- a/src/mon/MgrMap.h +++ b/src/mon/MgrMap.h @@ -28,7 +28,7 @@ public: std::set available_modules; StandbyInfo(uint64_t gid_, const std::string &name_, - std::set& am) + const std::set& am) : gid(gid_), name(name_), available_modules(am) {} diff --git a/src/pybind/mgr/influx/module.py b/src/pybind/mgr/influx/module.py index 890dc2360cd..dd288dc718f 100644 --- a/src/pybind/mgr/influx/module.py +++ b/src/pybind/mgr/influx/module.py @@ -58,6 +58,13 @@ class Module(MgrModule): def get_fsid(self): return self.get('mon_map')['fsid'] + @staticmethod + def can_run(): + if InfluxDBClient is not None: + return True, "" + else: + return False, "influxdb python module not found" + def get_latest(self, daemon_type, daemon_name, stat): data = self.get_counter(daemon_type, daemon_name, stat)[stat] if data: diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py index adb59a0cff0..ea72df830a0 100644 --- a/src/pybind/mgr/mgr_module.py +++ b/src/pybind/mgr/mgr_module.py @@ -633,3 +633,19 @@ class MgrModule(ceph_module.BaseMgrModule): self._rados.connect() return self._rados + + @staticmethod + def can_run(): + """ + Implement this function to report whether the module's dependencies + are met. For example, if the module needs to import a particular + dependency to work, then use a try/except around the import at + file scope, and then report here if the import failed. + + This will be called in a blocking way from the C++ code, so do not + do any I/O that could block in this function. + + :return a 2-tuple consisting of a boolean and explanatory string + """ + + return True, "" -- 2.39.5