From f22347db5509dc2da3cac6bc9f7a9b627798d3ac Mon Sep 17 00:00:00 2001 From: Mykola Golub Date: Thu, 24 Jan 2019 11:04:24 +0200 Subject: [PATCH] mgr: load modules in finisher to avoid potential lock cycles Fixes: https://tracker.ceph.com/issues/37997 Signed-off-by: Mykola Golub --- src/mgr/ActivePyModules.cc | 35 +++++++++++++----------- src/mgr/ActivePyModules.h | 2 +- src/mgr/MgrStandby.cc | 53 ++++++++++++++++++++++--------------- src/mgr/MgrStandby.h | 1 + src/mgr/PyModuleRegistry.cc | 18 +++---------- src/mgr/PyModuleRegistry.h | 2 +- src/mgr/StandbyPyModules.cc | 50 ++++++++++++++++++---------------- src/mgr/StandbyPyModules.h | 9 +++++-- 8 files changed, 91 insertions(+), 79 deletions(-) diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc index 0e72ad75c8f..1f185714972 100644 --- a/src/mgr/ActivePyModules.cc +++ b/src/mgr/ActivePyModules.cc @@ -382,27 +382,30 @@ PyObject *ActivePyModules::get_python(const std::string &what) } } -int ActivePyModules::start_one(PyModuleRef py_module) +void ActivePyModules::start_one(PyModuleRef py_module) { std::lock_guard l(lock); ceph_assert(modules.count(py_module->get_name()) == 0); - modules[py_module->get_name()].reset(new ActivePyModule(py_module, clog)); - auto active_module = modules.at(py_module->get_name()).get(); - - int r = active_module->load(this); - if (r != 0) { - // the class instance wasn't created... remove it from the set of activated - // modules so commands and notifications aren't delivered. - modules.erase(py_module->get_name()); - return r; - } else { - dout(4) << "Starting thread for " << py_module->get_name() << dendl; - active_module->thread.create(active_module->get_thread_name()); - - return 0; - } + const auto name = py_module->get_name(); + modules[name].reset(new ActivePyModule(py_module, clog)); + auto active_module = modules.at(name).get(); + + // Send all python calls down a Finisher to avoid blocking + // C++ code, and avoid any potential lock cycles. + finisher.queue(new FunctionContext([this, active_module, name](int) { + int r = active_module->load(this); + if (r != 0) { + derr << "Failed to run module in active mode ('" << name << "')" + << dendl; + std::lock_guard l(lock); + modules.erase(name); + } else { + dout(4) << "Starting thread for " << name << dendl; + active_module->thread.create(active_module->get_thread_name()); + } + })); } void ActivePyModules::shutdown() diff --git a/src/mgr/ActivePyModules.h b/src/mgr/ActivePyModules.h index df1477f8f35..d1761283334 100644 --- a/src/mgr/ActivePyModules.h +++ b/src/mgr/ActivePyModules.h @@ -161,7 +161,7 @@ public: int init(); void shutdown(); - int start_one(PyModuleRef py_module); + void start_one(PyModuleRef py_module); void dump_server(const std::string &hostname, const DaemonStateCollection &dmc, diff --git a/src/mgr/MgrStandby.cc b/src/mgr/MgrStandby.cc index 7bfe8e4772d..f055ce28837 100644 --- a/src/mgr/MgrStandby.cc +++ b/src/mgr/MgrStandby.cc @@ -53,6 +53,7 @@ MgrStandby::MgrStandby(int argc, const char **argv) : clog(log_client.create_channel(CLOG_CHANNEL_CLUSTER)), audit_clog(log_client.create_channel(CLOG_CHANNEL_AUDIT)), lock("MgrStandby::lock"), + finisher(g_ceph_context, "MgrStandby", "mgrsb-fin"), timer(g_ceph_context, lock), py_module_registry(clog), active_mgr(nullptr), @@ -107,6 +108,9 @@ int MgrStandby::init() std::lock_guard l(lock); + // Start finisher + finisher.start(); + // Initialize Messenger client_messenger->add_dispatcher_tail(this); client_messenger->add_dispatcher_head(&objecter); @@ -257,7 +261,6 @@ void MgrStandby::tick() void MgrStandby::handle_signal(int signum) { - std::lock_guard l(lock); ceph_assert(signum == SIGINT || signum == SIGTERM); derr << "*** Got signal " << sig_str(signum) << " ***" << dendl; shutdown(); @@ -265,29 +268,35 @@ void MgrStandby::handle_signal(int signum) void MgrStandby::shutdown() { - // Expect already to be locked as we're called from signal handler - ceph_assert(lock.is_locked_by_me()); - - dout(4) << "Shutting down" << dendl; + finisher.queue(new FunctionContext([&](int) { + std::lock_guard l(lock); + + dout(4) << "Shutting down" << dendl; + + // stop sending beacon first, i use monc to talk with monitors + timer.shutdown(); + // client uses monc and objecter + client.shutdown(); + mgrc.shutdown(); + // stop monc, so mon won't be able to instruct me to shutdown/activate after + // the active_mgr is stopped + monc.shutdown(); + if (active_mgr) { + active_mgr->shutdown(); + } - // stop sending beacon first, i use monc to talk with monitors - timer.shutdown(); - // client uses monc and objecter - client.shutdown(); - mgrc.shutdown(); - // stop monc, so mon won't be able to instruct me to shutdown/activate after - // the active_mgr is stopped - monc.shutdown(); - if (active_mgr) { - active_mgr->shutdown(); - } + py_module_registry.shutdown(); - py_module_registry.shutdown(); + // objecter is used by monc and active_mgr + objecter.shutdown(); + // client_messenger is used by all of them, so stop it in the end + client_messenger->shutdown(); + })); - // objecter is used by monc and active_mgr - objecter.shutdown(); - // client_messenger is used by all of them, so stop it in the end - client_messenger->shutdown(); + // Then stop the finisher to ensure its enqueued contexts aren't going + // to touch references to the things we're about to tear down + finisher.wait_for_empty(); + finisher.stop(); } void MgrStandby::respawn() @@ -405,7 +414,7 @@ void MgrStandby::handle_mgr_map(MMgrMap* mmap) // I am the standby and someone else is active, start modules // in standby mode to do redirects if needed if (!py_module_registry.is_standby_running()) { - py_module_registry.standby_start(monc); + py_module_registry.standby_start(monc, finisher); } } } diff --git a/src/mgr/MgrStandby.h b/src/mgr/MgrStandby.h index cdbd572e2ec..7adab68d701 100644 --- a/src/mgr/MgrStandby.h +++ b/src/mgr/MgrStandby.h @@ -50,6 +50,7 @@ protected: LogChannelRef clog, audit_clog; Mutex lock; + Finisher finisher; SafeTimer timer; PyModuleRegistry py_module_registry; diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc index 5ef411a5d55..d0b3c87e5b4 100644 --- a/src/mgr/PyModuleRegistry.cc +++ b/src/mgr/PyModuleRegistry.cc @@ -128,7 +128,7 @@ bool PyModuleRegistry::handle_mgr_map(const MgrMap &mgr_map_) -void PyModuleRegistry::standby_start(MonClient &mc) +void PyModuleRegistry::standby_start(MonClient &mc, Finisher &f) { std::lock_guard l(lock); ceph_assert(active_modules == nullptr); @@ -141,7 +141,7 @@ void PyModuleRegistry::standby_start(MonClient &mc) dout(4) << "Starting modules in standby mode" << dendl; standby_modules.reset(new StandbyPyModules( - mgr_map, module_config, clog, mc)); + mgr_map, module_config, clog, mc, f)); std::set failed_modules; for (const auto &i : modules) { @@ -155,13 +155,7 @@ void PyModuleRegistry::standby_start(MonClient &mc) if (i.second->pStandbyClass) { dout(4) << "starting module " << i.second->get_name() << dendl; - int r = standby_modules->start_one(i.second); - if (r != 0) { - derr << "failed to start module '" << i.second->get_name() - << "'" << dendl;; - failed_modules.insert(i.second->get_name()); - // Continue trying to load any other modules - } + standby_modules->start_one(i.second); } else { dout(4) << "skipping module '" << i.second->get_name() << "' because " "it does not implement a standby mode" << dendl; @@ -210,11 +204,7 @@ void PyModuleRegistry::active_start( } dout(4) << "Starting " << i.first << dendl; - int r = active_modules->start_one(i.second); - if (r != 0) { - derr << "Failed to run module in active mode ('" << i.first << "')" - << dendl; - } + active_modules->start_one(i.second); } } diff --git a/src/mgr/PyModuleRegistry.h b/src/mgr/PyModuleRegistry.h index 6868aec65bc..eccc2bfb0f6 100644 --- a/src/mgr/PyModuleRegistry.h +++ b/src/mgr/PyModuleRegistry.h @@ -99,7 +99,7 @@ public: MonClient &mc, LogChannelRef clog_, LogChannelRef audit_clog_, Objecter &objecter_, Client &client_, Finisher &f, DaemonServer &server); - void standby_start(MonClient &mc); + void standby_start(MonClient &mc, Finisher &f); bool is_standby_running() const { diff --git a/src/mgr/StandbyPyModules.cc b/src/mgr/StandbyPyModules.cc index 0682a268119..988107190b3 100644 --- a/src/mgr/StandbyPyModules.cc +++ b/src/mgr/StandbyPyModules.cc @@ -13,6 +13,7 @@ #include "StandbyPyModules.h" +#include "common/Finisher.h" #include "common/debug.h" #include "common/errno.h" @@ -36,9 +37,11 @@ StandbyPyModules::StandbyPyModules( const MgrMap &mgr_map_, PyModuleConfig &module_config, LogChannelRef clog_, - MonClient &monc_) + MonClient &monc_, + Finisher &f) : state(module_config, monc_), - clog(clog_) + clog(clog_), + finisher(f) { state.set_mgr_map(mgr_map_); } @@ -72,29 +75,30 @@ void StandbyPyModules::shutdown() modules.clear(); } -int StandbyPyModules::start_one(PyModuleRef py_module) +void StandbyPyModules::start_one(PyModuleRef py_module) { std::lock_guard l(lock); - const std::string &module_name = py_module->get_name(); - - ceph_assert(modules.count(module_name) == 0); - - modules[module_name].reset(new StandbyPyModule( - state, - py_module, clog)); - - int r = modules[module_name]->load(); - if (r != 0) { - modules.erase(module_name); - return r; - } else { - dout(4) << "Starting thread for " << module_name << dendl; - // Giving Thread the module's module_name member as its - // char* thread name: thread must not outlive module class lifetime. - modules[module_name]->thread.create( - modules[module_name]->get_name().c_str()); - return 0; - } + const auto name = py_module->get_name(); + + ceph_assert(modules.count(name) == 0); + + modules[name].reset(new StandbyPyModule(state, py_module, clog)); + auto standby_module = modules.at(name).get(); + + // Send all python calls down a Finisher to avoid blocking + // C++ code, and avoid any potential lock cycles. + finisher.queue(new FunctionContext([this, standby_module, name](int) { + int r = standby_module->load(); + if (r != 0) { + derr << "Failed to run module in standby mode ('" << name << "')" + << dendl; + std::lock_guard l(lock); + modules.erase(name); + } else { + dout(4) << "Starting thread for " << name << dendl; + standby_module->thread.create(standby_module->get_thread_name()); + } + })); } int StandbyPyModule::load() diff --git a/src/mgr/StandbyPyModules.h b/src/mgr/StandbyPyModules.h index 02d26800cd1..0de0b07eb61 100644 --- a/src/mgr/StandbyPyModules.h +++ b/src/mgr/StandbyPyModules.h @@ -26,6 +26,8 @@ #include "mon/MgrMap.h" #include "mgr/PyModuleRunner.h" +class Finisher; + /** * State that is read by all modules running in standby mode */ @@ -105,15 +107,18 @@ private: LogChannelRef clog; + Finisher &finisher; + public: StandbyPyModules( const MgrMap &mgr_map_, PyModuleConfig &module_config, LogChannelRef clog_, - MonClient &monc); + MonClient &monc, + Finisher &f); - int start_one(PyModuleRef py_module); + void start_one(PyModuleRef py_module); void shutdown(); -- 2.39.5