From 30a61c18ac0af459db7e71d62a7d7f198874dacb Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Wed, 13 Nov 2019 11:00:44 -0800 Subject: [PATCH] mgr: handle race with finisher after shutdown The module is deleted when modules is cleared. The notify method is called on a destructed class. Fixes: https://tracker.ceph.com/issues/42744 Signed-off-by: Patrick Donnelly (cherry picked from commit 40c4b9ac9d8e2b2c17269affada304f5e1554974) Conflicts: src/mgr/ActivePyModules.cc - nautilus has capitalized lock.Lock and lock.Unlock - finisher.queue calls look different in nautilus --- src/mgr/ActivePyModule.cc | 19 ++++++++++++++++ src/mgr/ActivePyModules.cc | 45 ++++++++++++++++++-------------------- src/mgr/ActivePyModules.h | 2 +- src/mgr/PyModuleRunner.cc | 2 ++ src/mgr/PyModuleRunner.h | 5 +++++ 5 files changed, 48 insertions(+), 25 deletions(-) diff --git a/src/mgr/ActivePyModule.cc b/src/mgr/ActivePyModule.cc index 402c7cad3a0cc..852c17be81dc9 100644 --- a/src/mgr/ActivePyModule.cc +++ b/src/mgr/ActivePyModule.cc @@ -53,6 +53,11 @@ int ActivePyModule::load(ActivePyModules *py_modules) void ActivePyModule::notify(const std::string ¬ify_type, const std::string ¬ify_id) { + if (is_dead()) { + dout(5) << "cancelling notify " << notify_type << " " << notify_id << dendl; + return; + } + ceph_assert(pClassInstance != nullptr); Gil gil(py_module->pMyThreadState, true); @@ -76,6 +81,11 @@ void ActivePyModule::notify(const std::string ¬ify_type, const std::string &n void ActivePyModule::notify_clog(const LogEntry &log_entry) { + if (is_dead()) { + dout(5) << "cancelling notify_clog" << dendl; + return; + } + ceph_assert(pClassInstance != nullptr); Gil gil(py_module->pMyThreadState, true); @@ -159,6 +169,11 @@ PyObject *ActivePyModule::dispatch_remote( void ActivePyModule::config_notify() { + if (is_dead()) { + dout(5) << "cancelling config_notify" << dendl; + return; + } + Gil gil(py_module->pMyThreadState, true); dout(20) << "Calling " << py_module->get_name() << ".config_notify..." << dendl; @@ -234,6 +249,10 @@ int ActivePyModule::handle_command( void ActivePyModule::get_health_checks(health_check_map_t *checks) { + if (is_dead()) { + dout(5) << "cancelling get_health_checks" << dendl; + return; + } checks->merge(health_checks); } diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc index 71a6356813e8c..5ca7deb3da478 100644 --- a/src/mgr/ActivePyModules.cc +++ b/src/mgr/ActivePyModules.cc @@ -435,11 +435,11 @@ void ActivePyModules::start_one(PyModuleRef py_module) { std::lock_guard l(lock); - ceph_assert(modules.count(py_module->get_name()) == 0); - const auto name = py_module->get_name(); - modules[name].reset(new ActivePyModule(py_module, clog)); - auto active_module = modules.at(name).get(); + auto em = modules.emplace(name, + std::make_shared(py_module, clog)); + ceph_assert(em.second); // actually inserted + auto& active_module = em.first->second; // Send all python calls down a Finisher to avoid blocking // C++ code, and avoid any potential lock cycles. @@ -462,10 +462,7 @@ void ActivePyModules::shutdown() std::lock_guard locker(lock); // Signal modules to drop out of serve() and/or tear down resources - for (auto &i : modules) { - auto module = i.second.get(); - const auto& name = i.first; - + for (auto& [name, module] : modules) { lock.Unlock(); dout(10) << "calling module " << name << " shutdown()" << dendl; module->shutdown(); @@ -475,11 +472,11 @@ void ActivePyModules::shutdown() // For modules implementing serve(), finish the threads where we // were running that. - for (auto &i : modules) { + for (auto& [name, module] : modules) { lock.Unlock(); - dout(10) << "joining module " << i.first << dendl; - i.second->thread.join(); - dout(10) << "joined module " << i.first << dendl; + dout(10) << "joining module " << name << dendl; + module->thread.join(); + dout(10) << "joined module " << name << dendl; lock.Lock(); } @@ -495,10 +492,10 @@ void ActivePyModules::notify_all(const std::string ¬ify_type, std::lock_guard l(lock); dout(10) << __func__ << ": notify_all " << notify_type << dendl; - for (auto& i : modules) { - auto module = i.second.get(); + for (auto& [name, module] : modules) { // Send all python calls down a Finisher to avoid blocking // C++ code, and avoid any potential lock cycles. + dout(15) << "queuing notify to " << name << dendl; finisher.queue(new FunctionContext([module, notify_type, notify_id](int r){ module->notify(notify_type, notify_id); })); @@ -510,14 +507,14 @@ void ActivePyModules::notify_all(const LogEntry &log_entry) std::lock_guard l(lock); dout(10) << __func__ << ": notify_all (clog)" << dendl; - for (auto& i : modules) { - auto module = i.second.get(); + for (auto& [name, module] : modules) { // Send all python calls down a Finisher to avoid blocking // C++ code, and avoid any potential lock cycles. // // Note intentional use of non-reference lambda binding on // log_entry: we take a copy because caller's instance is // probably ephemeral. + dout(15) << "queuing notify (clog) to " << name << dendl; finisher.queue(new FunctionContext([module, log_entry](int r){ module->notify_clog(log_entry); })); @@ -689,11 +686,10 @@ std::map ActivePyModules::get_services() const { std::map result; std::lock_guard l(lock); - for (const auto& i : modules) { - const auto &module = i.second.get(); + for (const auto& [name, module] : modules) { std::string svc_str = module->get_uri(); if (!svc_str.empty()) { - result[module->get_name()] = svc_str; + result[name] = svc_str; } } @@ -974,8 +970,9 @@ int ActivePyModules::handle_command( void ActivePyModules::get_health_checks(health_check_map_t *checks) { std::lock_guard l(lock); - for (auto& p : modules) { - p.second->get_health_checks(checks); + for (auto& [name, module] : modules) { + dout(15) << "getting health checks for" << name << dendl; + module->get_health_checks(checks); } } @@ -1011,10 +1008,10 @@ void ActivePyModules::get_progress_events(std::map *e void ActivePyModules::config_notify() { std::lock_guard l(lock); - for (auto& i : modules) { - auto module = i.second.get(); + for (auto& [name, module] : modules) { // Send all python calls down a Finisher to avoid blocking // C++ code, and avoid any potential lock cycles. + dout(15) << "notify (config) " << name << dendl; finisher.queue(new FunctionContext([module](int r){ module->config_notify(); })); @@ -1028,7 +1025,7 @@ void ActivePyModules::set_uri(const std::string& module_name, dout(4) << " module " << module_name << " set URI '" << uri << "'" << dendl; - modules[module_name]->set_uri(uri); + modules.at(module_name)->set_uri(uri); } OSDPerfMetricQueryID ActivePyModules::add_osd_perf_query( diff --git a/src/mgr/ActivePyModules.h b/src/mgr/ActivePyModules.h index ab9b637d9ee9d..58b9434fc5e37 100644 --- a/src/mgr/ActivePyModules.h +++ b/src/mgr/ActivePyModules.h @@ -39,7 +39,7 @@ class PyModuleRegistry; class ActivePyModules { - std::map> modules; + std::map> modules; PyModuleConfig &module_config; std::map store_cache; DaemonStateIndex &daemon_state; diff --git a/src/mgr/PyModuleRunner.cc b/src/mgr/PyModuleRunner.cc index 403c8a9f183f3..cde54f215543e 100644 --- a/src/mgr/PyModuleRunner.cc +++ b/src/mgr/PyModuleRunner.cc @@ -88,6 +88,8 @@ void PyModuleRunner::shutdown() derr << "Failed to invoke shutdown() on " << get_name() << dendl; derr << handle_pyerror() << dendl; } + + dead = true; } void PyModuleRunner::log(int level, const std::string &record) diff --git a/src/mgr/PyModuleRunner.h b/src/mgr/PyModuleRunner.h index 08c85a1326360..52f60be55f88d 100644 --- a/src/mgr/PyModuleRunner.h +++ b/src/mgr/PyModuleRunner.h @@ -47,6 +47,8 @@ protected: void *entry() override; }; + bool is_dead() const { return dead; } + std::string thread_name; public: @@ -79,6 +81,9 @@ public: PyModuleRunnerThread thread; std::string const &get_name() const { return py_module->get_name(); } + +private: + bool dead = false; }; -- 2.39.5