From: Sage Weil Date: Fri, 18 Jun 2021 21:02:40 +0000 (-0400) Subject: mgr: generate crash dump for python exceptions X-Git-Tag: v17.1.0~1552^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=719d5e1836c95bcbc446dbeb7d8f03adefd02a3e;p=ceph.git mgr: generate crash dump for python exceptions Extend handle_pyerror() to generate a crash dump. Pass some additional context through from the callers (including the ability to not generate a crash dump in the CLI handler case). Extra crash dump fields look like so: "backtrace": [ " File \"/home/sage/src/ceph/src/pybind/mgr/balancer/module.py\", line 652, in serve\n self.ifail()", " File \"/home/sage/src/ceph/src/pybind/mgr/balancer/module.py\", line 648, in ifail\n raise RuntimeError('test')", ], "mgr_module": "balancer", "mgr_module_caller": "PyModuleRunner::serve", "mgr_python_exception": "RuntimeError", Notably, the backtrace deliberately excludes the 'value' of the exception, as that may leak identifying information about the system. Instead, we only include the exception *type* and the portion of the traceback that identifies the call path (where in the code we crashed). Also note: a side-effect of this change is that module exceptions will trigger cluster health warnings about daemon crashes. Signed-off-by: Sage Weil --- diff --git a/src/mgr/ActivePyModule.cc b/src/mgr/ActivePyModule.cc index c776acfd03b..20cf1b945fa 100644 --- a/src/mgr/ActivePyModule.cc +++ b/src/mgr/ActivePyModule.cc @@ -42,7 +42,7 @@ int ActivePyModule::load(ActivePyModules *py_modules) Py_DECREF(pArgs); if (pClassInstance == nullptr) { derr << "Failed to construct class in '" << get_name() << "'" << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, get_name(), "ActivePyModule::load") << dendl; return -EINVAL; } else { dout(1) << "Constructed class from module: " << get_name() << dendl; @@ -71,7 +71,7 @@ void ActivePyModule::notify(const std::string ¬ify_type, const std::string &n Py_DECREF(pValue); } else { derr << get_name() << ".notify:" << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, get_name(), "ActivePyModule::notify") << dendl; // FIXME: callers can't be expected to handle a python module // that has spontaneously broken, but Mgr() should provide // a hook to unload misbehaving modules when they have an @@ -104,7 +104,7 @@ void ActivePyModule::notify_clog(const LogEntry &log_entry) Py_DECREF(pValue); } else { derr << get_name() << ".notify_clog:" << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, get_name(), "ActivePyModule::notify_clog") << dendl; // FIXME: callers can't be expected to handle a python module // that has spontaneously broken, but Mgr() should provide // a hook to unload misbehaving modules when they have an @@ -159,7 +159,8 @@ PyObject *ActivePyModule::dispatch_remote( // Because the caller is in a different context, we can't let this // exception bubble up, need to re-raise it from the caller's // context later. - *err = handle_pyerror(); + std::string caller = "ActivePyModule::dispatch_remote "s + method; + *err = handle_pyerror(true, get_name(), caller); } else { dout(20) << "Success calling '" << method << "'" << dendl; } diff --git a/src/mgr/ActivePyModule.h b/src/mgr/ActivePyModule.h index 1cbf6d18ac2..13c1c200429 100644 --- a/src/mgr/ActivePyModule.h +++ b/src/mgr/ActivePyModule.h @@ -1,4 +1,3 @@ - // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab /* @@ -25,6 +24,7 @@ #include "mgr/Gil.h" #include "PyModuleRunner.h" +#include "PyModule.h" #include #include @@ -98,5 +98,4 @@ public: }; -std::string handle_pyerror(); diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc index eeb2d8cba58..f4ee27e8c31 100644 --- a/src/mgr/ActivePyModules.cc +++ b/src/mgr/ActivePyModules.cc @@ -987,7 +987,8 @@ PyObject *construct_with_capsule( PyObject *module = PyImport_ImportModule(module_name.c_str()); if (!module) { derr << "Failed to import python module:" << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, module_name, + "construct_with_capsule "s + module_name + " " + clsname) << dendl; } ceph_assert(module); @@ -995,7 +996,8 @@ PyObject *construct_with_capsule( module, (const char*)clsname.c_str()); if (!wrapper_type) { derr << "Failed to get python type:" << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, module_name, + "construct_with_capsule "s + module_name + " " + clsname) << dendl; } ceph_assert(wrapper_type); @@ -1008,7 +1010,8 @@ PyObject *construct_with_capsule( auto wrapper_instance = PyObject_CallObject(wrapper_type, pArgs); if (wrapper_instance == nullptr) { derr << "Failed to construct python OSDMap:" << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, module_name, + "construct_with_capsule "s + module_name + " " + clsname) << dendl; } ceph_assert(wrapper_instance != nullptr); Py_DECREF(pArgs); diff --git a/src/mgr/PyModule.cc b/src/mgr/PyModule.cc index ff1ff85e7ea..fc03d68df1f 100644 --- a/src/mgr/PyModule.cc +++ b/src/mgr/PyModule.cc @@ -19,6 +19,10 @@ #include "PyModule.h" +#include "include/stringify.h" +#include "common/BackTrace.h" +#include "global/signal_handler.h" + #include "common/debug.h" #include "common/errno.h" #define dout_context g_ceph_context @@ -40,49 +44,83 @@ std::string PyModule::mgr_store_prefix = "mgr/"; #undef BOOST_BIND_GLOBAL_PLACEHOLDERS #include #include "include/ceph_assert.h" // boost clobbers this + + // decode a Python exception into a string -std::string handle_pyerror() +std::string handle_pyerror( + bool crash_dump, + std::string module, + std::string caller) { - using namespace boost::python; - using namespace boost; - - PyObject *exc, *val, *tb; - object formatted_list, formatted; - PyErr_Fetch(&exc, &val, &tb); - PyErr_NormalizeException(&exc, &val, &tb); - handle<> hexc(exc), hval(allow_null(val)), htb(allow_null(tb)); - object traceback(import("traceback")); - if (!tb) { - object format_exception_only(traceback.attr("format_exception_only")); - try { - formatted_list = format_exception_only(hexc, hval); - } catch (error_already_set const &) { - // error while processing exception object - // returning only the exception string value - PyObject *name_attr = PyObject_GetAttrString(exc, "__name__"); - std::stringstream ss; - ss << PyUnicode_AsUTF8(name_attr) << ": " << PyUnicode_AsUTF8(val); - Py_XDECREF(name_attr); - ss << "\nError processing exception object: " << peek_pyerror(); - return ss.str(); - } - } else { - object format_exception(traceback.attr("format_exception")); - try { - formatted_list = format_exception(hexc, hval, htb); - } catch (error_already_set const &) { - // error while processing exception object - // returning only the exception string value - PyObject *name_attr = PyObject_GetAttrString(exc, "__name__"); - std::stringstream ss; - ss << PyUnicode_AsUTF8(name_attr) << ": " << PyUnicode_AsUTF8(val); - Py_XDECREF(name_attr); - ss << "\nError processing exception object: " << peek_pyerror(); - return ss.str(); - } + using namespace boost::python; + using namespace boost; + + PyObject *exc, *val, *tb; + object formatted_list, formatted; + PyErr_Fetch(&exc, &val, &tb); + PyErr_NormalizeException(&exc, &val, &tb); + handle<> hexc(exc), hval(allow_null(val)), htb(allow_null(tb)); + + object traceback(import("traceback")); + if (!tb) { + object format_exception_only(traceback.attr("format_exception_only")); + try { + formatted_list = format_exception_only(hexc, hval); + } catch (error_already_set const &) { + // error while processing exception object + // returning only the exception string value + PyObject *name_attr = PyObject_GetAttrString(exc, "__name__"); + std::stringstream ss; + ss << PyUnicode_AsUTF8(name_attr) << ": " << PyUnicode_AsUTF8(val); + Py_XDECREF(name_attr); + ss << "\nError processing exception object: " << peek_pyerror(); + return ss.str(); } - formatted = str("").join(formatted_list); - return extract(formatted); + } else { + object format_exception(traceback.attr("format_exception")); + try { + formatted_list = format_exception(hexc, hval, htb); + } catch (error_already_set const &) { + // error while processing exception object + // returning only the exception string value + PyObject *name_attr = PyObject_GetAttrString(exc, "__name__"); + std::stringstream ss; + ss << PyUnicode_AsUTF8(name_attr) << ": " << PyUnicode_AsUTF8(val); + Py_XDECREF(name_attr); + ss << "\nError processing exception object: " << peek_pyerror(); + return ss.str(); + } + } + formatted = str("").join(formatted_list); + + if (!module.empty()) { + std::list bt_strings; + std::map extra; + + extra["mgr_module"] = module; + extra["mgr_module_caller"] = caller; + PyObject *name_attr = PyObject_GetAttrString(exc, "__name__"); + extra["mgr_python_exception"] = stringify(PyUnicode_AsUTF8(name_attr)); + Py_XDECREF(name_attr); + + PyObject *l = get_managed_object(formatted_list, boost::python::tag); + if (PyList_Check(l)) { + // skip first line, which is: "Traceback (most recent call last):\n" + // omit last line, which contains a runtime value that may be identifying! + for (unsigned i = 1; i < PyList_Size(l) - 1; ++i) { + PyObject *val = PyList_GET_ITEM(l, i); + std::string s = PyUnicode_AsUTF8(val); + s.resize(s.size() - 1); // strip off newline character + bt_strings.push_back(s); + } + } + PyBackTrace bt(bt_strings); + + char crash_path[PATH_MAX]; + generate_crash_dump(crash_path, bt, &extra); + } + + return extract(formatted); } /** @@ -405,7 +443,7 @@ int PyModule::load(PyThreadState *pMainThreadState) Py_DECREF(pCanRunTuple); } else { derr << "Exception calling can_run on " << get_name() << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, get_name(), "PyModule::load") << dendl; can_run = false; } } @@ -464,7 +502,7 @@ int PyModule::register_options(PyObject *cls) } else { derr << "Exception calling _register_options on " << get_name() << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, module_name, "PyModule::register_options") << dendl; } return 0; } @@ -479,7 +517,7 @@ int PyModule::load_commands() } else { derr << "Exception calling _register_commands on " << get_name() << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, module_name, "PyModule::load_commands") << dendl; } int r = walk_dict_list("COMMANDS", [this](PyObject *pCommand) -> int { @@ -633,7 +671,7 @@ int PyModule::load_subclass_of(const char* base_class, PyObject** py_class) if (!mgr_module) { error_string = peek_pyerror(); derr << "Module not found: 'mgr_module'" << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, module_name, "PyModule::load_subclass_of") << dendl; return -EINVAL; } auto mgr_module_type = PyObject_GetAttrString(mgr_module, base_class); @@ -641,7 +679,7 @@ int PyModule::load_subclass_of(const char* base_class, PyObject** py_class) if (!mgr_module_type) { error_string = peek_pyerror(); derr << "Unable to import MgrModule from mgr_module" << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, module_name, "PyModule::load_subclass_of") << dendl; return -EINVAL; } @@ -650,7 +688,7 @@ int PyModule::load_subclass_of(const char* base_class, PyObject** py_class) if (!plugin_module) { error_string = peek_pyerror(); derr << "Module not found: '" << module_name << "'" << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, module_name, "PyModule::load_subclass_of") << dendl; return -ENOENT; } auto locals = PyModule_GetDict(plugin_module); diff --git a/src/mgr/PyModule.h b/src/mgr/PyModule.h index 64fa373b8bf..037a7a22b00 100644 --- a/src/mgr/PyModule.h +++ b/src/mgr/PyModule.h @@ -26,7 +26,9 @@ class MonClient; -std::string handle_pyerror(); +std::string handle_pyerror(bool generate_crash_dump = false, + std::string module = {}, + std::string caller = {}); std::string peek_pyerror(); diff --git a/src/mgr/PyModuleRunner.cc b/src/mgr/PyModuleRunner.cc index e27f7f40551..57c90fdab57 100644 --- a/src/mgr/PyModuleRunner.cc +++ b/src/mgr/PyModuleRunner.cc @@ -63,7 +63,7 @@ int PyModuleRunner::serve() << "' while running on mgr." << g_conf()->name.get_id() << ": " << exc_msg; derr << get_name() << ".serve:" << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, get_name(), "PyModuleRunner::serve") << dendl; py_module->fail(exc_msg); @@ -86,7 +86,7 @@ void PyModuleRunner::shutdown() Py_DECREF(pValue); } else { derr << "Failed to invoke shutdown() on " << get_name() << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, get_name(), "PyModuleRunner::shutdown") << dendl; } dead = true; diff --git a/src/mgr/StandbyPyModules.cc b/src/mgr/StandbyPyModules.cc index 86ee8550cad..337aab02927 100644 --- a/src/mgr/StandbyPyModules.cc +++ b/src/mgr/StandbyPyModules.cc @@ -114,7 +114,7 @@ int StandbyPyModule::load() Py_DECREF(pArgs); if (pClassInstance == nullptr) { derr << "Failed to construct class in '" << get_name() << "'" << dendl; - derr << handle_pyerror() << dendl; + derr << handle_pyerror(true, get_name(), "StandbyPyModule::load") << dendl; return -EINVAL; } else { dout(1) << "Constructed class from module: " << get_name() << dendl;