From f9a0425d19354d858f65f0d9c5bfcce90c6f70c8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 18 Jun 2021 16:58:58 -0400 Subject: [PATCH] global/signal_handler: expose function to generate crash dump Signed-off-by: Sage Weil --- src/global/signal_handler.cc | 115 ++++++++++++++++++++--------------- src/global/signal_handler.h | 9 +++ 2 files changed, 74 insertions(+), 50 deletions(-) diff --git a/src/global/signal_handler.cc b/src/global/signal_handler.cc index a015ee9f31f..f4e41a5cf97 100644 --- a/src/global/signal_handler.cc +++ b/src/global/signal_handler.cc @@ -145,55 +145,14 @@ static int parse_from_os_release( return 0; } -static void handle_oneshot_fatal_signal(int signum) -{ - constexpr static pid_t NULL_TID{0}; - static std::atomic handler_tid{NULL_TID}; - if (auto expected{NULL_TID}; - !handler_tid.compare_exchange_strong(expected, ceph_gettid())) { - if (expected == ceph_gettid()) { - // The handler code may itself trigger a SIGSEGV if the heap is corrupt. - // In that case, SIG_DFL followed by return specifies that the default - // signal handler -- presumably dump core -- will handle it. - signal(signum, SIG_DFL); - } else { - // Huh, another thread got into troubles while we are handling the fault. - // If this is i.e. SIGSEGV handler, returning means retrying the faulty - // instruction one more time, and thus all those extra threads will run - // into a busy-wait basically. - } - return; - } - - char buf[1024]; - char pthread_name[16] = {0}; //limited by 16B include terminating null byte. - int r = ceph_pthread_getname(pthread_self(), pthread_name, sizeof(pthread_name)); - (void)r; -#if defined(__sun) - char message[SIG2STR_MAX]; - sig2str(signum,message); - snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n " - "in thread %llx thread_name:%s\n", message, (unsigned long long)pthread_self(), - pthread_name); -#else - snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n " - "in thread %llx thread_name:%s\n", sig_str(signum), (unsigned long long)pthread_self(), - pthread_name); -#endif - dout_emergency(buf); - pidfile_remove(); - // TODO: don't use an ostringstream here. It could call malloc(), which we - // don't want inside a signal handler. - // Also fix the backtrace code not to allocate memory. - BackTrace bt(1); - ostringstream oss; - bt.print(oss); - dout_emergency(oss.str()); - - char base[PATH_MAX] = { 0 }; +void generate_crash_dump(char *base, + const BackTrace& bt, + std::map *extra) +{ if (g_ceph_context && g_ceph_context->_conf->crash_dir.size()) { + // -- crash dump -- // id ostringstream idss; @@ -205,7 +164,7 @@ static void handle_oneshot_fatal_signal(int signum) string id = idss.str(); std::replace(id.begin(), id.end(), ' ', '_'); - snprintf(base, sizeof(base), "%s/%s", + snprintf(base, PATH_MAX, "%s/%s", g_ceph_context->_conf->crash_dir.c_str(), id.c_str()); int r = ::mkdir(base, 0700); @@ -300,9 +259,14 @@ static void handle_oneshot_fatal_signal(int signum) } } - // backtrace bt.dump(&jf); + if (extra) { + for (auto& i : *extra) { + jf.dump_string(i.first, i.second); + } + } + jf.close_section(); ostringstream oss; jf.flush(oss); @@ -315,6 +279,57 @@ static void handle_oneshot_fatal_signal(int signum) ::creat(fn, 0444); } } +} + +static void handle_oneshot_fatal_signal(int signum) +{ + constexpr static pid_t NULL_TID{0}; + static std::atomic handler_tid{NULL_TID}; + if (auto expected{NULL_TID}; + !handler_tid.compare_exchange_strong(expected, ceph_gettid())) { + if (expected == ceph_gettid()) { + // The handler code may itself trigger a SIGSEGV if the heap is corrupt. + // In that case, SIG_DFL followed by return specifies that the default + // signal handler -- presumably dump core -- will handle it. + signal(signum, SIG_DFL); + } else { + // Huh, another thread got into troubles while we are handling the fault. + // If this is i.e. SIGSEGV handler, returning means retrying the faulty + // instruction one more time, and thus all those extra threads will run + // into a busy-wait basically. + } + return; + } + + char buf[1024]; + char pthread_name[16] = {0}; //limited by 16B include terminating null byte. + int r = ceph_pthread_getname(pthread_self(), pthread_name, sizeof(pthread_name)); + (void)r; +#if defined(__sun) + char message[SIG2STR_MAX]; + sig2str(signum,message); + snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n " + "in thread %llx thread_name:%s\n", message, (unsigned long long)pthread_self(), + pthread_name); +#else + snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n " + "in thread %llx thread_name:%s\n", sig_str(signum), (unsigned long long)pthread_self(), + pthread_name); +#endif + dout_emergency(buf); + pidfile_remove(); + + // TODO: don't use an ostringstream here. It could call malloc(), which we + // don't want inside a signal handler. + // Also fix the backtrace code not to allocate memory. + BackTrace bt(1); + ostringstream oss; + bt.print(oss); + dout_emergency(oss.str()); + + char crash_base[PATH_MAX] = { 0 }; + + generate_crash_dump(crash_base, bt); // avoid recursion back into logging code if that is where // we got the SEGV. @@ -331,9 +346,9 @@ static void handle_oneshot_fatal_signal(int signum) g_ceph_context->_log->dump_recent(); - if (base[0]) { + if (crash_base[0]) { char fn[PATH_MAX*2]; - snprintf(fn, sizeof(fn)-1, "%s/log", base); + snprintf(fn, sizeof(fn)-1, "%s/log", crash_base); g_ceph_context->_log->set_log_file(fn); g_ceph_context->_log->reopen_log_file(); g_ceph_context->_log->dump_recent(); diff --git a/src/global/signal_handler.h b/src/global/signal_handler.h index 166c1bb2066..c7cb84a1066 100644 --- a/src/global/signal_handler.h +++ b/src/global/signal_handler.h @@ -17,8 +17,13 @@ #include #include "acconfig.h" +#include +#include typedef void (*signal_handler_t)(int); +namespace ceph { + struct BackTrace; +} #if defined(HAVE_SIGDESCR_NP) # define sig_str(signum) sigdescr_np(signum) @@ -53,4 +58,8 @@ void register_async_signal_handler_oneshot(int signum, signal_handler_t handler) /// uninstall a safe async signal callback void unregister_async_signal_handler(int signum, signal_handler_t handler); +void generate_crash_dump(char *base, + const ceph::BackTrace& bt, + std::map *extra = 0); + #endif -- 2.39.5