global/signal_handler: expose function to generate crash dump

author Sage Weil <sage@newdream.net>

Fri, 18 Jun 2021 20:58:58 +0000 (16:58 -0400)

committer Sage Weil <sage@newdream.net>

Wed, 23 Jun 2021 17:00:49 +0000 (13:00 -0400)
author Sage Weil <sage@newdream.net>
Fri, 18 Jun 2021 20:58:58 +0000 (16:58 -0400)
committer Sage Weil <sage@newdream.net>
Wed, 23 Jun 2021 17:00:49 +0000 (13:00 -0400)
diff --git a/src/global/signal_handler.cc b/src/global/signal_handler.cc

index a015ee9f31f11e05ad4dfd6ac80a1d0ce488dd96..f4e41a5cf970237fb27d4b8d4a113802a6d8bda3 100644 (file)
--- a/src/global/signal_handler.cc
+++ b/src/global/signal_handler.cc
@@ -145,55 +145,14 @@ static int parse_from_os_release(
    return 0;
  }
  
-static void handle_oneshot_fatal_signal(int signum)
-{
-  constexpr static pid_t NULL_TID{0};
-  static std::atomic<pid_t> handler_tid{NULL_TID};
-  if (auto expected{NULL_TID};
-      !handler_tid.compare_exchange_strong(expected, ceph_gettid())) {
-    if (expected == ceph_gettid()) {
-      // The handler code may itself trigger a SIGSEGV if the heap is corrupt.
-      // In that case, SIG_DFL followed by return specifies that the default
-      // signal handler -- presumably dump core -- will handle it.
-      signal(signum, SIG_DFL);
-    } else {
-      // Huh, another thread got into troubles while we are handling the fault.
-      // If this is i.e. SIGSEGV handler, returning means retrying the faulty
-      // instruction one more time, and thus all those extra threads will run
-      // into a busy-wait basically.
-    }
-    return;
-  }
-
-  char buf[1024];
-  char pthread_name[16] = {0}; //limited by 16B include terminating null byte.
-  int r = ceph_pthread_getname(pthread_self(), pthread_name, sizeof(pthread_name));
-  (void)r;
-#if defined(__sun)
-  char message[SIG2STR_MAX];
-  sig2str(signum,message);
-  snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n "
-           "in thread %llx thread_name:%s\n", message, (unsigned long long)pthread_self(),
-           pthread_name);
-#else
-  snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n "
-           "in thread %llx thread_name:%s\n", sig_str(signum), (unsigned long long)pthread_self(),
-           pthread_name);
-#endif
-  dout_emergency(buf);
-  pidfile_remove();
  
-  // TODO: don't use an ostringstream here. It could call malloc(), which we
-  // don't want inside a signal handler.
-  // Also fix the backtrace code not to allocate memory.
-  BackTrace bt(1);
-  ostringstream oss;
-  bt.print(oss);
-  dout_emergency(oss.str());
-
-  char base[PATH_MAX] = { 0 };
+void generate_crash_dump(char *base,
+                        const BackTrace& bt,
+                        std::map<std::string,std::string> *extra)
+{
    if (g_ceph_context &&
        g_ceph_context->_conf->crash_dir.size()) {
+
      // -- crash dump --
      // id
      ostringstream idss;
@@ -205,7 +164,7 @@ static void handle_oneshot_fatal_signal(int signum)
      string id = idss.str();
      std::replace(id.begin(), id.end(), ' ', '_');
  
-    snprintf(base, sizeof(base), "%s/%s",
+    snprintf(base, PATH_MAX, "%s/%s",
              g_ceph_context->_conf->crash_dir.c_str(),
              id.c_str());
      int r = ::mkdir(base, 0700);
@@ -300,9 +259,14 @@ static void handle_oneshot_fatal_signal(int signum)
           }
         }
  
-       // backtrace
         bt.dump(&jf);
  
+       if (extra) {
+         for (auto& i : *extra) {
+           jf.dump_string(i.first, i.second);
+         }
+       }
+
         jf.close_section();
         ostringstream oss;
         jf.flush(oss);
@@ -315,6 +279,57 @@ static void handle_oneshot_fatal_signal(int signum)
        ::creat(fn, 0444);
      }
    }
+}
+
+static void handle_oneshot_fatal_signal(int signum)
+{
+  constexpr static pid_t NULL_TID{0};
+  static std::atomic<pid_t> handler_tid{NULL_TID};
+  if (auto expected{NULL_TID};
+      !handler_tid.compare_exchange_strong(expected, ceph_gettid())) {
+    if (expected == ceph_gettid()) {
+      // The handler code may itself trigger a SIGSEGV if the heap is corrupt.
+      // In that case, SIG_DFL followed by return specifies that the default
+      // signal handler -- presumably dump core -- will handle it.
+      signal(signum, SIG_DFL);
+    } else {
+      // Huh, another thread got into trouble while we are handling the fault.
+      // If this is e.g. a SIGSEGV handler, returning means retrying the faulty
+      // instruction one more time, and thus all those extra threads will
+      // basically end up in a busy-wait.
+    }
+    return;
+  }
+
+  char buf[1024];
+  char pthread_name[16] = {0}; // limited to 16 bytes, including the terminating null byte.
+  int r = ceph_pthread_getname(pthread_self(), pthread_name, sizeof(pthread_name));
+  (void)r;
+#if defined(__sun)
+  char message[SIG2STR_MAX];
+  sig2str(signum,message);
+  snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n "
+           "in thread %llx thread_name:%s\n", message, (unsigned long long)pthread_self(),
+           pthread_name);
+#else
+  snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n "
+           "in thread %llx thread_name:%s\n", sig_str(signum), (unsigned long long)pthread_self(),
+           pthread_name);
+#endif
+  dout_emergency(buf);
+  pidfile_remove();
+
+  // TODO: don't use an ostringstream here. It could call malloc(), which we
+  // don't want inside a signal handler.
+  // Also fix the backtrace code not to allocate memory.
+  BackTrace bt(1);
+  ostringstream oss;
+  bt.print(oss);
+  dout_emergency(oss.str());
+
+  char crash_base[PATH_MAX] = { 0 };
+  
+  generate_crash_dump(crash_base, bt);
  
    // avoid recursion back into logging code if that is where
    // we got the SEGV.
@@ -331,9 +346,9 @@ static void handle_oneshot_fatal_signal(int signum)
  
      g_ceph_context->_log->dump_recent();
  
-    if (base[0]) {
+    if (crash_base[0]) {
        char fn[PATH_MAX*2];
-      snprintf(fn, sizeof(fn)-1, "%s/log", base);
+      snprintf(fn, sizeof(fn)-1, "%s/log", crash_base);
        g_ceph_context->_log->set_log_file(fn);
        g_ceph_context->_log->reopen_log_file();
        g_ceph_context->_log->dump_recent();
diff --git a/src/global/signal_handler.h b/src/global/signal_handler.h

index 166c1bb20669715bf812e62cdac005cb8eef9663..c7cb84a10668d857d6f5d1b2c3aec1ef291479ac 100644 (file)
--- a/src/global/signal_handler.h
+++ b/src/global/signal_handler.h
@@ -17,8 +17,13 @@
  
  #include <signal.h>
  #include "acconfig.h"
+#include <map>
+#include <string>
  
  typedef void (*signal_handler_t)(int);
+namespace ceph {
+  struct BackTrace;
+}
  
  #if defined(HAVE_SIGDESCR_NP)
  # define sig_str(signum) sigdescr_np(signum)
@@ -53,4 +58,8 @@ void register_async_signal_handler_oneshot(int signum, signal_handler_t handler)
  /// uninstall a safe async signal callback
  void unregister_async_signal_handler(int signum, signal_handler_t handler);
  
+void generate_crash_dump(char *base,
+                        const ceph::BackTrace& bt,
+                        std::map<std::string,std::string> *extra = 0);
+
  #endif
author	Sage Weil <sage@newdream.net>
	Fri, 18 Jun 2021 20:58:58 +0000 (16:58 -0400)
committer	Sage Weil <sage@newdream.net>
	Wed, 23 Jun 2021 17:00:49 +0000 (13:00 -0400)
src/global/signal_handler.cc		patch \| blob \| history
src/global/signal_handler.h		patch \| blob \| history