git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
add ceph-crash service
author: Dan Mick <dan.mick@redhat.com>
Tue, 24 Jul 2018 05:05:01 +0000 (22:05 -0700)
committer: Dan Mick <dan.mick@redhat.com>
Thu, 9 Aug 2018 01:37:43 +0000 (18:37 -0700)
ceph-crash runs from systemd and watches /var/lib/ceph/crash
for crashdumps, posting them to the mgrs using the mgr's
crash plugin

Signed-off-by: Dan Mick <dan.mick@redhat.com>
ceph.spec.in
debian/ceph-base.dirs
debian/ceph-base.install
src/CMakeLists.txt
src/ceph-crash.in [new file with mode: 0755]
systemd/50-ceph.preset
systemd/CMakeLists.txt
systemd/ceph-crash.service.in [new file with mode: 0644]

index 278aa4f724a3796e91ac5e7a316e46d0d2d8dca6..c732d8cc31508edf1324ea7f967614b13a29de24 100644 (file)
@@ -1041,13 +1041,14 @@ mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mon
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/osd
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mds
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mgr
+mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/crash
+mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/crash/posted
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/radosgw
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-osd
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-mds
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rgw
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-mgr
 mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rbd
-mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/crash
 
 %if 0%{?suse_version}
 # create __pycache__ directories and their contents
@@ -1063,6 +1064,7 @@ rm -rf %{buildroot}
 %files
 
 %files base
+%{_bindir}/ceph-crash
 %{_bindir}/crushtool
 %{_bindir}/monmaptool
 %{_bindir}/osdmaptool
@@ -1079,6 +1081,7 @@ rm -rf %{buildroot}
 %{_libdir}/ceph/erasure-code/libec_*.so*
 %dir %{_libdir}/ceph/compressor
 %{_libdir}/ceph/compressor/libceph_*.so*
+%{_unitdir}/ceph-crash.service
 %ifarch x86_64
 %dir %{_libdir}/ceph/crypto
 %{_libdir}/ceph/crypto/libceph_*.so*
@@ -1114,6 +1117,8 @@ rm -rf %{buildroot}
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/ceph-kvstore-tool.8*
 #set up placeholder directories
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/crash
+%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/crash/posted
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds
@@ -1126,22 +1131,22 @@ rm -rf %{buildroot}
 %if 0%{?suse_version}
 %fillup_only
 if [ $1 -eq 1 ] ; then
-/usr/bin/systemctl preset ceph.target >/dev/null 2>&1 || :
+/usr/bin/systemctl preset ceph.target ceph-crash.service >/dev/null 2>&1 || :
 fi
 %endif
 %if 0%{?fedora} || 0%{?rhel}
-%systemd_post ceph.target
+%systemd_post ceph.target ceph-crash.service
 %endif
 if [ $1 -eq 1 ] ; then
-/usr/bin/systemctl start ceph.target >/dev/null 2>&1 || :
+/usr/bin/systemctl start ceph.target ceph-crash.service >/dev/null 2>&1 || :
 fi
 
 %preun base
 %if 0%{?suse_version}
-%service_del_preun ceph.target
+%service_del_preun ceph.target ceph-crash.service
 %endif
 %if 0%{?fedora} || 0%{?rhel}
-%systemd_preun ceph.target
+%systemd_preun ceph.target ceph-crash.service
 %endif
 
 %postun base
index 262e6f6a508e2d6477b1ba542059f5ef224e876a..6f580230a65f4efce617b276df9f77d02e9e9ed1 100644 (file)
@@ -5,3 +5,4 @@ var/lib/ceph/bootstrap-rgw
 var/lib/ceph/bootstrap-rbd
 var/lib/ceph/tmp
 var/lib/ceph/crash
+var/lib/ceph/crash/posted
index 5f366ca172be1b6f17056a449ef50484b79935c4..24731dc89db10c5537610c1992cbf26f7fa01d3b 100644 (file)
@@ -1,4 +1,6 @@
 etc/init.d/ceph
+lib/systemd/system/ceph-crash.service
+usr/bin/ceph-crash
 usr/bin/ceph-debugpack
 usr/bin/ceph-run
 usr/bin/crushtool
index f7ba403422d67bcf555d25b40df552f38cd4a187..556e9c775d75f4ba27237eecc0456fafaeab0f5b 100644 (file)
@@ -581,6 +581,9 @@ configure_file(${CMAKE_SOURCE_DIR}/src/init-ceph.in
 configure_file(ceph-post-file.in
   ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-post-file @ONLY)
 
+configure_file(ceph-crash.in
+  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-crash @ONLY)
+
 if(WITH_TESTS)
   install(PROGRAMS
     ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-debugpack
@@ -591,6 +594,7 @@ endif()
 install(PROGRAMS
   ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph
   ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-post-file
+  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-crash
   ${CMAKE_SOURCE_DIR}/src/ceph-run
   ${CMAKE_SOURCE_DIR}/src/ceph-clsinfo
   DESTINATION bin)
diff --git a/src/ceph-crash.in b/src/ceph-crash.in
new file mode 100755 (executable)
index 0000000..b43cd78
--- /dev/null
@@ -0,0 +1,83 @@
+#!@PYTHON_EXECUTABLE@
+# -*- mode:python -*-
+# vim: ts=4 sw=4 smarttab expandtab
+
+import argparse
+import logging
+import os
+import subprocess
+import sys
+import time
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+
+def parse_args():
+    """Parse the command line and return the argparse namespace.
+
+    Options:
+      -p/--path   base directory scanned for crash dumps
+                  (default /var/lib/ceph/crash)
+      -d/--delay  minutes between scans, as a float (default 10.0);
+                  0 means scan once and exit
+    """
+    ap = argparse.ArgumentParser()
+    # (flags, keyword arguments) for every supported option
+    option_table = (
+        (('-p', '--path'),
+         dict(default='/var/lib/ceph/crash',
+              help='base path to monitor for crash dumps')),
+        (('-d', '--delay'),
+         dict(default=10.0, type=float,
+              help='minutes to delay between scans (0 to exit after one)')),
+    )
+    for flags, settings in option_table:
+        ap.add_argument(*flags, **settings)
+    return ap.parse_args()
+
+
+def post_crash(path):
+    """Post the crash metadata stored in <path>/meta via `ceph crash post`.
+
+    The contents of the meta file are fed to the command on stdin; the
+    command is wrapped in `timeout 30` so it cannot hang indefinitely.
+
+    :param path: directory of a single crash dump (must contain 'meta')
+    :returns: exit status of the command (0 on success)
+    :raises IOError/OSError: if the meta file cannot be read or the
+        command cannot be started
+    """
+    # Read the payload before spawning anything, so an unreadable meta
+    # file does not leave a child process behind.  Binary mode because
+    # the pipe to the child is binary: a text-mode str would make
+    # communicate() raise TypeError on Python 3.
+    with open(os.path.join(path, 'meta'), 'rb') as f:
+        meta = f.read()
+    pr = subprocess.Popen(
+        args=['timeout', '30', 'ceph', 'crash', 'post', '-i', '-'],
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    # communicate() waits for the child, so returncode is set afterwards
+    _, stderr = pr.communicate(input=meta)
+    rc = pr.returncode
+    if rc != 0:
+        log.warning('post %s failed: %s' %
+                    (path, stderr.decode('utf-8', 'replace')))
+    return rc
+
+
+def scrape_path(path):
+    """Scan `path` once and post every completed crash dump found there.
+
+    A sub-directory counts as a crash dump when it contains a 'meta'
+    file, and as complete when it also contains a 'done' file.  Each
+    complete dump is handed to post_crash(); on success its directory is
+    moved to <path>/posted/<name>.  Incomplete dumps are left in place
+    for a later scan.
+
+    :param path: base directory holding one sub-directory per crash
+    """
+    for p in os.listdir(path):
+        crashpath = os.path.join(path, p)
+        metapath = os.path.join(crashpath, 'meta')
+        donepath = os.path.join(crashpath, 'done')
+        if not os.path.isfile(metapath):
+            # no meta file: not a crash dump, nothing to post
+            continue
+        if not os.path.isfile(donepath):
+            # hang out just for a bit; either we interrupted the dump
+            # or the daemon crashed before finishing it
+            time.sleep(1)
+            if not os.path.isfile(donepath):
+                # Still incomplete: skip only this entry.  Returning here
+                # would stop the scan and leave the remaining dumps
+                # unposted until a later pass.
+                continue
+        # ok, we can process this one
+        rc = post_crash(crashpath)
+        if rc == 0:
+            os.rename(crashpath, os.path.join(path, 'posted/', p))
+            log.debug(
+                "posted %s and renamed %s -> %s " %
+                (metapath, p, os.path.join('posted/', p))
+            )
+
+
+def main():
+    """Entry point: wait for the 'posted' directory, then scan in a loop.
+
+    Until <path>/posted exists, an error is logged every 30 seconds.
+    After that the base path is scraped repeatedly, sleeping the
+    configured delay (given in minutes) between passes; a delay of 0
+    makes the process exit with status 0 after the first pass.
+    """
+    opts = parse_args()
+    posted_dir = os.path.join(opts.path, 'posted')
+
+    # Keep complaining until the directory for posted dumps shows up
+    while True:
+        if os.path.isdir(posted_dir):
+            break
+        log.error("%s does not exist; please create" % posted_dir)
+        time.sleep(30)
+
+    # --delay is given in minutes; convert once to seconds
+    delay_seconds = opts.delay * 60.0
+    log.info("monitoring path %s, delay %ds" % (opts.path, delay_seconds))
+    one_shot = (opts.delay == 0)
+    while True:
+        scrape_path(opts.path)
+        if one_shot:
+            sys.exit(0)
+        time.sleep(delay_seconds)
+
+
+if __name__ == "__main__":
+    main()
index 34c0801f9f2ba6bbf7803c93579201f913ee9bca..da3ee0b7b0bcf36cdc0668da9a0c2684a4a17d4b 100644 (file)
@@ -4,3 +4,4 @@ enable ceph-mgr.target
 enable ceph-mon.target
 enable ceph-osd.target
 enable ceph-radosgw.target
+enable ceph-crash.service
index 67497f9a891220d10c1ea948340d2d3d919ba010..56be619d44cabe80a2ad401a40d025f8479367c0 100644 (file)
@@ -4,6 +4,7 @@ set(CEPH_SYSTEMD_ENV_DIR "/etc/sysconfig"
   CACHE PATH "Location for systemd service environmental variable settings files")
 set(SYSTEMD_ENV_FILE "${CEPH_SYSTEMD_ENV_DIR}/ceph")
 foreach(service
+    ceph-crash
     ceph-fuse@
     ceph-mds@
     ceph-mgr@
diff --git a/systemd/ceph-crash.service.in b/systemd/ceph-crash.service.in
new file mode 100644 (file)
index 0000000..8304dd6
--- /dev/null
@@ -0,0 +1,13 @@
+[Unit]
+Description=Ceph crash dump collector
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/ceph-crash
+Restart=always
+RestartSec=10
+StartLimitInterval=10min
+StartLimitBurst=10
+
+[Install]
+WantedBy=ceph.target