git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
qa/tasks/ceph: gather crash dumps
author Sage Weil <sage@redhat.com>
Sun, 28 Oct 2018 14:26:00 +0000 (09:26 -0500)
committer Sage Weil <sage@redhat.com>
Tue, 20 Nov 2018 12:49:20 +0000 (06:49 -0600)
Gather crash dumps as part of the teuthology run.

Signed-off-by: Sage Weil <sage@redhat.com>
qa/tasks/ceph.py

index a55a69d85715999dad3afdc33a4bd7a874ebb807..87bf5b346e74b944c95c7397f864fcf44fd3a7ca 100644 (file)
@@ -17,6 +17,7 @@ import socket
 
 from paramiko import SSHException
 from ceph_manager import CephManager, write_conf
+from tarfile import ReadError
 from tasks.cephfs.filesystem import Filesystem
 from teuthology import misc as teuthology
 from teuthology import contextutil
@@ -67,6 +68,35 @@ def generate_caps(type_):
         yield capability
 
 
+@contextlib.contextmanager
+def ceph_crash(ctx, config):
+    """
+    On exit, pull /var/lib/ceph/crash from each remote into <ctx.archive>/remote/<shortname>/crash
+    """
+    try:
+        yield

+    finally:
+        if ctx.archive is not None:  # nothing is gathered when no archive directory is set
+            log.info('Archiving crash dumps...')
+            path = os.path.join(ctx.archive, 'remote')
+            try:
+                os.makedirs(path)
+            except OSError as e:
+                pass  # any OSError is ignored; presumably the directory already exists (ceph_log creates the same path)
+            for remote in ctx.cluster.remotes.iterkeys():
+                sub = os.path.join(path, remote.shortname)
+                try:
+                    os.makedirs(sub)
+                except OSError as e:
+                    pass  # any OSError is ignored; presumably the per-remote directory already exists
+                try:
+                    teuthology.pull_directory(remote, '/var/lib/ceph/crash',
+                                              os.path.join(sub, 'crash'))
+                except ReadError as e:
+                    pass  # best effort: tarfile.ReadError is swallowed; presumably no crash dir on this remote - TODO confirm
+
+
 @contextlib.contextmanager
 def ceph_log(ctx, config):
     """
@@ -235,10 +265,16 @@ def ceph_log(ctx, config):
 
             log.info('Archiving logs...')
             path = os.path.join(ctx.archive, 'remote')
-            os.makedirs(path)
+            try:
+                os.makedirs(path)
+            except OSError as e:
+                pass
             for remote in ctx.cluster.remotes.iterkeys():
                 sub = os.path.join(path, remote.shortname)
-                os.makedirs(sub)
+                try:
+                    os.makedirs(sub)
+                except OSError as e:
+                    pass
                 teuthology.pull_directory(remote, '/var/log/ceph',
                                           os.path.join(sub, 'log'))
 
@@ -1691,6 +1727,7 @@ def task(ctx, config):
         # so they should only be run once
         subtasks = [
             lambda: ceph_log(ctx=ctx, config=None),
+            lambda: ceph_crash(ctx=ctx, config=None),
             lambda: valgrind_post(ctx=ctx, config=config),
         ]