git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
Add failure_reason to summary for the first failure detected.
author: Josh Durgin <josh.durgin@dreamhost.com>
Mon, 3 Oct 2011 23:08:49 +0000 (16:08 -0700)
committer: Josh Durgin <josh.durgin@dreamhost.com>
Tue, 4 Oct 2011 00:07:41 +0000 (17:07 -0700)
For now, this is the exception raised during a task, the first error found
in the central log, or a note that coredumps were found. More specific errors
(e.g. s3-tests had 3 failures) can be added later as exceptions raised
by tasks.

teuthology/run_tasks.py
teuthology/task/ceph.py
teuthology/task/internal.py

index dcd94689e6f69ba831a6cd4f4498c6102d41c88a..4466202c450043f7eebc3bf77c7eda3b5a5ec244 100644 (file)
@@ -26,8 +26,10 @@ def run_tasks(tasks, ctx):
             if hasattr(manager, '__enter__'):
                 manager.__enter__()
                 stack.append(manager)
-    except:
+    except Exception, e:
         ctx.summary['success'] = False
+        if 'failure_reason' not in ctx.summary:
+            ctx.summary['failure_reason'] = str(e)
         log.exception('Saw exception from tasks')
         if ctx.config.get('interactive-on-error'):
             from .task import interactive
@@ -41,8 +43,10 @@ def run_tasks(tasks, ctx):
                 log.debug('Unwinding manager %s', manager)
                 try:
                     suppress = manager.__exit__(*exc_info)
-                except:
+                except Exception, e:
                     ctx.summary['success'] = False
+                    if 'failure_reason' not in ctx.summary:
+                        ctx.summary['failure_reason'] = str(e)
                     log.exception('Manager failed: %s', manager)
                 else:
                     if suppress:
index ce359fec39e673763c68a239889611178fa8c408..1d01daba6aa3852135eb5603ccb13bfefbfa1371 100644 (file)
@@ -652,18 +652,40 @@ def cluster(ctx, config):
                 )
 
         log.info('Checking cluster ceph.log for badness...')
-        r = mon0_remote.run(args=[
-                'if', run.Raw('!'),
-                'egrep', '-q', '\[ERR\]|\[WRN\]|\[SEC\]',
-                '/tmp/cephtest/data/%s/log' % firstmon,
-                run.Raw(';'), 'then', 'echo', 'OK', run.Raw(';'),
-                'fi',
-                ],
+        def first_in_ceph_log(pattern):
+            r = mon0_remote.run(
+                args=[
+                    'if', run.Raw('!'),
+                    'egrep', '-q', pattern,
+                    '/tmp/cephtest/data/%s/log' % firstmon,
+                    run.Raw(';'), 'then', 'echo', 'OK', run.Raw(';'),
+                    'else',
+                    'egrep', pattern,
+                    '/tmp/cephtest/data/%s/log' % firstmon,
+                    run.Raw('|'),
+                    'head', '-n', '1', run.Raw(';'),
+                    'fi',
+                    ],
                 stdout=StringIO(),
                 )
-        if r.stdout.getvalue() != "OK\n":
+            stdout = r.stdout.getvalue()
+            if stdout != "OK\n":
+                return stdout
+            return None
+
+        if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]') is not None:
             log.warning('Found errors (ERR|WRN|SEC) in cluster log')
             ctx.summary['success'] = False
+            # use the most severe problem as the failure reason
+            if 'failure_reason' not in ctx.summary:
+                for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
+                    match = first_in_ceph_log(pattern)
+                    if match is not None:
+                        ctx.summary['failure_reason'] = \
+                            '"{match}" in cluster log'.format(
+                            match=match.rstrip('\n'),
+                            )
+                        break
 
         for remote, dirs in devs_to_clean.iteritems():
             for dir_ in dirs:
index 4191b986d4cc973d52c5af4e0c6f39ee411a961f..baf93a407969db40c0754d87a9426201418c8241 100644 (file)
@@ -267,6 +267,9 @@ def coredump(ctx, config):
             if r.stdout.getvalue() != 'OK\n':
                 log.warning('Found coredumps on %s, flagging run as failed', remote)
                 ctx.summary['success'] = False
+                if 'failure_reason' not in ctx.summary:
+                    ctx.summary['failure_reason'] = \
+                        'Found coredumps on {remote}'.format(remote=remote)
 
 @contextlib.contextmanager
 def syslog(ctx, config):