From 1cad309d6542697eb774ab5eed985270118631db Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Mon, 3 Oct 2011 16:08:49 -0700 Subject: [PATCH] Add failure_reason to summary for the first failure detected. For now, this is the exception raised during a task, the error found in the central log, or coredumps found. More specific errors (e.g. s3-tests had 3 failures) can be added later as exceptions raised by tasks. --- teuthology/run_tasks.py | 8 ++++++-- teuthology/task/ceph.py | 38 +++++++++++++++++++++++++++++-------- teuthology/task/internal.py | 3 +++ 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/teuthology/run_tasks.py b/teuthology/run_tasks.py index dcd94689e6..4466202c45 100644 --- a/teuthology/run_tasks.py +++ b/teuthology/run_tasks.py @@ -26,8 +26,10 @@ def run_tasks(tasks, ctx): if hasattr(manager, '__enter__'): manager.__enter__() stack.append(manager) - except: + except Exception, e: ctx.summary['success'] = False + if 'failure_reason' not in ctx.summary: + ctx.summary['failure_reason'] = str(e) log.exception('Saw exception from tasks') if ctx.config.get('interactive-on-error'): from .task import interactive @@ -41,8 +43,10 @@ def run_tasks(tasks, ctx): log.debug('Unwinding manager %s', manager) try: suppress = manager.__exit__(*exc_info) - except: + except Exception, e: ctx.summary['success'] = False + if 'failure_reason' not in ctx.summary: + ctx.summary['failure_reason'] = str(e) log.exception('Manager failed: %s', manager) else: if suppress: diff --git a/teuthology/task/ceph.py b/teuthology/task/ceph.py index ce359fec39..1d01daba6a 100644 --- a/teuthology/task/ceph.py +++ b/teuthology/task/ceph.py @@ -652,18 +652,40 @@ def cluster(ctx, config): ) log.info('Checking cluster ceph.log for badness...') - r = mon0_remote.run(args=[ - 'if', run.Raw('!'), - 'egrep', '-q', '\[ERR\]|\[WRN\]|\[SEC\]', - '/tmp/cephtest/data/%s/log' % firstmon, - run.Raw(';'), 'then', 'echo', 'OK', run.Raw(';'), - 'fi', - ], + def first_in_ceph_log(pattern): + r 
= mon0_remote.run( + args=[ + 'if', run.Raw('!'), + 'egrep', '-q', pattern, + '/tmp/cephtest/data/%s/log' % firstmon, + run.Raw(';'), 'then', 'echo', 'OK', run.Raw(';'), + 'else', + 'egrep', pattern, + '/tmp/cephtest/data/%s/log' % firstmon, + run.Raw('|'), + 'head', '-n', '1', run.Raw(';'), + 'fi', + ], stdout=StringIO(), ) - if r.stdout.getvalue() != "OK\n": + stdout = r.stdout.getvalue() + if stdout != "OK\n": + return stdout + return None + + if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]') is not None: log.warning('Found errors (ERR|WRN|SEC) in cluster log') ctx.summary['success'] = False + # use the most severe problem as the failure reason + if 'failure_reason' not in ctx.summary: + for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: + match = first_in_ceph_log(pattern) + if match is not None: + ctx.summary['failure_reason'] = \ + '"{match}" in cluster log'.format( + match=match.rstrip('\n'), + ) + break for remote, dirs in devs_to_clean.iteritems(): for dir_ in dirs: diff --git a/teuthology/task/internal.py b/teuthology/task/internal.py index 4191b986d4..baf93a4079 100644 --- a/teuthology/task/internal.py +++ b/teuthology/task/internal.py @@ -267,6 +267,9 @@ def coredump(ctx, config): if r.stdout.getvalue() != 'OK\n': log.warning('Found coredumps on %s, flagging run as failed', remote) ctx.summary['success'] = False + if 'failure_reason' not in ctx.summary: + ctx.summary['failure_reason'] = \ + 'Found coredumps on {remote}'.format(remote=remote) @contextlib.contextmanager def syslog(ctx, config): -- 2.39.5