"""
Capture syslog output from every remote for the duration of a run.

This is the ``syslog`` internal task; it lives in
``teuthology/task/internal/syslog.py`` after being moved out of
``teuthology/task/internal/__init__.py``.
"""
import contextlib
import logging

from cStringIO import StringIO

from teuthology import misc
from teuthology.job_status import set_status
from teuthology.orchestra import run


log = logging.getLogger(__name__)

# rsyslog drop-in that is written on setup and removed again on teardown.
_RSYSLOG_CONF = '/etc/rsyslog.d/80-cephtest.conf'

# Label applied (via ``remote.chcon``) to the freshly created log files.
_LOG_FILE_CONTEXT = 'system_u:object_r:var_log_t:s0'

# Extended regex handed to egrep: a line containing any of these words is
# a candidate error.
_SUSPICIOUS_RE = '\\bBUG\\b|\\bINFO\\b|\\bDEADLOCK\\b'

# Messages that are known to be harmless.  Each entry becomes one
# ``grep -v`` stage, applied in exactly this order, after the egrep above.
_IGNORED_PATTERNS = (
    'task .* blocked for more than .* seconds',
    'lockdep is turned off',
    'trying to register non-static key',
    'DEBUG: fsize',  # xfs_fsr
    'CRON',  # ignore cron noise
    'BUG: bad unlock balance detected',  # #6097
    'inconsistent lock state',  # FIXME see #2523
    # part of lockdep output
    # NOTE(review): grep receives this as a regular expression (no -F), so
    # the asterisks are not all matched literally -- TODO confirm that this
    # filters only the lockdep banner and not every DEADLOCK line.
    '*** DEADLOCK ***',
    # FIXME see #2590 and #147
    'INFO: possible irq lock inversion dependency detected',
    'INFO: NMI handler (perf_event_nmi_handler) took too long to run',
    'INFO: recovery required on readonly',
    'ceph-create-keys: INFO',
)


def _rsyslog_conf(kern_log, misc_log):
    """Return the text of the rsyslog drop-in writing to the two files."""
    return '\n'.join([
        'kern.* -{kern_log};RSYSLOG_FileFormat'.format(kern_log=kern_log),
        '*.*;kern.none -{misc_log};RSYSLOG_FileFormat'.format(
            misc_log=misc_log),
    ])


def _start_capture(ctx, conf_fp, log_paths):
    """Create the log files, install the drop-in and restart rsyslog."""
    for rem in ctx.cluster.remotes.iterkeys():
        for log_path in log_paths:
            rem.run(args='touch %s' % log_path)
            rem.chcon(log_path, _LOG_FILE_CONTEXT)
        misc.sudo_write_file(
            remote=rem,
            path=_RSYSLOG_CONF,
            data=conf_fp,
        )
        # The same buffer is handed to every remote, so rewind it.
        conf_fp.seek(0)
    run.wait(
        ctx.cluster.run(
            args=[
                'sudo',
                'service',
                # a mere reload (SIGHUP) doesn't seem to make
                # rsyslog open the files
                'rsyslog',
                'restart',
            ],
            wait=False,
        ),
    )


def _stop_capture(ctx):
    """Remove the drop-in and restart rsyslog on every remote."""
    run.wait(
        ctx.cluster.run(
            args=[
                'sudo',
                'rm',
                '-f',
                '--',
                _RSYSLOG_CONF,
                run.Raw('&&'),
                'sudo',
                'service',
                'rsyslog',
                'restart',
            ],
            wait=False,
        ),
    )
    # race condition: nothing actually says rsyslog had time to
    # flush the file fully. oh well.


def _scan_args(archive_dir):
    """
    Build the shell pipeline that prints the first suspicious log line.

    The pipeline is ``egrep <suspicious> <logs>`` followed by one
    ``grep -v`` per entry of ``_IGNORED_PATTERNS`` and a final
    ``head -n 1``.
    """
    args = [
        'egrep', '--binary-files=text',
        _SUSPICIOUS_RE,
        # Raw so that the remote shell expands the glob.
        run.Raw('{adir}/syslog/*.log'.format(adir=archive_dir)),
    ]
    for pattern in _IGNORED_PATTERNS:
        args.extend([run.Raw('|'), 'grep', '-v', pattern])
    args.extend([run.Raw('|'), 'head', '-n', '1'])
    return args


def _report_errors(ctx, archive_dir):
    """
    Scan each remote's captured logs and mark the run failed on a hit.

    ``failure_reason`` is only set when the summary does not already
    carry one, so an earlier failure is never overwritten.
    """
    for rem in ctx.cluster.remotes.iterkeys():
        log.debug('Checking %s', rem.name)
        proc = rem.run(
            args=_scan_args(archive_dir),
            stdout=StringIO(),
        )
        first_hit = proc.stdout.getvalue()
        if not first_hit:
            continue
        log.error('Error in syslog on %s: %s', rem.name, first_hit)
        set_status(ctx.summary, 'fail')
        if 'failure_reason' not in ctx.summary:
            ctx.summary['failure_reason'] = \
                "'{error}' in syslog".format(error=first_hit)


def _compress_logs(ctx, archive_dir):
    """gzip every ``*.log`` file below the archive's syslog directory."""
    run.wait(
        ctx.cluster.run(
            args=[
                'find',
                '{adir}/syslog'.format(adir=archive_dir),
                '-name',
                '*.log',
                '-print0',
                run.Raw('|'),
                'sudo',
                'xargs',
                '-0',
                '--no-run-if-empty',
                '--',
                'gzip',
                '--',
            ],
            wait=False,
        ),
    )


@contextlib.contextmanager
def syslog(ctx, config):
    """
    start syslog / stop syslog on exit.

    On entry, rsyslog on every remote is pointed at ``kern.log`` and
    ``misc.log`` inside ``<archive dir>/syslog``.  On exit the rsyslog
    configuration is removed again, the collected logs are searched for
    BUG/INFO/DEADLOCK lines that are not known to be harmless (the first
    hit marks the run as failed), and the logs are compressed.

    :param ctx:    Run context; ``archive``, ``cluster`` and ``summary``
                   are used.
    :param config: Task configuration; not used by this task.
    """
    if ctx.archive is None:
        # disable this whole feature if we're not going to archive the data
        # anyway
        yield
        return

    log.info('Starting syslog monitoring...')

    archive_dir = misc.get_archive_dir(ctx)
    log_dir = '{adir}/syslog'.format(adir=archive_dir)
    run.wait(
        ctx.cluster.run(
            args=['mkdir', '-p', '-m0755', '--', log_dir],
            wait=False,
        )
    )

    kern_log = '{log_dir}/kern.log'.format(log_dir=log_dir)
    misc_log = '{log_dir}/misc.log'.format(log_dir=log_dir)
    conf_fp = StringIO(_rsyslog_conf(kern_log, misc_log))
    try:
        _start_capture(ctx, conf_fp, (kern_log, misc_log))
        yield
    finally:
        # Teardown also runs when setup or the nested tasks raised.
        log.info('Shutting down syslog monitoring...')
        _stop_capture(ctx)

        log.info('Checking logs for errors...')
        _report_errors(ctx, archive_dir)

        log.info('Compressing syslogs...')
        _compress_logs(ctx, archive_dir)