From 453a0f99d454c5d4784c4ea5018eb1c78b660e4b Mon Sep 17 00:00:00 2001
From: Greg Farnum
Date: Wed, 10 Aug 2011 14:19:23 -0700
Subject: [PATCH] teuthology-nuke: identify and reboot machines with kernel
 mounts

This includes untested code for just force-unmounting them once that
works again, but for now it does a full reboot-and-reconnect cycle.

Signed-off-by: Greg Farnum
---
 teuthology/run.py | 58 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/teuthology/run.py b/teuthology/run.py
index 3fb2138fe3828..a1938e7b9b20a 100644
--- a/teuthology/run.py
+++ b/teuthology/run.py
@@ -165,6 +165,7 @@ def nuke():
     from orchestra import monkey; monkey.patch_all()
     import logging
+    import time
 
     log = logging.getLogger(__name__)
     ctx = parse_args()
@@ -231,6 +232,63 @@ def nuke():
         proc.exitstatus.get()
     log.info('Shutdowns Done.')
 
+    nodes = {}
+    log.info('Looking for kernel mounts to handle...')
+    for remote in ctx.cluster.remotes.iterkeys():
+        proc = remote.run(
+            args=[
+                'grep', '-q', " ceph ", '/etc/mtab'
+                ],
+            wait=False,
+            )
+        nodes[remote] = proc
+    kernel_mounts = list()
+    for remote, proc in nodes.iteritems():
+        try:
+            proc.exitstatus.get()
+            log.debug('kernel mount exists on %s', remote.name)
+            kernel_mounts.append(remote)
+        except run.CommandFailedError:  # no mounts!
+            log.debug('no kernel mount on %s', remote.name)
+
+    """
+    Properly we should be able to just do a forced unmount, but that
+    doesn't seem to be working, so we'll reboot instead:
+    nodes = {}
+    for remote in kernel_mounts:
+        log.info('clearing kernel mount from %s', remote.name)
+        proc = remote.run(
+            args=[
+                'grep', 'ceph', '/etc/mtab', run.Raw('|'),
+                'grep', '-o', "on /.* type", run.Raw('|'),
+                'grep', '-o', "/.* ", run.Raw('|'),
+                'xargs', 'sudo', 'umount', '-f'
+                ],
+            wait=False,
+            )
+        nodes[remote] = proc
+    """
+
+    nodes = {}
+    for remote in kernel_mounts:
+        log.info('rebooting %s', remote.name)
+        proc = remote.run(  # note use of -n to force a no-sync reboot
+            args=['sudo', 'reboot', '-f', '-n'],
+            wait=False,
+            )
+        nodes[remote] = proc
+    # We just ignore these procs because reboot -f doesn't actually
+    # send anything back to the ssh client!
+    #for remote, proc in nodes.iteritems():
+    #    proc.exitstatus.get()
+    from teuthology.misc import reconnect
+    if kernel_mounts:
+        log.info('waiting for nodes to reboot')
+        time.sleep(5)  # if we try to reconnect too quickly, it succeeds!
+        reconnect(ctx, 300)  # allow 5 minutes for the reboots
+
     nodes = {}
     log.info('Clearing filesystem of test data...')
     for remote in ctx.cluster.remotes.iterkeys():
         proc = remote.run(
-- 
2.39.5
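
For reference, here is a minimal standalone sketch of the detection step the
patch runs on each remote host, assuming only the Python standard library
(has_ceph_kernel_mount is a hypothetical helper, not part of teuthology):

    import subprocess

    def has_ceph_kernel_mount():
        # Same check the patch issues remotely: `grep -q " ceph " /etc/mtab`
        # exits 0 only when a ceph kernel mount entry is present.
        return subprocess.call(['grep', '-q', ' ceph ', '/etc/mtab']) == 0

    if __name__ == '__main__':
        if has_ceph_kernel_mount():
            print('kernel ceph mount present; a forced no-sync reboot is needed')
        else:
            print('no kernel ceph mount; nothing to do')

In the patch itself the same grep is dispatched through remote.run() with
wait=False, so every host is probed in parallel before any reboots start.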