From: Xiubo Li Date: Thu, 28 May 2020 04:58:13 +0000 (-0400) Subject: qa/tasks/cephfs/mount.py: remove the stale netnses and bridge X-Git-Tag: wip-pdonnell-testing-20200918.022351~488^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c0c5550ed745a9dbafd23061e8ea6cd0a8dab915;p=ceph-ci.git qa/tasks/cephfs/mount.py: remove the stale netnses and bridge If previous test cases failed, their netnses and bridge will be left behind. Remove these stale netnses and the bridge when new test cases begin. Fixes: https://tracker.ceph.com/issues/45806 Signed-off-by: Xiubo Li --- diff --git a/qa/tasks/ceph_fuse.py b/qa/tasks/ceph_fuse.py index 34dd36da10a..7c651bd6988 100644 --- a/qa/tasks/ceph_fuse.py +++ b/qa/tasks/ceph_fuse.py @@ -109,11 +109,13 @@ def task(ctx, config): all_mounts = getattr(ctx, 'mounts', {}) mounted_by_me = {} skipped = {} + remotes = set() brxnet = config.get("brxnet", None) # Construct any new FuseMount instances for id_, remote in clients: + remotes.add(remote) client_config = config.get("client.%s" % id_) if client_config is None: client_config = {} @@ -137,6 +139,15 @@ def task(ctx, config): ctx.mounts = all_mounts + # Umount any pre-existing clients that we have not been asked to mount + for client_id in set(all_mounts.keys()) - set(mounted_by_me.keys()) - set(skipped.keys()): + mount = all_mounts[client_id] + if mount.is_mounted(): + mount.umount_wait() + + for remote in remotes: + FuseMount.cleanup_stale_netnses_and_bridge(remote) + # Mount any clients we have been asked to (default to mount all) log.info('Mounting ceph-fuse clients...') for info in mounted_by_me.values(): @@ -148,12 +159,6 @@ def task(ctx, config): for info in mounted_by_me.values(): info["mount"].wait_until_mounted() - # Umount any pre-existing clients that we have not been asked to mount - for client_id in set(all_mounts.keys()) - set(mounted_by_me.keys()) - set(skipped.keys()): - mount = all_mounts[client_id] - if mount.is_mounted(): - mount.umount_wait() - try: yield 
all_mounts finally: diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py index c8ed54fb820..8681eeb675b 100644 --- a/qa/tasks/cephfs/mount.py +++ b/qa/tasks/cephfs/mount.py @@ -14,7 +14,6 @@ from tasks.cephfs.filesystem import Filesystem log = logging.getLogger(__name__) - class CephFSMount(object): def __init__(self, ctx, test_dir, client_id, client_remote, brxnet): """ @@ -42,6 +41,33 @@ class CephFSMount(object): self.background_procs = [] + # This will cleanup the stale netnses, which are from the + # last failed test cases. + @staticmethod + def cleanup_stale_netnses_and_bridge(remote): + p = remote.run(args=['ip', 'netns', 'list'], + stdout=StringIO(), timeout=(5*60)) + p = p.stdout.getvalue().strip() + + # Get the netns name list + netns_list = re.findall(r'ceph-ns-[^()\s][-.\w]+[^():\s]', p) + + # Remove the stale netnses + for ns in netns_list: + ns_name = ns.split()[0] + args = ['sudo', 'ip', 'netns', 'delete', '{0}'.format(ns_name)] + try: + remote.run(args=args, timeout=(5*60), omit_sudo=False) + except Exception: + pass + + # Remove the stale 'ceph-brx' + try: + args = ['sudo', 'ip', 'link', 'delete', 'ceph-brx'] + remote.run(args=args, timeout=(5*60), omit_sudo=False) + except Exception: + pass + def _parse_netns_name(self): self._netns_name = '-'.join(["ceph-ns", re.sub(r'/+', "-", self.mountpoint)]) @@ -171,13 +197,19 @@ class CephFSMount(object): for ns in netns_list: ns_name = ns.split()[0] args = ['sudo', 'ip', 'netns', 'exec', '{0}'.format(ns_name), 'ip', 'addr'] - p = self.client_remote.run(args=args, stderr=StringIO(), - stdout=StringIO(), timeout=(5*60), - omit_sudo=False) - q = re.search("{0}".format(ip), p.stdout.getvalue()) - if q is not None: - found = True - break + try: + p = self.client_remote.run(args=args, stderr=StringIO(), + stdout=StringIO(), timeout=(5*60), + omit_sudo=False) + q = re.search("{0}".format(ip), p.stdout.getvalue()) + if q is not None: + found = True + break + except CommandFailedError: + if "No such 
file or directory" in p.stderr.getvalue(): + pass + if "Invalid argument" in p.stderr.getvalue(): + pass if found == False: break diff --git a/qa/tasks/kclient.py b/qa/tasks/kclient.py index a6271a220c3..74506e4693e 100644 --- a/qa/tasks/kclient.py +++ b/qa/tasks/kclient.py @@ -76,6 +76,9 @@ def task(ctx, config): test_dir = misc.get_testdir(ctx) + for id_, remote in clients: + KernelMount.cleanup_stale_netnses_and_bridge(remote) + mounts = {} for id_, remote in clients: client_config = config.get("client.%s" % id_) diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py index e96187d847a..237755d8835 100644 --- a/qa/tasks/vstart_runner.py +++ b/qa/tasks/vstart_runner.py @@ -130,6 +130,7 @@ try: from tasks.cephfs.fuse_mount import FuseMount from tasks.cephfs.kernel_mount import KernelMount from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster + from tasks.cephfs.mount import CephFSMount from tasks.mgr.mgr_test_case import MgrCluster from teuthology.contextutil import MaxWhileTries from teuthology.task import interactive @@ -1287,6 +1288,8 @@ def exec_test(): global remote remote = LocalRemote() + CephFSMount.cleanup_stale_netnses_and_bridge(remote) + # Tolerate no MDSs or clients running at start ps_txt = remote.run(args=["ps", "-u"+str(os.getuid())], stdout=StringIO()).stdout.getvalue().strip()