git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
qa/tasks/cephfs/mount.py: remove the stale netnses and bridge
author Xiubo Li <xiubli@redhat.com>
Thu, 28 May 2020 04:58:13 +0000 (00:58 -0400)
committer Xiubo Li <xiubli@redhat.com>
Wed, 29 Jul 2020 00:41:18 +0000 (08:41 +0800)
If previous test cases failed, their network namespaces (netnses) and
the bridge are left behind. Remove them when new test cases begin.

Fixes: https://tracker.ceph.com/issues/45806
Signed-off-by: Xiubo Li <xiubli@redhat.com>
qa/tasks/ceph_fuse.py
qa/tasks/cephfs/mount.py
qa/tasks/kclient.py
qa/tasks/vstart_runner.py

index 34dd36da10a67b5554bef2a7767130423c313894..7c651bd6988092f1e4060c07da70d2440997f09a 100644 (file)
@@ -109,11 +109,13 @@ def task(ctx, config):
     all_mounts = getattr(ctx, 'mounts', {})
     mounted_by_me = {}
     skipped = {}
+    remotes = set()
 
     brxnet = config.get("brxnet", None)
 
     # Construct any new FuseMount instances
     for id_, remote in clients:
+        remotes.add(remote)
         client_config = config.get("client.%s" % id_)
         if client_config is None:
             client_config = {}
@@ -137,6 +139,15 @@ def task(ctx, config):
 
     ctx.mounts = all_mounts
 
+    # Umount any pre-existing clients that we have not been asked to mount
+    for client_id in set(all_mounts.keys()) - set(mounted_by_me.keys()) - set(skipped.keys()):
+        mount = all_mounts[client_id]
+        if mount.is_mounted():
+            mount.umount_wait()
+
+    for remote in remotes:
+        FuseMount.cleanup_stale_netnses_and_bridge(remote)
+
     # Mount any clients we have been asked to (default to mount all)
     log.info('Mounting ceph-fuse clients...')
     for info in mounted_by_me.values():
@@ -148,12 +159,6 @@ def task(ctx, config):
     for info in mounted_by_me.values():
         info["mount"].wait_until_mounted()
 
-    # Umount any pre-existing clients that we have not been asked to mount
-    for client_id in set(all_mounts.keys()) - set(mounted_by_me.keys()) - set(skipped.keys()):
-        mount = all_mounts[client_id]
-        if mount.is_mounted():
-            mount.umount_wait()
-
     try:
         yield all_mounts
     finally:
index c8ed54fb8209932ff5073bddb22c5fa0465ff9a3..8681eeb675befaecb46325fff4f3b0b424277209 100644 (file)
@@ -14,7 +14,6 @@ from tasks.cephfs.filesystem import Filesystem
 
 log = logging.getLogger(__name__)
 
-
 class CephFSMount(object):
     def __init__(self, ctx, test_dir, client_id, client_remote, brxnet):
         """
@@ -42,6 +41,33 @@ class CephFSMount(object):
 
         self.background_procs = []
 
+    # This will cleanup the stale netnses, which are from the
+    # last failed test cases.
+    @staticmethod
+    def cleanup_stale_netnses_and_bridge(remote):
+        p = remote.run(args=['ip', 'netns', 'list'],
+                       stdout=StringIO(), timeout=(5*60))
+        p = p.stdout.getvalue().strip()
+
+        # Get the netns name list
+        netns_list = re.findall(r'ceph-ns-[^()\s][-.\w]+[^():\s]', p)
+
+        # Remove the stale netnses
+        for ns in netns_list:
+            ns_name = ns.split()[0]
+            args = ['sudo', 'ip', 'netns', 'delete', '{0}'.format(ns_name)]
+            try:
+                remote.run(args=args, timeout=(5*60), omit_sudo=False)
+            except Exception:
+                pass
+
+        # Remove the stale 'ceph-brx'
+        try:
+            args = ['sudo', 'ip', 'link', 'delete', 'ceph-brx']
+            remote.run(args=args, timeout=(5*60), omit_sudo=False)
+        except Exception:
+            pass
+
     def _parse_netns_name(self):
         self._netns_name = '-'.join(["ceph-ns",
                                      re.sub(r'/+', "-", self.mountpoint)])
@@ -171,13 +197,19 @@ class CephFSMount(object):
             for ns in netns_list:
                 ns_name = ns.split()[0]
                 args = ['sudo', 'ip', 'netns', 'exec', '{0}'.format(ns_name), 'ip', 'addr']
-                p = self.client_remote.run(args=args, stderr=StringIO(),
-                                           stdout=StringIO(), timeout=(5*60),
-                                           omit_sudo=False)
-                q = re.search("{0}".format(ip), p.stdout.getvalue())
-                if q is not None:
-                    found = True
-                    break
+                try:
+                    p = self.client_remote.run(args=args, stderr=StringIO(),
+                                               stdout=StringIO(), timeout=(5*60),
+                                               omit_sudo=False)
+                    q = re.search("{0}".format(ip), p.stdout.getvalue())
+                    if q is not None:
+                        found = True
+                        break
+                except CommandFailedError:
+                    if "No such file or directory" in p.stderr.getvalue():
+                        pass
+                    if "Invalid argument" in p.stderr.getvalue():
+                        pass
 
             if found == False:
                 break
index a6271a220c3f90f5251ff148335aa79faffe4a72..74506e4693e5dcfab864a7d0ddeae31664e565ba 100644 (file)
@@ -76,6 +76,9 @@ def task(ctx, config):
 
     test_dir = misc.get_testdir(ctx)
 
+    for id_, remote in clients:
+        KernelMount.cleanup_stale_netnses_and_bridge(remote)
+
     mounts = {}
     for id_, remote in clients:
         client_config = config.get("client.%s" % id_)
index e96187d847ab78480137fecbcda93d72d741d942..237755d8835fba6f13238c0987e94627d7bc0b4c 100644 (file)
@@ -130,6 +130,7 @@ try:
     from tasks.cephfs.fuse_mount import FuseMount
     from tasks.cephfs.kernel_mount import KernelMount
     from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster
+    from tasks.cephfs.mount import CephFSMount
     from tasks.mgr.mgr_test_case import MgrCluster
     from teuthology.contextutil import MaxWhileTries
     from teuthology.task import interactive
@@ -1287,6 +1288,8 @@ def exec_test():
     global remote
     remote = LocalRemote()
 
+    CephFSMount.cleanup_stale_netnses_and_bridge(remote)
+
     # Tolerate no MDSs or clients running at start
     ps_txt = remote.run(args=["ps", "-u"+str(os.getuid())],
                         stdout=StringIO()).stdout.getvalue().strip()