]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
qa: defer cleaning the mountpoint's netnses and the bridge
author: Xiubo Li <xiubli@redhat.com>
Mon, 6 Jul 2020 12:27:34 +0000 (08:27 -0400)
committer: Xiubo Li <xiubli@redhat.com>
Thu, 30 Jul 2020 00:58:36 +0000 (20:58 -0400)
The netnses may be created and deleted many times over the whole test run,
so we can defer cleaning them up until the last mountpoint is unmounted
or the test is exiting.

Fixes: https://tracker.ceph.com/issues/46282
Signed-off-by: Xiubo Li <xiubli@redhat.com>
qa/tasks/ceph_fuse.py
qa/tasks/cephfs/mount.py
qa/tasks/kclient.py
qa/tasks/vstart_runner.py

index 7c651bd6988092f1e4060c07da70d2440997f09a..f55441a17a6aa63808888976aa69f33b6a532790 100644 (file)
@@ -169,3 +169,5 @@ def task(ctx, config):
             mount = info["mount"]
             if mount.is_mounted():
                 mount.umount_wait()
+        for remote in remotes:
+            FuseMount.cleanup_stale_netnses_and_bridge(remote)
index 8681eeb675befaecb46325fff4f3b0b424277209..c72f12a6b4daf0af9faffbc1518f6be07e60d665 100644 (file)
@@ -156,34 +156,39 @@ class CephFSMount(object):
     def _setup_netns(self):
         p = self.client_remote.run(args=['ip', 'netns', 'list'],
                                    stderr=StringIO(), stdout=StringIO(),
-                                   timeout=(5*60))
-        p = p.stdout.getvalue().strip()
-        if re.match(self.netns_name, p) is not None:
-            raise RuntimeError("the netns '{}' already exists!".format(self.netns_name))
+                                   timeout=(5*60)).stdout.getvalue().strip()
 
         # Get the netns name list
         netns_list = re.findall(r'[^()\s][-.\w]+[^():\s]', p)
 
-        # Get an uniq netns id
-        nsid = 0
-        while True:
+        out = re.search(r"{0}".format(self.netns_name), p)
+        if out is None:
+            # Get an uniq nsid for the new netns
+            nsid = 0
             p = self.client_remote.run(args=['ip', 'netns', 'list-id'],
                                        stderr=StringIO(), stdout=StringIO(),
-                                       timeout=(5*60))
-            p = re.search(r"nsid {} ".format(nsid), p.stdout.getvalue())
-            if p is None:
-                break
+                                       timeout=(5*60)).stdout.getvalue()
+            while True:
+                out = re.search(r"nsid {} ".format(nsid), p)
+                if out is None:
+                    break
 
-            nsid += 1
+                nsid += 1
 
-        self.nsid = nsid;
+            # Add one new netns and set it id
+            self.run_shell_payload(f"""
+                set -e
+                sudo ip netns add {self.netns_name}
+                sudo ip netns set {self.netns_name} {nsid}
+            """, timeout=(5*60), omit_sudo=False, cwd='/')
+            self.nsid = nsid;
+        else:
+            # The netns already exists and maybe suspended by self.kill()
+            self.resume_netns();
 
-        # Add one new netns and set it id
-        self.run_shell_payload(f"""
-            set -e
-            sudo ip netns add {self.netns_name}
-            sudo ip netns set {self.netns_name} {nsid}
-        """, timeout=(5*60), omit_sudo=False, cwd='/')
+            nsid = int(re.search(r"{0} \(id: (\d+)\)".format(self.netns_name), p).group(1))
+            self.nsid = nsid;
+            return
 
         # Get one ip address for netns
         ips = IP(self.ceph_brx_net)
@@ -304,9 +309,13 @@ class CephFSMount(object):
         """
         Cleanup the netns for the mountpoint.
         """
-        log.info("Cleaning the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint))
-        self._cleanup_netns()
-        self._cleanup_brx_and_nat()
+        # We will defer cleaning the netnses and bridge until the last
+        # mountpoint is unmounted, this will be a temporary work around
+        # for issue#46282.
+
+        # log.info("Cleaning the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint))
+        # self._cleanup_netns()
+        # self._cleanup_brx_and_nat()
 
     def suspend_netns(self):
         """
index 74506e4693e5dcfab864a7d0ddeae31664e565ba..3917606955af39e83c4f947d19b9e173ce664b47 100644 (file)
@@ -118,6 +118,9 @@ def task(ctx, config):
                     forced = True
                     mount.umount_wait(force=True)
 
+        for id_, remote in clients:
+            KernelMount.cleanup_stale_netnses_and_bridge(remote)
+
         return forced
 
     ctx.mounts = mounts
index 237755d8835fba6f13238c0987e94627d7bc0b4c..94035d4f28d9158345de421975a338c60983961e 100644 (file)
@@ -1480,6 +1480,8 @@ def exec_test():
         verbosity=2,
         failfast=True).run(overall_suite)
 
+    CephFSMount.cleanup_stale_netnses_and_bridge(remote)
+
     if opt_teardown_cluster:
         teardown_cluster()