From e57c86fab39bb28096f2efa77afb575f3afffa59 Mon Sep 17 00:00:00 2001 From: Rishabh Dave Date: Thu, 7 May 2020 18:03:41 +0530 Subject: [PATCH] qa/cephfs: run cleanup() whether FS was mounted or not In case the mount command in mount() fails, it would still have created the mountpoint and network namespace for the FS's mount. Therefore, run cleanup() and cleanup_netns() in umount() and umount_wait() even when self.mounted is set to False. Also, move the call to cleanup_netns() into cleanup(). Fixes: https://tracker.ceph.com/issues/45430 Signed-off-by: Rishabh Dave --- qa/tasks/cephfs/fuse_mount.py | 17 +++++++++++++---- qa/tasks/cephfs/kernel_mount.py | 5 ++--- qa/tasks/cephfs/mount.py | 3 ++- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/qa/tasks/cephfs/fuse_mount.py b/qa/tasks/cephfs/fuse_mount.py index f46689b5b06..50975981d9d 100644 --- a/qa/tasks/cephfs/fuse_mount.py +++ b/qa/tasks/cephfs/fuse_mount.py @@ -258,8 +258,14 @@ class FuseMount(CephFSMount): def _mountpoint_exists(self): return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False, cwd=self.test_dir, timeout=(15*60)).exitstatus == 0 - def umount(self): + def umount(self, cleanup=True): + """ + umount() must not run cleanup() when it's called by umount_wait() + since "run.wait([self.fuse_daemon], timeout)" would hang otherwise. 
+ """ if not self.is_mounted(): + if cleanup: + self.cleanup() return try: @@ -328,6 +334,8 @@ class FuseMount(CephFSMount): self.id = None self.inst = None self.addr = None + if cleanup: + self.cleanup() def umount_wait(self, force=False, require_clean=False, timeout=900): """ @@ -337,6 +345,7 @@ class FuseMount(CephFSMount): log.debug('ceph-fuse client.{id} is not mounted at {remote} {mnt}'.format(id=self.client_id, remote=self.client_remote, mnt=self.mountpoint)) + self.cleanup() return if force: @@ -351,7 +360,9 @@ class FuseMount(CephFSMount): # mount -o remount (especially if the remount is stuck because MDSs # are unavailable) - self.umount() + # cleanup is set to False since cleanup must happen after umount is + # complete; otherwise the following call to run.wait hangs. + self.umount(cleanup=False) try: # Permit a timeout, so that we do not block forever @@ -365,7 +376,6 @@ class FuseMount(CephFSMount): if require_clean: raise - self.cleanup_netns() self.mounted = False self.cleanup() @@ -384,7 +394,6 @@ class FuseMount(CephFSMount): except CommandFailedError: pass - self.cleanup_netns() self.mounted = False # Indiscriminate, unlike the touchier cleanup() diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py index 9eb41622407..3362885d621 100644 --- a/qa/tasks/cephfs/kernel_mount.py +++ b/qa/tasks/cephfs/kernel_mount.py @@ -71,6 +71,7 @@ class KernelMount(CephFSMount): def umount(self, force=False): if not self.is_mounted(): + self.cleanup() return log.debug('Unmounting client client.{id}...'.format(id=self.client_id)) @@ -92,7 +93,6 @@ class KernelMount(CephFSMount): raise e self.mounted = False - self.cleanup_netns() self.cleanup() def umount_wait(self, force=False, require_clean=False, timeout=900): @@ -100,6 +100,7 @@ class KernelMount(CephFSMount): Unlike the fuse client, the kernel client's umount is immediate """ if not self.is_mounted(): + self.cleanup() return try: @@ -109,7 +110,6 @@ class KernelMount(CephFSMount): raise # 
force delete the netns and umount - self.cleanup_netns() self.client_remote.run( args=['sudo', 'umount', @@ -120,7 +120,6 @@ class KernelMount(CephFSMount): timeout=(15*60)) self.mounted = False - self.cleanup_netns() self.cleanup() def wait_until_mounted(self): diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py index 91146695af0..75e136e0927 100644 --- a/qa/tasks/cephfs/mount.py +++ b/qa/tasks/cephfs/mount.py @@ -449,7 +449,6 @@ class CephFSMount(object): """ log.info('Cleaning up killed connection on {0}'.format(self.client_remote.name)) self.umount_wait(force=True) - self.cleanup() def cleanup(self): """ @@ -476,6 +475,8 @@ class CephFSMount(object): else: raise + self.cleanup_netns() + def wait_until_mounted(self): raise NotImplementedError() -- 2.39.5