From: Xiubo Li Date: Thu, 5 Mar 2020 11:13:02 +0000 (-0500) Subject: qa/cephfs/fuse-mount: do not use the 'stat' to check the mount state X-Git-Tag: v16.1.0~2551^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8728da9c085fea4c34e4247f45d495437f32c5fd;p=ceph.git qa/cephfs/fuse-mount: do not use the 'stat' to check the mount state If the network cannot respond for some reason, the 'stat' command will get stuck until the network recovers; in the worst case it will be stuck forever. Fixes: https://tracker.ceph.com/issues/44044 Signed-off-by: Xiubo Li --- diff --git a/qa/tasks/cephfs/fuse_mount.py b/qa/tasks/cephfs/fuse_mount.py index ac4da5b360d4..49cd6198f9f1 100644 --- a/qa/tasks/cephfs/fuse_mount.py +++ b/qa/tasks/cephfs/fuse_mount.py @@ -184,7 +184,7 @@ class FuseMount(CephFSMount): if self.inst is None: raise RuntimeError("cannot find client session") - def is_mounted(self): + def check_mounted_state(self): proc = self.client_remote.run( args=[ 'stat', @@ -228,13 +228,15 @@ class FuseMount(CephFSMount): sleep for 5 seconds and check again. """ - while not self.is_mounted(): + while not self.check_mounted_state(): # Even if it's not mounted, it should at least # be running: catch simple failures where it has terminated. assert not self.fuse_daemon.poll() time.sleep(5) + self.mounted = True + # Now that we're mounted, set permissions so that the rest of the test will have # unrestricted access to the filesystem mount. 
try: @@ -253,6 +255,7 @@ class FuseMount(CephFSMount): def umount(self): try: log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name)) + stderr = BytesIO() self.client_remote.run( args = [ 'sudo', @@ -261,49 +264,57 @@ class FuseMount(CephFSMount): self.mountpoint, ], cwd=self.test_dir, + stderr=stderr, timeout=(30*60), ) except run.CommandFailedError: - log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name)) - - self.client_remote.run(args=[ - 'sudo', - run.Raw('PATH=/usr/sbin:$PATH'), - 'lsof', - run.Raw(';'), - 'ps', - 'auxf', - ], timeout=(60*15)) - - # abort the fuse mount, killing all hung processes - if self._fuse_conn: - self.run_python(dedent(""" - import os - path = "/sys/fs/fuse/connections/{0}/abort" - if os.path.exists(path): - open(path, "w").write("1") - """).format(self._fuse_conn)) - self._fuse_conn = None + if "mountpoint not found" in stderr.getvalue(): + # This happens if the mount directory doesn't exist + log.info('mount point does not exist: %s', self.mountpoint) + elif "not mounted" in stderr.getvalue(): + # This happens if the mount directory already unmouted + log.info('mount point not mounted: %s', self.mountpoint) + else: + log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name)) - stderr = BytesIO() - try: + self.client_remote.run(args=[ + 'sudo', + run.Raw('PATH=/usr/sbin:$PATH'), + 'lsof', + run.Raw(';'), + 'ps', + 'auxf', + ], timeout=(60*15)) + + # abort the fuse mount, killing all hung processes + if self._fuse_conn: + self.run_python(dedent(""" + import os + path = "/sys/fs/fuse/connections/{0}/abort" + if os.path.exists(path): + open(path, "w").write("1") + """).format(self._fuse_conn)) + self._fuse_conn = None + + stderr = BytesIO() # make sure its unmounted - self.client_remote.run( - args=[ - 'sudo', - 'umount', - '-l', - '-f', - self.mountpoint, - ], - stderr=stderr, - timeout=(60*15) - ) - except 
CommandFailedError: - if self.is_mounted(): - raise + try: + self.client_remote.run( + args=[ + 'sudo', + 'umount', + '-l', + '-f', + self.mountpoint, + ], + stderr=stderr, + timeout=(60*15) + ) + except CommandFailedError: + if self.is_mounted(): + raise - assert not self.is_mounted() + self.mounted = False self._fuse_conn = None self.id = None self.inst = None diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py index 2dc07ca973c3..cc0a1ae83611 100644 --- a/qa/tasks/cephfs/kernel_mount.py +++ b/qa/tasks/cephfs/kernel_mount.py @@ -20,8 +20,6 @@ class KernelMount(CephFSMount): def __init__(self, ctx, test_dir, client_id, client_remote, brxnet): super(KernelMount, self).__init__(ctx, test_dir, client_id, client_remote, brxnet) - self.mounted = False - def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None, mount_options=[]): if mountpoint is not None: self.mountpoint = mountpoint @@ -122,9 +120,6 @@ class KernelMount(CephFSMount): self.cleanup_netns() self.cleanup() - def is_mounted(self): - return self.mounted - def wait_until_mounted(self): """ Unlike the fuse client, the kernel client is up and running as soon diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py index d913dcbcb64f..3ebcd6fc2d70 100644 --- a/qa/tasks/cephfs/mount.py +++ b/qa/tasks/cephfs/mount.py @@ -33,6 +33,7 @@ class CephFSMount(object): self.mountpoint_dir_name = 'mnt.{id}'.format(id=self.client_id) self._mountpoint = None self.fs = None + self.mounted = False self._netns_name = None self.nsid = -1 if brxnet is None: @@ -86,7 +87,7 @@ class CephFSMount(object): self._netns_name = name def is_mounted(self): - raise NotImplementedError() + return self.mounted def setupfs(self, name=None): if name is None and self.fs is not None: