From 261a45643987de16cea159cedaab35e6397191dd Mon Sep 17 00:00:00 2001
From: "Yan, Zheng"
Date: Mon, 28 Oct 2019 15:39:44 +0800
Subject: [PATCH] qa/cephfs: test case for auto reconnect after blacklisted

Add an optional mount_options argument to the mount() methods of the
CephFS mount helpers (ceph-fuse gets them as extra command line
arguments, the kernel client as extra comma separated -o options).

Use it in a new test that remounts the kernel client with
recover_session=clean, evicts the client session on the MDS and then
checks how file descriptors opened before the eviction behave:
- writing to a previously opened read-write fd fails with EBADF
- reading from a file whose file lock was lost fails with EIO
- a plain read-only fd keeps working

Signed-off-by: "Yan, Zheng"
Fixes: https://tracker.ceph.com/issues/42085
---
 qa/tasks/cephfs/fuse_mount.py           |  8 ++-
 qa/tasks/cephfs/kernel_mount.py         |  5 +-
 qa/tasks/cephfs/mount.py                |  2 +-
 qa/tasks/cephfs/test_client_recovery.py | 93 +++++++++++++++++++++++++
 qa/tasks/vstart_runner.py               |  9 ++-
 5 files changed, 110 insertions(+), 7 deletions(-)

diff --git a/qa/tasks/cephfs/fuse_mount.py b/qa/tasks/cephfs/fuse_mount.py
index 626a0682f4ad6..27d89437df7a2 100644
--- a/qa/tasks/cephfs/fuse_mount.py
+++ b/qa/tasks/cephfs/fuse_mount.py
@@ -24,13 +24,13 @@ class FuseMount(CephFSMount):
         self.inst = None
         self.addr = None
 
-    def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
+    def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None, mount_options=()):
         if mountpoint is not None:
             self.mountpoint = mountpoint
         self.setupfs(name=mount_fs_name)
 
         try:
-            return self._mount(mount_path, mount_fs_name)
+            return self._mount(mount_path, mount_fs_name, mount_options)
         except RuntimeError:
             # Catch exceptions by the mount() logic (i.e. not remote command
             # failures) and ensure the mount is not left half-up.
@@ -40,7 +40,7 @@ class FuseMount(CephFSMount):
             self.umount_wait(force=True)
             raise
 
-    def _mount(self, mount_path, mount_fs_name):
+    def _mount(self, mount_path, mount_fs_name, mount_options):
         log.info("Client client.%s config is %s" % (self.client_id, self.client_config))
 
         daemon_signal = 'kill'
@@ -70,6 +70,8 @@ class FuseMount(CephFSMount):
         if mount_fs_name is not None:
             fuse_cmd += ["--client_mds_namespace={0}".format(mount_fs_name)]
 
+        fuse_cmd += mount_options
+
         fuse_cmd += [
             '--name', 'client.{id}'.format(id=self.client_id),
             # TODO ceph-fuse doesn't understand dash dash '--',
diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py
index b90749f259934..86a06006df752 100644
--- a/qa/tasks/cephfs/kernel_mount.py
+++ b/qa/tasks/cephfs/kernel_mount.py
@@ -27,7 +27,7 @@ class KernelMount(CephFSMount):
         self.ipmi_password = ipmi_password
         self.ipmi_domain = ipmi_domain
 
-    def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
+    def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None, mount_options=()):
         if mountpoint is not None:
             self.mountpoint = mountpoint
         self.setupfs(name=mount_fs_name)
@@ -47,6 +47,9 @@ class KernelMount(CephFSMount):
         if mount_fs_name is not None:
             opts += ",mds_namespace={0}".format(mount_fs_name)
 
+        for mount_opt in mount_options:
+            opts += ",{0}".format(mount_opt)
+
         self.client_remote.run(
             args=[
                 'sudo',
diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py
index 4d7375f6d5c06..aeed4fa204346 100644
--- a/qa/tasks/cephfs/mount.py
+++ b/qa/tasks/cephfs/mount.py
@@ -58,7 +58,7 @@ class CephFSMount(object):
         self.fs.wait_for_daemons()
         log.info('Ready to start {}...'.format(type(self).__name__))
 
-    def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
+    def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None, mount_options=()):
         raise NotImplementedError()
 
     def umount(self):
diff --git a/qa/tasks/cephfs/test_client_recovery.py b/qa/tasks/cephfs/test_client_recovery.py
index 7bc567aed83c8..e18fe997e4d1b 100644
--- a/qa/tasks/cephfs/test_client_recovery.py
+++ b/qa/tasks/cephfs/test_client_recovery.py
@@ -10,6 +10,7 @@ import distutils.version as version
 import re
 import os
 
+from teuthology.orchestra import run
 from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
 from tasks.cephfs.fuse_mount import FuseMount
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
@@ -629,3 +630,95 @@ class TestClientRecovery(CephFSTestCase):
         self.assert_session_count(1)
 
         self.mount_a.kill_cleanup()
+
+    def test_reconnect_after_blacklisted(self):
+        """
+        Test reconnect after blacklisted.
+        - writing to a fd that was opened before blacklist should return -EBADF
+        - reading/writing to a file with lost file locks should return -EIO
+        - readonly fd should continue to work
+        """
+
+        self.mount_a.umount_wait()
+
+        if isinstance(self.mount_a, FuseMount):
+            self.skipTest("Not implemented in FUSE client yet")
+        else:
+            try:
+                self.mount_a.mount(mount_options=['recover_session=clean'])
+            except CommandFailedError:
+                self.mount_a.kill_cleanup()
+                self.skipTest("Not implemented in current kernel")
+
+        self.mount_a.wait_until_mounted()
+
+        path = os.path.join(self.mount_a.mountpoint, 'testfile_reconnect_after_blacklisted')
+        pyscript = dedent("""
+            import os
+            import sys
+            import fcntl
+            import errno
+            import time
+
+            fd1 = os.open("{path}.1", os.O_RDWR | os.O_CREAT, 0o666)
+            fd2 = os.open("{path}.1", os.O_RDONLY)
+            fd3 = os.open("{path}.2", os.O_RDWR | os.O_CREAT, 0o666)
+            fd4 = os.open("{path}.2", os.O_RDONLY)
+
+            os.write(fd1, b'content')
+            os.read(fd2, 1)
+
+            os.write(fd3, b'content')
+            os.read(fd4, 1)
+            fcntl.flock(fd4, fcntl.LOCK_SH | fcntl.LOCK_NB)
+
+            print("blacklist")
+            sys.stdout.flush()
+
+            sys.stdin.readline()
+
+            # wait for mds to close session
+            time.sleep(10)
+
+            # trigger 'open session' message. kclient relies on 'session reject' message
+            # to detect if itself is blacklisted
+            try:
+                os.stat("{path}.1")
+            except OSError:
+                pass
+
+            # wait for auto reconnect
+            time.sleep(10)
+
+            try:
+                os.write(fd1, b'content')
+            except OSError as e:
+                if e.errno != errno.EBADF:
+                    raise
+            else:
+                raise RuntimeError("write() failed to raise error")
+
+            os.read(fd2, 1)
+
+            try:
+                os.read(fd4, 1)
+            except OSError as e:
+                if e.errno != errno.EIO:
+                    raise
+            else:
+                raise RuntimeError("read() failed to raise error")
+            """).format(path=path)
+        rproc = self.mount_a.client_remote.run(
+            args=['sudo', 'python3', '-c', pyscript],
+            wait=False, stdin=run.PIPE, stdout=run.PIPE)
+
+        rproc.stdout.readline()
+
+        mount_a_client_id = self.mount_a.get_global_id()
+        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+
+        rproc.stdin.writelines(['done\n'])
+        rproc.stdin.flush()
+
+        rproc.wait()
+        self.assertEqual(rproc.exitstatus, 0)
diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py
index de6922e61151c..6b5f90b687ec1 100644
--- a/qa/tasks/vstart_runner.py
+++ b/qa/tasks/vstart_runner.py
@@ -638,7 +638,7 @@ class LocalKernelMount(KernelMount):
         rproc.wait()
         self.mounted = False
 
-    def mount(self, mount_path=None, mount_fs_name=None):
+    def mount(self, mount_path=None, mount_fs_name=None, mount_options=()):
         self.setupfs(name=mount_fs_name)
 
         log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
@@ -662,6 +662,9 @@ class LocalKernelMount(KernelMount):
         if mount_fs_name is not None:
             opts += ",mds_namespace={0}".format(mount_fs_name)
 
+        for mount_opt in mount_options:
+            opts += ",{0}".format(mount_opt)
+
         self.client_remote.run(
             args=[
                 'sudo',
@@ -800,7 +803,7 @@ class LocalFuseMount(FuseMount):
         if self.is_mounted():
             super(LocalFuseMount, self).umount()
 
-    def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
+    def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None, mount_options=()):
         if mountpoint is not None:
             self.mountpoint = mountpoint
         self.setupfs(name=mount_fs_name)
@@ -841,6 +844,8 @@ class LocalFuseMount(FuseMount):
         if mount_fs_name is not None:
             prefix += ["--client_mds_namespace={0}".format(mount_fs_name)]
 
+        prefix += mount_options
+
         self.fuse_daemon = self.client_remote.run(args=
                                             prefix + [
                                                 "-f",
-- 
2.39.5