git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/tasks: vstart_runner: introduce --config-mode
author Leonid Usov <leonid.usov@ibm.com>
Sat, 16 Mar 2024 15:41:47 +0000 (11:41 -0400)
committer Leonid Usov <leonid.usov@ibm.com>
Thu, 25 Apr 2024 18:15:05 +0000 (21:15 +0300)
The new mode of the vstart_runner allows passing
paths to yaml configs that will be merged and then
run just as teuthology would do it.
Building on the standard run method, we can even
pass "-" as the config name and provide the config on stdin:

    python3 ../qa/tasks/vstart_runner.py --config-mode "-" << END
    tasks:
      - quiescer:
          quiesce_factor: 0.5
          min_quiesce: 10
          max_quiesce: 10
          initial_delay: 5
          cancelations_cap: 2
          paths:
            - a
            - b
            - c
      - waiter:
          on_exit: 100
    END
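
For file-based configs the invocation is analogous. The yaml paths
below are hypothetical; any set of configs that teuthology can merge
should work the same way:

    python3 ../qa/tasks/vstart_runner.py --config-mode quiescer.yaml overrides.yaml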

This commit does the minimum needed to allow testing the quiescer,
but it also lays the groundwork for running arbitrary configs.

The cornerstone of the approach is to inject our local implementations
of the main fs suite classes. To be able to do that, some minor
refactoring was required in the corresponding modules:
each standard class was renamed with a *Base suffix, and the
former class name, without the suffix, is now a module-level variable
initialized with the *Base implementation. This refactoring
is meant to be backward compatible.
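
To illustrate, the pattern boils down to the following simplified
sketch (not the literal module contents):

    # tasks/cephfs/filesystem.py: the real class gets a *Base suffix and the
    # old name survives as a module-level alias, so existing imports keep working.
    class CephClusterBase:  # in the real module this inherits RunCephCmd
        ...
    CephCluster = CephClusterBase

    # qa/tasks/vstart_runner.py: the runner subclasses the *Base class and
    # rebinds the alias, so anything resolving
    # tasks.cephfs.filesystem.CephCluster at run time gets the local version.
    import tasks.cephfs.filesystem
    class LocalCephCluster(tasks.cephfs.filesystem.CephClusterBase):
        ...
    tasks.cephfs.filesystem.CephCluster = LocalCephCluster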

Signed-off-by: Leonid Usov <leonid.usov@ibm.com>
qa/tasks/cephfs/filesystem.py
qa/tasks/cephfs/fuse_mount.py
qa/tasks/cephfs/kernel_mount.py
qa/tasks/cephfs/mount.py
qa/tasks/mgr/mgr_test_case.py
qa/tasks/vstart_runner.py

index 97c87da6e8ab163712d434d3f5da49e073c34066..4a3eafdf0a54b3c6bad340107881281666df1818 100644 (file)
@@ -226,7 +226,7 @@ class FSStatus(RunCephCmd):
         #all matching
         return False
 
-class CephCluster(RunCephCmd):
+class CephClusterBase(RunCephCmd):
     @property
     def admin_remote(self):
         first_mon = misc.get_first_mon(self._ctx, None)
@@ -296,8 +296,9 @@ class CephCluster(RunCephCmd):
         log.warn(f'The address {addr} is not blocklisted')
         return False
 
+CephCluster = CephClusterBase
 
-class MDSCluster(CephCluster):
+class MDSClusterBase(CephClusterBase):
     """
     Collective operations on all the MDS daemons in the Ceph cluster.  These
     daemons may be in use by various Filesystems.
@@ -308,7 +309,7 @@ class MDSCluster(CephCluster):
     """
 
     def __init__(self, ctx):
-        super(MDSCluster, self).__init__(ctx)
+        super(MDSClusterBase, self).__init__(ctx)
 
     @property
     def mds_ids(self):
@@ -349,7 +350,7 @@ class MDSCluster(CephCluster):
         get_config specialization of service_type="mds"
         """
         if service_type != "mds":
-            return super(MDSCluster, self).get_config(key, service_type)
+            return super(MDSClusterBase, self).get_config(key, service_type)
 
         # Some tests stop MDS daemons, don't send commands to a dead one:
         running_daemons = [i for i, mds in self.mds_daemons.items() if mds.running()]
@@ -515,8 +516,9 @@ class MDSCluster(CephCluster):
         grace = float(self.get_config("mds_beacon_grace", service_type="mon"))
         return grace*2+15
 
+MDSCluster = MDSClusterBase
 
-class Filesystem(MDSCluster):
+class FilesystemBase(MDSClusterBase):
 
     """
     Generator for all Filesystems in the cluster.
@@ -538,7 +540,7 @@ class Filesystem(MDSCluster):
         kwargs accepts recover: bool, allow_dangerous_metadata_overlay: bool,
         yes_i_really_really_mean_it: bool and fs_ops: list[str]
         """
-        super(Filesystem, self).__init__(ctx)
+        super(FilesystemBase, self).__init__(ctx)
 
         self.name = name
         self.id = None
@@ -1852,3 +1854,5 @@ class Filesystem(MDSCluster):
 
     def kill_op(self, reqid, rank=None):
         return self.rank_tell(['op', 'kill', reqid], rank=rank)
+
+Filesystem = FilesystemBase
index 69b8651a8504f0e91c461465aabf3004db5584bd..5ba1340267fabaad94324b68b815613ccc85a6e2 100644 (file)
@@ -10,17 +10,17 @@ from teuthology.contextutil import safe_while
 from teuthology.orchestra import run
 from teuthology.exceptions import CommandFailedError
 from tasks.ceph_manager import get_valgrind_args
-from tasks.cephfs.mount import CephFSMount, UMOUNT_TIMEOUT
+from tasks.cephfs.mount import CephFSMountBase, UMOUNT_TIMEOUT
 
 log = logging.getLogger(__name__)
 
 # Refer mount.py for docstrings.
-class FuseMount(CephFSMount):
+class FuseMountBase(CephFSMountBase):
     def __init__(self, ctx, test_dir, client_id, client_remote,
                  client_keyring_path=None, cephfs_name=None,
                  cephfs_mntpt=None, hostfs_mntpt=None, brxnet=None,
                  client_config={}):
-        super(FuseMount, self).__init__(ctx=ctx, test_dir=test_dir,
+        super(FuseMountBase, self).__init__(ctx=ctx, test_dir=test_dir,
             client_id=client_id, client_remote=client_remote,
             client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
             cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet,
@@ -416,7 +416,7 @@ class FuseMount(CephFSMount):
         """
         Whatever the state of the mount, get it gone.
         """
-        super(FuseMount, self).teardown()
+        super(FuseMountBase, self).teardown()
 
         if self.fuse_daemon and not self.fuse_daemon.finished:
             self.fuse_daemon.stdin.close()
@@ -532,3 +532,5 @@ print(_find_admin_socket("{client_name}"))
 
     def get_op_read_count(self):
         return self.admin_socket(['perf', 'dump', 'objecter'])['objecter']['osdop_read']
+
+FuseMount = FuseMountBase
index a919918644e229b35397c6542d6a58c44c2c688c..9de9bbcb7c3703c12ba2f656c4b68c0d5cc89343 100644 (file)
@@ -19,12 +19,12 @@ log = logging.getLogger(__name__)
 # internal metadata directory
 DEBUGFS_META_DIR = 'meta'
 
-class KernelMount(CephFSMount):
+class KernelMountBase(CephFSMount):
     def __init__(self, ctx, test_dir, client_id, client_remote,
                  client_keyring_path=None, hostfs_mntpt=None,
                  cephfs_name=None, cephfs_mntpt=None, brxnet=None,
                  client_config={}):
-        super(KernelMount, self).__init__(ctx=ctx, test_dir=test_dir,
+        super(KernelMountBase, self).__init__(ctx=ctx, test_dir=test_dir,
             client_id=client_id, client_remote=client_remote,
             client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
             cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet,
@@ -388,3 +388,5 @@ echo '{fdata}' | sudo tee /sys/kernel/debug/dynamic_debug/control
             else:
                 return 0
         return int(re.findall(r'read.*', buf)[0].split()[1])
+
+KernelMount = KernelMountBase
index fa99aadcd363e0f7c7c2dd69c7f9acd2a93790c5..9e0d2414a98e4cdb53fe6baee3f144af30d0d8eb 100644 (file)
@@ -21,11 +21,10 @@ from tasks.cephfs.filesystem import Filesystem
 
 log = logging.getLogger(__name__)
 
-
 UMOUNT_TIMEOUT = 300
 
 
-class CephFSMount(object):
+class CephFSMountBase(object):
     def __init__(self, ctx, test_dir, client_id, client_remote,
                  client_keyring_path=None, hostfs_mntpt=None,
                  cephfs_name=None, cephfs_mntpt=None, brxnet=None,
@@ -1650,3 +1649,5 @@ class CephFSMount(object):
             subvol_paths = self.ctx.created_subvols[self.cephfs_name]
             path_to_mount = subvol_paths[mount_subvol_num]
             self.cephfs_mntpt = path_to_mount
+
+CephFSMount = CephFSMountBase
index aa5bc6e56a9fac8ffa48178af422d43a4b5704ee..44edbdb989ab91729836099f4177f0f61bbecee7 100644 (file)
@@ -7,15 +7,15 @@ from teuthology import misc
 from tasks.ceph_test_case import CephTestCase
 
 # TODO move definition of CephCluster away from the CephFS stuff
-from tasks.cephfs.filesystem import CephCluster
+from tasks.cephfs.filesystem import CephClusterBase
 
 
 log = logging.getLogger(__name__)
 
 
-class MgrCluster(CephCluster):
+class MgrClusterBase(CephClusterBase):
     def __init__(self, ctx):
-        super(MgrCluster, self).__init__(ctx)
+        super(MgrClusterBase, self).__init__(ctx)
         self.mgr_ids = list(misc.all_roles_of_type(ctx.cluster, 'mgr'))
 
         if len(self.mgr_ids) == 0:
@@ -69,7 +69,7 @@ class MgrCluster(CephCluster):
         if force:
             cmd.append("--force")
         self.mon_manager.raw_cluster_cmd(*cmd)
-
+MgrCluster = MgrClusterBase
 
 class MgrTestCase(CephTestCase):
     MGRS_REQUIRED = 1
index 252184bd663351f15fa3c8d6c54fcba3bcf871ad..262d385cc251a876fb903fbb718fb5a6cedaa0ab 100644 (file)
@@ -168,11 +168,11 @@ if os.path.exists("./CMakeCache.txt") and os.path.exists("./bin"):
 
 try:
     from tasks.ceph_manager import CephManager
-    from tasks.cephfs.fuse_mount import FuseMount
-    from tasks.cephfs.kernel_mount import KernelMount
-    from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster
-    from tasks.cephfs.mount import CephFSMount
-    from tasks.mgr.mgr_test_case import MgrCluster
+    import tasks.cephfs.fuse_mount
+    import tasks.cephfs.kernel_mount
+    import tasks.cephfs.filesystem
+    import tasks.cephfs.mount
+    import tasks.mgr.mgr_test_case
     from teuthology.task import interactive
 except ImportError:
     sys.stderr.write("***\nError importing packages, have you activated your teuthology virtualenv "
@@ -678,15 +678,16 @@ class LocalCephFSMount():
         return self.addr in output
 
 
-class LocalKernelMount(LocalCephFSMount, KernelMount):
-    def __init__(self, ctx, test_dir, client_id=None,
-                 client_keyring_path=None, client_remote=None,
-                 hostfs_mntpt=None, cephfs_name=None, cephfs_mntpt=None,
-                 brxnet=None):
+class LocalKernelMount(LocalCephFSMount, tasks.cephfs.kernel_mount.KernelMountBase):
+    def __init__(self, ctx, test_dir, client_id, client_remote=LocalRemote(),
+                 client_keyring_path=None, hostfs_mntpt=None,
+                 cephfs_name=None, cephfs_mntpt=None, brxnet=None,
+                 client_config={}):
         super(LocalKernelMount, self).__init__(ctx=ctx, test_dir=test_dir,
-            client_id=client_id, client_keyring_path=client_keyring_path,
-            client_remote=LocalRemote(), hostfs_mntpt=hostfs_mntpt,
-            cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet)
+            client_id=client_id, client_remote=client_remote,
+            client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
+            cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet,
+            client_config=client_config)
 
         # Make vstart_runner compatible with teuth and qa/tasks/cephfs.
         self._mount_bin = [os.path.join(BIN_PREFIX , 'mount.ceph')]
@@ -706,14 +707,16 @@ class LocalKernelMount(LocalCephFSMount, KernelMount):
                 self.inst = c['inst']
                 return self.inst
 
+tasks.cephfs.kernel_mount.KernelMount = LocalKernelMount
 
-class LocalFuseMount(LocalCephFSMount, FuseMount):
-    def __init__(self, ctx, test_dir, client_id, client_keyring_path=None,
-                 client_remote=None, hostfs_mntpt=None, cephfs_name=None,
-                 cephfs_mntpt=None, brxnet=None):
+class LocalFuseMount(LocalCephFSMount, tasks.cephfs.fuse_mount.FuseMountBase):
+    def __init__(self, ctx, test_dir, client_id, client_remote=LocalRemote(),
+                client_keyring_path=None, cephfs_name=None,
+                cephfs_mntpt=None, hostfs_mntpt=None, brxnet=None,
+                client_config={}):
         super(LocalFuseMount, self).__init__(ctx=ctx, test_dir=test_dir,
-            client_id=client_id, client_keyring_path=client_keyring_path,
-            client_remote=LocalRemote(), hostfs_mntpt=hostfs_mntpt,
+            client_id=client_id, client_remote=client_remote,
+            client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
             cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet)
 
         # Following block makes tests meant for teuthology compatible with
@@ -775,6 +778,8 @@ class LocalFuseMount(LocalCephFSMount, FuseMount):
             else:
                 pass
 
+tasks.cephfs.fuse_mount.FuseMount = LocalFuseMount
+
 # XXX: this class has nothing to do with the Ceph daemon (ceph-mgr) of
 # the same name.
 class LocalCephManager(CephManager):
@@ -826,7 +831,7 @@ class LocalCephManager(CephManager):
                                    timeout=timeout, stdout=stdout)
 
 
-class LocalCephCluster(CephCluster):
+class LocalCephCluster(tasks.cephfs.filesystem.CephClusterBase):
     def __init__(self, ctx):
         # Deliberately skip calling CephCluster constructor
         self._ctx = ctx
@@ -896,8 +901,9 @@ class LocalCephCluster(CephCluster):
         del self._conf[subsys][key]
         self._write_conf()
 
+tasks.cephfs.filesystem.CephCluster = LocalCephCluster
 
-class LocalMDSCluster(LocalCephCluster, MDSCluster):
+class LocalMDSCluster(LocalCephCluster, tasks.cephfs.filesystem.MDSClusterBase):
     def __init__(self, ctx):
         LocalCephCluster.__init__(self, ctx)
         # Deliberately skip calling MDSCluster constructor
@@ -927,16 +933,18 @@ class LocalMDSCluster(LocalCephCluster, MDSCluster):
         for fs in self.status().get_filesystems():
             LocalFilesystem(ctx=self._ctx, fscid=fs['id']).destroy()
 
+tasks.cephfs.filesystem.MDSCluster = LocalMDSCluster
 
-class LocalMgrCluster(LocalCephCluster, MgrCluster):
+class LocalMgrCluster(LocalCephCluster, tasks.mgr.mgr_test_case.MgrClusterBase):
     def __init__(self, ctx):
         super(LocalMgrCluster, self).__init__(ctx)
 
         self.mgr_ids = ctx.daemons.daemons['ceph.mgr'].keys()
         self.mgr_daemons = dict([(id_, LocalDaemon("mgr", id_)) for id_ in self.mgr_ids])
 
+tasks.mgr.mgr_test_case.MgrCluster = LocalMgrCluster
 
-class LocalFilesystem(LocalMDSCluster, Filesystem):
+class LocalFilesystem(LocalMDSCluster, tasks.cephfs.filesystem.FilesystemBase):
     def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False,
                  **kwargs):
         # Deliberately skip calling Filesystem constructor
@@ -985,15 +993,16 @@ class LocalFilesystem(LocalMDSCluster, Filesystem):
     def set_clients_block(self, blocked, mds_id=None):
         raise NotImplementedError()
 
+tasks.cephfs.filesystem.Filesystem = LocalFilesystem
 
 class LocalCluster(object):
-    def __init__(self, rolename="placeholder"):
+    def __init__(self, rolenames=["mon.","mds.","osd.","mgr."]):
         self.remotes = {
-            LocalRemote(): [rolename]
+            LocalRemote(): rolenames
         }
 
     def only(self, requested):
-        return self.__class__(rolename=requested)
+        return self.__class__(rolenames=[requested])
 
     def run(self, *args, **kwargs):
         r = []
@@ -1006,15 +1015,20 @@ class LocalContext(object):
     def __init__(self):
         FSID = remote.run(args=[os.path.join(BIN_PREFIX, 'ceph'), 'fsid'],
                           stdout=StringIO()).stdout.getvalue()
+        from teuthology.run import get_summary
 
         cluster_name = 'ceph'
         self.archive = "./"
         self.config = {'cluster': cluster_name}
-        self.ceph = {cluster_name: Namespace()}
-        self.ceph[cluster_name].fsid = FSID
+        cluster_namespace = Namespace()
+        cluster_namespace.fsid = FSID
+        cluster_namespace.thrashers = []
+        self.ceph = {cluster_name: cluster_namespace}
         self.teuthology_config = teuth_config
         self.cluster = LocalCluster()
         self.daemons = DaemonGroup()
+
+        self.summary = get_summary("vstart_runner", None)
         if not hasattr(self, 'managers'):
             self.managers = {}
         self.managers[self.config['cluster']] = LocalCephManager(ctx=self)
@@ -1278,6 +1292,12 @@ def launch_entire_suite(overall_suite):
     return testrunner.run(overall_suite)
 
 
+import enum
+
+class Mode(enum.Enum):
+    unittest = enum.auto()
+    config = enum.auto()
+
 def exec_test():
     # Parse arguments
     global opt_interactive_on_error
@@ -1297,12 +1317,19 @@ def exec_test():
     opt_rotate_logs = False
     global opt_exit_on_test_failure
     opt_exit_on_test_failure = True
+    mode = Mode.unittest
 
     args = sys.argv[1:]
     flags = [a for a in args if a.startswith("-")]
     modules = [a for a in args if not a.startswith("-")]
     for f in flags:
-        if f == "--interactive":
+        if f == '-':
+            # using `-` here as a module name for the --config-mode
+            # In config mode modules are config paths,
+            # and `-` means reading the config from stdin
+            # This won't mean much for the unit test mode, but it will fail quickly.
+            modules.append("-")
+        elif f == "--interactive":
             opt_interactive_on_error = True
         elif f == "--create":
             opt_create_cluster = True
@@ -1340,10 +1367,16 @@ def exec_test():
             opt_exit_on_test_failure = False
         elif f == '--debug':
             log.setLevel(logging.DEBUG)
+        elif f == '--config-mode':
+            mode = Mode.config
         else:
             log.error("Unknown option '{0}'".format(f))
             sys.exit(-1)
 
+    if mode == Mode.config and (opt_create_cluster or opt_create_cluster_only):
+        log.error("Incompatible options: --config-mode and --create*")
+        sys.exit(-1)
+
     # Help developers by stopping up-front if their tree isn't built enough for all the
     # tools that the tests might want to use (add more here if needed)
     require_binaries = ["ceph-dencoder", "cephfs-journal-tool", "cephfs-data-scan",
@@ -1356,23 +1389,25 @@ def exec_test():
         sys.exit(-1)
 
     max_required_mds, max_required_clients, \
-            max_required_mgr, require_memstore = scan_tests(modules)
+            max_required_mgr, require_memstore = scan_tests(modules) if mode == Mode.unittest else (1, 1, False, False)
+    # in the config mode we rely on a manually setup vstart cluster
 
     global remote
     remote = LocalRemote()
 
-    CephFSMount.cleanup_stale_netnses_and_bridge(remote)
+    tasks.cephfs.mount.CephFSMountBase.cleanup_stale_netnses_and_bridge(remote)
 
-    # Tolerate no MDSs or clients running at start
-    ps_txt = remote.run(args=["ps", "-u"+str(os.getuid())],
-                        stdout=StringIO()).stdout.getvalue().strip()
-    lines = ps_txt.split("\n")[1:]
-    for line in lines:
-        if 'ceph-fuse' in line or 'ceph-mds' in line:
-            pid = int(line.split()[0])
-            log.warning("Killing stray process {0}".format(line))
-            remote.run(args=f'sudo kill -{signal.SIGKILL.value} {pid}',
-                       omit_sudo=False)
+    if mode == Mode.unittest:
+        # Tolerate no MDSs or clients running at start
+        ps_txt = remote.run(args=["ps", "-u"+str(os.getuid())],
+                            stdout=StringIO()).stdout.getvalue().strip()
+        lines = ps_txt.split("\n")[1:]
+        for line in lines:
+            if 'ceph-fuse' in line or 'ceph-mds' in line:
+                pid = int(line.split()[0])
+                log.warning("Killing stray process {0}".format(line))
+                remote.run(args=f'sudo kill -{signal.SIGKILL.value} {pid}',
+                        omit_sudo=False)
 
     # Fire up the Ceph cluster if the user requested it
     if opt_create_cluster or opt_create_cluster_only:
@@ -1492,6 +1527,10 @@ def exec_test():
     import teuthology.packaging
     teuthology.packaging.get_package_version = _get_package_version
 
+    if mode == Mode.config:
+        run_configs(modules)
+        return
+
     overall_suite = load_tests(modules, decorating_loader)
 
     # Filter out tests that don't lend themselves to interactive running,
@@ -1527,7 +1566,7 @@ def exec_test():
     overall_suite = load_tests(modules, loader.TestLoader())
     result = launch_tests(overall_suite)
 
-    CephFSMount.cleanup_stale_netnses_and_bridge(remote)
+    tasks.cephfs.mount.CephFSMountBase.cleanup_stale_netnses_and_bridge(remote)
     if opt_teardown_cluster:
         teardown_cluster()
 
@@ -1547,6 +1586,14 @@ def exec_test():
     else:
         sys.exit(0)
 
+def run_configs(configs):
+    from teuthology.run import setup_config, run_tasks
+
+    config = setup_config(configs)
+    ctx = LocalContext()
+    tasks = config['tasks']
+    run_tasks(tasks, ctx)
+    sys.exit(0 if ctx.summary['success'] else 1)
 
 if __name__ == "__main__":
     exec_test()