squid: qa/tasks: vstart_runner: introduce --config-mode
author     Leonid Usov <leonid.usov@ibm.com>
           Sat, 16 Mar 2024 15:41:47 +0000 (11:41 -0400)
committer  Leonid Usov <leonid.usov@ibm.com>
           Tue, 28 May 2024 16:06:19 +0000 (19:06 +0300)
The new mode of vstart_runner allows passing paths to yaml configs,
which are merged and then run just as teuthology would do it.
Building on the standard teuthology run method, we can even pass
"-" as the config name and provide the config on stdin, like

    python3 ../qa/tasks/vstart_runner.py --config-mode "-" << END
    tasks:
      - quiescer:
          quiesce_factor: 0.5
          min_quiesce: 10
          max_quiesce: 10
          initial_delay: 5
          cancelations_cap: 2
          paths:
            - a
            - b
            - c
      - waiter:
          on_exit: 100
    END
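
The same tasks can of course also be kept in a regular yaml file and
passed by path; several paths may be given and are merged in order,
as teuthology would do. For example (the path is illustrative):

    python3 ../qa/tasks/vstart_runner.py --config-mode /path/to/quiescer.yaml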

This commit does the minimum to allow testing of the quiescer,
but it also lays the groundwork for running arbitrary configs.

The cornerstone of the approach is to inject our local implementations
of the main fs suite classes. To make that possible, some minor
refactoring was required in the corresponding modules: the standard
classes were renamed with a *Base suffix, and the former class name,
without the suffix, is now a module-level variable initialized with
the *Base implementation. This refactoring is meant to be backward
compatible.
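
A minimal sketch of the resulting pattern, with class bodies reduced
to a toy status() method for illustration (the real classes carry the
full base-class chains shown in the diff below):

    # qa/tasks/cephfs/filesystem.py, schematically
    # (the real FilesystemBase derives from MDSClusterBase)
    class FilesystemBase:
        def status(self):
            return "teuthology (remote) implementation"

    # backward-compatible alias: existing
    # `from tasks.cephfs.filesystem import Filesystem` call sites keep working
    Filesystem = FilesystemBase

    # qa/tasks/vstart_runner.py, schematically
    # (the real LocalFilesystem also mixes in LocalMDSCluster)
    import tasks.cephfs.filesystem

    class LocalFilesystem(tasks.cephfs.filesystem.FilesystemBase):
        def status(self):
            return "local (vstart) implementation"

    # inject the local implementation: anything that resolves the name
    # through the module at run time, e.g.
    # tasks.cephfs.filesystem.Filesystem(...), now gets the vstart-aware class
    tasks.cephfs.filesystem.Filesystem = LocalFilesystem

This is also why vstart_runner switches from imports of the form
`from tasks.cephfs.fuse_mount import FuseMount` to importing the
modules themselves: the injected names are assigned to, and looked up
via, the module attributes.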

Signed-off-by: Leonid Usov <leonid.usov@ibm.com>
(cherry picked from commit 274849e544dd1f77158a2c80a4c654cb0363f71d)
Fixes: https://tracker.ceph.com/issues/66103
qa/tasks/cephfs/filesystem.py
qa/tasks/cephfs/fuse_mount.py
qa/tasks/cephfs/kernel_mount.py
qa/tasks/cephfs/mount.py
qa/tasks/mgr/mgr_test_case.py
qa/tasks/vstart_runner.py

qa/tasks/cephfs/filesystem.py
index bd416abcd8d3649725383be327caf619d67f62d4..176a5aad98aa9c3e56e7a9b8386d9cbe66d39314 100644
@@ -226,7 +226,7 @@ class FSStatus(RunCephCmd):
         #all matching
         return False
 
-class CephCluster(RunCephCmd):
+class CephClusterBase(RunCephCmd):
     @property
     def admin_remote(self):
         first_mon = misc.get_first_mon(self._ctx, None)
@@ -296,8 +296,9 @@ class CephCluster(RunCephCmd):
         log.warn(f'The address {addr} is not blocklisted')
         return False
 
+CephCluster = CephClusterBase
 
-class MDSCluster(CephCluster):
+class MDSClusterBase(CephClusterBase):
     """
     Collective operations on all the MDS daemons in the Ceph cluster.  These
     daemons may be in use by various Filesystems.
@@ -308,7 +309,7 @@ class MDSCluster(CephCluster):
     """
 
     def __init__(self, ctx):
-        super(MDSCluster, self).__init__(ctx)
+        super(MDSClusterBase, self).__init__(ctx)
 
     @property
     def mds_ids(self):
@@ -349,7 +350,7 @@ class MDSCluster(CephCluster):
         get_config specialization of service_type="mds"
         """
         if service_type != "mds":
-            return super(MDSCluster, self).get_config(key, service_type)
+            return super(MDSClusterBase, self).get_config(key, service_type)
 
         # Some tests stop MDS daemons, don't send commands to a dead one:
         running_daemons = [i for i, mds in self.mds_daemons.items() if mds.running()]
@@ -511,8 +512,9 @@ class MDSCluster(CephCluster):
         grace = float(self.get_config("mds_beacon_grace", service_type="mon"))
         return grace*2+15
 
+MDSCluster = MDSClusterBase
 
-class Filesystem(MDSCluster):
+class FilesystemBase(MDSClusterBase):
 
     """
     Generator for all Filesystems in the cluster.
@@ -529,7 +531,7 @@ class Filesystem(MDSCluster):
     MDSCluster may be shared with other Filesystems.
     """
     def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False):
-        super(Filesystem, self).__init__(ctx)
+        super(FilesystemBase, self).__init__(ctx)
 
         self.name = name
         self.id = None
@@ -1818,3 +1820,5 @@ class Filesystem(MDSCluster):
 
     def kill_op(self, reqid, rank=None):
         return self.rank_tell(['op', 'kill', reqid], rank=rank)
+
+Filesystem = FilesystemBase
qa/tasks/cephfs/fuse_mount.py
index 9881f8599d47559e56b01f21c4ab0e96f37c2592..9b94a4cc823accda41ccce006ca6477bc29f9efd 100644
@@ -10,17 +10,17 @@ from teuthology.contextutil import safe_while
 from teuthology.orchestra import run
 from teuthology.exceptions import CommandFailedError
 from tasks.ceph_manager import get_valgrind_args
-from tasks.cephfs.mount import CephFSMount, UMOUNT_TIMEOUT
+from tasks.cephfs.mount import CephFSMountBase, UMOUNT_TIMEOUT
 
 log = logging.getLogger(__name__)
 
 # Refer mount.py for docstrings.
-class FuseMount(CephFSMount):
+class FuseMountBase(CephFSMountBase):
     def __init__(self, ctx, test_dir, client_id, client_remote,
                  client_keyring_path=None, cephfs_name=None,
                  cephfs_mntpt=None, hostfs_mntpt=None, brxnet=None,
                  client_config={}):
-        super(FuseMount, self).__init__(ctx=ctx, test_dir=test_dir,
+        super(FuseMountBase, self).__init__(ctx=ctx, test_dir=test_dir,
             client_id=client_id, client_remote=client_remote,
             client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
             cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet,
@@ -416,7 +416,7 @@ class FuseMount(CephFSMount):
         """
         Whatever the state of the mount, get it gone.
         """
-        super(FuseMount, self).teardown()
+        super(FuseMountBase, self).teardown()
 
         self.umount()
 
@@ -534,3 +534,5 @@ print(_find_admin_socket("{client_name}"))
 
     def get_op_read_count(self):
         return self.admin_socket(['perf', 'dump', 'objecter'])['objecter']['osdop_read']
+
+FuseMount = FuseMountBase
qa/tasks/cephfs/kernel_mount.py
index c59f661a3cff4842aae20059b8bf62a9630bddaa..6f6deb598ae029fc193105af5a23d5eda594ceb6 100644
@@ -19,12 +19,12 @@ log = logging.getLogger(__name__)
 # internal metadata directory
 DEBUGFS_META_DIR = 'meta'
 
-class KernelMount(CephFSMount):
+class KernelMountBase(CephFSMount):
     def __init__(self, ctx, test_dir, client_id, client_remote,
                  client_keyring_path=None, hostfs_mntpt=None,
                  cephfs_name=None, cephfs_mntpt=None, brxnet=None,
                  client_config={}):
-        super(KernelMount, self).__init__(ctx=ctx, test_dir=test_dir,
+        super(KernelMountBase, self).__init__(ctx=ctx, test_dir=test_dir,
             client_id=client_id, client_remote=client_remote,
             client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
             cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet,
@@ -393,3 +393,5 @@ echo '{fdata}' | sudo tee /sys/kernel/debug/dynamic_debug/control
             else:
                 return 0
         return int(re.findall(r'read.*', buf)[0].split()[1])
+
+KernelMount = KernelMountBase
qa/tasks/cephfs/mount.py
index 9e6f750242e1a33e19eff4a6e90fdfe2390e7938..6c24c4a3f7a9515dcfef804c0b36242b894b7d4e 100644
@@ -21,11 +21,10 @@ from tasks.cephfs.filesystem import Filesystem
 
 log = logging.getLogger(__name__)
 
-
 UMOUNT_TIMEOUT = 300
 
 
-class CephFSMount(object):
+class CephFSMountBase(object):
     def __init__(self, ctx, test_dir, client_id, client_remote,
                  client_keyring_path=None, hostfs_mntpt=None,
                  cephfs_name=None, cephfs_mntpt=None, brxnet=None,
@@ -1638,3 +1637,5 @@ class CephFSMount(object):
             subvol_paths = self.ctx.created_subvols[self.cephfs_name]
             path_to_mount = subvol_paths[mount_subvol_num]
             self.cephfs_mntpt = path_to_mount
+
+CephFSMount = CephFSMountBase
qa/tasks/mgr/mgr_test_case.py
index aa5bc6e56a9fac8ffa48178af422d43a4b5704ee..44edbdb989ab91729836099f4177f0f61bbecee7 100644
@@ -7,15 +7,15 @@ from teuthology import misc
 from tasks.ceph_test_case import CephTestCase
 
 # TODO move definition of CephCluster away from the CephFS stuff
-from tasks.cephfs.filesystem import CephCluster
+from tasks.cephfs.filesystem import CephClusterBase
 
 
 log = logging.getLogger(__name__)
 
 
-class MgrCluster(CephCluster):
+class MgrClusterBase(CephClusterBase):
     def __init__(self, ctx):
-        super(MgrCluster, self).__init__(ctx)
+        super(MgrClusterBase, self).__init__(ctx)
         self.mgr_ids = list(misc.all_roles_of_type(ctx.cluster, 'mgr'))
 
         if len(self.mgr_ids) == 0:
@@ -69,7 +69,7 @@ class MgrCluster(CephCluster):
         if force:
             cmd.append("--force")
         self.mon_manager.raw_cluster_cmd(*cmd)
-
+MgrCluster = MgrClusterBase
 
 class MgrTestCase(CephTestCase):
     MGRS_REQUIRED = 1
qa/tasks/vstart_runner.py
index 4fcd0cfe1bb198ec1aea5542996f0ec6bb031e9d..fe3ccd8fc6d615e5ffebda1994bd3f12c054bf47 100644
@@ -168,11 +168,11 @@ if os.path.exists("./CMakeCache.txt") and os.path.exists("./bin"):
 
 try:
     from tasks.ceph_manager import CephManager
-    from tasks.cephfs.fuse_mount import FuseMount
-    from tasks.cephfs.kernel_mount import KernelMount
-    from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster
-    from tasks.cephfs.mount import CephFSMount
-    from tasks.mgr.mgr_test_case import MgrCluster
+    import tasks.cephfs.fuse_mount
+    import tasks.cephfs.kernel_mount
+    import tasks.cephfs.filesystem
+    import tasks.cephfs.mount
+    import tasks.mgr.mgr_test_case
     from teuthology.task import interactive
 except ImportError:
     sys.stderr.write("***\nError importing packages, have you activated your teuthology virtualenv "
@@ -678,15 +678,16 @@ class LocalCephFSMount():
         return self.addr in output
 
 
-class LocalKernelMount(LocalCephFSMount, KernelMount):
-    def __init__(self, ctx, test_dir, client_id=None,
-                 client_keyring_path=None, client_remote=None,
-                 hostfs_mntpt=None, cephfs_name=None, cephfs_mntpt=None,
-                 brxnet=None):
+class LocalKernelMount(LocalCephFSMount, tasks.cephfs.kernel_mount.KernelMountBase):
+    def __init__(self, ctx, test_dir, client_id, client_remote=LocalRemote(),
+                 client_keyring_path=None, hostfs_mntpt=None,
+                 cephfs_name=None, cephfs_mntpt=None, brxnet=None,
+                 client_config={}):
         super(LocalKernelMount, self).__init__(ctx=ctx, test_dir=test_dir,
-            client_id=client_id, client_keyring_path=client_keyring_path,
-            client_remote=LocalRemote(), hostfs_mntpt=hostfs_mntpt,
-            cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet)
+            client_id=client_id, client_remote=client_remote,
+            client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
+            cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet,
+            client_config=client_config)
 
         # Make vstart_runner compatible with teuth and qa/tasks/cephfs.
         self._mount_bin = [os.path.join(BIN_PREFIX , 'mount.ceph')]
@@ -706,14 +707,16 @@ class LocalKernelMount(LocalCephFSMount, KernelMount):
                 self.inst = c['inst']
                 return self.inst
 
+tasks.cephfs.kernel_mount.KernelMount = LocalKernelMount
 
-class LocalFuseMount(LocalCephFSMount, FuseMount):
-    def __init__(self, ctx, test_dir, client_id, client_keyring_path=None,
-                 client_remote=None, hostfs_mntpt=None, cephfs_name=None,
-                 cephfs_mntpt=None, brxnet=None):
+class LocalFuseMount(LocalCephFSMount, tasks.cephfs.fuse_mount.FuseMountBase):
+    def __init__(self, ctx, test_dir, client_id, client_remote=LocalRemote(),
+                client_keyring_path=None, cephfs_name=None,
+                cephfs_mntpt=None, hostfs_mntpt=None, brxnet=None,
+                client_config={}):
         super(LocalFuseMount, self).__init__(ctx=ctx, test_dir=test_dir,
-            client_id=client_id, client_keyring_path=client_keyring_path,
-            client_remote=LocalRemote(), hostfs_mntpt=hostfs_mntpt,
+            client_id=client_id, client_remote=client_remote,
+            client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
             cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet)
 
         # Following block makes tests meant for teuthology compatible with
@@ -775,6 +778,8 @@ class LocalFuseMount(LocalCephFSMount, FuseMount):
             else:
                 pass
 
+tasks.cephfs.fuse_mount.FuseMount = LocalFuseMount
+
 # XXX: this class has nothing to do with the Ceph daemon (ceph-mgr) of
 # the same name.
 class LocalCephManager(CephManager):
@@ -826,7 +831,7 @@ class LocalCephManager(CephManager):
                                    timeout=timeout, stdout=stdout)
 
 
-class LocalCephCluster(CephCluster):
+class LocalCephCluster(tasks.cephfs.filesystem.CephClusterBase):
     def __init__(self, ctx):
         # Deliberately skip calling CephCluster constructor
         self._ctx = ctx
@@ -896,8 +901,9 @@ class LocalCephCluster(CephCluster):
         del self._conf[subsys][key]
         self._write_conf()
 
+tasks.cephfs.filesystem.CephCluster = LocalCephCluster
 
-class LocalMDSCluster(LocalCephCluster, MDSCluster):
+class LocalMDSCluster(LocalCephCluster, tasks.cephfs.filesystem.MDSClusterBase):
     def __init__(self, ctx):
         LocalCephCluster.__init__(self, ctx)
         # Deliberately skip calling MDSCluster constructor
@@ -927,16 +933,18 @@ class LocalMDSCluster(LocalCephCluster, MDSCluster):
         for fs in self.status().get_filesystems():
             LocalFilesystem(ctx=self._ctx, fscid=fs['id']).destroy()
 
+tasks.cephfs.filesystem.MDSCluster = LocalMDSCluster
 
-class LocalMgrCluster(LocalCephCluster, MgrCluster):
+class LocalMgrCluster(LocalCephCluster, tasks.mgr.mgr_test_case.MgrClusterBase):
     def __init__(self, ctx):
         super(LocalMgrCluster, self).__init__(ctx)
 
         self.mgr_ids = ctx.daemons.daemons['ceph.mgr'].keys()
         self.mgr_daemons = dict([(id_, LocalDaemon("mgr", id_)) for id_ in self.mgr_ids])
 
+tasks.mgr.mgr_test_case.MgrCluster = LocalMgrCluster
 
-class LocalFilesystem(LocalMDSCluster, Filesystem):
+class LocalFilesystem(LocalMDSCluster, tasks.cephfs.filesystem.FilesystemBase):
     def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False):
         # Deliberately skip calling Filesystem constructor
         LocalMDSCluster.__init__(self, ctx)
@@ -979,15 +987,16 @@ class LocalFilesystem(LocalMDSCluster, Filesystem):
     def set_clients_block(self, blocked, mds_id=None):
         raise NotImplementedError()
 
+tasks.cephfs.filesystem.Filesystem = LocalFilesystem
 
 class LocalCluster(object):
-    def __init__(self, rolename="placeholder"):
+    def __init__(self, rolenames=["mon.","mds.","osd.","mgr."]):
         self.remotes = {
-            LocalRemote(): [rolename]
+            LocalRemote(): rolenames
         }
 
     def only(self, requested):
-        return self.__class__(rolename=requested)
+        return self.__class__(rolenames=[requested])
 
     def run(self, *args, **kwargs):
         r = []
@@ -1000,15 +1009,20 @@ class LocalContext(object):
     def __init__(self):
         FSID = remote.run(args=[os.path.join(BIN_PREFIX, 'ceph'), 'fsid'],
                           stdout=StringIO()).stdout.getvalue()
+        from teuthology.run import get_summary
 
         cluster_name = 'ceph'
         self.archive = "./"
         self.config = {'cluster': cluster_name}
-        self.ceph = {cluster_name: Namespace()}
-        self.ceph[cluster_name].fsid = FSID
+        cluster_namespace = Namespace()
+        cluster_namespace.fsid = FSID
+        cluster_namespace.thrashers = []
+        self.ceph = {cluster_name: cluster_namespace}
         self.teuthology_config = teuth_config
         self.cluster = LocalCluster()
         self.daemons = DaemonGroup()
+
+        self.summary = get_summary("vstart_runner", None)
         if not hasattr(self, 'managers'):
             self.managers = {}
         self.managers[self.config['cluster']] = LocalCephManager(ctx=self)
@@ -1272,6 +1286,12 @@ def launch_entire_suite(overall_suite):
     return testrunner.run(overall_suite)
 
 
+import enum
+
+class Mode(enum.Enum):
+    unittest = enum.auto()
+    config = enum.auto()
+
 def exec_test():
     # Parse arguments
     global opt_interactive_on_error
@@ -1291,12 +1311,19 @@ def exec_test():
     opt_rotate_logs = False
     global opt_exit_on_test_failure
     opt_exit_on_test_failure = True
+    mode = Mode.unittest
 
     args = sys.argv[1:]
     flags = [a for a in args if a.startswith("-")]
     modules = [a for a in args if not a.startswith("-")]
     for f in flags:
-        if f == "--interactive":
+        if f == '-':
+            # using `-` here as a module name for the --config-mode
+            # In config mode modules are config paths,
+            # and `-` means reading the config from stdin
+            # This won't mean much for the unit test mode, but it will fail quickly.
+            modules.append("-")
+        elif f == "--interactive":
             opt_interactive_on_error = True
         elif f == "--create":
             opt_create_cluster = True
@@ -1334,10 +1361,16 @@ def exec_test():
             opt_exit_on_test_failure = False
         elif f == '--debug':
             log.setLevel(logging.DEBUG)
+        elif f == '--config-mode':
+            mode = Mode.config
         else:
             log.error("Unknown option '{0}'".format(f))
             sys.exit(-1)
 
+    if mode == Mode.config and (opt_create_cluster or opt_create_cluster_only):
+        log.error("Incompatible options: --config-mode and --create*")
+        sys.exit(-1)
+
     # Help developers by stopping up-front if their tree isn't built enough for all the
     # tools that the tests might want to use (add more here if needed)
     require_binaries = ["ceph-dencoder", "cephfs-journal-tool", "cephfs-data-scan",
@@ -1350,23 +1383,25 @@ def exec_test():
         sys.exit(-1)
 
     max_required_mds, max_required_clients, \
-            max_required_mgr, require_memstore = scan_tests(modules)
+            max_required_mgr, require_memstore = scan_tests(modules) if mode == Mode.unittest else (1, 1, False, False)
+    # in the config mode we rely on a manually setup vstart cluster
 
     global remote
     remote = LocalRemote()
 
-    CephFSMount.cleanup_stale_netnses_and_bridge(remote)
+    tasks.cephfs.mount.CephFSMountBase.cleanup_stale_netnses_and_bridge(remote)
 
-    # Tolerate no MDSs or clients running at start
-    ps_txt = remote.run(args=["ps", "-u"+str(os.getuid())],
-                        stdout=StringIO()).stdout.getvalue().strip()
-    lines = ps_txt.split("\n")[1:]
-    for line in lines:
-        if 'ceph-fuse' in line or 'ceph-mds' in line:
-            pid = int(line.split()[0])
-            log.warning("Killing stray process {0}".format(line))
-            remote.run(args=f'sudo kill -{signal.SIGKILL.value} {pid}',
-                       omit_sudo=False)
+    if mode == Mode.unittest:
+        # Tolerate no MDSs or clients running at start
+        ps_txt = remote.run(args=["ps", "-u"+str(os.getuid())],
+                            stdout=StringIO()).stdout.getvalue().strip()
+        lines = ps_txt.split("\n")[1:]
+        for line in lines:
+            if 'ceph-fuse' in line or 'ceph-mds' in line:
+                pid = int(line.split()[0])
+                log.warning("Killing stray process {0}".format(line))
+                remote.run(args=f'sudo kill -{signal.SIGKILL.value} {pid}',
+                        omit_sudo=False)
 
     # Fire up the Ceph cluster if the user requested it
     if opt_create_cluster or opt_create_cluster_only:
@@ -1486,6 +1521,10 @@ def exec_test():
     import teuthology.packaging
     teuthology.packaging.get_package_version = _get_package_version
 
+    if mode == Mode.config:
+        run_configs(modules)
+        return
+
     overall_suite = load_tests(modules, decorating_loader)
 
     # Filter out tests that don't lend themselves to interactive running,
@@ -1521,7 +1560,7 @@ def exec_test():
     overall_suite = load_tests(modules, loader.TestLoader())
     result = launch_tests(overall_suite)
 
-    CephFSMount.cleanup_stale_netnses_and_bridge(remote)
+    tasks.cephfs.mount.CephFSMountBase.cleanup_stale_netnses_and_bridge(remote)
     if opt_teardown_cluster:
         teardown_cluster()
 
@@ -1541,6 +1580,14 @@ def exec_test():
     else:
         sys.exit(0)
 
+def run_configs(configs):
+    from teuthology.run import setup_config, run_tasks
+
+    config = setup_config(configs)
+    ctx = LocalContext()
+    tasks = config['tasks']
+    run_tasks(tasks, ctx)
+    sys.exit(0 if ctx.summary['success'] else 1)
 
 if __name__ == "__main__":
     exec_test()