]> git-server-git.apps.pok.os.sepia.ceph.com Git - teuthology.git/commitdiff
orchestra: add CephDaemonUnit daemonstate handler
author Sage Weil <sage@redhat.com>
Tue, 12 Nov 2019 18:42:37 +0000 (18:42 +0000)
committer Sage Weil <sage@redhat.com>
Wed, 13 Nov 2019 15:33:47 +0000 (15:33 +0000)
This is far from complete, but it's sufficient to get a cluster up and
to start + stop daemons.

Signed-off-by: Sage Weil <sage@redhat.com>
teuthology/orchestra/daemon/cephdaemonunit.py [new file with mode: 0644]
teuthology/orchestra/daemon/group.py
teuthology/orchestra/daemon/state.py

diff --git a/teuthology/orchestra/daemon/cephdaemonunit.py b/teuthology/orchestra/daemon/cephdaemonunit.py
new file mode 100644 (file)
index 0000000..2db48ed
--- /dev/null
@@ -0,0 +1,117 @@
+import logging
+
+from teuthology.orchestra.daemon.state import DaemonState
+
+log = logging.getLogger(__name__)
+
+class CephDaemonUnit(DaemonState):
+    """
+    Daemon handler that drives a ceph-<fsid>@<type>.<id> systemd unit.
+
+    The running state is tracked locally in ``is_started``; the unit on
+    the remote is never queried, so the flag only reflects calls made
+    through this object (or the 'started' keyword given at construction).
+    """
+    def __init__(self, remote, role, id_, *command_args,
+                 **command_kwargs):
+        """
+        :param remote: remote whose sh() runs the systemctl commands
+        :param role: role of the daemon, passed on to DaemonState
+        :param id_: daemon id, used in the unit name
+        :param command_args: passed on to DaemonState
+        :param command_kwargs: passed on to DaemonState; 'logger' and
+                               'started' are also read here
+        """
+        super(CephDaemonUnit, self).__init__(
+            remote, role, id_, *command_args, **command_kwargs)
+        self._set_commands()
+        self.log = command_kwargs.get('logger', log)
+        self.is_started = command_kwargs.get('started', False)
+
+    def name(self):
+        """
+        Return '<type>.<id>', as used in the log messages.
+        """
+        return '%s.%s' % (self.type_, self.id_)
+
+    def _get_systemd_cmd(self, action):
+        """
+        Build the systemctl command line for this daemon's unit.
+
+        :param action: systemctl verb, e.g. 'start' or 'stop'
+        :returns: 'sudo systemctl <action> ceph-<fsid>@<type>.<id>'
+        """
+        return ' '.join([
+            'sudo', 'systemctl',
+            action,
+            'ceph-%s@%s.%s' % (self.fsid, self.type_, self.id_),
+        ])
+
+    def _set_commands(self):
+        """
+        Precompute the systemctl command string for each action.
+        """
+        self.start_cmd = self._get_systemd_cmd('start')
+        self.stop_cmd = self._get_systemd_cmd('stop')
+        self.restart_cmd = self._get_systemd_cmd('restart')
+        self.show_cmd = self._get_systemd_cmd('show')
+        self.status_cmd = self._get_systemd_cmd('status')
+
+    def reset(self):
+        """
+        Does nothing in this implementation
+        """
+        pass
+
+    def restart(self, *args, **kwargs):
+        """
+        Restart the unit, or start it if it is not marked as running.
+
+        :param args: accepted but ignored by this implementation
+        :param kwargs: accepted but ignored by this implementation
+        """
+        if not self.running():
+            self.log.info('Restarting %s (starting--it wasn\'t running)...' % self.name())
+            self.remote.sh(self.start_cmd)
+        else:
+            self.log.info('Restarting %s...' % self.name())
+            self.remote.sh(self.restart_cmd)
+        self.is_started = True
+
+    def restart_with_args(self, extra_args):
+        """
+        Restart, adding new parameters to the current command.
+
+        :param extra_args: Extra keyword arguments to be added.
+        :raises NotImplementedError: always; not implemented here
+        """
+        raise NotImplementedError
+
+    def running(self):
+        """
+        Are we running?
+
+        :returns: the locally tracked ``is_started`` flag
+        """
+        return self.is_started
+
+    def signal(self, sig, silent=False):
+        """
+        Send a signal to associated remote command
+
+        :param sig: signal to send
+        :raises NotImplementedError: always; not implemented here
+        """
+        raise NotImplementedError
+
+    def start(self, timeout=300):
+        """
+        Start this daemon instance.
+
+        If the daemon is already marked as running it is restarted
+        instead.
+
+        :param timeout: accepted but ignored by this implementation
+        """
+        if self.running():
+            self.log.warning('Restarting a running daemon')
+            self.restart()
+            return
+        # start_cmd is a shell string, so run it through sh() exactly as
+        # restart() and stop() do with their commands.
+        self.remote.sh(self.start_cmd)
+        # Record the start; otherwise running() stays False and stop()
+        # would refuse to stop the unit we just started.
+        self.is_started = True
+
+    def stop(self, timeout=300):
+        """
+        Stop this daemon instance.
+
+        Note: this can raise a CommandFailedError,
+        CommandCrashedError, or ConnectionLostError.
+
+        :param timeout: accepted but ignored by this implementation
+        """
+        if not self.running():
+            self.log.error('Tried to stop a non-running daemon')
+            return
+        self.log.info('Stopping %s...' % self.name())
+        self.remote.sh(self.stop_cmd)
+        self.is_started = False
+        self.log.info('Stopped %s' % self.name())
+
+    # FIXME why are there two wait methods?
+    def wait(self, timeout=300):
+        """
+        Wait for daemon to exit
+
+        Intended to wait for the daemon to stop (but not trigger the
+        stop), pass up any exception and mark the daemon as not running.
+
+        :raises NotImplementedError: always; not implemented here
+        """
+        raise NotImplementedError
+
+    def wait_for_exit(self):
+        """
+        clear remote run command value after waiting for exit.
+
+        :raises NotImplementedError: always; not implemented here
+        """
+        raise NotImplementedError
index 1e2ee9c9fbe2d51ffc8240eeec6c0ebd6ef8ba84..990f34ab99f17c789bc4a267c49398c0cc15dc3e 100644 (file)
@@ -1,13 +1,14 @@
 from teuthology import misc
 from teuthology.orchestra.daemon.state import DaemonState
 from teuthology.orchestra.daemon.systemd import SystemDState
+from teuthology.orchestra.daemon.cephdaemonunit import CephDaemonUnit
 
 
 class DaemonGroup(object):
     """
     Collection of daemon state instances
     """
-    def __init__(self, use_systemd=False):
+    def __init__(self, use_systemd=False, use_ceph_daemon=False):
         """
         self.daemons is a dictionary indexed by role.  Each entry is a
         dictionary of DaemonState values indexed by an id parameter.
@@ -18,6 +19,7 @@ class DaemonGroup(object):
         """
         self.daemons = {}
         self.use_systemd = use_systemd
+        self.use_ceph_daemon = use_ceph_daemon
 
     def add_daemon(self, remote, type_, id_, *args, **kwargs):
         """
@@ -57,13 +59,16 @@ class DaemonGroup(object):
         if id_ in self.daemons[role]:
             self.daemons[role][id_].stop()
             self.daemons[role][id_] = None
+
         klass = DaemonState
-        if remote.init_system == 'systemd':
+        if self.use_ceph_daemon:
+            klass = CephDaemonUnit
+        elif self.use_systemd and \
+             not any(map(lambda i: i == 'valgrind', args)) and \
+             remote.init_system == 'systemd':
             # We currently cannot use systemd and valgrind together because
             # it would require rewriting the unit files
-            if self.use_systemd and \
-                    not any(map(lambda i: i == 'valgrind', args)):
-                klass = SystemDState
+            klass = SystemDState
         self.daemons[role][id_] = klass(
             remote, role, id_, *args, **kwargs)
 
index 51e2703b3122d9d254919ba75af7613f3ea04e99..3a6a17213d0c70dac390ba9d483c0a346af90300 100644 (file)
@@ -28,6 +28,7 @@ class DaemonState(object):
         self.cluster, self.type_ = self.role.split('.')[0:2]
         self.id_ = id_
         self.log = command_kwargs.get('logger', log)
+        self.fsid = command_kwargs.get('fsid')
         self.proc = None
 
     def check_status(self):