From: Sage Weil
Date: Tue, 12 Nov 2019 18:42:37 +0000 (+0000)
Subject: orchestra: add CephDaemonUnit daemonstate handler
X-Git-Tag: 1.1.0~195^2~2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9fa1736f5ca6b052cc93ba4e14d520c0188957f7;p=teuthology.git

orchestra: add CephDaemonUnit daemonstate handler

This is far from complete, but it's sufficient to get a cluster up and
to start + stop daemons.

Signed-off-by: Sage Weil
---

diff --git a/teuthology/orchestra/daemon/cephdaemonunit.py b/teuthology/orchestra/daemon/cephdaemonunit.py
new file mode 100644
index 00000000..2db48ed6
--- /dev/null
+++ b/teuthology/orchestra/daemon/cephdaemonunit.py
@@ -0,0 +1,165 @@
+import logging
+
+from teuthology.orchestra.daemon.state import DaemonState
+
+log = logging.getLogger(__name__)
+
+
+class CephDaemonUnit(DaemonState):
+    """
+    DaemonState handler that controls a daemon through its systemd unit,
+    ``ceph-<fsid>@<type>.<id>``, by running ``sudo systemctl`` remotely.
+
+    The running state is tracked locally in ``is_started``; systemd is not
+    queried for the actual state of the unit.
+    """
+    def __init__(self, remote, role, id_, *command_args,
+                 **command_kwargs):
+        """
+        :param remote: forwarded to DaemonState
+        :param role: forwarded to DaemonState
+        :param id_: forwarded to DaemonState
+        :param command_args: forwarded to DaemonState
+        :param command_kwargs: forwarded to DaemonState; 'logger' and
+                               'started' are also read here
+        """
+        super(CephDaemonUnit, self).__init__(
+            remote, role, id_, *command_args, **command_kwargs)
+        # Must follow the base initializer: the unit name is built from
+        # self.fsid, self.type_ and self.id_.
+        self._set_commands()
+        self.log = command_kwargs.get('logger', log)
+        self.is_started = command_kwargs.get('started', False)
+
+    def name(self):
+        """
+        Return the daemon name as '<type>.<id>'.
+        """
+        return '%s.%s' % (self.type_, self.id_)
+
+    def _get_systemd_cmd(self, action):
+        """
+        Build the command string applying a systemctl action to the unit.
+
+        :param action: systemctl verb, e.g. 'start' or 'stop'
+        :returns: the command as one space-joined string
+        """
+        return ' '.join([
+            'sudo', 'systemctl',
+            action,
+            'ceph-%s@%s.%s' % (self.fsid, self.type_, self.id_),
+        ])
+
+    def _set_commands(self):
+        """
+        Precompute the systemctl command strings for this daemon's unit.
+        """
+        self.start_cmd = self._get_systemd_cmd('start')
+        self.stop_cmd = self._get_systemd_cmd('stop')
+        self.restart_cmd = self._get_systemd_cmd('restart')
+        self.show_cmd = self._get_systemd_cmd('show')
+        self.status_cmd = self._get_systemd_cmd('status')
+
+    def reset(self):
+        """
+        Does nothing in this implementation
+        """
+        pass
+
+    def restart(self, *args, **kwargs):
+        """
+        Restart the unit, or start it if it is not marked as running.
+
+        :param args: accepted but not used by this implementation
+        :param kwargs: accepted but not used by this implementation
+        """
+        if not self.running():
+            self.log.info('Restarting %s (starting--it wasn\'t running)...' % self.name())
+            self.remote.sh(self.start_cmd)
+        else:
+            self.log.info('Restarting %s...' % self.name())
+            self.remote.sh(self.restart_cmd)
+        self.is_started = True
+
+    def restart_with_args(self, extra_args):
+        """
+        Restart, adding new parameters to the current command.
+
+        Not implemented in this class.
+
+        :param extra_args: Extra keyword arguments to be added.
+        """
+        raise NotImplementedError
+
+    def running(self):
+        """
+        Are we running?
+
+        :returns: the locally tracked ``is_started`` flag
+        """
+        return self.is_started
+
+    def signal(self, sig, silent=False):
+        """
+        Send a signal to associated remote command
+
+        Not implemented in this class.
+
+        :param sig: signal to send
+        :param silent: accepted but not used by this implementation
+        """
+        raise NotImplementedError
+
+    def start(self, timeout=300):
+        """
+        Start this daemon instance.
+
+        A daemon that is already marked as running is restarted instead.
+
+        :param timeout: accepted but not used by this implementation
+        """
+        if self.running():
+            self.log.warning('Restarting a running daemon')
+            self.restart()
+            return
+        # Run the command the same way restart() and stop() do, and record
+        # the new state so running() and stop() see the daemon as started.
+        self.remote.sh(self.start_cmd)
+        self.is_started = True
+
+    def stop(self, timeout=300):
+        """
+        Stop this daemon instance.
+
+        Note: this can raise a CommandFailedError,
+        CommandCrashedError, or ConnectionLostError.
+
+        :param timeout: accepted but not used by this implementation
+        """
+        if not self.running():
+            self.log.error('Tried to stop a non-running daemon')
+            return
+        self.log.info('Stopping %s...' % self.name())
+        self.remote.sh(self.stop_cmd)
+        self.is_started = False
+        self.log.info('Stopped %s' % self.name())
+
+    # FIXME why are there two wait methods?
+    def wait(self, timeout=300):
+        """
+        Wait for daemon to exit
+
+        Wait for daemon to stop (but don't trigger the stop).  Pass up
+        any exception.  Mark the daemon as not running.
+
+        Not implemented in this class.
+        """
+        raise NotImplementedError
+
+    def wait_for_exit(self):
+        """
+        clear remote run command value after waiting for exit.
+
+        Not implemented in this class.
+        """
+        raise NotImplementedError
diff --git a/teuthology/orchestra/daemon/group.py b/teuthology/orchestra/daemon/group.py
index 1e2ee9c9..990f34ab 100644
--- a/teuthology/orchestra/daemon/group.py
+++ b/teuthology/orchestra/daemon/group.py
@@ -1,13 +1,14 @@
 from teuthology import misc
 from teuthology.orchestra.daemon.state import DaemonState
 from teuthology.orchestra.daemon.systemd import SystemDState
+from teuthology.orchestra.daemon.cephdaemonunit import CephDaemonUnit
 
 
 class DaemonGroup(object):
     """
     Collection of daemon state instances
     """
-    def __init__(self, use_systemd=False):
+    def __init__(self, use_systemd=False, use_ceph_daemon=False):
         """
         self.daemons is a dictionary indexed by role.  Each entry is a
         dictionary of DaemonState values indexed by an id parameter.
@@ -18,6 +19,7 @@ class DaemonGroup(object):
         """
         self.daemons = {}
         self.use_systemd = use_systemd
+        self.use_ceph_daemon = use_ceph_daemon
 
     def add_daemon(self, remote, type_, id_, *args, **kwargs):
         """
@@ -57,13 +59,16 @@ class DaemonGroup(object):
         if id_ in self.daemons[role]:
             self.daemons[role][id_].stop()
             self.daemons[role][id_] = None
+
         klass = DaemonState
-        if remote.init_system == 'systemd':
+        if self.use_ceph_daemon:
+            klass = CephDaemonUnit
+        elif self.use_systemd and \
+             not any(map(lambda i: i == 'valgrind', args)) and \
+             remote.init_system == 'systemd':
             # We currently cannot use systemd and valgrind together because
             # it would require rewriting the unit files
-            if self.use_systemd and \
-                    not any(map(lambda i: i == 'valgrind', args)):
-                klass = SystemDState
+            klass = SystemDState
         self.daemons[role][id_] = klass(
             remote, role, id_, *args, **kwargs)
 
diff --git a/teuthology/orchestra/daemon/state.py b/teuthology/orchestra/daemon/state.py
index 51e2703b..3a6a1721 100644
--- a/teuthology/orchestra/daemon/state.py
+++ b/teuthology/orchestra/daemon/state.py
@@ -28,6 +28,7 @@ class DaemonState(object):
         self.cluster, self.type_ = self.role.split('.')[0:2]
         self.id_ = id_
         self.log = command_kwargs.get('logger', log)
+        self.fsid = command_kwargs.get('fsid')
         self.proc = None
 
     def check_status(self):