git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
orchestra/daemon/cephdaemonunit: log each daemon's stdout/stderr
author Sage Weil <sage@redhat.com>
Wed, 13 Nov 2019 00:00:41 +0000 (00:00 +0000)
committer Sage Weil <sage@redhat.com>
Wed, 13 Nov 2019 15:33:47 +0000 (15:33 +0000)
This provides parity with the legacy ceph-daemon-tool behavior.

Signed-off-by: Sage Weil <sage@redhat.com>
teuthology/orchestra/daemon/cephdaemonunit.py
teuthology/orchestra/daemon/group.py

index 2db48ed6e2892ef7094a4e61e3d20ff9af696c28..e92a423c7c9b75547912bab01a1c86e48c42af48 100644 (file)
@@ -11,7 +11,10 @@ class CephDaemonUnit(DaemonState):
             remote, role, id_, *command_args, **command_kwargs)
         self._set_commands()
         self.log = command_kwargs.get('logger', log)
+        self.use_ceph_daemon = command_kwargs.get('use_ceph_daemon')
         self.is_started = command_kwargs.get('started', False)
+        if self.is_started:
+            self._start_logger()
 
     def name(self):
         return '%s.%s' % (self.type_, self.id_)
@@ -30,6 +33,21 @@ class CephDaemonUnit(DaemonState):
         self.show_cmd = self._get_systemd_cmd('show')
         self.status_cmd = self._get_systemd_cmd('status')
 
+    def _start_logger(self):
+        name = '%s.%s' % (self.type_, self.id_)
+        self.remote_logger = self.remote.run(
+            args=['sudo', self.use_ceph_daemon, 'logs',
+                  '-f',
+                  '--fsid', self.fsid,
+                  '--name', name],
+            logger=logging.getLogger(self.cluster + '.' + name),
+            label=name,
+            wait=False)
+
+    def _join_logger(self):
+        self.remote_logger.wait()
+        self.remote_logger = None
+
     def reset(self):
         """
         Does nothing in this implementation
@@ -46,10 +64,13 @@ class CephDaemonUnit(DaemonState):
         if not self.running():
             self.log.info('Restarting %s (starting--it wasn\'t running)...' % self.name())
             self.remote.sh(self.start_cmd)
+            self._start_logger()
+            self.is_started = True
         else:
             self.log.info('Restarting %s...' % self.name())
             self.remote.sh(self.restart_cmd)
-        self.is_started = True
+            self._join_logger()
+            self._start_logger()
 
     def restart_with_args(self, extra_args):
         """
@@ -82,6 +103,7 @@ class CephDaemonUnit(DaemonState):
             self.restart()
             return
         self.remote.run(self.start_cmd)
+        self._start_logger()
 
     def stop(self, timeout=300):
         """
@@ -98,6 +120,7 @@ class CephDaemonUnit(DaemonState):
         self.log.info('Stopping %s...' % self.name())
         self.remote.sh(self.stop_cmd)
         self.is_started = False
+        self._join_logger()
         self.log.info('Stopped %s' % self.name())
 
     # FIXME why are there two wait methods?
index 990f34ab99f17c789bc4a267c49398c0cc15dc3e..cbeb3cc4fbd76b924cefb9c40769e5149f6b5abc 100644 (file)
@@ -8,7 +8,7 @@ class DaemonGroup(object):
     """
     Collection of daemon state instances
     """
-    def __init__(self, use_systemd=False, use_ceph_daemon=False):
+    def __init__(self, use_systemd=False, use_ceph_daemon=None):
         """
         self.daemons is a dictionary indexed by role.  Each entry is a
         dictionary of DaemonState values indexed by an id parameter.
@@ -63,6 +63,7 @@ class DaemonGroup(object):
         klass = DaemonState
         if self.use_ceph_daemon:
             klass = CephDaemonUnit
+            kwargs['use_ceph_daemon'] = self.use_ceph_daemon
         elif self.use_systemd and \
              not any(map(lambda i: i == 'valgrind', args)) and \
              remote.init_system == 'systemd':