git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: extend stray service detection with a general ignore hook
author: John Mulligan <phlogistonjohn@asynchrono.us>
Mon, 12 Aug 2024 14:56:36 +0000 (10:56 -0400)
committer: John Mulligan <jmulligan@redhat.com>
Tue, 20 Aug 2024 13:53:57 +0000 (09:53 -0400)
Extend the system's current stray service detection with a new method on
the service classes so that new classes can hook into stray service
detection when ceph services and cephadm services have differing names,
or when they use subsystems that call into ceph with different names (my
use case).

Signed-off-by: John Mulligan <phlogistonjohn@asynchrono.us>
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/cephadmservice.py

index d95f44dc81846dad675f323ede9e06adb1174e0a..822194e117cbd0323f1dcb1d45bc8e5922ec7312 100644 (file)
@@ -482,7 +482,9 @@ class CephadmServe:
         if self.mgr.warn_on_stray_hosts or self.mgr.warn_on_stray_daemons:
             ls = self.mgr.list_servers()
             self.log.debug(ls)
-            managed = self.mgr.cache.get_daemon_names()
+            managed_daemons = self.mgr.cache.get_daemons()
+            stray_filter = self._build_stray_filter(managed_daemons)
+            managed = [d.name() for d in managed_daemons]
             host_detail = []     # type: List[str]
             host_num_daemons = 0
             daemon_detail = []  # type: List[str]
@@ -496,11 +498,7 @@ class CephadmServe:
                     daemon_id = s.get('id')
                     assert daemon_id
                     name = self._service_reference_name(s.get('type'), daemon_id)
-                    if s.get('type') == 'tcmu-runner':
-                        # because we don't track tcmu-runner daemons in the host cache
-                        # and don't have a way to check if the daemon is part of iscsi service
-                        # we assume that all tcmu-runner daemons are managed by cephadm
-                        managed.append(name)
+                    managed.extend(stray_filter(s.get('type'), daemon_id, name))
                     # Don't mark daemons we just created/removed in the last minute as stray.
                     # It may take some time for the mgr to become aware the daemon
                     # had been created/removed.
@@ -544,6 +542,31 @@ class CephadmServe:
             )
         return name
 
+    def _build_stray_filter(
+        self, managed: List[orchestrator.DaemonDescription]
+    ) -> Callable[[str, str, str], List[str]]:
+        svcs = {  # de-duplicated service types of the managed daemons
+            daemon_type_to_service(cast(str, dd.daemon_type))
+            for dd in managed
+        }
+        _services = [self.mgr.cephadm_services[dt] for dt in svcs]  # one service object per type
+
+        def _filter(
+            service_type: str, daemon_id: str, name: str
+        ) -> List[str]:
+            if service_type == 'tcmu-runner':
+                # tcmu-runner daemons are not tracked in the host cache, and there
+                # is no way to check whether one belongs to an iscsi service, so
+                # assume that all tcmu-runner daemons are managed by cephadm
+                return [name]
+            out = []  # gets one copy of name per service that asks to ignore it
+            for svc in _services:
+                if svc.ignore_possible_stray(service_type, daemon_id, name):
+                    out.append(name)
+            return out
+
+        return _filter  # the caller extends its list of managed names with the result
+
     def _check_for_moved_osds(self) -> None:
         self.log.debug('_check_for_moved_osds')
         all_osds: DefaultDict[int, List[orchestrator.DaemonDescription]] = defaultdict(list)
index 8a41d3a54c2db1db27fc9a52ae838d8a0c0e44a0..f7a360b7243ab288fe5b9393111568d93aaf3774 100644 (file)
@@ -564,6 +564,16 @@ class CephadmService(metaclass=ABCMeta):
         """Called to carry out any purge tasks following service removal"""
         logger.debug(f'Purge called for {self.TYPE} - no action taken')
 
+    def ignore_possible_stray(
+        self, service_type: str, daemon_id: str, name: str
+    ) -> bool:
+        """Return True if a possible stray service should be ignored because it
+        "virtually" belongs to this service, e.g. when a properly managed service
+        spawns layered ceph services with different names. Receives the reported
+        type, daemon id and reference name. This base version always returns False.
+        """
+        return False
+
 
 class CephService(CephadmService):
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: