mgr/cephadm: add "blocking_daemon_hosts" mechanism to scheduler
author    Adam King <adking@redhat.com>
          Wed, 12 Feb 2025 19:27:20 +0000 (14:27 -0500)
committer Adam King <adking@redhat.com>
          Wed, 12 Feb 2025 21:51:55 +0000 (16:51 -0500)
The idea is that the existence of certain daemons on certain hosts
should block the deployment of daemons for whatever spec we're
currently applying on those same hosts. This is initially being
added because we should not deploy nvmeof daemons on hosts that
already have nvmeof daemons with a different "group" parameter.

Signed-off-by: Adam King <adking@redhat.com>
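
To make the "group" rule concrete, here is a minimal standalone sketch
(illustrative names only, not cephadm API): when applying an nvmeof spec
with a given group, any host already running an nvmeof daemon from a
different group counts as blocked.

    from typing import Dict, List

    def blocked_hosts_for_group(nvmeof_groups_by_host: Dict[str, List[str]],
                                spec_group: str) -> List[str]:
        # A host is blocked if any nvmeof daemon on it belongs to another group.
        return [host for host, groups in nvmeof_groups_by_host.items()
                if any(g != spec_group for g in groups)]

    # Applying a spec with group "b": host1 (group "a") is blocked, host2 is not.
    assert blocked_hosts_for_group({'host1': ['a'], 'host2': ['b']}, 'b') == ['host1']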
src/pybind/mgr/cephadm/schedule.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/cephadmservice.py

diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py
index 04d3712c50ac4cbb30a1b725cb9b9314a0e6fbe4..5cd68299ed42253b0c8f2adc21849b3dbfe772a9 100644
@@ -153,6 +153,7 @@ class HostAssignment(object):
                  primary_daemon_type: Optional[str] = None,
                  per_host_daemon_type: Optional[str] = None,
                  rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] = None,
+                 blocking_daemon_hosts: Optional[List[orchestrator.HostSpec]] = None,
                  ):
         assert spec
         self.spec = spec  # type: ServiceSpec
@@ -160,6 +161,7 @@ class HostAssignment(object):
         self.hosts: List[orchestrator.HostSpec] = hosts
         self.unreachable_hosts: List[orchestrator.HostSpec] = unreachable_hosts
         self.draining_hosts: List[orchestrator.HostSpec] = draining_hosts
+        self.blocking_daemon_hosts: List[orchestrator.HostSpec] = blocking_daemon_hosts or []
         self.filter_new_host = filter_new_host
         self.service_name = spec.service_name()
         self.daemons = daemons
@@ -333,10 +335,28 @@ class HostAssignment(object):
         existing = existing_active + existing_standby
 
         # build to_add
+        blocking_daemon_hostnames = [
+            h.hostname for h in self.blocking_daemon_hosts
+        ]
+        unreachable_hostnames = [
+            h.hostname for h in self.unreachable_hosts
+        ]
         if not count:
-            to_add = [dd for dd in others if dd.hostname not in [
-                h.hostname for h in self.unreachable_hosts]]
+            to_add = [
+                dd for dd in others if (
+                    dd.hostname not in blocking_daemon_hostnames
+                    and dd.hostname not in unreachable_hostnames
+                )
+            ]
         else:
+            if blocking_daemon_hostnames:
+                to_remove.extend([
+                    dd for dd in existing if dd.hostname in blocking_daemon_hostnames
+                ])
+                existing = [
+                    dd for dd in existing if dd.hostname not in blocking_daemon_hostnames
+                ]
+
             # The number of new slots that need to be selected in order to fulfill count
             need = count - len(existing)
 
@@ -356,7 +376,7 @@ class HostAssignment(object):
                 for dp in matching_dps:
                     if need <= 0:
                         break
-                    if dp.hostname in related_service_hosts and dp.hostname not in [h.hostname for h in self.unreachable_hosts]:
+                    if dp.hostname in related_service_hosts and dp.hostname not in unreachable_hostnames:
                         logger.debug(f'Preferring {dp.hostname} for service {self.service_name} as related daemons have been placed there')
                         to_add.append(dp)
                         need -= 1  # this is last use of need so it can work as a counter
@@ -370,7 +390,10 @@ class HostAssignment(object):
             for dp in others:
                 if need <= 0:
                     break
-                if dp.hostname not in [h.hostname for h in self.unreachable_hosts]:
+                if (
+                    dp.hostname not in unreachable_hostnames
+                    and dp.hostname not in blocking_daemon_hostnames
+                ):
                     to_add.append(dp)
                     need -= 1  # this is last use of need in this function so it can work as a counter
 
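The effect of the new branches can be checked in isolation. The following
standalone sketch (simplified stand-in types, not the real HostAssignment
machinery) mirrors the rule above: with a count, existing daemons on blocking
hosts are moved to to_remove and stop counting toward count; without a count,
candidates on blocking or unreachable hosts are simply never added.

    from dataclasses import dataclass
    from typing import List, Optional, Tuple

    @dataclass
    class FakeDaemon:
        hostname: str

    def schedule(existing: List[FakeDaemon], candidates: List[FakeDaemon],
                 blocking: List[str], unreachable: List[str],
                 count: Optional[int]) -> Tuple[List[FakeDaemon], List[FakeDaemon]]:
        to_add: List[FakeDaemon] = []
        to_remove: List[FakeDaemon] = []
        if not count:
            # no count: place everywhere allowed, skipping blocked/unreachable hosts
            to_add = [d for d in candidates
                      if d.hostname not in blocking and d.hostname not in unreachable]
        else:
            # with a count: first evict existing daemons from blocking hosts ...
            to_remove = [d for d in existing if d.hostname in blocking]
            existing = [d for d in existing if d.hostname not in blocking]
            # ... then fill the remaining slots from allowed candidate hosts
            need = count - len(existing)
            for d in candidates:
                if need <= 0:
                    break
                if d.hostname not in blocking and d.hostname not in unreachable:
                    to_add.append(d)
                    need -= 1
        return to_add, to_remove

    # host1 blocks: its existing daemon is removed and the freed slot refills on host3
    adds, removes = schedule(
        existing=[FakeDaemon('host1'), FakeDaemon('host2')],
        candidates=[FakeDaemon('host3')],
        blocking=['host1'], unreachable=[], count=2,
    )
    assert [d.hostname for d in adds] == ['host3']
    assert [d.hostname for d in removes] == ['host1']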
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
index d3a715a146c2e1af3d6c0757db2b23282ea2bc87..b4702e2aca60e105daa550a36930f1b58a7a3041 100644
@@ -755,6 +755,8 @@ class CephadmServe:
 
         svc = service_registry.get_service(service_type)
         daemons = self.mgr.cache.get_daemons_by_service(service_name)
+
+        blocking_daemon_hosts = svc.get_blocking_daemon_hosts(service_name)
         related_service_daemons = self.mgr.cache.get_related_service_daemons(spec)
 
         public_networks: List[str] = []
@@ -824,6 +826,7 @@ class CephadmServe:
             ) == 'agent' else self.mgr.cache.get_schedulable_hosts(),
             unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
             draining_hosts=self.mgr.cache.get_draining_hosts(),
+            blocking_daemon_hosts=blocking_daemon_hosts,
             daemons=daemons,
             related_service_daemons=related_service_daemons,
             networks=self.mgr.cache.networks,
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
index 765e7eb5b9453607e29a9e2013727b93e132945e..ade536c2a59fe75b62af324c3926b4906a815225 100644
@@ -23,7 +23,12 @@ from ceph.deployment.service_spec import (
 )
 from ceph.deployment.utils import is_ipv6, unwrap_ipv6
 from mgr_util import build_url, merge_dicts
-from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus
+from orchestrator import (
+    OrchestratorError,
+    DaemonDescription,
+    DaemonDescriptionStatus,
+    HostSpec
+)
 from orchestrator._interface import daemon_type_to_service
 from cephadm import utils
 from .service_registry import register_cephadm_service
@@ -581,6 +586,9 @@ class CephadmService(metaclass=ABCMeta):
         """
         return False
 
+    def get_blocking_daemon_hosts(self, service_name: str) -> List[HostSpec]:
+        return []
+
 
 class CephService(CephadmService):
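
The base-class hook returns an empty list, so scheduling is unchanged unless a
service overrides it. A hypothetical nvmeof override might look like the sketch
below; the spec_store and cache accessors and the spec's group attribute are
assumptions based on the commit message, not part of this diff.

    class NvmeofService(CephService):
        TYPE = 'nvmeof'

        def get_blocking_daemon_hosts(self, service_name: str) -> List[HostSpec]:
            # Hypothetical sketch: block hosts that already run an nvmeof
            # daemon belonging to a different group than the spec being applied.
            spec = self.mgr.spec_store[service_name].spec  # assumed accessor
            blocking: List[HostSpec] = []
            for dd in self.mgr.cache.get_daemons_by_type(self.TYPE):  # assumed accessor
                if dd.service_name() == service_name:
                    continue  # same spec, and therefore same group: not blocking
                other_spec = self.mgr.spec_store[dd.service_name()].spec
                if getattr(other_spec, 'group', None) != getattr(spec, 'group', None):
                    if dd.hostname and dd.hostname not in [h.hostname for h in blocking]:
                        blocking.append(HostSpec(dd.hostname))
            return blocking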