From: Adam King
Date: Wed, 12 Feb 2025 19:27:20 +0000 (-0500)
Subject: mgr/cephadm: add "blocking_daemon_hosts" mechanism to scheduler
X-Git-Tag: testing/wip-vshankar-testing-20250311.100342-debug~3^2~3
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=838b1f8df1de132b65dfe77786ba7b6804d39aae;p=ceph-ci.git

mgr/cephadm: add "blocking_daemon_hosts" mechanism to scheduler

The idea is that the existence of certain daemons on certain hosts
should block the deployment of daemons for whatever spec we're
currently applying on those same hosts.

This is initially being added to support the fact that we should not
deploy nvmeof daemons on hosts that have nvmeof daemons with a
different "group" parameter.

Signed-off-by: Adam King
---

diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py
index 04d3712c50a..5cd68299ed4 100644
--- a/src/pybind/mgr/cephadm/schedule.py
+++ b/src/pybind/mgr/cephadm/schedule.py
@@ -153,6 +153,7 @@ class HostAssignment(object):
                  primary_daemon_type: Optional[str] = None,
                  per_host_daemon_type: Optional[str] = None,
                  rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] = None,
+                 blocking_daemon_hosts: Optional[List[orchestrator.HostSpec]] = None,
                  ):
         assert spec
         self.spec = spec  # type: ServiceSpec
@@ -160,6 +161,7 @@ class HostAssignment(object):
         self.hosts: List[orchestrator.HostSpec] = hosts
         self.unreachable_hosts: List[orchestrator.HostSpec] = unreachable_hosts
         self.draining_hosts: List[orchestrator.HostSpec] = draining_hosts
+        self.blocking_daemon_hosts: List[orchestrator.HostSpec] = blocking_daemon_hosts or []
         self.filter_new_host = filter_new_host
         self.service_name = spec.service_name()
         self.daemons = daemons
@@ -333,10 +335,28 @@ class HostAssignment(object):
         existing = existing_active + existing_standby
 
         # build to_add
+        blocking_daemon_hostnames = [
+            h.hostname for h in self.blocking_daemon_hosts
+        ]
+        unreachable_hostnames = [
+            h.hostname for h in self.unreachable_hosts
+        ]
         if not count:
-            to_add = [dd for dd in others if dd.hostname not in [
-                h.hostname for h in self.unreachable_hosts]]
+            to_add = [
+                dd for dd in others if (
+                    dd.hostname not in blocking_daemon_hostnames
+                    and dd.hostname not in unreachable_hostnames
+                )
+            ]
         else:
+            if blocking_daemon_hostnames:
+                to_remove.extend([
+                    dd for dd in existing if dd.hostname in blocking_daemon_hostnames
+                ])
+                existing = [
+                    dd for dd in existing if dd.hostname not in blocking_daemon_hostnames
+                ]
+
             # The number of new slots that need to be selected in order
             # to fulfill count
             need = count - len(existing)
@@ -356,7 +376,7 @@ class HostAssignment(object):
             for dp in matching_dps:
                 if need <= 0:
                     break
-                if dp.hostname in related_service_hosts and dp.hostname not in [h.hostname for h in self.unreachable_hosts]:
+                if dp.hostname in related_service_hosts and dp.hostname not in unreachable_hostnames:
                     logger.debug(f'Preferring {dp.hostname} for service {self.service_name} as related daemons have been placed there')
                     to_add.append(dp)
                     need -= 1  # this is last use of need so it can work as a counter
@@ -370,7 +390,10 @@ class HostAssignment(object):
             for dp in others:
                 if need <= 0:
                     break
-                if dp.hostname not in [h.hostname for h in self.unreachable_hosts]:
+                if (
+                    dp.hostname not in unreachable_hostnames
+                    and dp.hostname not in blocking_daemon_hostnames
+                ):
                     to_add.append(dp)
                     need -= 1  # this is last use of need in this function so it can work as a counter
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
index d3a715a146c..b4702e2aca6 100644
--- a/src/pybind/mgr/cephadm/serve.py
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -755,6 +755,8 @@ class CephadmServe:
 
         svc = service_registry.get_service(service_type)
         daemons = self.mgr.cache.get_daemons_by_service(service_name)
+
+        blocking_daemon_hosts = svc.get_blocking_daemon_hosts(service_name)
         related_service_daemons = self.mgr.cache.get_related_service_daemons(spec)
 
         public_networks: List[str] = []
@@ -824,6 +826,7 @@ class CephadmServe:
             ) == 'agent' else self.mgr.cache.get_schedulable_hosts(),
             unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
             draining_hosts=self.mgr.cache.get_draining_hosts(),
+            blocking_daemon_hosts=blocking_daemon_hosts,
             daemons=daemons,
             related_service_daemons=related_service_daemons,
             networks=self.mgr.cache.networks,
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
index 765e7eb5b94..ade536c2a59 100644
--- a/src/pybind/mgr/cephadm/services/cephadmservice.py
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -23,7 +23,12 @@ from ceph.deployment.service_spec import (
 )
 from ceph.deployment.utils import is_ipv6, unwrap_ipv6
 from mgr_util import build_url, merge_dicts
-from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus
+from orchestrator import (
+    OrchestratorError,
+    DaemonDescription,
+    DaemonDescriptionStatus,
+    HostSpec
+)
 from orchestrator._interface import daemon_type_to_service
 from cephadm import utils
 from .service_registry import register_cephadm_service
@@ -581,6 +586,9 @@ class CephadmService(metaclass=ABCMeta):
 
         """
         return False
 
+    def get_blocking_daemon_hosts(self, service_name: str) -> List[HostSpec]:
+        return []
+
 
 class CephService(CephadmService):
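
For illustration, the standalone sketch below (not cephadm code; Placement,
apply_blocking and the host names are invented for the example) mimics the
filtering the schedule.py hunks add: existing daemons on blocking hosts are
queued for removal, and blocking or unreachable hosts are skipped when
filling new slots.

from typing import List, NamedTuple, Tuple


class Placement(NamedTuple):
    daemon_id: str
    hostname: str


def apply_blocking(existing: List[Placement],
                   candidates: List[Placement],
                   blocking_hosts: List[str],
                   unreachable_hosts: List[str],
                   count: int) -> Tuple[List[Placement], List[Placement]]:
    # Existing daemons on blocking hosts are scheduled for removal...
    to_remove = [p for p in existing if p.hostname in blocking_hosts]
    existing = [p for p in existing if p.hostname not in blocking_hosts]
    # ...and blocking (or unreachable) hosts are never picked for new slots.
    need = count - len(existing)
    to_add: List[Placement] = []
    for p in candidates:
        if need <= 0:
            break
        if p.hostname not in unreachable_hosts and p.hostname not in blocking_hosts:
            to_add.append(p)
            need -= 1
    return to_add, to_remove


# host2 runs a conflicting daemon: its existing daemon is removed and the
# new slot goes to host3 instead.
existing = [Placement('x', 'host1'), Placement('y', 'host2')]
candidates = [Placement('a', 'host2'), Placement('b', 'host3')]
print(apply_blocking(existing, candidates, ['host2'], [], count=2))
# ([Placement(daemon_id='b', hostname='host3')], [Placement(daemon_id='y', hostname='host2')])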
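
The patch only adds the hook with an empty default. A rough, hypothetical
sketch of how the nvmeof service (services/nvmeof.py) might override it for
the "group" case from the commit message follows; the lookup helpers used
here (self.mgr.spec_store.all_specs, self.mgr.cache.get_daemons_by_type,
DaemonDescription.service_name) are assumed from existing cephadm code and
are not part of this patch.

from typing import List

from orchestrator import HostSpec

from .cephadmservice import CephService


class NvmeofService(CephService):
    TYPE = 'nvmeof'

    def get_blocking_daemon_hosts(self, service_name: str) -> List[HostSpec]:
        # Hosts running an nvmeof daemon whose spec carries a different
        # "group" than the spec we are applying should block deployment.
        our_spec = self.mgr.spec_store.all_specs.get(service_name)
        our_group = getattr(our_spec, 'group', None)
        blocking: List[HostSpec] = []
        for dd in self.mgr.cache.get_daemons_by_type('nvmeof'):
            if not dd.hostname or dd.service_name() == service_name:
                continue
            other_spec = self.mgr.spec_store.all_specs.get(dd.service_name())
            if getattr(other_spec, 'group', None) != our_group:
                blocking.append(HostSpec(hostname=dd.hostname))
        return blocking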