Basically, if you have a placement that explicitly defines the hosts
to place on, and then add the _no_schedule label to one of the hosts (which
should cause all daemons to be removed from the host), cephadm will simply
fail to apply the spec, saying the host with the _no_schedule label is "Unknown".
This is because we remove hosts with the _no_schedule label entirely from
the pool of hosts the scheduler has to work with. If we also provide
the scheduler with a list of currently draining hosts, it can handle this
better and the daemon can be drained off the host as expected.
Fixes: https://tracker.ceph.com/issues/56972
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit
7e8c07a3dd998dd3745b7f36919a21ca613484e4)
h for h in self.mgr.inventory.all_specs() if '_no_schedule' not in h.labels
]
+ def get_draining_hosts(self) -> List[HostSpec]:
+ """
+ Return every host spec from the inventory that carries the _no_schedule label.
+
+ Such hosts should have no daemons placed on them, but are potentially
+ still reachable.
+ """
+ return [
+ h for h in self.mgr.inventory.all_specs() if '_no_schedule' in h.labels
+ ]
+
def get_unreachable_hosts(self) -> List[HostSpec]:
"""
Return all hosts that are offline or in maintenance mode.
spec=spec,
hosts=self.mgr.inventory.all_specs(),
unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
+ draining_hosts=self.mgr.cache.get_draining_hosts(),
daemons=existing_daemons,
).place()
spec=spec,
hosts=self.cache.get_schedulable_hosts(),
unreachable_hosts=self.cache.get_unreachable_hosts(),
+ draining_hosts=self.cache.get_draining_hosts(),
networks=self.cache.networks,
daemons=self.cache.get_daemons_by_service(spec.service_name()),
allow_colo=svc.allow_colo(),
spec=spec,
hosts=self.inventory.all_specs(), # All hosts, even those without daemon refresh
unreachable_hosts=self.cache.get_unreachable_hosts(),
+ draining_hosts=self.cache.get_draining_hosts(),
networks=self.cache.networks,
daemons=self.cache.get_daemons_by_service(spec.service_name()),
allow_colo=self.cephadm_services[spec.service_type].allow_colo(),
spec, # type: ServiceSpec
hosts: List[orchestrator.HostSpec],
unreachable_hosts: List[orchestrator.HostSpec],
+ draining_hosts: List[orchestrator.HostSpec],
daemons: List[orchestrator.DaemonDescription],
networks: Dict[str, Dict[str, Dict[str, List[str]]]] = {},
filter_new_host=None, # type: Optional[Callable[[str],bool]]
self.primary_daemon_type = primary_daemon_type or spec.service_type
self.hosts: List[orchestrator.HostSpec] = hosts
self.unreachable_hosts: List[orchestrator.HostSpec] = unreachable_hosts
+ self.draining_hosts: List[orchestrator.HostSpec] = draining_hosts
self.filter_new_host = filter_new_host
self.service_name = spec.service_name()
self.daemons = daemons
if self.spec.placement.hosts:
explicit_hostnames = {h.hostname for h in self.spec.placement.hosts}
- unknown_hosts = explicit_hostnames.difference(set(self.get_hostnames()))
+ known_hosts = self.get_hostnames() + [h.hostname for h in self.draining_hosts]
+ unknown_hosts = explicit_hostnames.difference(set(known_hosts))
if unknown_hosts:
raise OrchestratorValidationError(
f'Cannot place {self.spec.one_line_str()} on {", ".join(sorted(unknown_hosts))}: Unknown hosts')
DaemonPlacement(daemon_type=self.primary_daemon_type,
hostname=h.hostname, network=h.network, name=h.name,
ports=self.ports_start)
- for h in self.spec.placement.hosts
+ for h in self.spec.placement.hosts if h.hostname not in [dh.hostname for dh in self.draining_hosts]
]
elif self.spec.placement.label:
ls = [
hosts=self.mgr.cache.get_non_draining_hosts() if spec.service_name(
) == 'agent' else self.mgr.cache.get_schedulable_hosts(),
unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
+ draining_hosts=self.mgr.cache.get_draining_hosts(),
daemons=daemons,
networks=self.mgr.cache.networks,
filter_new_host=(
spec=ServiceSpec('mon', placement=pspec),
hosts=self.mgr.cache.get_schedulable_hosts(),
unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
+ draining_hosts=self.mgr.cache.get_draining_hosts(),
daemons=[],
networks=self.mgr.cache.networks,
)
spec=ServiceSpec('mon', placement=ks.placement),
hosts=self.mgr.cache.get_schedulable_hosts(),
unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
+ draining_hosts=self.mgr.cache.get_draining_hosts(),
daemons=[],
networks=self.mgr.cache.networks,
)
spec=spec,
hosts=hosts,
unreachable_hosts=[],
+ draining_hosts=[],
daemons=daemons,
).place()
if isinstance(host_res, list):
spec=spec,
hosts=hosts,
unreachable_hosts=[],
+ draining_hosts=[],
daemons=daemons
).place()
spec=spec,
hosts=[HostSpec(h, labels=['foo']) for h in hosts],
unreachable_hosts=[],
+ draining_hosts=[],
daemons=daemons,
allow_colo=allow_colo,
rank_map=rank_map,
spec=spec,
hosts=[HostSpec(h, labels=['foo']) for h in available_hosts],
unreachable_hosts=[],
+ draining_hosts=[],
daemons=[],
allow_colo=allow_colo,
).get_candidates()
spec=ServiceSpec(service_type, placement=placement),
hosts=[HostSpec(h, labels=['foo']) for h in hosts],
unreachable_hosts=[],
+ draining_hosts=[],
daemons=daemons,
).place()
assert len(hosts) == expected_len
spec=ServiceSpec(service_type, placement=placement),
hosts=[HostSpec(h) for h in hosts],
unreachable_hosts=[],
+ draining_hosts=[],
daemons=daemons,
).place()
assert len(hosts) == expected_len
spec=spec,
hosts=[HostSpec(h, labels=['foo']) for h in networks.keys()],
unreachable_hosts=[],
+ draining_hosts=[],
daemons=daemons,
allow_colo=True,
networks=networks,
spec=ServiceSpec(service_type, placement=placement),
hosts=[HostSpec(h) for h in hosts],
unreachable_hosts=[],
+ draining_hosts=[],
daemons=daemons,
).place()
assert str(e.value) == expected
spec=spec,
hosts=[HostSpec(h) for h in hosts],
unreachable_hosts=[],
+ draining_hosts=[],
daemons=daemons,
).place()
assert sorted([h.hostname for h in hosts]) in expected
spec=spec,
hosts=[HostSpec(h) for h in hosts],
unreachable_hosts=[HostSpec(h) for h in unreachable_hosts],
+ draining_hosts=[],
daemons=daemons,
).place()
assert sorted([h.hostname for h in to_add]) in expected_add
spec=spec,
hosts=host_specs,
unreachable_hosts=[h for h in host_specs if h.status],
+ draining_hosts=[],
daemons=daemons,
).place()
assert sorted([h.hostname for h in to_add]) in expected_add
# Return the stored unreachable_hosts value unchanged.
def get_unreachable_hosts(self):
return self.unreachable_hosts
+ # Always reports no draining hosts: returns a new empty list on every call.
+ def get_draining_hosts(self):
+ return []
+
# Build a mapping with one entry per item in self.hosts; every entry gets
# the same fixed content {'a': {'b': ['c']}}.
@property
def networks(self):
return {h: {'a': {'b': ['c']}} for h in self.hosts}
'crash', placement=profile.placement),
hosts=self.mgr.cache.get_schedulable_hosts(),
unreachable_hosts=self.mgr.cache.get_unreachable_hosts(),
+ draining_hosts=self.mgr.cache.get_draining_hosts(),
daemons=[],
networks=self.mgr.cache.networks,
)