placements, to_add, to_remove = HostAssignment(
spec=spec,
hosts=self.mgr.inventory.all_specs(),
+ unreachable_hosts=self.mgr._unreachable_hosts(),
daemons=existing_daemons,
).place()
h for h in self.inventory.all_specs()
if (
self.cache.host_had_daemon_refresh(h.hostname)
- and h.status.lower() not in ['maintenance', 'offline']
- and h.hostname not in self.offline_hosts
and '_no_schedule' not in h.labels
)
]
+ def _unreachable_hosts(self) -> List[HostSpec]:
+ """
+ Return all hosts that are offline or in maintenance mode.
+
+ A host from ``self.inventory.all_specs()`` is included when its
+ status, compared case-insensitively, is 'maintenance' or 'offline',
+ or when its hostname is listed in ``self.offline_hosts``.
+
+ The idea is we should not touch the daemons on these hosts (since
+ in theory the hosts are inaccessible so we CAN'T touch them) but
+ we still want to count daemons that exist on these hosts toward the
+ placement so daemons on these hosts aren't just moved elsewhere.
+ """
+ return [
+ h for h in self.inventory.all_specs()
+ if (
+ h.status.lower() in ['maintenance', 'offline']
+ or h.hostname in self.offline_hosts
+ )
+ ]
+
def _check_valid_addr(self, host: str, addr: str) -> str:
# make sure hostname is resolvable before trying to make a connection
try:
ha = HostAssignment(
spec=spec,
hosts=self._schedulable_hosts(),
+ unreachable_hosts=self._unreachable_hosts(),
networks=self.cache.networks,
daemons=self.cache.get_daemons_by_service(spec.service_name()),
allow_colo=svc.allow_colo(),
HostAssignment(
spec=spec,
hosts=self.inventory.all_specs(), # All hosts, even those without daemon refresh
+ unreachable_hosts=self._unreachable_hosts(),
networks=self.cache.networks,
daemons=self.cache.get_daemons_by_service(spec.service_name()),
allow_colo=self.cephadm_services[spec.service_type].allow_colo(),
def __init__(self,
spec, # type: ServiceSpec
hosts: List[orchestrator.HostSpec],
+ unreachable_hosts: List[orchestrator.HostSpec],
daemons: List[orchestrator.DaemonDescription],
networks: Dict[str, Dict[str, Dict[str, List[str]]]] = {},
filter_new_host=None, # type: Optional[Callable[[str],bool]]
self.spec = spec # type: ServiceSpec
self.primary_daemon_type = primary_daemon_type or spec.service_type
self.hosts: List[orchestrator.HostSpec] = hosts
+ self.unreachable_hosts: List[orchestrator.HostSpec] = unreachable_hosts
self.filter_new_host = filter_new_host
self.service_name = spec.service_name()
self.daemons = daemons
to_remove.append(dd)
to_add += host_slots
+ to_remove = [d for d in to_remove if d.hostname not in [
+ h.hostname for h in self.unreachable_hosts]]
+
return slots, to_add, to_remove
def place(self):
existing_active: List[orchestrator.DaemonDescription] = []
existing_standby: List[orchestrator.DaemonDescription] = []
existing_slots: List[DaemonPlacement] = []
+ to_add: List[DaemonPlacement] = []
to_remove: List[orchestrator.DaemonDescription] = []
ranks: List[int] = list(range(len(candidates)))
others: List[DaemonPlacement] = candidates.copy()
if not found:
to_remove.append(dd)
+ # TODO: At some point we want to deploy daemons that are on offline hosts.
+ # At what point we do this differs per daemon type. Stateless daemons we could
+ # do quickly to improve availability. Stateful daemons we might want to wait
+ # longer on, to see if the host comes back online.
+
existing = existing_active + existing_standby
# build to_add
if not count:
- to_add = others
+ to_add = [dd for dd in others if dd.hostname not in [
+ h.hostname for h in self.unreachable_hosts]]
else:
# The number of new slots that need to be selected in order to fulfill count
need = count - len(existing)
del existing_slots[count:]
return self.place_per_host_daemons(existing_slots, [], to_remove)
- if need > 0:
- to_add = others[:need]
+ for dp in others:
+ if need <= 0:
+ break
+ if dp.hostname not in [h.hostname for h in self.unreachable_hosts]:
+ to_add.append(dp)
+ need -= 1 # this is last use of need in this function so it can work as a counter
if self.rank_map is not None:
# assign unused ranks (and rank_generations) to to_add
ha = HostAssignment(
spec=ServiceSpec('mon', placement=pspec),
hosts=self.mgr._schedulable_hosts(),
+ unreachable_hosts=self.mgr._unreachable_hosts(),
daemons=[],
networks=self.mgr.cache.networks,
)
ha = HostAssignment(
spec=ServiceSpec('mon', placement=ks.placement),
hosts=self.mgr._schedulable_hosts(),
+ unreachable_hosts=self.mgr._unreachable_hosts(),
daemons=[],
networks=self.mgr.cache.networks,
)
ha = HostAssignment(
spec=spec,
hosts=self.mgr._schedulable_hosts(),
+ unreachable_hosts=self.mgr._unreachable_hosts(),
daemons=daemons,
networks=self.mgr.cache.networks,
filter_new_host=(
# candidates for scheduling
candidates = [
h.hostname for h in cephadm_module._schedulable_hosts()]
- assert 'test2' not in candidates
- assert 'test3' not in candidates
+ assert 'test2' in candidates
+ assert 'test3' in candidates
+
+ unreachable = [h.hostname for h in cephadm_module._unreachable_hosts()]
+ assert 'test2' in unreachable
+ assert 'test3' in unreachable
with with_service(cephadm_module, ServiceSpec('crash', placement=PlacementSpec(host_pattern='*'))):
# re-apply services. No mgr should be removed from maint/offline hosts
host_res, to_add, to_remove = HostAssignment(
spec=spec,
hosts=hosts,
+ unreachable_hosts=[],
daemons=daemons,
).place()
if isinstance(host_res, list):
host_res, to_add, to_remove = HostAssignment(
spec=spec,
hosts=hosts,
+ unreachable_hosts=[],
daemons=daemons
).place()
all_slots, to_add, to_remove = HostAssignment(
spec=spec,
hosts=[HostSpec(h, labels=['foo']) for h in hosts],
+ unreachable_hosts=[],
daemons=daemons,
allow_colo=allow_colo,
rank_map=rank_map,
hosts, to_add, to_remove = HostAssignment(
spec=ServiceSpec(service_type, placement=placement),
hosts=[HostSpec(h, labels=['foo']) for h in hosts],
+ unreachable_hosts=[],
daemons=daemons,
).place()
assert len(hosts) == expected_len
hosts, to_add, to_remove = HostAssignment(
spec=ServiceSpec(service_type, placement=placement),
hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
daemons=daemons,
).place()
assert len(hosts) == expected_len
all_slots, to_add, to_remove = HostAssignment(
spec=spec,
hosts=[HostSpec(h, labels=['foo']) for h in networks.keys()],
+ unreachable_hosts=[],
daemons=daemons,
allow_colo=True,
networks=networks,
hosts, to_add, to_remove = HostAssignment(
spec=ServiceSpec(service_type, placement=placement),
hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
daemons=daemons,
).place()
assert str(e.value) == expected
hosts, to_add, to_remove = HostAssignment(
spec=spec,
hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[],
daemons=daemons,
).place()
assert sorted([h.hostname for h in hosts]) in expected
assert sorted([h.hostname for h in to_add]) in expected_add
assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class UnreachableHostsTest(NamedTuple):
+ """One parametrized case for ``test_unreachable_host``."""
+ # service type handed to ServiceSpec
+ service_type: str
+ # placement handed to ServiceSpec
+ placement: PlacementSpec
+ # names of all hosts given to HostAssignment as ``hosts``
+ hosts: List[str]
+ # names of the hosts given to HostAssignment as ``unreachable_hosts``
+ # NOTE(review): spelled ``unreachables_hosts`` here but ``unreachable_hosts``
+ # in the parametrize argnames; harmless as long as cases are built and
+ # unpacked positionally, but worth aligning.
+ unreachables_hosts: List[str]
+ # daemons that already exist before place() is called
+ daemons: List[DaemonDescription]
+ # acceptable outcomes: the sorted hostnames of to_add must equal one entry
+ expected_add: List[List[str]]
+ # acceptable outcomes: the sorted daemon names of to_remove must equal one entry
+ expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,unreachable_hosts,daemons,expected_add,expected_remove",
+ [
+ # count=3 over three hosts, host2 unreachable, no daemons yet:
+ # only host1 and host3 are expected in to_add.
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3'.split(),
+ ['host2'],
+ [],
+ [['host1', 'host3']],
+ [[]],
+ ),
+ # explicit placement on host3, host1 unreachable, a daemon on every host:
+ # mgr.b (host2) is expected in to_remove, mgr.a on unreachable host1 is not.
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(hosts=['host3']),
+ 'host1 host2 host3'.split(),
+ ['host1'],
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [['mgr.b']],
+ ),
+ # count=3 with three existing daemons, one of them on unreachable host1:
+ # nothing is expected to be added or removed.
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ ['host1'],
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [[]],
+ ),
+ # count=1 with three existing daemons, host1 and host3 unreachable:
+ # only mgr.b on the reachable host2 is expected in to_remove.
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=1),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host3'.split(),
+ [
+ DaemonDescription('mgr', 'a', 'host1'),
+ DaemonDescription('mgr', 'b', 'host2'),
+ DaemonDescription('mgr', 'c', 'host3', is_active=True),
+ ],
+ [[]],
+ [['mgr.b']],
+ ),
+ # count=3 over four hosts, host2 unreachable, no daemons yet:
+ # the three reachable hosts are expected in to_add.
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ ['host2'],
+ [],
+ [['host1', 'host3', 'host4']],
+ [[]],
+ ),
+ # count=3 over four hosts, host1 and host4 unreachable, no daemons yet:
+ # only the two reachable hosts are expected in to_add.
+ UnreachableHostsTest(
+ 'mgr',
+ PlacementSpec(count=3),
+ 'host1 host2 host3 host4'.split(),
+ 'host1 host4'.split(),
+ [],
+ [['host2', 'host3']],
+ [[]],
+ ),
+
+ ])
+def test_unreachable_host(service_type, placement, hosts, unreachable_hosts, daemons, expected_add, expected_remove):
+ """
+ Run HostAssignment.place() with some hosts marked unreachable and check
+ the resulting to_add / to_remove lists against the expected outcomes.
+
+ In every case above, no unreachable host appears in the expected
+ additions and no daemon on an unreachable host appears in the expected
+ removals.
+ """
+
+ spec = ServiceSpec(service_type=service_type,
+ service_id=None,
+ placement=placement)
+
+ # note: ``hosts`` is rebound here to the first element returned by place()
+ hosts, to_add, to_remove = HostAssignment(
+ spec=spec,
+ hosts=[HostSpec(h) for h in hosts],
+ unreachable_hosts=[HostSpec(h) for h in unreachable_hosts],
+ daemons=daemons,
+ ).place()
+ # results are sorted so the comparison does not depend on placement order
+ assert sorted([h.hostname for h in to_add]) in expected_add
+ assert sorted([h.name() for h in to_remove]) in expected_remove