From: Adam King
Date: Thu, 5 Aug 2021 14:09:16 +0000 (-0400)
Subject: mgr/cephadm: make scheduler able to accommodate offline/maintenance hosts
X-Git-Tag: v16.2.6~25^2~7
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b509163076499d705dae596b3e24e24a62f34789;p=ceph.git

mgr/cephadm: make scheduler able to accommodate offline/maintenance hosts

Fixes: https://tracker.ceph.com/issues/51027

Signed-off-by: Adam King
(cherry picked from commit 01a580b3082f83347181bf835b9ef698cecfc015)
---

diff --git a/src/pybind/mgr/cephadm/migrations.py b/src/pybind/mgr/cephadm/migrations.py
index cf30d15c28e..07b249df78f 100644
--- a/src/pybind/mgr/cephadm/migrations.py
+++ b/src/pybind/mgr/cephadm/migrations.py
@@ -93,6 +93,7 @@ class Migrations:
         placements, to_add, to_remove = HostAssignment(
             spec=spec,
             hosts=self.mgr.inventory.all_specs(),
+            unreachable_hosts=self.mgr._unreachable_hosts(),
             daemons=existing_daemons,
         ).place()
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index b62b4548675..88d670ecc07 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -1392,12 +1392,27 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             h for h in self.inventory.all_specs()
             if (
                 self.cache.host_had_daemon_refresh(h.hostname)
-                and h.status.lower() not in ['maintenance', 'offline']
-                and h.hostname not in self.offline_hosts
                 and '_no_schedule' not in h.labels
             )
         ]
 
+    def _unreachable_hosts(self) -> List[HostSpec]:
+        """
+        Return all hosts that are offline or in maintenance mode.
+
+        The idea is that we should not touch the daemons on these hosts
+        (in theory the hosts are inaccessible, so we CAN'T touch them), but
+        we still want to count the daemons that exist on these hosts toward
+        placement, so those daemons aren't simply rescheduled elsewhere.
+        """
+        return [
+            h for h in self.inventory.all_specs()
+            if (
+                h.status.lower() in ['maintenance', 'offline']
+                or h.hostname in self.offline_hosts
+            )
+        ]
+
     def _check_valid_addr(self, host: str, addr: str) -> str:
         # make sure hostname is resolvable before trying to make a connection
         try:
@@ -2348,6 +2363,7 @@ Then run the following:
             ha = HostAssignment(
                 spec=spec,
                 hosts=self._schedulable_hosts(),
+                unreachable_hosts=self._unreachable_hosts(),
                 networks=self.cache.networks,
                 daemons=self.cache.get_daemons_by_service(spec.service_name()),
                 allow_colo=svc.allow_colo(),
@@ -2423,6 +2439,7 @@ Then run the following:
             HostAssignment(
                 spec=spec,
                 hosts=self.inventory.all_specs(),  # All hosts, even those without daemon refresh
+                unreachable_hosts=self._unreachable_hosts(),
                 networks=self.cache.networks,
                 daemons=self.cache.get_daemons_by_service(spec.service_name()),
                 allow_colo=self.cephadm_services[spec.service_type].allow_colo(),
diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py
index 14d44fbfee2..8f4f02e5e4b 100644
--- a/src/pybind/mgr/cephadm/schedule.py
+++ b/src/pybind/mgr/cephadm/schedule.py
@@ -141,6 +141,7 @@ class HostAssignment(object):
     def __init__(self,
                  spec,  # type: ServiceSpec
                  hosts: List[orchestrator.HostSpec],
+                 unreachable_hosts: List[orchestrator.HostSpec],
                  daemons: List[orchestrator.DaemonDescription],
                  networks: Dict[str, Dict[str, Dict[str, List[str]]]] = {},
                  filter_new_host=None,  # type: Optional[Callable[[str],bool]]
@@ -153,6 +154,7 @@ class HostAssignment(object):
         self.spec = spec  # type: ServiceSpec
         self.primary_daemon_type = primary_daemon_type or spec.service_type
         self.hosts: List[orchestrator.HostSpec] = hosts
+        self.unreachable_hosts: List[orchestrator.HostSpec] = unreachable_hosts
         self.filter_new_host = filter_new_host
         self.service_name = spec.service_name()
         self.daemons = daemons
@@ -231,6 +233,9 @@ class HostAssignment(object):
                     to_remove.append(dd)
             to_add += host_slots
 
+        to_remove = [d for d in to_remove if d.hostname not in [
+            h.hostname for h in self.unreachable_hosts]]
+
         return slots, to_add, to_remove
 
     def place(self):
@@ -286,6 +291,7 @@ class HostAssignment(object):
         existing_active: List[orchestrator.DaemonDescription] = []
         existing_standby: List[orchestrator.DaemonDescription] = []
         existing_slots: List[DaemonPlacement] = []
+        to_add: List[DaemonPlacement] = []
         to_remove: List[orchestrator.DaemonDescription] = []
         ranks: List[int] = list(range(len(candidates)))
         others: List[DaemonPlacement] = candidates.copy()
@@ -308,11 +314,17 @@ class HostAssignment(object):
             if not found:
                 to_remove.append(dd)
 
+        # TODO: At some point we want to deploy daemons that are on offline hosts;
+        # when we do this differs per daemon type. Stateless daemons we could
+        # redeploy quickly to improve availability. Stateful daemons we might
+        # want to wait longer on, to see if the host comes back online.
+
         existing = existing_active + existing_standby
 
         # build to_add
         if not count:
-            to_add = others
+            to_add = [dd for dd in others if dd.hostname not in [
+                h.hostname for h in self.unreachable_hosts]]
         else:
             # The number of new slots that need to be selected in order to fulfill count
             need = count - len(existing)
@@ -323,8 +335,12 @@ class HostAssignment(object):
                 del existing_slots[count:]
                 return self.place_per_host_daemons(existing_slots, [], to_remove)
 
-            if need > 0:
-                to_add = others[:need]
+            for dp in others:
+                if need <= 0:
+                    break
+                if dp.hostname not in [h.hostname for h in self.unreachable_hosts]:
+                    to_add.append(dp)
+                    need -= 1  # this is the last use of need in this function, so it can work as a counter
 
         if self.rank_map is not None:
             # assign unused ranks (and rank_generations) to to_add
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
index 071dd4cbef3..1a307f958cb 100644
--- a/src/pybind/mgr/cephadm/serve.py
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -202,6 +202,7 @@ class CephadmServe:
             ha = HostAssignment(
                 spec=ServiceSpec('mon', placement=pspec),
                 hosts=self.mgr._schedulable_hosts(),
+                unreachable_hosts=self.mgr._unreachable_hosts(),
                 daemons=[],
                 networks=self.mgr.cache.networks,
             )
@@ -233,6 +234,7 @@ class CephadmServe:
             ha = HostAssignment(
                 spec=ServiceSpec('mon', placement=ks.placement),
                 hosts=self.mgr._schedulable_hosts(),
+                unreachable_hosts=self.mgr._unreachable_hosts(),
                 daemons=[],
                 networks=self.mgr.cache.networks,
             )
@@ -685,6 +687,7 @@ class CephadmServe:
         ha = HostAssignment(
             spec=spec,
             hosts=self.mgr._schedulable_hosts(),
+            unreachable_hosts=self.mgr._unreachable_hosts(),
             daemons=daemons,
             networks=self.mgr.cache.networks,
             filter_new_host=(
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 36ec08e7469..6a11c36dc3c 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -1120,8 +1120,12 @@ spec:
         # candidates for scheduling
         candidates = [
             h.hostname for h in cephadm_module._schedulable_hosts()]
-        assert 'test2' not in candidates
-        assert 'test3' not in candidates
+        assert 'test2' in candidates
+        assert 'test3' in candidates
+
+        unreachable = [h.hostname for h in cephadm_module._unreachable_hosts()]
+        assert 'test2' in unreachable
+        assert 'test3' in unreachable
 
         with with_service(cephadm_module, ServiceSpec('crash', placement=PlacementSpec(host_pattern='*'))):
             # re-apply services. No mgr should be removed from maint/offline hosts
diff --git a/src/pybind/mgr/cephadm/tests/test_scheduling.py b/src/pybind/mgr/cephadm/tests/test_scheduling.py
index 9b4faf6b179..9a9571409d5 100644
--- a/src/pybind/mgr/cephadm/tests/test_scheduling.py
+++ b/src/pybind/mgr/cephadm/tests/test_scheduling.py
@@ -132,6 +132,7 @@ def run_scheduler_test(results, mk_spec, hosts, daemons, key_elems):
         host_res, to_add, to_remove = HostAssignment(
             spec=spec,
             hosts=hosts,
+            unreachable_hosts=[],
             daemons=daemons,
         ).place()
         if isinstance(host_res, list):
@@ -147,6 +148,7 @@ def run_scheduler_test(results, mk_spec, hosts, daemons, key_elems):
         host_res, to_add, to_remove = HostAssignment(
             spec=spec,
             hosts=hosts,
+            unreachable_hosts=[],
             daemons=daemons
         ).place()
 
@@ -839,6 +841,7 @@ def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post
     all_slots, to_add, to_remove = HostAssignment(
         spec=spec,
         hosts=[HostSpec(h, labels=['foo']) for h in hosts],
+        unreachable_hosts=[],
         daemons=daemons,
         allow_colo=allow_colo,
         rank_map=rank_map,
@@ -944,6 +947,7 @@ def test_node_assignment2(service_type, placement, hosts,
     hosts, to_add, to_remove = HostAssignment(
         spec=ServiceSpec(service_type, placement=placement),
         hosts=[HostSpec(h, labels=['foo']) for h in hosts],
+        unreachable_hosts=[],
         daemons=daemons,
     ).place()
     assert len(hosts) == expected_len
@@ -977,6 +981,7 @@ def test_node_assignment3(service_type, placement, hosts,
     hosts, to_add, to_remove = HostAssignment(
         spec=ServiceSpec(service_type, placement=placement),
         hosts=[HostSpec(h) for h in hosts],
+        unreachable_hosts=[],
         daemons=daemons,
     ).place()
     assert len(hosts) == expected_len
@@ -1073,6 +1078,7 @@ def test_node_assignment4(spec, networks, daemons,
     all_slots, to_add, to_remove = HostAssignment(
         spec=spec,
         hosts=[HostSpec(h, labels=['foo']) for h in networks.keys()],
+        unreachable_hosts=[],
         daemons=daemons,
         allow_colo=True,
         networks=networks,
@@ -1158,6 +1164,7 @@ def test_bad_specs(service_type, placement, hosts, daemons, expected):
         hosts, to_add, to_remove = HostAssignment(
             spec=ServiceSpec(service_type, placement=placement),
             hosts=[HostSpec(h) for h in hosts],
+            unreachable_hosts=[],
             daemons=daemons,
         ).place()
     assert str(e.value) == expected
@@ -1333,8 +1340,105 @@ def test_active_assignment(service_type, placement, hosts, daemons, expected, ex
     hosts, to_add, to_remove = HostAssignment(
         spec=spec,
         hosts=[HostSpec(h) for h in hosts],
+        unreachable_hosts=[],
         daemons=daemons,
     ).place()
     assert sorted([h.hostname for h in hosts]) in expected
     assert sorted([h.hostname for h in to_add]) in expected_add
     assert sorted([h.name() for h in to_remove]) in expected_remove
+
+
+class UnreachableHostsTest(NamedTuple):
+    service_type: str
+    placement: PlacementSpec
+    hosts: List[str]
+    unreachable_hosts: List[str]
+    daemons: List[DaemonDescription]
+    expected_add: List[List[str]]
+    expected_remove: List[List[str]]
+
+
+@pytest.mark.parametrize("service_type,placement,hosts,unreachable_hosts,daemons,expected_add,expected_remove",
+                         [
+                             UnreachableHostsTest(
+                                 'mgr',
+                                 PlacementSpec(count=3),
+                                 'host1 host2 host3'.split(),
+                                 ['host2'],
+                                 [],
+                                 [['host1', 'host3']],
+                                 [[]],
+                             ),
+                             UnreachableHostsTest(
+                                 'mgr',
+                                 PlacementSpec(hosts=['host3']),
+                                 'host1 host2 host3'.split(),
+                                 ['host1'],
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1'),
+                                     DaemonDescription('mgr', 'b', 'host2'),
+                                     DaemonDescription('mgr', 'c', 'host3', is_active=True),
+                                 ],
+                                 [[]],
+                                 [['mgr.b']],
+                             ),
+                             UnreachableHostsTest(
+                                 'mgr',
+                                 PlacementSpec(count=3),
+                                 'host1 host2 host3 host4'.split(),
+                                 ['host1'],
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1'),
+                                     DaemonDescription('mgr', 'b', 'host2'),
+                                     DaemonDescription('mgr', 'c', 'host3', is_active=True),
+                                 ],
+                                 [[]],
+                                 [[]],
+                             ),
+                             UnreachableHostsTest(
+                                 'mgr',
+                                 PlacementSpec(count=1),
+                                 'host1 host2 host3 host4'.split(),
+                                 'host1 host3'.split(),
+                                 [
+                                     DaemonDescription('mgr', 'a', 'host1'),
+                                     DaemonDescription('mgr', 'b', 'host2'),
+                                     DaemonDescription('mgr', 'c', 'host3', is_active=True),
+                                 ],
+                                 [[]],
+                                 [['mgr.b']],
+                             ),
+                             UnreachableHostsTest(
+                                 'mgr',
+                                 PlacementSpec(count=3),
+                                 'host1 host2 host3 host4'.split(),
+                                 ['host2'],
+                                 [],
+                                 [['host1', 'host3', 'host4']],
+                                 [[]],
+                             ),
+                             UnreachableHostsTest(
+                                 'mgr',
+                                 PlacementSpec(count=3),
+                                 'host1 host2 host3 host4'.split(),
+                                 'host1 host4'.split(),
+                                 [],
+                                 [['host2', 'host3']],
+                                 [[]],
+                             ),
+
+                         ])
+def test_unreachable_host(service_type, placement, hosts, unreachable_hosts, daemons, expected_add, expected_remove):
+
+    spec = ServiceSpec(service_type=service_type,
+                       service_id=None,
+                       placement=placement)
+
+    hosts, to_add, to_remove = HostAssignment(
+        spec=spec,
+        hosts=[HostSpec(h) for h in hosts],
+        unreachable_hosts=[HostSpec(h) for h in unreachable_hosts],
+        daemons=daemons,
+    ).place()
+    assert sorted([h.hostname for h in to_add]) in expected_add
+    assert sorted([h.name() for h in to_remove]) in expected_remove
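
For readers trying out the new parameter: below is a minimal sketch (not part of the patch) of how the unreachable_hosts argument changes placement, mirroring the unit tests above. It assumes a Ceph checkout with src/pybind/mgr on PYTHONPATH; the imports and the HostAssignment signature come from the patched modules, while the host and daemon names are made up for illustration.

    # a minimal sketch, assuming src/pybind/mgr from a Ceph checkout is importable
    from cephadm.schedule import HostAssignment
    from ceph.deployment.service_spec import PlacementSpec, ServiceSpec
    from orchestrator import DaemonDescription, HostSpec

    spec = ServiceSpec('mgr', placement=PlacementSpec(count=3))

    # mgr.a lives on host1, which is marked unreachable below
    daemons = [
        DaemonDescription('mgr', 'a', 'host1'),
        DaemonDescription('mgr', 'b', 'host2'),
    ]

    slots, to_add, to_remove = HostAssignment(
        spec=spec,
        hosts=[HostSpec(h) for h in ['host1', 'host2', 'host3', 'host4']],
        # host1 still counts toward count=3, but the scheduler will neither
        # place new daemons on it nor remove mgr.a from it
        unreachable_hosts=[HostSpec('host1')],
        daemons=daemons,
    ).place()

    assert 'host1' not in [dp.hostname for dp in to_add]  # no new daemon on host1
    assert to_remove == []                                # mgr.a is left in place

Before this change, _schedulable_hosts() filtered out offline and maintenance hosts entirely, so their daemons no longer counted toward the placement and would be rescheduled onto other hosts; now such hosts remain schedulable but are fenced off via unreachable_hosts.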