From: Adam King Date: Thu, 5 Aug 2021 14:09:16 +0000 (-0400) Subject: mgr/cephadm: make scheduler able to accomodate offline/maintenance hosts X-Git-Tag: v17.1.0~1082^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=01a580b3082f83347181bf835b9ef698cecfc015;p=ceph.git mgr/cephadm: make scheduler able to accomodate offline/maintenance hosts Fixes: https://tracker.ceph.com/issues/51027 Signed-off-by: Adam King --- diff --git a/src/pybind/mgr/cephadm/migrations.py b/src/pybind/mgr/cephadm/migrations.py index ae8d34b60d37..941ede3666de 100644 --- a/src/pybind/mgr/cephadm/migrations.py +++ b/src/pybind/mgr/cephadm/migrations.py @@ -103,6 +103,7 @@ class Migrations: placements, to_add, to_remove = HostAssignment( spec=spec, hosts=self.mgr.inventory.all_specs(), + unreachable_hosts=self.mgr._unreachable_hosts(), daemons=existing_daemons, ).place() diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 62c8f539627f..1233bb54510b 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1389,12 +1389,27 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, h for h in self.inventory.all_specs() if ( self.cache.host_had_daemon_refresh(h.hostname) - and h.status.lower() not in ['maintenance', 'offline'] - and h.hostname not in self.offline_hosts and '_no_schedule' not in h.labels ) ] + def _unreachable_hosts(self) -> List[HostSpec]: + """ + Return all hosts that are offline or in maintenance mode. 
+ + The idea is we should not touch the daemons on these hosts (since + in theory the hosts are inaccessible so we CAN'T touch them) but + we still want to count daemons that exist on these hosts toward the + placement so daemons on these hosts aren't just moved elsewhere + """ + return [ + h for h in self.inventory.all_specs() + if ( + h.status.lower() in ['maintenance', 'offline'] + or h.hostname in self.offline_hosts + ) + ] + def _check_valid_addr(self, host: str, addr: str) -> str: # make sure hostname is resolvable before trying to make a connection try: @@ -2297,6 +2312,7 @@ Then run the following: ha = HostAssignment( spec=spec, hosts=self._schedulable_hosts(), + unreachable_hosts=self._unreachable_hosts(), networks=self.cache.networks, daemons=self.cache.get_daemons_by_service(spec.service_name()), allow_colo=svc.allow_colo(), @@ -2372,6 +2388,7 @@ Then run the following: HostAssignment( spec=spec, hosts=self.inventory.all_specs(), # All hosts, even those without daemon refresh + unreachable_hosts=self._unreachable_hosts(), networks=self.cache.networks, daemons=self.cache.get_daemons_by_service(spec.service_name()), allow_colo=self.cephadm_services[spec.service_type].allow_colo(), diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py index 14d44fbfee24..8f4f02e5e4b3 100644 --- a/src/pybind/mgr/cephadm/schedule.py +++ b/src/pybind/mgr/cephadm/schedule.py @@ -141,6 +141,7 @@ class HostAssignment(object): def __init__(self, spec, # type: ServiceSpec hosts: List[orchestrator.HostSpec], + unreachable_hosts: List[orchestrator.HostSpec], daemons: List[orchestrator.DaemonDescription], networks: Dict[str, Dict[str, Dict[str, List[str]]]] = {}, filter_new_host=None, # type: Optional[Callable[[str],bool]] @@ -153,6 +154,7 @@ class HostAssignment(object): self.spec = spec # type: ServiceSpec self.primary_daemon_type = primary_daemon_type or spec.service_type self.hosts: List[orchestrator.HostSpec] = hosts + self.unreachable_hosts: 
List[orchestrator.HostSpec] = unreachable_hosts self.filter_new_host = filter_new_host self.service_name = spec.service_name() self.daemons = daemons @@ -231,6 +233,9 @@ class HostAssignment(object): to_remove.append(dd) to_add += host_slots + to_remove = [d for d in to_remove if d.hostname not in [ + h.hostname for h in self.unreachable_hosts]] + return slots, to_add, to_remove def place(self): @@ -286,6 +291,7 @@ class HostAssignment(object): existing_active: List[orchestrator.DaemonDescription] = [] existing_standby: List[orchestrator.DaemonDescription] = [] existing_slots: List[DaemonPlacement] = [] + to_add: List[DaemonPlacement] = [] to_remove: List[orchestrator.DaemonDescription] = [] ranks: List[int] = list(range(len(candidates))) others: List[DaemonPlacement] = candidates.copy() @@ -308,11 +314,17 @@ class HostAssignment(object): if not found: to_remove.append(dd) + # TODO: At some point we want to deploy daemons that are on offline hosts + # at what point we do this differs per daemon type. Stateless daemons we could + # do quickly to improve availability. 
Stateful daemons we might want to wait longer + # to see if the host comes back online + existing = existing_active + existing_standby # build to_add if not count: - to_add = others + to_add = [dd for dd in others if dd.hostname not in [ + h.hostname for h in self.unreachable_hosts]] else: # The number of new slots that need to be selected in order to fulfill count need = count - len(existing) @@ -323,8 +335,12 @@ class HostAssignment(object): del existing_slots[count:] return self.place_per_host_daemons(existing_slots, [], to_remove) - if need > 0: - to_add = others[:need] + for dp in others: + if need <= 0: + break + if dp.hostname not in [h.hostname for h in self.unreachable_hosts]: + to_add.append(dp) + need -= 1 # this is last use of need in this function so it can work as a counter if self.rank_map is not None: # assign unused ranks (and rank_generations) to to_add diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index b36d24c991d8..a664b3116212 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -194,6 +194,7 @@ class CephadmServe: ha = HostAssignment( spec=ServiceSpec('mon', placement=pspec), hosts=self.mgr._schedulable_hosts(), + unreachable_hosts=self.mgr._unreachable_hosts(), daemons=[], networks=self.mgr.cache.networks, ) @@ -225,6 +226,7 @@ class CephadmServe: ha = HostAssignment( spec=ServiceSpec('mon', placement=ks.placement), hosts=self.mgr._schedulable_hosts(), + unreachable_hosts=self.mgr._unreachable_hosts(), daemons=[], networks=self.mgr.cache.networks, ) @@ -677,6 +679,7 @@ class CephadmServe: ha = HostAssignment( spec=spec, hosts=self.mgr._schedulable_hosts(), + unreachable_hosts=self.mgr._unreachable_hosts(), daemons=daemons, networks=self.mgr.cache.networks, filter_new_host=( diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index e6082a8c778d..b76601245331 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ 
b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -1116,8 +1116,12 @@ spec: # candidates for scheduling candidates = [ h.hostname for h in cephadm_module._schedulable_hosts()] - assert 'test2' not in candidates - assert 'test3' not in candidates + assert 'test2' in candidates + assert 'test3' in candidates + + unreachable = [h.hostname for h in cephadm_module._unreachable_hosts()] + assert 'test2' in unreachable + assert 'test3' in unreachable with with_service(cephadm_module, ServiceSpec('crash', placement=PlacementSpec(host_pattern='*'))): # re-apply services. No mgr should be removed from maint/offline hosts diff --git a/src/pybind/mgr/cephadm/tests/test_scheduling.py b/src/pybind/mgr/cephadm/tests/test_scheduling.py index 138be4a85b83..e4f8efa07843 100644 --- a/src/pybind/mgr/cephadm/tests/test_scheduling.py +++ b/src/pybind/mgr/cephadm/tests/test_scheduling.py @@ -132,6 +132,7 @@ def run_scheduler_test(results, mk_spec, hosts, daemons, key_elems): host_res, to_add, to_remove = HostAssignment( spec=spec, hosts=hosts, + unreachable_hosts=[], daemons=daemons, ).place() if isinstance(host_res, list): @@ -147,6 +148,7 @@ def run_scheduler_test(results, mk_spec, hosts, daemons, key_elems): host_res, to_add, to_remove = HostAssignment( spec=spec, hosts=hosts, + unreachable_hosts=[], daemons=daemons ).place() @@ -838,6 +840,7 @@ def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post all_slots, to_add, to_remove = HostAssignment( spec=spec, hosts=[HostSpec(h, labels=['foo']) for h in hosts], + unreachable_hosts=[], daemons=daemons, allow_colo=allow_colo, rank_map=rank_map, @@ -943,6 +946,7 @@ def test_node_assignment2(service_type, placement, hosts, hosts, to_add, to_remove = HostAssignment( spec=ServiceSpec(service_type, placement=placement), hosts=[HostSpec(h, labels=['foo']) for h in hosts], + unreachable_hosts=[], daemons=daemons, ).place() assert len(hosts) == expected_len @@ -976,6 +980,7 @@ def test_node_assignment3(service_type, 
placement, hosts, hosts, to_add, to_remove = HostAssignment( spec=ServiceSpec(service_type, placement=placement), hosts=[HostSpec(h) for h in hosts], + unreachable_hosts=[], daemons=daemons, ).place() assert len(hosts) == expected_len @@ -1072,6 +1077,7 @@ def test_node_assignment4(spec, networks, daemons, all_slots, to_add, to_remove = HostAssignment( spec=spec, hosts=[HostSpec(h, labels=['foo']) for h in networks.keys()], + unreachable_hosts=[], daemons=daemons, allow_colo=True, networks=networks, @@ -1157,6 +1163,7 @@ def test_bad_specs(service_type, placement, hosts, daemons, expected): hosts, to_add, to_remove = HostAssignment( spec=ServiceSpec(service_type, placement=placement), hosts=[HostSpec(h) for h in hosts], + unreachable_hosts=[], daemons=daemons, ).place() assert str(e.value) == expected @@ -1332,8 +1339,105 @@ def test_active_assignment(service_type, placement, hosts, daemons, expected, ex hosts, to_add, to_remove = HostAssignment( spec=spec, hosts=[HostSpec(h) for h in hosts], + unreachable_hosts=[], daemons=daemons, ).place() assert sorted([h.hostname for h in hosts]) in expected assert sorted([h.hostname for h in to_add]) in expected_add assert sorted([h.name() for h in to_remove]) in expected_remove + + +class UnreachableHostsTest(NamedTuple): + service_type: str + placement: PlacementSpec + hosts: List[str] + unreachables_hosts: List[str] + daemons: List[DaemonDescription] + expected_add: List[List[str]] + expected_remove: List[List[str]] + + +@pytest.mark.parametrize("service_type,placement,hosts,unreachable_hosts,daemons,expected_add,expected_remove", + [ + UnreachableHostsTest( + 'mgr', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + ['host2'], + [], + [['host1', 'host3']], + [[]], + ), + UnreachableHostsTest( + 'mgr', + PlacementSpec(hosts=['host3']), + 'host1 host2 host3'.split(), + ['host1'], + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', 
is_active=True), + ], + [[]], + [['mgr.b']], + ), + UnreachableHostsTest( + 'mgr', + PlacementSpec(count=3), + 'host1 host2 host3 host4'.split(), + ['host1'], + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [[]], + [[]], + ), + UnreachableHostsTest( + 'mgr', + PlacementSpec(count=1), + 'host1 host2 host3 host4'.split(), + 'host1 host3'.split(), + [ + DaemonDescription('mgr', 'a', 'host1'), + DaemonDescription('mgr', 'b', 'host2'), + DaemonDescription('mgr', 'c', 'host3', is_active=True), + ], + [[]], + [['mgr.b']], + ), + UnreachableHostsTest( + 'mgr', + PlacementSpec(count=3), + 'host1 host2 host3 host4'.split(), + ['host2'], + [], + [['host1', 'host3', 'host4']], + [[]], + ), + UnreachableHostsTest( + 'mgr', + PlacementSpec(count=3), + 'host1 host2 host3 host4'.split(), + 'host1 host4'.split(), + [], + [['host2', 'host3']], + [[]], + ), + + ]) +def test_unreachable_host(service_type, placement, hosts, unreachable_hosts, daemons, expected_add, expected_remove): + + spec = ServiceSpec(service_type=service_type, + service_id=None, + placement=placement) + + hosts, to_add, to_remove = HostAssignment( + spec=spec, + hosts=[HostSpec(h) for h in hosts], + unreachable_hosts=[HostSpec(h) for h in unreachable_hosts], + daemons=daemons, + ).place() + assert sorted([h.hostname for h in to_add]) in expected_add + assert sorted([h.name() for h in to_remove]) in expected_remove