From b21977c2f1bbabe1402fb7b9cc659d8162d95e3d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 23 Apr 2021 15:28:13 -0400 Subject: [PATCH] mgr/cephadm/schedule: assign/map ranks If we are passed a rank_map, use it to maintain one daemon per rank, where the ranks are consecutive non-negative integers starting from 0. A bit of refactoring in place() so that we only do the rank allocations on slots we are going to use (no more than count). Signed-off-by: Sage Weil (cherry picked from commit de03942f806cb16e00259f2397446afb9322c3f1) --- src/pybind/mgr/cephadm/schedule.py | 138 +++++++++-- .../mgr/cephadm/tests/test_scheduling.py | 219 +++++++++++++++++- 2 files changed, 329 insertions(+), 28 deletions(-) diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py index 47214cf5433eb..14d44fbfee248 100644 --- a/src/pybind/mgr/cephadm/schedule.py +++ b/src/pybind/mgr/cephadm/schedule.py @@ -49,6 +49,52 @@ class DaemonPlacement(NamedTuple): self.rank_generation, ) + def assign_rank(self, rank: int, gen: int) -> 'DaemonPlacement': + return DaemonPlacement( + self.daemon_type, + self.hostname, + self.network, + self.name, + self.ip, + self.ports, + rank, + gen, + ) + + def assign_name(self, name: str) -> 'DaemonPlacement': + return DaemonPlacement( + self.daemon_type, + self.hostname, + self.network, + name, + self.ip, + self.ports, + self.rank, + self.rank_generation, + ) + + def assign_rank_generation( + self, + rank: int, + rank_map: Dict[int, Dict[int, Optional[str]]] + ) -> 'DaemonPlacement': + if rank not in rank_map: + rank_map[rank] = {} + gen = 0 + else: + gen = max(rank_map[rank].keys()) + 1 + rank_map[rank][gen] = None + return DaemonPlacement( + self.daemon_type, + self.hostname, + self.network, + self.name, + self.ip, + self.ports, + rank, + gen, + ) + def matches_daemon(self, dd: DaemonDescription) -> bool: if self.daemon_type != dd.daemon_type: return False @@ -64,6 +110,31 @@ class DaemonPlacement(NamedTuple): return False return True + def matches_rank_map( + self, + dd: DaemonDescription, + rank_map: Optional[Dict[int, Dict[int, Optional[str]]]], + ranks: List[int] + ) -> bool: + if rank_map is None: + # daemon should have no rank + return dd.rank is None + + if dd.rank is None: + return False + + if dd.rank not in rank_map: + return False + if dd.rank not in ranks: + return False + + # must be the highest/newest rank_generation + if dd.rank_generation != max(rank_map[dd.rank].keys()): + return False + + # must be *this* daemon + return rank_map[dd.rank][dd.rank_generation] == dd.daemon_id + class HostAssignment(object): @@ -76,6 +147,7 @@ class HostAssignment(object): allow_colo: bool = False, primary_daemon_type: Optional[str] = None, per_host_daemon_type: Optional[str] = None, + rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] = None, ): assert spec self.spec = spec # type: ServiceSpec @@ -88,6 +160,7 @@ class HostAssignment(object): self.allow_colo = allow_colo self.per_host_daemon_type = per_host_daemon_type self.ports_start = spec.get_port_start() + self.rank_map = rank_map def hosts_by_label(self, label: str) -> List[orchestrator.HostSpec]: return [h for h in self.hosts if label in h.labels] @@ -195,12 +268,18 @@ class HostAssignment(object): per_host = 1 + ((count - 1) // len(candidates)) candidates = expand_candidates(candidates, per_host) - # consider active (primary) daemons first - daemons = [ - d for d in self.daemons if d.is_active and d.daemon_type == self.primary_daemon_type - ] + [ - d for d in self.daemons if not d.is_active and 
d.daemon_type == self.primary_daemon_type - ] + # consider (preserve) existing daemons in a particular order... + daemons = sorted( + [ + d for d in self.daemons if d.daemon_type == self.primary_daemon_type + ], + key=lambda d: ( + not d.is_active, # active before standby + d.rank is not None, # ranked first, then non-ranked + d.rank, # low ranks + 0 - (d.rank_generation or 0), # newer generations first + ) + ) # sort candidates into existing/used slots that already have a # daemon, and others (the rest) @@ -208,16 +287,21 @@ class HostAssignment(object): existing_standby: List[orchestrator.DaemonDescription] = [] existing_slots: List[DaemonPlacement] = [] to_remove: List[orchestrator.DaemonDescription] = [] - others = candidates.copy() + ranks: List[int] = list(range(len(candidates))) + others: List[DaemonPlacement] = candidates.copy() for dd in daemons: found = False for p in others: - if p.matches_daemon(dd): + if p.matches_daemon(dd) and p.matches_rank_map(dd, self.rank_map, ranks): others.remove(p) if dd.is_active: existing_active.append(dd) else: existing_standby.append(dd) + if dd.rank is not None: + assert dd.rank_generation is not None + p = p.assign_rank(dd.rank, dd.rank_generation) + ranks.remove(dd.rank) existing_slots.append(p) found = True break @@ -226,22 +310,34 @@ class HostAssignment(object): existing = existing_active + existing_standby - # If we don't have <count> the list of candidates is definitive. - if count is None: - logger.debug('Provided hosts: %s' % candidates) - return self.place_per_host_daemons(candidates, others, to_remove) + # build to_add + if not count: + to_add = others + else: + # The number of new slots that need to be selected in order to fulfill count + need = count - len(existing) + + # we don't need any additional placements + if need <= 0: + to_remove.extend(existing[count:]) + del existing_slots[count:] + return self.place_per_host_daemons(existing_slots, [], to_remove) + + if need > 0: + to_add = others[:need] - # The number of new slots that need to be selected in order to fulfill count - need = count - len(existing) + if self.rank_map is not None: + # assign unused ranks (and rank_generations) to to_add + assert len(ranks) >= len(to_add) + for i in range(len(to_add)): + to_add[i] = to_add[i].assign_rank_generation(ranks[i], self.rank_map) - # we don't need any additional placements - if need <= 0: - to_remove.extend(existing[count:]) - del existing_slots[count:] - return self.place_per_host_daemons(existing_slots, [], to_remove) + # If we don't have <count> the list of candidates is definitive. 
+ if count is None: + final = existing_slots + to_add + logger.debug('Provided hosts: %s' % final) + return self.place_per_host_daemons(final, to_add, to_remove) - # ask the scheduler to select additional slots - to_add = others[:need] logger.debug('Combine hosts with existing daemons %s + new hosts %s' % ( existing, to_add)) return self.place_per_host_daemons(existing_slots + to_add, to_add, to_remove) diff --git a/src/pybind/mgr/cephadm/tests/test_scheduling.py b/src/pybind/mgr/cephadm/tests/test_scheduling.py index 82085957ffc47..9b4faf6b179cf 100644 --- a/src/pybind/mgr/cephadm/tests/test_scheduling.py +++ b/src/pybind/mgr/cephadm/tests/test_scheduling.py @@ -2,7 +2,7 @@ # fmt: off -from typing import NamedTuple, List, Dict +from typing import NamedTuple, List, Dict, Optional import pytest from ceph.deployment.hostspec import HostSpec @@ -379,12 +379,14 @@ class NodeAssignmentTest(NamedTuple): placement: PlacementSpec hosts: List[str] daemons: List[DaemonDescription] + rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] + post_rank_map: Optional[Dict[int, Dict[int, Optional[str]]]] expected: List[str] expected_add: List[str] expected_remove: List[DaemonDescription] -@pytest.mark.parametrize("service_type,placement,hosts,daemons,expected,expected_add,expected_remove", +@pytest.mark.parametrize("service_type,placement,hosts,daemons,rank_map,post_rank_map,expected,expected_add,expected_remove", [ # noqa: E128 # just hosts NodeAssignmentTest( @@ -392,6 +394,7 @@ class NodeAssignmentTest(NamedTuple): PlacementSpec(hosts=['smithi060']), ['smithi060'], [], + None, None, ['mgr:smithi060'], ['mgr:smithi060'], [] ), # all_hosts @@ -403,6 +406,7 @@ class NodeAssignmentTest(NamedTuple): DaemonDescription('mgr', 'a', 'host1'), DaemonDescription('mgr', 'b', 'host2'), ], + None, None, ['mgr:host1', 'mgr:host2', 'mgr:host3'], ['mgr:host3'], [] @@ -416,6 +420,7 @@ class NodeAssignmentTest(NamedTuple): DaemonDescription('mds', 'a', 'host1'), DaemonDescription('mds', 'b', 'host2'), ], + None, None, ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], ['mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], [] @@ -427,6 +432,7 @@ class NodeAssignmentTest(NamedTuple): PlacementSpec(count=4), 'host1 host2 host3'.split(), [], + None, None, ['mgr:host1', 'mgr:host2', 'mgr:host3'], ['mgr:host1', 'mgr:host2', 'mgr:host3'], [] @@ -437,6 +443,7 @@ class NodeAssignmentTest(NamedTuple): PlacementSpec(count=6), 'host1 host2 host3'.split(), [], + None, None, ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], [] @@ -450,6 +457,7 @@ class NodeAssignmentTest(NamedTuple): DaemonDescription('mgr', 'a', 'host1'), DaemonDescription('mgr', 'b', 'host2'), ], + None, None, ['mgr:host3'], ['mgr:host3'], ['mgr.a', 'mgr.b'] @@ -463,6 +471,7 @@ class NodeAssignmentTest(NamedTuple): DaemonDescription('mds', 'a', 'host1'), DaemonDescription('mds', 'b', 'host2'), ], + None, None, ['mds:host3', 'mds:host3', 'mds:host3'], ['mds:host3', 'mds:host3', 'mds:host3'], ['mds.a', 'mds.b'] @@ -476,6 +485,7 @@ class NodeAssignmentTest(NamedTuple): DaemonDescription('mgr', 'a', 'host1'), DaemonDescription('mgr', 'b', 'host2'), ], + None, None, ['mgr:host3'], ['mgr:host3'], ['mgr.a', 'mgr.b'] @@ -488,6 +498,7 @@ class NodeAssignmentTest(NamedTuple): [ DaemonDescription('mgr', 'a', 'host1'), ], + None, None, ['mgr:host3'], ['mgr:host3'], ['mgr.a'] @@ -500,6 +511,7 @@ class NodeAssignmentTest(NamedTuple): [ 
DaemonDescription('mgr', 'a', 'host1'), ], + None, None, ['mgr:host1'], [], [] ), @@ -512,6 +524,7 @@ class NodeAssignmentTest(NamedTuple): [ DaemonDescription('mgr', 'a', 'host2'), ], + None, None, ['mgr:host1'], ['mgr:host1'], ['mgr.a'] ), @@ -522,6 +535,7 @@ class NodeAssignmentTest(NamedTuple): PlacementSpec(label='foo'), 'host1 host2 host3'.split(), [], + None, None, ['mgr:host1', 'mgr:host2', 'mgr:host3'], ['mgr:host1', 'mgr:host2', 'mgr:host3'], [] ), @@ -532,6 +546,7 @@ class NodeAssignmentTest(NamedTuple): PlacementSpec(count=4, label='foo'), 'host1 host2 host3'.split(), [], + None, None, ['mgr:host1', 'mgr:host2', 'mgr:host3'], ['mgr:host1', 'mgr:host2', 'mgr:host3'], [] ), @@ -542,6 +557,7 @@ class NodeAssignmentTest(NamedTuple): PlacementSpec(count=6, label='foo'), 'host1 host2 host3'.split(), [], + None, None, ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], [] ), @@ -552,6 +568,7 @@ class NodeAssignmentTest(NamedTuple): PlacementSpec(label='foo', count_per_host=3), 'host1 host2 host3'.split(), [], + None, None, ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3'], ['mds:host1', 'mds:host2', 'mds:host3', 'mds:host1', 'mds:host2', 'mds:host3', @@ -564,6 +581,7 @@ class NodeAssignmentTest(NamedTuple): PlacementSpec(host_pattern='mgr*'), 'mgrhost1 mgrhost2 datahost'.split(), [], + None, None, ['mgr:mgrhost1', 'mgr:mgrhost2'], ['mgr:mgrhost1', 'mgr:mgrhost2'], [] ), @@ -574,6 +592,7 @@ class NodeAssignmentTest(NamedTuple): PlacementSpec(host_pattern='mds*', count_per_host=3), 'mdshost1 mdshost2 datahost'.split(), [], + None, None, ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'], ['mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2', 'mds:mdshost1', 'mds:mdshost2'], [] ), @@ -584,6 +603,7 @@ class NodeAssignmentTest(NamedTuple): PlacementSpec(count=6, label='foo'), 'host1 host2 host3'.split(), [], + None, None, ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)', 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'], ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)', @@ -600,6 +620,7 @@ class NodeAssignmentTest(NamedTuple): DaemonDescription('rgw', 'b', 'host2', ports=[80]), DaemonDescription('rgw', 'c', 'host1', ports=[82]), ], + None, None, ['rgw:host1(*:80)', 'rgw:host2(*:80)', 'rgw:host3(*:80)', 'rgw:host1(*:81)', 'rgw:host2(*:81)', 'rgw:host3(*:81)'], ['rgw:host1(*:80)', 'rgw:host3(*:80)', @@ -615,12 +636,186 @@ class NodeAssignmentTest(NamedTuple): DaemonDescription('mgr', 'y', 'host1'), DaemonDescription('mgr', 'x', 'host2'), ], + None, None, ['mgr:host1(name=y)', 'mgr:host2(name=x)'], [], [] ), + + # note: host -> rank mapping is pseudo-random based on svc name, so these + # host/rank pairs may seem random but they match the nfs.mynfs seed used by + # the test. 
+ + # ranked, fresh + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [], + {}, + {0: {0: None}, 1: {0: None}, 2: {0: None}}, + ['nfs:host1(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host3(rank=2.0)'], + ['nfs:host1(rank=0.0)', 'nfs:host2(rank=1.0)', 'nfs:host3(rank=2.0)'], + [] + ), + # 21: ranked, exist + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1), + ], + {0: {1: '0.1'}}, + {0: {1: '0.1'}, 1: {0: None}, 2: {0: None}}, + ['nfs:host1(rank=0.1)', 'nfs:host2(rank=1.0)', 'nfs:host3(rank=2.0)'], + ['nfs:host2(rank=1.0)', 'nfs:host3(rank=2.0)'], + [] + ), + # ranked, exist, different ranks + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1), + DaemonDescription('nfs', '1.1', 'host2', rank=1, rank_generation=1), + ], + {0: {1: '0.1'}, 1: {1: '1.1'}}, + {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}}, + ['nfs:host1(rank=0.1)', 'nfs:host2(rank=1.1)', 'nfs:host3(rank=2.0)'], + ['nfs:host3(rank=2.0)'], + [] + ), + # ranked, exist, different ranks (2) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.1', 'host1', rank=0, rank_generation=1), + DaemonDescription('nfs', '1.1', 'host3', rank=1, rank_generation=1), + ], + {0: {1: '0.1'}, 1: {1: '1.1'}}, + {0: {1: '0.1'}, 1: {1: '1.1'}, 2: {0: None}}, + ['nfs:host1(rank=0.1)', 'nfs:host3(rank=1.1)', 'nfs:host2(rank=2.0)'], + ['nfs:host2(rank=2.0)'], + [] + ), + # ranked, exist, extra ranks + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '4.5', 'host2', rank=4, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {0: None}}, + ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)', 'nfs:host3(rank=2.0)'], + ['nfs:host3(rank=2.0)'], + ['nfs.4.5'] + ), + # 25: ranked, exist, extra ranks (scale down: kill off high rank) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host3 host2 host1'.split(), + [ + DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '2.5', 'host3', rank=2, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + ['nfs:host1(rank=0.5)', 'nfs:host2(rank=1.5)'], + [], + ['nfs.2.5'] + ), + # ranked, exist, extra ranks (scale down hosts) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host3'.split(), + [ + DaemonDescription('nfs', '0.5', 'host1', rank=0, rank_generation=5), + DaemonDescription('nfs', '1.5', 'host2', rank=1, rank_generation=5), + DaemonDescription('nfs', '2.5', 'host3', rank=4, rank_generation=5), + ], + {0: {5: '0.5'}, 1: {5: '1.5'}, 2: {5: '2.5'}}, + {0: {5: '0.5'}, 1: {5: '1.5', 6: None}, 2: {5: '2.5'}}, + ['nfs:host1(rank=0.5)', 'nfs:host3(rank=1.6)'], + ['nfs:host3(rank=1.6)'], + ['nfs.2.5', 'nfs.1.5'] + ), + # ranked, exist, duplicate rank + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=3), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.0', 'host1', rank=0, rank_generation=0), + DaemonDescription('nfs', '1.1', 'host2', 
rank=1, rank_generation=1), + DaemonDescription('nfs', '1.2', 'host3', rank=1, rank_generation=2), + ], + {0: {0: '0.0'}, 1: {2: '1.2'}}, + {0: {0: '0.0'}, 1: {2: '1.2'}, 2: {0: None}}, + ['nfs:host1(rank=0.0)', 'nfs:host3(rank=1.2)', 'nfs:host2(rank=2.0)'], + ['nfs:host2(rank=2.0)'], + ['nfs.1.1'] + ), + # 28: ranked, all gens stale (failure during update cycle) + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=2), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3'}}, + {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3', 4: None}}, + ['nfs:host1(rank=0.2)', 'nfs:host2(rank=1.4)'], + ['nfs:host2(rank=1.4)'], + ['nfs.1.2'] + ), + # ranked, not enough hosts + NodeAssignmentTest( + 'nfs', + PlacementSpec(count=4), + 'host1 host2 host3'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2'}}, + {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {0: None}}, + ['nfs:host1(rank=0.2)', 'nfs:host2(rank=1.2)', 'nfs:host3(rank=2.0)'], + ['nfs:host3(rank=2.0)'], + [] + ), + # ranked, scale down + NodeAssignmentTest( + 'nfs', + PlacementSpec(hosts=['host2']), + 'host1 host2'.split(), + [ + DaemonDescription('nfs', '0.2', 'host1', rank=0, rank_generation=2), + DaemonDescription('nfs', '1.2', 'host2', rank=1, rank_generation=2), + DaemonDescription('nfs', '2.2', 'host3', rank=2, rank_generation=2), + ], + {0: {2: '0.2'}, 1: {2: '1.2'}, 2: {2: '2.2'}}, + {0: {2: '0.2', 3: None}, 1: {2: '1.2'}, 2: {2: '2.2'}}, + ['nfs:host2(rank=0.3)'], + ['nfs:host2(rank=0.3)'], + ['nfs.0.2', 'nfs.1.2', 'nfs.2.2'] + ), + ]) -def test_node_assignment(service_type, placement, hosts, daemons, +def test_node_assignment(service_type, placement, hosts, daemons, rank_map, post_rank_map, expected, expected_add, expected_remove): + spec = None service_id = None allow_colo = False if service_type == 'rgw': @@ -629,18 +824,28 @@ def test_node_assignment(service_type, placement, hosts, daemons, elif service_type == 'mds': service_id = 'myfs' allow_colo = True - - spec = ServiceSpec(service_type=service_type, - service_id=service_id, - placement=placement) + elif service_type == 'nfs': + service_id = 'mynfs' + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement, + pool='foo') + + if not spec: + spec = ServiceSpec(service_type=service_type, + service_id=service_id, + placement=placement) all_slots, to_add, to_remove = HostAssignment( spec=spec, hosts=[HostSpec(h, labels=['foo']) for h in hosts], daemons=daemons, allow_colo=allow_colo, + rank_map=rank_map, ).place() + assert rank_map == post_rank_map + got = [str(p) for p in all_slots] num_wildcard = 0 for i in expected: -- 2.39.5
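Reviewer note (not part of the patch): the rank_map passed into HostAssignment maps rank -> {rank_generation: daemon_id or None}, as seen in the test fixtures above. The following is a minimal, self-contained sketch of just the generation-bump bookkeeping that DaemonPlacement.assign_rank_generation() performs; the helper name next_rank_generation and the RankMap alias are illustrative only and do not exist in the module.

    from typing import Dict, Optional

    RankMap = Dict[int, Dict[int, Optional[str]]]

    def next_rank_generation(rank: int, rank_map: RankMap) -> int:
        # Mirrors assign_rank_generation(): a brand-new rank starts at
        # generation 0; an existing rank gets the generation after the
        # highest one already recorded.  The slot is reserved with None
        # and later filled in with the daemon id.
        if rank not in rank_map:
            rank_map[rank] = {}
            gen = 0
        else:
            gen = max(rank_map[rank].keys()) + 1
        rank_map[rank][gen] = None
        return gen

    # Shape taken from the "all gens stale" test case above.
    rank_map: RankMap = {0: {2: '0.2'}, 1: {2: '1.2', 3: '1.3'}}
    print(next_rank_generation(2, rank_map))  # 0 -- rank 2 did not exist yet
    print(next_rank_generation(1, rank_map))  # 4 -- rank 1 was already at gen 3

With that shape, matches_rank_map() can treat any daemon whose rank_generation is below the highest generation recorded for its rank as stale, which is why the "all gens stale" case removes nfs.1.2 and schedules a new daemon at rank 1, generation 4.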