From 516edc9a3bbca7097cb638f35f1b42a78900f1ac Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Fri, 23 Apr 2021 15:31:14 -0400
Subject: [PATCH] mgr/cephadm: support creation of daemons with ranks

- we need to assign all names and update the rank_map before we start
  creating daemons.
- if we are using ranks, we should delete old daemons first, and fence
  them from the cluster (where possible).

Signed-off-by: Sage Weil
(cherry picked from commit e8b07982b50e35a546a52b092cac5a825c37981d)
---
 src/pybind/mgr/cephadm/module.py              |  4 +-
 src/pybind/mgr/cephadm/serve.py               | 46 ++++++++++++++++---
 .../mgr/cephadm/services/cephadmservice.py    | 13 ++++++
 3 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index 7bc89c46c2bd4..15e0ae604987b 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -2246,12 +2246,14 @@ Then run the following:
                 'service_type': spec.service_type,
                 'data': self._preview_osdspecs(osdspecs=[cast(DriveGroupSpec, spec)])}
 
+        svc = self.cephadm_services[spec.service_type]
         ha = HostAssignment(
             spec=spec,
             hosts=self._schedulable_hosts(),
             networks=self.cache.networks,
             daemons=self.cache.get_daemons_by_service(spec.service_name()),
-            allow_colo=self.cephadm_services[spec.service_type].allow_colo(),
+            allow_colo=svc.allow_colo(),
+            rank_map=self.spec_store[spec.service_name()].rank_map if svc.ranked() else None
         )
         ha.validate()
         hosts, to_add, to_remove = ha.place()
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
index 08cc4445d1bbe..f4499cba07d29 100644
--- a/src/pybind/mgr/cephadm/serve.py
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -664,6 +664,9 @@ class CephadmServe:
                 )
                 return False
 
+        rank_map = None
+        if svc.ranked():
+            rank_map = self.mgr.spec_store[spec.service_name()].rank_map or {}
         ha = HostAssignment(
             spec=spec,
             hosts=self.mgr._schedulable_hosts(),
@@ -676,6 +679,7 @@
             allow_colo=svc.allow_colo(),
             primary_daemon_type=svc.primary_daemon_type(),
             per_host_daemon_type=svc.per_host_daemon_type(),
+            rank_map=rank_map,
         )
 
         try:
@@ -703,6 +707,40 @@
         self.log.debug('Hosts that will receive new daemons: %s' % slots_to_add)
         self.log.debug('Daemons that will be removed: %s' % daemons_to_remove)
 
+        # assign names
+        for i in range(len(slots_to_add)):
+            slot = slots_to_add[i]
+            slot = slot.assign_name(self.mgr.get_unique_name(
+                slot.daemon_type,
+                slot.hostname,
+                daemons,
+                prefix=spec.service_id,
+                forcename=slot.name,
+                rank=slot.rank,
+                rank_generation=slot.rank_generation,
+            ))
+            slots_to_add[i] = slot
+            if rank_map is not None:
+                assert slot.rank is not None
+                assert slot.rank_generation is not None
+                assert rank_map[slot.rank][slot.rank_generation] is None
+                rank_map[slot.rank][slot.rank_generation] = slot.name
+
+        if rank_map:
+            # record the rank_map before we make changes so that if we fail the
+            # next mgr will clean up.
+            self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)
+
+            # remove daemons now, since we are going to fence them anyway
+            for d in daemons_to_remove:
+                assert d.hostname is not None
+                self._remove_daemon(d.name(), d.hostname)
+            daemons_to_remove = []
+
+            # fence them
+            svc.fence_old_ranks(spec, rank_map, len(all_slots))
+
+        # create daemons
         for slot in slots_to_add:
             # first remove daemon on conflicting port?
             if slot.ports:
@@ -723,13 +761,7 @@
                     break
 
             # deploy new daemon
-            daemon_id = self.mgr.get_unique_name(
-                slot.daemon_type,
-                slot.hostname,
-                daemons,
-                prefix=spec.service_id,
-                forcename=slot.name)
-
+            daemon_id = slot.name
             if not did_config:
                 svc.config(spec, daemon_id)
                 did_config = True
diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py
index 66c6a92b7270e..96e459c70a7e7 100644
--- a/src/pybind/mgr/cephadm/services/cephadmservice.py
+++ b/src/pybind/mgr/cephadm/services/cephadmservice.py
@@ -145,6 +145,19 @@ class CephadmService(metaclass=ABCMeta):
         """
         return None
 
+    def ranked(self) -> bool:
+        """
+        If True, we will assign a stable rank (0, 1, ...) and monotonically increasing
+        generation (0, 1, ...) to each daemon we create/deploy.
+        """
+        return False
+
+    def fence_old_ranks(self,
+                        spec: ServiceSpec,
+                        rank_map: Dict[int, Dict[int, Optional[str]]],
+                        num_ranks: int) -> None:
+        assert False
+
     def make_daemon_spec(
             self,
             host: str,
--
2.39.5
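
Reviewer note (not part of the patch): the rank_map that HostAssignment
consumes and fence_old_ranks() receives maps rank -> generation -> daemon
name, per the Dict[int, Dict[int, Optional[str]]] annotation above. A None
name marks a (rank, generation) slot that has been scheduled but not yet
named; the `assert rank_map[slot.rank][slot.rank_generation] is None` in
serve.py checks exactly that before the newly assigned name is filled in.
A minimal illustration of the shape, with made-up daemon names:

    # Illustration only; daemon names are invented for the example.
    rank_map = {
        0: {0: 'foo.host1.aaaaaa'},      # rank 0: generation 0 is deployed
        1: {0: 'foo.host2.bbbbbb',       # rank 1 was redeployed once;
            1: None},                    #   generation 1 awaits its name
    }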
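
The base class leaves fence_old_ranks() as a hard `assert False` because a
service must not return True from ranked() without supplying its own fencing
logic. Below is a sketch, not code from this patch, of what a ranked
subclass could look like. The service type, the _fence() helper, and the
pruning policy are all hypothetical; a real service would fence old daemons
via whatever mechanism it has available (the commit message's "where
possible"). Only save_rank_map() and the method signatures come from the
patch itself.

    from typing import Dict, Optional

    from ceph.deployment.service_spec import ServiceSpec
    from cephadm.services.cephadmservice import CephadmService


    class ExampleRankedService(CephadmService):  # hypothetical service type
        TYPE = 'example'

        def ranked(self) -> bool:
            # Opt in to stable rank / generation assignment.
            return True

        def fence_old_ranks(self,
                            spec: ServiceSpec,
                            rank_map: Dict[int, Dict[int, Optional[str]]],
                            num_ranks: int) -> None:
            # serve.py has already removed the superseded daemons by the
            # time this runs; here we cut stale (rank, generation) pairs
            # off from the cluster and prune them from the map.
            for rank, gens in list(rank_map.items()):
                newest_gen = max(gens.keys(), default=-1)
                for gen, daemon_id in list(gens.items()):
                    if gen < newest_gen or rank >= num_ranks:
                        if daemon_id is not None:
                            # _fence() is a hypothetical, service-specific helper.
                            self._fence(rank, gen, daemon_id)
                        del gens[gen]
                # Ranks beyond the target count go away entirely.
                if rank >= num_ranks:
                    del rank_map[rank]
            # Persist the pruned map, as serve.py does after assigning names.
            self.mgr.spec_store.save_rank_map(spec.service_name(), rank_map)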