From: Joshua Schmid
Date: Thu, 2 Apr 2020 09:33:35 +0000 (+0200)
Subject: mgr/cephadm: add replacement flag to _create_osd
X-Git-Tag: v15.2.2~59^2~12
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2189efd802d89d032aa6c78d8bbc2c719c6f2f6c;p=ceph.git

mgr/cephadm: add replacement flag to _create_osd

Signed-off-by: Joshua Schmid
(cherry picked from commit e670b1ea83f66d977c919ef956cc4ae0db9cb453)
---

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index a2e5d1c9b959..2adc81e6b4d5
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -2084,15 +2084,18 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
         return blink(locs)
 
     def get_osd_uuid_map(self, only_up=False):
-        # type: (bool) -> Dict[str,str]
+        # type: (bool) -> Dict[str, str]
         osd_map = self.get('osd_map')
         r = {}
         for o in osd_map['osds']:
             # only include OSDs that have ever started in this map. this way
             # an interrupted osd create can be repeated and succeed the second
             # time around.
-            if not only_up or o['up_from'] > 0:
-                r[str(o['osd'])] = o['uuid']
+            osd_id = o.get('osd')
+            if osd_id is None:
+                raise OrchestratorError("Could not retrieve osd_id from osd_map")
+            if not only_up or (o['up_from'] > 0):
+                r[str(osd_id)] = o.get('uuid', '')
         return r
 
     @trivial_completion
@@ -2126,13 +2129,17 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
     def create_osds(self, drive_group: DriveGroupSpec):
         self.log.debug(f"Processing DriveGroup {drive_group}")
         ret = []
+        drive_group.osd_id_claims = self.find_destroyed_osds()
+        self.log.info(f"Found osd claims for drivegroup {drive_group.service_id} -> {drive_group.osd_id_claims}")
         for host, drive_selection in self.prepare_drivegroup(drive_group):
             self.log.info('Applying %s on host %s...'
                           % (drive_group.service_id, host))
-            cmd = self.driveselection_to_ceph_volume(drive_group, drive_selection)
+            cmd = self.driveselection_to_ceph_volume(drive_group, drive_selection,
+                                                     drive_group.osd_id_claims.get(host, []))
             if not cmd:
                 self.log.debug("No data_devices, skipping DriveGroup: {}".format(drive_group.service_id))
                 continue
-            ret_msg = self._create_osd(host, cmd)
+            ret_msg = self._create_osd(host, cmd,
+                                       replace_osd_ids=drive_group.osd_id_claims.get(host, []))
             ret.append(ret_msg)
         return ", ".join(ret)
@@ -2143,8 +2150,6 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
         host_ds_map = []
 
         # set osd_id_claims
-        drive_group.osd_id_claims = self.find_destroyed_osds()
-        self.log.info(f"Found osd claims for drivegroup {drive_group.service_id} -> {drive_group.osd_id_claims}")
 
         def _find_inv_for_host(hostname: str, inventory_dict: dict):
             # This is stupid and needs to be loaded with the host
@@ -2165,9 +2170,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
 
     def driveselection_to_ceph_volume(self, drive_group: DriveGroupSpec,
                                       drive_selection: DriveSelection,
+                                      osd_id_claims: Optional[List[str]] = None,
                                       preview: bool = False) -> Optional[str]:
         self.log.debug(f"Translating DriveGroup <{drive_group}> to ceph-volume command")
-        cmd: Optional[str] = translate.to_ceph_volume(drive_group, drive_selection, preview=preview).run()
+        cmd: Optional[str] = translate.to_ceph_volume(drive_group, drive_selection, osd_id_claims, preview=preview).run()
         self.log.debug(f"Resulting ceph-volume cmd: {cmd}")
         return cmd
 
@@ -2183,9 +2189,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
             drive_groups = []
         ret_all = []
         for drive_group in drive_groups:
+            drive_group.osd_id_claims = self.find_destroyed_osds()
+            self.log.info(f"Found osd claims for drivegroup {drive_group.service_id} -> {drive_group.osd_id_claims}")
             # prepare driveselection
             for host, ds in self.prepare_drivegroup(drive_group):
-                cmd = self.driveselection_to_ceph_volume(drive_group, ds, preview=True)
+                cmd = self.driveselection_to_ceph_volume(drive_group, ds,
+                                                         drive_group.osd_id_claims.get(host, []), preview=True)
                 if not cmd:
                     self.log.debug("No data_devices, skipping DriveGroup: {}".format(drive_group.service_name()))
                     continue
@@ -2224,7 +2233,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                                              error_ok=True)
         return out, err, code
 
-    def _create_osd(self, host, cmd):
+    def _create_osd(self, host, cmd, replace_osd_ids=None):
         out, err, code = self._run_ceph_volume_command(host, cmd)
 
         if code == 1 and ', it is already prepared' in '\n'.join(err):
@@ -2256,16 +2265,16 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                 if osd['tags']['ceph.cluster_fsid'] != fsid:
                     self.log.debug('mismatched fsid, skipping %s' % osd)
                     continue
-                if osd_id in before_osd_uuid_map:
-                    # this osd existed before we ran prepare
+                if osd_id in before_osd_uuid_map and osd_id not in replace_osd_ids:
+                    # if it exists but is part of the replacement operation, don't skip
                     continue
                 if osd_id not in osd_uuid_map:
-                    self.log.debug('osd id %d does not exist in cluster' % osd_id)
+                    self.log.debug('osd id {} does not exist in cluster'.format(osd_id))
                     continue
-                if osd_uuid_map[osd_id] != osd['tags']['ceph.osd_fsid']:
+                if osd_uuid_map.get(osd_id) != osd['tags']['ceph.osd_fsid']:
                     self.log.debug('mismatched osd uuid (cluster has %s, osd '
                                    'has %s)' % (
-                                       osd_uuid_map[osd_id],
+                                       osd_uuid_map.get(osd_id),
                                        osd['tags']['ceph.osd_fsid']))
                     continue
 
@@ -2360,7 +2369,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
         if daemon_type == 'osd':
             if not osd_uuid_map:
                 osd_uuid_map = self.get_osd_uuid_map()
-            osd_uuid = osd_uuid_map.get(daemon_id, None)
+            osd_uuid = osd_uuid_map.get(daemon_id)
             if not osd_uuid:
                 raise OrchestratorError('osd.%d not in osdmap' % daemon_id)
             extra_args.extend(['--osd-fsid', osd_uuid])
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index 24146c765162..21cf5afe4f53
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -288,7 +288,7 @@ class TestCephadm(object):
         dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'), data_devices=DeviceSelection(paths=devices))
         ds = DriveSelection(dg, Devices([Device(path) for path in devices]))
         preview = preview
-        out = cephadm_module.driveselection_to_ceph_volume(dg, ds, preview)
+        out = cephadm_module.driveselection_to_ceph_volume(dg, ds, [], preview)
         assert out in exp_command
 
     @mock.patch("cephadm.module.SpecStore.find")
diff --git a/src/python-common/ceph/deployment/translate.py b/src/python-common/ceph/deployment/translate.py
index 6f0fd1107e54..2412e8618673
--- a/src/python-common/ceph/deployment/translate.py
+++ b/src/python-common/ceph/deployment/translate.py
@@ -16,14 +16,14 @@ class to_ceph_volume(object):
     def __init__(self,
                  spec,  # type: DriveGroupSpec
                  selection,  # type: DriveSelection
-                 preview=False
-                 host  # type: str
+                 osd_id_claims=None,  # type: Optional[List[str]]
+                 preview=False  # type: bool
                  ):
 
         self.spec = spec
         self.selection = selection
         self.preview = preview
-        self.host = host
+        self.osd_id_claims = osd_id_claims
 
     def run(self):
         # type: () -> Optional[str]
@@ -32,7 +32,6 @@ class to_ceph_volume(object):
         db_devices = [x.path for x in self.selection.db_devices()]
         wal_devices = [x.path for x in self.selection.wal_devices()]
         journal_devices = [x.path for x in self.selection.journal_devices()]
-        reclaimed_ids: List[str] = self.spec.osd_id_claims.get(self.host, [])
 
         if not data_devices:
             return None
@@ -60,6 +59,8 @@ class to_ceph_volume(object):
                 not db_devices and \
                 not wal_devices:
             cmd = "lvm prepare --bluestore --data %s --no-systemd" % (' '.join(data_devices))
+            if self.osd_id_claims:
+                cmd += " --osd-id {}".format(str(self.osd_id_claims[0]))
             if self.preview:
                 # Like every horrible hack, this has sideffects on other features.
                 # In this case, 'lvm prepare' has neither a '--report' nor a '--format json' option
@@ -90,8 +91,8 @@ class to_ceph_volume(object):
             if self.spec.osds_per_device:
                 cmd += " --osds-per-device {}".format(self.spec.osds_per_device)
 
-            if reclaimed_ids:
-                cmd += " --osd-ids {}".format(" ".join(reclaimed_ids))
+            if self.osd_id_claims:
+                cmd += " --osd-ids {}".format(" ".join(self.osd_id_claims))
 
             cmd += " --yes"
             cmd += " --no-systemd"
diff --git a/src/python-common/ceph/tests/test_drive_group.py b/src/python-common/ceph/tests/test_drive_group.py
index 7cfbf85da048..c0734ced82e3
--- a/src/python-common/ceph/tests/test_drive_group.py
+++ b/src/python-common/ceph/tests/test_drive_group.py
@@ -72,7 +72,7 @@ def test_ceph_volume_command_0():
     )
     inventory = _mk_inventory(_mk_device()*2)
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == 'lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd'
 
 
@@ -83,7 +83,7 @@ def test_ceph_volume_command_1():
     )
     inventory = _mk_inventory(_mk_device(rotational=True)*2 + _mk_device(rotational=False)*2)
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb '
                    '--db-devices /dev/sdc /dev/sdd --yes --no-systemd')
 
@@ -99,7 +99,7 @@ def test_ceph_volume_command_2():
         _mk_device(size="10.0 GB", rotational=False)*2
     )
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb '
                    '--db-devices /dev/sdc /dev/sdd --wal-devices /dev/sde /dev/sdf '
                    '--yes --no-systemd')
@@ -117,7 +117,7 @@ def test_ceph_volume_command_3():
         _mk_device(size="10.0 GB", rotational=False)*2
     )
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb '
                    '--db-devices /dev/sdc /dev/sdd '
                    '--wal-devices /dev/sde /dev/sdf --dmcrypt '
@@ -139,7 +139,7 @@ def test_ceph_volume_command_4():
         _mk_device(size="10.0 GB", rotational=False)*2
     )
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb '
                    '--db-devices /dev/sdc /dev/sdd --wal-devices /dev/sde /dev/sdf '
                    '--block-wal-size 500M --block-db-size 500M --dmcrypt '
@@ -153,7 +153,7 @@ def test_ceph_volume_command_5():
     )
     inventory = _mk_inventory(_mk_device(rotational=True)*2)
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == 'lvm batch --no-auto /dev/sda /dev/sdb --filestore --yes --no-systemd'
 
 
@@ -166,7 +166,7 @@ def test_ceph_volume_command_6():
    )
     inventory = _mk_inventory(_mk_device(rotational=True)*2 + _mk_device(rotational=False)*2)
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, []).run()
     assert cmd == ('lvm batch --no-auto /dev/sdc /dev/sdd '
                    '--journal-size 500M --journal-devices /dev/sda /dev/sdb '
                    '--filestore --yes --no-systemd')
@@ -179,5 +179,5 @@ def test_ceph_volume_command_7():
     )
     inventory = _mk_inventory(_mk_device(rotational=True)*2)
     sel = drive_selection.DriveSelection(spec, inventory)
-    cmd = translate.to_ceph_volume(spec, sel, 'host1').run()
+    cmd = translate.to_ceph_volume(spec, sel, ['0', '1']).run()
     assert cmd == 'lvm batch --no-auto /dev/sda /dev/sdb --osd-ids 0 1 --yes --no-systemd'
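
For illustration, here is a minimal, dependency-free sketch of what the reclaimed-id handling above boils down to. It is not the real to_ceph_volume implementation (that lives in src/python-common/ceph/deployment/translate.py and also covers db/wal/journal devices, dmcrypt, filestore and osds-per-device); batch_cmd below is an invented helper whose output simply mirrors test_ceph_volume_command_0 and test_ceph_volume_command_7 from this patch: with no claims the batch command is unchanged, and claimed ids of destroyed OSDs are replayed via --osd-ids.

    # Illustration only -- a simplified stand-in for to_ceph_volume.run(),
    # showing how osd_id_claims turns into the ceph-volume --osd-ids flag.
    from typing import List, Optional

    def batch_cmd(data_devices, osd_id_claims=None):
        # type: (List[str], Optional[List[str]]) -> str
        cmd = "lvm batch --no-auto {}".format(" ".join(data_devices))
        if osd_id_claims:
            # ids of previously destroyed OSDs, reused for the replacement OSDs
            cmd += " --osd-ids {}".format(" ".join(osd_id_claims))
        cmd += " --yes"
        cmd += " --no-systemd"
        return cmd

    print(batch_cmd(["/dev/sda", "/dev/sdb"]))
    # lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd
    print(batch_cmd(["/dev/sda", "/dev/sdb"], ["0", "1"]))
    # lvm batch --no-auto /dev/sda /dev/sdb --osd-ids 0 1 --yes --no-systemd

In the orchestrator path above, create_osds() fills drive_group.osd_id_claims from find_destroyed_osds(), passes the per-host ids into driveselection_to_ceph_volume(), and hands the same ids to _create_osd() as replace_osd_ids so that OSDs being replaced are not skipped when the ceph-volume output is scanned.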