From: Joshua Schmid Date: Wed, 1 Apr 2020 13:56:08 +0000 (+0200) Subject: mgr/cephadm: add support for osd_id_claims X-Git-Tag: v16.1.0~2580^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5b32c3e51b517332b7ac26b161b7b6b4fae05b85;p=ceph.git mgr/cephadm: add support for osd_id_claims Signed-off-by: Joshua Schmid --- diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 73581c99e4a6..a2e5d1c9b959 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -2099,6 +2099,29 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): def apply_drivegroups(self, specs: List[DriveGroupSpec]): return [self._apply(spec) for spec in specs] + def find_destroyed_osds(self) -> Dict[str, List[str]]: + osd_host_map: Dict[str, List[str]] = dict() + ret, out, err = self.mon_command({ + 'prefix': 'osd tree', + 'states': ['destroyed'], + 'format': 'json' + }) + if ret != 0: + raise OrchestratorError(f"Caught error on calling 'osd tree destroyed' -> {err}") + try: + tree = json.loads(out) + except json.decoder.JSONDecodeError: + self.log.error(f"Could not decode json -> {out}") + return osd_host_map + + nodes = tree.get('nodes', {}) + for node in nodes: + if node.get('type') == 'host': + osd_host_map.update( + {node.get('name'): [str(_id) for _id in node.get('children', list())]} + ) + return osd_host_map + @trivial_completion def create_osds(self, drive_group: DriveGroupSpec): self.log.debug(f"Processing DriveGroup {drive_group}") @@ -2119,6 +2142,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): # 2) Map the inventory to the InventoryHost object host_ds_map = [] + # set osd_id_claims + drive_group.osd_id_claims = self.find_destroyed_osds() + self.log.info(f"Found osd claims for drivegroup {drive_group.service_id} -> {drive_group.osd_id_claims}") + def _find_inv_for_host(hostname: str, inventory_dict: dict): # This is stupid and needs to be loaded with the host for _host, _inventory in inventory_dict.items(): diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 4bd9d3558b4b..24146c765162 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -178,6 +178,57 @@ class TestCephadm(object): r = cephadm_module._apply_service(ServiceSpec('mgr', placement=ps)) assert r + @mock.patch("cephadm.module.CephadmOrchestrator.mon_command") + def test_find_destroyed_osds(self, _mon_cmd, cephadm_module): + dict_out = { + "nodes": [ + { + "id": -1, + "name": "default", + "type": "root", + "type_id": 11, + "children": [ + -3 + ] + }, + { + "id": -3, + "name": "host1", + "type": "host", + "type_id": 1, + "pool_weights": {}, + "children": [ + 0 + ] + }, + { + "id": 0, + "device_class": "hdd", + "name": "osd.0", + "type": "osd", + "type_id": 0, + "crush_weight": 0.0243988037109375, + "depth": 2, + "pool_weights": {}, + "exists": 1, + "status": "destroyed", + "reweight": 1, + "primary_affinity": 1 + } + ], + "stray": [] + } + json_out = json.dumps(dict_out) + _mon_cmd.return_value = (0, json_out, '') + out = cephadm_module.find_destroyed_osds() + assert out == {'host1': ['0']} + + @mock.patch("cephadm.module.CephadmOrchestrator.mon_command") + def test_find_destroyed_osds_cmd_failure(self, _mon_cmd, cephadm_module): + _mon_cmd.return_value = (1, "", "fail_msg") + with pytest.raises(OrchestratorError): + out = cephadm_module.find_destroyed_osds() + @mock.patch("cephadm.module.CephadmOrchestrator._run_cephadm", _run_cephadm('{}')) @mock.patch("cephadm.module.SpecStore.save") def test_apply_osd_save(self, _save_spec, cephadm_module): diff --git a/src/python-common/ceph/deployment/drive_group.py b/src/python-common/ceph/deployment/drive_group.py index d66ac8828c73..0b5b489de98d 100644 --- a/src/python-common/ceph/deployment/drive_group.py +++ b/src/python-common/ceph/deployment/drive_group.py @@ -144,7 +144,7 @@ class DriveGroupSpec(ServiceSpec): encrypted=False, # type: bool db_slots=None, # type: Optional[int] wal_slots=None, # type: Optional[int] - osd_id_claims=None, # type: Optional[Dict[str, DeviceSelection]] + osd_id_claims=None, # type: Optional[Dict[str, List[str]]] block_db_size=None, # type: Optional[int] block_wal_size=None, # type: Optional[int] journal_size=None, # type: Optional[int] @@ -196,10 +196,9 @@ class DriveGroupSpec(ServiceSpec): #: How many OSDs per WAL device self.wal_slots = wal_slots - #: Optional: mapping of OSD id to DeviceSelection, used when the - #: created OSDs are meant to replace previous OSDs on - #: the same node. See :ref:`orchestrator-osd-replace` - self.osd_id_claims = osd_id_claims + #: Optional: mapping of host -> List of osd_ids that should be replaced + #: See :ref:`orchestrator-osd-replace` + self.osd_id_claims = osd_id_claims or dict() @classmethod def _from_json_impl(cls, json_drive_group): diff --git a/src/python-common/ceph/deployment/translate.py b/src/python-common/ceph/deployment/translate.py index 5d5160546554..be2cde200778 100644 --- a/src/python-common/ceph/deployment/translate.py +++ b/src/python-common/ceph/deployment/translate.py @@ -1,7 +1,7 @@ import logging try: - from typing import Optional + from typing import Optional, List except ImportError: pass @@ -17,11 +17,13 @@ class to_ceph_volume(object): spec, # type: DriveGroupSpec selection, # type: DriveSelection preview=False + host # type: str ): self.spec = spec self.selection = selection self.preview = preview + self.host = host def run(self): # type: () -> Optional[str] @@ -30,10 +32,12 @@ class to_ceph_volume(object): db_devices = [x.path for x in self.selection.db_devices()] wal_devices = [x.path for x in self.selection.wal_devices()] journal_devices = [x.path for x in self.selection.journal_devices()] + reclaimed_ids: List[str] = self.spec.osd_id_claims.get(self.host, []) if not data_devices: return None + cmd = "" if self.spec.objectstore == 'filestore': cmd = "lvm batch --no-auto" @@ -86,6 +90,9 @@ class to_ceph_volume(object): if self.spec.osds_per_device: cmd += " --osds-per-device {}".format(self.spec.osds_per_device) + if reclaimed_ids: + cmd += " --osd-id {}".format(" ".join(reclaimed_ids)) + cmd += " --yes" cmd += " --no-systemd" diff --git a/src/python-common/ceph/tests/test_drive_group.py b/src/python-common/ceph/tests/test_drive_group.py index d0cf37c17b29..647129a312af 100644 --- a/src/python-common/ceph/tests/test_drive_group.py +++ b/src/python-common/ceph/tests/test_drive_group.py @@ -72,7 +72,7 @@ def test_ceph_volume_command_0(): ) inventory = _mk_inventory(_mk_device()*2) sel = drive_selection.DriveSelection(spec, inventory) - cmd = translate.to_ceph_volume(spec, sel).run() + cmd = translate.to_ceph_volume(spec, sel, 'host1').run() assert cmd == 'lvm batch --no-auto /dev/sda /dev/sdb --yes --no-systemd' @@ -83,7 +83,7 @@ def test_ceph_volume_command_1(): ) inventory = _mk_inventory(_mk_device(rotational=True)*2 + _mk_device(rotational=False)*2) sel = drive_selection.DriveSelection(spec, inventory) - cmd = translate.to_ceph_volume(spec, sel).run() + cmd = translate.to_ceph_volume(spec, sel, 'host1').run() assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb ' '--db-devices /dev/sdc /dev/sdd --yes --no-systemd') @@ -99,7 +99,7 @@ def test_ceph_volume_command_2(): _mk_device(size="10.0 GB", rotational=False)*2 ) sel = drive_selection.DriveSelection(spec, inventory) - cmd = translate.to_ceph_volume(spec, sel).run() + cmd = translate.to_ceph_volume(spec, sel, 'host1').run() assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb ' '--db-devices /dev/sdc /dev/sdd --wal-devices /dev/sde /dev/sdf ' '--yes --no-systemd') @@ -117,7 +117,7 @@ def test_ceph_volume_command_3(): _mk_device(size="10.0 GB", rotational=False)*2 ) sel = drive_selection.DriveSelection(spec, inventory) - cmd = translate.to_ceph_volume(spec, sel).run() + cmd = translate.to_ceph_volume(spec, sel, 'host1').run() assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb ' '--db-devices /dev/sdc /dev/sdd ' '--wal-devices /dev/sde /dev/sdf --dmcrypt ' @@ -139,7 +139,7 @@ def test_ceph_volume_command_4(): _mk_device(size="10.0 GB", rotational=False)*2 ) sel = drive_selection.DriveSelection(spec, inventory) - cmd = translate.to_ceph_volume(spec, sel).run() + cmd = translate.to_ceph_volume(spec, sel, 'host1').run() assert cmd == ('lvm batch --no-auto /dev/sda /dev/sdb ' '--db-devices /dev/sdc /dev/sdd --wal-devices /dev/sde /dev/sdf ' '--block-wal-size 500M --block-db-size 500M --dmcrypt ' @@ -153,7 +153,7 @@ def test_ceph_volume_command_5(): ) inventory = _mk_inventory(_mk_device(rotational=True)*2) sel = drive_selection.DriveSelection(spec, inventory) - cmd = translate.to_ceph_volume(spec, sel).run() + cmd = translate.to_ceph_volume(spec, sel, 'host1').run() assert cmd == 'lvm batch --no-auto /dev/sda /dev/sdb --filestore --yes --no-systemd' @@ -166,7 +166,18 @@ def test_ceph_volume_command_6(): ) inventory = _mk_inventory(_mk_device(rotational=True)*2 + _mk_device(rotational=False)*2) sel = drive_selection.DriveSelection(spec, inventory) - cmd = translate.to_ceph_volume(spec, sel).run() + cmd = translate.to_ceph_volume(spec, sel, 'host1').run() assert cmd == ('lvm batch --no-auto /dev/sdc /dev/sdd ' '--journal-size 500M --journal-devices /dev/sda /dev/sdb ' '--filestore --yes --no-systemd') + + +def test_ceph_volume_command_7(): + spec = DriveGroupSpec(placement=PlacementSpec(host_pattern='*'), + data_devices=DeviceSelection(all=True), + osd_id_claims={'host1': ['0', '1']} + ) + inventory = _mk_inventory(_mk_device(rotational=True)*2) + sel = drive_selection.DriveSelection(spec, inventory) + cmd = translate.to_ceph_volume(spec, sel, 'host1').run() + assert cmd == 'lvm batch --no-auto /dev/sda /dev/sdb --osd-id 0 1 --yes --no-systemd'