From ef810a4ebf91b538f778da4ca3ea0fdc2968e2fe Mon Sep 17 00:00:00 2001 From: Guillaume Abrioux Date: Fri, 31 Mar 2023 11:27:13 +0200 Subject: [PATCH] cephadm: osd replacement improvement This adds a new parameter `--no-destroy` to the command `ceph orch osd rm`. By default, it removes any VGs/LVs related to the osd being removed. For specific workflows, this can be useful to preserve them. Fixes: https://tracker.ceph.com/issues/59289 Signed-off-by: Guillaume Abrioux --- src/pybind/mgr/cephadm/module.py | 4 +++- src/pybind/mgr/cephadm/services/osd.py | 10 ++++++++-- src/pybind/mgr/orchestrator/_interface.py | 4 +++- src/pybind/mgr/orchestrator/module.py | 6 ++++-- src/pybind/mgr/rook/module.py | 7 ++++++- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 4de7083c3ec85..555a405ee5e2a 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -3172,7 +3172,8 @@ Then run the following: def remove_osds(self, osd_ids: List[str], replace: bool = False, force: bool = False, - zap: bool = False) -> str: + zap: bool = False, + no_destroy: bool = False) -> str: """ Takes a list of OSDs and schedules them for removal. 
The function that takes care of the actual removal is @@ -3194,6 +3195,7 @@ Then run the following: replace=replace, force=force, zap=zap, + no_destroy=no_destroy, hostname=daemon.hostname, process_started_at=datetime_now(), remove_util=self.to_remove_osds.rm_util)) diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py index 31771fb5fcebe..9f404646f384b 100644 --- a/src/pybind/mgr/cephadm/services/osd.py +++ b/src/pybind/mgr/cephadm/services/osd.py @@ -542,9 +542,12 @@ class RemoveUtil(object): def zap_osd(self, osd: "OSD") -> str: "Zaps all devices that are associated with an OSD" if osd.hostname is not None: + cmd = ['--', 'lvm', 'zap', '--osd-id', str(osd.osd_id)] + if not osd.no_destroy: + cmd.append('--destroy') out, err, code = self.mgr.wait_async(CephadmServe(self.mgr)._run_cephadm( osd.hostname, 'osd', 'ceph-volume', - ['--', 'lvm', 'zap', '--destroy', '--osd-id', str(osd.osd_id)], + cmd, error_ok=True)) self.mgr.cache.invalidate_host_devices(osd.hostname) if code: @@ -608,7 +611,8 @@ class OSD: replace: bool = False, force: bool = False, hostname: Optional[str] = None, - zap: bool = False): + zap: bool = False, + no_destroy: bool = False): # the ID of the OSD self.osd_id = osd_id @@ -647,6 +651,8 @@ class OSD: # Whether devices associated with the OSD should be zapped (DATA ERASED) self.zap = zap + # Whether the VGs/LVs associated with the OSD should be preserved (zap without '--destroy'). 
+ self.no_destroy = no_destroy def start(self) -> None: if self.started: diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 0321ef878c53b..4db5372503116 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -589,12 +589,14 @@ class Orchestrator(object): def remove_osds(self, osd_ids: List[str], replace: bool = False, force: bool = False, - zap: bool = False) -> OrchResult[str]: + zap: bool = False, + no_destroy: bool = False) -> OrchResult[str]: """ :param osd_ids: list of OSD IDs :param replace: marks the OSD as being destroyed. See :ref:`orchestrator-osd-replace` :param force: Forces the OSD removal process without waiting for the data to be drained first. :param zap: Zap/Erase all devices associated with the OSDs (DESTROYS DATA) + :param no_destroy: Do not destroy the VGs/LVs associated with the OSD. .. note:: this can only remove OSDs that were successfully diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index 963f63259e20e..c32a6dfda53e5 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -1000,9 +1000,11 @@ Usage: osd_id: List[str], replace: bool = False, force: bool = False, - zap: bool = False) -> HandleCommandResult: + zap: bool = False, + no_destroy: bool = False) -> HandleCommandResult: """Remove OSD daemons""" - completion = self.remove_osds(osd_id, replace=replace, force=force, zap=zap) + completion = self.remove_osds(osd_id, replace=replace, force=force, + zap=zap, no_destroy=no_destroy) raise_if_exception(completion) return HandleCommandResult(stdout=completion.result_str()) diff --git a/src/pybind/mgr/rook/module.py b/src/pybind/mgr/rook/module.py index 370cb6582199d..b67349d1bff62 100644 --- a/src/pybind/mgr/rook/module.py +++ b/src/pybind/mgr/rook/module.py @@ -625,7 +625,12 @@ class RookOrchestrator(MgrModule, orchestrator.Orchestrator): } 
self.set_store("drive_group_map", json.dumps(json_drive_group_map)) - def remove_osds(self, osd_ids: List[str], replace: bool = False, force: bool = False, zap: bool = False) -> OrchResult[str]: + def remove_osds(self, + osd_ids: List[str], + replace: bool = False, + force: bool = False, + zap: bool = False, + no_destroy: bool = False) -> OrchResult[str]: assert self._rook_cluster is not None if zap: raise RuntimeError("Rook does not support zapping devices during OSD removal.") -- 2.39.5